Create Numpy dataset from images
If you have images and want to convert them to NumPy format, follow the steps below.
First, you need to install TensorFlow (the script below also imports pandas, so install it as well if it is missing).
1
$ pip install tensorflow
Copied!
After that, run the code below. It takes an image directory as input (images in .jpg, .png, or .tiff format) and converts each image to NumPy format (.npz).
  • Replace the image_dataset_dir path with your own and run the following snippet
conver_img_to_numpy.py
# Convert a directory of labelled images into per-image .npz files plus a
# train.csv index that maps each .npz file to its label.
#
# Expected input layout:  <image_dataset_dir>/<label>/<image file>
# Produced output layout: <new_dataset_folder>/<label>/<image stem>.npz
#                         <new_dataset_folder>/train.csv

import numpy as np
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import pandas as pd

# Source directory: one sub-directory per class label, each holding images.
image_dataset_dir = "datasets/cifar-10/images"
# Destination directory for the .npz files and the train.csv index.
new_dataset_folder = "datasets/cifar-10_new/"

# File extensions treated as images (compared case-insensitively).
image_extensions = (".jpg", ".png", ".tiff")

# Column-oriented records for train.csv: relative .npz path and its label.
dataset = {
    "image": [],
    "label": [],
}

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of train.csv reproducible between runs.
for label in sorted(os.listdir(image_dataset_dir)):
    images_dir = os.path.join(image_dataset_dir, label)
    if not os.path.isdir(images_dir):
        # Stray files next to the label directories are not classes.
        continue
    for image_file in sorted(os.listdir(images_dir)):
        # str.endswith only tests several suffixes when they are passed as a
        # single tuple; extra positional arguments are start/end offsets.
        if not image_file.lower().endswith(image_extensions):
            continue
        img = load_img(os.path.join(images_dir, image_file))
        x = img_to_array(img)

        # The path stored in the CSV is relative to new_dataset_folder and
        # always uses "/" so the index reads the same on every platform.
        rel_path = label + "/" + os.path.splitext(image_file)[0] + '.npz'
        os.makedirs(os.path.join(new_dataset_folder, label), exist_ok=True)
        npz_file = os.path.join(new_dataset_folder, rel_path)
        # A positional array is stored under numpy's default key "arr_0".
        np.savez(npz_file, x)
        dataset["image"].append(rel_path)
        dataset["label"].append(label)

# Make sure the output directory exists even when no image was converted,
# otherwise writing train.csv would fail.
os.makedirs(new_dataset_folder, exist_ok=True)

df = pd.DataFrame(dataset)
df.to_csv(os.path.join(new_dataset_folder, "train.csv"), index=False)

print('Dataset converted to npz and saved here at %s '%new_dataset_folder)

# In a notebook this displays the first rows of the index; in a plain script
# the result is discarded.
df.head()
Copied!
Congratulations!
You have successfully converted your image dataset into NumPy format.
Last modified 1yr ago
Copy link