-
Notifications
You must be signed in to change notification settings - Fork 4
/
tocsv.py
executable file
·20 lines (19 loc) · 966 Bytes
/
tocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from scipy.misc import imread
import numpy as np
import pandas as pd
import os
# Folder whose images are converted; use './test' to build the test CSV.
root = './train'

# The CSV is named after the root folder ('./train' -> 'train.csv') so that
# pointing root at another folder does not append into the training file.
output_csv = os.path.basename(os.path.normpath(root)) + '.csv'


def label_for_directory(directory, root):
    """Return the label encoded by *directory*'s location below *root*.

    The folders holding the digit images are expected to be named after the
    digit they contain (e.g. ``digit_0`` renamed to ``0``), so the path of a
    folder relative to *root* is used as the label of every image inside it.

    Args:
        directory: A directory path as produced by ``os.walk(root)``.
        root: The top-level folder that was passed to ``os.walk``.

    Returns:
        The relative path of *directory* below *root* (``'8'`` for
        ``'./train/8'``), or ``''`` when *directory* is *root* itself.
    """
    label = os.path.relpath(directory, root)
    # relpath reports the root folder itself as '.'; such images carry no
    # label, so keep the label column empty for them.
    return '' if label == os.curdir else label


# NOTE(review): `imread` comes from the file-level `scipy.misc` import, which
# was removed from SciPy in 1.2 -- confirm the installed SciPy still has it.

# Go through each directory below root and turn every image into one row:
# the label first, followed by the flattened pixel values.
rows = []
for directory, _, files in os.walk(root):
    label = label_for_directory(directory, root)
    for filename in files:
        pixels = imread(os.path.join(directory, filename)).flatten()
        rows.append(np.hstack((label, pixels)))

# Nothing is written (and no file is created) when no image was found.
if rows:
    # Shuffle once over the complete dataset; sampling a single-row frame
    # per image would leave the rows in directory order.
    df = pd.DataFrame(rows).sample(frac=1)
    # Append mode is kept on purpose: re-running the script adds to an
    # existing CSV instead of replacing it.
    with open(output_csv, 'a') as dataset:
        df.to_csv(dataset, header=False, index=False)