Commit 4088aea

add multi gpu training

RocketFlash committed Jul 16, 2020
1 parent 21fd6d9 commit 4088aea
Showing 4 changed files with 53 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -117,3 +117,5 @@ core
 work_dirs/
 wandb/
 *.csv
+tmp/
+configs/google_embeddings.yml
40 changes: 30 additions & 10 deletions embedding_net/datagenerators.py
@@ -10,6 +10,8 @@
 from .utils import get_image
 from tensorflow.keras import backend as K
 import tensorflow as tf
+import tqdm
+import pickle
 
 class ENDataLoader():
     def __init__(self, dataset_path,
@@ -18,14 +20,15 @@ def __init__(self, dataset_path,
                  image_id_column = 'image_id',
                  label_column = 'label',
                  validate = True,
-                 val_ratio = 0.1):
+                 val_ratio = 0.1,
+                 is_google=False):
 
         self.dataset_path = dataset_path
         self.class_files_paths = {}
         self.class_names = []
 
         if train_csv_file is not None:
-            self.class_files_paths = self._load_from_dataframe(train_csv_file, image_id_column, label_column)
+            self.class_files_paths = self._load_from_dataframe(train_csv_file, image_id_column, label_column, is_google)
         else:
             self.class_files_paths = self._load_from_directory()
 
@@ -38,7 +41,7 @@ def __init__(self, dataset_path,
         if self.validate:
             if val_csv_file is not None:
                 self.train_data = self.class_files_paths
-                self.val_data = self._load_from_dataframe(val_csv_file, image_id_column, label_column)
+                self.val_data = self._load_from_dataframe(val_csv_file, image_id_column, label_column, is_google)
             else:
                 self.train_data, self.val_data = self.split_train_val(self.val_ratio)
         else:
@@ -54,25 +57,43 @@ def split_train_val(self, val_ratio):
             val_data[k] = val_d
         return train_data, val_data
 
-    def _load_from_dataframe(self, csv_file, image_id_column, label_column):
+    def _load_from_dataframe(self, csv_file, image_id_column, label_column, is_google):
         class_files_paths = {}
 
+        # Load data from file if it's already created
+        os.makedirs('tmp', exist_ok=True)
+        if os.path.isfile('tmp/data.pickle'):
+            print('LOAD DATA FROM FILE')
+            with open('tmp/data.pickle', 'rb') as f:
+                class_files_paths = pickle.load(f)
+            self.class_names = list(class_files_paths.keys())
+            print('LOADING DATA FROM FILE COMPLETED')
+            return class_files_paths
+
         dataframe = pd.read_csv(csv_file)
         self.class_names = list(dataframe[label_column].unique())
-        for class_name in self.class_names:
+
+        for class_name in tqdm.tqdm(self.class_names):
             image_names = dataframe.loc[dataframe[label_column] == class_name][image_id_column]
-            image_paths = [os.path.join(self.dataset_path, f) for f in image_names]
+            if is_google:
+                image_paths = [os.path.join(self.dataset_path, f'{f[0]}/{f[1]}/{f[2]}/', f+'.jpg') for f in image_names]
+            else:
+                image_paths = [os.path.join(self.dataset_path, f) for f in image_names]
             class_files_paths[class_name] = image_paths
 
+        # Save data to file for fast loading
+        with open('tmp/data.pickle', 'wb') as f:
+            pickle.dump(class_files_paths, f)
         return class_files_paths
 
     def _load_from_directory(self):
         class_files_paths = {}
         self.class_names = [f.name for f in os.scandir(self.dataset_path) if f.is_dir()]
         class_dir_paths = [f.path for f in os.scandir(self.dataset_path) if f.is_dir()]
 
-        for class_name, class_dir_path in zip(self.class_names, class_dir_paths):
+        for class_name, class_dir_path in tqdm.tqdm(zip(self.class_names, class_dir_paths)):
             subdirs = [f.path for f in os.scandir(class_dir_path) if f.is_dir()]
             temp_list = []
+            print(class_dir_path)
             if len(subdirs)>0:
                 for subdir in subdirs:
                     class_image_paths = [f.path for f in os.scandir(subdir) if f.is_file() and
@@ -181,8 +202,7 @@ def get_batch_triplets_mining(self):
         selected_classes_idxs = np.random.choice(self.n_classes, size=self.k_classes, replace=False)
         selected_classes = [self.class_names[cl] for cl in selected_classes_idxs]
         selected_classes_n_elements = [self.n_samples[cl] for cl in selected_classes]
-
-        selected_images = [np.random.choice(cl_n, size=self.k_samples, replace=False) for cl_n in selected_classes_n_elements]
+        selected_images = [np.random.choice(cl_n, size=self.k_samples, replace=True) for cl_n in selected_classes_n_elements]
 
         all_embeddings_list = []
         all_images_list = []
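Note on the get_batch_triplets_mining change: with replace=False, np.random.choice raises a ValueError whenever a selected class has fewer images than k_samples; replace=True repeats images instead, so small classes can still fill a batch. A minimal illustration with made-up sizes:

    import numpy as np

    k_samples, class_size = 8, 5
    # replace=False would fail here: cannot take 8 distinct samples from 5
    idxs = np.random.choice(class_size, size=k_samples, replace=True)
    print(idxs)  # 8 indices in [0, 5), some repeated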
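Taken together, the datagenerators.py changes do two things: cache the parsed class-to-image-paths mapping as a pickle so repeated runs skip the slow CSV scan, and map Google Landmarks image ids onto their nested directory layout (an id like 'abc123' lives at a/b/c/abc123.jpg). Below is a standalone sketch of that logic; the per-CSV cache file name is an illustrative assumption, since the commit itself always reads and writes tmp/data.pickle (so a cache built from the train CSV would also be returned for a val CSV):

    import os
    import pickle
    import pandas as pd

    def load_class_paths(dataset_path, csv_file, image_id_column='image_id',
                         label_column='label', is_google=False):
        # Hypothetical per-CSV cache name; the commit hard-codes 'tmp/data.pickle'
        cache_path = os.path.join('tmp', os.path.basename(csv_file) + '.pickle')
        os.makedirs('tmp', exist_ok=True)
        if os.path.isfile(cache_path):
            with open(cache_path, 'rb') as f:
                return pickle.load(f)

        dataframe = pd.read_csv(csv_file)
        class_files_paths = {}
        for class_name in dataframe[label_column].unique():
            image_names = dataframe.loc[dataframe[label_column] == class_name, image_id_column]
            if is_google:
                # Google Landmarks layout: first three id characters are folders
                image_paths = [os.path.join(dataset_path, f[0], f[1], f[2], f + '.jpg')
                               for f in image_names]
            else:
                image_paths = [os.path.join(dataset_path, f) for f in image_names]
            class_files_paths[class_name] = image_paths

        with open(cache_path, 'wb') as f:
            pickle.dump(class_files_paths, f)
        return class_files_paths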
3 changes: 0 additions & 3 deletions embedding_net/models.py
@@ -184,9 +184,6 @@ def _create_model_triplet(self):
         merged_vector = concatenate([image_encoding_a, image_encoding_p, image_encoding_n], axis=-1, name='merged_layer')
         self.model = Model(inputs=[input_image_a, input_image_p, input_image_n], outputs=merged_vector)
 
-        print('Base model summary')
-        self.base_model.summary()
-
         print('Whole model summary')
         self.model.summary()
 
22 changes: 21 additions & 1 deletion tools/train.py
@@ -15,9 +15,12 @@
 from embedding_net.losses_and_accuracies import contrastive_loss, triplet_loss, accuracy
 import argparse
 from tensorflow import keras
+from tensorflow.keras.utils import multi_gpu_model
+import tensorflow as tf
 
 
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a classificator')
     parser.add_argument('config', help='model config file path')
@@ -101,7 +104,6 @@ def main():
         callbacks.append(WandbCallback(data_type="image", labels=data_loader.class_names))
 
     val_generator = None
-
    print('CREATE MODEL AND DATA GENERATORS')
     if params_model['mode'] == 'siamese':
         model = SiameseNet(cfg_params, training=True)
@@ -116,7 +118,25 @@ def main():
         losses = {'output_siamese': contrastive_loss}
         metric = {'output_siamese': accuracy}
     else:
+        if cfg_params['general']['gpu_ids']:
+            print('Multiple gpu mode')
+            gpu_ids = cfg_params['general']['gpu_ids']
+            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+            os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids
+            print(f'Using gpu ids: {gpu_ids}')
+            gpu_ids_list = gpu_ids.split(',')
+            n_gpu = len(gpu_ids_list)
+        else:
+            n_gpu = 1
+            print('Use single gpu mode')
+
         model = TripletNet(cfg_params, training=True)
+        if n_gpu > 1:
+            strategy = tf.distribute.MirroredStrategy()
+            with strategy.scope():
+                model.base_model = multi_gpu_model(model.base_model, gpus=n_gpu)
+                # model.base_model = tf.keras.utils.multi_gpu_model(model.base_model, gpus=n_gpu)
+
         train_generator = TripletsDataGenerator(embedding_model=model.base_model,
                                                 class_files_paths=data_loader.train_data,
                                                 class_names=data_loader.class_names,
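A side note on the multi-GPU block: tf.keras.utils.multi_gpu_model was deprecated in TF 2.x (and later removed) in favor of tf.distribute.MirroredStrategy, and the two are not normally combined as above; with MirroredStrategy the model should simply be built (and compiled) inside strategy.scope(). A minimal sketch of the strategy-only variant, assuming TripletNet is importable from embedding_net.models as in this repo and cfg_params has already been loaded:

    import os
    import tensorflow as tf
    from embedding_net.models import TripletNet

    gpu_ids = cfg_params['general']['gpu_ids']  # e.g. '0,1'
    # Must be set before TensorFlow initializes the GPUs
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids
    n_gpu = len(gpu_ids.split(','))

    if n_gpu > 1:
        # Variables created inside the scope are mirrored across GPUs;
        # no wrapper around base_model is needed
        strategy = tf.distribute.MirroredStrategy()
        with strategy.scope():
            model = TripletNet(cfg_params, training=True)
    else:
        model = TripletNet(cfg_params, training=True)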
