Skip to content

Commit

Permalink
Fix pickle issue for CLIP training (NVIDIA#8722)
Browse files Browse the repository at this point in the history
* fix local pickle issue

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
yaoyu-33 and pre-commit-ci[bot] authored Mar 22, 2024
1 parent 284e0c3 commit c098dcf
Showing 1 changed file with 14 additions and 12 deletions.
26 changes: 14 additions & 12 deletions nemo/collections/multimodal/data/clip/clip_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,25 @@ def get_preprocess_fns(model_cfg, tokenizer=None, is_train=True):
return img_transform, text_transform


def tuple_to_dict(inp):
    """Lazily map ``(image, caption)`` tuples to dictionaries.

    This is a generator: nothing is read from ``inp`` until the result is
    iterated. It lives at module level because functions defined inside
    another function cannot be pickled by reference.

    Args:
        inp: Iterable of indexable samples. Element ``0`` of each sample is
            stored under ``'images'`` and element ``1`` under ``'captions'``;
            any further elements are ignored.

    Yields:
        dict: ``{'images': sample[0], 'captions': sample[1]}`` for each
        sample, in input order.
    """
    # `sample` rather than `input`, so the builtin `input` is not shadowed.
    for sample in inp:
        yield {'images': sample[0], 'captions': sample[1]}


def transform_fn(sample, img_transform, text_transform):
    """Apply the image and text transforms to one raw sample.

    Args:
        sample: Mapping that provides the raw image under ``"jpg"`` and the
            raw caption under ``"txt"``.
        img_transform: Callable applied to the ``"jpg"`` entry.
        text_transform: Callable applied to the ``"txt"`` entry.

    Returns:
        tuple: ``(img_transform(sample["jpg"]), text_transform(sample["txt"]))``.
    """
    # Look up both fields first so a missing key fails before any transform runs.
    raw_image = sample["jpg"]
    raw_caption = sample["txt"]
    processed_image = img_transform(raw_image)
    processed_caption = text_transform(raw_caption)
    return processed_image, processed_caption


def build_train_valid_datasets(
model_cfg, consumed_samples, tokenizer=None,
):
data_cfg = model_cfg.data

# This function maps data that are tuples to dictionary.
def tuple_to_dict(inp):
for input in inp:
out_dict = dict()
out_dict['images'] = input[0]
out_dict['captions'] = input[1]
yield out_dict

def transform_fn(sample, img_transform, text_transform):
image, text = sample["jpg"], sample["txt"]
return img_transform(image), text_transform(text)

train_img_transform, text_transform = get_preprocess_fns(model_cfg, tokenizer, is_train=True)
train_data = WebDatasetCommon(
dataset_cfg=data_cfg,
Expand Down

0 comments on commit c098dcf

Please sign in to comment.