From 6602373e4c41857e25024c20dab8ceef8e637308 Mon Sep 17 00:00:00 2001 From: Zhimin Li <46835311+zml-ai@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:58:53 +0800 Subject: [PATCH] Update csv2arrow.py --- hydit/data_loader/csv2arrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hydit/data_loader/csv2arrow.py b/hydit/data_loader/csv2arrow.py index 10518e2..b2e364c 100644 --- a/hydit/data_loader/csv2arrow.py +++ b/hydit/data_loader/csv2arrow.py @@ -19,7 +19,7 @@ def parse_data(data): with open(img_path, "rb") as fp: image = fp.read() - md5 = hashlib.md5(fp.read()).hexdigest() + md5 = hashlib.md5(image).hexdigest() with Image.open(img_path) as f: width, height = f.size @@ -85,4 +85,4 @@ def make_arrow(csv_root, dataset_root, start_id=0, end_id=-1): csv_root = sys.argv[1] output_arrow_data_path = sys.argv[2] pool = Pool(500) - make_arrow(csv_root, output_arrow_data_path) \ No newline at end of file + make_arrow(csv_root, output_arrow_data_path)