From 7994a990402f56698a0b07b96f695aad7124693d Mon Sep 17 00:00:00 2001 From: Rex Wang <85261458+Rexwang8@users.noreply.github.com> Date: Sun, 22 Oct 2023 12:14:57 -0400 Subject: [PATCH] Fix channels for s3 fixed blocking code for s3 channels when dataset size is greater than 64 --- cmd/dataset_tokenizer/dataset_tokenizer.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cmd/dataset_tokenizer/dataset_tokenizer.go b/cmd/dataset_tokenizer/dataset_tokenizer.go index 4af6131..2e1bdfd 100644 --- a/cmd/dataset_tokenizer/dataset_tokenizer.go +++ b/cmd/dataset_tokenizer/dataset_tokenizer.go @@ -416,17 +416,19 @@ func ReadTextsFromS3( go startReader() } - // List objects recursively. - getObjectsS3Recursively(svc, bucketName, "", objects) + go func() { + // List objects recursively. + getObjectsS3Recursively(svc, bucketName, "", objects) - // Close the objects channel when done. - close(objects) + // Close the objects channel when done. + close(objects) - // Wait for all reader goroutines to finish. - wg.Wait() + // Wait for all reader goroutines to finish. + wg.Wait() - // Close the runeReaders channel. - close(runeReaders) + // Close the runeReaders channel. + close(runeReaders) + }() return runeReaders, nil }