diff --git a/CHANGELOG.md b/CHANGELOG.md index 0794e92e..79e2fb89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ### Main (unreleased) -Nil +- Added CSV batching functionality to EnumeratorBuilder with `build_csv_enumerator_on_batches` method and `csv_on_batches` alias. ## v1.6.0 (Sep 24, 2024) @@ -29,7 +29,7 @@ when generating position for cursor based on `:id` column (Rails 7.1 and above, primary models are now supported). This ensures we grab the value of the id column, rather than a potentially composite primary key value. - [456](https://github.com/Shopify/job-iteration/pull/431) - Use Arel to generate SQL that's type compatible for the - cursor pagination conditionals in ActiveRecord cursor. Previously, the cursor would coerce numeric ids to a string value + cursor pagination conditionals in ActiveRecord cursor. Previously, the cursor would coerce numeric ids to a string value (e.g.: `... AND id > '1'`) ## v1.4.1 (Sep 5, 2023) diff --git a/lib/job-iteration/enumerator_builder.rb b/lib/job-iteration/enumerator_builder.rb index ce3c510d..c293d047 100644 --- a/lib/job-iteration/enumerator_builder.rb +++ b/lib/job-iteration/enumerator_builder.rb @@ -144,6 +144,10 @@ def build_csv_enumerator(enumerable, cursor:) CsvEnumerator.new(enumerable).rows(cursor: cursor) end + def build_csv_enumerator_on_batches(enumerable, cursor:, batch_size: 100) + CsvEnumerator.new(enumerable).batches(cursor: cursor, batch_size: batch_size) + end + # Builds Enumerator for nested iteration. # # @param enums [Array] an Array of Procs, each should return an Enumerator. @@ -186,6 +190,7 @@ def build_nested_enumerator(enums, cursor:) alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations alias_method :throttle, :build_throttle_enumerator alias_method :csv, :build_csv_enumerator + alias_method :csv_on_batches, :build_csv_enumerator_on_batches alias_method :nested, :build_nested_enumerator private diff --git a/test/unit/enumerator_builder_test.rb b/test/unit/enumerator_builder_test.rb index ecb9977f..49b31664 100644 --- a/test/unit/enumerator_builder_test.rb +++ b/test/unit/enumerator_builder_test.rb @@ -60,6 +60,10 @@ class EnumeratorBuilderTest < ActiveSupport::TestCase enumerator_builder(wraps: 0).build_csv_enumerator(CSV.new("test"), cursor: nil) end + test_builder_method(:build_csv_enumerator_on_batches) do + enumerator_builder(wraps: 0).build_csv_enumerator_on_batches(CSV.new("test"), cursor: nil) + end + test_builder_method(:build_nested_enumerator) do enumerator_builder(wraps: 0).build_nested_enumerator( [ @@ -79,7 +83,7 @@ class EnumeratorBuilderTest < ActiveSupport::TestCase test "#build_csv_enumerator uses the CsvEnumerator class" do csv = CSV.open( - ["test", "support", "sample_csv_with_headers.csv"].join("/"), + sample_csv_with_headers, converters: :integer, headers: true, ) @@ -92,6 +96,24 @@ class EnumeratorBuilderTest < ActiveSupport::TestCase end end + test "#build_csv_enumerator_on_batches uses the CsvEnumerator class with batches" do + csv = CSV.open( + sample_csv_with_headers, + converters: :integer, + headers: true, + ) + builder = EnumeratorBuilder.new(mock, wrapper: mock) + + enum = builder.build_csv_enumerator_on_batches(csv, cursor: nil, batch_size: 2) + csv_rows = open_csv.to_a + enum.each_with_index do |batch_and_cursor, index| + batch, cursor = batch_and_cursor + expected_batch = csv_rows[index * 2, 2] + assert_equal expected_batch, batch + assert_equal index, cursor + end + end + private def enumerator_builder(wraps: 1)