From 39de7cc31cab53f64c99500c1c0bf7d7c25b548f Mon Sep 17 00:00:00 2001 From: xiangyu0xf Date: Mon, 6 Jan 2025 22:14:24 +0800 Subject: [PATCH] [core] support null value for record-level.time-field (#4839) --- .../apache/paimon/io/RecordLevelExpire.java | 50 +++++--- .../paimon/table/RecordLevelExpireTest.java | 12 +- .../RecordLevelExpireWithAggregationTest.java | 116 ++++++++++++++++++ .../RecordLevelExpireWithMillisecondTest.java | 9 +- ...ecordLevelExpireWithTimestampBaseTest.java | 9 +- 5 files changed, 167 insertions(+), 29 deletions(-) create mode 100644 paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithAggregationTest.java diff --git a/paimon-core/src/main/java/org/apache/paimon/io/RecordLevelExpire.java b/paimon-core/src/main/java/org/apache/paimon/io/RecordLevelExpire.java index e43a9d03d9b0..05c70dcbe133 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/RecordLevelExpire.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/RecordLevelExpire.java @@ -33,15 +33,14 @@ import javax.annotation.Nullable; import java.time.Duration; +import java.util.Optional; import java.util.function.Function; -import static org.apache.paimon.utils.Preconditions.checkArgument; - /** A factory to create {@link RecordReader} expires records by time. */ public class RecordLevelExpire { private final int expireTime; - private final Function fieldGetter; + private final Function> fieldGetter; @Nullable public static RecordLevelExpire create(CoreOptions options, RowType rowType) { @@ -65,11 +64,13 @@ public static RecordLevelExpire create(CoreOptions options, RowType rowType) { } DataType dataType = rowType.getField(timeFieldName).type(); - Function fieldGetter = createFieldGetter(dataType, fieldIndex); + Function> fieldGetter = + createFieldGetter(dataType, fieldIndex); return new RecordLevelExpire((int) expireTime.getSeconds(), fieldGetter); } - private RecordLevelExpire(int expireTime, Function fieldGetter) { + private RecordLevelExpire( + int expireTime, Function> fieldGetter) { this.expireTime = expireTime; this.fieldGetter = fieldGetter; } @@ -80,26 +81,46 @@ public FileReaderFactory wrap(FileReaderFactory readerFactor private RecordReader wrap(RecordReader reader) { int currentTime = (int) (System.currentTimeMillis() / 1000); - return reader.filter(kv -> currentTime <= fieldGetter.apply(kv.value()) + expireTime); + return reader.filter( + keyValue -> + fieldGetter + .apply(keyValue.value()) + .map(integer -> currentTime <= integer + expireTime) + .orElse(true)); } - private static Function createFieldGetter( + private static Function> createFieldGetter( DataType dataType, int fieldIndex) { - final Function fieldGetter; + final Function> fieldGetter; if (dataType instanceof IntType) { - fieldGetter = row -> row.getInt(fieldIndex); + fieldGetter = + row -> + row.isNullAt(fieldIndex) + ? Optional.empty() + : Optional.of(row.getInt(fieldIndex)); } else if (dataType instanceof BigIntType) { fieldGetter = row -> { + if (row.isNullAt(fieldIndex)) { + return Optional.empty(); + } long value = row.getLong(fieldIndex); // If it is milliseconds, convert it to seconds. - return (int) (value >= 1_000_000_000_000L ? value / 1000 : value); + return Optional.of( + (int) (value >= 1_000_000_000_000L ? value / 1000 : value)); }; } else if (dataType instanceof TimestampType || dataType instanceof LocalZonedTimestampType) { int precision = DataTypeChecks.getPrecision(dataType); fieldGetter = - row -> (int) (row.getTimestamp(fieldIndex, precision).getMillisecond() / 1000); + row -> + row.isNullAt(fieldIndex) + ? Optional.empty() + : Optional.of( + (int) + (row.getTimestamp(fieldIndex, precision) + .getMillisecond() + / 1000)); } else { throw new IllegalArgumentException( String.format( @@ -107,11 +128,6 @@ private static Function createFieldGetter( dataType)); } - return row -> { - checkArgument( - !row.isNullAt(fieldIndex), - "Time field for record-level expire should not be null."); - return fieldGetter.apply(row); - }; + return fieldGetter; } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireTest.java b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireTest.java index bd13b0ecf8fa..bce57c275281 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireTest.java @@ -38,7 +38,6 @@ import java.util.UUID; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; class RecordLevelExpireTest extends PrimaryKeyTableTestBase { @@ -102,12 +101,17 @@ public void test() throws Exception { .containsExactlyInAnyOrder(GenericRow.of(currentSecs + 60 * 60)); writeCommit(GenericRow.of(1, 5, null)); + compact(1); assertThat(query()) .containsExactlyInAnyOrder( GenericRow.of(1, 4, currentSecs + 60 * 60), GenericRow.of(1, 5, null)); - // null time field for record-level expire is not supported yet. - assertThatThrownBy(() -> compact(1)) - .hasMessageContaining("Time field for record-level expire should not be null."); + writeCommit(GenericRow.of(1, 5, currentSecs + 60 * 60)); + // compact, merged + compact(1); + assertThat(query()) + .containsExactlyInAnyOrder( + GenericRow.of(1, 4, currentSecs + 60 * 60), + GenericRow.of(1, 5, currentSecs + 60 * 60)); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithAggregationTest.java b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithAggregationTest.java new file mode 100644 index 000000000000..02ed8ee3ed65 --- /dev/null +++ b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithAggregationTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.table; + +import org.apache.paimon.CoreOptions; +import org.apache.paimon.catalog.Catalog; +import org.apache.paimon.catalog.CatalogContext; +import org.apache.paimon.catalog.CatalogFactory; +import org.apache.paimon.catalog.Identifier; +import org.apache.paimon.catalog.PrimaryKeyTableTestBase; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.fs.Path; +import org.apache.paimon.options.Options; +import org.apache.paimon.schema.Schema; +import org.apache.paimon.types.DataTypes; +import org.apache.paimon.types.RowKind; +import org.apache.paimon.utils.TraceableFileIO; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +class RecordLevelExpireWithAggregationTest extends PrimaryKeyTableTestBase { + + @Override + @BeforeEach + public void beforeEachBase() throws Exception { + CatalogContext context = + CatalogContext.create( + new Path(TraceableFileIO.SCHEME + "://" + tempPath.toString())); + Catalog catalog = CatalogFactory.createCatalog(context); + Identifier identifier = new Identifier("default", "T"); + catalog.createDatabase(identifier.getDatabaseName(), true); + Schema schema = + Schema.newBuilder() + .column("pt", DataTypes.INT()) + .column("pk", DataTypes.INT()) + .column("col1", DataTypes.INT()) + .partitionKeys("pt") + .primaryKey("pk", "pt") + .options(tableOptions().toMap()) + .build(); + catalog.createTable(identifier, schema, true); + table = (FileStoreTable) catalog.getTable(identifier); + commitUser = UUID.randomUUID().toString(); + } + + @Override + protected Options tableOptions() { + Options options = new Options(); + options.set(CoreOptions.BUCKET, 1); + options.set(CoreOptions.RECORD_LEVEL_EXPIRE_TIME, Duration.ofSeconds(1)); + options.set(CoreOptions.RECORD_LEVEL_TIME_FIELD, "col1"); + options.set(CoreOptions.MERGE_ENGINE, CoreOptions.MergeEngine.AGGREGATE); + options.set(CoreOptions.FIELDS_DEFAULT_AGG_FUNC, "last_non_null_value"); + options.set("fields.col1.ignore-retract", "true"); + return options; + } + + @Test + public void test() throws Exception { + writeCommit(GenericRow.of(1, 1, 1), GenericRow.of(1, 2, 2)); + // disordered retract message will generate null fields + writeCommit( + GenericRow.ofKind(RowKind.UPDATE_BEFORE, 1, 3, 1), + GenericRow.ofKind(RowKind.DELETE, 1, 3, 1)); + + int currentSecs = (int) (System.currentTimeMillis() / 1000); + writeCommit(GenericRow.of(1, 4, currentSecs + 60 * 60)); + + Thread.sleep(2000); + + // no compaction, can be queried + assertThat(query()) + .containsExactlyInAnyOrder( + GenericRow.of(1, 1, 1), + GenericRow.of(1, 2, 2), + GenericRow.of(1, 3, null), + GenericRow.of(1, 4, currentSecs + 60 * 60)); + + // compact, expired + compact(1); + assertThat(query()) + .containsExactlyInAnyOrder( + GenericRow.of(1, 3, null), GenericRow.of(1, 4, currentSecs + 60 * 60)); + + writeCommit(GenericRow.of(1, 3, currentSecs + 60 * 60)); + compact(1); + + // compact, merged + assertThat(query()) + .containsExactlyInAnyOrder( + GenericRow.of(1, 4, currentSecs + 60 * 60), + GenericRow.of(1, 3, currentSecs + 60 * 60)); + } +} diff --git a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithMillisecondTest.java b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithMillisecondTest.java index 295058bfbd22..f9cfbf0ce331 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithMillisecondTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithMillisecondTest.java @@ -38,7 +38,6 @@ import java.util.UUID; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; class RecordLevelExpireWithMillisecondTest extends PrimaryKeyTableTestBase { @Override @@ -102,8 +101,10 @@ public void test() throws Exception { assertThat(query(new int[] {0, 1})) .containsExactlyInAnyOrder(GenericRow.of(1, 4), GenericRow.of(1, 5)); - // null time field for record-level expire is not supported yet. - assertThatThrownBy(() -> compact(1)) - .hasMessageContaining("Time field for record-level expire should not be null."); + writeCommit(GenericRow.of(1, 5, currentSecs + 60 * 60 * 1000)); + // compact, merged + compact(1); + assertThat(query(new int[] {0, 1})) + .containsExactlyInAnyOrder(GenericRow.of(1, 4), GenericRow.of(1, 5)); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithTimestampBaseTest.java b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithTimestampBaseTest.java index f352693759a7..f9aef643f6a6 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithTimestampBaseTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/RecordLevelExpireWithTimestampBaseTest.java @@ -29,7 +29,6 @@ import java.time.Duration; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; abstract class RecordLevelExpireWithTimestampBaseTest extends PrimaryKeyTableTestBase { @@ -67,8 +66,10 @@ public void testTimestampTypeExpire() throws Exception { assertThat(query(new int[] {0, 1})) .containsExactlyInAnyOrder(GenericRow.of(1, 3), GenericRow.of(1, 5)); - // null time field for record-level expire is not supported yet. - assertThatThrownBy(() -> compact(1)) - .hasMessageContaining("Time field for record-level expire should not be null."); + writeCommit(GenericRow.of(1, 5, timestamp3)); + // compact, merged + compact(1); + assertThat(query(new int[] {0, 1})) + .containsExactlyInAnyOrder(GenericRow.of(1, 3), GenericRow.of(1, 5)); } }