diff --git a/.github/config/labeler-config.yml b/.github/config/labeler-config.yml
index ae685830c4a43..e7ff2840486e0 100644
--- a/.github/config/labeler-config.yml
+++ b/.github/config/labeler-config.yml
@@ -3,7 +3,6 @@
   - lib/trino-orc/**
   - lib/trino-parquet/**
   - lib/trino-hive-formats/**
-  - plugin/trino-hive-hadoop2/**
   - plugin/trino-hive/**
   - testing/trino-product-tests/**
   - lib/trino-filesystem/**
@@ -20,7 +19,6 @@ delta-lake:
 
 hive:
   - plugin/trino-hive/**
-  - plugin/trino-hive-hadoop2/**
 
 hudi:
   - plugin/trino-hudi/**
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bc0df89d98b44..71fcf9249f7e3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,6 @@ jobs:
       fail-fast: false
       matrix:
         java-version:
-          - 17 # Keep testing on JDK 17 to ensure basic backward compatibility
           - 21
     timeout-minutes: 45
     steps:
@@ -301,27 +300,7 @@ jobs:
       - name: Install Hive Module
        run: |
          export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}"
-          $MAVEN clean install ${MAVEN_FAST_INSTALL} ${MAVEN_GIB} -Dgib.logImpactedTo=gib-impacted.log -am -pl :trino-hive-hadoop2
-      - name: Run Hive Tests
-        run: |
-          source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-            plugin/trino-hive-hadoop2/bin/run_hive_tests.sh
-      - name: Run Hive S3 Tests
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.TRINO_AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ${{ vars.TRINO_AWS_REGION }}
-          S3_BUCKET: ${{ vars.TRINO_S3_BUCKET }}
-          S3_BUCKET_ENDPOINT: "https://s3.${{ vars.TRINO_AWS_REGION }}.amazonaws.com"
-        run: |
-          if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
-            if [ matrix.config == 'config-hdp3' ]; then
-              # JsonSerde class needed for the S3 Select JSON tests is only available on hdp3.
-              plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh
-            fi
-          fi
+          $MAVEN clean install ${MAVEN_FAST_INSTALL} ${MAVEN_GIB} -Dgib.logImpactedTo=gib-impacted.log -am -pl :trino-hive
       - name: Run Hive AWS Tests
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
@@ -333,53 +312,6 @@ jobs:
          if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
            $MAVEN test ${MAVEN_TEST} -pl :trino-hive -P aws-tests
          fi
-      - name: Run Hive Azure ABFS Access Key Tests
-        if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
-        env:
-          ABFS_CONTAINER: ${{ secrets.AZURE_ABFS_CONTAINER }}
-          ABFS_ACCOUNT: ${{ secrets.AZURE_ABFS_ACCOUNT }}
-          ABFS_ACCESS_KEY: ${{ secrets.AZURE_ABFS_ACCESSKEY }}
-        run: |
-          if [ "${ABFS_CONTAINER}" != "" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh
-          fi
-      - name: Run Hive Azure ABFS OAuth Tests
-        if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
-        env:
-          ABFS_CONTAINER: ${{ secrets.AZURE_ABFS_CONTAINER }}
-          ABFS_ACCOUNT: ${{ secrets.AZURE_ABFS_ACCOUNT }}
-          ABFS_OAUTH_ENDPOINT: ${{ secrets.AZURE_ABFS_OAUTH_ENDPOINT }}
-          ABFS_OAUTH_CLIENTID: ${{ secrets.AZURE_ABFS_OAUTH_CLIENTID }}
-          ABFS_OAUTH_SECRET: ${{ secrets.AZURE_ABFS_OAUTH_SECRET }}
-        run: |
-          if [ -n "$ABFS_CONTAINER" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh
-          fi
-      - name: Run Hive Azure WASB Tests
-        if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
-        env:
-          WASB_CONTAINER: ${{ secrets.AZURE_WASB_CONTAINER }}
-          WASB_ACCOUNT: ${{ secrets.AZURE_WASB_ACCOUNT }}
-          WASB_ACCESS_KEY: ${{ secrets.AZURE_WASB_ACCESSKEY }}
-        run: |
-          if [ "${WASB_CONTAINER}" != "" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_wasb_tests.sh
-          fi
-      - name: Run Hive Azure ADL Tests
-        if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
-        env:
-          ADL_NAME: ${{ secrets.AZURE_ADL_NAME }}
-          ADL_CLIENT_ID: ${{ secrets.AZURE_ADL_CLIENTID }}
-          ADL_CREDENTIAL: ${{ secrets.AZURE_ADL_CREDENTIAL }}
-          ADL_REFRESH_URL: ${{ secrets.AZURE_ADL_REFRESHURL }}
-        run: |
-          if [ "${ADL_NAME}" != "" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_adl_tests.sh
-          fi
       - name: Upload test results
        uses: actions/upload-artifact@v3
        # Upload all test reports only on failure, because the artifacts are large
@@ -475,6 +407,7 @@ jobs:
            !:trino-memory,
            !:trino-mongodb,
            !:trino-mysql,
+            !:trino-opensearch,
            !:trino-oracle,
            !:trino-orc,
            !:trino-parquet,
@@ -579,7 +512,7 @@ jobs:
          - { modules: lib/trino-filesystem-gcs, profile: cloud-tests }
          - { modules: plugin/trino-accumulo }
          - { modules: plugin/trino-bigquery }
-          - { modules: plugin/trino-bigquery, profile: cloud-tests-arrow-and-fte }
+          - { modules: plugin/trino-bigquery, profile: cloud-tests-2 }
          - { modules: plugin/trino-cassandra }
          - { modules: plugin/trino-clickhouse }
          - { modules: plugin/trino-delta-lake }
@@ -602,6 +535,7 @@ jobs:
          - { modules: plugin/trino-mariadb }
          - { modules: plugin/trino-mongodb }
          - { modules: plugin/trino-mysql }
+          - { modules: plugin/trino-opensearch }
          - { modules: plugin/trino-oracle }
          - { modules: plugin/trino-phoenix5 }
          - { modules: plugin/trino-pinot }
@@ -658,7 +592,7 @@ jobs:
            matrix.modules != 'plugin/trino-singlestore'
            && ! (contains(matrix.modules, 'trino-delta-lake') && contains(matrix.profile, 'cloud-tests'))
            && ! (contains(matrix.modules, 'trino-iceberg') && contains(matrix.profile, 'cloud-tests'))
-            && ! (contains(matrix.modules, 'trino-bigquery') && contains(matrix.profile, 'cloud-tests-arrow-and-fte'))
+            && ! (contains(matrix.modules, 'trino-bigquery') && contains(matrix.profile, 'cloud-tests-2'))
            && ! (contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'cloud-tests'))
            && ! (contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'fte-tests'))
            && ! (contains(matrix.modules, 'trino-filesystem-s3') && contains(matrix.profile, 'cloud-tests'))
@@ -761,25 +695,25 @@ jobs:
        env:
          BIGQUERY_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CREDENTIALS_KEY }}
          GCP_STORAGE_BUCKET: ${{ vars.GCP_STORAGE_BUCKET }}
-        if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
+        if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
        run: |
-          $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests \
+          $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-1 \
            -Dbigquery.credentials-key="${BIGQUERY_CREDENTIALS_KEY}" \
-            -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}" \
-            -Dtesting.alternate-bq-project-id=bigquery-cicd-alternate
-      - name: Cloud BigQuery Arrow and FTE Tests
+            -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}"
+      - name: Cloud BigQuery Smoke Tests
        env:
          BIGQUERY_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CREDENTIALS_KEY }}
          GCP_STORAGE_BUCKET: ${{ vars.GCP_STORAGE_BUCKET }}
-        if: matrix.modules == 'plugin/trino-bigquery' && contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
+        if: matrix.modules == 'plugin/trino-bigquery' && contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
        run: |
-          $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-arrow-and-fte \
+          $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-2 \
            -Dbigquery.credentials-key="${BIGQUERY_CREDENTIALS_KEY}" \
-            -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}"
+            -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}" \
+            -Dtesting.alternate-bq-project-id=bigquery-cicd-alternate
       - name: Cloud BigQuery Case Insensitive Mapping Tests
        env:
          BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY }}
-        if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY != '')
+        if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY != '')
        run: |
          $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-case-insensitive-mapping -Dbigquery.credentials-key="${BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY}"
       - name: Cloud Snowflake Tests
@@ -971,7 +905,7 @@ jobs:
          - suite-5
          - suite-6-non-generic
          - suite-7-non-generic
-          - suite-8-non-generic
+          - suite-hive-transactional
          - suite-azure
          - suite-delta-lake-databricks91
          - suite-delta-lake-databricks104
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 82cb8ccfa07b9..a137d9ee2ba21 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'trinodb/trino' steps: - - uses: actions/stale@v8.0.0 + - uses: actions/stale@v9.0.0 with: stale-pr-message: 'This pull request has gone a while without any activity. Tagging the Trino developer relations team: @bitsondatadev @colebow @mosabua' days-before-pr-stale: 21 @@ -21,3 +21,7 @@ jobs: stale-pr-label: 'stale' start-date: '2023-01-01T00:00:00Z' exempt-draft-pr: true + operations-per-run: 200 + # Avoid processing issues completely, see https://github.com/actions/stale/issues/1112 + days-before-issue-stale: -1 + days-before-issue-close: -1 diff --git a/.mvn/jvm.config b/.mvn/jvm.config index 65c0bc4d12016..6a0272bcee2a2 100644 --- a/.mvn/jvm.config +++ b/.mvn/jvm.config @@ -9,3 +9,5 @@ --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED --add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED +-XX:+UnlockDiagnosticVMOptions +-XX:GCLockerRetryAllocationCount=100 diff --git a/README.md b/README.md index a95e04aac1fd0..e93d0c57c9e7a 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,10 @@ information about reporting vulnerabilities. ## Build requirements * Mac OS X or Linux -* Java 17.0.4+, 64-bit +* Java 21.0.1+, 64-bit * Docker + * Turn SELinux or other systems disabling write access to the local checkout + off, to allow containers to mount parts of the Trino source tree ## Building Trino @@ -68,8 +70,8 @@ After opening the project in IntelliJ, double check that the Java SDK is properly configured for the project: * Open the File menu and select Project Structure -* In the SDKs section, ensure that JDK 17 is selected (create one if none exist) -* In the Project section, ensure the Project language level is set to 17 +* In the SDKs section, ensure that JDK 21 is selected (create one if none exist) +* In the Project section, ensure the Project language level is set to 21 ### Running a testing server diff --git a/client/trino-cli/pom.xml b/client/trino-cli/pom.xml index 7720045b66874..d64a685f4b199 100644 --- a/client/trino-cli/pom.xml +++ b/client/trino-cli/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,7 +15,7 @@ ${project.parent.basedir} 8 io.trino.cli.Trino - 3.24.1 + 3.25.0 @@ -147,12 +147,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/client/trino-client/pom.xml b/client/trino-client/pom.xml index 27587ca34d0af..875d625530ff7 100644 --- a/client/trino-client/pom.xml +++ b/client/trino-client/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -122,11 +122,5 @@ junit-jupiter-api test - - - org.testng - testng - test - diff --git a/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java b/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java index b408006b9949a..3e964f279b47a 100644 --- a/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java +++ b/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java @@ -29,6 +29,8 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.client.ClientStandardTypes.ROW; +import static io.trino.client.ClientStandardTypes.TIMESTAMP_WITH_TIME_ZONE; +import static 
io.trino.client.ClientStandardTypes.TIME_WITH_TIME_ZONE; import static io.trino.client.ClientStandardTypes.VARCHAR; import static java.util.Collections.unmodifiableList; import static java.util.Objects.requireNonNull; @@ -94,6 +96,15 @@ public String toString() if (arguments.isEmpty()) { return rawType; } + + if (rawType.equals(TIME_WITH_TIME_ZONE)) { + return "time(" + arguments.get(0) + ") with time zone"; + } + + if (rawType.equals(TIMESTAMP_WITH_TIME_ZONE)) { + return "timestamp(" + arguments.get(0) + ") with time zone"; + } + return rawType + arguments.stream() .map(ClientTypeSignatureParameter::toString) .collect(joining(",", "(", ")")); diff --git a/client/trino-jdbc/pom.xml b/client/trino-jdbc/pom.xml index 9312577b7979f..c5472b875de37 100644 --- a/client/trino-jdbc/pom.xml +++ b/client/trino-jdbc/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -193,7 +193,7 @@ io.trino - trino-hive-hadoop2 + trino-hive test @@ -207,6 +207,12 @@ io.trino trino-main test + + + io.trino + re2j + + @@ -237,6 +243,12 @@ io.trino trino-testing test + + + io.trino + re2j + + diff --git a/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java b/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java index 6bae338a8cbb8..705b50a8e15d9 100644 --- a/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java +++ b/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java @@ -58,8 +58,6 @@ public abstract class BaseTestJdbcResultSet protected abstract Connection createConnection() throws SQLException; - protected abstract int getTestedServerVersion(); - @Test public void testDuplicateColumnLabels() throws Exception @@ -441,10 +439,9 @@ public void testTimeWithTimeZone() .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 09:39:07+01:00"); assertThat(rs.getTime(column)).isEqualTo(Time.valueOf(LocalTime.of(1, 39, 7))); // TODO this should fail, or represent TIME '09:39:07' - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTimestamp(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a timestamp type but is time with time zone(0)"); + .hasMessage("Expected column to be a timestamp type but is time(0) with time zone"); }); checkRepresentation(connectedStatement.getStatement(), "TIME '01:39:07 +01:00'", Types.TIME_WITH_TIMEZONE, (rs, column) -> { @@ -458,10 +455,9 @@ public void testTimeWithTimeZone() .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 01:39:07+01:00"); assertThat(rs.getTime(column)).isEqualTo(someBogusValue); // TODO this should fail, or represent TIME '01:39:07' - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTimestamp(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a timestamp type but is time with time zone(0)"); + .hasMessage("Expected column to be a timestamp type but is time(0) with time zone"); }); checkRepresentation(connectedStatement.getStatement(), "TIME '00:39:07 +01:00'", Types.TIME_WITH_TIMEZONE, (rs, column) -> { @@ -475,10 +471,9 @@ public void testTimeWithTimeZone() .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 
00:39:07+01:00"); assertThat(rs.getTime(column)).isEqualTo(someBogusValue); // TODO this should fail, as there no java.sql.Time representation for TIME '00:39:07' in America/Bahia_Banderas - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTimestamp(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a timestamp type but is time with time zone(0)"); + .hasMessage("Expected column to be a timestamp type but is time(0) with time zone"); }); } } @@ -642,10 +637,9 @@ public void testTimestampWithTimeZone() assertThatThrownBy(() -> rs.getDate(column)) .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 1970-01-01 00:00:00.000 UTC"); - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(3)"); + .hasMessage("Expected column to be a time type but is timestamp(3) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -657,10 +651,9 @@ public void testTimestampWithTimeZone() assertThatThrownBy(() -> rs.getDate(column)) .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 2018-02-13 13:14:15.227 Europe/Warsaw"); - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(3)"); + .hasMessage("Expected column to be a time type but is timestamp(3) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -675,7 +668,7 @@ public void testTimestampWithTimeZone() .hasMessage("Expected value to be a date but is: 2019-12-31 23:59:59.999999999999 Europe/Warsaw"); assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter + .hasMessage("Expected column to be a time type but is timestamp(12) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -694,7 +687,7 @@ public void testTimestampWithTimeZone() .hasMessage("Expected value to be a date but is: 2019-12-31 23:59:59.999999999999 America/Bahia_Banderas"); assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter + .hasMessage("Expected column to be a time type but is timestamp(12) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -708,7 +701,7 @@ public void testTimestampWithTimeZone() .hasMessage("Expected value to be a date but is: 1957-12-31 23:59:59.999999999999 
Europe/Warsaw"); assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter + .hasMessage("Expected column to be a time type but is timestamp(12) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -720,10 +713,9 @@ public void testTimestampWithTimeZone() assertThatThrownBy(() -> rs.getDate(column)) .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 1970-01-01 09:14:15.227 Europe/Warsaw"); - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(3)"); + .hasMessage("Expected column to be a time type but is timestamp(3) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -735,10 +727,9 @@ public void testTimestampWithTimeZone() assertThatThrownBy(() -> rs.getDate(column)) .isInstanceOf(SQLException.class) .hasMessage("Expected value to be a date but is: 1970-01-01 00:14:15.227 Europe/Warsaw"); - // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(3)"); + .hasMessage("Expected column to be a time type but is timestamp(3) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); @@ -757,7 +748,7 @@ public void testTimestampWithTimeZone() .hasMessage("Expected value to be a date but is: +12345-01-23 01:23:45.123456789 Europe/Warsaw"); assertThatThrownBy(() -> rs.getTime(column)) .isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException - .hasMessage("Expected column to be a time type but is timestamp with time zone(9)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter + .hasMessage("Expected column to be a time type but is timestamp(9) with time zone"); assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime); }); } @@ -810,8 +801,8 @@ public void testArray() checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123'", Types.TIMESTAMP, "timestamp(3)"); checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123456789'", Types.TIMESTAMP, "timestamp(9)"); - checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp with time zone(3)"); - checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123456789 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp with time zone(9)"); + checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp(3) with time zone"); + checkArrayRepresentation(connectedStatement.getStatement(), 
"TIMESTAMP '2017-01-02 09:00:00.123456789 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp(9) with time zone"); // array or array checkRepresentation(connectedStatement.getStatement(), "ARRAY[NULL, ARRAY[NULL, BIGINT '1', 2]]", Types.ARRAY, (rs, column) -> { diff --git a/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java b/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java index 95c3378ed18cb..1a159b45ed55a 100644 --- a/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java +++ b/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java @@ -59,11 +59,4 @@ protected Connection createConnection() String url = format("jdbc:trino://%s", server.getAddress()); return DriverManager.getConnection(url, "test", null); } - - @Override - protected int getTestedServerVersion() - { - // Latest version - return Integer.MAX_VALUE; - } } diff --git a/core/trino-grammar/pom.xml b/core/trino-grammar/pom.xml index 75cf06246cc92..a77a6d86389c8 100644 --- a/core/trino-grammar/pom.xml +++ b/core/trino-grammar/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4 b/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4 index c13530d62a80f..cea6360647f34 100644 --- a/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4 +++ b/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4 @@ -160,7 +160,7 @@ statement (LIKE pattern=string (ESCAPE escape=string)?)? #showSchemas | SHOW CATALOGS (LIKE pattern=string (ESCAPE escape=string)?)? #showCatalogs - | SHOW COLUMNS (FROM | IN) qualifiedName? + | SHOW COLUMNS (FROM | IN) qualifiedName (LIKE pattern=string (ESCAPE escape=string)?)? 
#showColumns | SHOW STATS FOR qualifiedName #showStats | SHOW STATS FOR '(' rootQuery ')' #showStatsForQuery diff --git a/core/trino-main/pom.xml b/core/trino-main/pom.xml index 1363a9fa9a7c6..2722379dfe03f 100644 --- a/core/trino-main/pom.xml +++ b/core/trino-main/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -13,15 +13,6 @@ ${project.parent.basedir} - - - instances @@ -247,6 +238,11 @@ opentelemetry-context + + io.opentelemetry + opentelemetry-sdk-trace + + io.trino re2j @@ -339,8 +335,8 @@ org.apache.lucene - lucene-analyzers-common - 8.11.2 + lucene-analysis-common + 9.7.0 @@ -390,12 +386,6 @@ provided - - org.testng - testng - provided - - com.squareup.okhttp3 okhttp @@ -533,25 +523,35 @@ - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.surefire - surefire-junit-platform - ${dep.plugin.surefire.version} - - - org.apache.maven.surefire - surefire-testng - ${dep.plugin.surefire.version} - - - - - + + + benchmarks + + + + org.codehaus.mojo + exec-maven-plugin + + ${java.home}/bin/java + + -DoutputDirectory=benchmark_outputs + -classpath + + io.trino.benchmark.BenchmarkSuite + + test + + + + benchmarks + + exec + + + + + + + + diff --git a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java index 2f56a09680cc6..44d2805976aa8 100644 --- a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java +++ b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java @@ -53,6 +53,7 @@ import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey; import static java.lang.Math.min; import static java.lang.String.format; +import static java.util.concurrent.TimeUnit.SECONDS; public final class SystemSessionProperties implements SystemSessionPropertiesProvider @@ -207,6 +208,8 @@ public final class SystemSessionProperties public static final String USE_COST_BASED_PARTITIONING = "use_cost_based_partitioning"; public static final String FORCE_SPILLING_JOIN = "force_spilling_join"; public static final String PAGE_PARTITIONING_BUFFER_POOL_SIZE = "page_partitioning_buffer_pool_size"; + public static final String IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD = "idle_writer_min_data_size_threshold"; + public static final String CLOSE_IDLE_WRITERS_TRIGGER_DURATION = "close_idle_writers_trigger_duration"; private final List> sessionProperties; @@ -712,6 +715,7 @@ public SystemSessionProperties( COST_ESTIMATION_WORKER_COUNT, "Set the estimate count of workers while planning", null, + value -> validateIntegerValue(value, COST_ESTIMATION_WORKER_COUNT, 1, true), true), booleanProperty( OMIT_DATETIME_TYPE_PRECISION, @@ -1058,6 +1062,14 @@ public SystemSessionProperties( integerProperty(PAGE_PARTITIONING_BUFFER_POOL_SIZE, "Maximum number of free buffers in the per task partitioned page buffer pool. 
Setting this to zero effectively disables the pool", taskManagerConfig.getPagePartitioningBufferPoolSize(), + true), + dataSizeProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, + "Minimum amount of data written by a writer operator on average before it tries to close the idle writers", + DataSize.of(256, MEGABYTE), + true), + durationProperty(CLOSE_IDLE_WRITERS_TRIGGER_DURATION, + "The duration after which the writer operator tries to close the idle writers", + new Duration(5, SECONDS), true)); } @@ -1896,4 +1908,14 @@ public static int getPagePartitioningBufferPoolSize(Session session) { return session.getSystemProperty(PAGE_PARTITIONING_BUFFER_POOL_SIZE, Integer.class); } + + public static DataSize getIdleWriterMinDataSizeThreshold(Session session) + { + return session.getSystemProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, DataSize.class); + } + + public static Duration getCloseIdleWritersTriggerDuration(Session session) + { + return session.getSystemProperty(CLOSE_IDLE_WRITERS_TRIGGER_DURATION, Duration.class); + } } diff --git a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java index b7a7f6ee41862..143cc6493c63d 100644 --- a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java +++ b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java @@ -277,43 +277,34 @@ private void addColumnsRecords(QualifiedTablePrefix prefix) private void addTablesRecords(QualifiedTablePrefix prefix) { - boolean needsTableType = requiredColumns.contains("table_type") || requiredColumns.contains("trino_relation_type"); + boolean needsTableType = requiredColumns.contains("table_type"); Set relations; - Map relationTypes; + Set views; if (needsTableType) { - relationTypes = getRelationTypes(session, metadata, accessControl, prefix); + Map relationTypes = getRelationTypes(session, metadata, accessControl, prefix); relations = relationTypes.keySet(); + views = relationTypes.entrySet().stream() + .filter(entry -> entry.getValue() == RelationType.VIEW) + .map(Entry::getKey) + .collect(toImmutableSet()); } else { relations = listTables(session, metadata, accessControl, prefix); - relationTypes = null; + views = Set.of(); } + // TODO (https://github.com/trinodb/trino/issues/8207) define a type for materialized views for (SchemaTableName name : relations) { String type = null; - String trinoRelationType = null; if (needsTableType) { - switch (relationTypes.get(name)) { - case TABLE -> { - type = "BASE TABLE"; - trinoRelationType = type; - } - case VIEW -> { - type = "VIEW"; - trinoRelationType = type; - } - case MATERIALIZED_VIEW -> { - type = "BASE TABLE"; - trinoRelationType = "MATERIALIZED VIEW"; - } - } + // if table and view names overlap, the view wins + type = views.contains(name) ? 
"VIEW" : "BASE TABLE"; } addRecord( prefix.getCatalogName(), name.getSchemaName(), name.getTableName(), type, - trinoRelationType, null); if (isLimitExhausted()) { return; diff --git a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java index e45bb8594d742..605076aa38419 100644 --- a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java +++ b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java @@ -47,7 +47,6 @@ public enum InformationSchemaTable .column("table_schema", createUnboundedVarcharType()) .column("table_name", createUnboundedVarcharType()) .column("table_type", createUnboundedVarcharType()) - .hiddenColumn("trino_relation_type", createUnboundedVarcharType()) .hiddenColumn("table_comment", createUnboundedVarcharType()) // MySQL compatible .build()), VIEWS(table("views") diff --git a/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java b/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java index 3912bbe262c98..6fb93e229023e 100644 --- a/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java +++ b/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java @@ -15,7 +15,7 @@ import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; -import io.trino.operator.table.Sequence.SequenceFunctionHandle; +import io.trino.operator.table.SequenceFunction.SequenceFunctionHandle; import io.trino.spi.connector.CatalogHandle; import io.trino.spi.connector.CatalogHandle.CatalogVersion; import io.trino.spi.connector.ConnectorMetadata; @@ -33,7 +33,7 @@ import java.util.Set; -import static io.trino.operator.table.Sequence.getSequenceFunctionSplitSource; +import static io.trino.operator.table.SequenceFunction.getSequenceFunctionSplitSource; import static io.trino.spi.connector.CatalogHandle.createRootCatalogHandle; import static java.util.Objects.requireNonNull; diff --git a/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java b/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java index 306fb4347c39d..7d6bb634309bc 100644 --- a/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java +++ b/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java @@ -30,8 +30,8 @@ import io.trino.connector.system.jdbc.TableTypeJdbcTable; import io.trino.connector.system.jdbc.TypesJdbcTable; import io.trino.connector.system.jdbc.UdtJdbcTable; -import io.trino.operator.table.ExcludeColumns; -import io.trino.operator.table.Sequence; +import io.trino.operator.table.ExcludeColumnsFunction; +import io.trino.operator.table.SequenceFunction; import io.trino.spi.connector.SystemTable; import io.trino.spi.function.table.ConnectorTableFunction; import io.trino.spi.procedure.Procedure; @@ -79,7 +79,7 @@ public void configure(Binder binder) binder.bind(GlobalSystemConnector.class).in(Scopes.SINGLETON); Multibinder tableFunctions = Multibinder.newSetBinder(binder, ConnectorTableFunction.class); - tableFunctions.addBinding().toProvider(ExcludeColumns.class).in(Scopes.SINGLETON); - tableFunctions.addBinding().toProvider(Sequence.class).in(Scopes.SINGLETON); + tableFunctions.addBinding().to(ExcludeColumnsFunction.class).in(Scopes.SINGLETON); + 
tableFunctions.addBinding().to(SequenceFunction.class).in(Scopes.SINGLETON); } } diff --git a/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java b/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java index d536ca27c102c..cfc5ee9fb1ce7 100644 --- a/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java +++ b/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java @@ -32,6 +32,7 @@ import java.util.Objects; import java.util.Optional; +import static com.google.common.base.Preconditions.checkArgument; import static io.trino.cost.LocalCostEstimate.addPartialComponents; import static java.util.Objects.requireNonNull; @@ -206,6 +207,7 @@ public static LocalCostEstimate calculateJoinCostWithoutOutput( boolean replicated, int estimatedSourceDistributedTaskCount) { + checkArgument(estimatedSourceDistributedTaskCount > 0, "estimatedSourceDistributedTaskCount must be positive: %s", estimatedSourceDistributedTaskCount); LocalCostEstimate exchangesCost = calculateJoinExchangeCost( probe, build, diff --git a/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java b/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java index 62a575ed3057d..cbeaad3913540 100644 --- a/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java +++ b/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java @@ -21,7 +21,9 @@ import java.util.stream.Stream; import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.base.Preconditions.checkArgument; import static java.lang.Double.NaN; +import static java.lang.Double.isNaN; /** * Represents inherent cost of some plan node, not including cost of its sources. 
@@ -63,6 +65,9 @@ public LocalCostEstimate( @JsonProperty("maxMemory") double maxMemory, @JsonProperty("networkCost") double networkCost) { + checkArgument(isNaN(cpuCost) || cpuCost >= 0, "cpuCost cannot be negative: %s", cpuCost); + checkArgument(isNaN(maxMemory) || maxMemory >= 0, "maxMemory cannot be negative: %s", maxMemory); + checkArgument(isNaN(networkCost) || networkCost >= 0, "networkCost cannot be negative: %s", networkCost); this.cpuCost = cpuCost; this.maxMemory = maxMemory; this.networkCost = networkCost; diff --git a/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java b/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java index 9d151c0162e41..97b1f93855018 100644 --- a/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java +++ b/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java @@ -16,6 +16,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.Double.isNaN; + public class PlanNodeStatsAndCostSummary { private final double outputRowCount; @@ -32,6 +35,11 @@ public PlanNodeStatsAndCostSummary( @JsonProperty("memoryCost") double memoryCost, @JsonProperty("networkCost") double networkCost) { + checkArgument(isNaN(outputRowCount) || outputRowCount >= 0, "outputRowCount cannot be negative: %s", outputRowCount); + checkArgument(isNaN(outputSizeInBytes) || outputSizeInBytes >= 0, "outputSizeInBytes cannot be negative: %s", outputSizeInBytes); + checkArgument(isNaN(cpuCost) || cpuCost >= 0, "cpuCost cannot be negative: %s", cpuCost); + checkArgument(isNaN(memoryCost) || memoryCost >= 0, "memoryCost cannot be negative: %s", memoryCost); + checkArgument(isNaN(networkCost) || networkCost >= 0, "networkCost cannot be negative: %s", networkCost); this.outputRowCount = outputRowCount; this.outputSizeInBytes = outputSizeInBytes; this.cpuCost = cpuCost; diff --git a/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java b/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java index 21a460d5a8b3b..c78a11de5bb3a 100644 --- a/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java +++ b/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java @@ -23,6 +23,7 @@ import java.util.Set; import java.util.function.IntSupplier; +import static com.google.common.base.Preconditions.checkState; import static io.trino.SystemSessionProperties.getCostEstimationWorkerCount; import static io.trino.SystemSessionProperties.getFaultTolerantExecutionMaxPartitionCount; import static io.trino.SystemSessionProperties.getMaxHashPartitionCount; @@ -42,12 +43,17 @@ public TaskCountEstimator(NodeSchedulerConfig nodeSchedulerConfig, InternalNodeM requireNonNull(nodeManager, "nodeManager is null"); this.numberOfNodes = () -> { Set activeNodes = nodeManager.getAllNodes().getActiveNodes(); + int count; if (schedulerIncludeCoordinator) { - return activeNodes.size(); + count = activeNodes.size(); } - return toIntExact(activeNodes.stream() - .filter(node -> !node.isCoordinator()) - .count()); + else { + count = toIntExact(activeNodes.stream() + .filter(node -> !node.isCoordinator()) + .count()); + } + // At least 1 even if no worker nodes currently registered. This is to prevent underflow or other mis-estimations. 
+ return Math.max(count, 1); }; } @@ -60,9 +66,12 @@ public int estimateSourceDistributedTaskCount(Session session) { Integer costEstimationWorkerCount = getCostEstimationWorkerCount(session); if (costEstimationWorkerCount != null) { + // validated to be at least 1 return costEstimationWorkerCount; } - return numberOfNodes.getAsInt(); + int count = numberOfNodes.getAsInt(); + checkState(count > 0, "%s should return positive number of nodes: %s", numberOfNodes, count); + return count; } public int estimateHashedTaskCount(Session session) diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java index 7acd03cc6812b..54b979e503fc0 100644 --- a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java +++ b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java @@ -25,13 +25,13 @@ import org.weakref.jmx.Managed; import org.weakref.jmx.Nested; -import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadPoolExecutor; import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; import static io.airlift.concurrent.Threads.daemonThreadsNamed; +import static io.trino.util.Executors.decorateWithVersion; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; @@ -51,22 +51,7 @@ public DispatchExecutor(QueryManagerConfig config, VersionEmbedder versionEmbedd { ExecutorService coreExecutor = newCachedThreadPool(daemonThreadsNamed("dispatcher-query-%s")); closer.register(coreExecutor::shutdownNow); - executor = new DecoratingListeningExecutorService( - listeningDecorator(coreExecutor), - new DecoratingListeningExecutorService.TaskDecorator() - { - @Override - public Runnable decorate(Runnable command) - { - return versionEmbedder.embedVersion(command); - } - - @Override - public Callable decorate(Callable task) - { - return versionEmbedder.embedVersion(task); - } - }); + executor = decorateWithVersion(coreExecutor, versionEmbedder); ScheduledExecutorService coreScheduledExecutor = newScheduledThreadPool(config.getQueryManagerExecutorPoolSize(), daemonThreadsNamed("dispatch-executor-%s")); closer.register(coreScheduledExecutor::shutdownNow); diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java index fd3e9b3563c65..39b7d1795a0d3 100644 --- a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java +++ b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java @@ -22,6 +22,7 @@ import io.opentelemetry.api.trace.Tracer; import io.opentelemetry.context.Context; import io.trino.Session; +import io.trino.event.QueryMonitor; import io.trino.execution.QueryIdGenerator; import io.trino.execution.QueryInfo; import io.trino.execution.QueryManagerConfig; @@ -56,6 +57,7 @@ import static io.trino.execution.QueryState.RUNNING; import static io.trino.spi.StandardErrorCode.QUERY_TEXT_TOO_LARGE; import static io.trino.tracing.ScopedSpan.scopedSpan; +import static io.trino.util.Failures.toFailure; import static io.trino.util.StatementUtils.getQueryType; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -80,6 +82,7 @@ public class DispatchManager private final 
QueryTracker queryTracker; private final QueryManagerStats stats = new QueryManagerStats(); + private final QueryMonitor queryMonitor; @Inject public DispatchManager( @@ -94,7 +97,8 @@ public DispatchManager( SessionPropertyManager sessionPropertyManager, Tracer tracer, QueryManagerConfig queryManagerConfig, - DispatchExecutor dispatchExecutor) + DispatchExecutor dispatchExecutor, + QueryMonitor queryMonitor) { this.queryIdGenerator = requireNonNull(queryIdGenerator, "queryIdGenerator is null"); this.queryPreparer = requireNonNull(queryPreparer, "queryPreparer is null"); @@ -112,6 +116,7 @@ public DispatchManager( this.dispatchExecutor = dispatchExecutor.getExecutor(); this.queryTracker = new QueryTracker<>(queryManagerConfig, dispatchExecutor.getScheduledExecutor()); + this.queryMonitor = requireNonNull(queryMonitor, "queryMonitor is null"); } @PostConstruct @@ -236,6 +241,11 @@ private void createQueryInternal(QueryId queryId, Span querySpan, Slug slug, Optional preparedSql = Optional.ofNullable(preparedQuery).flatMap(PreparedQuery::getPrepareSql); DispatchQuery failedDispatchQuery = failedDispatchQueryFactory.createFailedDispatchQuery(session, query, preparedSql, Optional.empty(), throwable); queryCreated(failedDispatchQuery); + // maintain proper order of calls such that EventListener has access to QueryInfo + // - add query to tracker + // - fire query created event + // - fire query completed event + queryMonitor.queryImmediateFailureEvent(failedDispatchQuery.getBasicQueryInfo(), toFailure(throwable)); querySpan.setStatus(StatusCode.ERROR, throwable.getMessage()) .recordException(throwable) .end(); diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java b/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java index 3eeab6198ef39..65c75d35780e3 100644 --- a/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java +++ b/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java @@ -24,7 +24,6 @@ import java.util.Optional; import java.util.concurrent.ExecutorService; -import static io.trino.util.Failures.toFailure; import static java.util.Objects.requireNonNull; public class FailedDispatchQueryFactory @@ -58,7 +57,6 @@ public FailedDispatchQuery createFailedDispatchQuery(Session session, String que BasicQueryInfo queryInfo = failedDispatchQuery.getBasicQueryInfo(); queryMonitor.queryCreatedEvent(queryInfo); - queryMonitor.queryImmediateFailureEvent(queryInfo, toFailure(throwable)); return failedDispatchQuery; } diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java b/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java index b4cd5713344cd..f2feb98f73574 100644 --- a/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java +++ b/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java @@ -32,7 +32,6 @@ import io.trino.execution.QueryManagerConfig; import io.trino.execution.QueryState; import io.trino.server.HttpRequestSessionContextFactory; -import io.trino.server.ProtocolConfig; import io.trino.server.ServerConfig; import io.trino.server.SessionContext; import io.trino.server.protocol.QueryInfoUrlFactory; @@ -85,7 +84,8 @@ import static io.airlift.jaxrs.AsyncResponseHandler.bindAsyncResponse; import static io.trino.execution.QueryState.FAILED; import static io.trino.execution.QueryState.QUEUED; -import static 
io.trino.server.HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY; +import static io.trino.server.ServletSecurityUtils.authenticatedIdentity; +import static io.trino.server.ServletSecurityUtils.clearAuthenticatedIdentity; import static io.trino.server.protocol.QueryInfoUrlFactory.getQueryInfoUri; import static io.trino.server.protocol.Slug.Context.EXECUTING_QUERY; import static io.trino.server.protocol.Slug.Context.QUEUED_QUERY; @@ -120,7 +120,6 @@ public class QueuedStatementResource private final ScheduledExecutorService timeoutExecutor; private final boolean compressionEnabled; - private final Optional alternateHeaderName; private final QueryManager queryManager; @Inject @@ -131,7 +130,6 @@ public QueuedStatementResource( DispatchExecutor executor, QueryInfoUrlFactory queryInfoUrlTemplate, ServerConfig serverConfig, - ProtocolConfig protocolConfig, QueryManagerConfig queryManagerConfig) { this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); @@ -141,7 +139,6 @@ public QueuedStatementResource( this.timeoutExecutor = executor.getScheduledExecutor(); this.queryInfoUrlFactory = requireNonNull(queryInfoUrlTemplate, "queryInfoUrlTemplate is null"); this.compressionEnabled = serverConfig.isQueryResultsCompressionEnabled(); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); queryManager = new QueryManager(queryManagerConfig.getClientTimeout()); } @@ -178,19 +175,19 @@ public Response postStatement( private Query registerQuery(String statement, HttpServletRequest servletRequest, HttpHeaders httpHeaders) { Optional remoteAddress = Optional.ofNullable(servletRequest.getRemoteAddr()); - Optional identity = Optional.ofNullable((Identity) servletRequest.getAttribute(AUTHENTICATED_IDENTITY)); + Optional identity = authenticatedIdentity(servletRequest); if (identity.flatMap(Identity::getPrincipal).map(InternalPrincipal.class::isInstance).orElse(false)) { throw badRequest(FORBIDDEN, "Internal communication can not be used to start a query"); } MultivaluedMap headers = httpHeaders.getRequestHeaders(); - SessionContext sessionContext = sessionContextFactory.createSessionContext(headers, alternateHeaderName, remoteAddress, identity); + SessionContext sessionContext = sessionContextFactory.createSessionContext(headers, remoteAddress, identity); Query query = new Query(statement, sessionContext, dispatchManager, queryInfoUrlFactory, tracer); queryManager.registerQuery(query); // let authentication filter know that identity lifecycle has been handed off - servletRequest.setAttribute(AUTHENTICATED_IDENTITY, null); + clearAuthenticatedIdentity(servletRequest); return query; } diff --git a/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java b/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java index 021ed3a89760c..ff4b109d209b0 100644 --- a/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java +++ b/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java @@ -160,8 +160,7 @@ public ListenableFuture execute( // system path elements are not stored .filter(element -> !element.getCatalogName().equals(GlobalSystemConnector.NAME)) .collect(toImmutableList()), - Optional.empty(), - properties); + Optional.empty()); Set specifiedPropertyKeys = statement.getProperties().stream() // property names are case-insensitive and normalized to lower case @@ -172,7 +171,7 @@ public ListenableFuture execute( .filter(specifiedPropertyKeys::contains) 
.collect(toImmutableMap(Function.identity(), properties::get)); accessControl.checkCanCreateMaterializedView(session.toSecurityContext(), name, explicitlySetProperties); - plannerContext.getMetadata().createMaterializedView(session, name, definition, statement.isReplace(), statement.isNotExists()); + plannerContext.getMetadata().createMaterializedView(session, name, definition, properties, statement.isReplace(), statement.isNotExists()); stateMachine.setOutput(analysis.getTarget()); stateMachine.setReferencedTables(analysis.getReferencedTables()); diff --git a/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java b/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java index 2ee459b524842..b8d0a5b36aab7 100644 --- a/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java +++ b/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java @@ -15,6 +15,7 @@ import io.airlift.configuration.Config; import io.airlift.configuration.ConfigDescription; +import io.airlift.configuration.ConfigHidden; import io.airlift.units.Duration; import jakarta.validation.constraints.NotNull; @@ -31,6 +32,7 @@ public Duration getExpirationPeriod() return expirationPeriod; } + @ConfigHidden // not supposed to be used outside of tests @Config("failure-injection.expiration-period") @ConfigDescription("Period after which an injected failure is considered expired and will no longer be triggering a failure") public FailureInjectionConfig setExpirationPeriod(Duration expirationPeriod) @@ -45,6 +47,7 @@ public Duration getRequestTimeout() return requestTimeout; } + @ConfigHidden // not supposed to be used outside of tests @Config("failure-injection.request-timeout") @ConfigDescription("Period after which requests blocked to emulate a timeout are released") public FailureInjectionConfig setRequestTimeout(Duration requestTimeout) diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java b/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java index cebb0948a5032..ccb8e18e4854d 100644 --- a/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java +++ b/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java @@ -26,7 +26,7 @@ public class QueryExecutionMBean private final ThreadPoolExecutorMBean executorMBean; @Inject - public QueryExecutionMBean(@ForQueryExecution ExecutorService executor) + public QueryExecutionMBean(@QueryExecutorInternal ExecutorService executor) { this.executorMBean = new ThreadPoolExecutorMBean((ThreadPoolExecutor) executor); } diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java b/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java new file mode 100644 index 0000000000000..39d798b567bac --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java @@ -0,0 +1,29 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.execution; + +import com.google.inject.BindingAnnotation; + +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +@Retention(RUNTIME) +@Target({FIELD, PARAMETER, METHOD}) +@BindingAnnotation +public @interface QueryExecutorInternal {} diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java b/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java index d83d74f074dc0..e5b6052855a72 100644 --- a/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java +++ b/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java @@ -54,7 +54,6 @@ import io.trino.spi.type.Type; import io.trino.sql.analyzer.Output; import io.trino.sql.planner.PlanFragment; -import io.trino.sql.planner.plan.TableScanNode; import io.trino.tracing.TrinoAttributes; import io.trino.transaction.TransactionId; import io.trino.transaction.TransactionInfo; @@ -669,7 +668,7 @@ private QueryStats getQueryStats(Optional rootStage, List failedInternalNetworkInputPositions += stageStats.getFailedInternalNetworkInputPositions(); PlanFragment plan = stageInfo.getPlan(); - if (plan != null && plan.getPartitionedSourceNodes().stream().anyMatch(TableScanNode.class::isInstance)) { + if (plan != null && plan.containsTableScanNode()) { rawInputDataSize += stageStats.getRawInputDataSize().toBytes(); failedRawInputDataSize += stageStats.getFailedRawInputDataSize().toBytes(); rawInputPositions += stageStats.getRawInputPositions(); diff --git a/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java b/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java index fa3ee37b94545..acce256889772 100644 --- a/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java +++ b/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java @@ -22,6 +22,8 @@ import io.airlift.log.Logger; import io.airlift.units.DataSize; import io.airlift.units.Duration; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; import io.trino.ExceededCpuLimitException; import io.trino.ExceededScanLimitException; import io.trino.Session; @@ -55,6 +57,7 @@ import static io.trino.SystemSessionProperties.getQueryMaxScanPhysicalBytes; import static io.trino.execution.QueryState.RUNNING; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; +import static io.trino.tracing.ScopedSpan.scopedSpan; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newCachedThreadPool; @@ -67,6 +70,7 @@ public class SqlQueryManager private static final Logger log = Logger.get(SqlQueryManager.class); private final ClusterMemoryManager memoryManager; + private final Tracer tracer; private final QueryTracker queryTracker; private final Duration maxQueryCpuTime; @@ -79,9 +83,10 @@ public class SqlQueryManager private final ThreadPoolExecutorMBean queryManagementExecutorMBean; @Inject - public SqlQueryManager(ClusterMemoryManager memoryManager, QueryManagerConfig queryManagerConfig) + public SqlQueryManager(ClusterMemoryManager memoryManager, Tracer tracer, QueryManagerConfig queryManagerConfig) { this.memoryManager = requireNonNull(memoryManager, "memoryManager is null"); + this.tracer = 
requireNonNull(tracer, "tracer is null"); this.maxQueryCpuTime = queryManagerConfig.getQueryMaxCpuTime(); this.maxQueryScanPhysicalBytes = queryManagerConfig.getQueryMaxScanPhysicalBytes(); @@ -253,7 +258,11 @@ public void createQuery(QueryExecution queryExecution) }); try (SetThreadName ignored = new SetThreadName("Query-%s", queryExecution.getQueryId())) { - queryExecution.start(); + try (var ignoredStartScope = scopedSpan(tracer.spanBuilder("query-start") + .setParent(Context.current().with(queryExecution.getSession().getQuerySpan())) + .startSpan())) { + queryExecution.start(); + } } } diff --git a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java index f4c8fe43ab19e..12d3a4a2a01e6 100644 --- a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java +++ b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java @@ -123,6 +123,7 @@ public class SqlTaskManager private final ScheduledExecutorService taskManagementExecutor; private final ScheduledExecutorService driverYieldExecutor; + private final ScheduledExecutorService driverTimeoutExecutor; private final Duration infoCacheTime; private final Duration clientTimeout; @@ -216,6 +217,7 @@ public SqlTaskManager( this.taskManagementExecutor = taskManagementExecutor.getExecutor(); this.driverYieldExecutor = newScheduledThreadPool(config.getTaskYieldThreads(), threadsNamed("task-yield-%s")); + this.driverTimeoutExecutor = newScheduledThreadPool(config.getDriverTimeoutThreads(), threadsNamed("task-driver-timeout-%s")); SqlTaskExecutionFactory sqlTaskExecutionFactory = new SqlTaskExecutionFactory(taskNotificationExecutor, taskExecutor, planner, splitMonitor, tracer, config); @@ -269,6 +271,7 @@ private QueryContext createQueryContext( gcMonitor, taskNotificationExecutor, driverYieldExecutor, + driverTimeoutExecutor, maxQuerySpillPerNode, localSpillManager.getSpillSpaceTracker()); } diff --git a/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java b/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java index 2ddc4a6c3a9e0..5205c42943428 100644 --- a/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java +++ b/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java @@ -33,7 +33,6 @@ import io.trino.spi.eventlistener.StageGcStatistics; import io.trino.sql.planner.PlanFragment; import io.trino.sql.planner.plan.PlanNodeId; -import io.trino.sql.planner.plan.TableScanNode; import io.trino.tracing.TrinoAttributes; import io.trino.util.Failures; import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; @@ -338,7 +337,7 @@ public BasicStageStats getBasicStageStats(Supplier> taskInfos internalNetworkInputDataSize += taskStats.getInternalNetworkInputDataSize().toBytes(); internalNetworkInputPositions += taskStats.getInternalNetworkInputPositions(); - if (fragment.getPartitionedSourceNodes().stream().anyMatch(TableScanNode.class::isInstance)) { + if (fragment.containsTableScanNode()) { rawInputDataSize += taskStats.getRawInputDataSize().toBytes(); rawInputPositions += taskStats.getRawInputPositions(); } diff --git a/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java b/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java index 4d584c36034ab..709e2d03c861a 100644 --- a/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java +++ b/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java @@ -100,6 +100,7 @@ 
public class TaskManagerConfig private int taskNotificationThreads = 5; private int taskYieldThreads = 3; + private int driverTimeoutThreads = 5; private BigDecimal levelTimeMultiplier = new BigDecimal(2.0); @@ -569,6 +570,20 @@ public TaskManagerConfig setTaskYieldThreads(int taskYieldThreads) return this; } + @Min(1) + public int getDriverTimeoutThreads() + { + return driverTimeoutThreads; + } + + @Config("task.driver-timeout-threads") + @ConfigDescription("Number of threads used for timing out blocked drivers if the timeout is set") + public TaskManagerConfig setDriverTimeoutThreads(int driverTimeoutThreads) + { + this.driverTimeoutThreads = driverTimeoutThreads; + return this; + } + public boolean isInterruptStuckSplitTasksEnabled() { return interruptStuckSplitTasksEnabled; diff --git a/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java b/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java index 4536d9437af13..8a49ca436cd61 100644 --- a/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java +++ b/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java @@ -73,7 +73,7 @@ import static io.airlift.concurrent.Threads.threadsNamed; import static io.airlift.tracing.Tracing.noopTracer; import static io.trino.execution.executor.timesharing.MultilevelSplitQueue.computeLevel; -import static io.trino.version.EmbedVersion.testingVersionEmbedder; +import static io.trino.util.EmbedVersion.testingVersionEmbedder; import static java.lang.Math.min; import static java.lang.String.format; import static java.util.Objects.requireNonNull; diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java index c370e7bca4da6..d5169126d8ee5 100644 --- a/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java +++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java @@ -585,7 +585,7 @@ private static Map createOutputBuf private static PipelinedOutputBufferManager createSingleStreamOutputBuffer(SqlStage stage) { PartitioningHandle partitioningHandle = stage.getFragment().getOutputPartitioningScheme().getPartitioning().getHandle(); - checkArgument(partitioningHandle.isSingleNode(), "partitioning is expected to be single node: " + partitioningHandle); + checkArgument(partitioningHandle.isSingleNode(), "partitioning is expected to be single node: %s", partitioningHandle); return new PartitionedPipelinedOutputBufferManager(partitioningHandle, 1); } diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java index d6757fbce2d2f..2b8b874d488ba 100644 --- a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java +++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java @@ -34,6 +34,7 @@ import io.trino.metadata.InternalNode; import io.trino.metadata.InternalNodeManager; import io.trino.metadata.InternalNodeManager.NodesSnapshot; +import io.trino.spi.HostAddress; import io.trino.spi.TrinoException; import io.trino.spi.memory.MemoryPoolInfo; import 
jakarta.annotation.PostConstruct; @@ -41,6 +42,7 @@ import org.assertj.core.util.VisibleForTesting; import java.time.Duration; +import java.util.ArrayList; import java.util.Comparator; import java.util.Deque; import java.util.HashMap; @@ -573,24 +575,25 @@ public BinPackingSimulation( } } + private List dropCoordinatorsIfNecessary(List candidates) + { + return scheduleOnCoordinator ? candidates : candidates.stream().filter(node -> !node.isCoordinator()).collect(toImmutableList()); + } + public ReserveResult tryReserve(PendingAcquire acquire) { NodeRequirements requirements = acquire.getNodeRequirements(); Optional> catalogNodes = requirements.getCatalogHandle().map(nodesSnapshot::getConnectorNodes); - List candidates = allNodesSorted.stream() - .filter(node -> catalogNodes.isEmpty() || catalogNodes.get().contains(node)) - .filter(node -> { - // Allow using coordinator if explicitly requested - if (requirements.getAddresses().contains(node.getHostAndPort())) { - return true; - } - if (requirements.getAddresses().isEmpty()) { - return scheduleOnCoordinator || !node.isCoordinator(); - } - return false; - }) - .collect(toImmutableList()); + List candidates = new ArrayList<>(allNodesSorted); + catalogNodes.ifPresent(candidates::retainAll); // Drop non-catalog nodes, if any. + Set addresses = requirements.getAddresses(); + if (!addresses.isEmpty()) { + candidates = candidates.stream().filter(node -> addresses.contains(node.getHostAndPort())).collect(toImmutableList()); + } + else { + candidates = dropCoordinatorsIfNecessary(candidates); + } if (candidates.isEmpty()) { return ReserveResult.NONE_MATCHING; diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java index 1c8f611b7d236..2132a0b95c345 100644 --- a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java +++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java @@ -651,9 +651,11 @@ private static class Scheduler private static final int EVENT_BUFFER_CAPACITY = 100; private static final long EVENT_PROCESSING_ENFORCED_FREQUENCY_MILLIS = MINUTES.toMillis(1); // If scheduler is stalled for SCHEDULER_STALLED_DURATION_THRESHOLD debug log will be emitted. - // This value must be larger than EVENT_PROCESSING_ENFORCED_FREQUENCY as prerequiste for processing is + // If the situation persists, event logs will be emitted at SCHEDULER_MAX_DEBUG_INFO_FREQUENCY. + // SCHEDULER_STALLED_DURATION_THRESHOLD must be larger than EVENT_PROCESSING_ENFORCED_FREQUENCY as a prerequisite for processing is // that there are no events in the event queue.
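Reviewer note: the rewritten `tryReserve` above narrows the candidate set in three steps: start from all sorted nodes, retain only the catalog's nodes (if a catalog is named), then either keep only explicitly requested addresses or drop coordinators unless coordinator scheduling is enabled. A minimal standalone sketch of that selection order, using a simplified `Node` record rather than Trino's `InternalNode`:

```java
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

// Simplified stand-in for InternalNode: just a host name and a coordinator flag.
record Node(String host, boolean coordinator) {}

final class CandidateSelection
{
    static List<Node> selectCandidates(
            List<Node> allNodesSorted,
            Optional<Set<Node>> catalogNodes,
            Set<String> requiredAddresses,
            boolean scheduleOnCoordinator)
    {
        return allNodesSorted.stream()
                // Drop nodes the catalog is not registered on, when the requirement names a catalog
                .filter(node -> catalogNodes.map(nodes -> nodes.contains(node)).orElse(true))
                .filter(node -> requiredAddresses.isEmpty()
                        // no address requirement: coordinators only when explicitly allowed
                        ? scheduleOnCoordinator || !node.coordinator()
                        // address requirement: only the requested hosts (a coordinator is kept if requested)
                        : requiredAddresses.contains(node.host()))
                .collect(Collectors.toList());
    }
}
```

As in the diff, an explicit address requirement takes precedence over the coordinator exclusion.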
- private static final long SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS = MINUTES.toMillis(5); + private static final long SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS = MINUTES.toMillis(10); + private static final long SCHEDULER_MAX_DEBUG_INFO_FREQUENCY_MILLIS = MINUTES.toMillis(10); private static final long SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS = SECONDS.toMillis(30); private static final int EVENTS_DEBUG_INFOS_PER_BUCKET = 10; @@ -688,7 +690,8 @@ private static class Scheduler private final BlockingQueue eventQueue = new LinkedBlockingQueue<>(); private final List eventBuffer = new ArrayList<>(EVENT_BUFFER_CAPACITY); - private final Stopwatch eventDebugInfoStopwatch = Stopwatch.createUnstarted(); + private final Stopwatch noEventsStopwatch = Stopwatch.createUnstarted(); + private final Stopwatch debugInfoStopwatch = Stopwatch.createUnstarted(); private final Optional eventDebugInfos; private boolean started; @@ -781,7 +784,7 @@ public Scheduler( } planInTopologicalOrder = sortPlanInTopologicalOrder(plan); - eventDebugInfoStopwatch.start(); + noEventsStopwatch.start(); } public void run() @@ -801,8 +804,8 @@ public void run() } if (queryInfo.getState() == QueryState.FAILED && queryInfo.getErrorCode() == EXCEEDED_TIME_LIMIT.toErrorCode() - && eventDebugInfoStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS) { - logDebugInfoSafe(format("Scheduler stalled for %s on EXCEEDED_TIME_LIMIT", eventDebugInfoStopwatch.elapsed())); + && noEventsStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS) { + logDebugInfoSafe(format("Scheduler stalled for %s on EXCEEDED_TIME_LIMIT", noEventsStopwatch.elapsed())); } }); @@ -896,13 +899,16 @@ private boolean processEvents() if (eventDebugInfoRecorded) { // mark that we processed some events; we filter out some no-op events. 
// If only no-op events appear in event queue we still treat scheduler as stuck - eventDebugInfoStopwatch.reset().start(); + noEventsStopwatch.reset().start(); + debugInfoStopwatch.reset(); } else { // if no events were recorded there is a chance scheduler is stalled - if (log.isDebugEnabled() && eventDebugInfoStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS) { - logDebugInfoSafe("Scheduler stalled for %s".formatted(eventDebugInfoStopwatch.elapsed())); - eventDebugInfoStopwatch.reset().start(); // reset to prevent extensive logging + if (log.isDebugEnabled() + && (!debugInfoStopwatch.isRunning() || debugInfoStopwatch.elapsed().toMillis() > SCHEDULER_MAX_DEBUG_INFO_FREQUENCY_MILLIS) + && noEventsStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS) { + logDebugInfoSafe("Scheduler stalled for %s".formatted(noEventsStopwatch.elapsed())); + debugInfoStopwatch.reset().start(); // reset to prevent extensive logging } } @@ -936,6 +942,7 @@ private void logDebugInfo(String reason) log.debug("Scheduler debug info for %s START; reason=%s", queryStateMachine.getQueryId(), reason); log.debug("General state: %s", toStringHelper(this) .add("queryState", queryStateMachine.getQueryState()) + .add("finalQueryInfo", queryStateMachine.getFinalQueryInfo()) .add("maxTaskExecutionAttempts", maxTaskExecutionAttempts) .add("maxTasksWaitingForNode", maxTasksWaitingForNode) .add("maxTasksWaitingForExecution", maxTasksWaitingForExecution) diff --git a/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java b/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java index 24fee50d8072b..365bff87b28a9 100644 --- a/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java +++ b/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java @@ -146,7 +146,7 @@ else if (matchEnd >= inputLimit - 1) { j = kmpShifts[j]; // Continue to match the whole pattern using KMP - while (j > 0) { + while (j >= 0) { int size = findLongestMatch(input, i, pattern, j, Math.min(inputLimit - i, pattern.length - j)); i += size; j += size; diff --git a/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java b/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java index b330931af945d..191179a8ae168 100644 --- a/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java +++ b/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import static com.google.common.base.MoreObjects.toStringHelper; @@ -53,9 +54,9 @@ public class MemoryPool @GuardedBy("this") private NonCancellableMemoryFuture future; - @GuardedBy("this") // TODO: It would be better if we just tracked QueryContexts, but their lifecycle is managed by a weak reference, so we can't do that - private final Map queryMemoryReservations = new HashMap<>(); + // It is guarded for updates by this, but can be read without holding a lock + private final Map queryMemoryReservations = new ConcurrentHashMap<>(); // This map keeps track of all the tagged allocations, e.g., query-1 -> ['TableScanOperator': 10MB, 'LazyOutputBuffer': 5MB, ...] 
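Reviewer note: the `processEvents()` change above splits one stopwatch into two so that "how long has the scheduler seen no events" is tracked separately from "when was debug info last dumped"; logging no longer resets the stall timer, and a persistent stall is re-reported at most once per SCHEDULER_MAX_DEBUG_INFO_FREQUENCY. A minimal sketch of that throttling pattern (constants and method names here are illustrative, not Trino's):

```java
import com.google.common.base.Stopwatch;

import static java.util.concurrent.TimeUnit.MINUTES;

final class StallReporter
{
    private static final long STALL_THRESHOLD_MILLIS = MINUTES.toMillis(10);
    private static final long MAX_REPORT_FREQUENCY_MILLIS = MINUTES.toMillis(10);

    private final Stopwatch noEvents = Stopwatch.createStarted();     // reset whenever real events are processed
    private final Stopwatch lastReport = Stopwatch.createUnstarted(); // started only when a report is emitted

    void onEventsProcessed()
    {
        noEvents.reset().start();
        lastReport.reset(); // next stall is reported as soon as the threshold is crossed again
    }

    void maybeReportStall(Runnable reporter)
    {
        boolean stalled = noEvents.elapsed().toMillis() > STALL_THRESHOLD_MILLIS;
        boolean reportDue = !lastReport.isRunning() || lastReport.elapsed().toMillis() > MAX_REPORT_FREQUENCY_MILLIS;
        if (stalled && reportDue) {
            reporter.run();
            lastReport.reset().start(); // throttle subsequent reports, but keep the stall timer running
        }
    }
}
```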
@GuardedBy("this") @@ -347,7 +348,7 @@ public synchronized long getReservedRevocableBytes() return reservedRevocableBytes; } - synchronized long getQueryMemoryReservation(QueryId queryId) + long getQueryMemoryReservation(QueryId queryId) { return queryMemoryReservations.getOrDefault(queryId, 0L); } diff --git a/core/trino-main/src/main/java/io/trino/memory/QueryContext.java b/core/trino-main/src/main/java/io/trino/memory/QueryContext.java index c30e3ad282f71..3331e60d539d1 100644 --- a/core/trino-main/src/main/java/io/trino/memory/QueryContext.java +++ b/core/trino-main/src/main/java/io/trino/memory/QueryContext.java @@ -63,6 +63,7 @@ public class QueryContext private final GcMonitor gcMonitor; private final Executor notificationExecutor; private final ScheduledExecutorService yieldExecutor; + private final ScheduledExecutorService timeoutExecutor; private final long maxSpill; private final SpillSpaceTracker spillSpaceTracker; private final Map taskContexts = new ConcurrentHashMap<>(); @@ -86,6 +87,7 @@ public QueryContext( GcMonitor gcMonitor, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, + ScheduledExecutorService timeoutExecutor, DataSize maxSpill, SpillSpaceTracker spillSpaceTracker) { @@ -97,6 +99,7 @@ public QueryContext( gcMonitor, notificationExecutor, yieldExecutor, + timeoutExecutor, maxSpill, spillSpaceTracker); } @@ -109,6 +112,7 @@ public QueryContext( GcMonitor gcMonitor, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, + ScheduledExecutorService timeoutExecutor, DataSize maxSpill, SpillSpaceTracker spillSpaceTracker) { @@ -118,6 +122,7 @@ public QueryContext( this.gcMonitor = requireNonNull(gcMonitor, "gcMonitor is null"); this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null"); this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null"); + this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null"); this.maxSpill = maxSpill.toBytes(); this.spillSpaceTracker = requireNonNull(spillSpaceTracker, "spillSpaceTracker is null"); this.guaranteedMemory = guaranteedMemory; @@ -220,12 +225,12 @@ public synchronized void freeSpill(long bytes) spillSpaceTracker.free(bytes); } - public synchronized MemoryPool getMemoryPool() + public MemoryPool getMemoryPool() { return memoryPool; } - public synchronized long getUserMemoryReservation() + public long getUserMemoryReservation() { return memoryPool.getQueryMemoryReservation(queryId); } @@ -257,6 +262,7 @@ public TaskContext addTaskContext( gcMonitor, notificationExecutor, yieldExecutor, + timeoutExecutor, session, taskMemoryContext, notifyStatusChanged, diff --git a/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java b/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java index 23e071d2ecc2c..9a7c208380ea8 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java +++ b/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java @@ -324,7 +324,10 @@ private record FunctionKey(ResolvedFunction resolvedFunction, InvocationConventi public static FunctionManager createTestingFunctionManager() { TypeOperators typeOperators = new TypeOperators(); - GlobalFunctionCatalog functionCatalog = new GlobalFunctionCatalog(); + GlobalFunctionCatalog functionCatalog = new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }); 
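Reviewer note: the `MemoryPool` change above keeps all mutations of `queryMemoryReservations` behind `synchronized` methods but moves the map to a `ConcurrentHashMap`, so point reads such as `getQueryMemoryReservation` no longer need the pool lock. A condensed sketch of that "guarded writes, lock-free reads" shape, not the actual MemoryPool code:

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

final class ReservationTracker<K>
{
    // Point reads are safe without the lock; writers synchronize so read-modify-write stays consistent.
    private final Map<K, Long> reservations = new ConcurrentHashMap<>();

    synchronized void reserve(K key, long bytes)
    {
        reservations.merge(key, bytes, Long::sum);
    }

    synchronized void free(K key, long bytes)
    {
        reservations.compute(key, (k, current) -> {
            long remaining = (current == null ? 0 : current) - bytes;
            return remaining <= 0 ? null : remaining; // returning null removes the entry
        });
    }

    long reserved(K key) // no synchronization needed for a single lookup
    {
        return reservations.getOrDefault(key, 0L);
    }
}
```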
functionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), UNKNOWN)); functionCatalog.addFunctions(new InternalFunctionBundle(new LiteralFunction(new InternalBlockEncodingSerde(new BlockEncodingManager(), TESTING_TYPE_MANAGER)))); return new FunctionManager(CatalogServiceProvider.fail(), functionCatalog, LanguageFunctionProvider.DISABLED); diff --git a/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java b/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java index 0b8e28702a7cd..6cdd80f483000 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java +++ b/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java @@ -18,9 +18,12 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Multimap; import com.google.errorprone.annotations.ThreadSafe; +import com.google.inject.Inject; +import com.google.inject.Provider; import io.trino.connector.system.GlobalSystemConnector; -import io.trino.operator.table.ExcludeColumns.ExcludeColumnsFunctionHandle; -import io.trino.operator.table.Sequence.SequenceFunctionHandle; +import io.trino.operator.table.ExcludeColumnsFunction.ExcludeColumnsFunctionHandle; +import io.trino.operator.table.SequenceFunction.SequenceFunctionHandle; +import io.trino.operator.table.json.JsonTable.JsonTableFunctionHandle; import io.trino.spi.function.AggregationFunctionMetadata; import io.trino.spi.function.AggregationImplementation; import io.trino.spi.function.BoundSignature; @@ -37,6 +40,7 @@ import io.trino.spi.function.WindowFunctionSupplier; import io.trino.spi.function.table.ConnectorTableFunctionHandle; import io.trino.spi.function.table.TableFunctionProcessorProvider; +import io.trino.spi.type.TypeManager; import io.trino.spi.type.TypeSignature; import java.util.Collection; @@ -51,21 +55,35 @@ import static io.trino.metadata.OperatorNameUtil.isOperatorName; import static io.trino.metadata.OperatorNameUtil.mangleOperatorName; import static io.trino.metadata.OperatorNameUtil.unmangleOperator; -import static io.trino.operator.table.ExcludeColumns.getExcludeColumnsFunctionProcessorProvider; -import static io.trino.operator.table.Sequence.getSequenceFunctionProcessorProvider; +import static io.trino.operator.table.ExcludeColumnsFunction.getExcludeColumnsFunctionProcessorProvider; +import static io.trino.operator.table.SequenceFunction.getSequenceFunctionProcessorProvider; +import static io.trino.operator.table.json.JsonTable.getJsonTableFunctionProcessorProvider; import static io.trino.spi.function.FunctionKind.AGGREGATE; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.IntegerType.INTEGER; import static java.util.Locale.ENGLISH; +import static java.util.Objects.requireNonNull; @ThreadSafe public class GlobalFunctionCatalog implements FunctionProvider { public static final String BUILTIN_SCHEMA = "builtin"; + + private final Provider metadata; + private final Provider typeManager; + private final Provider functionManager; private volatile FunctionMap functions = new FunctionMap(); + @Inject + public GlobalFunctionCatalog(Provider metadata, Provider typeManager, Provider functionManager) + { + this.metadata = requireNonNull(metadata, "metadata is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.functionManager = requireNonNull(functionManager, "functionManager is 
null"); + } + public final synchronized void addFunctions(FunctionBundle functionBundle) { for (FunctionMetadata functionMetadata : functionBundle.getFunctions()) { @@ -187,6 +205,9 @@ public TableFunctionProcessorProvider getTableFunctionProcessorProvider(Connecto if (functionHandle instanceof SequenceFunctionHandle) { return getSequenceFunctionProcessorProvider(); } + if (functionHandle instanceof JsonTableFunctionHandle) { + return getJsonTableFunctionProcessorProvider(metadata.get(), typeManager.get(), functionManager.get()); + } return null; } @@ -272,14 +293,14 @@ public Collection get(String functionName) public FunctionMetadata get(FunctionId functionId) { FunctionMetadata functionMetadata = functionsById.get(functionId); - checkArgument(functionMetadata != null, "Unknown function implementation: " + functionId); + checkArgument(functionMetadata != null, "Unknown function implementation: %s", functionId); return functionMetadata; } public FunctionBundle getFunctionBundle(FunctionId functionId) { FunctionBundle functionBundle = functionBundlesById.get(functionId); - checkArgument(functionBundle != null, "Unknown function implementation: " + functionId); + checkArgument(functionBundle != null, "Unknown function implementation: %s", functionId); return functionBundle; } } diff --git a/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java b/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java index 20a94c3362fcf..54a8b6c65f502 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java +++ b/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java @@ -184,7 +184,7 @@ private WindowFunctionSupplier specializeWindow(FunctionId functionId, BoundSign private SqlFunction getSqlFunction(FunctionId functionId) { SqlFunction function = functions.get(functionId); - checkArgument(function != null, "Unknown function implementation: " + functionId); + checkArgument(function != null, "Unknown function implementation: %s", functionId); return function; } diff --git a/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java b/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java index e4ae4ecfd77da..a415c339a0b9c 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java +++ b/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java @@ -269,7 +269,7 @@ public synchronized List getFunctions(CatalogHandle catalogHan public FunctionDependencyDeclaration getDependencies(FunctionId functionId, AccessControl accessControl) { LanguageFunctionImplementation function = implementationsById.get(functionId); - checkArgument(function != null, "Unknown function implementation: " + functionId); + checkArgument(function != null, "Unknown function implementation: %s", functionId); return function.getFunctionDependencies(accessControl); } @@ -285,7 +285,7 @@ public Optional specialize(ResolvedFunction resolv public FunctionMetadata getFunctionMetadata(FunctionId functionId) { LanguageFunctionImplementation function = implementationsById.get(functionId); - checkArgument(function != null, "Unknown function implementation: " + functionId); + checkArgument(function != null, "Unknown function implementation: %s", functionId); return function.getFunctionMetadata(); } @@ -293,7 +293,7 @@ public void registerResolvedFunction(ResolvedFunction resolvedFunction) { FunctionId functionId = resolvedFunction.getFunctionId(); 
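Reviewer note: injecting `Provider<Metadata>`, `Provider<TypeManager>` and `Provider<FunctionManager>` into `GlobalFunctionCatalog` (rather than the instances themselves) defers resolution until a `json_table` processor is actually requested, which avoids a construction-time cycle; the test factories pass providers that throw because those paths are never exercised there. A rough illustration of the pattern, with simplified types that are not Trino's wiring:

```java
import com.google.inject.Provider;

import static java.util.Objects.requireNonNull;

final class LazyCatalog
{
    private final Provider<Runtime> runtime; // stand-in for a dependency that is cyclic or expensive to build eagerly

    LazyCatalog(Provider<Runtime> runtime)
    {
        // Only the provider is captured here; runtime.get() is deferred until first use.
        this.runtime = requireNonNull(runtime, "runtime is null");
    }

    long freeMemory()
    {
        return runtime.get().freeMemory(); // resolved lazily, breaking construction-time cycles
    }

    static LazyCatalog forTesting()
    {
        // Mirrors the diff's test setup: any path that touches the dependency fails fast.
        return new LazyCatalog(() -> { throw new UnsupportedOperationException(); });
    }
}
```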
LanguageFunctionImplementation function = implementationsById.get(functionId); - checkArgument(function != null, "Unknown function implementation: " + functionId); + checkArgument(function != null, "Unknown function implementation: %s", functionId); implementationsByResolvedFunction.put(resolvedFunction, function); } diff --git a/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java b/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java index caff709a19b55..6dc49947b0308 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java +++ b/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java @@ -13,7 +13,6 @@ */ package io.trino.metadata; -import com.google.common.collect.ImmutableMap; import io.trino.spi.connector.CatalogSchemaName; import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ConnectorMaterializedViewDefinition; @@ -21,7 +20,6 @@ import java.time.Duration; import java.util.List; -import java.util.Map; import java.util.Optional; import static com.google.common.base.MoreObjects.toStringHelper; @@ -34,7 +32,6 @@ public class MaterializedViewDefinition { private final Optional gracePeriod; private final Optional storageTable; - private final Map properties; public MaterializedViewDefinition( String originalSql, @@ -45,14 +42,12 @@ public MaterializedViewDefinition( Optional comment, Identity owner, List path, - Optional storageTable, - Map properties) + Optional storageTable) { super(originalSql, catalog, schema, columns, comment, Optional.of(owner), path); checkArgument(gracePeriod.isEmpty() || !gracePeriod.get().isNegative(), "gracePeriod cannot be negative: %s", gracePeriod); this.gracePeriod = gracePeriod; this.storageTable = requireNonNull(storageTable, "storageTable is null"); - this.properties = ImmutableMap.copyOf(requireNonNull(properties, "properties is null")); } public Optional getGracePeriod() @@ -65,11 +60,6 @@ public Optional getStorageTable() return storageTable; } - public Map getProperties() - { - return properties; - } - public ConnectorMaterializedViewDefinition toConnectorMaterializedViewDefinition() { return new ConnectorMaterializedViewDefinition( @@ -83,8 +73,7 @@ public ConnectorMaterializedViewDefinition toConnectorMaterializedViewDefinition getGracePeriod(), getComment(), getRunAsIdentity().map(Identity::getUser), - getPath(), - properties); + getPath()); } @Override @@ -100,7 +89,6 @@ public String toString() .add("runAsIdentity", getRunAsIdentity()) .add("path", getPath()) .add("storageTable", storageTable.orElse(null)) - .add("properties", properties) .toString(); } } diff --git a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java index bb2cad83e056f..1b7ffa8e706c4 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java +++ b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java @@ -710,7 +710,13 @@ default ResolvedFunction getCoercion(Type fromType, Type toType) /** * Creates the specified materialized view with the specified view definition. 
*/ - void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting); + void createMaterializedView( + Session session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting); /** * Drops the specified materialized view. @@ -740,6 +746,8 @@ default boolean isMaterializedView(Session session, QualifiedObjectName viewName */ Optional getMaterializedView(Session session, QualifiedObjectName viewName); + Map getMaterializedViewProperties(Session session, QualifiedObjectName objectName, MaterializedViewDefinition materializedViewDefinition); + /** * Method to get difference between the states of table at two different points in time/or as of given token-ids. * The method is used by the engine to determine if a materialized view is current with respect to the tables it depends on. diff --git a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java index 1adeb8d71a4a5..67e5666663a3b 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java +++ b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java @@ -112,7 +112,6 @@ import io.trino.spi.type.TypeOperators; import io.trino.sql.analyzer.TypeSignatureProvider; import io.trino.sql.parser.SqlParser; -import io.trino.sql.planner.ConnectorExpressions; import io.trino.sql.planner.PartitioningHandle; import io.trino.sql.tree.QualifiedName; import io.trino.transaction.TransactionManager; @@ -158,6 +157,7 @@ import static io.trino.metadata.RedirectionAwareTableHandle.noRedirection; import static io.trino.metadata.RedirectionAwareTableHandle.withRedirectionTo; import static io.trino.metadata.SignatureBinder.applyBoundVariables; +import static io.trino.plugin.base.expression.ConnectorExpressions.extractVariables; import static io.trino.spi.ErrorType.EXTERNAL; import static io.trino.spi.StandardErrorCode.FUNCTION_IMPLEMENTATION_ERROR; import static io.trino.spi.StandardErrorCode.FUNCTION_IMPLEMENTATION_MISSING; @@ -1530,7 +1530,13 @@ public void dropView(Session session, QualifiedObjectName viewName) } @Override - public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + Session session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, viewName.getCatalogName()); CatalogHandle catalogHandle = catalogMetadata.getCatalogHandle(); @@ -1540,6 +1546,7 @@ public void createMaterializedView(Session session, QualifiedObjectName viewName session.toConnectorSession(catalogHandle), viewName.asSchemaTableName(), definition.toConnectorMaterializedViewDefinition(), + properties, replace, ignoreExisting); if (catalogMetadata.getSecurityManagement() == SYSTEM) { @@ -1673,8 +1680,7 @@ private static MaterializedViewDefinition createMaterializedViewDefinition(Conne view.getComment(), runAsIdentity, view.getPath(), - view.getStorageTable(), - view.getProperties()); + view.getStorageTable()); } private Optional getMaterializedViewInternal(Session session, QualifiedObjectName viewName) @@ -1695,6 +1701,24 @@ private Optional getMaterializedViewInterna return Optional.empty(); } + @Override + 
public Map getMaterializedViewProperties(Session session, QualifiedObjectName viewName, MaterializedViewDefinition materializedViewDefinition) + { + Optional catalog = getOptionalCatalogMetadata(session, viewName.getCatalogName()); + if (catalog.isPresent()) { + CatalogMetadata catalogMetadata = catalog.get(); + CatalogHandle catalogHandle = catalogMetadata.getCatalogHandle(session, viewName); + ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle); + + ConnectorSession connectorSession = session.toConnectorSession(catalogHandle); + return ImmutableMap.copyOf(metadata.getMaterializedViewProperties( + connectorSession, + viewName.asSchemaTableName(), + materializedViewDefinition.toConnectorMaterializedViewDefinition())); + } + return ImmutableMap.of(); + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(Session session, QualifiedObjectName viewName) { @@ -2016,7 +2040,7 @@ private void verifyProjection(TableHandle table, List proje .map(Assignment::getVariable) .collect(toImmutableSet()); projections.stream() - .flatMap(connectorExpression -> ConnectorExpressions.extractVariables(connectorExpression).stream()) + .flatMap(connectorExpression -> extractVariables(connectorExpression).stream()) .map(Variable::getName) .filter(variableName -> !assignedVariables.contains(variableName)) .findAny() @@ -2737,7 +2761,10 @@ public MetadataManager build() GlobalFunctionCatalog globalFunctionCatalog = this.globalFunctionCatalog; if (globalFunctionCatalog == null) { - globalFunctionCatalog = new GlobalFunctionCatalog(); + globalFunctionCatalog = new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }); TypeOperators typeOperators = new TypeOperators(); globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), UNKNOWN)); globalFunctionCatalog.addFunctions(new InternalFunctionBundle(new LiteralFunction(new InternalBlockEncodingSerde(new BlockEncodingManager(), typeManager)))); diff --git a/core/trino-main/src/main/java/io/trino/operator/Driver.java b/core/trino-main/src/main/java/io/trino/operator/Driver.java index f6949c4bb204a..0033344d7fe57 100644 --- a/core/trino-main/src/main/java/io/trino/operator/Driver.java +++ b/core/trino-main/src/main/java/io/trino/operator/Driver.java @@ -46,12 +46,15 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Throwables.throwIfUnchecked; import static com.google.common.base.Verify.verify; +import static com.google.common.util.concurrent.Futures.nonCancellationPropagating; +import static com.google.common.util.concurrent.Futures.withTimeout; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static io.airlift.concurrent.MoreFutures.getFutureValue; import static io.trino.operator.Operator.NOT_BLOCKED; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static java.lang.Boolean.TRUE; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; // @@ -455,6 +458,13 @@ private ListenableFuture processInternal(OperationTimer operationTimer) // unblock when the first future is complete ListenableFuture blocked = firstFinishedFuture(blockedFutures); + if 
(driverContext.getBlockedTimeout().isPresent()) { + blocked = withTimeout( + nonCancellationPropagating(blocked), + driverContext.getBlockedTimeout().get().toMillis(), + MILLISECONDS, + driverContext.getTimeoutExecutor()); + } // driver records serial blocked time driverContext.recordBlocked(blocked); // each blocked operator is responsible for blocking the execution diff --git a/core/trino-main/src/main/java/io/trino/operator/DriverContext.java b/core/trino-main/src/main/java/io/trino/operator/DriverContext.java index 0ea5d3a5d70e7..67998f0a5e45a 100644 --- a/core/trino-main/src/main/java/io/trino/operator/DriverContext.java +++ b/core/trino-main/src/main/java/io/trino/operator/DriverContext.java @@ -28,6 +28,7 @@ import org.joda.time.DateTime; import java.util.List; +import java.util.Optional; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.Executor; import java.util.concurrent.ScheduledExecutorService; @@ -54,6 +55,7 @@ public class DriverContext private final PipelineContext pipelineContext; private final Executor notificationExecutor; private final ScheduledExecutorService yieldExecutor; + private final ScheduledExecutorService timeoutExecutor; private final AtomicBoolean finished = new AtomicBoolean(); @@ -70,6 +72,7 @@ public class DriverContext private final AtomicReference executionStartTime = new AtomicReference<>(); private final AtomicReference executionEndTime = new AtomicReference<>(); + private final AtomicReference> blockedTimeout = new AtomicReference<>(Optional.empty()); private final MemoryTrackingContext driverMemoryContext; @@ -82,12 +85,14 @@ public DriverContext( PipelineContext pipelineContext, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, + ScheduledExecutorService timeoutExecutor, MemoryTrackingContext driverMemoryContext, long splitWeight) { this.pipelineContext = requireNonNull(pipelineContext, "pipelineContext is null"); this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null"); this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null"); + this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null"); this.driverMemoryContext = requireNonNull(driverMemoryContext, "driverMemoryContext is null"); this.yieldSignal = new DriverYieldSignal(); this.splitWeight = splitWeight; @@ -447,6 +452,21 @@ public ScheduledExecutorService getYieldExecutor() return yieldExecutor; } + public ScheduledExecutorService getTimeoutExecutor() + { + return timeoutExecutor; + } + + public void setBlockedTimeout(Duration duration) + { + this.blockedTimeout.set(Optional.of(duration)); + } + + public Optional getBlockedTimeout() + { + return blockedTimeout.get(); + } + private static long nanosBetween(long start, long end) { return max(0, end - start); diff --git a/core/trino-main/src/main/java/io/trino/operator/FlatHash.java b/core/trino-main/src/main/java/io/trino/operator/FlatHash.java index 3086d95940322..36119075a6823 100644 --- a/core/trino-main/src/main/java/io/trino/operator/FlatHash.java +++ b/core/trino-main/src/main/java/io/trino/operator/FlatHash.java @@ -342,7 +342,7 @@ private void rehash(int minimumRequiredCapacity) // we incrementally allocate the record groups to smooth out memory allocation if (capacity <= RECORDS_PER_GROUP) { - recordGroups = new byte[][]{new byte[multiplyExact(capacity, recordSize)]}; + recordGroups = new byte[][] {new byte[multiplyExact(capacity, recordSize)]}; } else { recordGroups = new byte[(capacity + 1) >> 
RECORDS_PER_GROUP_SHIFT][]; diff --git a/core/trino-main/src/main/java/io/trino/operator/FlatSet.java b/core/trino-main/src/main/java/io/trino/operator/FlatSet.java index 5b5c298fdd284..fc6b968f4481f 100644 --- a/core/trino-main/src/main/java/io/trino/operator/FlatSet.java +++ b/core/trino-main/src/main/java/io/trino/operator/FlatSet.java @@ -99,7 +99,7 @@ public FlatSet( private static byte[][] createRecordGroups(int capacity, int recordSize) { if (capacity < RECORDS_PER_GROUP) { - return new byte[][]{new byte[multiplyExact(capacity, recordSize)]}; + return new byte[][] {new byte[multiplyExact(capacity, recordSize)]}; } byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][]; diff --git a/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java b/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java index 605d970564e1f..1f64fd5fb76dc 100644 --- a/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java +++ b/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java @@ -55,6 +55,7 @@ public class PipelineContext private final TaskContext taskContext; private final Executor notificationExecutor; private final ScheduledExecutorService yieldExecutor; + private final ScheduledExecutorService timeoutExecutor; private final int pipelineId; private final boolean inputPipeline; @@ -105,7 +106,7 @@ public class PipelineContext private final MemoryTrackingContext pipelineMemoryContext; - public PipelineContext(int pipelineId, TaskContext taskContext, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, MemoryTrackingContext pipelineMemoryContext, boolean inputPipeline, boolean outputPipeline, boolean partitioned) + public PipelineContext(int pipelineId, TaskContext taskContext, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, ScheduledExecutorService timeoutExecutor, MemoryTrackingContext pipelineMemoryContext, boolean inputPipeline, boolean outputPipeline, boolean partitioned) { this.pipelineId = pipelineId; this.inputPipeline = inputPipeline; @@ -114,6 +115,7 @@ public PipelineContext(int pipelineId, TaskContext taskContext, Executor notific this.taskContext = requireNonNull(taskContext, "taskContext is null"); this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null"); this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null"); + this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null"); this.pipelineMemoryContext = requireNonNull(pipelineMemoryContext, "pipelineMemoryContext is null"); // Initialize the local memory contexts with the ExchangeOperator tag as ExchangeOperator will do the local memory allocations pipelineMemoryContext.initializeLocalMemoryContexts(ExchangeOperator.class.getSimpleName()); @@ -156,6 +158,7 @@ public DriverContext addDriverContext(long splitWeight) this, notificationExecutor, yieldExecutor, + timeoutExecutor, pipelineMemoryContext.newMemoryTrackingContext(), splitWeight); drivers.add(driverContext); diff --git a/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java b/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java index f309a6d145c50..7b41bde101eda 100644 --- a/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java +++ b/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java @@ -128,7 +128,6 @@ public TableFunctionOperatorFactory( { requireNonNull(planNodeId, "planNodeId is null"); 
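Reviewer note: the new `driverTimeoutExecutor` feeds `Futures.withTimeout` in `Driver.processInternal` above: when a blocked timeout is set on the `DriverContext`, the driver's blocked future is completed after the timeout, so the driver is rescheduled even though no operator unblocked. Wrapping with `nonCancellationPropagating` keeps the timeout's cancellation from reaching the operators' own futures. A self-contained sketch of that wiring (names are illustrative):

```java
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;

import static com.google.common.util.concurrent.Futures.nonCancellationPropagating;
import static com.google.common.util.concurrent.Futures.withTimeout;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

final class BlockedWithTimeoutExample
{
    public static void main(String[] args)
    {
        ScheduledExecutorService timeoutExecutor = Executors.newSingleThreadScheduledExecutor();
        SettableFuture<Void> operatorBlocked = SettableFuture.create(); // never completed by the "operator"

        // Time out the wait without propagating cancellation back to the operator's future
        ListenableFuture<Void> blocked = withTimeout(
                nonCancellationPropagating(operatorBlocked),
                100,
                MILLISECONDS,
                timeoutExecutor);

        try {
            blocked.get(); // fails with an ExecutionException wrapping a TimeoutException after ~100ms
        }
        catch (Exception e) {
            System.out.println("woke up: " + e.getCause());
        }
        System.out.println("operator future still pending: " + !operatorBlocked.isDone());
        timeoutExecutor.shutdownNow();
    }
}
```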
requireNonNull(tableFunctionProvider, "tableFunctionProvider is null"); - requireNonNull(catalogHandle, "catalogHandle is null"); requireNonNull(functionHandle, "functionHandle is null"); requireNonNull(requiredChannels, "requiredChannels is null"); requireNonNull(markerChannels, "markerChannels is null"); @@ -272,6 +271,7 @@ public TableFunctionOperator( this.operatorContext = operatorContext; this.session = operatorContext.getSession().toConnectorSession(catalogHandle); + this.processEmptyInput = !pruneWhenEmpty; PagesIndex pagesIndex = pagesIndexFactory.newPagesIndex(sourceTypes, expectedPositions); diff --git a/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java b/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java index 37619858e1e45..ac4efe5b63074 100644 --- a/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java +++ b/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java @@ -20,7 +20,9 @@ import com.google.common.primitives.Ints; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import io.airlift.log.Logger; import io.airlift.slice.Slice; +import io.airlift.units.DataSize; import io.airlift.units.Duration; import io.trino.Session; import io.trino.memory.context.LocalMemoryContext; @@ -42,6 +44,7 @@ import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; @@ -53,6 +56,8 @@ import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static io.airlift.concurrent.MoreFutures.getFutureValue; import static io.airlift.concurrent.MoreFutures.toListenableFuture; +import static io.trino.SystemSessionProperties.getCloseIdleWritersTriggerDuration; +import static io.trino.SystemSessionProperties.getIdleWriterMinDataSizeThreshold; import static io.trino.SystemSessionProperties.isStatisticsCpuTimerEnabled; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.VarbinaryType.VARBINARY; @@ -64,6 +69,7 @@ public class TableWriterOperator implements Operator { + private static final Logger LOG = Logger.get(TableWriterOperator.class); public static final int ROW_COUNT_CHANNEL = 0; public static final int FRAGMENT_CHANNEL = 1; public static final int STATS_START_CHANNEL = 2; @@ -111,10 +117,22 @@ public TableWriterOperatorFactory( public Operator createOperator(DriverContext driverContext) { checkState(!closed, "Factory is already closed"); + // Driver should call getOutput() periodically on TableWriterOperator to close idle writers which will essentially + // decrease the memory usage even if no pages were added to that writer thread. 
+ if (getCloseIdleWritersTriggerDuration(session).toMillis() > 0) { + driverContext.setBlockedTimeout(getCloseIdleWritersTriggerDuration(session)); + } OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, TableWriterOperator.class.getSimpleName()); Operator statisticsAggregationOperator = statisticsAggregationOperatorFactory.createOperator(driverContext); boolean statisticsCpuTimerEnabled = !(statisticsAggregationOperator instanceof DevNullOperator) && isStatisticsCpuTimerEnabled(session); - return new TableWriterOperator(context, createPageSink(driverContext), columnChannels, statisticsAggregationOperator, types, statisticsCpuTimerEnabled); + return new TableWriterOperator( + context, + createPageSink(driverContext), + columnChannels, + statisticsAggregationOperator, + types, + statisticsCpuTimerEnabled, + getIdleWriterMinDataSizeThreshold(session)); } private ConnectorPageSink createPageSink(DriverContext driverContext) @@ -159,6 +177,7 @@ private enum State private final AtomicLong pageSinkPeakMemoryUsage = new AtomicLong(); private final Operator statisticAggregationOperator; private final List types; + private final DataSize idleWriterMinDataSizeThreshold; private ListenableFuture blocked = NOT_BLOCKED; private CompletableFuture> finishFuture; @@ -170,8 +189,10 @@ private enum State private final OperationTiming statisticsTiming = new OperationTiming(); private final boolean statisticsCpuTimerEnabled; - private final Supplier tableWriterInfoSupplier; + // This records the last physical written data size when connector closeIdleWriters is triggered. + private long lastPhysicalWrittenDataSize; + private boolean newPagesAdded; public TableWriterOperator( OperatorContext operatorContext, @@ -179,7 +200,8 @@ public TableWriterOperator( List columnChannels, Operator statisticAggregationOperator, List types, - boolean statisticsCpuTimerEnabled) + boolean statisticsCpuTimerEnabled, + DataSize idleWriterMinDataSizeThreshold) { this.operatorContext = requireNonNull(operatorContext, "operatorContext is null"); this.pageSinkMemoryContext = operatorContext.newLocalUserMemoryContext(TableWriterOperator.class.getSimpleName()); @@ -188,6 +210,7 @@ public TableWriterOperator( this.statisticAggregationOperator = requireNonNull(statisticAggregationOperator, "statisticAggregationOperator is null"); this.types = ImmutableList.copyOf(requireNonNull(types, "types is null")); this.statisticsCpuTimerEnabled = statisticsCpuTimerEnabled; + this.idleWriterMinDataSizeThreshold = requireNonNull(idleWriterMinDataSizeThreshold, "idleWriterMinDataSizeThreshold is null"); this.tableWriterInfoSupplier = createTableWriterInfoSupplier(pageSinkPeakMemoryUsage, statisticsTiming, pageSink); this.operatorContext.setInfoSupplier(tableWriterInfoSupplier); } @@ -259,14 +282,20 @@ public void addInput(Page page) rowCount += page.getPositionCount(); updateWrittenBytes(); operatorContext.recordWriterInputDataSize(page.getSizeInBytes()); + newPagesAdded = true; } @Override public Page getOutput() { - if (!blocked.isDone()) { + tryClosingIdleWriters(); + // This method could be called even when new pages have not been added. In that case, we don't have to + // try to get the output from the aggregation operator. It could be expensive since getOutput() is + // called quite frequently. 
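Reviewer note: the comment above motivates the guard that follows. With the blocked timeout in place, `getOutput()` now also fires on a timer rather than only when input arrives, so it should return early unless there are new pages to flush or the operator has left the RUNNING state and must drain. A tiny restatement of that predicate (hypothetical helper, purely illustrative):

```java
final class OutputGate
{
    // Proceed only when unblocked AND there is something to do: either new pages arrived
    // since the last flush, or the operator is no longer running and must drain remaining output.
    static boolean shouldProduceOutput(boolean blockedDone, boolean newPagesAdded, boolean running)
    {
        return blockedDone && (newPagesAdded || !running);
    }
}
```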
+ if (!(blocked.isDone() && (newPagesAdded || state != State.RUNNING))) { return null; } + newPagesAdded = false; if (!statisticAggregationOperator.isFinished()) { OperationTimer timer = new OperationTimer(statisticsCpuTimerEnabled); @@ -365,6 +394,24 @@ private void updateWrittenBytes() writtenBytes = current; } + private void tryClosingIdleWriters() + { + long physicalWrittenDataSize = getTaskContext().getPhysicalWrittenDataSize(); + Optional writerCount = getTaskContext().getMaxWriterCount(); + if (writerCount.isEmpty() || physicalWrittenDataSize - lastPhysicalWrittenDataSize <= idleWriterMinDataSizeThreshold.toBytes() * writerCount.get()) { + return; + } + pageSink.closeIdleWriters(); + updateMemoryUsage(); + updateWrittenBytes(); + lastPhysicalWrittenDataSize = physicalWrittenDataSize; + } + + private TaskContext getTaskContext() + { + return operatorContext.getDriverContext().getPipelineContext().getTaskContext(); + } + private void updateMemoryUsage() { long pageSinkMemoryUsage = pageSink.getMemoryUsage(); diff --git a/core/trino-main/src/main/java/io/trino/operator/TaskContext.java b/core/trino-main/src/main/java/io/trino/operator/TaskContext.java index f0cfa007d5e1a..e72670b2f4c69 100644 --- a/core/trino-main/src/main/java/io/trino/operator/TaskContext.java +++ b/core/trino-main/src/main/java/io/trino/operator/TaskContext.java @@ -69,6 +69,7 @@ public class TaskContext private final GcMonitor gcMonitor; private final Executor notificationExecutor; private final ScheduledExecutorService yieldExecutor; + private final ScheduledExecutorService timeoutExecutor; private final Session session; private final long createNanos = System.nanoTime(); @@ -117,6 +118,7 @@ public static TaskContext createTaskContext( GcMonitor gcMonitor, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, + ScheduledExecutorService timeoutExecutor, Session session, MemoryTrackingContext taskMemoryContext, Runnable notifyStatusChanged, @@ -129,6 +131,7 @@ public static TaskContext createTaskContext( gcMonitor, notificationExecutor, yieldExecutor, + timeoutExecutor, session, taskMemoryContext, notifyStatusChanged, @@ -144,6 +147,7 @@ private TaskContext( GcMonitor gcMonitor, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, + ScheduledExecutorService timeoutExecutor, Session session, MemoryTrackingContext taskMemoryContext, Runnable notifyStatusChanged, @@ -155,6 +159,7 @@ private TaskContext( this.queryContext = requireNonNull(queryContext, "queryContext is null"); this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null"); this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null"); + this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null"); this.session = session; this.taskMemoryContext = requireNonNull(taskMemoryContext, "taskMemoryContext is null"); @@ -186,6 +191,7 @@ public PipelineContext addPipelineContext(int pipelineId, boolean inputPipeline, this, notificationExecutor, yieldExecutor, + timeoutExecutor, taskMemoryContext.newMemoryTrackingContext(), inputPipeline, outputPipeline, @@ -379,7 +385,7 @@ public void setMaxWriterCount(int maxWriterCount) checkArgument(maxWriterCount > 0, "maxWriterCount must be > 0"); int oldMaxWriterCount = this.maxWriterCount.getAndSet(maxWriterCount); - checkArgument(oldMaxWriterCount == -1 || oldMaxWriterCount == maxWriterCount, "maxWriterCount already set to " + oldMaxWriterCount); + checkArgument(oldMaxWriterCount == -1 || oldMaxWriterCount == 
maxWriterCount, "maxWriterCount already set to %s", oldMaxWriterCount); } public Optional getMaxWriterCount() diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java index 4d4bfc76934fe..a18380534ad5f 100644 --- a/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java +++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java @@ -186,7 +186,7 @@ public AbstractMapAggregationState(AbstractMapAggregationState state) private static byte[][] createRecordGroups(int capacity, int recordSize) { if (capacity < RECORDS_PER_GROUP) { - return new byte[][]{new byte[multiplyExact(capacity, recordSize)]}; + return new byte[][] {new byte[multiplyExact(capacity, recordSize)]}; } byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][]; diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java index e40f503047a0f..f4763b6c0037b 100644 --- a/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java +++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java @@ -139,7 +139,7 @@ public TypedHistogram( private static byte[][] createRecordGroups(int capacity, int recordSize) { if (capacity < RECORDS_PER_GROUP) { - return new byte[][]{new byte[multiplyExact(capacity, recordSize)]}; + return new byte[][] {new byte[multiplyExact(capacity, recordSize)]}; } byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][]; diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java index 5a69677e9168b..0d2d57fffd9a1 100644 --- a/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java +++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java @@ -191,7 +191,7 @@ public AbstractMultimapAggregationState(AbstractMultimapAggregationState state) private static byte[][] createRecordGroups(int capacity, int recordSize) { if (capacity < RECORDS_PER_GROUP) { - return new byte[][]{new byte[multiplyExact(capacity, recordSize)]}; + return new byte[][] {new byte[multiplyExact(capacity, recordSize)]}; } byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][]; diff --git a/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java b/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java index ea5486a637509..cda37c84c4785 100644 --- a/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java +++ b/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java @@ -53,7 +53,6 @@ import static io.trino.SystemSessionProperties.getSkewedPartitionMinDataProcessedRebalanceThreshold; import static io.trino.operator.InterpretedHashGenerator.createChannelsHashGenerator; import static io.trino.operator.exchange.LocalExchangeSink.finishedLocalExchangeSink; -import static io.trino.operator.output.SkewedPartitionRebalancer.getScaleWritersMaxSkewedPartitions; import static io.trino.sql.planner.PartitioningHandle.isScaledWriterHashDistribution; 
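Reviewer note: several `checkArgument` call sites changed earlier in this patch (and the `maxWriterCount` check just above) switch from string concatenation to the template form. The message is identical, but the template defers formatting until the check actually fails, so the happy path avoids building the string. For example:

```java
import static com.google.common.base.Preconditions.checkArgument;

final class PreconditionStyle
{
    static void requireKnown(Object value, Object id)
    {
        // Eager: "Unknown function implementation: " + id is built even when value != null
        // checkArgument(value != null, "Unknown function implementation: " + id);

        // Lazy: the %s template is only formatted if the check fails
        checkArgument(value != null, "Unknown function implementation: %s", id);
    }
}
```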
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION; import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION; @@ -61,7 +60,6 @@ import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION; import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION; import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION; -import static java.lang.Math.max; import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; @@ -147,12 +145,7 @@ else if (isScaledWriterHashDistribution(partitioning)) { bufferCount, 1, writerScalingMinDataProcessed.toBytes(), - getSkewedPartitionMinDataProcessedRebalanceThreshold(session).toBytes(), - // Keep the maxPartitionsToRebalance to atleast writer count such that single partition writes do - // not suffer from skewness and can scale uniformly across all writers. Additionally, note that - // maxWriterCount is calculated considering memory into account. So, it is safe to set the - // maxPartitionsToRebalance to maximum number of writers. - max(getScaleWritersMaxSkewedPartitions(session), bufferCount)); + getSkewedPartitionMinDataProcessedRebalanceThreshold(session).toBytes()); LocalExchangeMemoryManager memoryManager = new LocalExchangeMemoryManager(maxBufferedBytes.toBytes()); sources = IntStream.range(0, bufferCount) .mapToObj(i -> new LocalExchangeSource(memoryManager, source -> checkAllSourcesFinished())) diff --git a/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java b/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java index c4f7e56fcc469..8bc67b4d0b5c9 100644 --- a/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java +++ b/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java @@ -31,6 +31,7 @@ public class ScaleWriterPartitioningExchanger implements LocalExchanger { + private static final double SCALE_WRITER_MEMORY_PERCENTAGE = 0.7; private final List> buffers; private final LocalExchangeMemoryManager memoryManager; private final long maxBufferedBytes; @@ -83,11 +84,18 @@ public ScaleWriterPartitioningExchanger( @Override public void accept(Page page) { + // Reset the value of partition row count, writer ids and data processed for this page + long dataProcessed = 0; + for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) { + partitionRowCounts[partitionId] = 0; + partitionWriterIds[partitionId] = -1; + } + // Scale up writers when current buffer memory utilization is more than 50% of the maximum. - // Do not scale up if total memory used is greater than 50% of max memory per node. + // Do not scale up if total memory used is greater than 70% of max memory per node. // We have to be conservative here otherwise scaling of writers will happen first // before we hit this limit, and then we won't be able to do anything to stop OOM error. 
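Reviewer note: the exchanger change below pairs the existing 50% buffer-utilization trigger with a named 70% node-memory ceiling (SCALE_WRITER_MEMORY_PERCENTAGE), and the same ceiling later gates whether per-page row counts and processed bytes feed back into the scaling state. A compact sketch of the trigger condition, simplified from the exchanger itself:

```java
final class WriterScalingGate
{
    private static final double BUFFER_UTILIZATION_TRIGGER = 0.5;
    private static final double SCALE_WRITER_MEMORY_PERCENTAGE = 0.7;

    // Scale up only while the local exchange buffer is under pressure and the node still has
    // memory headroom; adding writers near the memory limit would only hasten an OOM.
    static boolean shouldRebalance(long bufferedBytes, long maxBufferedBytes, long memoryUsed, long maxMemoryPerNode)
    {
        return bufferedBytes > maxBufferedBytes * BUFFER_UTILIZATION_TRIGGER
                && memoryUsed < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE;
    }
}
```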
- if (memoryManager.getBufferedBytes() > maxBufferedBytes * 0.5 && totalMemoryUsed.get() < maxMemoryPerNode * 0.5) { + if (memoryManager.getBufferedBytes() > maxBufferedBytes * 0.5 && totalMemoryUsed.get() < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE) { partitionRebalancer.rebalance(); } @@ -111,13 +119,6 @@ public void accept(Page page) writerAssignments[writerId].add(position); } - for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) { - partitionRebalancer.addPartitionRowCount(partitionId, partitionRowCounts[partitionId]); - // Reset the value of partition row count - partitionRowCounts[partitionId] = 0; - partitionWriterIds[partitionId] = -1; - } - // build a page for each writer for (int bucket = 0; bucket < writerAssignments.length; bucket++) { IntArrayList positionsList = writerAssignments[bucket]; @@ -135,12 +136,22 @@ public void accept(Page page) // whole input page will go to this partition, compact the input page avoid over-retaining memory and to // match the behavior of sub-partitioned pages that copy positions out page.compact(); - sendPageToPartition(buffers.get(bucket), page); - return; + dataProcessed += sendPageToPartition(buffers.get(bucket), page); + break; } Page pageSplit = page.copyPositions(positions, 0, bucketSize); - sendPageToPartition(buffers.get(bucket), pageSplit); + dataProcessed += sendPageToPartition(buffers.get(bucket), pageSplit); + } + + // Only update the scaling state if the memory used is below the SCALE_WRITER_MEMORY_PERCENTAGE limit. Otherwise, if we keep updating + // the scaling state and the memory used is fluctuating around the limit, then we could do massive scaling + // in a single rebalancing cycle which could cause OOM error. + if (totalMemoryUsed.get() < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE) { + for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) { + partitionRebalancer.addPartitionRowCount(partitionId, partitionRowCounts[partitionId]); + } + partitionRebalancer.addDataProcessed(dataProcessed); } } @@ -155,11 +166,11 @@ private int getNextWriterId(int partitionId) return partitionRebalancer.getTaskId(partitionId, partitionWriterIndexes[partitionId]++); } - private void sendPageToPartition(Consumer buffer, Page pageSplit) + private long sendPageToPartition(Consumer buffer, Page pageSplit) { long retainedSizeInBytes = pageSplit.getRetainedSizeInBytes(); - partitionRebalancer.addDataProcessed(retainedSizeInBytes); memoryManager.updateMemoryUsage(retainedSizeInBytes); buffer.accept(pageSplit); + return retainedSizeInBytes; } } diff --git a/core/trino-main/src/main/java/io/trino/index/IndexManager.java b/core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java similarity index 98% rename from core/trino-main/src/main/java/io/trino/index/IndexManager.java rename to core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java index c204fbe804498..cd8b74ff7714b 100644 --- a/core/trino-main/src/main/java/io/trino/index/IndexManager.java +++ b/core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
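The ScaleWriterPartitioningExchanger change above raises the per-node memory ceiling for writer scaling from 50% to 70% and, more importantly, stops feeding the rebalancer while the node is above that ceiling, so statistics accumulated at the limit cannot trigger a large scale-up in a single rebalancing cycle. A small sketch of the two guards, with assumed standalone names (the real fields live on the exchanger):

import java.util.concurrent.atomic.AtomicLong;

// Sketch of the scaling guards; names and wiring are illustrative.
final class ScaleWriterGuardSketch
{
    private static final double SCALE_WRITER_MEMORY_PERCENTAGE = 0.7;

    private final long maxBufferedBytes;
    private final long maxMemoryPerNode;
    private final AtomicLong totalMemoryUsed;

    ScaleWriterGuardSketch(long maxBufferedBytes, long maxMemoryPerNode, AtomicLong totalMemoryUsed)
    {
        this.maxBufferedBytes = maxBufferedBytes;
        this.maxMemoryPerNode = maxMemoryPerNode;
        this.totalMemoryUsed = totalMemoryUsed;
    }

    // scale up only while buffers are congested and node memory still has headroom
    boolean shouldRebalance(long bufferedBytes)
    {
        return bufferedBytes > maxBufferedBytes * 0.5 && belowMemoryCeiling();
    }

    // record partition row counts and processed bytes for the rebalancer only below the ceiling
    boolean shouldRecordScalingStats()
    {
        return belowMemoryCeiling();
    }

    private boolean belowMemoryCeiling()
    {
        return totalMemoryUsed.get() < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE;
    }
}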
*/ -package io.trino.index; +package io.trino.operator.index; import com.google.inject.Inject; import io.trino.Session; diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java b/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java index 4a58724a729a4..a9446d3aba047 100644 --- a/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java +++ b/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java @@ -69,6 +69,9 @@ public class PagePartitioner private final int nullChannel; // when >= 0, send the position to every partition if this channel is null private boolean hasAnyRowBeenReplicated; + // outputSizeInBytes that has already been reported to the operator stats during release and should be subtracted + // from future stats reporting to avoid double counting + private long outputSizeReportedBeforeRelease; public PagePartitioner( PartitionFunction partitionFunction, @@ -135,7 +138,6 @@ public void partitionPage(Page page, OperatorContext operatorContext) } int outputPositionCount = replicatesAnyRow && !hasAnyRowBeenReplicated ? page.getPositionCount() + positionsAppenders.length - 1 : page.getPositionCount(); - long positionsAppendersSizeBefore = getPositionsAppendersSizeInBytes(); if (page.getPositionCount() < partitionFunction.partitionCount() * COLUMNAR_STRATEGY_COEFFICIENT) { // Partition will have on average less than COLUMNAR_STRATEGY_COEFFICIENT rows. // Doing it column-wise would degrade performance, so we fall back to row-wise approach. @@ -146,11 +148,73 @@ public void partitionPage(Page page, OperatorContext operatorContext) else { partitionPageByColumn(page); } - long positionsAppendersSizeAfter = getPositionsAppendersSizeInBytes(); - flushPositionsAppenders(false); + long outputSizeInBytes = flushPositionsAppenders(false); updateMemoryUsage(); + operatorContext.recordOutput(outputSizeInBytes, outputPositionCount); + } + + private long adjustFlushedOutputSizeWithEagerlyReportedBytes(long flushedOutputSize) + { + // Reduce the flushed output size by the previously eagerly reported amount to avoid double counting + if (outputSizeReportedBeforeRelease > 0) { + long adjustmentAmount = min(flushedOutputSize, outputSizeReportedBeforeRelease); + outputSizeReportedBeforeRelease -= adjustmentAmount; + flushedOutputSize -= adjustmentAmount; + } + return flushedOutputSize; + } - operatorContext.recordOutput(positionsAppendersSizeAfter - positionsAppendersSizeBefore, outputPositionCount); + private long adjustEagerlyReportedBytesWithBufferedBytesOnRelease(long bufferedBytesOnRelease) + { + // adjust the amount to eagerly report as output by the amount already eagerly reported if the new value + // is larger, since this indicates that no data was flushed and only the delta between the two values should + // be reported eagerly + if (bufferedBytesOnRelease > outputSizeReportedBeforeRelease) { + long additionalBufferedBytes = bufferedBytesOnRelease - outputSizeReportedBeforeRelease; + outputSizeReportedBeforeRelease = bufferedBytesOnRelease; + return additionalBufferedBytes; + } + else { + // buffered size is unchanged or reduced (as a result of flushing) since last release, so + // do not report any additional bytes as output eagerly + return 0; + } + } + + /** + * Prepares this {@link PagePartitioner} for release to the pool by checking for dictionary mode appenders and either flattening + * them into direct appenders or forcing their current pages to flush to preserve a valuable dictionary encoded 
representation. This + * is done before release because we know that after reuse, the appenders will not observe any more inputs using the same dictionary. + *
+ * When a {@link PagePartitioner} is released back to the {@link PagePartitionerPool} we don't know if it will ever be reused. If it is not + * reused, then we have no {@link OperatorContext} we can use to report the output size of the final flushed page, so instead we report the + * buffered bytes still in the partitioner after {@link PagePartitioner#prepareForRelease(OperatorContext)} as output bytes eagerly and record + * that amount in {@link #outputSizeReportedBeforeRelease}. If the {@link PagePartitioner} is reused after having reported buffered bytes eagerly, + * we then have to subtract that same amount from the subsequent output bytes to avoid double counting them. + */ + public void prepareForRelease(OperatorContext operatorContext) + { + long bufferedSizeInBytes = 0; + long outputSizeInBytes = 0; + for (int partition = 0; partition < positionsAppenders.length; partition++) { + PositionsAppenderPageBuilder positionsAppender = positionsAppenders[partition]; + Optional flushedPage = positionsAppender.flushOrFlattenBeforeRelease(); + if (flushedPage.isPresent()) { + Page page = flushedPage.get(); + outputSizeInBytes += page.getSizeInBytes(); + enqueuePage(page, partition); + } + else { + // Dictionaries have now been flattened, so the new reported size is trustworthy to report + // eagerly + bufferedSizeInBytes += positionsAppender.getSizeInBytes(); + } + } + updateMemoryUsage(); + // Adjust flushed and buffered values against the previously eagerly reported sizes + outputSizeInBytes = adjustFlushedOutputSizeWithEagerlyReportedBytes(outputSizeInBytes); + bufferedSizeInBytes = adjustEagerlyReportedBytesWithBufferedBytesOnRelease(bufferedSizeInBytes); + operatorContext.recordOutput(outputSizeInBytes + bufferedSizeInBytes, 0 /* no new positions */); } public void partitionPageByRow(Page page) @@ -210,15 +274,6 @@ public void partitionPageByColumn(Page page) } } - private long getPositionsAppendersSizeInBytes() - { - long sizeInBytes = 0; - for (PositionsAppenderPageBuilder pageBuilder : positionsAppenders) { - sizeInBytes += pageBuilder.getSizeInBytes(); - } - return sizeInBytes; - } - private IntArrayList[] partitionPositions(Page page) { verify(page.getPositionCount() > 0, "position count is 0"); @@ -424,6 +479,7 @@ public void close() { try { flushPositionsAppenders(true); + outputSizeReportedBeforeRelease = 0; } finally { // clear buffers before memory release @@ -432,16 +488,19 @@ public void close() } } - private void flushPositionsAppenders(boolean force) + private long flushPositionsAppenders(boolean force) { + long outputSizeInBytes = 0; // add all full pages to output buffer for (int partition = 0; partition < positionsAppenders.length; partition++) { PositionsAppenderPageBuilder partitionPageBuilder = positionsAppenders[partition]; if (!partitionPageBuilder.isEmpty() && (force || partitionPageBuilder.isFull())) { Page pagePartition = partitionPageBuilder.build(); + outputSizeInBytes += pagePartition.getSizeInBytes(); enqueuePage(pagePartition, partition); } } + return adjustFlushedOutputSizeWithEagerlyReportedBytes(outputSizeInBytes); } private void enqueuePage(Page pagePartition, int partition) diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java b/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java index 0bc28fee83302..fd683e126352e 100644 --- a/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java +++ 
b/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java @@ -284,6 +284,7 @@ public OperatorContext getOperatorContext() public void finish() { if (!finished) { + pagePartitioner.prepareForRelease(operatorContext); pagePartitionerPool.release(pagePartitioner); finished = true; } diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java b/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java index 4ba6fd3361dfb..91948beec7611 100644 --- a/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java +++ b/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java @@ -21,6 +21,7 @@ import it.unimi.dsi.fastutil.ints.IntArrayList; import java.util.List; +import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -144,6 +145,32 @@ public boolean isEmpty() return declaredPositions == 0; } + public Optional flushOrFlattenBeforeRelease() + { + if (declaredPositions == 0) { + return Optional.empty(); + } + + for (UnnestingPositionsAppender positionsAppender : channelAppenders) { + if (positionsAppender.shouldForceFlushBeforeRelease()) { + // dictionary encoding will be preserved, so force the current page to be flushed + return Optional.of(build()); + } + } + + // transition from dictionary to direct mode if necessary, since we won't be able to reuse the + // same dictionary from the new operator + for (UnnestingPositionsAppender positionsAppender : channelAppenders) { + positionsAppender.flattenPendingDictionary(); + } + + // flush the current page if forced or if the builder is now full as a result of transitioning dictionaries to direct mode + if (isFull()) { + return Optional.of(build()); + } + return Optional.empty(); + } + public Page build() { Block[] blocks = new Block[channelAppenders.length]; diff --git a/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java b/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java index 458b4ccd17d37..97a839b74b27c 100644 --- a/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java +++ b/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java @@ -32,7 +32,6 @@ import java.util.List; import java.util.Objects; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLongArray; import java.util.stream.IntStream; @@ -86,12 +85,10 @@ public class SkewedPartitionRebalancer private final int taskBucketCount; private final long minPartitionDataProcessedRebalanceThreshold; private final long minDataProcessedRebalanceThreshold; - private final int maxPartitionsToRebalance; private final AtomicLongArray partitionRowCount; private final AtomicLong dataProcessed; private final AtomicLong dataProcessedAtLastRebalance; - private final AtomicInteger numOfRebalancedPartitions; @GuardedBy("this") private final long[] partitionDataSize; @@ -158,12 +155,6 @@ public static int getMaxWritersBasedOnMemory(Session session) return (int) ceil((double) getQueryMaxMemoryPerNode(session).toBytes() / getMaxMemoryPerPartitionWriter(session).toBytes()); } - public static int getScaleWritersMaxSkewedPartitions(Session session) - { - // Set the value of maxSkewedPartitions to 
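The PagePartitioner changes above move output-size reporting to the flush path and add an eager report at release time, tracked in outputSizeReportedBeforeRelease. A toy model of that bookkeeping, with assumed names, shows how the two adjustment methods avoid double counting whether or not the partitioner is ever reused:

import static java.lang.Math.max;
import static java.lang.Math.min;

// Toy model of the eager output-size accounting; names are illustrative.
final class EagerOutputAccountingSketch
{
    private long reportedBeforeRelease;

    // on release to the pool: report only the growth in buffered bytes since the last release;
    // if buffered bytes shrank, the data was flushed and reported through the flush path already
    long bytesToReportOnRelease(long bufferedBytesOnRelease)
    {
        long additional = max(0, bufferedBytesOnRelease - reportedBeforeRelease);
        reportedBeforeRelease = max(reportedBeforeRelease, bufferedBytesOnRelease);
        return additional;
    }

    // on flush: subtract whatever was already reported eagerly before this flush
    long bytesToReportOnFlush(long flushedBytes)
    {
        long alreadyReported = min(flushedBytes, reportedBeforeRelease);
        reportedBeforeRelease -= alreadyReported;
        return flushedBytes - alreadyReported;
    }
}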
scale to 60% of maximum number of writers possible per node. - return (int) (getMaxWritersBasedOnMemory(session) * 0.60); - } - public static int getTaskCount(PartitioningScheme partitioningScheme) { // Todo: Handle skewness if there are more nodes/tasks than the buckets coming from connector @@ -179,20 +170,17 @@ public SkewedPartitionRebalancer( int taskCount, int taskBucketCount, long minPartitionDataProcessedRebalanceThreshold, - long maxDataProcessedRebalanceThreshold, - int maxPartitionsToRebalance) + long maxDataProcessedRebalanceThreshold) { this.partitionCount = partitionCount; this.taskCount = taskCount; this.taskBucketCount = taskBucketCount; this.minPartitionDataProcessedRebalanceThreshold = minPartitionDataProcessedRebalanceThreshold; this.minDataProcessedRebalanceThreshold = max(minPartitionDataProcessedRebalanceThreshold, maxDataProcessedRebalanceThreshold); - this.maxPartitionsToRebalance = maxPartitionsToRebalance; this.partitionRowCount = new AtomicLongArray(partitionCount); this.dataProcessed = new AtomicLong(); this.dataProcessedAtLastRebalance = new AtomicLong(); - this.numOfRebalancedPartitions = new AtomicInteger(); this.partitionDataSize = new long[partitionCount]; this.partitionDataSizeAtLastRebalance = new long[partitionCount]; @@ -254,9 +242,7 @@ public void rebalance() private boolean shouldRebalance(long dataProcessed) { // Rebalance only when total bytes processed since last rebalance is greater than rebalance threshold. - // Check if the number of rebalanced partitions is less than maxPartitionsToRebalance. - return (dataProcessed - dataProcessedAtLastRebalance.get()) >= minDataProcessedRebalanceThreshold - && numOfRebalancedPartitions.get() < maxPartitionsToRebalance; + return (dataProcessed - dataProcessedAtLastRebalance.get()) >= minDataProcessedRebalanceThreshold; } private synchronized void rebalancePartitions(long dataProcessed) @@ -317,7 +303,15 @@ private void calculatePartitionDataSize(long dataProcessed) } for (int partition = 0; partition < partitionCount; partition++) { - partitionDataSize[partition] = (partitionRowCount.get(partition) * dataProcessed) / totalPartitionRowCount; + // Since we estimate the partitionDataSize based on partitionRowCount and total data processed. It is possible + // that the estimated partitionDataSize is slightly less than it was estimated at the last rebalance cycle. + // That's because for a given partition, row count hasn't increased, however overall data processed + // has increased. Therefore, we need to make sure that the estimated partitionDataSize should be + // at least partitionDataSizeAtLastRebalance. Otherwise, it will affect the ordering of minTaskBuckets + // priority queue. + partitionDataSize[partition] = max( + (partitionRowCount.get(partition) * dataProcessed) / totalPartitionRowCount, + partitionDataSize[partition]); } } @@ -412,12 +406,6 @@ private boolean rebalancePartition( return false; } - // If the number of rebalanced partitions is less than maxPartitionsToRebalance then assign - // the partition to the task. - if (numOfRebalancedPartitions.get() >= maxPartitionsToRebalance) { - return false; - } - assignments.add(toTaskBucket); int newTaskCount = assignments.size(); @@ -438,8 +426,6 @@ private boolean rebalancePartition( minTasks.addOrUpdate(taskBucket, Long.MAX_VALUE - estimatedTaskBucketDataSizeSinceLastRebalance[taskBucket.id]); } - // Increment the number of rebalanced partitions. 
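The calculatePartitionDataSize hunk above clamps each per-partition estimate so it never drops below the value from the previous rebalance, which keeps the ordering of the task priority queue stable. A standalone sketch of the clamped estimate, with illustrative names:

// Sketch of the clamped per-partition size estimate; the real version updates the
// rebalancer's internal arrays in place.
final class PartitionDataSizeEstimateSketch
{
    static long[] estimate(long[] partitionRowCount, long dataProcessed, long[] previousEstimate)
    {
        long totalRowCount = 0;
        for (long rowCount : partitionRowCount) {
            totalRowCount += rowCount;
        }
        long[] result = previousEstimate.clone();
        if (totalRowCount == 0) {
            return result;
        }
        for (int partition = 0; partition < partitionRowCount.length; partition++) {
            // proportional share of all bytes processed so far, never less than the last estimate
            long proportionalShare = (partitionRowCount[partition] * dataProcessed) / totalRowCount;
            result[partition] = Math.max(proportionalShare, previousEstimate[partition]);
        }
        return result;
    }
}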
- numOfRebalancedPartitions.incrementAndGet(); log.debug("Rebalanced partition %s to task %s with taskCount %s", partitionId, toTaskBucket.taskId, assignments.size()); return true; } diff --git a/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java b/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java index 23d2c11478615..258aeb54bd5e8 100644 --- a/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java +++ b/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java @@ -20,6 +20,7 @@ import io.trino.type.BlockTypeOperators.BlockPositionIsDistinctFrom; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntArrays; +import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import jakarta.annotation.Nullable; import java.util.Optional; @@ -52,6 +53,7 @@ private enum State private State state = State.UNINITIALIZED; + @Nullable private ValueBlock dictionary; private DictionaryIdsBuilder dictionaryIdsBuilder; @@ -219,6 +221,28 @@ void addSizesToAccumulator(PositionsAppenderSizeAccumulator accumulator) accumulator.accumulate(sizeInBytes, directSizeInBytes); } + public void flattenPendingDictionary() + { + if (state == State.DICTIONARY && dictionary != null) { + transitionToDirect(); + } + } + + public boolean shouldForceFlushBeforeRelease() + { + if (state == State.DICTIONARY && dictionary != null) { + IntOpenHashSet uniqueIdsSet = new IntOpenHashSet(); + int[] dictionaryIds = dictionaryIdsBuilder.getDictionaryIds(); + for (int i = 0; i < dictionaryIdsBuilder.size(); i++) { + // At least one position is referenced multiple times, preserve the dictionary encoding and force the current page to flush + if (!uniqueIdsSet.add(dictionaryIds[i])) { + return true; + } + } + } + return false; + } + private static class DictionaryIdsBuilder { private static final int INSTANCE_SIZE = instanceSize(DictionaryIdsBuilder.class); diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java index 34708653f848b..37df8394fb58c 100644 --- a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java +++ b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java @@ -127,7 +127,12 @@ public static Slice currentTimeZone(ConnectorSession session) public static long fromUnixTime(ConnectorSession session, @SqlType(StandardTypes.DOUBLE) double unixTime) { // TODO (https://github.com/trinodb/trino/issues/5781) - return packDateTimeWithZone(Math.round(unixTime * 1000), session.getTimeZoneKey()); + try { + return packDateTimeWithZone(Math.round(unixTime * 1000), session.getTimeZoneKey()); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); + } } @ScalarFunction("from_unixtime") @@ -137,11 +142,11 @@ public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime, TimeZoneKey timeZoneKey; try { timeZoneKey = getTimeZoneKeyForOffset(toIntExact(hoursOffset * 60 + minutesOffset)); + return packDateTimeWithZone(Math.round(unixTime * 1000), timeZoneKey); } catch (IllegalArgumentException e) { throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); } - return packDateTimeWithZone(Math.round(unixTime * 1000), timeZoneKey); } @ScalarFunction("from_unixtime") @@ -149,7 +154,12 @@ public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime, 
@SqlType("timestamp(3) with time zone") public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime, @SqlType("varchar(x)") Slice zoneId) { - return packDateTimeWithZone(Math.round(unixTime * 1000), zoneId.toStringUtf8()); + try { + return packDateTimeWithZone(Math.round(unixTime * 1000), zoneId.toStringUtf8()); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); + } } @ScalarFunction("from_unixtime_nanos") @@ -172,7 +182,12 @@ public static LongTimestampWithTimeZone fromLong(@LiteralParameter("s") long sca epochSeconds -= 1; picosOfSecond += PICOSECONDS_PER_SECOND; } - return DateTimes.longTimestampWithTimeZone(epochSeconds, picosOfSecond, session.getTimeZoneKey().getZoneId()); + try { + return DateTimes.longTimestampWithTimeZone(epochSeconds, picosOfSecond, session.getTimeZoneKey().getZoneId()); + } + catch (ArithmeticException e) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); + } } @LiteralParameters({"p", "s"}) @@ -216,7 +231,12 @@ public static long fromISO8601Timestamp(ConnectorSession session, @SqlType("varc DateTimeFormatter formatter = ISODateTimeFormat.dateTimeParser() .withChronology(getChronology(session.getTimeZoneKey())) .withOffsetParsed(); - return packDateTimeWithZone(parseDateTimeHelper(formatter, iso8601DateTime.toStringUtf8())); + try { + return packDateTimeWithZone(parseDateTimeHelper(formatter, iso8601DateTime.toStringUtf8())); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); + } } @ScalarFunction("from_iso8601_timestamp_nanos") diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java b/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java index e258d38f60efe..7ca6ebf76dd77 100644 --- a/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java +++ b/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java @@ -20,7 +20,7 @@ import io.trino.spi.function.LiteralParameters; import io.trino.spi.function.ScalarFunction; import io.trino.spi.function.SqlType; -import org.tartarus.snowball.SnowballProgram; +import org.tartarus.snowball.SnowballStemmer; import org.tartarus.snowball.ext.ArmenianStemmer; import org.tartarus.snowball.ext.BasqueStemmer; import org.tartarus.snowball.ext.CatalanStemmer; @@ -52,7 +52,7 @@ public final class WordStemFunction { private WordStemFunction() {} - private static final Map> STEMMERS = ImmutableMap.>builder() + private static final Map> STEMMERS = ImmutableMap.>builder() .put(utf8Slice("ca"), CatalanStemmer::new) .put(utf8Slice("da"), DanishStemmer::new) .put(utf8Slice("de"), German2Stemmer::new) @@ -90,14 +90,14 @@ public static Slice wordStem(@SqlType("varchar(x)") Slice slice) @SqlType("varchar(x)") public static Slice wordStem(@SqlType("varchar(x)") Slice slice, @SqlType("varchar(2)") Slice language) { - Supplier stemmer = STEMMERS.get(language); + Supplier stemmer = STEMMERS.get(language); if (stemmer == null) { throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Unknown stemmer language: " + language.toStringUtf8()); } return wordStem(slice, stemmer.get()); } - private static Slice wordStem(Slice slice, SnowballProgram stemmer) + private static Slice wordStem(Slice slice, SnowballStemmer stemmer) { stemmer.setCurrent(slice.toStringUtf8()); return stemmer.stem() ? 
utf8Slice(stemmer.getCurrent()) : slice; diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java b/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java index b778ef6135ac5..17f6a39568030 100644 --- a/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java +++ b/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java @@ -51,11 +51,11 @@ private VarcharToTimeWithTimeZoneCast() {} @SqlType("time(p) with time zone") public static long castToShort(@LiteralParameter("p") long precision, ConnectorSession session, @SqlType("varchar(x)") Slice value) { - checkArgument((int) precision <= MAX_SHORT_PRECISION, "precision must be less than max short timestamp precision"); + checkArgument((int) precision <= MAX_SHORT_PRECISION, "precision must be less than max short time with time zone precision"); Matcher matcher = DateTimes.TIME_PATTERN.matcher(trim(value).toStringUtf8()); if (!matcher.matches()) { - throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8()); + throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8()); } try { @@ -67,7 +67,7 @@ public static long castToShort(@LiteralParameter("p") long precision, ConnectorS return packTimeWithTimeZone(nanos, offsetMinutes); } catch (IllegalArgumentException e) { - throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e); + throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8(), e); } } @@ -79,7 +79,7 @@ public static LongTimeWithTimeZone castToLong(@LiteralParameter("p") long precis Matcher matcher = DateTimes.TIME_PATTERN.matcher(trim(value).toStringUtf8()); if (!matcher.matches()) { - throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8()); + throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8()); } try { @@ -91,7 +91,7 @@ public static LongTimeWithTimeZone castToLong(@LiteralParameter("p") long precis return new LongTimeWithTimeZone(picos, offsetMinutes); } catch (IllegalArgumentException e) { - throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e); + throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8(), e); } } diff --git a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java deleted file mode 100644 index d650c97b2b987..0000000000000 --- a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
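The DateTimeFunctions hunks above all apply one pattern: helpers such as packDateTimeWithZone and getTimeZoneKeyForOffset signal bad input with IllegalArgumentException, and the scalar functions now rethrow that as TrinoException(INVALID_FUNCTION_ARGUMENT, e) so the failure surfaces as a user error instead of an internal one. A minimal sketch of the pattern; parseOffsetMinutes is a hypothetical stand-in for those helpers:

import io.trino.spi.TrinoException;

import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;

// Sketch only: translate argument failures from a lower-level helper into INVALID_FUNCTION_ARGUMENT.
final class InvalidArgumentWrappingSketch
{
    static int checkedOffsetMinutes(long hours, long minutes)
    {
        try {
            return parseOffsetMinutes(hours, minutes);
        }
        catch (IllegalArgumentException e) {
            throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
        }
    }

    // hypothetical helper: rejects offsets outside the +/-14:00 range
    private static int parseOffsetMinutes(long hours, long minutes)
    {
        long offsetMinutes = hours * 60 + minutes;
        if (offsetMinutes < -14 * 60 || offsetMinutes > 14 * 60) {
            throw new IllegalArgumentException("Invalid time zone offset: " + offsetMinutes);
        }
        return (int) offsetMinutes;
    }
}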
- */ -package io.trino.operator.table; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Sets; -import com.google.inject.Provider; -import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorTableFunction; -import io.trino.spi.TrinoException; -import io.trino.spi.connector.ConnectorAccessControl; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorTransactionHandle; -import io.trino.spi.function.table.AbstractConnectorTableFunction; -import io.trino.spi.function.table.Argument; -import io.trino.spi.function.table.ConnectorTableFunction; -import io.trino.spi.function.table.ConnectorTableFunctionHandle; -import io.trino.spi.function.table.Descriptor; -import io.trino.spi.function.table.DescriptorArgument; -import io.trino.spi.function.table.DescriptorArgumentSpecification; -import io.trino.spi.function.table.TableArgument; -import io.trino.spi.function.table.TableArgumentSpecification; -import io.trino.spi.function.table.TableFunctionAnalysis; -import io.trino.spi.function.table.TableFunctionDataProcessor; -import io.trino.spi.function.table.TableFunctionProcessorProvider; -import io.trino.spi.type.RowType; - -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; - -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA; -import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; -import static io.trino.spi.function.table.DescriptorArgument.NULL_DESCRIPTOR; -import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; -import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; -import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInputAndProduced; -import static java.lang.String.format; -import static java.util.Locale.ENGLISH; -import static java.util.stream.Collectors.joining; - -public class ExcludeColumns - implements Provider -{ - public static final String NAME = "exclude_columns"; - - @Override - public ConnectorTableFunction get() - { - return new ClassLoaderSafeConnectorTableFunction(new ExcludeColumnsFunction(), getClass().getClassLoader()); - } - - public static class ExcludeColumnsFunction - extends AbstractConnectorTableFunction - { - private static final String TABLE_ARGUMENT_NAME = "INPUT"; - private static final String DESCRIPTOR_ARGUMENT_NAME = "COLUMNS"; - - public ExcludeColumnsFunction() - { - super( - BUILTIN_SCHEMA, - NAME, - ImmutableList.of( - TableArgumentSpecification.builder() - .name(TABLE_ARGUMENT_NAME) - .rowSemantics() - .build(), - DescriptorArgumentSpecification.builder() - .name(DESCRIPTOR_ARGUMENT_NAME) - .build()), - GENERIC_TABLE); - } - - @Override - public TableFunctionAnalysis analyze( - ConnectorSession session, - ConnectorTransactionHandle transaction, - Map arguments, - ConnectorAccessControl accessControl) - { - DescriptorArgument excludedColumns = (DescriptorArgument) arguments.get(DESCRIPTOR_ARGUMENT_NAME); - if (excludedColumns.equals(NULL_DESCRIPTOR)) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor is null"); - } - Descriptor excludedColumnsDescriptor = excludedColumns.getDescriptor().orElseThrow(); - if (excludedColumnsDescriptor.getFields().stream().anyMatch(field -> field.getType().isPresent())) { - throw new 
TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor contains types"); - } - - // column names in DescriptorArgument are canonical wrt SQL identifier semantics. - // column names in TableArgument are not canonical wrt SQL identifier semantics, as they are taken from the corresponding RelationType. - // because of that, we match the excluded columns names case-insensitive - // TODO apply proper identifier semantics - Set excludedNames = excludedColumnsDescriptor.getFields().stream() - .map(Descriptor.Field::getName) - .map(name -> name.orElseThrow().toLowerCase(ENGLISH)) - .collect(toImmutableSet()); - - List inputSchema = ((TableArgument) arguments.get(TABLE_ARGUMENT_NAME)).getRowType().getFields(); - Set inputNames = inputSchema.stream() - .map(RowType.Field::getName) - .filter(Optional::isPresent) - .map(Optional::get) - .map(name -> name.toLowerCase(ENGLISH)) - .collect(toImmutableSet()); - - if (!inputNames.containsAll(excludedNames)) { - String missingColumns = Sets.difference(excludedNames, inputNames).stream() - .collect(joining(", ", "[", "]")); - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Excluded columns: %s not present in the table", missingColumns)); - } - - ImmutableList.Builder requiredColumns = ImmutableList.builder(); - ImmutableList.Builder returnedColumns = ImmutableList.builder(); - - for (int i = 0; i < inputSchema.size(); i++) { - Optional name = inputSchema.get(i).getName(); - if (name.isEmpty() || !excludedNames.contains(name.orElseThrow().toLowerCase(ENGLISH))) { - requiredColumns.add(i); - // per SQL standard, all columns produced by a table function must be named. We allow anonymous columns. - returnedColumns.add(new Descriptor.Field(name, Optional.of(inputSchema.get(i).getType()))); - } - } - - List returnedType = returnedColumns.build(); - if (returnedType.isEmpty()) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "All columns are excluded"); - } - - return TableFunctionAnalysis.builder() - .requiredColumns(TABLE_ARGUMENT_NAME, requiredColumns.build()) - .returnedType(new Descriptor(returnedType)) - .handle(new ExcludeColumnsFunctionHandle()) - .build(); - } - } - - public static TableFunctionProcessorProvider getExcludeColumnsFunctionProcessorProvider() - { - return new TableFunctionProcessorProvider() - { - @Override - public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle) - { - return input -> { - if (input == null) { - return FINISHED; - } - return usedInputAndProduced(getOnlyElement(input).orElseThrow()); - }; - } - }; - } - - public record ExcludeColumnsFunctionHandle() - implements ConnectorTableFunctionHandle - { - // there's no information to remember. All logic is effectively delegated to the engine via `requiredColumns`. - } -} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java new file mode 100644 index 0000000000000..0417b91ffa64f --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java @@ -0,0 +1,161 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; +import io.trino.spi.TrinoException; +import io.trino.spi.connector.ConnectorAccessControl; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.function.table.AbstractConnectorTableFunction; +import io.trino.spi.function.table.Argument; +import io.trino.spi.function.table.ConnectorTableFunctionHandle; +import io.trino.spi.function.table.Descriptor; +import io.trino.spi.function.table.DescriptorArgument; +import io.trino.spi.function.table.DescriptorArgumentSpecification; +import io.trino.spi.function.table.TableArgument; +import io.trino.spi.function.table.TableArgumentSpecification; +import io.trino.spi.function.table.TableFunctionAnalysis; +import io.trino.spi.function.table.TableFunctionDataProcessor; +import io.trino.spi.function.table.TableFunctionProcessorProvider; +import io.trino.spi.type.RowType; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA; +import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static io.trino.spi.function.table.DescriptorArgument.NULL_DESCRIPTOR; +import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; +import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; +import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInputAndProduced; +import static java.lang.String.format; +import static java.util.Locale.ENGLISH; +import static java.util.stream.Collectors.joining; + +public class ExcludeColumnsFunction + extends AbstractConnectorTableFunction +{ + public static final String NAME = "exclude_columns"; + + private static final String TABLE_ARGUMENT_NAME = "INPUT"; + private static final String DESCRIPTOR_ARGUMENT_NAME = "COLUMNS"; + + public ExcludeColumnsFunction() + { + super( + BUILTIN_SCHEMA, + NAME, + ImmutableList.of( + TableArgumentSpecification.builder() + .name(TABLE_ARGUMENT_NAME) + .rowSemantics() + .build(), + DescriptorArgumentSpecification.builder() + .name(DESCRIPTOR_ARGUMENT_NAME) + .build()), + GENERIC_TABLE); + } + + @Override + public TableFunctionAnalysis analyze( + ConnectorSession session, + ConnectorTransactionHandle transaction, + Map arguments, + ConnectorAccessControl accessControl) + { + DescriptorArgument excludedColumns = (DescriptorArgument) arguments.get(DESCRIPTOR_ARGUMENT_NAME); + if (excludedColumns.equals(NULL_DESCRIPTOR)) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor is null"); + } + Descriptor excludedColumnsDescriptor = excludedColumns.getDescriptor().orElseThrow(); + if (excludedColumnsDescriptor.getFields().stream().anyMatch(field -> field.getType().isPresent())) { + throw new 
TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor contains types"); + } + + // column names in DescriptorArgument are canonical wrt SQL identifier semantics. + // column names in TableArgument are not canonical wrt SQL identifier semantics, as they are taken from the corresponding RelationType. + // because of that, we match the excluded columns names case-insensitive + // TODO apply proper identifier semantics + Set excludedNames = excludedColumnsDescriptor.getFields().stream() + .map(Descriptor.Field::getName) + .map(name -> name.orElseThrow().toLowerCase(ENGLISH)) + .collect(toImmutableSet()); + + List inputSchema = ((TableArgument) arguments.get(TABLE_ARGUMENT_NAME)).getRowType().getFields(); + Set inputNames = inputSchema.stream() + .map(RowType.Field::getName) + .filter(Optional::isPresent) + .map(Optional::get) + .map(name -> name.toLowerCase(ENGLISH)) + .collect(toImmutableSet()); + + if (!inputNames.containsAll(excludedNames)) { + String missingColumns = Sets.difference(excludedNames, inputNames).stream() + .collect(joining(", ", "[", "]")); + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Excluded columns: %s not present in the table", missingColumns)); + } + + ImmutableList.Builder requiredColumns = ImmutableList.builder(); + ImmutableList.Builder returnedColumns = ImmutableList.builder(); + + for (int i = 0; i < inputSchema.size(); i++) { + Optional name = inputSchema.get(i).getName(); + if (name.isEmpty() || !excludedNames.contains(name.orElseThrow().toLowerCase(ENGLISH))) { + requiredColumns.add(i); + // per SQL standard, all columns produced by a table function must be named. We allow anonymous columns. + returnedColumns.add(new Descriptor.Field(name, Optional.of(inputSchema.get(i).getType()))); + } + } + + List returnedType = returnedColumns.build(); + if (returnedType.isEmpty()) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "All columns are excluded"); + } + + return TableFunctionAnalysis.builder() + .requiredColumns(TABLE_ARGUMENT_NAME, requiredColumns.build()) + .returnedType(new Descriptor(returnedType)) + .handle(new ExcludeColumnsFunctionHandle()) + .build(); + } + + public static TableFunctionProcessorProvider getExcludeColumnsFunctionProcessorProvider() + { + return new TableFunctionProcessorProvider() + { + @Override + public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle) + { + return input -> { + if (input == null) { + return FINISHED; + } + return usedInputAndProduced(getOnlyElement(input).orElseThrow()); + }; + } + }; + } + + public record ExcludeColumnsFunctionHandle() + implements ConnectorTableFunctionHandle + { + // there's no information to remember. All logic is effectively delegated to the engine via `requiredColumns`. 
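ExcludeColumnsFunction above is now a plain AbstractConnectorTableFunction (the Provider wrapper from the deleted file is no longer part of this class); its analysis only decides which input column indexes survive, and the data processor forwards pages untouched because the engine already prunes the input down to requiredColumns. A small standalone sketch of that index computation under the case-insensitive matching described in the comments, using hypothetical column names:

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

// Standalone sketch: which input column indexes does exclude_columns keep?
final class ExcludeColumnsIndexSketch
{
    public static void main(String[] args)
    {
        List<String> inputColumns = List.of("orderkey", "clerk", "comment");
        Set<String> excludedNames = Set.of("CLERK", "comment").stream()
                .map(name -> name.toLowerCase(Locale.ENGLISH))
                .collect(Collectors.toSet());

        int[] requiredColumns = IntStream.range(0, inputColumns.size())
                .filter(i -> !excludedNames.contains(inputColumns.get(i).toLowerCase(Locale.ENGLISH)))
                .toArray();

        // prints [0]: only "orderkey" reaches the function and is returned to the query
        System.out.println(Arrays.toString(requiredColumns));
    }
}

In a query this corresponds to an invocation along the lines of SELECT * FROM TABLE(exclude_columns(input => TABLE(orders), columns => DESCRIPTOR(clerk, comment))).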
+ } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/Sequence.java b/core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java similarity index 68% rename from core/trino-main/src/main/java/io/trino/operator/table/Sequence.java rename to core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java index c111f86c51401..2a1b176c95928 100644 --- a/core/trino-main/src/main/java/io/trino/operator/table/Sequence.java +++ b/core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java @@ -17,8 +17,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.inject.Provider; -import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorTableFunction; import io.trino.spi.Page; import io.trino.spi.PageBuilder; import io.trino.spi.TrinoException; @@ -31,7 +29,6 @@ import io.trino.spi.connector.FixedSplitSource; import io.trino.spi.function.table.AbstractConnectorTableFunction; import io.trino.spi.function.table.Argument; -import io.trino.spi.function.table.ConnectorTableFunction; import io.trino.spi.function.table.ConnectorTableFunctionHandle; import io.trino.spi.function.table.ReturnTypeSpecification.DescribedTable; import io.trino.spi.function.table.ScalarArgument; @@ -48,7 +45,7 @@ import static com.google.common.base.Preconditions.checkState; import static io.airlift.slice.SizeOf.instanceSize; import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA; -import static io.trino.operator.table.Sequence.SequenceFunctionSplit.MAX_SPLIT_SIZE; +import static io.trino.operator.table.SequenceFunction.SequenceFunctionSplit.MAX_SPLIT_SIZE; import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static io.trino.spi.function.table.Descriptor.descriptor; import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; @@ -56,85 +53,75 @@ import static io.trino.spi.type.BigintType.BIGINT; import static java.lang.String.format; -public class Sequence - implements Provider +public class SequenceFunction + extends AbstractConnectorTableFunction { public static final String NAME = "sequence"; - @Override - public ConnectorTableFunction get() + private static final String START_ARGUMENT_NAME = "START"; + private static final String STOP_ARGUMENT_NAME = "STOP"; + private static final String STEP_ARGUMENT_NAME = "STEP"; + + public SequenceFunction() { - return new ClassLoaderSafeConnectorTableFunction(new SequenceFunction(), getClass().getClassLoader()); + super( + BUILTIN_SCHEMA, + NAME, + ImmutableList.of( + ScalarArgumentSpecification.builder() + .name(START_ARGUMENT_NAME) + .type(BIGINT) + .defaultValue(0L) + .build(), + ScalarArgumentSpecification.builder() + .name(STOP_ARGUMENT_NAME) + .type(BIGINT) + .build(), + ScalarArgumentSpecification.builder() + .name(STEP_ARGUMENT_NAME) + .type(BIGINT) + .defaultValue(1L) + .build()), + new DescribedTable(descriptor(ImmutableList.of("sequential_number"), ImmutableList.of(BIGINT)))); } - public static class SequenceFunction - extends AbstractConnectorTableFunction + @Override + public TableFunctionAnalysis analyze( + ConnectorSession session, + ConnectorTransactionHandle transaction, + Map arguments, + ConnectorAccessControl accessControl) { - private static final String START_ARGUMENT_NAME = "START"; - private static final String STOP_ARGUMENT_NAME = "STOP"; - private static final String STEP_ARGUMENT_NAME = "STEP"; - - public 
SequenceFunction() - { - super( - BUILTIN_SCHEMA, - NAME, - ImmutableList.of( - ScalarArgumentSpecification.builder() - .name(START_ARGUMENT_NAME) - .type(BIGINT) - .defaultValue(0L) - .build(), - ScalarArgumentSpecification.builder() - .name(STOP_ARGUMENT_NAME) - .type(BIGINT) - .build(), - ScalarArgumentSpecification.builder() - .name(STEP_ARGUMENT_NAME) - .type(BIGINT) - .defaultValue(1L) - .build()), - new DescribedTable(descriptor(ImmutableList.of("sequential_number"), ImmutableList.of(BIGINT)))); + Object startValue = ((ScalarArgument) arguments.get(START_ARGUMENT_NAME)).getValue(); + if (startValue == null) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Start is null"); } - @Override - public TableFunctionAnalysis analyze( - ConnectorSession session, - ConnectorTransactionHandle transaction, - Map arguments, - ConnectorAccessControl accessControl) - { - Object startValue = ((ScalarArgument) arguments.get(START_ARGUMENT_NAME)).getValue(); - if (startValue == null) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Start is null"); - } - - Object stopValue = ((ScalarArgument) arguments.get(STOP_ARGUMENT_NAME)).getValue(); - if (stopValue == null) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Stop is null"); - } + Object stopValue = ((ScalarArgument) arguments.get(STOP_ARGUMENT_NAME)).getValue(); + if (stopValue == null) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Stop is null"); + } - Object stepValue = ((ScalarArgument) arguments.get(STEP_ARGUMENT_NAME)).getValue(); - if (stepValue == null) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Step is null"); - } + Object stepValue = ((ScalarArgument) arguments.get(STEP_ARGUMENT_NAME)).getValue(); + if (stepValue == null) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Step is null"); + } - long start = (long) startValue; - long stop = (long) stopValue; - long step = (long) stepValue; + long start = (long) startValue; + long stop = (long) stopValue; + long step = (long) stepValue; - if (start < stop && step <= 0) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be positive for sequence [%s, %s]", start, stop)); - } - - if (start > stop && step >= 0) { - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be negative for sequence [%s, %s]", start, stop)); - } + if (start < stop && step <= 0) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be positive for sequence [%s, %s]", start, stop)); + } - return TableFunctionAnalysis.builder() - .handle(new SequenceFunctionHandle(start, stop, start == stop ? 0 : step)) - .build(); + if (start > stop && step >= 0) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be negative for sequence [%s, %s]", start, stop)); } + + return TableFunctionAnalysis.builder() + .handle(new SequenceFunctionHandle(start, stop, start == stop ? 0 : step)) + .build(); } public record SequenceFunctionHandle(long start, long stop, long step) diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java new file mode 100644 index 0000000000000..1e1aff0faf19d --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java @@ -0,0 +1,219 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.metadata.FunctionManager; +import io.trino.metadata.Metadata; +import io.trino.operator.table.json.execution.JsonTableProcessingFragment; +import io.trino.spi.Page; +import io.trino.spi.PageBuilder; +import io.trino.spi.block.SqlRow; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.function.table.ConnectorTableFunctionHandle; +import io.trino.spi.function.table.TableFunctionDataProcessor; +import io.trino.spi.function.table.TableFunctionProcessorProvider; +import io.trino.spi.function.table.TableFunctionProcessorState; +import io.trino.spi.type.RowType; +import io.trino.spi.type.Type; +import io.trino.spi.type.TypeManager; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.operator.scalar.json.ParameterUtil.getParametersArray; +import static io.trino.operator.table.json.execution.ExecutionPlanner.getExecutionPlan; +import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; +import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.produced; +import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInput; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.TypeUtils.readNativeValue; +import static io.trino.spi.type.TypeUtils.writeNativeValue; +import static io.trino.type.Json2016Type.JSON_2016; +import static java.util.Objects.requireNonNull; + +/** + * Implements feature ISO/IEC 9075-2:2023(E) 7.11 'JSON table' + * including features T824, T827, T838 + */ +public class JsonTable +{ + private JsonTable() {} + + /** + * This class comprises all information necessary to execute the json_table function: + * + * @param processingPlan the root of the processing plan tree + * @param outer the parent-child relationship between the input relation and the processingPlan result + * @param errorOnError the error behavior: true for ERROR ON ERROR, false for EMPTY ON ERROR + * @param parametersType type of the row containing JSON path parameters for the root JSON path. The function expects the parameters row in the channel 1. + * Other channels in the input page correspond to JSON context item (channel 0), and default values for the value columns. Each value column in the processingPlan + * knows the indexes of its default channels. 
+ * @param outputTypes types of the proper columns produced by the function + */ + public record JsonTableFunctionHandle(JsonTablePlanNode processingPlan, boolean outer, boolean errorOnError, Type parametersType, Type[] outputTypes) + implements ConnectorTableFunctionHandle + { + public JsonTableFunctionHandle + { + requireNonNull(processingPlan, "processingPlan is null"); + requireNonNull(parametersType, "parametersType is null"); + requireNonNull(outputTypes, "outputTypes is null"); + + // We can't use RowType in the public interface because it's not directly deserializeable from JSON. See TypeDeserializerModule. + checkArgument(parametersType instanceof RowType, "parametersType is not a row type"); + } + } + + public static TableFunctionProcessorProvider getJsonTableFunctionProcessorProvider(Metadata metadata, TypeManager typeManager, FunctionManager functionManager) + { + return new TableFunctionProcessorProvider() + { + @Override + public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle) + { + JsonTableFunctionHandle jsonTableFunctionHandle = (JsonTableFunctionHandle) handle; + Object[] newRow = new Object[jsonTableFunctionHandle.outputTypes().length]; + JsonTableProcessingFragment executionPlan = getExecutionPlan( + jsonTableFunctionHandle.processingPlan(), + newRow, + jsonTableFunctionHandle.errorOnError(), + jsonTableFunctionHandle.outputTypes(), + session, + metadata, + typeManager, + functionManager); + return new JsonTableFunctionProcessor(executionPlan, newRow, jsonTableFunctionHandle.outputTypes(), (RowType) jsonTableFunctionHandle.parametersType(), jsonTableFunctionHandle.outer()); + } + }; + } + + public static class JsonTableFunctionProcessor + implements TableFunctionDataProcessor + { + private final PageBuilder pageBuilder; + private final int properColumnsCount; + private final JsonTableProcessingFragment executionPlan; + private final Object[] newRow; + private final RowType parametersType; + private final boolean outer; + + private long totalPositionsProcessed; + private int currentPosition = -1; + private boolean currentPositionAlreadyProduced; + + public JsonTableFunctionProcessor(JsonTableProcessingFragment executionPlan, Object[] newRow, Type[] outputTypes, RowType parametersType, boolean outer) + { + this.pageBuilder = new PageBuilder(ImmutableList.builder() + .add(outputTypes) + .add(BIGINT) // add additional position for pass-through index + .build()); + this.properColumnsCount = outputTypes.length; + this.executionPlan = requireNonNull(executionPlan, "executionPlan is null"); + this.newRow = requireNonNull(newRow, "newRow is null"); + this.parametersType = requireNonNull(parametersType, "parametersType is null"); + this.outer = outer; + } + + @Override + public TableFunctionProcessorState process(List> input) + { + // no more input pages + if (input == null) { + if (pageBuilder.isEmpty()) { + return FINISHED; + } + return flushPageBuilder(); + } + + Page inputPage = getOnlyElement(input).orElseThrow(); + while (!pageBuilder.isFull()) { + // new input page + if (currentPosition == -1) { + if (inputPage.getPositionCount() == 0) { + return usedInput(); + } + else { + currentPosition = 0; + currentPositionAlreadyProduced = false; + totalPositionsProcessed++; + SqlRow parametersRow = (SqlRow) readNativeValue(parametersType, inputPage.getBlock(1), currentPosition); + executionPlan.resetRoot( + (JsonNode) readNativeValue(JSON_2016, inputPage.getBlock(0), currentPosition), + inputPage, + currentPosition, + 
getParametersArray(parametersType, parametersRow)); + } + } + + // try to get output row for the current position (one position can produce multiple rows) + boolean gotNewRow = executionPlan.getRow(); + if (gotNewRow) { + currentPositionAlreadyProduced = true; + addOutputRow(); + } + else { + if (outer && !currentPositionAlreadyProduced) { + addNullPaddedRow(); + } + // go to next position in the input page + currentPosition++; + if (currentPosition < inputPage.getPositionCount()) { + currentPositionAlreadyProduced = false; + totalPositionsProcessed++; + SqlRow parametersRow = (SqlRow) readNativeValue(parametersType, inputPage.getBlock(1), currentPosition); + executionPlan.resetRoot( + (JsonNode) readNativeValue(JSON_2016, inputPage.getBlock(0), currentPosition), + inputPage, + currentPosition, + getParametersArray(parametersType, parametersRow)); + } + else { + currentPosition = -1; + return usedInput(); + } + } + } + + return flushPageBuilder(); + } + + private TableFunctionProcessorState flushPageBuilder() + { + TableFunctionProcessorState result = produced(pageBuilder.build()); + pageBuilder.reset(); + return result; + } + + private void addOutputRow() + { + pageBuilder.declarePosition(); + for (int channel = 0; channel < properColumnsCount; channel++) { + writeNativeValue(pageBuilder.getType(channel), pageBuilder.getBlockBuilder(channel), newRow[channel]); + } + // pass-through index from partition start + BIGINT.writeLong(pageBuilder.getBlockBuilder(properColumnsCount), totalPositionsProcessed - 1); + } + + private void addNullPaddedRow() + { + Arrays.fill(newRow, null); + addOutputRow(); + } + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java new file mode 100644 index 0000000000000..8727e4254c67f --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java @@ -0,0 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
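JsonTableFunctionProcessor above drives a per-position loop: each input position supplies a JSON context item (channel 0) and a row of path parameters (channel 1), the execution plan is reset for that position, and the plan may then emit any number of output rows; with OUTER semantics a position that emits nothing still yields one null-padded row, and every output row records the running position index so pass-through columns can be reattached later. A toy model of just that control flow, with assumed shapes and no paging or type handling:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

// Toy model of the json_table row-production loop; shapes and names are illustrative only.
final class JsonTableLoopSketch
{
    record OutputRow(long inputPosition, List<Object> properColumns) {}

    static List<OutputRow> process(List<String> jsonContextItems, boolean outer, Function<String, List<List<Object>>> plan)
    {
        List<OutputRow> output = new ArrayList<>();
        long positionsProcessed = 0;
        for (String json : jsonContextItems) {
            long passThroughIndex = positionsProcessed;
            positionsProcessed++;
            List<List<Object>> rows = plan.apply(json); // zero or more rows per input position
            if (rows.isEmpty() && outer) {
                output.add(new OutputRow(passThroughIndex, null)); // null-padded proper columns
            }
            for (List<Object> properColumns : rows) {
                output.add(new OutputRow(passThroughIndex, properColumns));
            }
        }
        return output;
    }
}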
+ */ +package io.trino.operator.table.json; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = JsonTableOrdinalityColumn.class, name = "ordinality"), + @JsonSubTypes.Type(value = JsonTableQueryColumn.class, name = "query"), + @JsonSubTypes.Type(value = JsonTableValueColumn.class, name = "value"), +}) + +public sealed interface JsonTableColumn + permits JsonTableOrdinalityColumn, JsonTableQueryColumn, JsonTableValueColumn +{ +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java new file mode 100644 index 0000000000000..904bb385e4429 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java @@ -0,0 +1,19 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json; + +public record JsonTableOrdinalityColumn(int outputIndex) + implements JsonTableColumn +{ +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java new file mode 100644 index 0000000000000..f61c13f920c9b --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
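JsonTableColumn (and JsonTablePlanNode below) travel to the workers inside the JSON-serialized connector handle, which is why they carry Jackson polymorphic-type annotations. The following is a hypothetical, self-contained sketch of how the @JsonTypeInfo/@JsonSubTypes pattern round-trips through the "@type" discriminator, assuming a jackson-databind version with record support (2.12+); the types are stand-ins, not the classes from this patch.

import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public final class PolymorphicCodecSketch
{
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "@type")
    @JsonSubTypes({
            @JsonSubTypes.Type(value = Ordinality.class, name = "ordinality"),
            @JsonSubTypes.Type(value = Value.class, name = "value"),
    })
    sealed interface Column
            permits Ordinality, Value {}

    record Ordinality(int outputIndex)
            implements Column {}

    record Value(int outputIndex, String path)
            implements Column {}

    public static void main(String[] args)
            throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();
        String json = mapper.writeValueAsString(new Value(1, "lax $.x"));
        // prints something like {"@type":"value","outputIndex":1,"path":"lax $.x"}
        System.out.println(json);
        // the "@type" property tells Jackson which record to instantiate
        Column roundTripped = mapper.readValue(json, Column.class);
        System.out.println(roundTripped);
    }

    private PolymorphicCodecSketch() {}
}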
+ */ +package io.trino.operator.table.json; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; + +public record JsonTablePlanCross(List siblings) + implements JsonTablePlanNode +{ + public JsonTablePlanCross(List siblings) + { + this.siblings = ImmutableList.copyOf(siblings); + checkArgument(siblings.size() >= 2, "less than 2 siblings in Cross node"); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java new file mode 100644 index 0000000000000..f1cbafbe86cef --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java @@ -0,0 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json; + +import com.google.common.collect.ImmutableList; +import io.trino.json.ir.IrJsonPath; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public record JsonTablePlanLeaf(IrJsonPath path, List columns) + implements JsonTablePlanNode +{ + public JsonTablePlanLeaf(IrJsonPath path, List columns) + { + this.path = requireNonNull(path, "path is null"); + this.columns = ImmutableList.copyOf(columns); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java new file mode 100644 index 0000000000000..73b56a75fb17f --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java @@ -0,0 +1,32 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.operator.table.json; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = JsonTablePlanCross.class, name = "cross"), + @JsonSubTypes.Type(value = JsonTablePlanLeaf.class, name = "leaf"), + @JsonSubTypes.Type(value = JsonTablePlanSingle.class, name = "single"), + @JsonSubTypes.Type(value = JsonTablePlanUnion.class, name = "union"), +}) + +public sealed interface JsonTablePlanNode + permits JsonTablePlanCross, JsonTablePlanLeaf, JsonTablePlanSingle, JsonTablePlanUnion +{ +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java new file mode 100644 index 0000000000000..49423e2c4bd2b --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json; + +import com.google.common.collect.ImmutableList; +import io.trino.json.ir.IrJsonPath; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public record JsonTablePlanSingle(IrJsonPath path, List columns, boolean outer, JsonTablePlanNode child) + implements JsonTablePlanNode +{ + public JsonTablePlanSingle(IrJsonPath path, List columns, boolean outer, JsonTablePlanNode child) + { + this.path = requireNonNull(path, "path is null"); + this.columns = ImmutableList.copyOf(columns); + this.outer = outer; + this.child = requireNonNull(child, "child is null"); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java new file mode 100644 index 0000000000000..e8a1f1caeaf4a --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.operator.table.json; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; + +public record JsonTablePlanUnion(List siblings) + implements JsonTablePlanNode +{ + public JsonTablePlanUnion(List siblings) + { + this.siblings = ImmutableList.copyOf(siblings); + checkArgument(siblings.size() >= 2, "less than 2 siblings in Union node"); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java new file mode 100644 index 0000000000000..117df03c2c25f --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java @@ -0,0 +1,40 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json; + +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.ResolvedFunction; + +import static java.util.Objects.requireNonNull; + +/** + * This representation does not contain all properties of the column as specified in json_table invocation. + * Certain properties are handled by the output function which is applied later. + * These are: output format and quotes behavior. + */ +public record JsonTableQueryColumn( + int outputIndex, + ResolvedFunction function, + IrJsonPath path, + long wrapperBehavior, + long emptyBehavior, + long errorBehavior) + implements JsonTableColumn +{ + public JsonTableQueryColumn + { + requireNonNull(function, "function is null"); + requireNonNull(path, "path is null"); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java new file mode 100644 index 0000000000000..6d87bc4a5ffd8 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.operator.table.json; + +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.ResolvedFunction; + +import static java.util.Objects.requireNonNull; + +public record JsonTableValueColumn( + int outputIndex, + ResolvedFunction function, + IrJsonPath path, + long emptyBehavior, + int emptyDefaultInput, // channel number or -1 when default not specified + long errorBehavior, + int errorDefaultInput) // channel number or -1 when default not specified + implements JsonTableColumn +{ + public JsonTableValueColumn + { + requireNonNull(function, "function is null"); + requireNonNull(path, "path is null"); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java new file mode 100644 index 0000000000000..15eab03d10d33 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java @@ -0,0 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import io.trino.spi.Page; + +public interface Column +{ + Object evaluate(long sequentialNumber, JsonNode item, Page input, int position); + + int getOutputIndex(); +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java new file mode 100644 index 0000000000000..e6c4879db057a --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java @@ -0,0 +1,159 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.operator.table.json.execution; + +import com.google.common.collect.ImmutableList; +import io.trino.json.JsonPathInvocationContext; +import io.trino.metadata.FunctionManager; +import io.trino.metadata.Metadata; +import io.trino.operator.table.json.JsonTableColumn; +import io.trino.operator.table.json.JsonTableOrdinalityColumn; +import io.trino.operator.table.json.JsonTablePlanCross; +import io.trino.operator.table.json.JsonTablePlanLeaf; +import io.trino.operator.table.json.JsonTablePlanNode; +import io.trino.operator.table.json.JsonTablePlanSingle; +import io.trino.operator.table.json.JsonTablePlanUnion; +import io.trino.operator.table.json.JsonTableQueryColumn; +import io.trino.operator.table.json.JsonTableValueColumn; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.function.InvocationConvention; +import io.trino.spi.function.ScalarFunctionImplementation; +import io.trino.spi.type.Type; +import io.trino.spi.type.TypeManager; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Throwables.throwIfUnchecked; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.BOXED_NULLABLE; +import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.NEVER_NULL; +import static io.trino.spi.function.InvocationConvention.InvocationReturnConvention.NULLABLE_RETURN; + +public class ExecutionPlanner +{ + private ExecutionPlanner() + { + } + + public static JsonTableProcessingFragment getExecutionPlan( + JsonTablePlanNode plan, + Object[] newRow, + boolean errorOnError, + Type[] outputTypes, + ConnectorSession session, + Metadata metadata, + TypeManager typeManager, + FunctionManager functionManager) + { + if (plan instanceof JsonTablePlanLeaf planLeaf) { + return new FragmentLeaf( + planLeaf.path(), + planLeaf.columns().stream() + .map(column -> getColumn(column, outputTypes, session, functionManager)) + .collect(toImmutableList()), + errorOnError, + newRow, + session, + metadata, + typeManager, + functionManager); + } + if (plan instanceof JsonTablePlanSingle planSingle) { + return new FragmentSingle( + planSingle.path(), + planSingle.columns().stream() + .map(column -> getColumn(column, outputTypes, session, functionManager)) + .collect(toImmutableList()), + errorOnError, + planSingle.outer(), + getExecutionPlan(planSingle.child(), newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager), + newRow, + session, + metadata, + typeManager, + functionManager); + } + if (plan instanceof JsonTablePlanCross planCross) { + return new FragmentCross(planCross.siblings().stream() + .map(sibling -> getExecutionPlan(sibling, newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager)) + .collect(toImmutableList())); + } + JsonTablePlanUnion planUnion = (JsonTablePlanUnion) plan; + return new FragmentUnion( + planUnion.siblings().stream() + .map(sibling -> getExecutionPlan(sibling, newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager)) + .collect(toImmutableList()), + newRow); + } + + private static Column getColumn(JsonTableColumn column, Type[] outputTypes, ConnectorSession session, FunctionManager functionManager) + { + if (column instanceof JsonTableValueColumn valueColumn) { + ScalarFunctionImplementation implementation = functionManager.getScalarFunctionImplementation( + valueColumn.function(), + new 
InvocationConvention( + ImmutableList.of(BOXED_NULLABLE, BOXED_NULLABLE, BOXED_NULLABLE, NEVER_NULL, BOXED_NULLABLE, NEVER_NULL, BOXED_NULLABLE), + NULLABLE_RETURN, + true, + true)); + JsonPathInvocationContext context; + checkArgument(implementation.getInstanceFactory().isPresent(), "instance factory is missing"); + try { + context = (JsonPathInvocationContext) implementation.getInstanceFactory().get().invoke(); + } + catch (Throwable throwable) { + throwIfUnchecked(throwable); + throw new RuntimeException(throwable); + } + return new ValueColumn( + valueColumn.outputIndex(), + implementation.getMethodHandle() + .bindTo(context) + .bindTo(session), + valueColumn.path(), + valueColumn.emptyBehavior(), + valueColumn.emptyDefaultInput(), + valueColumn.errorBehavior(), + valueColumn.errorDefaultInput(), + outputTypes[valueColumn.outputIndex()]); + } + if (column instanceof JsonTableQueryColumn queryColumn) { + ScalarFunctionImplementation implementation = functionManager.getScalarFunctionImplementation( + queryColumn.function(), + new InvocationConvention( + ImmutableList.of(BOXED_NULLABLE, BOXED_NULLABLE, BOXED_NULLABLE, NEVER_NULL, NEVER_NULL, NEVER_NULL), + NULLABLE_RETURN, + true, + true)); + JsonPathInvocationContext context; + checkArgument(implementation.getInstanceFactory().isPresent(), "instance factory is missing"); + try { + context = (JsonPathInvocationContext) implementation.getInstanceFactory().get().invoke(); + } + catch (Throwable throwable) { + throwIfUnchecked(throwable); + throw new RuntimeException(throwable); + } + return new QueryColumn( + queryColumn.outputIndex(), + implementation.getMethodHandle() + .bindTo(context) + .bindTo(session), + queryColumn.path(), + queryColumn.wrapperBehavior(), + queryColumn.emptyBehavior(), + queryColumn.errorBehavior()); + } + return new OrdinalityColumn(((JsonTableOrdinalityColumn) column).outputIndex()); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java new file mode 100644 index 0000000000000..56cbdbe724be0 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java @@ -0,0 +1,93 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
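For value and query columns, ExecutionPlanner asks the FunctionManager for a ScalarFunctionImplementation and then partially applies its MethodHandle with bindTo, fixing the per-operator JsonPathInvocationContext and the session so that only per-row arguments remain; presumably this lets whatever the invocation context caches live for the lifetime of the operator rather than being rebuilt per row. A self-contained sketch of that partial-application idiom on java.lang.invoke follows (toy method, illustrative names).

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

public final class BindToSketch
{
    static String greet(String session, String item)
    {
        return session + ": " + item;
    }

    public static void main(String[] args)
            throws Throwable
    {
        MethodHandle handle = MethodHandles.lookup().findStatic(
                BindToSketch.class,
                "greet",
                MethodType.methodType(String.class, String.class, String.class));
        // bindTo pre-applies the leading argument, mirroring .bindTo(context).bindTo(session) above
        MethodHandle bound = handle.bindTo("session-1");
        System.out.println((String) bound.invoke("item-42")); // prints "session-1: item-42"
    }

    private BindToSketch() {}
}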
+ */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.spi.Page; + +import java.util.Arrays; +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +public class FragmentCross + implements JsonTableProcessingFragment +{ + private final List siblings; + private final int[] outputLayout; + + private Page input; + private int position; + private JsonNode currentItem; + private int currentSiblingIndex; + + public FragmentCross(List siblings) + { + this.siblings = ImmutableList.copyOf(siblings); + checkArgument(siblings.size() >= 2, "less than 2 siblings in Cross node"); + this.outputLayout = siblings.stream() + .map(JsonTableProcessingFragment::getOutputLayout) + .flatMapToInt(Arrays::stream) + .toArray(); + } + + @Override + public void reset(JsonNode item, Page input, int position) + { + this.currentItem = requireNonNull(item, "item is null"); + this.input = requireNonNull(input, "input is null"); + this.position = position; + siblings.get(0).reset(item, input, position); + this.currentSiblingIndex = 0; + } + + /** + * All values produced by the siblings are stored on corresponding positions in `newRow`. It is a temporary representation of the result row, and is shared by all Fragments. + * The values in `newRow` are not cleared between subsequent calls to getRow(), so that the parts which do not change are automatically reused. + */ + @Override + public boolean getRow() + { + while (currentSiblingIndex >= 0) { + boolean currentSiblingProducedRow = siblings.get(currentSiblingIndex).getRow(); + if (currentSiblingProducedRow) { + for (int i = currentSiblingIndex + 1; i < siblings.size(); i++) { + JsonTableProcessingFragment sibling = siblings.get(i); + sibling.reset(currentItem, input, position); + boolean siblingProducedRow = sibling.getRow(); + if (!siblingProducedRow) { + // if any sibling is empty, the whole CROSS fragment is empty + return false; + } + } + currentSiblingIndex = siblings.size() - 1; + return true; + } + + // current sibling is finished + currentSiblingIndex--; + } + + // fragment is finished + return false; + } + + @Override + public int[] getOutputLayout() + { + return outputLayout; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java new file mode 100644 index 0000000000000..9a11e63067d41 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java @@ -0,0 +1,109 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
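FragmentCross enumerates the cross product of its siblings lazily: the rightmost sibling advances fastest, and when it is exhausted the fragment backtracks to the sibling on its left and re-resets everything to the right; a sibling that produces nothing makes the whole product empty. The same iteration order over plain lists, as a self-contained sketch on illustrative data:

import java.util.List;

public final class CrossOdometerSketch
{
    public static void main(String[] args)
    {
        List<List<String>> siblings = List.of(List.of("a1", "a2"), List.of("b1", "b2", "b3"));
        int[] indexes = new int[siblings.size()];
        boolean done = siblings.stream().anyMatch(List::isEmpty); // any empty sibling => empty cross product
        while (!done) {
            StringBuilder row = new StringBuilder();
            for (int i = 0; i < siblings.size(); i++) {
                row.append(siblings.get(i).get(indexes[i])).append(' ');
            }
            System.out.println(row.toString().trim()); // a1 b1, a1 b2, a1 b3, a2 b1, a2 b2, a2 b3
            // advance the odometer: rightmost sibling first, carry to the left on overflow
            int column = siblings.size() - 1;
            while (column >= 0) {
                indexes[column]++;
                if (indexes[column] < siblings.get(column).size()) {
                    break;
                }
                indexes[column] = 0;
                column--;
            }
            done = column < 0;
        }
    }

    private CrossOdometerSketch() {}
}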
+ */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.json.JsonPathEvaluator; +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.FunctionManager; +import io.trino.metadata.Metadata; +import io.trino.spi.Page; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.type.TypeManager; + +import java.util.List; + +import static io.trino.operator.table.json.execution.SequenceEvaluator.getSequence; +import static java.util.Objects.requireNonNull; + +public class FragmentLeaf + implements JsonTableProcessingFragment +{ + private static final Object[] NO_PARAMETERS = new Object[0]; + + private final JsonPathEvaluator pathEvaluator; + private final List columns; + private final boolean errorOnError; + private final int[] outputLayout; + + // the place where the computed values (or nulls) are stored while computing an output row + private final Object[] newRow; + + private Page input; + private int position; + private List sequence; + private int nextItemIndex; + + public FragmentLeaf( + IrJsonPath path, + List columns, + boolean errorOnError, + Object[] newRow, + ConnectorSession session, + Metadata metadata, + TypeManager typeManager, + FunctionManager functionManager) + { + requireNonNull(path, "path is null"); + this.pathEvaluator = new JsonPathEvaluator(path, session, metadata, typeManager, functionManager); + this.columns = ImmutableList.copyOf(columns); + this.errorOnError = errorOnError; + this.outputLayout = columns.stream() + .mapToInt(Column::getOutputIndex) + .toArray(); + this.newRow = requireNonNull(newRow, "newRow is null"); + } + + @Override + public void reset(JsonNode item, Page input, int position) + { + resetRoot(item, input, position, NO_PARAMETERS); + } + + /** + * FragmentLeaf can be the root Fragment. The root fragment is the only fragment that may have path parameters. + * Prepares the root Fragment to produce rows for the new JSON item and a set of path parameters. + */ + @Override + public void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters) + { + requireNonNull(pathParameters, "pathParameters is null"); + this.input = requireNonNull(input, "input is null"); + this.position = position; + this.nextItemIndex = 0; + this.sequence = getSequence(item, pathParameters, pathEvaluator, errorOnError); + } + + @Override + public boolean getRow() + { + if (nextItemIndex >= sequence.size()) { + // fragment is finished + return false; + } + JsonNode currentItem = sequence.get(nextItemIndex); + nextItemIndex++; // it is correct to pass the updated value to `column.evaluate()` because ordinality numbers are 1-based according to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules. + for (Column column : columns) { + newRow[column.getOutputIndex()] = column.evaluate(nextItemIndex, currentItem, input, position); + } + return true; + } + + @Override + public int[] getOutputLayout() + { + return outputLayout; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java new file mode 100644 index 0000000000000..d3d285f0658e3 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java @@ -0,0 +1,156 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
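FragmentLeaf turns each item of the evaluated path sequence into one output row by evaluating its columns, passing the 1-based ordinality that ISO/IEC 9075-2:2016 requires for FOR ORDINALITY columns. A self-contained sketch of that per-item column evaluation (toy column functions, illustrative names):

import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;

public final class LeafOrdinalitySketch
{
    public static void main(String[] args)
    {
        List<String> sequence = List.of("apple", "banana");          // items returned by the row path
        List<BiFunction<Long, String, Object>> columns = List.of(
                (ordinality, item) -> ordinality,                    // FOR ORDINALITY column: 1-based item number
                (ordinality, item) -> item.toUpperCase());           // a value column, heavily simplified
        List<Object[]> rows = new ArrayList<>();
        for (int i = 0; i < sequence.size(); i++) {
            long ordinality = i + 1;                                 // 1-based, matching the nextItemIndex handling above
            Object[] row = new Object[columns.size()];
            for (int c = 0; c < columns.size(); c++) {
                row[c] = columns.get(c).apply(ordinality, sequence.get(i));
            }
            rows.add(row);
        }
        rows.forEach(row -> System.out.println(List.of(row)));       // [1, APPLE] then [2, BANANA]
    }

    private LeafOrdinalitySketch() {}
}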
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.json.JsonPathEvaluator; +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.FunctionManager; +import io.trino.metadata.Metadata; +import io.trino.spi.Page; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.type.TypeManager; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.IntStream; + +import static io.trino.operator.table.json.execution.SequenceEvaluator.getSequence; +import static java.util.Objects.requireNonNull; + +public class FragmentSingle + implements JsonTableProcessingFragment +{ + private static final Object[] NO_PARAMETERS = new Object[] {}; + + private final JsonPathEvaluator pathEvaluator; + private final List columns; + private final boolean errorOnError; + private final boolean outer; + private final JsonTableProcessingFragment child; + private final int[] outputLayout; + + // the place where the computed values (or nulls) are stored while computing an output row + private final Object[] newRow; + + private Page input; + private int position; + private List sequence; + private int nextItemIndex; + + // start processing next item from the sequence + private boolean processNextItem; + + // indicates if we need to produce null-padded row for OUTER + private boolean childAlreadyProduced; + + public FragmentSingle( + IrJsonPath path, + List columns, + boolean errorOnError, + boolean outer, + JsonTableProcessingFragment child, + Object[] newRow, + ConnectorSession session, + Metadata metadata, + TypeManager typeManager, + FunctionManager functionManager) + { + requireNonNull(path, "path is null"); + this.pathEvaluator = new JsonPathEvaluator(path, session, metadata, typeManager, functionManager); + this.columns = ImmutableList.copyOf(columns); + this.errorOnError = errorOnError; + this.outer = outer; + this.child = requireNonNull(child, "child is null"); + this.outputLayout = IntStream.concat( + columns.stream() + .mapToInt(Column::getOutputIndex), + Arrays.stream(child.getOutputLayout())) + .toArray(); + this.newRow = requireNonNull(newRow, "newRow is null"); + } + + @Override + public void reset(JsonNode item, Page input, int position) + { + resetRoot(item, input, position, NO_PARAMETERS); + } + + /** + * FragmentSingle can be the root Fragment. The root fragment is the only fragment that may have path parameters. + * Prepares the root Fragment to produce rows for the new JSON item and a set of path parameters. + */ + @Override + public void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters) + { + requireNonNull(pathParameters, "pathParameters is null"); + this.input = requireNonNull(input, "input is null"); + this.position = position; + this.nextItemIndex = 0; + this.processNextItem = true; + this.sequence = getSequence(item, pathParameters, pathEvaluator, errorOnError); + } + + /** + * All values produced by the columns are stored on corresponding positions in `newRow`. 
+ * The values in `newRow` are not cleared between subsequent calls to `getRow()`, so the values for columns are automatically reused during iterating over child. + */ + @Override + public boolean getRow() + { + while (true) { + if (processNextItem) { + if (nextItemIndex >= sequence.size()) { + // fragment is finished + return false; + } + JsonNode currentItem = sequence.get(nextItemIndex); + nextItemIndex++; // it is correct to pass the updated value to `column.evaluate()` because ordinality numbers are 1-based according to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules. + for (Column column : columns) { + newRow[column.getOutputIndex()] = column.evaluate(nextItemIndex, currentItem, input, position); + } + child.reset(currentItem, input, position); + childAlreadyProduced = false; + processNextItem = false; + } + + boolean childProducedRow = child.getRow(); + if (childProducedRow) { + childAlreadyProduced = true; + return true; + } + + // child is finished + processNextItem = true; + if (outer && !childAlreadyProduced) { + appendNulls(child); + return true; + } + } + } + + private void appendNulls(JsonTableProcessingFragment fragment) + { + for (int column : fragment.getOutputLayout()) { + newRow[column] = null; + } + } + + @Override + public int[] getOutputLayout() + { + return outputLayout; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java new file mode 100644 index 0000000000000..30ae142f9dfad --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
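FragmentSingle composes a parent path with its nested child like a left join when the composition is OUTER: each parent item is combined with every child row, and a parent item whose child produced nothing is emitted once with the child's columns null-padded. A self-contained sketch of those semantics on toy data:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public final class NestedPathOuterSketch
{
    public static void main(String[] args)
    {
        // parent items with their nested-path matches; the second parent has none
        Map<String, List<String>> nested = Map.of("p1", List.of("c1", "c2"), "p2", List.<String>of());
        boolean outer = true; // OUTER parent/child composition
        List<String> rows = new ArrayList<>();
        for (String parent : List.of("p1", "p2")) {
            List<String> children = nested.get(parent);
            if (outer && children.isEmpty()) {
                rows.add(parent + ", null");           // child columns null-padded, parent still emitted
            }
            for (String child : children) {
                rows.add(parent + ", " + child);
            }
        }
        rows.forEach(System.out::println);             // p1, c1 / p1, c2 / p2, null
    }

    private NestedPathOuterSketch() {}
}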
+ */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.spi.Page; + +import java.util.Arrays; +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +public class FragmentUnion + implements JsonTableProcessingFragment +{ + private final List siblings; + private final int[] outputLayout; + + // the place where the computed values (or nulls) are stored while computing an output row + private final Object[] newRow; + + private int currentSiblingIndex; + + public FragmentUnion(List siblings, Object[] newRow) + { + this.siblings = ImmutableList.copyOf(siblings); + checkArgument(siblings.size() >= 2, "less than 2 siblings in Union node"); + this.outputLayout = siblings.stream() + .map(JsonTableProcessingFragment::getOutputLayout) + .flatMapToInt(Arrays::stream) + .toArray(); + this.newRow = requireNonNull(newRow, "newRow is null"); + } + + @Override + public void reset(JsonNode item, Page input, int position) + { + requireNonNull(item, "item is null"); + requireNonNull(input, "input is null"); + siblings.stream() + .forEach(sibling -> sibling.reset(item, input, position)); + this.currentSiblingIndex = 0; + appendNulls(this); + } + + /** + * The values produced by the current sibling are stored on corresponding positions in `newRow`, and for other siblings `newRow` is filled with nulls. + * The values in `newRow` are not cleared between subsequent calls to getRow(), so that the parts which do not change are automatically reused. + */ + @Override + public boolean getRow() + { + while (true) { + if (currentSiblingIndex >= siblings.size()) { + // fragment is finished + return false; + } + + JsonTableProcessingFragment currentSibling = siblings.get(currentSiblingIndex); + boolean currentSiblingProducedRow = currentSibling.getRow(); + if (currentSiblingProducedRow) { + return true; + } + + // current sibling is finished + appendNulls(currentSibling); + currentSiblingIndex++; + } + } + + private void appendNulls(JsonTableProcessingFragment fragment) + { + for (int column : fragment.getOutputLayout()) { + newRow[column] = null; + } + } + + @Override + public int[] getOutputLayout() + { + return outputLayout; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java new file mode 100644 index 0000000000000..bfe518b41036c --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
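FragmentUnion emits its siblings' rows one sibling after another; at any moment only the current sibling's columns hold computed values, and every other sibling's columns are null-padded in newRow. A self-contained sketch of the resulting row shape for two siblings that each own one output column:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public final class UnionNullPaddingSketch
{
    public static void main(String[] args)
    {
        // sibling A owns output column 0, sibling B owns output column 1
        List<String> siblingA = List.of("a1", "a2");
        List<String> siblingB = List.of("b1");
        List<Object[]> rows = new ArrayList<>();
        for (String a : siblingA) {
            rows.add(new Object[] {a, null});      // sibling B's column is null-padded
        }
        for (String b : siblingB) {
            rows.add(new Object[] {null, b});      // sibling A's column is null-padded
        }
        // [a1, null], [a2, null], [null, b1]
        rows.forEach(row -> System.out.println(Arrays.toString(row)));
    }

    private UnionNullPaddingSketch() {}
}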
+ */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import io.trino.spi.Page; + +public interface JsonTableProcessingFragment +{ + /** + * Prepares the Fragment to produce rows for the new JSON item. + * Note: This method must be called for each new JSON item. Due to nesting, there might be multiple JSON items to process for a single position in the input page. + * Therefore, input and position may not change for subsequent calls. + * + * @param item the new JSON item + * @param input the input Page currently processed by json_table function + * @param position the currently processed position in the input page + */ + void reset(JsonNode item, Page input, int position); + + /** + * Prepares the root Fragment to produce rows for the new JSON item and new set of path parameters. + * Note: at the root level, there is one JSON item and one set of path parameters to process for each position in the input page. + * + * @param item the new JSON item + * @param input the input Page currently processed by json_table function + * @param position the currently processed position in the input page + * @param pathParameters JSON path parameters for the top-level JSON path + */ + default void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters) + { + throw new IllegalStateException("not the root fragment"); + } + + /** + * Tries to produce output values for all columns included in the Fragment, + * and stores them in corresponding positions in `newRow`. + * Note: According to OUTER or UNION semantics, some values might be null-padded instead of computed. + * Note: a single JSON item might result in multiple output rows. To fully process a JSON item, the caller must: + * - reset the Fragment with the JSON item + * - call getRow() and collect output rows as long as `true` is returned + * If `false` is returned, there is no output row available, and the JSON item is fully processed + * + * @return true if row was produced, false if row was not produced (Fragment is finished) + */ + boolean getRow(); + + /** + * Returns an array containing indexes of columns produced by the fragment within all columns produced by json_table. + */ + int[] getOutputLayout(); +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java new file mode 100644 index 0000000000000..d26479ecf9e41 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java @@ -0,0 +1,40 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
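The contract documented above boils down to a cursor-style protocol: the caller resets the fragment once per JSON item and then drains it with getRow() until it returns false. A self-contained sketch of a caller driving that protocol against a toy stand-in for the interface (illustrative names, not the real JsonNode/Page signature):

import java.util.Iterator;
import java.util.List;

public final class FragmentDrivingSketch
{
    // minimal stand-in for the reset()/getRow() contract described above
    interface RowSource<T>
    {
        void reset(T item);

        boolean getRow(); // true => a row is available in the shared buffer
    }

    static <T> int drive(RowSource<T> source, List<T> items)
    {
        int rows = 0;
        for (T item : items) {
            source.reset(item);        // must be called once per JSON item
            while (source.getRow()) {  // drain every row the item produces
                rows++;
            }
        }
        return rows;
    }

    public static void main(String[] args)
    {
        // toy source producing two rows per item
        RowSource<String> source = new RowSource<>()
        {
            private Iterator<String> remaining;

            @Override
            public void reset(String item)
            {
                remaining = List.of(item + "#1", item + "#2").iterator();
            }

            @Override
            public boolean getRow()
            {
                if (remaining.hasNext()) {
                    remaining.next();
                    return true;
                }
                return false;
            }
        };
        System.out.println(drive(source, List.of("x", "y"))); // prints 4
    }

    private FragmentDrivingSketch() {}
}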
+ */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import io.trino.spi.Page; + +public class OrdinalityColumn + implements Column +{ + private final int outputIndex; + + public OrdinalityColumn(int outputIndex) + { + this.outputIndex = outputIndex; + } + + @Override + public Object evaluate(long sequentialNumber, JsonNode item, Page input, int position) + { + return sequentialNumber; + } + + @Override + public int getOutputIndex() + { + return outputIndex; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java new file mode 100644 index 0000000000000..613ec5c41db39 --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import io.trino.json.ir.IrJsonPath; +import io.trino.spi.Page; + +import java.lang.invoke.MethodHandle; + +import static com.google.common.base.Throwables.throwIfUnchecked; +import static java.util.Objects.requireNonNull; + +public class QueryColumn + implements Column +{ + private final int outputIndex; + private final MethodHandle methodHandle; + private final IrJsonPath path; + private final long wrapperBehavior; + private final long emptyBehavior; + private final long errorBehavior; + + public QueryColumn(int outputIndex, MethodHandle methodHandle, IrJsonPath path, long wrapperBehavior, long emptyBehavior, long errorBehavior) + { + this.outputIndex = outputIndex; + this.methodHandle = requireNonNull(methodHandle, "methodHandle is null"); + this.path = requireNonNull(path, "path is null"); + this.wrapperBehavior = wrapperBehavior; + this.emptyBehavior = emptyBehavior; + this.errorBehavior = errorBehavior; + } + + @Override + public Object evaluate(long sequentialNumber, JsonNode item, Page input, int position) + { + try { + return methodHandle.invoke(item, path, null, wrapperBehavior, emptyBehavior, errorBehavior); + } + catch (Throwable throwable) { + // According to ISO/IEC 9075-2:2016(E) 7.11 p.462 General rules 1) e) ii) 3) D) any exception thrown by column evaluation should be propagated. 
+ throwIfUnchecked(throwable); + throw new RuntimeException(throwable); + } + } + + @Override + public int getOutputIndex() + { + return outputIndex; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java new file mode 100644 index 0000000000000..32b4fe0b9389c --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java @@ -0,0 +1,93 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import io.trino.json.JsonPathEvaluator; +import io.trino.json.PathEvaluationException; +import io.trino.json.ir.TypedValue; +import io.trino.operator.scalar.json.JsonOutputConversionException; + +import java.util.List; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkState; +import static io.trino.json.JsonInputErrorNode.JSON_ERROR; +import static io.trino.json.ir.SqlJsonLiteralConverter.getJsonNode; +import static java.lang.String.format; + +public class SequenceEvaluator +{ + private SequenceEvaluator() + { + } + + // creates a sequence of JSON items, and applies error handling + public static List getSequence(JsonNode item, Object[] pathParameters, JsonPathEvaluator pathEvaluator, boolean errorOnError) + { + if (item == null) { + // According to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules 1) a) empty table should be returned for null input. Empty sequence will result in an empty table. + return ImmutableList.of(); + } + // According to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules 1) e) exception thrown by path evaluation should be handled accordingly to json_table error behavior (ERROR or EMPTY). + // handle input conversion error for the context item + if (item.equals(JSON_ERROR)) { + checkState(!errorOnError, "input conversion error should have been thrown in the input function"); + // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table. + return ImmutableList.of(); + } + // handle input conversion error for the path parameters + for (Object parameter : pathParameters) { + if (parameter.equals(JSON_ERROR)) { + checkState(!errorOnError, "input conversion error should have been thrown in the input function"); + // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table. + return ImmutableList.of(); + } + } + // evaluate path into a sequence + List pathResult; + try { + pathResult = pathEvaluator.evaluate(item, pathParameters); + } + catch (PathEvaluationException e) { + if (errorOnError) { + throw e; + } + // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table. 
+ return ImmutableList.of(); + } + // convert sequence to JSON items + ImmutableList.Builder builder = ImmutableList.builder(); + for (Object element : pathResult) { + if (element instanceof TypedValue typedValue) { + Optional jsonNode = getJsonNode(typedValue); + if (jsonNode.isEmpty()) { + if (errorOnError) { + throw new JsonOutputConversionException(format( + "JSON path returned a scalar SQL value of type %s that cannot be represented as JSON", + ((TypedValue) element).getType())); + } + // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table. + return ImmutableList.of(); + } + builder.add(jsonNode.get()); + } + else { + builder.add((JsonNode) element); + } + } + return builder.build(); + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ValueColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ValueColumn.java new file mode 100644 index 0000000000000..a8c29d8baff7d --- /dev/null +++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ValueColumn.java @@ -0,0 +1,93 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.operator.table.json.execution; + +import com.fasterxml.jackson.databind.JsonNode; +import io.trino.json.ir.IrJsonPath; +import io.trino.spi.Page; +import io.trino.spi.type.Type; + +import java.lang.invoke.MethodHandle; + +import static com.google.common.base.Throwables.throwIfUnchecked; +import static io.trino.spi.type.TypeUtils.readNativeValue; +import static java.util.Objects.requireNonNull; + +public class ValueColumn + implements Column +{ + private final int outputIndex; + private final MethodHandle methodHandle; + private final IrJsonPath path; + private final long emptyBehavior; + private final int emptyDefaultInput; + private final long errorBehavior; + private final int errorDefaultInput; + private final Type resultType; + + public ValueColumn( + int outputIndex, + MethodHandle methodHandle, + IrJsonPath path, + long emptyBehavior, + int emptyDefaultInput, + long errorBehavior, + int errorDefaultInput, + Type resultType) + { + this.outputIndex = outputIndex; + this.methodHandle = requireNonNull(methodHandle, "methodHandle is null"); + this.path = requireNonNull(path, "path is null"); + this.emptyBehavior = emptyBehavior; + this.emptyDefaultInput = emptyDefaultInput; + this.errorBehavior = errorBehavior; + this.errorDefaultInput = errorDefaultInput; + this.resultType = requireNonNull(resultType, "resultType is null"); + } + + @Override + public Object evaluate(long sequentialNumber, JsonNode item, Page input, int position) + { + Object emptyDefault; + if (emptyDefaultInput == -1) { + emptyDefault = null; + } + else { + emptyDefault = readNativeValue(resultType, input.getBlock(emptyDefaultInput), position); + } + + Object errorDefault; + if (errorDefaultInput == -1) { + errorDefault = null; + } + else { + errorDefault = readNativeValue(resultType, input.getBlock(errorDefaultInput), position); + } + + try { + return 
methodHandle.invoke(item, path, null, emptyBehavior, emptyDefault, errorBehavior, errorDefault); + } + catch (Throwable throwable) { + // According to ISO/IEC 9075-2:2016(E) 7.11 p.462 General rules 1) e) ii) 2) D) any exception thrown by column evaluation should be propagated. + throwIfUnchecked(throwable); + throw new RuntimeException(throwable); + } + } + + @Override + public int getOutputIndex() + { + return outputIndex; + } +} diff --git a/core/trino-main/src/main/java/io/trino/operator/window/matcher/IrRowPatternToProgramRewriter.java b/core/trino-main/src/main/java/io/trino/operator/window/matcher/IrRowPatternToProgramRewriter.java index b6cb2d8de1351..b7e16be58da0d 100644 --- a/core/trino-main/src/main/java/io/trino/operator/window/matcher/IrRowPatternToProgramRewriter.java +++ b/core/trino-main/src/main/java/io/trino/operator/window/matcher/IrRowPatternToProgramRewriter.java @@ -33,7 +33,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.Collections2.orderedPermutations; import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.lang.String.format; import static java.util.Objects.requireNonNull; public class IrRowPatternToProgramRewriter @@ -200,7 +199,7 @@ protected Void visitIrQuantified(IrQuantified node, Void context) private void loopingQuantified(IrRowPattern pattern, boolean greedy, int min) { - checkArgument(min >= 0, "invalid min value: " + min); + checkArgument(min >= 0, "invalid min value: %s", min); if (min == 0) { int startSplitPosition = instructions.size(); @@ -243,7 +242,7 @@ private void loop(int loopingPosition, boolean greedy) private void rangeQuantified(IrRowPattern pattern, boolean greedy, int min, int max) { - checkArgument(min <= max, format("invalid range: (%s, %s)", min, max)); + checkArgument(min <= max, "invalid range: (%s, %s)", min, max); for (int i = 0; i < min; i++) { process(pattern); diff --git a/core/trino-main/src/main/java/io/trino/server/CoordinatorModule.java b/core/trino-main/src/main/java/io/trino/server/CoordinatorModule.java index 613edeb3335ef..f51575bc57b3f 100644 --- a/core/trino-main/src/main/java/io/trino/server/CoordinatorModule.java +++ b/core/trino-main/src/main/java/io/trino/server/CoordinatorModule.java @@ -49,6 +49,7 @@ import io.trino.execution.ForQueryExecution; import io.trino.execution.QueryExecution; import io.trino.execution.QueryExecutionMBean; +import io.trino.execution.QueryExecutorInternal; import io.trino.execution.QueryIdGenerator; import io.trino.execution.QueryManager; import io.trino.execution.QueryManagerConfig; @@ -105,6 +106,7 @@ import io.trino.server.remotetask.RemoteTaskStats; import io.trino.server.ui.WebUiModule; import io.trino.server.ui.WorkerResource; +import io.trino.spi.VersionEmbedder; import io.trino.spi.memory.ClusterMemoryPoolManager; import io.trino.sql.PlannerContext; import io.trino.sql.analyzer.AnalyzerFactory; @@ -141,6 +143,7 @@ import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.server.InternalCommunicationHttpClientModule.internalHttpClientModule; +import static io.trino.util.Executors.decorateWithVersion; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; @@ -245,7 +248,8 @@ protected void setup(Binder binder) 
binder.bind(ByEagerParentOutputDataSizeEstimator.Factory.class).in(Scopes.SINGLETON); // use provider method returning list to ensure ordering // OutputDataSizeEstimator factories are ordered starting from most accurate - install(new AbstractConfigurationAwareModule() { + install(new AbstractConfigurationAwareModule() + { @Override protected void setup(Binder binder) {} @@ -333,15 +337,6 @@ List getCompositeOutputDataSizeEstimatorDelegate .toInstance(newSingleThreadScheduledExecutor(threadsNamed("stage-scheduler"))); // query execution - QueryManagerConfig queryManagerConfig = buildConfigObject(QueryManagerConfig.class); - ThreadPoolExecutor queryExecutor = new ThreadPoolExecutor( - queryManagerConfig.getQueryExecutorPoolSize(), - queryManagerConfig.getQueryExecutorPoolSize(), - 60, SECONDS, - new LinkedBlockingQueue<>(1000), - threadsNamed("query-execution-%s")); - queryExecutor.allowCoreThreadTimeOut(true); - binder.bind(ExecutorService.class).annotatedWith(ForQueryExecution.class).toInstance(queryExecutor); binder.bind(QueryExecutionMBean.class).in(Scopes.SINGLETON); newExporter(binder).export(QueryExecutionMBean.class) .as(generator -> generator.generatedNameOf(QueryExecution.class)); @@ -385,6 +380,29 @@ public static ResourceGroupManager getResourceGroupManager(@SuppressWarnings( return manager; } + @Provides + @Singleton + @QueryExecutorInternal + public static ExecutorService createQueryExecutor(QueryManagerConfig queryManagerConfig) + { + ThreadPoolExecutor queryExecutor = new ThreadPoolExecutor( + queryManagerConfig.getQueryExecutorPoolSize(), + queryManagerConfig.getQueryExecutorPoolSize(), + 60, SECONDS, + new LinkedBlockingQueue<>(1000), + threadsNamed("query-execution-%s")); + queryExecutor.allowCoreThreadTimeOut(true); + return queryExecutor; + } + + @Provides + @Singleton + @ForQueryExecution + public static ExecutorService createQueryExecutor(@QueryExecutorInternal ExecutorService queryExecutor, VersionEmbedder versionEmbedder) + { + return decorateWithVersion(queryExecutor, versionEmbedder); + } + @Provides @Singleton public static QueryPerformanceFetcher createQueryPerformanceFetcher(QueryManager queryManager) diff --git a/core/trino-main/src/main/java/io/trino/server/HttpRequestSessionContextFactory.java b/core/trino-main/src/main/java/io/trino/server/HttpRequestSessionContextFactory.java index cf821e7622105..775d663fe26bd 100644 --- a/core/trino-main/src/main/java/io/trino/server/HttpRequestSessionContextFactory.java +++ b/core/trino-main/src/main/java/io/trino/server/HttpRequestSessionContextFactory.java @@ -60,6 +60,7 @@ import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.net.HttpHeaders.USER_AGENT; import static io.trino.client.ProtocolHeaders.detectProtocol; +import static io.trino.server.ServletSecurityUtils.authenticatedIdentity; import static io.trino.spi.security.AccessDeniedException.denySetRole; import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; @@ -69,16 +70,22 @@ public class HttpRequestSessionContextFactory { private static final Splitter DOT_SPLITTER = Splitter.on('.'); - public static final String AUTHENTICATED_IDENTITY = "trino.authenticated-identity"; private final PreparedStatementEncoder preparedStatementEncoder; private final Metadata metadata; private final GroupProvider groupProvider; private final AccessControl accessControl; + private final Optional alternateHeaderName; @Inject - public HttpRequestSessionContextFactory(PreparedStatementEncoder 
preparedStatementEncoder, Metadata metadata, GroupProvider groupProvider, AccessControl accessControl) + public HttpRequestSessionContextFactory( + PreparedStatementEncoder preparedStatementEncoder, + Metadata metadata, + GroupProvider groupProvider, + AccessControl accessControl, + ProtocolConfig protocolConfig) { + this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); this.preparedStatementEncoder = requireNonNull(preparedStatementEncoder, "preparedStatementEncoder is null"); this.metadata = requireNonNull(metadata, "metadata is null"); this.groupProvider = requireNonNull(groupProvider, "groupProvider is null"); @@ -87,7 +94,6 @@ public HttpRequestSessionContextFactory(PreparedStatementEncoder preparedStateme public SessionContext createSessionContext( MultivaluedMap headers, - Optional alternateHeaderName, Optional remoteAddress, Optional authenticatedIdentity) throws WebApplicationException @@ -184,21 +190,12 @@ else if (nameParts.size() == 2) { clientInfo); } - public Identity extractAuthorizedIdentity( - HttpServletRequest servletRequest, - HttpHeaders httpHeaders, - Optional alternateHeaderName) + public Identity extractAuthorizedIdentity(HttpServletRequest servletRequest, HttpHeaders httpHeaders) { - return extractAuthorizedIdentity( - Optional.ofNullable((Identity) servletRequest.getAttribute(AUTHENTICATED_IDENTITY)), - httpHeaders.getRequestHeaders(), - alternateHeaderName); + return extractAuthorizedIdentity(authenticatedIdentity(servletRequest), httpHeaders.getRequestHeaders()); } - public Identity extractAuthorizedIdentity( - Optional optionalAuthenticatedIdentity, - MultivaluedMap headers, - Optional alternateHeaderName) + public Identity extractAuthorizedIdentity(Optional optionalAuthenticatedIdentity, MultivaluedMap headers) throws AccessDeniedException { ProtocolHeaders protocolHeaders; diff --git a/core/trino-main/src/main/java/io/trino/server/QueryResource.java b/core/trino-main/src/main/java/io/trino/server/QueryResource.java index 0640059d25361..ae38cfabda018 100644 --- a/core/trino-main/src/main/java/io/trino/server/QueryResource.java +++ b/core/trino-main/src/main/java/io/trino/server/QueryResource.java @@ -58,15 +58,13 @@ public class QueryResource private final DispatchManager dispatchManager; private final AccessControl accessControl; private final HttpRequestSessionContextFactory sessionContextFactory; - private final Optional alternateHeaderName; @Inject - public QueryResource(DispatchManager dispatchManager, AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory, ProtocolConfig protocolConfig) + public QueryResource(DispatchManager dispatchManager, AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory) { this.dispatchManager = requireNonNull(dispatchManager, "dispatchManager is null"); this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); } @ResourceSecurity(AUTHENTICATED_USER) @@ -76,7 +74,7 @@ public List getAllQueryInfo(@QueryParam("state") String stateFil QueryState expectedState = stateFilter == null ? 
null : QueryState.valueOf(stateFilter.toUpperCase(Locale.ENGLISH)); List queries = dispatchManager.getQueries(); - queries = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queries, accessControl); + queries = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queries, accessControl); ImmutableList.Builder builder = ImmutableList.builder(); for (BasicQueryInfo queryInfo : queries) { @@ -99,7 +97,7 @@ public Response getQueryInfo(@PathParam("queryId") QueryId queryId, @Context Htt return Response.status(Status.GONE).build(); } try { - checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.get().getSession().toIdentity(), accessControl); + checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.get().getSession().toIdentity(), accessControl); return Response.ok(queryInfo.get()).build(); } catch (AccessDeniedException e) { @@ -116,7 +114,7 @@ public void cancelQuery(@PathParam("queryId") QueryId queryId, @Context HttpServ try { BasicQueryInfo queryInfo = dispatchManager.getQueryInfo(queryId); - checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.getSession().toIdentity(), accessControl); + checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.getSession().toIdentity(), accessControl); dispatchManager.cancelQuery(queryId); } catch (AccessDeniedException e) { @@ -149,7 +147,7 @@ private Response failQuery(QueryId queryId, TrinoException queryException, HttpS try { BasicQueryInfo queryInfo = dispatchManager.getQueryInfo(queryId); - checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.getSession().toIdentity(), accessControl); + checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.getSession().toIdentity(), accessControl); // check before killing to provide the proper error code (this is racy) if (queryInfo.getState().isDone()) { diff --git a/core/trino-main/src/main/java/io/trino/server/QueryStateInfoResource.java b/core/trino-main/src/main/java/io/trino/server/QueryStateInfoResource.java index 63997ff0e8c75..df07b0bff9ee3 100644 --- a/core/trino-main/src/main/java/io/trino/server/QueryStateInfoResource.java +++ b/core/trino-main/src/main/java/io/trino/server/QueryStateInfoResource.java @@ -56,21 +56,18 @@ public class QueryStateInfoResource private final ResourceGroupManager resourceGroupManager; private final AccessControl accessControl; private final HttpRequestSessionContextFactory sessionContextFactory; - private final Optional alternateHeaderName; @Inject public QueryStateInfoResource( DispatchManager dispatchManager, ResourceGroupManager resourceGroupManager, AccessControl accessControl, - HttpRequestSessionContextFactory sessionContextFactory, - ProtocolConfig protocolConfig) + HttpRequestSessionContextFactory sessionContextFactory) { this.dispatchManager = requireNonNull(dispatchManager, "dispatchManager is null"); this.resourceGroupManager = requireNonNull(resourceGroupManager, "resourceGroupManager is null"); this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.sessionContextFactory = requireNonNull(sessionContextFactory, 
"sessionContextFactory is null"); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); } @ResourceSecurity(AUTHENTICATED_USER) @@ -79,7 +76,7 @@ public QueryStateInfoResource( public List getQueryStateInfos(@QueryParam("user") String user, @Context HttpServletRequest servletRequest, @Context HttpHeaders httpHeaders) { List queryInfos = dispatchManager.getQueries(); - queryInfos = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfos, accessControl); + queryInfos = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfos, accessControl); if (!isNullOrEmpty(user)) { queryInfos = queryInfos.stream() @@ -115,7 +112,7 @@ public QueryStateInfo getQueryStateInfo(@PathParam("queryId") String queryId, @C { try { BasicQueryInfo queryInfo = dispatchManager.getQueryInfo(new QueryId(queryId)); - checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.getSession().toIdentity(), accessControl); + checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.getSession().toIdentity(), accessControl); return getQueryStateInfo(queryInfo); } catch (AccessDeniedException e) { diff --git a/core/trino-main/src/main/java/io/trino/server/Server.java b/core/trino-main/src/main/java/io/trino/server/Server.java index c90c1221af699..a2510514c9155 100644 --- a/core/trino-main/src/main/java/io/trino/server/Server.java +++ b/core/trino-main/src/main/java/io/trino/server/Server.java @@ -63,7 +63,7 @@ import io.trino.server.security.oauth2.OAuth2Client; import io.trino.spi.connector.CatalogHandle; import io.trino.transaction.TransactionManagerModule; -import io.trino.version.EmbedVersion; +import io.trino.util.EmbedVersion; import org.weakref.jmx.guice.MBeanModule; import java.io.IOException; diff --git a/core/trino-main/src/main/java/io/trino/server/ServerMainModule.java b/core/trino-main/src/main/java/io/trino/server/ServerMainModule.java index ad76199fe4b19..96c8f407d58a3 100644 --- a/core/trino-main/src/main/java/io/trino/server/ServerMainModule.java +++ b/core/trino-main/src/main/java/io/trino/server/ServerMainModule.java @@ -57,7 +57,6 @@ import io.trino.execution.scheduler.NodeSchedulerConfig; import io.trino.execution.scheduler.TopologyAwareNodeSelectorModule; import io.trino.execution.scheduler.UniformNodeSelectorModule; -import io.trino.index.IndexManager; import io.trino.memory.LocalMemoryManager; import io.trino.memory.LocalMemoryManagerExporter; import io.trino.memory.MemoryInfo; @@ -94,6 +93,7 @@ import io.trino.operator.PagesIndexPageSorter; import io.trino.operator.RetryPolicy; import io.trino.operator.index.IndexJoinLookupStats; +import io.trino.operator.index.IndexManager; import io.trino.operator.scalar.json.JsonExistsFunction; import io.trino.operator.scalar.json.JsonQueryFunction; import io.trino.operator.scalar.json.JsonValueFunction; @@ -153,8 +153,8 @@ import io.trino.type.TypeOperatorsCache; import io.trino.type.TypeSignatureDeserializer; import io.trino.type.TypeSignatureKeyDeserializer; +import io.trino.util.EmbedVersion; import io.trino.util.FinalizerService; -import io.trino.version.EmbedVersion; import jakarta.annotation.PreDestroy; import java.util.List; diff --git a/core/trino-main/src/main/java/io/trino/server/ServletSecurityUtils.java b/core/trino-main/src/main/java/io/trino/server/ServletSecurityUtils.java index 
e25e3be88760e..ebd3169b370f9 100644
--- a/core/trino-main/src/main/java/io/trino/server/ServletSecurityUtils.java
+++ b/core/trino-main/src/main/java/io/trino/server/ServletSecurityUtils.java
@@ -15,6 +15,7 @@
 import io.trino.spi.security.BasicPrincipal;
 import io.trino.spi.security.Identity;
+import jakarta.servlet.http.HttpServletRequest;
 import jakarta.ws.rs.container.ContainerRequestContext;
 import jakarta.ws.rs.core.Response;
 import jakarta.ws.rs.core.Response.ResponseBuilder;
@@ -23,14 +24,16 @@
 import java.security.Principal;
 import java.util.Collection;
+import java.util.Optional;
 import static com.google.common.net.MediaType.PLAIN_TEXT_UTF_8;
-import static io.trino.server.HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY;
 import static jakarta.ws.rs.core.HttpHeaders.WWW_AUTHENTICATE;
 import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED;
 public final class ServletSecurityUtils
 {
+    private static final String AUTHENTICATED_IDENTITY = "trino.authenticated-identity";
+
     private ServletSecurityUtils() {}
     public static void sendErrorMessage(ContainerRequestContext request, Status errorCode, String errorMessage)
@@ -62,6 +65,21 @@ private static ResponseBuilder errorResponse(Status errorCode, String errorMessa
             .entity(errorMessage);
     }
+    public static Optional authenticatedIdentity(ContainerRequestContext request)
+    {
+        return Optional.ofNullable((Identity) request.getProperty(AUTHENTICATED_IDENTITY));
+    }
+
+    public static Optional authenticatedIdentity(HttpServletRequest request)
+    {
+        return Optional.ofNullable((Identity) request.getAttribute(AUTHENTICATED_IDENTITY));
+    }
+
+    public static void clearAuthenticatedIdentity(HttpServletRequest request)
+    {
+        request.setAttribute(AUTHENTICATED_IDENTITY, null);
+    }
+
     public static void setAuthenticatedIdentity(ContainerRequestContext request, String username)
     {
         setAuthenticatedIdentity(request, Identity.forUser(username)
diff --git a/core/trino-main/src/main/java/io/trino/server/TrinoSystemRequirements.java b/core/trino-main/src/main/java/io/trino/server/TrinoSystemRequirements.java
index 7abb563cc108b..d3ec3cba17926 100644
--- a/core/trino-main/src/main/java/io/trino/server/TrinoSystemRequirements.java
+++ b/core/trino-main/src/main/java/io/trino/server/TrinoSystemRequirements.java
@@ -94,7 +94,7 @@ else if ("Mac OS X".equals(osName)) {
     private static void verifyJavaVersion()
     {
-        Version required = Version.parse("17.0.5");
+        Version required = Version.parse("21.0.1");
         if (Runtime.version().compareTo(required) < 0) {
             failRequirement("Trino requires Java %s at minimum (found %s)", required, Runtime.version());
diff --git a/core/trino-main/src/main/java/io/trino/server/remotetask/HttpRemoteTask.java b/core/trino-main/src/main/java/io/trino/server/remotetask/HttpRemoteTask.java
index 1bfbfaf5919d7..d767cc62505cc 100644
--- a/core/trino-main/src/main/java/io/trino/server/remotetask/HttpRemoteTask.java
+++ b/core/trino-main/src/main/java/io/trino/server/remotetask/HttpRemoteTask.java
@@ -58,6 +58,7 @@
 import io.trino.execution.buffer.PipelinedOutputBuffers;
 import io.trino.execution.buffer.SpoolingOutputStats;
 import io.trino.metadata.Split;
+import io.trino.operator.RetryPolicy;
 import io.trino.operator.TaskStats;
 import io.trino.server.DynamicFilterService;
 import io.trino.server.FailTaskRequest;
@@ -110,6 +111,7 @@
 import static io.trino.SystemSessionProperties.getMaxUnacknowledgedSplitsPerTask;
 import static io.trino.SystemSessionProperties.getRemoteTaskGuaranteedSplitsPerRequest;
 import static
io.trino.SystemSessionProperties.getRemoteTaskRequestSizeHeadroom; +import static io.trino.SystemSessionProperties.getRetryPolicy; import static io.trino.SystemSessionProperties.isRemoteTaskAdaptiveUpdateRequestSizeEnabled; import static io.trino.execution.DynamicFiltersCollector.INITIAL_DYNAMIC_FILTERS_VERSION; import static io.trino.execution.TaskInfo.createInitialTask; @@ -117,6 +119,7 @@ import static io.trino.execution.TaskStatus.failWith; import static io.trino.server.remotetask.RequestErrorTracker.logError; import static io.trino.spi.HostAddress.fromUri; +import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.trino.spi.StandardErrorCode.REMOTE_TASK_ERROR; import static io.trino.util.Failures.toFailure; import static java.lang.Math.addExact; @@ -342,6 +345,7 @@ public HttpRemoteTask( errorScheduledExecutor, stats); + RetryPolicy retryPolicy = getRetryPolicy(session); this.taskInfoFetcher = new TaskInfoFetcher( this::fatalUnacknowledgedFailure, taskStatusFetcher, @@ -356,7 +360,8 @@ public HttpRemoteTask( updateScheduledExecutor, errorScheduledExecutor, stats, - estimatedMemory); + estimatedMemory, + retryPolicy); taskStatusFetcher.addStateChangeListener(newStatus -> { TaskState state = newStatus.getState(); @@ -714,6 +719,16 @@ boolean adjustSplitBatchSize(List splitAssignments, long reques } private void sendUpdate() + { + try { + sendUpdateInternal(); + } + catch (Throwable e) { + fatalUnacknowledgedFailure(new TrinoException(GENERIC_INTERNAL_ERROR, "unexpected error calling sendUpdate()", e)); + } + } + + private void sendUpdateInternal() { TaskStatus taskStatus = getTaskStatus(); // don't update if the task is already finishing or finished, or if we have sent a termination command diff --git a/core/trino-main/src/main/java/io/trino/server/remotetask/TaskInfoFetcher.java b/core/trino-main/src/main/java/io/trino/server/remotetask/TaskInfoFetcher.java index 3d1b170521bd5..7f87b2c5c94d1 100644 --- a/core/trino-main/src/main/java/io/trino/server/remotetask/TaskInfoFetcher.java +++ b/core/trino-main/src/main/java/io/trino/server/remotetask/TaskInfoFetcher.java @@ -33,6 +33,7 @@ import io.trino.execution.TaskState; import io.trino.execution.TaskStatus; import io.trino.execution.buffer.SpoolingOutputStats; +import io.trino.operator.RetryPolicy; import java.net.URI; import java.util.Optional; @@ -52,6 +53,7 @@ import static io.airlift.http.client.HttpUriBuilder.uriBuilderFrom; import static io.airlift.http.client.Request.Builder.prepareGet; import static io.airlift.units.Duration.nanosSince; +import static io.trino.operator.RetryPolicy.TASK; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -81,6 +83,8 @@ public class TaskInfoFetcher private final AtomicReference spoolingOutputStats = new AtomicReference<>(); + private final RetryPolicy retryPolicy; + @GuardedBy("this") private boolean running; @@ -104,7 +108,8 @@ public TaskInfoFetcher( ScheduledExecutorService updateScheduledExecutor, ScheduledExecutorService errorScheduledExecutor, RemoteTaskStats stats, - Optional estimatedMemory) + Optional estimatedMemory, + RetryPolicy retryPolicy) { requireNonNull(initialTask, "initialTask is null"); requireNonNull(errorScheduledExecutor, "errorScheduledExecutor is null"); @@ -127,6 +132,7 @@ public TaskInfoFetcher( this.spanBuilderFactory = requireNonNull(spanBuilderFactory, "spanBuilderFactory is null"); this.stats = requireNonNull(stats, "stats is null"); this.estimatedMemory = 
requireNonNull(estimatedMemory, "estimatedMemory is null");
+        this.retryPolicy = requireNonNull(retryPolicy, "retryPolicy is null");
     }
     public TaskInfo getTaskInfo()
@@ -250,9 +256,16 @@ synchronized void updateTaskInfo(TaskInfo newTaskInfo)
         TaskStatus localTaskStatus = taskStatusFetcher.getTaskStatus();
         TaskStatus newRemoteTaskStatus = newTaskInfo.getTaskStatus();
+        if (!newRemoteTaskStatus.getTaskId().equals(taskId)) {
+            log.debug("Task ID mismatch on remote task status. Member task ID is %s, but remote task ID is %s. This will confuse finalTaskInfo listeners.", taskId, newRemoteTaskStatus.getTaskId());
+        }
+
         if (localTaskStatus.getState().isDone() && newRemoteTaskStatus.getState().isDone() && localTaskStatus.getState() != newRemoteTaskStatus.getState()) {
             // prefer local
             newTaskInfo = newTaskInfo.withTaskStatus(localTaskStatus);
+            if (!localTaskStatus.getTaskId().equals(taskId)) {
+                log.debug("Task ID mismatch on local task status. Member task ID is %s, but status-fetcher ID is %s. This will confuse finalTaskInfo listeners.", taskId, localTaskStatus.getTaskId());
+            }
         }
         if (estimatedMemory.isPresent()) {
@@ -260,7 +273,10 @@ synchronized void updateTaskInfo(TaskInfo newTaskInfo)
         }
         if (newTaskInfo.getTaskStatus().getState().isDone()) {
-            spoolingOutputStats.compareAndSet(null, newTaskInfo.getOutputBuffers().getSpoolingOutputStats().orElse(null));
+            boolean wasSet = spoolingOutputStats.compareAndSet(null, newTaskInfo.getOutputBuffers().getSpoolingOutputStats().orElse(null));
+            if (retryPolicy == TASK && wasSet && spoolingOutputStats.get() == null) {
+                log.debug("Task %s was updated to null spoolingOutputStats. Future calls to retrieveAndDropSpoolingOutputStats will fail.", taskId);
+            }
             newTaskInfo = newTaskInfo.pruneSpoolingOutputStats();
         }
diff --git a/core/trino-main/src/main/java/io/trino/server/security/ResourceSecurityDynamicFeature.java b/core/trino-main/src/main/java/io/trino/server/security/ResourceSecurityDynamicFeature.java
index 2ffe53bdd71ad..9296ffdb24e3d 100644
--- a/core/trino-main/src/main/java/io/trino/server/security/ResourceSecurityDynamicFeature.java
+++ b/core/trino-main/src/main/java/io/trino/server/security/ResourceSecurityDynamicFeature.java
@@ -17,7 +17,6 @@
 import io.trino.security.AccessControl;
 import io.trino.server.HttpRequestSessionContextFactory;
 import io.trino.server.InternalAuthenticationManager;
-import io.trino.server.ProtocolConfig;
 import io.trino.server.security.ResourceSecurity.AccessType;
 import io.trino.server.ui.WebUiAuthenticationFilter;
 import io.trino.spi.TrinoException;
@@ -37,7 +36,7 @@
 import java.util.Optional;
-import static io.trino.server.HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY;
+import static io.trino.server.ServletSecurityUtils.authenticatedIdentity;
 import static io.trino.server.ServletSecurityUtils.setAuthenticatedIdentity;
 import static io.trino.server.security.ResourceSecurity.AccessType.MANAGEMENT_READ;
 import static io.trino.spi.StandardErrorCode.SERVER_STARTING_UP;
@@ -54,7 +53,6 @@ public class ResourceSecurityDynamicFeature
     private final HttpRequestSessionContextFactory sessionContextFactory;
     private final Optional fixedManagementUser;
     private final boolean fixedManagementUserForHttps;
-    private final Optional alternateHeaderName;
     @Inject
     public ResourceSecurityDynamicFeature(
@@ -64,8 +62,7 @@ public ResourceSecurityDynamicFeature(
             InternalAuthenticationManager internalAuthenticationManager,
             AccessControl accessControl,
             HttpRequestSessionContextFactory sessionContextFactory,
-            SecurityConfig
securityConfig, - ProtocolConfig protocolConfig) + SecurityConfig securityConfig) { this.resourceAccessType = requireNonNull(resourceAccessType, "resourceAccessType is null"); this.authenticationFilter = requireNonNull(authenticationFilter, "authenticationFilter is null"); @@ -75,7 +72,6 @@ public ResourceSecurityDynamicFeature( this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); this.fixedManagementUser = securityConfig.getFixedManagementUser(); this.fixedManagementUserForHttps = securityConfig.isFixedManagementUserForHttps(); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); } @Override @@ -97,7 +93,7 @@ public void configure(ResourceInfo resourceInfo, FeatureContext context) case MANAGEMENT_READ: case MANAGEMENT_WRITE: context.register(new ManagementAuthenticationFilter(fixedManagementUser, fixedManagementUserForHttps, authenticationFilter)); - context.register(new ManagementAuthorizationFilter(accessControl, sessionContextFactory, accessType == MANAGEMENT_READ, alternateHeaderName)); + context.register(new ManagementAuthorizationFilter(accessControl, sessionContextFactory, accessType == MANAGEMENT_READ)); context.register(new DisposeIdentityResponseFilter()); return; case INTERNAL_ONLY: @@ -141,14 +137,12 @@ private static class ManagementAuthorizationFilter private final AccessControl accessControl; private final HttpRequestSessionContextFactory sessionContextFactory; private final boolean read; - private final Optional alternateHeaderName; - public ManagementAuthorizationFilter(AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory, boolean read, Optional alternateHeaderName) + public ManagementAuthorizationFilter(AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory, boolean read) { this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); this.read = read; - this.alternateHeaderName = requireNonNull(alternateHeaderName, "alternateHeaderName is null"); } @Override @@ -159,10 +153,7 @@ public void filter(ContainerRequestContext request) } try { - Identity identity = sessionContextFactory.extractAuthorizedIdentity( - Optional.ofNullable((Identity) request.getProperty(AUTHENTICATED_IDENTITY)), - request.getHeaders(), - alternateHeaderName); + Identity identity = sessionContextFactory.extractAuthorizedIdentity(authenticatedIdentity(request), request.getHeaders()); if (read) { accessControl.checkCanReadSystemInformation(identity); } @@ -214,9 +205,7 @@ private static class DisposeIdentityResponseFilter public void filter(ContainerRequestContext request, ContainerResponseContext response) { // destroy identity if identity is still attached to the request - Optional.ofNullable(request.getProperty(AUTHENTICATED_IDENTITY)) - .map(Identity.class::cast) - .ifPresent(Identity::destroy); + authenticatedIdentity(request).ifPresent(Identity::destroy); } } } diff --git a/core/trino-main/src/main/java/io/trino/server/testing/TestingTrinoServer.java b/core/trino-main/src/main/java/io/trino/server/testing/TestingTrinoServer.java index 672b7497b3527..d702f91516e39 100644 --- a/core/trino-main/src/main/java/io/trino/server/testing/TestingTrinoServer.java +++ b/core/trino-main/src/main/java/io/trino/server/testing/TestingTrinoServer.java @@ -40,6 +40,7 @@ import io.airlift.openmetrics.JmxOpenMetricsModule; import io.airlift.tracetoken.TraceTokenModule; import 
io.airlift.tracing.TracingModule; +import io.opentelemetry.sdk.trace.SpanProcessor; import io.trino.connector.CatalogManagerModule; import io.trino.connector.ConnectorName; import io.trino.connector.ConnectorServicesProvider; @@ -128,6 +129,7 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static com.google.inject.multibindings.Multibinder.newSetBinder; import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.airlift.concurrent.MoreFutures.getFutureValue; import static java.lang.Integer.parseInt; @@ -230,6 +232,7 @@ private TestingTrinoServer( Optional discoveryUri, Module additionalModule, Optional baseDataDir, + Optional spanProcessor, Optional systemAccessControlConfiguration, Optional> systemAccessControls, List eventListeners) @@ -302,6 +305,7 @@ private TestingTrinoServer( binder.bind(GracefulShutdownHandler.class).in(Scopes.SINGLETON); binder.bind(ProcedureTester.class).in(Scopes.SINGLETON); binder.bind(ExchangeManagerRegistry.class).in(Scopes.SINGLETON); + spanProcessor.ifPresent(processor -> newSetBinder(binder, SpanProcessor.class).addBinding().toInstance(processor)); }); if (coordinator) { @@ -716,6 +720,7 @@ public static class Builder private Optional discoveryUri = Optional.empty(); private Module additionalModule = EMPTY_MODULE; private Optional baseDataDir = Optional.empty(); + private Optional spanProcessor = Optional.empty(); private Optional systemAccessControlConfiguration = Optional.empty(); private Optional> systemAccessControls = Optional.of(ImmutableList.of()); private List eventListeners = ImmutableList.of(); @@ -756,6 +761,12 @@ public Builder setBaseDataDir(Optional baseDataDir) return this; } + public Builder setSpanProcessor(Optional spanProcessor) + { + this.spanProcessor = requireNonNull(spanProcessor, "spanProcessor is null"); + return this; + } + public Builder setSystemAccessControlConfiguration(Optional systemAccessControlConfiguration) { this.systemAccessControlConfiguration = requireNonNull(systemAccessControlConfiguration, "systemAccessControlConfiguration is null"); @@ -788,6 +799,7 @@ public TestingTrinoServer build() discoveryUri, additionalModule, baseDataDir, + spanProcessor, systemAccessControlConfiguration, systemAccessControls, eventListeners); diff --git a/core/trino-main/src/main/java/io/trino/server/ui/UiQueryResource.java b/core/trino-main/src/main/java/io/trino/server/ui/UiQueryResource.java index 40531ed228030..9ed0e1f6fac07 100644 --- a/core/trino-main/src/main/java/io/trino/server/ui/UiQueryResource.java +++ b/core/trino-main/src/main/java/io/trino/server/ui/UiQueryResource.java @@ -21,7 +21,6 @@ import io.trino.security.AccessControl; import io.trino.server.BasicQueryInfo; import io.trino.server.HttpRequestSessionContextFactory; -import io.trino.server.ProtocolConfig; import io.trino.server.security.ResourceSecurity; import io.trino.spi.QueryId; import io.trino.spi.TrinoException; @@ -57,15 +56,13 @@ public class UiQueryResource private final DispatchManager dispatchManager; private final AccessControl accessControl; private final HttpRequestSessionContextFactory sessionContextFactory; - private final Optional alternateHeaderName; @Inject - public UiQueryResource(DispatchManager dispatchManager, AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory, ProtocolConfig protocolConfig) + public UiQueryResource(DispatchManager 
dispatchManager, AccessControl accessControl, HttpRequestSessionContextFactory sessionContextFactory) { this.dispatchManager = requireNonNull(dispatchManager, "dispatchManager is null"); this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); } @ResourceSecurity(WEB_UI) @@ -75,7 +72,7 @@ public List getAllQueryInfo(@QueryParam("state") String s QueryState expectedState = stateFilter == null ? null : QueryState.valueOf(stateFilter.toUpperCase(Locale.ENGLISH)); List queries = dispatchManager.getQueries(); - queries = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queries, accessControl); + queries = filterQueries(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queries, accessControl); ImmutableList.Builder builder = ImmutableList.builder(); for (BasicQueryInfo queryInfo : queries) { @@ -96,7 +93,7 @@ public Response getQueryInfo(@PathParam("queryId") QueryId queryId, @Context Htt Optional queryInfo = dispatchManager.getFullQueryInfo(queryId); if (queryInfo.isPresent()) { try { - checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.get().getSession().toIdentity(), accessControl); + checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.get().getSession().toIdentity(), accessControl); return Response.ok(queryInfo.get()).build(); } catch (AccessDeniedException e) { @@ -129,7 +126,7 @@ private Response failQuery(QueryId queryId, TrinoException queryException, HttpS try { BasicQueryInfo queryInfo = dispatchManager.getQueryInfo(queryId); - checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.getSession().toIdentity(), accessControl); + checkCanKillQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.getSession().toIdentity(), accessControl); // check before killing to provide the proper error code (this is racy) if (queryInfo.getState().isDone()) { diff --git a/core/trino-main/src/main/java/io/trino/server/ui/WorkerResource.java b/core/trino-main/src/main/java/io/trino/server/ui/WorkerResource.java index bf03403eed123..df7e119e25c73 100644 --- a/core/trino-main/src/main/java/io/trino/server/ui/WorkerResource.java +++ b/core/trino-main/src/main/java/io/trino/server/ui/WorkerResource.java @@ -28,7 +28,6 @@ import io.trino.security.AccessControl; import io.trino.server.ForWorkerInfo; import io.trino.server.HttpRequestSessionContextFactory; -import io.trino.server.ProtocolConfig; import io.trino.server.security.ResourceSecurity; import io.trino.spi.Node; import io.trino.spi.QueryId; @@ -71,7 +70,6 @@ public class WorkerResource private final AccessControl accessControl; private final HttpClient httpClient; private final HttpRequestSessionContextFactory sessionContextFactory; - private final Optional alternateHeaderName; @Inject public WorkerResource( @@ -79,15 +77,13 @@ public WorkerResource( InternalNodeManager nodeManager, AccessControl accessControl, @ForWorkerInfo HttpClient httpClient, - HttpRequestSessionContextFactory sessionContextFactory, - ProtocolConfig protocolConfig) + HttpRequestSessionContextFactory sessionContextFactory) { 
this.dispatchManager = requireNonNull(dispatchManager, "dispatchManager is null"); this.nodeManager = requireNonNull(nodeManager, "nodeManager is null"); this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.httpClient = requireNonNull(httpClient, "httpClient is null"); this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null"); - this.alternateHeaderName = protocolConfig.getAlternateHeaderName(); } @ResourceSecurity(WEB_UI) @@ -119,7 +115,7 @@ public Response getThreads( Optional queryInfo = dispatchManager.getFullQueryInfo(queryId); if (queryInfo.isPresent()) { try { - checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, alternateHeaderName), queryInfo.get().getSession().toIdentity(), accessControl); + checkCanViewQueryOwnedBy(sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders), queryInfo.get().getSession().toIdentity(), accessControl); return proxyJsonResponse(nodeId, "v1/task/" + task); } catch (AccessDeniedException e) { diff --git a/core/trino-main/src/main/java/io/trino/split/SplitManager.java b/core/trino-main/src/main/java/io/trino/split/SplitManager.java index 4ec43a8670a73..c35bfa05debd5 100644 --- a/core/trino-main/src/main/java/io/trino/split/SplitManager.java +++ b/core/trino-main/src/main/java/io/trino/split/SplitManager.java @@ -38,6 +38,7 @@ import static io.airlift.concurrent.Threads.daemonThreadsNamed; import static io.trino.SystemSessionProperties.isAllowPushdownIntoConnectors; +import static io.trino.tracing.ScopedSpan.scopedSpan; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newCachedThreadPool; @@ -80,12 +81,18 @@ public SplitSource getSplits( ConnectorSession connectorSession = session.toConnectorSession(catalogHandle); - ConnectorSplitSource source = splitManager.getSplits( - table.getTransaction(), - connectorSession, - table.getConnectorHandle(), - dynamicFilter, - constraint); + ConnectorSplitSource source; + try (var ignore = scopedSpan(tracer.spanBuilder("SplitManager.getSplits") + .setParent(Context.current().with(parentSpan)) + .setAttribute(TrinoAttributes.TABLE, table.getConnectorHandle().toString()) + .startSpan())) { + source = splitManager.getSplits( + table.getTransaction(), + connectorSession, + table.getConnectorHandle(), + dynamicFilter, + constraint); + } SplitSource splitSource = new ConnectorAwareSplitSource(catalogHandle, source); @@ -108,10 +115,16 @@ public SplitSource getSplits(Session session, Span parentSpan, TableFunctionHand CatalogHandle catalogHandle = function.getCatalogHandle(); ConnectorSplitManager splitManager = splitManagerProvider.getService(catalogHandle); - ConnectorSplitSource source = splitManager.getSplits( - function.getTransactionHandle(), - session.toConnectorSession(catalogHandle), - function.getFunctionHandle()); + ConnectorSplitSource source; + try (var ignore = scopedSpan(tracer.spanBuilder("SplitManager.getSplits") + .setParent(Context.current().with(parentSpan)) + .setAttribute(TrinoAttributes.FUNCTION, function.getFunctionHandle().toString()) + .startSpan())) { + source = splitManager.getSplits( + function.getTransactionHandle(), + session.toConnectorSession(catalogHandle), + function.getFunctionHandle()); + } SplitSource splitSource = new ConnectorAwareSplitSource(catalogHandle, source); @@ -119,10 +132,10 @@ public SplitSource getSplits(Session session, Span parentSpan, TableFunctionHand return new 
TracingSplitSource(splitSource, tracer, Optional.of(span), "split-buffer"); } - private Span splitSourceSpan(Span querySpan, CatalogHandle catalogHandle) + private Span splitSourceSpan(Span parentSpan, CatalogHandle catalogHandle) { return tracer.spanBuilder("split-source") - .setParent(Context.current().with(querySpan)) + .setParent(Context.current().with(parentSpan)) .setAttribute(TrinoAttributes.CATALOG, catalogHandle.getCatalogName()) .startSpan(); } diff --git a/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java b/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java index db7f5c8f94fcb..82bf825822eb4 100644 --- a/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java +++ b/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java @@ -193,7 +193,11 @@ public static Optional getDescriptor(Expression expression) private static boolean isDynamicFilterFunction(FunctionCall functionCall) { - CatalogSchemaFunctionName functionName = ResolvedFunction.extractFunctionName(functionCall.getName()); + return isDynamicFilterFunction(ResolvedFunction.extractFunctionName(functionCall.getName())); + } + + public static boolean isDynamicFilterFunction(CatalogSchemaFunctionName functionName) + { return functionName.equals(builtinFunctionName(Function.NAME)) || functionName.equals(builtinFunctionName(NullableFunction.NAME)); } diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/Analysis.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/Analysis.java index 96b29a1b31cd8..c878a5b118584 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/Analysis.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/Analysis.java @@ -62,6 +62,8 @@ import io.trino.sql.tree.Identifier; import io.trino.sql.tree.InPredicate; import io.trino.sql.tree.Join; +import io.trino.sql.tree.JsonTable; +import io.trino.sql.tree.JsonTableColumnDefinition; import io.trino.sql.tree.LambdaArgumentDeclaration; import io.trino.sql.tree.MeasureDefinition; import io.trino.sql.tree.Node; @@ -161,9 +163,10 @@ public class Analysis private final Set> patternAggregations = new LinkedHashSet<>(); // for JSON features - private final Map, JsonPathAnalysis> jsonPathAnalyses = new LinkedHashMap<>(); + private final Map, JsonPathAnalysis> jsonPathAnalyses = new LinkedHashMap<>(); private final Map, ResolvedFunction> jsonInputFunctions = new LinkedHashMap<>(); - private final Map, ResolvedFunction> jsonOutputFunctions = new LinkedHashMap<>(); + private final Map, ResolvedFunction> jsonOutputFunctions = new LinkedHashMap<>(); + private final Map, JsonTableAnalysis> jsonTableAnalyses = new LinkedHashMap<>(); private final Map, List> aggregates = new LinkedHashMap<>(); private final Map, List> orderByAggregates = new LinkedHashMap<>(); @@ -204,7 +207,7 @@ public class Analysis private final Map, Type> sortKeyCoercionsForFrameBoundComparison = new LinkedHashMap<>(); private final Map, ResolvedFunction> frameBoundCalculations = new LinkedHashMap<>(); private final Map, List> relationCoercions = new LinkedHashMap<>(); - private final Map, RoutineEntry> resolvedFunctions = new LinkedHashMap<>(); + private final Map, RoutineEntry> resolvedFunctions = new LinkedHashMap<>(); private final Map, LambdaArgumentDeclaration> lambdaArgumentReferences = new LinkedHashMap<>(); private final Map columns = new LinkedHashMap<>(); @@ -656,12 +659,12 @@ public Set getResolvedFunctions() .collect(toImmutableSet()); } - public ResolvedFunction getResolvedFunction(Expression node) + public 
ResolvedFunction getResolvedFunction(Node node) { return resolvedFunctions.get(NodeRef.of(node)).getFunction(); } - public void addResolvedFunction(Expression node, ResolvedFunction function, String authorization) + public void addResolvedFunction(Node node, ResolvedFunction function, String authorization) { resolvedFunctions.put(NodeRef.of(node), new RoutineEntry(function, authorization)); } @@ -1021,14 +1024,19 @@ public boolean isPatternAggregation(FunctionCall function) return patternAggregations.contains(NodeRef.of(function)); } - public void setJsonPathAnalyses(Map, JsonPathAnalysis> pathAnalyses) + public void setJsonPathAnalyses(Map, JsonPathAnalysis> pathAnalyses) { jsonPathAnalyses.putAll(pathAnalyses); } - public JsonPathAnalysis getJsonPathAnalysis(Expression expression) + public void setJsonPathAnalysis(Node node, JsonPathAnalysis pathAnalysis) { - return jsonPathAnalyses.get(NodeRef.of(expression)); + jsonPathAnalyses.put(NodeRef.of(node), pathAnalysis); + } + + public JsonPathAnalysis getJsonPathAnalysis(Node node) + { + return jsonPathAnalyses.get(NodeRef.of(node)); } public void setJsonInputFunctions(Map, ResolvedFunction> functions) @@ -1041,14 +1049,24 @@ public ResolvedFunction getJsonInputFunction(Expression expression) return jsonInputFunctions.get(NodeRef.of(expression)); } - public void setJsonOutputFunctions(Map, ResolvedFunction> functions) + public void setJsonOutputFunctions(Map, ResolvedFunction> functions) { jsonOutputFunctions.putAll(functions); } - public ResolvedFunction getJsonOutputFunction(Expression expression) + public ResolvedFunction getJsonOutputFunction(Node node) + { + return jsonOutputFunctions.get(NodeRef.of(node)); + } + + public void addJsonTableAnalysis(JsonTable jsonTable, JsonTableAnalysis analysis) + { + jsonTableAnalyses.put(NodeRef.of(jsonTable), analysis); + } + + public JsonTableAnalysis getJsonTableAnalysis(JsonTable jsonTable) { - return jsonOutputFunctions.get(NodeRef.of(expression)); + return jsonTableAnalyses.get(NodeRef.of(jsonTable)); } public Map>> getTableColumnReferences() @@ -2388,4 +2406,19 @@ public ConnectorTransactionHandle getTransactionHandle() return transactionHandle; } } + + public record JsonTableAnalysis( + CatalogHandle catalogHandle, + ConnectorTransactionHandle transactionHandle, + RowType parametersType, + List> orderedOutputColumns) + { + public JsonTableAnalysis + { + requireNonNull(catalogHandle, "catalogHandle is null"); + requireNonNull(transactionHandle, "transactionHandle is null"); + requireNonNull(parametersType, "parametersType is null"); + requireNonNull(orderedOutputColumns, "orderedOutputColumns is null"); + } + } } diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/ExpressionAnalyzer.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/ExpressionAnalyzer.java index 0ef7db9ef3843..c97fcd62bd4d1 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/ExpressionAnalyzer.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/ExpressionAnalyzer.java @@ -78,6 +78,7 @@ import io.trino.sql.tree.CurrentSchema; import io.trino.sql.tree.CurrentTime; import io.trino.sql.tree.CurrentUser; +import io.trino.sql.tree.DataType; import io.trino.sql.tree.DecimalLiteral; import io.trino.sql.tree.DereferenceExpression; import io.trino.sql.tree.DoubleLiteral; @@ -106,6 +107,7 @@ import io.trino.sql.tree.JsonPathParameter; import io.trino.sql.tree.JsonPathParameter.JsonFormat; import io.trino.sql.tree.JsonQuery; +import io.trino.sql.tree.JsonTable; import 
io.trino.sql.tree.JsonValue; import io.trino.sql.tree.LambdaArgumentDeclaration; import io.trino.sql.tree.LambdaExpression; @@ -123,6 +125,7 @@ import io.trino.sql.tree.ProcessingMode; import io.trino.sql.tree.QualifiedName; import io.trino.sql.tree.QuantifiedComparisonExpression; +import io.trino.sql.tree.QueryColumn; import io.trino.sql.tree.RangeQuantifier; import io.trino.sql.tree.Row; import io.trino.sql.tree.RowPattern; @@ -139,6 +142,7 @@ import io.trino.sql.tree.TimestampLiteral; import io.trino.sql.tree.Trim; import io.trino.sql.tree.TryExpression; +import io.trino.sql.tree.ValueColumn; import io.trino.sql.tree.VariableDefinition; import io.trino.sql.tree.WhenClause; import io.trino.sql.tree.WindowFrame; @@ -297,7 +301,7 @@ public class ExpressionAnalyzer // Cache from SQL type name to Type; every Type in the cache has a CAST defined from VARCHAR private final Cache varcharCastableTypeCache = buildNonEvictableCache(CacheBuilder.newBuilder().maximumSize(1000)); - private final Map, ResolvedFunction> resolvedFunctions = new LinkedHashMap<>(); + private final Map, ResolvedFunction> resolvedFunctions = new LinkedHashMap<>(); private final Set> subqueries = new LinkedHashSet<>(); private final Set> existsSubqueries = new LinkedHashSet<>(); private final Map, Type> expressionCoercions = new LinkedHashMap<>(); @@ -336,9 +340,9 @@ public class ExpressionAnalyzer private final Set> patternAggregations = new LinkedHashSet<>(); // for JSON functions - private final Map, JsonPathAnalysis> jsonPathAnalyses = new LinkedHashMap<>(); + private final Map, JsonPathAnalysis> jsonPathAnalyses = new LinkedHashMap<>(); private final Map, ResolvedFunction> jsonInputFunctions = new LinkedHashMap<>(); - private final Map, ResolvedFunction> jsonOutputFunctions = new LinkedHashMap<>(); + private final Map, ResolvedFunction> jsonOutputFunctions = new LinkedHashMap<>(); private final Session session; private final Map, Expression> parameters; @@ -402,7 +406,7 @@ private ExpressionAnalyzer( this.functionResolver = plannerContext.getFunctionResolver(warningCollector); } - public Map, ResolvedFunction> getResolvedFunctions() + public Map, ResolvedFunction> getResolvedFunctions() { return unmodifiableMap(resolvedFunctions); } @@ -500,6 +504,42 @@ private Type analyze(Expression expression, Scope baseScope, Context context) return visitor.process(expression, new StackableAstVisitor.StackableAstVisitorContext<>(context)); } + private RowType analyzeJsonPathInvocation(JsonTable node, Scope scope, CorrelationSupport correlationSupport) + { + Visitor visitor = new Visitor(scope, warningCollector); + List inputTypes = visitor.analyzeJsonPathInvocation("JSON_TABLE", node, node.getJsonPathInvocation(), new StackableAstVisitor.StackableAstVisitorContext<>(Context.notInLambda(scope, correlationSupport))); + return (RowType) inputTypes.get(2); + } + + private Type analyzeJsonValueExpression(ValueColumn column, JsonPathAnalysis pathAnalysis, Scope scope, CorrelationSupport correlationSupport) + { + Visitor visitor = new Visitor(scope, warningCollector); + List pathInvocationArgumentTypes = ImmutableList.of(JSON_2016, plannerContext.getTypeManager().getType(TypeId.of(JsonPath2016Type.NAME)), JSON_NO_PARAMETERS_ROW_TYPE); + return visitor.analyzeJsonValueExpression( + column, + pathAnalysis, + Optional.of(column.getType()), + pathInvocationArgumentTypes, + column.getEmptyBehavior(), + column.getEmptyDefault(), + column.getErrorBehavior(), + column.getErrorDefault(), + new 
StackableAstVisitor.StackableAstVisitorContext<>(Context.notInLambda(scope, correlationSupport))); + } + + private Type analyzeJsonQueryExpression(QueryColumn column, Scope scope) + { + Visitor visitor = new Visitor(scope, warningCollector); + List pathInvocationArgumentTypes = ImmutableList.of(JSON_2016, plannerContext.getTypeManager().getType(TypeId.of(JsonPath2016Type.NAME)), JSON_NO_PARAMETERS_ROW_TYPE); + return visitor.analyzeJsonQueryExpression( + column, + column.getWrapperBehavior(), + column.getQuotesBehavior(), + pathInvocationArgumentTypes, + Optional.of(column.getType()), + Optional.of(column.getFormat())); + } + private void analyzeWindow(ResolvedWindow window, Scope scope, Node originalNode, CorrelationSupport correlationSupport) { Visitor visitor = new Visitor(scope, warningCollector); @@ -566,7 +606,7 @@ public Set> getPatternAggregations() return patternAggregations; } - public Map, JsonPathAnalysis> getJsonPathAnalyses() + public Map, JsonPathAnalysis> getJsonPathAnalyses() { return jsonPathAnalyses; } @@ -576,7 +616,7 @@ public Map, ResolvedFunction> getJsonInputFunctions() return jsonInputFunctions; } - public Map, ResolvedFunction> getJsonOutputFunctions() + public Map, ResolvedFunction> getJsonOutputFunctions() { return jsonOutputFunctions; } @@ -2532,15 +2572,38 @@ public Type visitJsonExists(JsonExists node, StackableAstVisitorContext public Type visitJsonValue(JsonValue node, StackableAstVisitorContext context) { List pathInvocationArgumentTypes = analyzeJsonPathInvocation("JSON_VALUE", node, node.getJsonPathInvocation(), context); + Type returnedType = analyzeJsonValueExpression( + node, + jsonPathAnalyses.get(NodeRef.of(node)), + node.getReturnedType(), + pathInvocationArgumentTypes, + node.getEmptyBehavior(), + node.getEmptyDefault(), + Optional.of(node.getErrorBehavior()), + node.getErrorDefault(), + context); + return setExpressionType(node, returnedType); + } + private Type analyzeJsonValueExpression( + Node node, + JsonPathAnalysis pathAnalysis, + Optional declaredReturnedType, + List pathInvocationArgumentTypes, + JsonValue.EmptyOrErrorBehavior emptyBehavior, + Optional declaredEmptyDefault, + Optional errorBehavior, + Optional declaredErrorDefault, + StackableAstVisitorContext context) + { // validate returned type Type returnedType = VARCHAR; // default - if (node.getReturnedType().isPresent()) { + if (declaredReturnedType.isPresent()) { try { - returnedType = plannerContext.getTypeManager().getType(toTypeSignature(node.getReturnedType().get())); + returnedType = plannerContext.getTypeManager().getType(toTypeSignature(declaredReturnedType.get())); } catch (TypeNotFoundException e) { - throw semanticException(TYPE_MISMATCH, node, "Unknown type: %s", node.getReturnedType().get()); + throw semanticException(TYPE_MISMATCH, node, "Unknown type: %s", declaredReturnedType.get()); } } @@ -2550,10 +2613,9 @@ public Type visitJsonValue(JsonValue node, StackableAstVisitorContext c !isDateTimeType(returnedType) || returnedType.equals(INTERVAL_DAY_TIME) || returnedType.equals(INTERVAL_YEAR_MONTH)) { - throw semanticException(TYPE_MISMATCH, node, "Invalid return type of function JSON_VALUE: %s", node.getReturnedType().get()); + throw semanticException(TYPE_MISMATCH, node, "Invalid return type of function JSON_VALUE: %s", declaredReturnedType.get()); } - JsonPathAnalysis pathAnalysis = jsonPathAnalyses.get(NodeRef.of(node)); Type resultType = pathAnalysis.getType(pathAnalysis.getPath()); if (resultType != null && !resultType.equals(returnedType)) { try { @@ -2565,20 
+2627,23 @@ public Type visitJsonValue(JsonValue node, StackableAstVisitorContext c } // validate default values for empty and error behavior - if (node.getEmptyDefault().isPresent()) { - Expression emptyDefault = node.getEmptyDefault().get(); - if (node.getEmptyBehavior() != DEFAULT) { - throw semanticException(INVALID_FUNCTION_ARGUMENT, emptyDefault, "Default value specified for %s ON EMPTY behavior", node.getEmptyBehavior()); + if (declaredEmptyDefault.isPresent()) { + Expression emptyDefault = declaredEmptyDefault.get(); + if (emptyBehavior != DEFAULT) { + throw semanticException(INVALID_FUNCTION_ARGUMENT, emptyDefault, "Default value specified for %s ON EMPTY behavior", emptyBehavior); } Type type = process(emptyDefault, context); // this would normally be done after function resolution, but we know that the default expression is always coerced to the returnedType coerceType(emptyDefault, type, returnedType, "Function JSON_VALUE default ON EMPTY result"); } - if (node.getErrorDefault().isPresent()) { - Expression errorDefault = node.getErrorDefault().get(); - if (node.getErrorBehavior() != DEFAULT) { - throw semanticException(INVALID_FUNCTION_ARGUMENT, errorDefault, "Default value specified for %s ON ERROR behavior", node.getErrorBehavior()); + if (declaredErrorDefault.isPresent()) { + Expression errorDefault = declaredErrorDefault.get(); + if (errorBehavior.isEmpty()) { + throw new IllegalStateException("error default specified without error behavior specified"); + } + if (errorBehavior.orElseThrow() != DEFAULT) { + throw semanticException(INVALID_FUNCTION_ARGUMENT, errorDefault, "Default value specified for %s ON ERROR behavior", errorBehavior.orElseThrow()); } Type type = process(errorDefault, context); // this would normally be done after function resolution, but we know that the default expression is always coerced to the returnedType @@ -2606,21 +2671,32 @@ public Type visitJsonValue(JsonValue node, StackableAstVisitorContext c throw new TrinoException(e::getErrorCode, extractLocation(node), e.getMessage(), e); } resolvedFunctions.put(NodeRef.of(node), function); - Type type = function.getSignature().getReturnType(); - return setExpressionType(node, type); + return function.getSignature().getReturnType(); } @Override public Type visitJsonQuery(JsonQuery node, StackableAstVisitorContext context) { List pathInvocationArgumentTypes = analyzeJsonPathInvocation("JSON_QUERY", node, node.getJsonPathInvocation(), context); + Type returnedType = analyzeJsonQueryExpression( + node, + node.getWrapperBehavior(), + node.getQuotesBehavior(), + pathInvocationArgumentTypes, + node.getReturnedType(), + node.getOutputFormat()); + return setExpressionType(node, returnedType); + } - // validate wrapper and quotes behavior - if ((node.getWrapperBehavior() == CONDITIONAL || node.getWrapperBehavior() == UNCONDITIONAL) && node.getQuotesBehavior().isPresent()) { - throw semanticException(INVALID_FUNCTION_ARGUMENT, node, "%s QUOTES behavior specified with WITH %s ARRAY WRAPPER behavior", node.getQuotesBehavior().get(), node.getWrapperBehavior()); - } - + private Type analyzeJsonQueryExpression( + Node node, + JsonQuery.ArrayWrapperBehavior wrapperBehavior, + Optional quotesBehavior, + List pathInvocationArgumentTypes, + Optional declaredReturnedType, + Optional declaredOutputFormat) + { // wrapper behavior, empty behavior and error behavior will be passed as arguments to function // quotes behavior is handled by the corresponding output function List argumentTypes = ImmutableList.builder() @@ -2630,6 
+2706,11 @@ public Type visitJsonQuery(JsonQuery node, StackableAstVisitorContext c .add(TINYINT) // error behavior: enum encoded as integer value .build(); + // validate wrapper and quotes behavior + if ((wrapperBehavior == CONDITIONAL || wrapperBehavior == UNCONDITIONAL) && quotesBehavior.isPresent()) { + throw semanticException(INVALID_FUNCTION_ARGUMENT, node, "%s QUOTES behavior specified with WITH %s ARRAY WRAPPER behavior", quotesBehavior.get(), wrapperBehavior); + } + // resolve function ResolvedFunction function; try { @@ -2645,15 +2726,15 @@ public Type visitJsonQuery(JsonQuery node, StackableAstVisitorContext c // analyze returned type and format Type returnedType = VARCHAR; // default - if (node.getReturnedType().isPresent()) { + if (declaredReturnedType.isPresent()) { try { - returnedType = plannerContext.getTypeManager().getType(toTypeSignature(node.getReturnedType().get())); + returnedType = plannerContext.getTypeManager().getType(toTypeSignature(declaredReturnedType.get())); } catch (TypeNotFoundException e) { - throw semanticException(TYPE_MISMATCH, node, "Unknown type: %s", node.getReturnedType().get()); + throw semanticException(TYPE_MISMATCH, node, "Unknown type: %s", declaredReturnedType.get()); } } - JsonFormat outputFormat = node.getOutputFormat().orElse(JsonFormat.JSON); // default + JsonFormat outputFormat = declaredOutputFormat.orElse(JsonFormat.JSON); // default // resolve function to format output ResolvedFunction outputFunction = getOutputFunction(returnedType, outputFormat, node); @@ -2670,13 +2751,15 @@ public Type visitJsonQuery(JsonQuery node, StackableAstVisitorContext c } } - return setExpressionType(node, returnedType); + return returnedType; } - private List analyzeJsonPathInvocation(String functionName, Expression node, JsonPathInvocation jsonPathInvocation, StackableAstVisitorContext context) + private List analyzeJsonPathInvocation(String functionName, Node node, JsonPathInvocation jsonPathInvocation, StackableAstVisitorContext context) { jsonPathInvocation.getPathName().ifPresent(pathName -> { - throw semanticException(INVALID_PATH, pathName, "JSON path name is not allowed in %s function", functionName); + if (!(node instanceof JsonTable)) { + throw semanticException(INVALID_PATH, pathName, "JSON path name is not allowed in %s function", functionName); + } }); // ANALYZE THE CONTEXT ITEM @@ -3444,6 +3527,79 @@ public static ExpressionAnalysis analyzeExpression( analyzer.getWindowFunctions()); } + public static ParametersTypeAndAnalysis analyzeJsonPathInvocation( + JsonTable node, + Session session, + PlannerContext plannerContext, + StatementAnalyzerFactory statementAnalyzerFactory, + AccessControl accessControl, + Scope scope, + Analysis analysis, + WarningCollector warningCollector, + CorrelationSupport correlationSupport) + { + ExpressionAnalyzer analyzer = new ExpressionAnalyzer(plannerContext, accessControl, statementAnalyzerFactory, analysis, session, TypeProvider.empty(), warningCollector); + RowType parametersRowType = analyzer.analyzeJsonPathInvocation(node, scope, correlationSupport); + updateAnalysis(analysis, analyzer, session, accessControl); + return new ParametersTypeAndAnalysis( + parametersRowType, + new ExpressionAnalysis( + analyzer.getExpressionTypes(), + analyzer.getExpressionCoercions(), + analyzer.getSubqueryInPredicates(), + analyzer.getSubqueries(), + analyzer.getExistsSubqueries(), + analyzer.getColumnReferences(), + analyzer.getTypeOnlyCoercions(), + analyzer.getQuantifiedComparisons(), + analyzer.getWindowFunctions())); + 
} + + public record ParametersTypeAndAnalysis(RowType parametersType, ExpressionAnalysis expressionAnalysis) {} + + public static TypeAndAnalysis analyzeJsonValueExpression( + ValueColumn column, + JsonPathAnalysis pathAnalysis, + Session session, + PlannerContext plannerContext, + StatementAnalyzerFactory statementAnalyzerFactory, + AccessControl accessControl, + Scope scope, + Analysis analysis, + WarningCollector warningCollector, + CorrelationSupport correlationSupport) + { + ExpressionAnalyzer analyzer = new ExpressionAnalyzer(plannerContext, accessControl, statementAnalyzerFactory, analysis, session, TypeProvider.empty(), warningCollector); + Type type = analyzer.analyzeJsonValueExpression(column, pathAnalysis, scope, correlationSupport); + updateAnalysis(analysis, analyzer, session, accessControl); + return new TypeAndAnalysis(type, new ExpressionAnalysis( + analyzer.getExpressionTypes(), + analyzer.getExpressionCoercions(), + analyzer.getSubqueryInPredicates(), + analyzer.getSubqueries(), + analyzer.getExistsSubqueries(), + analyzer.getColumnReferences(), + analyzer.getTypeOnlyCoercions(), + analyzer.getQuantifiedComparisons(), + analyzer.getWindowFunctions())); + } + + public static Type analyzeJsonQueryExpression( + QueryColumn column, + Session session, + PlannerContext plannerContext, + StatementAnalyzerFactory statementAnalyzerFactory, + AccessControl accessControl, + Scope scope, + Analysis analysis, + WarningCollector warningCollector) + { + ExpressionAnalyzer analyzer = new ExpressionAnalyzer(plannerContext, accessControl, statementAnalyzerFactory, analysis, session, TypeProvider.empty(), warningCollector); + Type type = analyzer.analyzeJsonQueryExpression(column, scope); + updateAnalysis(analysis, analyzer, session, accessControl); + return type; + } + public static void analyzeExpressionWithoutSubqueries( Session session, PlannerContext plannerContext, @@ -3715,4 +3871,6 @@ public Optional getLabel() return label; } } + + public record TypeAndAnalysis(Type type, ExpressionAnalysis analysis) {} } diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/JsonPathAnalyzer.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/JsonPathAnalyzer.java index 093d9f016cbef..ef949ccd2527d 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/JsonPathAnalyzer.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/JsonPathAnalyzer.java @@ -59,6 +59,7 @@ import io.trino.sql.jsonpath.tree.StartsWithPredicate; import io.trino.sql.jsonpath.tree.TypeMethod; import io.trino.sql.tree.Node; +import io.trino.sql.tree.NodeLocation; import io.trino.sql.tree.StringLiteral; import java.util.LinkedHashMap; @@ -108,11 +109,18 @@ public JsonPathAnalysis analyzeJsonPath(StringLiteral path, Map pa Location pathStart = extractLocation(path) .map(location -> new Location(location.getLineNumber(), location.getColumnNumber())) .orElseThrow(() -> new IllegalStateException("missing NodeLocation in path")); - PathNode root = new PathParser(pathStart).parseJsonPath(path.getValue()); + PathNode root = PathParser.withRelativeErrorLocation(pathStart).parseJsonPath(path.getValue()); new Visitor(parameterTypes, path).process(root); return new JsonPathAnalysis((JsonPath) root, types, jsonParameters); } + public JsonPathAnalysis analyzeImplicitJsonPath(String path, NodeLocation location) + { + PathNode root = PathParser.withFixedErrorLocation(new Location(location.getLineNumber(), location.getColumnNumber())).parseJsonPath(path); + new Visitor(ImmutableMap.of(), new 
StringLiteral(path)).process(root); + return new JsonPathAnalysis((JsonPath) root, types, jsonParameters); + } + /** * This visitor determines and validates output types of PathNodes, whenever they can be deduced and represented as SQL types. * In some cases, the type of a PathNode can be determined without context. E.g., the `double()` method always returns DOUBLE. diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java index 3641b35579ee2..4a935a59777c2 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java @@ -23,11 +23,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import com.google.common.collect.Streams; import com.google.common.math.IntMath; import io.airlift.slice.Slice; import io.trino.Session; import io.trino.SystemSessionProperties; +import io.trino.connector.system.GlobalSystemConnector; import io.trino.execution.Column; import io.trino.execution.warnings.WarningCollector; import io.trino.metadata.AnalyzePropertyManager; @@ -104,6 +106,7 @@ import io.trino.sql.InterpretedFunctionInvoker; import io.trino.sql.PlannerContext; import io.trino.sql.analyzer.Analysis.GroupingSetAnalysis; +import io.trino.sql.analyzer.Analysis.JsonTableAnalysis; import io.trino.sql.analyzer.Analysis.MergeAnalysis; import io.trino.sql.analyzer.Analysis.ResolvedWindow; import io.trino.sql.analyzer.Analysis.SelectExpression; @@ -111,6 +114,9 @@ import io.trino.sql.analyzer.Analysis.TableArgumentAnalysis; import io.trino.sql.analyzer.Analysis.TableFunctionInvocationAnalysis; import io.trino.sql.analyzer.Analysis.UnnestAnalysis; +import io.trino.sql.analyzer.ExpressionAnalyzer.ParametersTypeAndAnalysis; +import io.trino.sql.analyzer.ExpressionAnalyzer.TypeAndAnalysis; +import io.trino.sql.analyzer.JsonPathAnalyzer.JsonPathAnalysis; import io.trino.sql.analyzer.PatternRecognitionAnalyzer.PatternRecognitionAnalysis; import io.trino.sql.analyzer.Scope.AsteriskedIdentifierChainBasis; import io.trino.sql.parser.ParsingException; @@ -173,7 +179,11 @@ import io.trino.sql.tree.JoinCriteria; import io.trino.sql.tree.JoinOn; import io.trino.sql.tree.JoinUsing; +import io.trino.sql.tree.JsonPathInvocation; +import io.trino.sql.tree.JsonPathParameter; import io.trino.sql.tree.JsonTable; +import io.trino.sql.tree.JsonTableColumnDefinition; +import io.trino.sql.tree.JsonTableSpecificPlan; import io.trino.sql.tree.Lateral; import io.trino.sql.tree.Limit; import io.trino.sql.tree.LongLiteral; @@ -184,16 +194,23 @@ import io.trino.sql.tree.MergeInsert; import io.trino.sql.tree.MergeUpdate; import io.trino.sql.tree.NaturalJoin; +import io.trino.sql.tree.NestedColumns; import io.trino.sql.tree.Node; +import io.trino.sql.tree.NodeLocation; import io.trino.sql.tree.NodeRef; import io.trino.sql.tree.Offset; import io.trino.sql.tree.OrderBy; +import io.trino.sql.tree.OrdinalityColumn; import io.trino.sql.tree.Parameter; import io.trino.sql.tree.PatternRecognitionRelation; +import io.trino.sql.tree.PlanLeaf; +import io.trino.sql.tree.PlanParentChild; +import io.trino.sql.tree.PlanSiblings; import io.trino.sql.tree.Prepare; import io.trino.sql.tree.Property; import io.trino.sql.tree.QualifiedName; import io.trino.sql.tree.Query; +import io.trino.sql.tree.QueryColumn; import 
io.trino.sql.tree.QueryPeriod; import io.trino.sql.tree.QuerySpecification; import io.trino.sql.tree.RefreshMaterializedView; @@ -227,6 +244,7 @@ import io.trino.sql.tree.SortItem; import io.trino.sql.tree.StartTransaction; import io.trino.sql.tree.Statement; +import io.trino.sql.tree.StringLiteral; import io.trino.sql.tree.SubqueryExpression; import io.trino.sql.tree.SubscriptExpression; import io.trino.sql.tree.Table; @@ -242,6 +260,7 @@ import io.trino.sql.tree.Update; import io.trino.sql.tree.UpdateAssignment; import io.trino.sql.tree.Use; +import io.trino.sql.tree.ValueColumn; import io.trino.sql.tree.Values; import io.trino.sql.tree.VariableDefinition; import io.trino.sql.tree.Window; @@ -294,6 +313,7 @@ import static io.trino.spi.StandardErrorCode.COLUMN_NOT_FOUND; import static io.trino.spi.StandardErrorCode.COLUMN_TYPE_UNKNOWN; import static io.trino.spi.StandardErrorCode.DUPLICATE_COLUMN_NAME; +import static io.trino.spi.StandardErrorCode.DUPLICATE_COLUMN_OR_PATH_NAME; import static io.trino.spi.StandardErrorCode.DUPLICATE_NAMED_QUERY; import static io.trino.spi.StandardErrorCode.DUPLICATE_PROPERTY; import static io.trino.spi.StandardErrorCode.DUPLICATE_RANGE_VARIABLE; @@ -312,6 +332,7 @@ import static io.trino.spi.StandardErrorCode.INVALID_LIMIT_CLAUSE; import static io.trino.spi.StandardErrorCode.INVALID_ORDER_BY; import static io.trino.spi.StandardErrorCode.INVALID_PARTITION_BY; +import static io.trino.spi.StandardErrorCode.INVALID_PLAN; import static io.trino.spi.StandardErrorCode.INVALID_RECURSIVE_REFERENCE; import static io.trino.spi.StandardErrorCode.INVALID_ROW_FILTER; import static io.trino.spi.StandardErrorCode.INVALID_TABLE_FUNCTION_INVOCATION; @@ -324,6 +345,7 @@ import static io.trino.spi.StandardErrorCode.MISSING_COLUMN_NAME; import static io.trino.spi.StandardErrorCode.MISSING_GROUP_BY; import static io.trino.spi.StandardErrorCode.MISSING_ORDER_BY; +import static io.trino.spi.StandardErrorCode.MISSING_PATH_NAME; import static io.trino.spi.StandardErrorCode.MISSING_RETURN_TYPE; import static io.trino.spi.StandardErrorCode.NESTED_RECURSIVE; import static io.trino.spi.StandardErrorCode.NESTED_ROW_PATTERN_RECOGNITION; @@ -363,6 +385,8 @@ import static io.trino.sql.analyzer.AggregationAnalyzer.verifySourceAggregations; import static io.trino.sql.analyzer.Analyzer.verifyNoAggregateWindowOrGroupingFunctions; import static io.trino.sql.analyzer.CanonicalizationAware.canonicalizationAwareKey; +import static io.trino.sql.analyzer.ExpressionAnalyzer.analyzeJsonQueryExpression; +import static io.trino.sql.analyzer.ExpressionAnalyzer.analyzeJsonValueExpression; import static io.trino.sql.analyzer.ExpressionAnalyzer.createConstantAnalyzer; import static io.trino.sql.analyzer.ExpressionTreeUtils.asQualifiedName; import static io.trino.sql.analyzer.ExpressionTreeUtils.extractAggregateFunctions; @@ -3250,6 +3274,17 @@ protected Scope visitJoin(Join node, Optional scope) } } } + else if (isJsonTable(node.getRight())) { + if (criteria != null) { + if (!(criteria instanceof JoinOn) || !((JoinOn) criteria).getExpression().equals(TRUE_LITERAL)) { + throw semanticException( + NOT_SUPPORTED, + criteria instanceof JoinOn ? 
((JoinOn) criteria).getExpression() : node, + "%s JOIN involving JSON_TABLE is only supported with condition ON TRUE", + node.getType().name()); + } + } + } else if (node.getType() == FULL) { if (!(criteria instanceof JoinOn) || !((JoinOn) criteria).getExpression().equals(TRUE_LITERAL)) { throw semanticException( @@ -3776,7 +3811,7 @@ private boolean isLateralRelation(Relation node) if (node instanceof AliasedRelation) { return isLateralRelation(((AliasedRelation) node).getRelation()); } - return node instanceof Unnest || node instanceof Lateral; + return node instanceof Unnest || node instanceof Lateral || node instanceof JsonTable; } private boolean isUnnestRelation(Relation node) @@ -3787,6 +3822,14 @@ private boolean isUnnestRelation(Relation node) return node instanceof Unnest; } + private boolean isJsonTable(Relation node) + { + if (node instanceof AliasedRelation) { + return isJsonTable(((AliasedRelation) node).getRelation()); + } + return node instanceof JsonTable; + } + @Override protected Scope visitValues(Values node, Optional scope) { @@ -3862,9 +3905,254 @@ else if (actualType instanceof RowType) { } @Override - protected Scope visitJsonTable(JsonTable node, Optional context) + protected Scope visitJsonTable(JsonTable node, Optional scope) + { + Scope enclosingScope = createScope(scope); + + // analyze the context item, the root JSON path, and the path parameters + RowType parametersType = analyzeJsonPathInvocation(node, enclosingScope); + + // json_table is implemented as a table function provided by the global catalog. + CatalogHandle catalogHandle = getRequiredCatalogHandle(metadata, session, node, GlobalSystemConnector.NAME); + ConnectorTransactionHandle transactionHandle = transactionManager.getConnectorTransaction(session.getRequiredTransactionId(), catalogHandle); + + // all column and path names must be unique + Set uniqueNames = new HashSet<>(); + JsonPathInvocation rootPath = node.getJsonPathInvocation(); + rootPath.getPathName().ifPresent(name -> uniqueNames.add(name.getCanonicalValue())); + + ImmutableList.Builder outputFields = ImmutableList.builder(); + ImmutableList.Builder> orderedOutputColumns = ImmutableList.builder(); + analyzeJsonTableColumns(node.getColumns(), uniqueNames, outputFields, orderedOutputColumns, enclosingScope, node); + + analysis.addJsonTableAnalysis(node, new JsonTableAnalysis(catalogHandle, transactionHandle, parametersType, orderedOutputColumns.build())); + + node.getPlan().ifPresent(plan -> { + if (plan instanceof JsonTableSpecificPlan specificPlan) { + validateJsonTableSpecificPlan(rootPath, specificPlan, node.getColumns()); + } + else { + // if PLAN DEFAULT is specified, all nested paths should be named + checkAllNestedPathsNamed(node.getColumns()); + } + }); + + return createAndAssignScope(node, scope, outputFields.build()); + } + + private RowType analyzeJsonPathInvocation(JsonTable node, Scope scope) + { + verifyNoAggregateWindowOrGroupingFunctions(session, functionResolver, accessControl, node.getJsonPathInvocation().getInputExpression(), "JSON_TABLE input expression"); + node.getJsonPathInvocation().getPathParameters().stream() + .map(JsonPathParameter::getParameter) + .forEach(parameter -> verifyNoAggregateWindowOrGroupingFunctions(session, functionResolver, accessControl, parameter, "JSON_TABLE path parameter")); + + ParametersTypeAndAnalysis parametersTypeAndAnalysis = ExpressionAnalyzer.analyzeJsonPathInvocation( + node, + session, + plannerContext, + statementAnalyzerFactory, + accessControl, + scope, + analysis, + 
WarningCollector.NOOP, + correlationSupport); + // context item and passed path parameters can contain subqueries - the subqueries are recorded under the enclosing JsonTable node + analysis.recordSubqueries(node, parametersTypeAndAnalysis.expressionAnalysis()); + return parametersTypeAndAnalysis.parametersType(); + } + + private void analyzeJsonTableColumns( + List columns, + Set uniqueNames, + ImmutableList.Builder outputFields, + ImmutableList.Builder> orderedOutputColumns, + Scope enclosingScope, + JsonTable jsonTable) + { + for (JsonTableColumnDefinition column : columns) { + if (column instanceof OrdinalityColumn ordinalityColumn) { + String name = ordinalityColumn.getName().getCanonicalValue(); + if (!uniqueNames.add(name)) { + throw semanticException(DUPLICATE_COLUMN_OR_PATH_NAME, ordinalityColumn.getName(), "All column and path names in JSON_TABLE invocation must be unique"); + } + outputFields.add(Field.newUnqualified(name, BIGINT)); + orderedOutputColumns.add(NodeRef.of(ordinalityColumn)); + } + else if (column instanceof ValueColumn valueColumn) { + String name = valueColumn.getName().getCanonicalValue(); + if (!uniqueNames.add(name)) { + throw semanticException(DUPLICATE_COLUMN_OR_PATH_NAME, valueColumn.getName(), "All column and path names in JSON_TABLE invocation must be unique"); + } + valueColumn.getEmptyDefault().ifPresent(expression -> verifyNoAggregateWindowOrGroupingFunctions(session, functionResolver, accessControl, expression, "default expression for JSON_TABLE column")); + valueColumn.getErrorDefault().ifPresent(expression -> verifyNoAggregateWindowOrGroupingFunctions(session, functionResolver, accessControl, expression, "default expression for JSON_TABLE column")); + JsonPathAnalysis pathAnalysis = valueColumn.getJsonPath() + .map(this::analyzeJsonPath) + .orElseGet(() -> analyzeImplicitJsonPath(getImplicitJsonPath(name), valueColumn.getLocation())); + analysis.setJsonPathAnalysis(valueColumn, pathAnalysis); + TypeAndAnalysis typeAndAnalysis = analyzeJsonValueExpression( + valueColumn, + pathAnalysis, + session, + plannerContext, + statementAnalyzerFactory, + accessControl, + enclosingScope, + analysis, + warningCollector, + correlationSupport); + // default values can contain subqueries - the subqueries are recorded under the enclosing JsonTable node + analysis.recordSubqueries(jsonTable, typeAndAnalysis.analysis()); + outputFields.add(Field.newUnqualified(name, typeAndAnalysis.type())); + orderedOutputColumns.add(NodeRef.of(valueColumn)); + } + else if (column instanceof QueryColumn queryColumn) { + String name = queryColumn.getName().getCanonicalValue(); + if (!uniqueNames.add(name)) { + throw semanticException(DUPLICATE_COLUMN_OR_PATH_NAME, queryColumn.getName(), "All column and path names in JSON_TABLE invocation must be unique"); + } + JsonPathAnalysis pathAnalysis = queryColumn.getJsonPath() + .map(this::analyzeJsonPath) + .orElseGet(() -> analyzeImplicitJsonPath(getImplicitJsonPath(name), queryColumn.getLocation())); + analysis.setJsonPathAnalysis(queryColumn, pathAnalysis); + Type type = analyzeJsonQueryExpression(queryColumn, session, plannerContext, statementAnalyzerFactory, accessControl, enclosingScope, analysis, warningCollector); + outputFields.add(Field.newUnqualified(name, type)); + orderedOutputColumns.add(NodeRef.of(queryColumn)); + } + else if (column instanceof NestedColumns nestedColumns) { + nestedColumns.getPathName().ifPresent(name -> { + if (!uniqueNames.add(name.getCanonicalValue())) { + throw 
semanticException(DUPLICATE_COLUMN_OR_PATH_NAME, name, "All column and path names in JSON_TABLE invocation must be unique"); + } + }); + JsonPathAnalysis pathAnalysis = analyzeJsonPath(nestedColumns.getJsonPath()); + analysis.setJsonPathAnalysis(nestedColumns, pathAnalysis); + analyzeJsonTableColumns(nestedColumns.getColumns(), uniqueNames, outputFields, orderedOutputColumns, enclosingScope, jsonTable); + } + else { + throw new IllegalArgumentException("unexpected type of JSON_TABLE column: " + column.getClass().getSimpleName()); + } + } + } + + private static String getImplicitJsonPath(String name) + { + // TODO the spec misses the path mode. I put 'lax', but it should be confirmed, as the path mode is meaningful for the semantics of the implicit path. + return "lax $.\"" + name.replace("\"", "\"\"") + '"'; + } + + private JsonPathAnalysis analyzeJsonPath(StringLiteral path) + { + return new JsonPathAnalyzer( + plannerContext.getMetadata(), + session, + createConstantAnalyzer(plannerContext, accessControl, session, analysis.getParameters(), WarningCollector.NOOP, analysis.isDescribe())) + .analyzeJsonPath(path, ImmutableMap.of()); + } + + private JsonPathAnalysis analyzeImplicitJsonPath(String path, Optional columnLocation) + { + return new JsonPathAnalyzer( + plannerContext.getMetadata(), + session, + createConstantAnalyzer(plannerContext, accessControl, session, analysis.getParameters(), WarningCollector.NOOP, analysis.isDescribe())) + .analyzeImplicitJsonPath(path, columnLocation.orElseThrow(() -> new IllegalStateException("missing NodeLocation for JSON_TABLE column"))); + } + + private void validateJsonTableSpecificPlan(JsonPathInvocation rootPath, JsonTableSpecificPlan rootPlan, List rootColumns) + { + String rootPathName = rootPath.getPathName() + .orElseThrow(() -> semanticException(MISSING_PATH_NAME, rootPath, "All JSON paths must be named when specific plan is given")) + .getCanonicalValue(); + String rootPlanName; + if (rootPlan instanceof PlanLeaf planLeaf) { + rootPlanName = planLeaf.getName().getCanonicalValue(); + } + else if (rootPlan instanceof PlanParentChild planParentChild) { + rootPlanName = planParentChild.getParent().getName().getCanonicalValue(); + } + else { + throw semanticException(INVALID_PLAN, rootPlan, "JSON_TABLE plan must either be a single path name or it must be rooted in parent-child relationship (OUTER or INNER)"); + } + validateJsonTablePlan(ImmutableMap.of(rootPathName, rootColumns), ImmutableMap.of(rootPlanName, rootPlan), rootPlan); + } + + private void validateJsonTablePlan(Map> actualNodes, Map planNodes, JsonTableSpecificPlan rootPlan) + { + Set unhandledActualNodes = Sets.difference(actualNodes.keySet(), planNodes.keySet()); + if (!unhandledActualNodes.isEmpty()) { + throw semanticException(INVALID_PLAN, rootPlan, "JSON_TABLE plan should contain all JSON paths available at each level of nesting. 
Paths not included: %s", String.join(", ", unhandledActualNodes)); + } + Set irrelevantPlanChildren = Sets.difference(planNodes.keySet(), actualNodes.keySet()); + if (!irrelevantPlanChildren.isEmpty()) { + throw semanticException(INVALID_PLAN, rootPlan, "JSON_TABLE plan includes unavailable JSON path names: %s", String.join(", ", irrelevantPlanChildren)); + } + + // recurse into child nodes + actualNodes.forEach((name, columns) -> { + JsonTableSpecificPlan plan = planNodes.get(name); + + Map> actualChildren = columns.stream() + .filter(NestedColumns.class::isInstance) + .map(NestedColumns.class::cast) + .collect(toImmutableMap( + child -> child.getPathName() + .orElseThrow(() -> semanticException(MISSING_PATH_NAME, child.getJsonPath(), "All JSON paths must be named when specific plan is given")) + .getCanonicalValue(), + NestedColumns::getColumns)); + + Map planChildren; + if (plan instanceof PlanLeaf) { + planChildren = ImmutableMap.of(); + } + else if (plan instanceof PlanParentChild planParentChild) { + planChildren = new HashMap<>(); + getPlanSiblings(planParentChild.getChild(), planChildren); + } + else { + throw new IllegalStateException("unexpected JSON_TABLE plan node: " + plan.getClass().getSimpleName()); + } + + validateJsonTablePlan(actualChildren, planChildren, rootPlan); + }); + } + + private void getPlanSiblings(JsonTableSpecificPlan plan, Map plansByName) + { + if (plan instanceof PlanLeaf planLeaf) { + if (plansByName.put(planLeaf.getName().getCanonicalValue(), planLeaf) != null) { + throw semanticException(INVALID_PLAN, planLeaf, "Duplicate reference to JSON path name in sibling plan: %s", planLeaf.getName().getCanonicalValue()); + } + } + else if (plan instanceof PlanParentChild planParentChild) { + if (plansByName.put(planParentChild.getParent().getName().getCanonicalValue(), planParentChild) != null) { + throw semanticException(INVALID_PLAN, planParentChild.getParent(), "Duplicate reference to JSON path name in sibling plan: %s", planParentChild.getParent().getName().getCanonicalValue()); + } + } + else if (plan instanceof PlanSiblings planSiblings) { + for (JsonTableSpecificPlan sibling : planSiblings.getSiblings()) { + getPlanSiblings(sibling, plansByName); + } + } + } + + // Per SQL standard ISO/IEC STANDARD 9075-2, p. 453, g), i), and p. 821, 2), b), when PLAN DEFAULT is specified, all nested paths must be named, but the root path does not have to be named. 
+ private void checkAllNestedPathsNamed(List columns) { - throw semanticException(NOT_SUPPORTED, node, "JSON_TABLE is not yet supported"); + List nestedColumns = columns.stream() + .filter(NestedColumns.class::isInstance) + .map(NestedColumns.class::cast) + .collect(toImmutableList()); + + nestedColumns.stream() + .forEach(definition -> { + if (definition.getPathName().isEmpty()) { + throw semanticException(MISSING_PATH_NAME, definition.getJsonPath(), "All nested JSON paths must be named when default plan is given"); + } + }); + + nestedColumns.stream() + .forEach(definition -> checkAllNestedPathsNamed(definition.getColumns())); } private void analyzeWindowDefinitions(QuerySpecification node, Scope scope) diff --git a/core/trino-main/src/main/java/io/trino/sql/gen/DereferenceCodeGenerator.java b/core/trino-main/src/main/java/io/trino/sql/gen/DereferenceCodeGenerator.java index 6f57363567913..8770e094926d1 100644 --- a/core/trino-main/src/main/java/io/trino/sql/gen/DereferenceCodeGenerator.java +++ b/core/trino-main/src/main/java/io/trino/sql/gen/DereferenceCodeGenerator.java @@ -29,6 +29,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static io.airlift.bytecode.expression.BytecodeExpressions.constantInt; import static io.trino.sql.gen.SqlTypeBytecodeExpression.constantType; +import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; public class DereferenceCodeGenerator @@ -44,7 +45,7 @@ public DereferenceCodeGenerator(SpecialForm specialForm) returnType = specialForm.getType(); checkArgument(specialForm.getArguments().size() == 2); base = specialForm.getArguments().get(0); - index = (int) ((ConstantExpression) specialForm.getArguments().get(1)).getValue(); + index = toIntExact((long) ((ConstantExpression) specialForm.getArguments().get(1)).getValue()); } @Override diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressionTranslator.java b/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressionTranslator.java index e9d28509754cf..5123303360b6b 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressionTranslator.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressionTranslator.java @@ -21,6 +21,7 @@ import io.trino.Session; import io.trino.connector.system.GlobalSystemConnector; import io.trino.metadata.ResolvedFunction; +import io.trino.operator.scalar.JsonPath; import io.trino.plugin.base.expression.ConnectorExpressions; import io.trino.security.AllowAllAccessControl; import io.trino.spi.connector.CatalogSchemaName; @@ -36,7 +37,6 @@ import io.trino.spi.type.RowType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; -import io.trino.sql.DynamicFilters; import io.trino.sql.PlannerContext; import io.trino.sql.tree.ArithmeticBinaryExpression; import io.trino.sql.tree.ArithmeticUnaryExpression; @@ -63,6 +63,7 @@ import io.trino.sql.tree.SubscriptExpression; import io.trino.sql.tree.SymbolReference; import io.trino.type.JoniRegexp; +import io.trino.type.JsonPathType; import io.trino.type.LikePattern; import io.trino.type.Re2JRegexp; import io.trino.type.Re2JRegexpType; @@ -104,6 +105,7 @@ import static io.trino.spi.expression.StandardFunctions.SUBTRACT_FUNCTION_NAME; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.VarcharType.createVarcharType; +import static io.trino.sql.DynamicFilters.isDynamicFilterFunction; import static io.trino.sql.ExpressionUtils.combineConjuncts; import static 
io.trino.sql.ExpressionUtils.extractConjuncts; import static io.trino.sql.ExpressionUtils.isEffectivelyLiteral; @@ -333,7 +335,8 @@ private Optional translateCall(String functionName, ResolvedFunction return Optional.empty(); } Expression expression = translated.get(); - if ((formalType == JONI_REGEXP || formalType instanceof Re2JRegexpType) && argumentType instanceof VarcharType) { + if ((formalType == JONI_REGEXP || formalType instanceof Re2JRegexpType || formalType instanceof JsonPathType) + && argumentType instanceof VarcharType) { // These types are not used in connector expressions, so require special handling when translating back to expressions. expression = new Cast(expression, toSqlType(formalType)); } @@ -682,7 +685,7 @@ protected Optional visitFunctionCall(FunctionCall node, Voi } CatalogSchemaFunctionName functionName = ResolvedFunction.extractFunctionName(node.getName()); - checkArgument(!builtinFunctionName(DynamicFilters.Function.NAME).equals(functionName), "Dynamic filter has no meaning for a connector, it should not be translated into ConnectorExpression"); + checkArgument(!isDynamicFilterFunction(functionName), "Dynamic filter has no meaning for a connector, it should not be translated into ConnectorExpression"); // literals should be handled by isEffectivelyLiteral case above checkArgument(!builtinFunctionName(LITERAL_FUNCTION_NAME).equals(functionName), "Unexpected literal function"); @@ -814,6 +817,10 @@ private ConnectorExpression constantFor(Expression node) Slice pattern = Slices.utf8Slice(((Re2JRegexp) value).pattern()); return new Constant(pattern, createVarcharType(countCodePoints(pattern))); } + if (type instanceof JsonPathType) { + Slice pattern = Slices.utf8Slice(((JsonPath) value).pattern()); + return new Constant(pattern, createVarcharType(countCodePoints(pattern))); + } return new Constant(value, type); } diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressions.java b/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressions.java deleted file mode 100644 index c7ecbec765281..0000000000000 --- a/core/trino-main/src/main/java/io/trino/sql/planner/ConnectorExpressions.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.sql.planner; - -import com.google.common.graph.SuccessorsFunction; -import com.google.common.graph.Traverser; -import io.trino.spi.expression.ConnectorExpression; -import io.trino.spi.expression.Variable; - -import java.util.List; -import java.util.stream.Stream; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.Streams.stream; -import static java.util.Objects.requireNonNull; - -public final class ConnectorExpressions -{ - private ConnectorExpressions() {} - - public static List extractVariables(ConnectorExpression expression) - { - return preOrder(expression) - .filter(Variable.class::isInstance) - .map(Variable.class::cast) - .collect(toImmutableList()); - } - - public static Stream preOrder(ConnectorExpression expression) - { - return stream( - Traverser.forTree((SuccessorsFunction) ConnectorExpression::getChildren) - .depthFirstPreOrder(requireNonNull(expression, "expression is null"))); - } -} diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/EqualityInference.java b/core/trino-main/src/main/java/io/trino/sql/planner/EqualityInference.java index a9cfc0289d35e..ecbdcf0bd599c 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/EqualityInference.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/EqualityInference.java @@ -34,11 +34,10 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.function.Predicate; import java.util.function.ToIntFunction; -import java.util.stream.Collectors; import java.util.stream.Stream; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -215,17 +214,34 @@ public EqualityPartition generateEqualitiesPartitionedBy(Set scope) .forEach(scopeComplementEqualities::add); } - // Compile the scope straddling equality expressions - List connectingExpressions = new ArrayList<>(); - connectingExpressions.add(matchingCanonical); - connectingExpressions.add(complementCanonical); - connectingExpressions.addAll(scopeStraddlingExpressions); - connectingExpressions = connectingExpressions.stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - Expression connectingCanonical = getCanonical(connectingExpressions.stream()); + // Compile single equality between matching and complement scope. + // Only consider expressions that don't have derived expression in other scope. + // Otherwise, redundant equality would be generated. + Optional matchingConnecting = scopeExpressions.stream() + .filter(expression -> SymbolsExtractor.extractAll(expression).isEmpty() || rewrite(expression, symbol -> !scope.contains(symbol), false) == null) + .min(canonicalComparator); + Optional complementConnecting = scopeComplementExpressions.stream() + .filter(expression -> SymbolsExtractor.extractAll(expression).isEmpty() || rewrite(expression, scope::contains, false) == null) + .min(canonicalComparator); + if (matchingConnecting.isPresent() && complementConnecting.isPresent() && !matchingConnecting.equals(complementConnecting)) { + scopeStraddlingEqualities.add(new ComparisonExpression(ComparisonExpression.Operator.EQUAL, matchingConnecting.get(), complementConnecting.get())); + } + + // Compile the scope straddling equality expressions. + // scopeStraddlingExpressions couldn't be pushed to either side, + // therefore there needs to be an equality generated with + // one of the scopes (either matching or complement). 
+ List straddlingExpressions = new ArrayList<>(); + if (matchingCanonical != null) { + straddlingExpressions.add(matchingCanonical); + } + else if (complementCanonical != null) { + straddlingExpressions.add(complementCanonical); + } + straddlingExpressions.addAll(scopeStraddlingExpressions); + Expression connectingCanonical = getCanonical(straddlingExpressions.stream()); if (connectingCanonical != null) { - connectingExpressions.stream() + straddlingExpressions.stream() .filter(expression -> !expression.equals(connectingCanonical)) .map(expression -> new ComparisonExpression(ComparisonExpression.Operator.EQUAL, connectingCanonical, expression)) .forEach(scopeStraddlingEqualities::add); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/ExpressionInterpreter.java b/core/trino-main/src/main/java/io/trino/sql/planner/ExpressionInterpreter.java index ebfb26d7209ee..4161e9ce3c3d2 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/ExpressionInterpreter.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/ExpressionInterpreter.java @@ -364,7 +364,7 @@ public Object visitFieldReference(FieldReference node, Object context) @Override protected Object visitDereferenceExpression(DereferenceExpression node, Object context) { - checkArgument(!isQualifiedAllFieldsReference(node), "unexpected expression: all fields labeled reference " + node); + checkArgument(!isQualifiedAllFieldsReference(node), "unexpected expression: all fields labeled reference %s", node); Identifier fieldIdentifier = node.getField().orElseThrow(); Type type = type(node.getBase()); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/LocalExecutionPlanner.java b/core/trino-main/src/main/java/io/trino/sql/planner/LocalExecutionPlanner.java index fe5bab2061189..f8d56ee3b5865 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/LocalExecutionPlanner.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/LocalExecutionPlanner.java @@ -43,7 +43,6 @@ import io.trino.execution.TaskManagerConfig; import io.trino.execution.buffer.OutputBuffer; import io.trino.execution.buffer.PagesSerdeFactory; -import io.trino.index.IndexManager; import io.trino.metadata.MergeHandle; import io.trino.metadata.Metadata; import io.trino.metadata.ResolvedFunction; @@ -115,6 +114,7 @@ import io.trino.operator.index.IndexBuildDriverFactoryProvider; import io.trino.operator.index.IndexJoinLookupStats; import io.trino.operator.index.IndexLookupSourceFactory; +import io.trino.operator.index.IndexManager; import io.trino.operator.index.IndexSourceOperator; import io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory; import io.trino.operator.join.JoinBridgeManager; @@ -333,7 +333,6 @@ import static io.trino.operator.output.SkewedPartitionRebalancer.checkCanScalePartitionsRemotely; import static io.trino.operator.output.SkewedPartitionRebalancer.createPartitionFunction; import static io.trino.operator.output.SkewedPartitionRebalancer.getMaxWritersBasedOnMemory; -import static io.trino.operator.output.SkewedPartitionRebalancer.getScaleWritersMaxSkewedPartitions; import static io.trino.operator.output.SkewedPartitionRebalancer.getTaskCount; import static io.trino.operator.window.pattern.PhysicalValuePointer.CLASSIFIER; import static io.trino.operator.window.pattern.PhysicalValuePointer.MATCH_NUMBER; @@ -382,7 +381,6 @@ import static io.trino.util.SpatialJoinUtils.extractSupportedSpatialComparisons; import static io.trino.util.SpatialJoinUtils.extractSupportedSpatialFunctions; import static 
java.lang.Math.ceil; -import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.toIntExact; import static java.lang.String.format; @@ -593,10 +591,7 @@ public LocalExecutionPlan plan( taskCount, taskBucketCount, getWriterScalingMinDataProcessed(taskContext.getSession()).toBytes(), - getSkewedPartitionMinDataProcessedRebalanceThreshold(taskContext.getSession()).toBytes(), - // Keep the maxPartitionsToRebalance to atleast task count such that single partition writes do - // not suffer from skewness and can scale uniformly across all tasks. - max(getScaleWritersMaxSkewedPartitions(taskContext.getSession()), taskCount))); + getSkewedPartitionMinDataProcessedRebalanceThreshold(taskContext.getSession()).toBytes())); } else { partitionFunction = nodePartitioningManager.getPartitionFunction(taskContext.getSession(), partitioningScheme, partitionChannelTypes); @@ -906,7 +901,7 @@ public PhysicalOperation visitRemoteSource(RemoteSourceNode node, LocalExecution private PhysicalOperation createMergeSource(RemoteSourceNode node, LocalExecutionPlanContext context) { checkArgument(node.getOrderingScheme().isPresent(), "orderingScheme is absent"); - checkArgument(node.getRetryPolicy() == RetryPolicy.NONE, "unexpected retry policy: " + node.getRetryPolicy()); + checkArgument(node.getRetryPolicy() == RetryPolicy.NONE, "unexpected retry policy: %s", node.getRetryPolicy()); // merging remote source must have a single driver context.setDriverInstanceCount(1); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/PlanFragment.java b/core/trino-main/src/main/java/io/trino/sql/planner/PlanFragment.java index 42db56e5812ca..226149f770e4a 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/PlanFragment.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/PlanFragment.java @@ -26,6 +26,7 @@ import io.trino.sql.planner.plan.PlanNode; import io.trino.sql.planner.plan.PlanNodeId; import io.trino.sql.planner.plan.RemoteSourceNode; +import io.trino.sql.planner.plan.TableScanNode; import java.util.List; import java.util.Map; @@ -55,6 +56,7 @@ public class PlanFragment private final List activeCatalogs; private final List languageFunctions; private final Optional jsonRepresentation; + private final boolean containsTableScanNode; // Only for creating instances without the JSON representation embedded private PlanFragment( @@ -88,6 +90,7 @@ private PlanFragment( this.activeCatalogs = requireNonNull(activeCatalogs, "activeCatalogs is null"); this.languageFunctions = requireNonNull(languageFunctions, "languageFunctions is null"); this.jsonRepresentation = Optional.empty(); + this.containsTableScanNode = partitionedSourceNodes.stream().anyMatch(TableScanNode.class::isInstance); } @JsonCreator @@ -135,6 +138,7 @@ public PlanFragment( this.remoteSourceNodes = remoteSourceNodes.build(); this.outputPartitioningScheme = requireNonNull(outputPartitioningScheme, "partitioningScheme is null"); + this.containsTableScanNode = partitionedSourceNodes.stream().anyMatch(TableScanNode.class::isInstance); } @JsonProperty @@ -372,4 +376,9 @@ public PlanFragment withActiveCatalogs(List activeCatalogs) this.languageFunctions, this.jsonRepresentation); } + + public boolean containsTableScanNode() + { + return containsTableScanNode; + } } diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/RelationPlanner.java b/core/trino-main/src/main/java/io/trino/sql/planner/RelationPlanner.java index a88fa26caf753..cdcc86b8eb3af 100644 --- 
a/core/trino-main/src/main/java/io/trino/sql/planner/RelationPlanner.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/RelationPlanner.java @@ -19,14 +19,28 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.ListMultimap; import io.trino.Session; +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.ResolvedFunction; import io.trino.metadata.TableFunctionHandle; import io.trino.metadata.TableHandle; +import io.trino.operator.table.json.JsonTable.JsonTableFunctionHandle; +import io.trino.operator.table.json.JsonTableColumn; +import io.trino.operator.table.json.JsonTableOrdinalityColumn; +import io.trino.operator.table.json.JsonTablePlanCross; +import io.trino.operator.table.json.JsonTablePlanLeaf; +import io.trino.operator.table.json.JsonTablePlanNode; +import io.trino.operator.table.json.JsonTablePlanSingle; +import io.trino.operator.table.json.JsonTablePlanUnion; +import io.trino.operator.table.json.JsonTableQueryColumn; +import io.trino.operator.table.json.JsonTableValueColumn; import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.function.table.TableArgument; import io.trino.spi.type.RowType; import io.trino.spi.type.Type; import io.trino.sql.ExpressionUtils; import io.trino.sql.PlannerContext; import io.trino.sql.analyzer.Analysis; +import io.trino.sql.analyzer.Analysis.JsonTableAnalysis; import io.trino.sql.analyzer.Analysis.TableArgumentAnalysis; import io.trino.sql.analyzer.Analysis.TableFunctionInvocationAnalysis; import io.trino.sql.analyzer.Analysis.UnnestAnalysis; @@ -34,6 +48,7 @@ import io.trino.sql.analyzer.RelationType; import io.trino.sql.analyzer.Scope; import io.trino.sql.planner.QueryPlanner.PlanAndMappings; +import io.trino.sql.planner.TranslationMap.ParametersRow; import io.trino.sql.planner.plan.Assignments; import io.trino.sql.planner.plan.CorrelatedJoinNode; import io.trino.sql.planner.plan.DataOrganizationSpecification; @@ -61,27 +76,45 @@ import io.trino.sql.planner.rowpattern.ir.IrRowPattern; import io.trino.sql.tree.AliasedRelation; import io.trino.sql.tree.AstVisitor; +import io.trino.sql.tree.BooleanLiteral; import io.trino.sql.tree.Cast; import io.trino.sql.tree.CoalesceExpression; import io.trino.sql.tree.ComparisonExpression; import io.trino.sql.tree.Except; import io.trino.sql.tree.Expression; +import io.trino.sql.tree.FunctionCall; +import io.trino.sql.tree.GenericLiteral; import io.trino.sql.tree.Identifier; import io.trino.sql.tree.IfExpression; import io.trino.sql.tree.Intersect; import io.trino.sql.tree.Join; import io.trino.sql.tree.JoinCriteria; import io.trino.sql.tree.JoinUsing; +import io.trino.sql.tree.JsonPathParameter; +import io.trino.sql.tree.JsonQuery; +import io.trino.sql.tree.JsonTable; +import io.trino.sql.tree.JsonTableColumnDefinition; +import io.trino.sql.tree.JsonTableDefaultPlan; +import io.trino.sql.tree.JsonTablePlan.ParentChildPlanType; +import io.trino.sql.tree.JsonTablePlan.SiblingsPlanType; +import io.trino.sql.tree.JsonTableSpecificPlan; +import io.trino.sql.tree.JsonValue; import io.trino.sql.tree.LambdaArgumentDeclaration; import io.trino.sql.tree.Lateral; import io.trino.sql.tree.MeasureDefinition; import io.trino.sql.tree.NaturalJoin; +import io.trino.sql.tree.NestedColumns; import io.trino.sql.tree.Node; import io.trino.sql.tree.NodeRef; +import io.trino.sql.tree.OrdinalityColumn; import io.trino.sql.tree.PatternRecognitionRelation; import io.trino.sql.tree.PatternSearchMode; +import io.trino.sql.tree.PlanLeaf; +import io.trino.sql.tree.PlanParentChild; 
+import io.trino.sql.tree.PlanSiblings; import io.trino.sql.tree.QualifiedName; import io.trino.sql.tree.Query; +import io.trino.sql.tree.QueryColumn; import io.trino.sql.tree.QuerySpecification; import io.trino.sql.tree.Relation; import io.trino.sql.tree.Row; @@ -97,6 +130,7 @@ import io.trino.sql.tree.TableSubquery; import io.trino.sql.tree.Union; import io.trino.sql.tree.Unnest; +import io.trino.sql.tree.ValueColumn; import io.trino.sql.tree.Values; import io.trino.sql.tree.VariableDefinition; import io.trino.type.TypeCoercion; @@ -118,6 +152,7 @@ import static io.trino.spi.StandardErrorCode.CONSTRAINT_VIOLATION; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.StandardTypes.TINYINT; import static io.trino.sql.NodeUtils.getSortItemsFromOrderBy; import static io.trino.sql.analyzer.SemanticExceptions.semanticException; import static io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType; @@ -133,11 +168,18 @@ import static io.trino.sql.planner.plan.AggregationNode.singleGroupingSet; import static io.trino.sql.tree.BooleanLiteral.TRUE_LITERAL; import static io.trino.sql.tree.Join.Type.CROSS; +import static io.trino.sql.tree.Join.Type.FULL; import static io.trino.sql.tree.Join.Type.IMPLICIT; import static io.trino.sql.tree.Join.Type.INNER; +import static io.trino.sql.tree.Join.Type.LEFT; +import static io.trino.sql.tree.JsonQuery.QuotesBehavior.KEEP; +import static io.trino.sql.tree.JsonQuery.QuotesBehavior.OMIT; +import static io.trino.sql.tree.JsonTablePlan.ParentChildPlanType.OUTER; +import static io.trino.sql.tree.JsonTablePlan.SiblingsPlanType.UNION; import static io.trino.sql.tree.PatternRecognitionRelation.RowsPerMatch.ONE; import static io.trino.sql.tree.PatternSearchMode.Mode.INITIAL; import static io.trino.sql.tree.SkipTo.Position.PAST_LAST; +import static io.trino.type.Json2016Type.JSON_2016; import static java.lang.Boolean.TRUE; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; @@ -685,6 +727,16 @@ protected RelationPlan visitJoin(Join node, Void context) return planJoinUnnest(leftPlan, node, unnest.get()); } + Optional jsonTable = getJsonTable(node.getRight()); + if (jsonTable.isPresent()) { + return planJoinJsonTable( + newPlanBuilder(leftPlan, analysis, lambdaDeclarationToSymbolMap, session, plannerContext), + leftPlan.getFieldMappings(), + node.getType(), + jsonTable.get(), + analysis.getScope(node)); + } + Optional lateral = getLateral(node.getRight()); if (lateral.isPresent()) { return planCorrelatedJoin(node, leftPlan, lateral.get()); @@ -1002,6 +1054,17 @@ private static Optional getUnnest(Relation relation) return Optional.empty(); } + private static Optional getJsonTable(Relation relation) + { + if (relation instanceof AliasedRelation) { + return getJsonTable(((AliasedRelation) relation).getRelation()); + } + if (relation instanceof JsonTable) { + return Optional.of((JsonTable) relation); + } + return Optional.empty(); + } + private static Optional getLateral(Relation relation) { if (relation instanceof AliasedRelation) { @@ -1124,6 +1187,393 @@ private RelationPlan planUnnest(PlanBuilder subPlan, Unnest node, List r return new RelationPlan(unnestNode, outputScope, unnestNode.getOutputSymbols(), outerContext); } + private RelationPlan planJoinJsonTable(PlanBuilder leftPlan, List leftFieldMappings, Join.Type joinType, JsonTable jsonTable, Scope outputScope) + { + PlanBuilder planBuilder = leftPlan; + + // extract input expressions + 
ImmutableList.Builder builder = ImmutableList.builder(); + Expression inputExpression = jsonTable.getJsonPathInvocation().getInputExpression(); + builder.add(inputExpression); + List pathParameters = jsonTable.getJsonPathInvocation().getPathParameters(); + pathParameters.stream() + .map(JsonPathParameter::getParameter) + .forEach(builder::add); + List defaultExpressions = getDefaultExpressions(jsonTable.getColumns()); + builder.addAll(defaultExpressions); + List inputExpressions = builder.build(); + + planBuilder = subqueryPlanner.handleSubqueries(planBuilder, inputExpressions, analysis.getSubqueries(jsonTable)); + planBuilder = planBuilder.appendProjections(inputExpressions, symbolAllocator, idAllocator); + + // apply coercions + // coercions might be necessary for the context item and path parameters before the input functions are applied + // also, the default expressions in value columns (DEFAULT ... ON EMPTY / ON ERROR) might need a coercion to match the required output type + PlanAndMappings coerced = coerce(planBuilder, inputExpressions, analysis, idAllocator, symbolAllocator, typeCoercion); + planBuilder = coerced.getSubPlan(); + + // apply the input function to the input expression + BooleanLiteral failOnError = new BooleanLiteral(jsonTable.getErrorBehavior().orElse(JsonTable.ErrorBehavior.EMPTY) == JsonTable.ErrorBehavior.ERROR ? "true" : "false"); + ResolvedFunction inputToJson = analysis.getJsonInputFunction(inputExpression); + Expression inputJson = new FunctionCall(inputToJson.toQualifiedName(), ImmutableList.of(coerced.get(inputExpression).toSymbolReference(), failOnError)); + + // apply the input functions to the JSON path parameters having FORMAT, + // and collect all JSON path parameters in a Row + List coercedParameters = pathParameters.stream() + .map(parameter -> new JsonPathParameter( + parameter.getLocation(), + parameter.getName(), + coerced.get(parameter.getParameter()).toSymbolReference(), + parameter.getFormat())) + .collect(toImmutableList()); + JsonTableAnalysis jsonTableAnalysis = analysis.getJsonTableAnalysis(jsonTable); + RowType parametersType = jsonTableAnalysis.parametersType(); + ParametersRow orderedParameters = planBuilder.getTranslations().getParametersRow(pathParameters, coercedParameters, parametersType, failOnError); + Expression parametersRow = orderedParameters.getParametersRow(); + + // append projections for inputJson and parametersRow + // cannot use the 'appendProjections()' method because the projected expressions include resolved input functions, so they are not pure AST expressions + Symbol inputJsonSymbol = symbolAllocator.newSymbol("inputJson", JSON_2016); + Symbol parametersRowSymbol = symbolAllocator.newSymbol("parametersRow", parametersType); + ProjectNode appended = new ProjectNode( + idAllocator.getNextId(), + planBuilder.getRoot(), + Assignments.builder() + .putIdentities(planBuilder.getRoot().getOutputSymbols()) + .put(inputJsonSymbol, inputJson) + .put(parametersRowSymbol, parametersRow) + .build()); + planBuilder = planBuilder.withNewRoot(appended); + + // identify the required symbols + ImmutableList.Builder requiredSymbolsBuilder = ImmutableList.builder() + .add(inputJsonSymbol) + .add(parametersRowSymbol); + defaultExpressions.stream() + .map(coerced::get) + .distinct() + .forEach(requiredSymbolsBuilder::add); + List requiredSymbols = requiredSymbolsBuilder.build(); + + // map the default expressions of value columns to indexes in the required columns list + // use a HashMap because there might be duplicate expressions + Map 
defaultExpressionsMapping = new HashMap<>(); + for (Expression defaultExpression : defaultExpressions) { + defaultExpressionsMapping.put(defaultExpression, requiredSymbols.indexOf(coerced.get(defaultExpression))); + } + + // rewrite the root JSON path to IR using parameters + IrJsonPath rootPath = new JsonPathTranslator(session, plannerContext).rewriteToIr(analysis.getJsonPathAnalysis(jsonTable), orderedParameters.getParametersOrder()); + + // create json_table execution plan + List> orderedColumns = jsonTableAnalysis.orderedOutputColumns(); + Map, Integer> outputIndexMapping = IntStream.range(0, orderedColumns.size()) + .boxed() + .collect(toImmutableMap(orderedColumns::get, Function.identity())); + JsonTablePlanNode executionPlan; + boolean defaultErrorOnError = jsonTable.getErrorBehavior().map(errorBehavior -> errorBehavior == JsonTable.ErrorBehavior.ERROR).orElse(false); + if (jsonTable.getPlan().isEmpty()) { + executionPlan = getPlanFromDefaults(rootPath, jsonTable.getColumns(), OUTER, UNION, defaultErrorOnError, outputIndexMapping, defaultExpressionsMapping); + } + else if (jsonTable.getPlan().orElseThrow() instanceof JsonTableDefaultPlan defaultPlan) { + executionPlan = getPlanFromDefaults(rootPath, jsonTable.getColumns(), defaultPlan.getParentChild(), defaultPlan.getSiblings(), defaultErrorOnError, outputIndexMapping, defaultExpressionsMapping); + } + else { + executionPlan = getPlanFromSpecification(rootPath, jsonTable.getColumns(), (JsonTableSpecificPlan) jsonTable.getPlan().orElseThrow(), defaultErrorOnError, outputIndexMapping, defaultExpressionsMapping); + } + + // create new symbols for json_table function's proper columns + // These are the types produced by the table function. + // For ordinality and value columns, the types match the expected output type. + // Query columns return JSON_2016. Later we need to apply an output function, and potentially a coercion to match the declared output type. 
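Editorial note, not part of the patch: a sketch of an invocation exercising the three column kinds handled here; the table and column names are hypothetical.

    SELECT t.*
    FROM orders o, JSON_TABLE(
        o.payload, 'lax $.items[*]'
        COLUMNS (
            ord FOR ORDINALITY,
            price DECIMAL(10, 2) PATH 'lax $.price' DEFAULT 0 ON EMPTY,
            attrs VARCHAR FORMAT JSON PATH 'lax $.attributes'
        )
    ) t

As the surrounding code suggests, the literal 0 is one of the collected default expressions and is passed to the table function as a required symbol, ord and price leave the table function with their declared BIGINT and DECIMAL types, while attrs is produced as JSON_2016 and only becomes VARCHAR through the output function and cast appended in the ProjectNode built further down.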
+ RelationType jsonTableRelationType = analysis.getScope(jsonTable).getRelationType(); + List properOutputs = IntStream.range(0, orderedColumns.size()) + .mapToObj(index -> { + if (orderedColumns.get(index).getNode() instanceof QueryColumn queryColumn) { + return symbolAllocator.newSymbol(queryColumn.getName().getCanonicalValue(), JSON_2016); + } + return symbolAllocator.newSymbol(jsonTableRelationType.getFieldByIndex(index)); + }) + .collect(toImmutableList()); + + // pass through all columns from the left side of the join + List passThroughColumns = leftFieldMappings.stream() + .map(symbol -> new PassThroughColumn(symbol, false)) + .collect(toImmutableList()); + + // determine the join type between the input, and the json_table result + // this join type is not described in the plan, it depends on the enclosing join whose right source is the json_table + // since json_table is a lateral relation, and the join condition is 'true', effectively the join type is either LEFT OUTER or INNER + boolean outer = joinType == LEFT || joinType == FULL; + + // create the TableFunctionNode and TableFunctionHandle + JsonTableFunctionHandle functionHandle = new JsonTableFunctionHandle( + executionPlan, + outer, + defaultErrorOnError, + parametersType, + properOutputs.stream() + .map(symbolAllocator.getTypes()::get) + .toArray(Type[]::new)); + + TableFunctionNode tableFunctionNode = new TableFunctionNode( + idAllocator.getNextId(), + "$json_table", + jsonTableAnalysis.catalogHandle(), + ImmutableMap.of("$input", new TableArgument(getRowType(planBuilder.getRoot()), ImmutableList.of(), ImmutableList.of())), + properOutputs, + ImmutableList.of(planBuilder.getRoot()), + ImmutableList.of(new TableArgumentProperties( + "$input", + true, + true, + new PassThroughSpecification(true, passThroughColumns), + requiredSymbols, + Optional.empty())), + ImmutableList.of(), + new TableFunctionHandle( + jsonTableAnalysis.catalogHandle(), + functionHandle, + jsonTableAnalysis.transactionHandle())); + + // append output functions and coercions for query columns + // The table function returns JSON_2016 for query columns. We need to apply output functions and coercions to match the declared output type. + // create output layout: first the left side of the join, next the proper columns + ImmutableList.Builder outputLayout = ImmutableList.builder() + .addAll(leftFieldMappings); + Assignments.Builder assignments = Assignments.builder() + .putIdentities(leftFieldMappings); + for (int i = 0; i < properOutputs.size(); i++) { + Symbol properOutput = properOutputs.get(i); + if (orderedColumns.get(i).getNode() instanceof QueryColumn queryColumn) { + // apply output function + GenericLiteral errorBehavior = new GenericLiteral( + TINYINT, + String.valueOf(queryColumn.getErrorBehavior().orElse(defaultErrorOnError ? JsonQuery.EmptyOrErrorBehavior.ERROR : JsonQuery.EmptyOrErrorBehavior.NULL).ordinal())); + BooleanLiteral omitQuotes = new BooleanLiteral(queryColumn.getQuotesBehavior().orElse(KEEP) == OMIT ? 
"true" : "false"); + ResolvedFunction outputFunction = analysis.getJsonOutputFunction(queryColumn); + Expression result = new FunctionCall(outputFunction.toQualifiedName(), ImmutableList.of(properOutput.toSymbolReference(), errorBehavior, omitQuotes)); + + // cast to declared returned type + Type expectedType = jsonTableRelationType.getFieldByIndex(i).getType(); + Type resultType = outputFunction.getSignature().getReturnType(); + if (!resultType.equals(expectedType)) { + result = new Cast(result, toSqlType(expectedType)); + } + + Symbol output = symbolAllocator.newSymbol(result, expectedType); + outputLayout.add(output); + assignments.put(output, result); + } + else { + outputLayout.add(properOutput); + assignments.putIdentity(properOutput); + } + } + + ProjectNode projectNode = new ProjectNode( + idAllocator.getNextId(), + tableFunctionNode, + assignments.build()); + + return new RelationPlan(projectNode, outputScope, outputLayout.build(), outerContext); + } + + private static List getDefaultExpressions(List columns) + { + ImmutableList.Builder builder = ImmutableList.builder(); + for (JsonTableColumnDefinition column : columns) { + if (column instanceof ValueColumn valueColumn) { + valueColumn.getEmptyDefault().ifPresent(builder::add); + valueColumn.getErrorDefault().ifPresent(builder::add); + } + else if (column instanceof NestedColumns nestedColumns) { + builder.addAll(getDefaultExpressions(nestedColumns.getColumns())); + } + } + return builder.build(); + } + + private JsonTablePlanNode getPlanFromDefaults( + IrJsonPath path, + List columnDefinitions, + ParentChildPlanType parentChildPlanType, + SiblingsPlanType siblingsPlanType, + boolean defaultErrorOnError, + Map, Integer> outputIndexMapping, + Map defaultExpressionsMapping) + { + ImmutableList.Builder columns = ImmutableList.builder(); + ImmutableList.Builder childrenBuilder = ImmutableList.builder(); + + for (JsonTableColumnDefinition columnDefinition : columnDefinitions) { + if (columnDefinition instanceof NestedColumns nestedColumns) { + IrJsonPath nestedPath = new JsonPathTranslator(session, plannerContext).rewriteToIr(analysis.getJsonPathAnalysis(nestedColumns), ImmutableList.of()); + childrenBuilder.add(getPlanFromDefaults( + nestedPath, + nestedColumns.getColumns(), + parentChildPlanType, + siblingsPlanType, + defaultErrorOnError, + outputIndexMapping, + defaultExpressionsMapping)); + } + else { + columns.add(getColumn(columnDefinition, defaultErrorOnError, outputIndexMapping, defaultExpressionsMapping)); + } + } + + List children = childrenBuilder.build(); + if (children.isEmpty()) { + return new JsonTablePlanLeaf(path, columns.build()); + } + + JsonTablePlanNode child; + if (children.size() == 1) { + child = getOnlyElement(children); + } + else if (siblingsPlanType == UNION) { + child = new JsonTablePlanUnion(children); + } + else { + child = new JsonTablePlanCross(children); + } + + return new JsonTablePlanSingle(path, columns.build(), parentChildPlanType == OUTER, child); + } + + private JsonTablePlanNode getPlanFromSpecification( + IrJsonPath path, + List columnDefinitions, + JsonTableSpecificPlan specificPlan, + boolean defaultErrorOnError, + Map, Integer> outputIndexMapping, + Map defaultExpressionsMapping) + { + ImmutableList.Builder columns = ImmutableList.builder(); + ImmutableMap.Builder childrenBuilder = ImmutableMap.builder(); + Map planSiblings; + if (specificPlan instanceof PlanLeaf) { + planSiblings = ImmutableMap.of(); + } + else { + planSiblings = getSiblings(((PlanParentChild) specificPlan).getChild()); + 
} + + for (JsonTableColumnDefinition columnDefinition : columnDefinitions) { + if (columnDefinition instanceof NestedColumns nestedColumns) { + IrJsonPath nestedPath = new JsonPathTranslator(session, plannerContext).rewriteToIr(analysis.getJsonPathAnalysis(nestedColumns), ImmutableList.of()); + String nestedPathName = nestedColumns.getPathName().orElseThrow().getCanonicalValue(); + JsonTablePlanNode child = getPlanFromSpecification( + nestedPath, + nestedColumns.getColumns(), + planSiblings.get(nestedPathName), + defaultErrorOnError, + outputIndexMapping, + defaultExpressionsMapping); + childrenBuilder.put(nestedPathName, child); + } + else { + columns.add(getColumn(columnDefinition, defaultErrorOnError, outputIndexMapping, defaultExpressionsMapping)); + } + } + + Map children = childrenBuilder.buildOrThrow(); + if (children.isEmpty()) { + return new JsonTablePlanLeaf(path, columns.build()); + } + + PlanParentChild planParentChild = (PlanParentChild) specificPlan; + boolean outer = planParentChild.getType() == OUTER; + JsonTablePlanNode child = combineSiblings(children, planParentChild.getChild()); + return new JsonTablePlanSingle(path, columns.build(), outer, child); + } + + private Map getSiblings(JsonTableSpecificPlan plan) + { + if (plan instanceof PlanLeaf planLeaf) { + return ImmutableMap.of(planLeaf.getName().getCanonicalValue(), planLeaf); + } + if (plan instanceof PlanParentChild planParentChild) { + return ImmutableMap.of(planParentChild.getParent().getName().getCanonicalValue(), planParentChild); + } + PlanSiblings planSiblings = (PlanSiblings) plan; + ImmutableMap.Builder siblings = ImmutableMap.builder(); + for (JsonTableSpecificPlan sibling : planSiblings.getSiblings()) { + siblings.putAll(getSiblings(sibling)); + } + return siblings.buildOrThrow(); + } + + private JsonTableColumn getColumn( + JsonTableColumnDefinition columnDefinition, + boolean defaultErrorOnError, + Map, Integer> outputIndexMapping, + Map defaultExpressionsMapping) + { + int index = outputIndexMapping.get(NodeRef.of(columnDefinition)); + + if (columnDefinition instanceof OrdinalityColumn) { + return new JsonTableOrdinalityColumn(index); + } + ResolvedFunction columnFunction = analysis.getResolvedFunction(columnDefinition); + IrJsonPath columnPath = new JsonPathTranslator(session, plannerContext).rewriteToIr(analysis.getJsonPathAnalysis(columnDefinition), ImmutableList.of()); + if (columnDefinition instanceof QueryColumn queryColumn) { + return new JsonTableQueryColumn( + index, + columnFunction, + columnPath, + queryColumn.getWrapperBehavior().ordinal(), + queryColumn.getEmptyBehavior().ordinal(), + queryColumn.getErrorBehavior().orElse(defaultErrorOnError ? JsonQuery.EmptyOrErrorBehavior.ERROR : JsonQuery.EmptyOrErrorBehavior.NULL).ordinal()); + } + if (columnDefinition instanceof ValueColumn valueColumn) { + int emptyDefault = valueColumn.getEmptyDefault() + .map(defaultExpressionsMapping::get) + .orElse(-1); + int errorDefault = valueColumn.getErrorDefault() + .map(defaultExpressionsMapping::get) + .orElse(-1); + return new JsonTableValueColumn( + index, + columnFunction, + columnPath, + valueColumn.getEmptyBehavior().ordinal(), + emptyDefault, + valueColumn.getErrorBehavior().orElse(defaultErrorOnError ? 
JsonValue.EmptyOrErrorBehavior.ERROR : JsonValue.EmptyOrErrorBehavior.NULL).ordinal(), + errorDefault); + } + throw new IllegalStateException("unexpected column definition: " + columnDefinition.getClass().getSimpleName()); + } + + private JsonTablePlanNode combineSiblings(Map siblings, JsonTableSpecificPlan plan) + { + if (plan instanceof PlanLeaf planLeaf) { + return siblings.get(planLeaf.getName().getCanonicalValue()); + } + if (plan instanceof PlanParentChild planParentChild) { + return siblings.get(planParentChild.getParent().getName().getCanonicalValue()); + } + PlanSiblings planSiblings = (PlanSiblings) plan; + List siblingNodes = planSiblings.getSiblings().stream() + .map(sibling -> combineSiblings(siblings, sibling)) + .collect(toImmutableList()); + if (planSiblings.getType() == UNION) { + return new JsonTablePlanUnion(siblingNodes); + } + return new JsonTablePlanCross(siblingNodes); + } + + private RowType getRowType(PlanNode node) + { + // create a RowType based on output symbols of a node + // The node is an intermediate stage of planning json_table. There's no recorded relation type available for this node. + // The returned RowType is only used in plan printer + return RowType.from(node.getOutputSymbols().stream() + .map(symbol -> new RowType.Field(Optional.of(symbol.getName()), symbolAllocator.getTypes().get(symbol))) + .collect(toImmutableList())); + } + @Override protected RelationPlan visitTableSubquery(TableSubquery node, Void context) { @@ -1206,6 +1656,17 @@ private PlanBuilder planSingleEmptyRow(Optional parent) return new PlanBuilder(translations, values); } + @Override + protected RelationPlan visitJsonTable(JsonTable node, Void context) + { + return planJoinJsonTable( + planSingleEmptyRow(analysis.getScope(node).getOuterQueryParent()), + ImmutableList.of(), + INNER, + node, + analysis.getScope(node)); + } + @Override protected RelationPlan visitUnion(Union node, Void context) { diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/ResolvedFunctionCallRewriter.java b/core/trino-main/src/main/java/io/trino/sql/planner/ResolvedFunctionCallRewriter.java index 2a5e457e611ac..8b2c6c9851064 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/ResolvedFunctionCallRewriter.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/ResolvedFunctionCallRewriter.java @@ -18,6 +18,7 @@ import io.trino.sql.tree.ExpressionRewriter; import io.trino.sql.tree.ExpressionTreeRewriter; import io.trino.sql.tree.FunctionCall; +import io.trino.sql.tree.Node; import io.trino.sql.tree.NodeRef; import java.util.Map; @@ -29,7 +30,7 @@ public final class ResolvedFunctionCallRewriter { private ResolvedFunctionCallRewriter() {} - public static Expression rewriteResolvedFunctions(Expression expression, Map, ResolvedFunction> resolvedFunctions) + public static Expression rewriteResolvedFunctions(Expression expression, Map, ResolvedFunction> resolvedFunctions) { return ExpressionTreeRewriter.rewriteWith(new Visitor(resolvedFunctions), expression); } @@ -37,9 +38,9 @@ public static Expression rewriteResolvedFunctions(Expression expression, Map { - private final Map, ResolvedFunction> resolvedFunctions; + private final Map, ResolvedFunction> resolvedFunctions; - public Visitor(Map, ResolvedFunction> resolvedFunctions) + public Visitor(Map, ResolvedFunction> resolvedFunctions) { this.resolvedFunctions = requireNonNull(resolvedFunctions, "resolvedFunctions is null"); } diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/TranslationMap.java 
b/core/trino-main/src/main/java/io/trino/sql/planner/TranslationMap.java index 9e5ec42b8aa4e..29de734e88044 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/TranslationMap.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/TranslationMap.java @@ -913,40 +913,6 @@ public Expression rewriteJsonQuery(JsonQuery node, Void context, ExpressionTreeR return coerceIfNecessary(node, result); } - private ParametersRow getParametersRow( - List pathParameters, - List rewrittenPathParameters, - Type parameterRowType, - BooleanLiteral failOnError) - { - Expression parametersRow; - List parametersOrder; - if (!pathParameters.isEmpty()) { - ImmutableList.Builder parameters = ImmutableList.builder(); - for (int i = 0; i < pathParameters.size(); i++) { - ResolvedFunction parameterToJson = analysis.getJsonInputFunction(pathParameters.get(i).getParameter()); - Expression rewrittenParameter = rewrittenPathParameters.get(i).getParameter(); - if (parameterToJson != null) { - parameters.add(new FunctionCall(parameterToJson.toQualifiedName(), ImmutableList.of(rewrittenParameter, failOnError))); - } - else { - parameters.add(rewrittenParameter); - } - } - parametersRow = new Cast(new Row(parameters.build()), toSqlType(parameterRowType)); - parametersOrder = pathParameters.stream() - .map(parameter -> parameter.getName().getCanonicalValue()) - .collect(toImmutableList()); - } - else { - checkState(JSON_NO_PARAMETERS_ROW_TYPE.equals(parameterRowType), "invalid type of parameters row when no parameters are passed"); - parametersRow = new Cast(new NullLiteral(), toSqlType(JSON_NO_PARAMETERS_ROW_TYPE)); - parametersOrder = ImmutableList.of(); - } - - return new ParametersRow(parametersRow, parametersOrder); - } - @Override public Expression rewriteJsonObject(JsonObject node, Void context, ExpressionTreeRewriter treeRewriter) { @@ -1132,7 +1098,41 @@ public Scope getScope() return scope; } - private static class ParametersRow + public ParametersRow getParametersRow( + List pathParameters, + List rewrittenPathParameters, + Type parameterRowType, + BooleanLiteral failOnError) + { + Expression parametersRow; + List parametersOrder; + if (!pathParameters.isEmpty()) { + ImmutableList.Builder parameters = ImmutableList.builder(); + for (int i = 0; i < pathParameters.size(); i++) { + ResolvedFunction parameterToJson = analysis.getJsonInputFunction(pathParameters.get(i).getParameter()); + Expression rewrittenParameter = rewrittenPathParameters.get(i).getParameter(); + if (parameterToJson != null) { + parameters.add(new FunctionCall(parameterToJson.toQualifiedName(), ImmutableList.of(rewrittenParameter, failOnError))); + } + else { + parameters.add(rewrittenParameter); + } + } + parametersRow = new Cast(new Row(parameters.build()), toSqlType(parameterRowType)); + parametersOrder = pathParameters.stream() + .map(parameter -> parameter.getName().getCanonicalValue()) + .collect(toImmutableList()); + } + else { + checkState(JSON_NO_PARAMETERS_ROW_TYPE.equals(parameterRowType), "invalid type of parameters row when no parameters are passed"); + parametersRow = new Cast(new NullLiteral(), toSqlType(JSON_NO_PARAMETERS_ROW_TYPE)); + parametersOrder = ImmutableList.of(); + } + + return new ParametersRow(parametersRow, parametersOrder); + } + + public static class ParametersRow { private final Expression parametersRow; private final List parametersOrder; diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithProjection.java 
b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithProjection.java index 6966153344fb4..274626878ed71 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithProjection.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithProjection.java @@ -150,7 +150,7 @@ public Pattern getPattern() @Override public Result apply(CorrelatedJoinNode correlatedJoinNode, Captures captures, Context context) { - checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: " + correlatedJoinNode.getType()); + checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: %s", correlatedJoinNode.getType()); // if there is another aggregation below the AggregationNode, handle both PlanNode source = captures.get(SOURCE); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithoutProjection.java b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithoutProjection.java index b8420729e1284..9a3c32e538cba 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithoutProjection.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedGlobalAggregationWithoutProjection.java @@ -143,7 +143,7 @@ public Pattern getPattern() @Override public Result apply(CorrelatedJoinNode correlatedJoinNode, Captures captures, Context context) { - checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: " + correlatedJoinNode.getType()); + checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: %s", correlatedJoinNode.getType()); PlanNode source = captures.get(SOURCE); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedScalarSubquery.java b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedScalarSubquery.java index 401219173f837..f42c8dcdf04b3 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedScalarSubquery.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/iterative/rule/TransformCorrelatedScalarSubquery.java @@ -103,7 +103,7 @@ public Pattern getPattern() public Result apply(CorrelatedJoinNode correlatedJoinNode, Captures captures, Context context) { // lateral references are only allowed for INNER or LEFT correlated join - checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: " + correlatedJoinNode.getType()); + checkArgument(correlatedJoinNode.getType() == INNER || correlatedJoinNode.getType() == LEFT, "unexpected correlated join type: %s", correlatedJoinNode.getType()); PlanNode subquery = context.getLookup().resolve(correlatedJoinNode.getSubquery()); if (!searchFrom(subquery, context.getLookup()) diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/PredicatePushDown.java b/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/PredicatePushDown.java index e4bb453baf727..2321e456fa1e1 100644 --- 
a/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/PredicatePushDown.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/optimizations/PredicatePushDown.java @@ -1008,16 +1008,10 @@ private InnerJoinPushDownResult processInnerJoin( ImmutableSet leftScope = ImmutableSet.copyOf(leftSymbols); ImmutableSet rightScope = ImmutableSet.copyOf(rightSymbols); - // Attempt to simplify the effective left/right predicates with the predicate we're pushing down - // This, effectively, inlines any constants derived from such predicate - EqualityInference predicateInference = new EqualityInference(metadata, inheritedPredicate); - Expression simplifiedLeftEffectivePredicate = predicateInference.rewrite(leftEffectivePredicate, leftScope); - Expression simplifiedRightEffectivePredicate = predicateInference.rewrite(rightEffectivePredicate, rightScope); - // Generate equality inferences - EqualityInference allInference = new EqualityInference(metadata, inheritedPredicate, leftEffectivePredicate, rightEffectivePredicate, joinPredicate, simplifiedLeftEffectivePredicate, simplifiedRightEffectivePredicate); - EqualityInference allInferenceWithoutLeftInferred = new EqualityInference(metadata, inheritedPredicate, rightEffectivePredicate, joinPredicate, simplifiedRightEffectivePredicate); - EqualityInference allInferenceWithoutRightInferred = new EqualityInference(metadata, inheritedPredicate, leftEffectivePredicate, joinPredicate, simplifiedLeftEffectivePredicate); + EqualityInference allInference = new EqualityInference(metadata, inheritedPredicate, leftEffectivePredicate, rightEffectivePredicate, joinPredicate); + EqualityInference allInferenceWithoutLeftInferred = new EqualityInference(metadata, inheritedPredicate, rightEffectivePredicate, joinPredicate); + EqualityInference allInferenceWithoutRightInferred = new EqualityInference(metadata, inheritedPredicate, leftEffectivePredicate, joinPredicate); // Add equalities from the inference back in leftPushDownConjuncts.addAll(allInferenceWithoutLeftInferred.generateEqualitiesPartitionedBy(leftScope).getScopeEqualities()); @@ -1043,13 +1037,13 @@ private InnerJoinPushDownResult processInnerJoin( }); // See if we can push the right effective predicate to the left side - EqualityInference.nonInferrableConjuncts(metadata, simplifiedRightEffectivePredicate) + EqualityInference.nonInferrableConjuncts(metadata, rightEffectivePredicate) .map(conjunct -> allInference.rewrite(conjunct, leftScope)) .filter(Objects::nonNull) .forEach(leftPushDownConjuncts::add); // See if we can push the left effective predicate to the right side - EqualityInference.nonInferrableConjuncts(metadata, simplifiedLeftEffectivePredicate) + EqualityInference.nonInferrableConjuncts(metadata, leftEffectivePredicate) .map(conjunct -> allInference.rewrite(conjunct, rightScope)) .filter(Objects::nonNull) .forEach(rightPushDownConjuncts::add); diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/rowpattern/LogicalIndexPointer.java b/core/trino-main/src/main/java/io/trino/sql/planner/rowpattern/LogicalIndexPointer.java index 57335836a4475..1edec2145f7dc 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/rowpattern/LogicalIndexPointer.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/rowpattern/LogicalIndexPointer.java @@ -53,7 +53,7 @@ public LogicalIndexPointer(Set labels, boolean last, boolean running, i this.labels = requireNonNull(labels, "labels is null"); this.last = last; this.running = running; - checkArgument(logicalOffset >= 0, "logical 
offset must be >= 0, actual: " + logicalOffset); + checkArgument(logicalOffset >= 0, "logical offset must be >= 0, actual: %s", logicalOffset); this.logicalOffset = logicalOffset; this.physicalOffset = physicalOffset; } diff --git a/core/trino-main/src/main/java/io/trino/sql/relational/ConstantExpression.java b/core/trino-main/src/main/java/io/trino/sql/relational/ConstantExpression.java index 9cbf9fd8c8243..fa7f804120647 100644 --- a/core/trino-main/src/main/java/io/trino/sql/relational/ConstantExpression.java +++ b/core/trino-main/src/main/java/io/trino/sql/relational/ConstantExpression.java @@ -15,6 +15,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.primitives.Primitives; import com.google.errorprone.annotations.DoNotCall; import io.airlift.slice.Slice; import io.trino.spi.block.Block; @@ -48,6 +49,13 @@ public static ConstantExpression fromJson( public ConstantExpression(Object value, Type type) { requireNonNull(type, "type is null"); + if (value != null && !Primitives.wrap(type.getJavaType()).isInstance(value)) { + throw new IllegalArgumentException("Invalid value %s of Java type %s for Trino type %s, expected instance of %s".formatted( + value, + value.getClass(), + type, + type.getJavaType())); + } this.value = value; this.type = type; diff --git a/core/trino-main/src/main/java/io/trino/sql/relational/SqlToRowExpressionTranslator.java b/core/trino-main/src/main/java/io/trino/sql/relational/SqlToRowExpressionTranslator.java index 8fd3e2efd5b81..dfedb6720c67b 100644 --- a/core/trino-main/src/main/java/io/trino/sql/relational/SqlToRowExpressionTranslator.java +++ b/core/trino-main/src/main/java/io/trino/sql/relational/SqlToRowExpressionTranslator.java @@ -702,7 +702,7 @@ protected RowExpression visitSubscriptExpression(SubscriptExpression node, Void if (getType(node.getBase()) instanceof RowType) { long value = (Long) ((ConstantExpression) index).getValue(); - return new SpecialForm(DEREFERENCE, getType(node), base, constant((int) value - 1, INTEGER)); + return new SpecialForm(DEREFERENCE, getType(node), base, constant(value - 1, INTEGER)); } return call( diff --git a/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeInputRewrite.java b/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeInputRewrite.java index 9f917d2e6d445..fe524be5f298f 100644 --- a/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeInputRewrite.java +++ b/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeInputRewrite.java @@ -88,7 +88,7 @@ private static final class Visitor extends AstVisitor { private static final Query EMPTY_INPUT = createDesctibeInputQuery( - new Row[]{row( + new Row[] {row( new Cast(new NullLiteral(), toSqlType(BIGINT)), new Cast(new NullLiteral(), toSqlType(VARCHAR)))}, Optional.of(new Limit(new LongLiteral("0")))); diff --git a/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeOutputRewrite.java b/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeOutputRewrite.java index b8a78d502e299..9acab16abe275 100644 --- a/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeOutputRewrite.java +++ b/core/trino-main/src/main/java/io/trino/sql/rewrite/DescribeOutputRewrite.java @@ -88,7 +88,7 @@ private static final class Visitor extends AstVisitor { private static final Query EMPTY_OUTPUT = createDesctibeOutputQuery( - new Row[]{row( + new Row[] {row( new Cast(new NullLiteral(), toSqlType(VARCHAR)), new Cast(new NullLiteral(), toSqlType(VARCHAR)), new 
Cast(new NullLiteral(), toSqlType(VARCHAR)), diff --git a/core/trino-main/src/main/java/io/trino/sql/rewrite/ShowQueriesRewrite.java b/core/trino-main/src/main/java/io/trino/sql/rewrite/ShowQueriesRewrite.java index 54c5ddec92f2c..055bb28eb64bd 100644 --- a/core/trino-main/src/main/java/io/trino/sql/rewrite/ShowQueriesRewrite.java +++ b/core/trino-main/src/main/java/io/trino/sql/rewrite/ShowQueriesRewrite.java @@ -606,7 +606,7 @@ protected Node visitShowCreate(ShowCreate node, Void context) accessControl.checkCanShowCreateTable(session.toSecurityContext(), new QualifiedObjectName(catalogName.getValue(), schemaName.getValue(), tableName.getValue())); - Map properties = viewDefinition.get().getProperties(); + Map properties = metadata.getMaterializedViewProperties(session, objectName, viewDefinition.get()); CatalogHandle catalogHandle = getRequiredCatalogHandle(metadata, session, node, catalogName.getValue()); Collection> allMaterializedViewProperties = materializedViewPropertyManager.getAllProperties(catalogHandle); List propertyNodes = buildProperties(objectName, Optional.empty(), INVALID_MATERIALIZED_VIEW_PROPERTY, properties, allMaterializedViewProperties); diff --git a/core/trino-main/src/main/java/io/trino/testing/LocalQueryRunner.java b/core/trino-main/src/main/java/io/trino/testing/LocalQueryRunner.java index cf607bef71191..b2119dc59ab25 100644 --- a/core/trino-main/src/main/java/io/trino/testing/LocalQueryRunner.java +++ b/core/trino-main/src/main/java/io/trino/testing/LocalQueryRunner.java @@ -77,7 +77,6 @@ import io.trino.execution.scheduler.NodeSchedulerConfig; import io.trino.execution.scheduler.UniformNodeSelectorFactory; import io.trino.execution.warnings.WarningCollector; -import io.trino.index.IndexManager; import io.trino.memory.MemoryManagerConfig; import io.trino.memory.NodeMemoryConfig; import io.trino.metadata.AnalyzePropertyManager; @@ -121,10 +120,11 @@ import io.trino.operator.PagesIndexPageSorter; import io.trino.operator.TaskContext; import io.trino.operator.index.IndexJoinLookupStats; +import io.trino.operator.index.IndexManager; import io.trino.operator.scalar.json.JsonExistsFunction; import io.trino.operator.scalar.json.JsonQueryFunction; import io.trino.operator.scalar.json.JsonValueFunction; -import io.trino.operator.table.ExcludeColumns.ExcludeColumnsFunction; +import io.trino.operator.table.ExcludeColumnsFunction; import io.trino.plugin.base.security.AllowAllSystemAccessControl; import io.trino.security.GroupProviderManager; import io.trino.server.PluginManager; @@ -139,6 +139,7 @@ import io.trino.spi.PageSorter; import io.trino.spi.Plugin; import io.trino.spi.connector.CatalogHandle; +import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorFactory; import io.trino.spi.exchange.ExchangeManager; import io.trino.spi.session.PropertyMetadata; @@ -252,8 +253,8 @@ import static io.trino.sql.planner.LogicalPlanner.Stage.OPTIMIZED_AND_VALIDATED; import static io.trino.sql.planner.optimizations.PlanNodeSearcher.searchFrom; import static io.trino.sql.testing.TreeAssertions.assertFormattedSql; -import static io.trino.transaction.TransactionBuilder.transaction; -import static io.trino.version.EmbedVersion.testingVersionEmbedder; +import static io.trino.testing.TransactionBuilder.transaction; +import static io.trino.util.EmbedVersion.testingVersionEmbedder; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; @@ -377,7 
+378,10 @@ private LocalQueryRunner( TypeManager typeManager = new InternalTypeManager(typeRegistry); InternalBlockEncodingSerde blockEncodingSerde = new InternalBlockEncodingSerde(blockEncodingManager, typeManager); - this.globalFunctionCatalog = new GlobalFunctionCatalog(); + this.globalFunctionCatalog = new GlobalFunctionCatalog( + this::getMetadata, + this::getTypeManager, + this::getFunctionManager); globalFunctionCatalog.addFunctions(new InternalFunctionBundle(new LiteralFunction(blockEncodingSerde))); globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(featuresConfig, typeOperators, blockTypeOperators, nodeManager.getCurrentNode().getNodeVersion())); this.groupProvider = new TestingGroupProviderManager(); @@ -786,6 +790,13 @@ public CatalogManager getCatalogManager() return catalogManager; } + public Connector getConnector(String catalogName) + { + return catalogManager + .getConnectorServices(getCatalogHandle(catalogName)) + .getConnector(); + } + public LocalQueryRunner printPlan() { printPlan = true; @@ -1193,11 +1204,10 @@ public static class Builder private final Session defaultSession; private FeaturesConfig featuresConfig = new FeaturesConfig(); private NodeSpillConfig nodeSpillConfig = new NodeSpillConfig(); - private boolean initialTransaction; private boolean alwaysRevokeMemory; private Map>> defaultSessionProperties = ImmutableMap.of(); private Set extraSessionProperties = ImmutableSet.of(); - private int nodeCountForStats; + private int nodeCountForStats = 1; private Function metadataDecorator = Function.identity(); private Builder(Session defaultSession) diff --git a/core/trino-main/src/main/java/io/trino/testing/TestingConnectorContext.java b/core/trino-main/src/main/java/io/trino/testing/TestingConnectorContext.java index d7ff18054488c..a74f99807a950 100644 --- a/core/trino-main/src/main/java/io/trino/testing/TestingConnectorContext.java +++ b/core/trino-main/src/main/java/io/trino/testing/TestingConnectorContext.java @@ -31,7 +31,7 @@ import io.trino.spi.type.TypeManager; import io.trino.spi.type.TypeOperators; import io.trino.sql.gen.JoinCompiler; -import io.trino.version.EmbedVersion; +import io.trino.util.EmbedVersion; import static io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER; import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; diff --git a/core/trino-main/src/main/java/io/trino/testing/TestingMetadata.java b/core/trino-main/src/main/java/io/trino/testing/TestingMetadata.java index 27f174bc7da52..7cbd8aabfc387 100644 --- a/core/trino-main/src/main/java/io/trino/testing/TestingMetadata.java +++ b/core/trino-main/src/main/java/io/trino/testing/TestingMetadata.java @@ -234,7 +234,13 @@ public Optional getView(ConnectorSession session, Schem } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { if (replace) { materializedViews.put(viewName, definition); @@ -264,6 +270,12 @@ public Optional getMaterializedView(Connect return Optional.ofNullable(materializedViews.get(viewName)); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition materializedViewDefinition) + { + return ImmutableMap.of(); + } + 
@Override public void dropMaterializedView(ConnectorSession session, SchemaTableName viewName) { diff --git a/core/trino-main/src/main/java/io/trino/testing/TestingTaskContext.java b/core/trino-main/src/main/java/io/trino/testing/TestingTaskContext.java index 04d4fb8d00909..f5cb6160b2715 100644 --- a/core/trino-main/src/main/java/io/trino/testing/TestingTaskContext.java +++ b/core/trino-main/src/main/java/io/trino/testing/TestingTaskContext.java @@ -39,21 +39,21 @@ public final class TestingTaskContext private TestingTaskContext() {} - public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService yieldExecutor, Session session) + public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService scheduledExecutor, Session session) { - return builder(notificationExecutor, yieldExecutor, session).build(); + return builder(notificationExecutor, scheduledExecutor, session).build(); } - public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService yieldExecutor, Session session, DataSize maxMemory) + public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService scheduledExecutor, Session session, DataSize maxMemory) { - return builder(notificationExecutor, yieldExecutor, session) + return builder(notificationExecutor, scheduledExecutor, session) .setQueryMaxMemory(maxMemory) .build(); } - public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService yieldExecutor, Session session, TaskStateMachine taskStateMachine) + public static TaskContext createTaskContext(Executor notificationExecutor, ScheduledExecutorService scheduledExecutor, Session session, TaskStateMachine taskStateMachine) { - return builder(notificationExecutor, yieldExecutor, session) + return builder(notificationExecutor, scheduledExecutor, session) .setTaskStateMachine(taskStateMachine) .build(); } @@ -73,15 +73,15 @@ private static TaskContext createTaskContext(QueryContext queryContext, Session true); } - public static Builder builder(Executor notificationExecutor, ScheduledExecutorService yieldExecutor, Session session) + public static Builder builder(Executor notificationExecutor, ScheduledExecutorService scheduledExecutor, Session session) { - return new Builder(notificationExecutor, yieldExecutor, session); + return new Builder(notificationExecutor, scheduledExecutor, session); } public static class Builder { private final Executor notificationExecutor; - private final ScheduledExecutorService yieldExecutor; + private final ScheduledExecutorService scheduledExecutor; private final Session session; private QueryId queryId = new QueryId("test_query"); private TaskStateMachine taskStateMachine; @@ -90,10 +90,10 @@ public static class Builder private DataSize maxSpillSize = DataSize.of(1, GIGABYTE); private DataSize queryMaxSpillSize = DataSize.of(1, GIGABYTE); - private Builder(Executor notificationExecutor, ScheduledExecutorService yieldExecutor, Session session) + private Builder(Executor notificationExecutor, ScheduledExecutorService scheduledExecutor, Session session) { this.notificationExecutor = notificationExecutor; - this.yieldExecutor = yieldExecutor; + this.scheduledExecutor = scheduledExecutor; this.session = session; } @@ -148,7 +148,8 @@ public TaskContext build() 0L, GC_MONITOR, notificationExecutor, - yieldExecutor, + scheduledExecutor, + scheduledExecutor, queryMaxSpillSize, spillSpaceTracker); diff --git 
a/core/trino-main/src/main/java/io/trino/transaction/TransactionBuilder.java b/core/trino-main/src/main/java/io/trino/testing/TransactionBuilder.java similarity index 97% rename from core/trino-main/src/main/java/io/trino/transaction/TransactionBuilder.java rename to core/trino-main/src/main/java/io/trino/testing/TransactionBuilder.java index d2a1ec3e50fd1..d2345917c421b 100644 --- a/core/trino-main/src/main/java/io/trino/transaction/TransactionBuilder.java +++ b/core/trino-main/src/main/java/io/trino/testing/TransactionBuilder.java @@ -11,13 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.transaction; +package io.trino.testing; import io.trino.Session; import io.trino.execution.QueryIdGenerator; import io.trino.metadata.Metadata; import io.trino.security.AccessControl; import io.trino.spi.transaction.IsolationLevel; +import io.trino.transaction.TransactionId; +import io.trino.transaction.TransactionInfo; +import io.trino.transaction.TransactionManager; import java.util.function.Consumer; import java.util.function.Function; diff --git a/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java b/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java index 89de7ad8879e4..774bc5b4a03a8 100644 --- a/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java +++ b/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java @@ -1245,11 +1245,17 @@ public void validateScan(ConnectorSession session, ConnectorTableHandle handle) } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { Span span = startSpan("createMaterializedView", viewName); try (var ignored = scopedSpan(span)) { - delegate.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + delegate.createMaterializedView(session, viewName, definition, properties, replace, ignoreExisting); } } @@ -1289,6 +1295,15 @@ public Optional getMaterializedView(Connect } } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition materializedViewDefinition) + { + Span span = startSpan("getMaterializedViewProperties", viewName); + try (var ignored = scopedSpan(span)) { + return delegate.getMaterializedViewProperties(session, viewName, materializedViewDefinition); + } + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(ConnectorSession session, SchemaTableName name) { diff --git a/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java b/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java index 09e35c8ef453d..d6e0dfedf6a58 100644 --- a/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java +++ b/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java @@ -1302,11 +1302,17 @@ public void dropLanguageFunction(Session session, QualifiedObjectName name, Stri } @Override - public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + Session 
session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { Span span = startSpan("createMaterializedView", viewName); try (var ignored = scopedSpan(span)) { - delegate.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + delegate.createMaterializedView(session, viewName, definition, properties, replace, ignoreExisting); } } @@ -1355,6 +1361,15 @@ public Optional getMaterializedView(Session session, } } + @Override + public Map getMaterializedViewProperties(Session session, QualifiedObjectName objectName, MaterializedViewDefinition materializedViewDefinition) + { + Span span = startSpan("getMaterializedViewProperties", objectName); + try (var ignored = scopedSpan(span)) { + return delegate.getMaterializedViewProperties(session, objectName, materializedViewDefinition); + } + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(Session session, QualifiedObjectName name) { diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/DecoratingListeningExecutorService.java b/core/trino-main/src/main/java/io/trino/util/DecoratingListeningExecutorService.java similarity index 79% rename from core/trino-main/src/main/java/io/trino/dispatcher/DecoratingListeningExecutorService.java rename to core/trino-main/src/main/java/io/trino/util/DecoratingListeningExecutorService.java index e9ef1b4fe9f2c..405037f6a14fe 100644 --- a/core/trino-main/src/main/java/io/trino/dispatcher/DecoratingListeningExecutorService.java +++ b/core/trino-main/src/main/java/io/trino/util/DecoratingListeningExecutorService.java @@ -11,50 +11,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.dispatcher; +package io.trino.util; import com.google.common.util.concurrent.ForwardingListeningExecutorService; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; -import jakarta.annotation.Nullable; -import java.lang.invoke.MethodHandle; -import java.lang.reflect.Method; import java.time.Duration; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import static com.google.common.base.Throwables.throwIfUnchecked; import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.util.Reflection.methodHandle; import static java.util.Objects.requireNonNull; public class DecoratingListeningExecutorService extends ForwardingListeningExecutorService implements ListeningExecutorService { - // TODO remove after requiring Java 19+ for runtime. - private static final @Nullable MethodHandle CLOSE_METHOD; - - static { - Method closeMethod; - try { - closeMethod = ExecutorService.class.getMethod("close"); - } - catch (NoSuchMethodException e) { - closeMethod = null; - } - CLOSE_METHOD = closeMethod != null - ? methodHandle(closeMethod) - : null; - } - private final ListeningExecutorService delegate; private final TaskDecorator decorator; @@ -194,21 +172,10 @@ public boolean awaitTermination(Duration duration) return super.awaitTermination(duration); } - // TODO This is temporary, until Guava's ForwardingExecutorService has the method in their interface. 
See https://github.com/google/guava/issues/6296 - //@Override + @Override public void close() { - if (CLOSE_METHOD == null) { - throw new UnsupportedOperationException("ExecutorService.close has close() method since Java 19. " + - "The DecoratingListeningExecutorService supports the method only when run with Java 19 runtime."); - } - try { - CLOSE_METHOD.invoke(delegate()); - } - catch (Throwable e) { - throwIfUnchecked(e); - throw new RuntimeException(e); - } + delegate.close(); } public interface TaskDecorator diff --git a/core/trino-main/src/main/java/io/trino/version/EmbedVersion.java b/core/trino-main/src/main/java/io/trino/util/EmbedVersion.java similarity index 99% rename from core/trino-main/src/main/java/io/trino/version/EmbedVersion.java rename to core/trino-main/src/main/java/io/trino/util/EmbedVersion.java index 07402220f6b37..774a8c672e523 100644 --- a/core/trino-main/src/main/java/io/trino/version/EmbedVersion.java +++ b/core/trino-main/src/main/java/io/trino/util/EmbedVersion.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.version; +package io.trino.util; import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; diff --git a/core/trino-main/src/main/java/io/trino/util/Executors.java b/core/trino-main/src/main/java/io/trino/util/Executors.java index c98e087406a16..1062de31a53df 100644 --- a/core/trino-main/src/main/java/io/trino/util/Executors.java +++ b/core/trino-main/src/main/java/io/trino/util/Executors.java @@ -13,6 +13,9 @@ */ package io.trino.util; +import com.google.common.util.concurrent.ListeningExecutorService; +import io.trino.spi.VersionEmbedder; + import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -20,8 +23,10 @@ import java.util.concurrent.CompletionService; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; +import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; import static io.airlift.concurrent.MoreFutures.getDone; public final class Executors @@ -66,4 +71,29 @@ private static Future take(CompletionService completionService) throw new RuntimeException("Interrupted", e); } } + + public static ListeningExecutorService decorateWithVersion(ExecutorService executorService, VersionEmbedder versionEmbedder) + { + return decorateWithVersion(listeningDecorator(executorService), versionEmbedder); + } + + public static ListeningExecutorService decorateWithVersion(ListeningExecutorService executorService, VersionEmbedder versionEmbedder) + { + return new DecoratingListeningExecutorService( + executorService, + new DecoratingListeningExecutorService.TaskDecorator() + { + @Override + public Runnable decorate(Runnable command) + { + return versionEmbedder.embedVersion(command); + } + + @Override + public Callable decorate(Callable task) + { + return versionEmbedder.embedVersion(task); + } + }); + } } diff --git a/core/trino-main/src/main/java/io/trino/util/StatementUtils.java b/core/trino-main/src/main/java/io/trino/util/StatementUtils.java index cf68c4ab700b8..d89628f67e4e9 100644 --- a/core/trino-main/src/main/java/io/trino/util/StatementUtils.java +++ b/core/trino-main/src/main/java/io/trino/util/StatementUtils.java @@ -317,10 +317,10 @@ private StatementTypeInfo(Class statementType, this.queryType = requireNonNull(queryType, "queryType is null"); this.taskType = 
requireNonNull(taskType, "taskType is null"); if (queryType == DATA_DEFINITION) { - checkArgument(taskType.isPresent(), "taskType is required for " + DATA_DEFINITION); + checkArgument(taskType.isPresent(), "taskType is required for %s", DATA_DEFINITION); } else { - checkArgument(taskType.isEmpty(), "taskType is not allowed for " + queryType); + checkArgument(taskType.isEmpty(), "taskType is not allowed for %s", queryType); } } diff --git a/core/trino-main/src/test/java/io/trino/block/BlockAssertions.java b/core/trino-main/src/test/java/io/trino/block/BlockAssertions.java index b6cffe2973545..038e82963e82c 100644 --- a/core/trino-main/src/test/java/io/trino/block/BlockAssertions.java +++ b/core/trino-main/src/test/java/io/trino/block/BlockAssertions.java @@ -139,7 +139,7 @@ public static Block createRandomDictionaryBlock(Block dictionary, int positionCo public static RunLengthEncodedBlock createRandomRleBlock(Block block, int positionCount) { - checkArgument(block.getPositionCount() >= 2, format("block positions %d is less 2", block.getPositionCount())); + checkArgument(block.getPositionCount() >= 2, "block positions %s is less than 2", block.getPositionCount()); return (RunLengthEncodedBlock) RunLengthEncodedBlock.create(block.getSingleValueBlock(random().nextInt(block.getPositionCount())), positionCount); } diff --git a/core/trino-main/src/test/java/io/trino/block/TestRowBlock.java b/core/trino-main/src/test/java/io/trino/block/TestRowBlock.java index 399d57adc4857..a980da71f20cf 100644 --- a/core/trino-main/src/test/java/io/trino/block/TestRowBlock.java +++ b/core/trino-main/src/test/java/io/trino/block/TestRowBlock.java @@ -69,9 +69,11 @@ public void testFromFieldBlocksNoNullsDetection() { // Blocks does not discard the null mask during creation if no values are null boolean[] rowIsNull = new boolean[5]; - assertThat(fromNotNullSuppressedFieldBlocks(5, Optional.of(rowIsNull), new Block[] {new ByteArrayBlock(5, Optional.empty(), createExpectedValue(5).getBytes())}).mayHaveNull()).isTrue(); + assertThat(fromNotNullSuppressedFieldBlocks(5, Optional.of(rowIsNull), new Block[] { + new ByteArrayBlock(5, Optional.empty(), createExpectedValue(5).getBytes())}).mayHaveNull()).isTrue(); rowIsNull[rowIsNull.length - 1] = true; - assertThat(fromNotNullSuppressedFieldBlocks(5, Optional.of(rowIsNull), new Block[] {new ByteArrayBlock(5, Optional.of(rowIsNull), createExpectedValue(5).getBytes())}).mayHaveNull()).isTrue(); + assertThat(fromNotNullSuppressedFieldBlocks(5, Optional.of(rowIsNull), new Block[] { + new ByteArrayBlock(5, Optional.of(rowIsNull), createExpectedValue(5).getBytes())}).mayHaveNull()).isTrue(); // Empty blocks have no nulls and can also discard their null mask assertThat(fromNotNullSuppressedFieldBlocks(0, Optional.of(new boolean[0]), new Block[] {new ByteArrayBlock(0, Optional.empty(), new byte[0])}).mayHaveNull()).isFalse(); @@ -101,7 +103,7 @@ public void testCompactBlock() // NOTE: nested row blocks are required to have the exact same size so they are always compact assertCompact(fromFieldBlocks(0, new Block[] {emptyBlock, emptyBlock})); - assertCompact(fromNotNullSuppressedFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), new Block[]{ + assertCompact(fromNotNullSuppressedFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), new Block[] { new ByteArrayBlock(6, Optional.of(rowIsNull), createExpectedValue(6).getBytes()), new ByteArrayBlock(6, Optional.of(rowIsNull), createExpectedValue(6).getBytes())})); } diff --git 
a/core/trino-main/src/test/java/io/trino/connector/MockConnector.java b/core/trino-main/src/test/java/io/trino/connector/MockConnector.java index 82bf03c0a12ac..3a3cb7a85ff14 100644 --- a/core/trino-main/src/test/java/io/trino/connector/MockConnector.java +++ b/core/trino-main/src/test/java/io/trino/connector/MockConnector.java @@ -665,7 +665,13 @@ public void setViewAuthorization(ConnectorSession session, SchemaTableName viewN public void dropView(ConnectorSession session, SchemaTableName viewName) {} @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) {} + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) {} @Override public List listMaterializedViews(ConnectorSession session, Optional schemaName) @@ -680,6 +686,12 @@ public Optional getMaterializedView(Connect return Optional.ofNullable(getMaterializedViews.apply(session, viewName.toSchemaTablePrefix()).get(viewName)); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition materializedViewDefinition) + { + return ImmutableMap.of(); + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(ConnectorSession session, SchemaTableName viewName) { diff --git a/core/trino-main/src/test/java/io/trino/cost/TestCostCalculator.java b/core/trino-main/src/test/java/io/trino/cost/TestCostCalculator.java index 1a2f485762238..afb47bb2c7bb8 100644 --- a/core/trino-main/src/test/java/io/trino/cost/TestCostCalculator.java +++ b/core/trino-main/src/test/java/io/trino/cost/TestCostCalculator.java @@ -76,7 +76,7 @@ import static io.trino.sql.planner.plan.ExchangeNode.replicatedExchange; import static io.trino.testing.TestingHandles.TEST_CATALOG_NAME; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; diff --git a/core/trino-main/src/test/java/io/trino/cost/TestFilterStatsCalculator.java b/core/trino-main/src/test/java/io/trino/cost/TestFilterStatsCalculator.java index 2f52d31636708..40c953ebe282d 100644 --- a/core/trino-main/src/test/java/io/trino/cost/TestFilterStatsCalculator.java +++ b/core/trino-main/src/test/java/io/trino/cost/TestFilterStatsCalculator.java @@ -38,7 +38,7 @@ import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; import static io.trino.sql.planner.iterative.rule.test.PlanBuilder.expression; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.Double.NEGATIVE_INFINITY; import static java.lang.Double.NaN; import static java.lang.Double.POSITIVE_INFINITY; diff --git a/core/trino-main/src/test/java/io/trino/cost/TestScalarStatsCalculator.java b/core/trino-main/src/test/java/io/trino/cost/TestScalarStatsCalculator.java index c2dac31de5ec4..4a12f759ac59d 100644 --- a/core/trino-main/src/test/java/io/trino/cost/TestScalarStatsCalculator.java +++ 
b/core/trino-main/src/test/java/io/trino/cost/TestScalarStatsCalculator.java @@ -44,7 +44,7 @@ import static io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.Double.NEGATIVE_INFINITY; import static java.lang.Double.POSITIVE_INFINITY; diff --git a/core/trino-main/src/test/java/io/trino/dispatcher/TestLocalDispatchQuery.java b/core/trino-main/src/test/java/io/trino/dispatcher/TestLocalDispatchQuery.java index d74a931f7b1e9..3a0b59b5b1955 100644 --- a/core/trino-main/src/test/java/io/trino/dispatcher/TestLocalDispatchQuery.java +++ b/core/trino-main/src/test/java/io/trino/dispatcher/TestLocalDispatchQuery.java @@ -133,7 +133,10 @@ public void testSubmittedForDispatchedQuery() metadata, new FunctionManager( new ConnectorCatalogServiceProvider<>("function provider", new NoConnectorServicesProvider(), ConnectorServices::getFunctionProvider), - new GlobalFunctionCatalog(), + new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }), LanguageFunctionProvider.DISABLED), new QueryMonitorConfig()); CreateTable createTable = new CreateTable(QualifiedName.of("table"), ImmutableList.of(), FAIL, ImmutableList.of(), Optional.empty()); diff --git a/core/trino-main/src/test/java/io/trino/execution/BaseDataDefinitionTaskTest.java b/core/trino-main/src/test/java/io/trino/execution/BaseDataDefinitionTaskTest.java index 8357a4d93a304..6d3dd9b72dde5 100644 --- a/core/trino-main/src/test/java/io/trino/execution/BaseDataDefinitionTaskTest.java +++ b/core/trino-main/src/test/java/io/trino/execution/BaseDataDefinitionTaskTest.java @@ -42,7 +42,6 @@ import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ColumnMetadata; import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.MaterializedViewNotFoundException; import io.trino.spi.connector.SaveMode; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.TestingColumnHandle; @@ -108,6 +107,7 @@ public abstract class BaseDataDefinitionTaskTest protected static final String MATERIALIZED_VIEW_PROPERTY_2_NAME = "property2"; protected static final String MATERIALIZED_VIEW_PROPERTY_2_DEFAULT_VALUE = "defaultProperty2Value"; + protected static final Map MATERIALIZED_VIEW_PROPERTIES = ImmutableMap.of(MATERIALIZED_VIEW_PROPERTY_2_NAME, MATERIALIZED_VIEW_PROPERTY_2_DEFAULT_VALUE); private LocalQueryRunner queryRunner; protected Session testSession; @@ -196,8 +196,7 @@ protected MaterializedViewDefinition someMaterializedView(String sql, List tables = new ConcurrentHashMap<>(); private final Map views = new ConcurrentHashMap<>(); private final Map materializedViews = new ConcurrentHashMap<>(); + private final Map> materializedViewProperties = new ConcurrentHashMap<>(); public MockMetadata(String catalogName) { @@ -455,10 +455,23 @@ public Optional getMaterializedView(Session session, } @Override - public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public Map getMaterializedViewProperties(Session session, QualifiedObjectName viewName, MaterializedViewDefinition 
materializedViewDefinition) + { + return materializedViewProperties.get(viewName.asSchemaTableName()); + } + + @Override + public void createMaterializedView( + Session session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { checkArgument(ignoreExisting || !materializedViews.containsKey(viewName.asSchemaTableName())); materializedViews.put(viewName.asSchemaTableName(), definition); + materializedViewProperties.put(viewName.asSchemaTableName(), properties); } @Override @@ -467,9 +480,7 @@ public synchronized void setMaterializedViewProperties( QualifiedObjectName viewName, Map> properties) { - MaterializedViewDefinition existingDefinition = getMaterializedView(session, viewName) - .orElseThrow(() -> new MaterializedViewNotFoundException(viewName.asSchemaTableName())); - Map newProperties = new HashMap<>(existingDefinition.getProperties()); + Map newProperties = new HashMap<>(materializedViewProperties.getOrDefault(viewName.asSchemaTableName(), ImmutableMap.of())); for (Entry> entry : properties.entrySet()) { if (entry.getValue().isPresent()) { newProperties.put(entry.getKey(), entry.getValue().orElseThrow()); @@ -478,19 +489,7 @@ public synchronized void setMaterializedViewProperties( newProperties.remove(entry.getKey()); } } - materializedViews.put( - viewName.asSchemaTableName(), - new MaterializedViewDefinition( - existingDefinition.getOriginalSql(), - existingDefinition.getCatalog(), - existingDefinition.getSchema(), - existingDefinition.getColumns(), - existingDefinition.getGracePeriod(), - existingDefinition.getComment(), - existingDefinition.getRunAsIdentity().get(), - existingDefinition.getPath(), - existingDefinition.getStorageTable(), - newProperties)); + materializedViewProperties.put(viewName.asSchemaTableName(), newProperties); } @Override @@ -510,8 +509,7 @@ public void setMaterializedViewColumnComment(Session session, QualifiedObjectNam view.getComment(), view.getRunAsIdentity().get(), view.getPath(), - view.getStorageTable(), - view.getProperties())); + view.getStorageTable())); } @Override diff --git a/core/trino-main/src/test/java/io/trino/execution/BaseTestSqlTaskManager.java b/core/trino-main/src/test/java/io/trino/execution/BaseTestSqlTaskManager.java index 3323dcef7baac..cdae4c641db38 100644 --- a/core/trino-main/src/test/java/io/trino/execution/BaseTestSqlTaskManager.java +++ b/core/trino-main/src/test/java/io/trino/execution/BaseTestSqlTaskManager.java @@ -47,7 +47,7 @@ import io.trino.spi.exchange.ExchangeId; import io.trino.spiller.LocalSpillManager; import io.trino.spiller.NodeSpillConfig; -import io.trino.version.EmbedVersion; +import io.trino.util.EmbedVersion; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; diff --git a/core/trino-main/src/test/java/io/trino/execution/MockRemoteTaskFactory.java b/core/trino-main/src/test/java/io/trino/execution/MockRemoteTaskFactory.java index 681afda39efe5..b0fde885e1b10 100644 --- a/core/trino-main/src/test/java/io/trino/execution/MockRemoteTaskFactory.java +++ b/core/trino-main/src/test/java/io/trino/execution/MockRemoteTaskFactory.java @@ -212,6 +212,7 @@ public MockRemoteTask( new TestingGcMonitor(), executor, scheduledExecutor, + scheduledExecutor, DataSize.of(1, MEGABYTE), spillSpaceTracker); this.taskContext = queryContext.addTaskContext(taskStateMachine, TEST_SESSION, () -> {}, true, true); diff --git 
a/core/trino-main/src/test/java/io/trino/execution/TaskTestUtils.java b/core/trino-main/src/test/java/io/trino/execution/TaskTestUtils.java index 321a329998211..0bfc83a4b6c74 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TaskTestUtils.java +++ b/core/trino-main/src/test/java/io/trino/execution/TaskTestUtils.java @@ -29,11 +29,11 @@ import io.trino.execution.scheduler.NodeScheduler; import io.trino.execution.scheduler.NodeSchedulerConfig; import io.trino.execution.scheduler.UniformNodeSelectorFactory; -import io.trino.index.IndexManager; import io.trino.metadata.InMemoryNodeManager; import io.trino.metadata.Split; import io.trino.operator.PagesIndex; import io.trino.operator.index.IndexJoinLookupStats; +import io.trino.operator.index.IndexManager; import io.trino.spi.connector.CatalogHandle; import io.trino.spiller.GenericSpillerFactory; import io.trino.split.PageSinkManager; diff --git a/core/trino-main/src/test/java/io/trino/execution/TestAddColumnTask.java b/core/trino-main/src/test/java/io/trino/execution/TestAddColumnTask.java index 5b497a1cc2f66..1d0e7699454d4 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestAddColumnTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestAddColumnTask.java @@ -163,7 +163,7 @@ public void testAddColumnOnView() public void testAddColumnOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeAddColumn(asQualifiedName(materializedViewName), QualifiedName.of("test"), INTEGER, Optional.empty(), false, false))) .hasErrorCode(TABLE_NOT_FOUND) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestCommentTask.java b/core/trino-main/src/test/java/io/trino/execution/TestCommentTask.java index bca446767f66b..2068a2b916f7b 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestCommentTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestCommentTask.java @@ -69,7 +69,7 @@ public void testCommentTableOnView() public void testCommentTableOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(setComment(TABLE, asQualifiedName(materializedViewName), Optional.of("new comment")))) .hasErrorCode(TABLE_NOT_FOUND) @@ -102,7 +102,7 @@ public void testCommentViewOnTable() public void testCommentViewOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, 
false); assertTrinoExceptionThrownBy(() -> getFutureValue(setComment(VIEW, asQualifiedName(materializedViewName), Optional.of("new comment")))) .hasErrorCode(TABLE_NOT_FOUND) @@ -145,7 +145,7 @@ public void testCommentViewColumn() public void testCommentMaterializedViewColumn() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertThat(metadata.isMaterializedView(testSession, materializedViewName)).isTrue(); QualifiedName columnName = qualifiedColumnName("existing_materialized_view", "test"); diff --git a/core/trino-main/src/test/java/io/trino/execution/TestCreateMaterializedViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestCreateMaterializedViewTask.java index 7b103a902c60f..c1a04c050bb8d 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestCreateMaterializedViewTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestCreateMaterializedViewTask.java @@ -252,7 +252,7 @@ public void testCreateMaterializedViewWithDefaultProperties() Optional definitionOptional = metadata.getMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString())); assertThat(definitionOptional).isPresent(); - Map properties = definitionOptional.get().getProperties(); + Map properties = metadata.getMaterializedViewProperties(testSession, new QualifiedObjectName(TEST_CATALOG_NAME, "schema", "mv"), definitionOptional.get()); assertThat(properties.get("foo")).isEqualTo(DEFAULT_MATERIALIZED_VIEW_FOO_PROPERTY_VALUE); assertThat(properties.get("bar")).isEqualTo(DEFAULT_MATERIALIZED_VIEW_BAR_PROPERTY_VALUE); } @@ -310,11 +310,19 @@ private class MockMetadata extends AbstractMockMetadata { private final Map materializedViews = new ConcurrentHashMap<>(); + private final Map> materializedViewProperties = new ConcurrentHashMap<>(); @Override - public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + Session session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { materializedViews.put(viewName.asSchemaTableName(), definition); + materializedViewProperties.put(viewName.asSchemaTableName(), properties); if (!ignoreExisting) { throw new TrinoException(ALREADY_EXISTS, "Materialized view already exists"); } @@ -375,6 +383,12 @@ public Optional getMaterializedView(Session session, return Optional.ofNullable(materializedViews.get(viewName.asSchemaTableName())); } + @Override + public Map getMaterializedViewProperties(Session session, QualifiedObjectName viewName, MaterializedViewDefinition materializedViewDefinition) + { + return materializedViewProperties.get(viewName.asSchemaTableName()); + } + @Override public Optional getView(Session session, QualifiedObjectName viewName) { diff --git a/core/trino-main/src/test/java/io/trino/execution/TestCreateViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestCreateViewTask.java index 77c2c8240384a..f9fcd7a69e682 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestCreateViewTask.java +++ 
b/core/trino-main/src/test/java/io/trino/execution/TestCreateViewTask.java @@ -127,7 +127,7 @@ public void testReplaceViewOnTableIfExists() public void testCreateViewOnMaterializedView() { QualifiedObjectName viewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, viewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, viewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeCreateView(asQualifiedName(viewName), false))) .hasErrorCode(TABLE_ALREADY_EXISTS) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestDropColumnTask.java b/core/trino-main/src/test/java/io/trino/execution/TestDropColumnTask.java index a443dc529e807..8b6b234a48d08 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestDropColumnTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestDropColumnTask.java @@ -156,7 +156,7 @@ public void testDropColumnOnView() public void testDropColumnOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeDropColumn(asQualifiedName(materializedViewName), QualifiedName.of("test"), false, false))) .hasErrorCode(TABLE_NOT_FOUND) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestDropMaterializedViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestDropMaterializedViewTask.java index bfbd422399b90..2a7a2ba2fba6f 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestDropMaterializedViewTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestDropMaterializedViewTask.java @@ -36,7 +36,7 @@ public class TestDropMaterializedViewTask public void testDropExistingMaterializedView() { QualifiedObjectName viewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, viewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, viewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertThat(metadata.isMaterializedView(testSession, viewName)).isTrue(); getFutureValue(executeDropMaterializedView(asQualifiedName(viewName), false)); diff --git a/core/trino-main/src/test/java/io/trino/execution/TestDropTableTask.java b/core/trino-main/src/test/java/io/trino/execution/TestDropTableTask.java index 79fcb4237689f..48b4ba0e9c0fe 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestDropTableTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestDropTableTask.java @@ -107,7 +107,7 @@ public void testDropTableIfExistsOnView() public void testDropTableOnMaterializedView() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, asQualifiedObjectName(viewName), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, asQualifiedObjectName(viewName), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeDropTable(viewName, false))) 
.hasErrorCode(GENERIC_USER_ERROR) @@ -118,7 +118,7 @@ public void testDropTableOnMaterializedView() public void testDropTableIfExistsOnMaterializedView() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, asQualifiedObjectName(viewName), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, asQualifiedObjectName(viewName), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeDropTable(viewName, true))) .hasErrorCode(GENERIC_USER_ERROR) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestDropViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestDropViewTask.java index 88af3f3802ee2..b4838b2d6c995 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestDropViewTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestDropViewTask.java @@ -88,7 +88,7 @@ public void testDropViewOnTableIfExists() public void testDropViewOnMaterializedView() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeDropView(viewName, false))) .hasErrorCode(GENERIC_USER_ERROR) @@ -99,7 +99,7 @@ public void testDropViewOnMaterializedView() public void testDropViewOnMaterializedViewIfExists() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeDropView(viewName, true))) .hasErrorCode(GENERIC_USER_ERROR) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestMemoryRevokingScheduler.java b/core/trino-main/src/test/java/io/trino/execution/TestMemoryRevokingScheduler.java index c5855316582ee..1c9d4bbf56802 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestMemoryRevokingScheduler.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestMemoryRevokingScheduler.java @@ -292,6 +292,7 @@ private QueryContext getOrCreateQueryContext(QueryId queryId) new TestingGcMonitor(), executor, scheduledExecutor, + scheduledExecutor, DataSize.of(1, GIGABYTE), spillSpaceTracker)); } diff --git a/core/trino-main/src/test/java/io/trino/execution/TestRenameColumnTask.java b/core/trino-main/src/test/java/io/trino/execution/TestRenameColumnTask.java index 7441dc9969807..ec0729f8b5979 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestRenameColumnTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestRenameColumnTask.java @@ -117,7 +117,7 @@ public void testRenameColumnOnView() public void testRenameColumnOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, 
QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameColumn(asQualifiedName(materializedViewName), QualifiedName.of("a"), identifier("a_renamed"), false, false))) .hasErrorCode(TABLE_NOT_FOUND) @@ -211,7 +211,7 @@ public void testRenameFieldOnView() public void testRenameFieldOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameColumn(asQualifiedName(materializedViewName), QualifiedName.of("test"), identifier("x"), false, false))) .hasErrorCode(TABLE_NOT_FOUND) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestRenameMaterializedViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestRenameMaterializedViewTask.java index e1527c7c06cf0..c898c14ee83e0 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestRenameMaterializedViewTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestRenameMaterializedViewTask.java @@ -38,7 +38,7 @@ public void testRenameExistingMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); QualifiedObjectName newMaterializedViewName = qualifiedObjectName("existing_materialized_view_new"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); getFutureValue(executeRenameMaterializedView(asQualifiedName(materializedViewName), asQualifiedName(newMaterializedViewName))); assertThat(metadata.isMaterializedView(testSession, materializedViewName)).isFalse(); @@ -90,7 +90,7 @@ public void testRenameMaterializedViewOnTableIfExists() public void testRenameMaterializedViewTargetTableExists() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); QualifiedObjectName tableName = qualifiedObjectName("existing_table"); metadata.createTable(testSession, TEST_CATALOG_NAME, someTable(tableName), FAIL); @@ -125,7 +125,7 @@ public void testRenameMaterializedViewOnViewIfExists() public void testRenameMaterializedViewTargetViewExists() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); QualifiedName viewName = qualifiedName("existing_view"); metadata.createView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someView(), false); diff --git a/core/trino-main/src/test/java/io/trino/execution/TestRenameTableTask.java 
b/core/trino-main/src/test/java/io/trino/execution/TestRenameTableTask.java index b6d06d046c66c..791b28051b841 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestRenameTableTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestRenameTableTask.java @@ -90,7 +90,7 @@ public void testRenameTableOnViewIfExists() public void testRenameTableOnMaterializedView() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameTable(viewName, qualifiedName("existing_materialized_view_new"), false))) .hasErrorCode(GENERIC_USER_ERROR) @@ -101,7 +101,7 @@ public void testRenameTableOnMaterializedView() public void testRenameTableOnMaterializedViewIfExists() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameTable(viewName, qualifiedName("existing_materialized_view_new"), true))) .hasErrorCode(GENERIC_USER_ERROR) @@ -127,7 +127,7 @@ public void testRenameTableTargetMaterializedViewExists() QualifiedObjectName tableName = qualifiedObjectName("existing_table"); metadata.createTable(testSession, TEST_CATALOG_NAME, someTable(tableName), FAIL); QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameTable(asQualifiedName(tableName), asQualifiedName(materializedViewName), false))) .hasErrorCode(GENERIC_USER_ERROR) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestRenameViewTask.java b/core/trino-main/src/test/java/io/trino/execution/TestRenameViewTask.java index 00c55d18b0ace..2ff29c5525788 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestRenameViewTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestRenameViewTask.java @@ -71,7 +71,7 @@ public void testRenameViewOnTable() public void testRenameViewOnMaterializedView() { QualifiedName viewName = qualifiedName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(viewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameView(viewName, qualifiedName("existing_materialized_view_new")))) .hasErrorCode(TABLE_NOT_FOUND) @@ -97,7 +97,7 @@ public void testRenameViewTargetMaterializedViewExists() QualifiedName viewName = qualifiedName("existing_view"); metadata.createView(testSession, 
QualifiedObjectName.valueOf(viewName.toString()), someView(), false); QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeRenameView(viewName, asQualifiedName(materializedViewName)))) .hasErrorCode(GENERIC_USER_ERROR) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSetColumnTypeTask.java b/core/trino-main/src/test/java/io/trino/execution/TestSetColumnTypeTask.java index 8de522663eebe..c7a0cc5f02739 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSetColumnTypeTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSetColumnTypeTask.java @@ -113,7 +113,7 @@ public void testSetDataTypeOnView() public void testSetDataTypeOnMaterializedView() { QualifiedObjectName materializedViewName = qualifiedObjectName("existing_materialized_view"); - metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, QualifiedObjectName.valueOf(materializedViewName.toString()), someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); assertTrinoExceptionThrownBy(() -> getFutureValue(executeSetColumnType(asQualifiedName(materializedViewName), QualifiedName.of("test"), toSqlType(INTEGER), false))) .hasErrorCode(TABLE_NOT_FOUND) diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSetPropertiesTask.java b/core/trino-main/src/test/java/io/trino/execution/TestSetPropertiesTask.java index e351361d8d694..89de2490d2828 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSetPropertiesTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSetPropertiesTask.java @@ -17,6 +17,7 @@ import com.google.common.collect.ImmutableMap; import io.trino.connector.CatalogServiceProvider; import io.trino.execution.warnings.WarningCollector; +import io.trino.metadata.MaterializedViewDefinition; import io.trino.metadata.QualifiedObjectName; import io.trino.metadata.TablePropertyManager; import io.trino.security.AllowAllAccessControl; @@ -37,7 +38,7 @@ public class TestSetPropertiesTask public void testSetMaterializedViewProperties() { QualifiedObjectName materializedViewName = qualifiedObjectName("test_materialized_view"); - metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), false, false); + metadata.createMaterializedView(testSession, materializedViewName, someMaterializedView(), MATERIALIZED_VIEW_PROPERTIES, false, false); // set all properties to non-DEFAULT values and check the results executeSetProperties( @@ -47,7 +48,8 @@ public void testSetMaterializedViewProperties() ImmutableList.of( new Property(new Identifier(MATERIALIZED_VIEW_PROPERTY_1_NAME), new LongLiteral("111")), new Property(new Identifier(MATERIALIZED_VIEW_PROPERTY_2_NAME), new StringLiteral("abc"))))); - assertThat(metadata.getMaterializedView(testSession, materializedViewName).get().getProperties()).isEqualTo( + MaterializedViewDefinition materializedViewDefinition = metadata.getMaterializedView(testSession, materializedViewName).orElseThrow(); + assertThat(metadata.getMaterializedViewProperties(testSession, materializedViewName, 
materializedViewDefinition)).isEqualTo( ImmutableMap.of( MATERIALIZED_VIEW_PROPERTY_1_NAME, 111L, MATERIALIZED_VIEW_PROPERTY_2_NAME, "abc")); @@ -62,7 +64,7 @@ public void testSetMaterializedViewProperties() new Property(new Identifier(MATERIALIZED_VIEW_PROPERTY_2_NAME))))); // since the default value of property 1 is null, property 1 should not appear in the result, whereas property 2 should appear in // the result with its (non-null) default value - assertThat(metadata.getMaterializedView(testSession, materializedViewName).get().getProperties()).isEqualTo( + assertThat(metadata.getMaterializedViewProperties(testSession, materializedViewName, materializedViewDefinition)).isEqualTo( ImmutableMap.of(MATERIALIZED_VIEW_PROPERTY_2_NAME, MATERIALIZED_VIEW_PROPERTY_2_DEFAULT_VALUE)); } diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSqlTask.java b/core/trino-main/src/test/java/io/trino/execution/TestSqlTask.java index d865468401e46..48e4ea9d60901 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSqlTask.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSqlTask.java @@ -92,6 +92,7 @@ public class TestSqlTask private TaskExecutor taskExecutor; private ScheduledExecutorService taskNotificationExecutor; private ScheduledExecutorService driverYieldExecutor; + private ScheduledExecutorService driverTimeoutExecutor; private SqlTaskExecutionFactory sqlTaskExecutionFactory; private final AtomicInteger nextTaskId = new AtomicInteger(); @@ -104,7 +105,7 @@ public void setUp() taskNotificationExecutor = newScheduledThreadPool(10, threadsNamed("task-notification-%s")); driverYieldExecutor = newScheduledThreadPool(2, threadsNamed("driver-yield-%s")); - + driverTimeoutExecutor = newScheduledThreadPool(2, threadsNamed("driver-timeout-%s")); LocalExecutionPlanner planner = createTestingPlanner(); sqlTaskExecutionFactory = new SqlTaskExecutionFactory( @@ -123,6 +124,7 @@ public void destroy() taskExecutor = null; taskNotificationExecutor.shutdownNow(); driverYieldExecutor.shutdown(); + driverTimeoutExecutor.shutdown(); sqlTaskExecutionFactory = null; } @@ -435,6 +437,7 @@ private SqlTask createInitialTask() new TestingGcMonitor(), taskNotificationExecutor, driverYieldExecutor, + driverTimeoutExecutor, DataSize.of(1, MEGABYTE), new SpillSpaceTracker(DataSize.of(1, GIGABYTE))); diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskExecution.java b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskExecution.java index 282c65442b80e..aae9ad8661ac8 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskExecution.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskExecution.java @@ -98,6 +98,7 @@ public void testSimple() { ScheduledExecutorService taskNotificationExecutor = newScheduledThreadPool(10, threadsNamed("task-notification-%s")); ScheduledExecutorService driverYieldExecutor = newScheduledThreadPool(2, threadsNamed("driver-yield-%s")); + ScheduledExecutorService driverTimeoutExecutor = newScheduledThreadPool(2, threadsNamed("driver-timeout-%s")); TaskExecutor taskExecutor = new TimeSharingTaskExecutor(5, 10, 3, 4, Ticker.systemTicker()); taskExecutor.start(); @@ -131,7 +132,7 @@ public void testSimple() ImmutableList.of(testingScanOperatorFactory, taskOutputOperatorFactory), OptionalInt.empty())), ImmutableList.of(TABLE_SCAN_NODE_ID)); - TaskContext taskContext = newTestingTaskContext(taskNotificationExecutor, driverYieldExecutor, taskStateMachine); + TaskContext taskContext = 
newTestingTaskContext(taskNotificationExecutor, driverYieldExecutor, driverTimeoutExecutor, taskStateMachine); SqlTaskExecution sqlTaskExecution = new SqlTaskExecution( taskStateMachine, taskContext, @@ -197,10 +198,11 @@ public void testSimple() taskExecutor.stop(); taskNotificationExecutor.shutdownNow(); driverYieldExecutor.shutdown(); + driverTimeoutExecutor.shutdown(); } } - private TaskContext newTestingTaskContext(ScheduledExecutorService taskNotificationExecutor, ScheduledExecutorService driverYieldExecutor, TaskStateMachine taskStateMachine) + private TaskContext newTestingTaskContext(ScheduledExecutorService taskNotificationExecutor, ScheduledExecutorService driverYieldExecutor, ScheduledExecutorService driverTimeoutExecutor, TaskStateMachine taskStateMachine) { QueryContext queryContext = new QueryContext( new QueryId("queryid"), @@ -209,6 +211,7 @@ private TaskContext newTestingTaskContext(ScheduledExecutorService taskNotificat new TestingGcMonitor(), taskNotificationExecutor, driverYieldExecutor, + driverTimeoutExecutor, DataSize.of(1, MEGABYTE), new SpillSpaceTracker(DataSize.of(1, GIGABYTE))); return queryContext.addTaskContext(taskStateMachine, TEST_SESSION, () -> {}, false, false); diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerRaceWithCatalogPrune.java b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerRaceWithCatalogPrune.java index 7fe6e9e4b997b..81eac9704bb3c 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerRaceWithCatalogPrune.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerRaceWithCatalogPrune.java @@ -53,7 +53,7 @@ import io.trino.testing.TestingSession; import io.trino.transaction.NoOpTransactionManager; import io.trino.transaction.TransactionInfo; -import io.trino.version.EmbedVersion; +import io.trino.util.EmbedVersion; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; diff --git a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerThreadPerDriver.java b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerThreadPerDriver.java index bbec2769cdc57..08327f8c00823 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerThreadPerDriver.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestSqlTaskManagerThreadPerDriver.java @@ -19,7 +19,7 @@ import io.trino.execution.executor.dedicated.ThreadPerDriverTaskExecutor; import io.trino.execution.executor.scheduler.FairScheduler; -import static io.trino.version.EmbedVersion.testingVersionEmbedder; +import static io.trino.util.EmbedVersion.testingVersionEmbedder; public class TestSqlTaskManagerThreadPerDriver extends BaseTestSqlTaskManager diff --git a/core/trino-main/src/test/java/io/trino/execution/TestTaskExecutorStuckSplits.java b/core/trino-main/src/test/java/io/trino/execution/TestTaskExecutorStuckSplits.java index 6f305b10174b0..379e7afe6b726 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestTaskExecutorStuckSplits.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestTaskExecutorStuckSplits.java @@ -36,7 +36,7 @@ import io.trino.spi.connector.CatalogHandle; import io.trino.spiller.LocalSpillManager; import io.trino.spiller.NodeSpillConfig; -import io.trino.version.EmbedVersion; +import io.trino.util.EmbedVersion; import org.junit.jupiter.api.Test; import java.util.List; diff --git 
a/core/trino-main/src/test/java/io/trino/execution/TestTaskManagerConfig.java b/core/trino-main/src/test/java/io/trino/execution/TestTaskManagerConfig.java index 73685aa8f09b7..965ee6e277cee 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestTaskManagerConfig.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestTaskManagerConfig.java @@ -71,6 +71,7 @@ public void testDefaults() .setHttpTimeoutThreads(3) .setTaskNotificationThreads(5) .setTaskYieldThreads(3) + .setDriverTimeoutThreads(5) .setLevelTimeMultiplier(new BigDecimal("2")) .setStatisticsCpuTimerEnabled(true) .setInterruptStuckSplitTasksEnabled(true) @@ -116,6 +117,7 @@ public void testExplicitPropertyMappings() .put("task.http-timeout-threads", "10") .put("task.task-notification-threads", "13") .put("task.task-yield-threads", "8") + .put("task.driver-timeout-threads", "10") .put("task.level-time-multiplier", "2.1") .put("task.statistics-cpu-timer-enabled", "false") .put("task.interrupt-stuck-split-tasks-enabled", "false") @@ -156,6 +158,7 @@ public void testExplicitPropertyMappings() .setHttpTimeoutThreads(10) .setTaskNotificationThreads(13) .setTaskYieldThreads(8) + .setDriverTimeoutThreads(10) .setLevelTimeMultiplier(new BigDecimal("2.1")) .setStatisticsCpuTimerEnabled(false) .setInterruptStuckSplitTasksEnabled(false) diff --git a/core/trino-main/src/test/java/io/trino/execution/executor/dedicated/TestThreadPerDriverTaskExecutor.java b/core/trino-main/src/test/java/io/trino/execution/executor/dedicated/TestThreadPerDriverTaskExecutor.java index 2648bfbc581b3..ff6318e466010 100644 --- a/core/trino-main/src/test/java/io/trino/execution/executor/dedicated/TestThreadPerDriverTaskExecutor.java +++ b/core/trino-main/src/test/java/io/trino/execution/executor/dedicated/TestThreadPerDriverTaskExecutor.java @@ -39,7 +39,7 @@ import java.util.function.Function; import static io.airlift.tracing.Tracing.noopTracer; -import static io.trino.version.EmbedVersion.testingVersionEmbedder; +import static io.trino.util.EmbedVersion.testingVersionEmbedder; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.assertj.core.api.Assertions.assertThat; diff --git a/core/trino-main/src/test/java/io/trino/execution/scheduler/TestUniformNodeSelector.java b/core/trino-main/src/test/java/io/trino/execution/scheduler/TestUniformNodeSelector.java index 299f8b6f153a4..d3565e3322f00 100644 --- a/core/trino-main/src/test/java/io/trino/execution/scheduler/TestUniformNodeSelector.java +++ b/core/trino-main/src/test/java/io/trino/execution/scheduler/TestUniformNodeSelector.java @@ -64,6 +64,9 @@ @TestInstance(PER_METHOD) public class TestUniformNodeSelector { + private static final InternalNode node1 = new InternalNode("node1", URI.create("http://10.0.0.1:13"), NodeVersion.UNKNOWN, false); + private static final InternalNode node2 = new InternalNode("node2", URI.create("http://10.0.0.1:12"), NodeVersion.UNKNOWN, false); + private final Set splits = new LinkedHashSet<>(); private FinalizerService finalizerService; private NodeTaskMap nodeTaskMap; private InMemoryNodeManager nodeManager; @@ -82,6 +85,8 @@ public void setUp() finalizerService = new FinalizerService(); nodeTaskMap = new NodeTaskMap(finalizerService); nodeManager = new InMemoryNodeManager(); + nodeManager.addNodes(node1); + nodeManager.addNodes(node2); nodeSchedulerConfig = new NodeSchedulerConfig() .setMaxSplitsPerNode(20) @@ -116,11 +121,6 @@ public void tearDown() @Test public void testQueueSizeAdjustmentScaleDown() { - InternalNode node1 = new 
InternalNode("node1", URI.create("http://10.0.0.1:13"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node1); - InternalNode node2 = new InternalNode("node2", URI.create("http://10.0.0.1:12"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node2); - TestingTicker ticker = new TestingTicker(); UniformNodeSelector.QueueSizeAdjuster queueSizeAdjuster = new UniformNodeSelector.QueueSizeAdjuster(10, 100, ticker); @@ -137,8 +137,6 @@ public void testQueueSizeAdjustmentScaleDown() false, queueSizeAdjuster); - Set splits = new LinkedHashSet<>(); - for (int i = 0; i < 20; i++) { splits.add(new Split(TEST_CATALOG_HANDLE, TestingSplit.createRemoteSplit())); } @@ -187,13 +185,6 @@ public void testQueueSizeAdjustmentScaleDown() @Test public void testQueueSizeAdjustmentAllNodes() { - InternalNode node1 = new InternalNode("node1", URI.create("http://10.0.0.1:13"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node1); - InternalNode node2 = new InternalNode("node2", URI.create("http://10.0.0.1:12"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node2); - - Set splits = new LinkedHashSet<>(); - for (int i = 0; i < 20 * 9; i++) { splits.add(new Split(TEST_CATALOG_HANDLE, TestingSplit.createRemoteSplit())); } @@ -246,13 +237,6 @@ public void testQueueSizeAdjustmentAllNodes() @Test public void testQueueSizeAdjustmentOneOfAll() { - InternalNode node1 = new InternalNode("node1", URI.create("http://10.0.0.1:13"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node1); - InternalNode node2 = new InternalNode("node2", URI.create("http://10.0.0.1:12"), NodeVersion.UNKNOWN, false); - nodeManager.addNodes(node2); - - Set splits = new LinkedHashSet<>(); - for (int i = 0; i < 20 * 9; i++) { splits.add(new Split(TEST_CATALOG_HANDLE, TestingSplit.createRemoteSplit())); } diff --git a/core/trino-main/src/test/java/io/trino/likematcher/TestLikeMatcher.java b/core/trino-main/src/test/java/io/trino/likematcher/TestLikeMatcher.java index fa0f9d6c38d29..398cf1daba604 100644 --- a/core/trino-main/src/test/java/io/trino/likematcher/TestLikeMatcher.java +++ b/core/trino-main/src/test/java/io/trino/likematcher/TestLikeMatcher.java @@ -96,6 +96,12 @@ public void test() assertFalse(match("%abaaa%", "ababaa")); + assertTrue(match("%paya%", "papaya")); + assertTrue(match("%paya%", "papapaya")); + assertTrue(match("%paya%", "papapapaya")); + assertTrue(match("%paya%", "papapapapaya")); + assertTrue(match("%paya%", "papapapapapaya")); + // utf-8 LikeMatcher singleOptimized = LikePattern.compile("_", Optional.empty(), true).getMatcher(); LikeMatcher multipleOptimized = LikePattern.compile("_a%b_", Optional.empty(), true).getMatcher(); // prefix and suffix with _a and b_ to avoid optimizations diff --git a/core/trino-main/src/test/java/io/trino/memory/TestMemoryPools.java b/core/trino-main/src/test/java/io/trino/memory/TestMemoryPools.java index fde67f51b1dd4..0b2b3cb1f0499 100644 --- a/core/trino-main/src/test/java/io/trino/memory/TestMemoryPools.java +++ b/core/trino-main/src/test/java/io/trino/memory/TestMemoryPools.java @@ -97,6 +97,7 @@ private void setUp(Supplier> driversSupplier) new TestingGcMonitor(), localQueryRunner.getExecutor(), localQueryRunner.getScheduler(), + localQueryRunner.getScheduler(), TEN_MEGABYTES, spillSpaceTracker); taskContext = createTaskContext(queryContext, localQueryRunner.getExecutor(), localQueryRunner.getDefaultSession()); diff --git a/core/trino-main/src/test/java/io/trino/memory/TestMemoryTracking.java b/core/trino-main/src/test/java/io/trino/memory/TestMemoryTracking.java 
index 6069671e6611e..4f39206b198b6 100644 --- a/core/trino-main/src/test/java/io/trino/memory/TestMemoryTracking.java +++ b/core/trino-main/src/test/java/io/trino/memory/TestMemoryTracking.java @@ -68,11 +68,13 @@ public class TestMemoryTracking private MemoryPool memoryPool; private ExecutorService notificationExecutor; private ScheduledExecutorService yieldExecutor; + private ScheduledExecutorService timeoutExecutor; @AfterEach public void tearDown() { notificationExecutor.shutdownNow(); + timeoutExecutor.shutdownNow(); yieldExecutor.shutdownNow(); queryContext = null; taskContext = null; @@ -87,6 +89,7 @@ public void setUpTest() { notificationExecutor = newCachedThreadPool(daemonThreadsNamed("local-query-runner-executor-%s")); yieldExecutor = newScheduledThreadPool(2, daemonThreadsNamed("local-query-runner-scheduler-%s")); + timeoutExecutor = newScheduledThreadPool(2, daemonThreadsNamed("local-query-runner-driver-timeout-%s")); memoryPool = new MemoryPool(memoryPoolSize); queryContext = new QueryContext( @@ -96,6 +99,7 @@ public void setUpTest() new TestingGcMonitor(), notificationExecutor, yieldExecutor, + timeoutExecutor, queryMaxSpillSize, spillSpaceTracker); taskContext = queryContext.addTaskContext( diff --git a/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java b/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java index 2611b045a74e8..6e94d564b9d87 100644 --- a/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java +++ b/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java @@ -910,7 +910,13 @@ public Optional<TopNApplicationResult<TableHandle>> applyTopN(Session session, T } @Override - public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + Session session, + QualifiedObjectName viewName, + MaterializedViewDefinition definition, + Map<String, Object> properties, + boolean replace, + boolean ignoreExisting) { throw new UnsupportedOperationException(); } @@ -939,6 +945,12 @@ public Optional<MaterializedViewDefinition> getMaterializedView(Session session, throw new UnsupportedOperationException(); } + @Override + public Map<String, Object> getMaterializedViewProperties(Session session, QualifiedObjectName viewName, MaterializedViewDefinition materializedViewDefinition) + { + throw new UnsupportedOperationException(); + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(Session session, QualifiedObjectName name) { diff --git a/core/trino-main/src/test/java/io/trino/metadata/TestGlobalFunctionCatalog.java b/core/trino-main/src/test/java/io/trino/metadata/TestGlobalFunctionCatalog.java index 8c756bd8da6d5..2c6a42d9e975c 100644 --- a/core/trino-main/src/test/java/io/trino/metadata/TestGlobalFunctionCatalog.java +++ b/core/trino-main/src/test/java/io/trino/metadata/TestGlobalFunctionCatalog.java @@ -100,7 +100,10 @@ public void testDuplicateFunctions() FunctionBundle functionBundle = extractFunctions(CustomAdd.class); TypeOperators typeOperators = new TypeOperators(); - GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog(); + GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }); globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), 
NodeVersion.UNKNOWN)); globalFunctionCatalog.addFunctions(functionBundle); assertThatThrownBy(() -> globalFunctionCatalog.addFunctions(functionBundle)) @@ -114,7 +117,10 @@ public void testConflictingScalarAggregation() FunctionBundle functions = extractFunctions(ScalarSum.class); TypeOperators typeOperators = new TypeOperators(); - GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog(); + GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }); globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), NodeVersion.UNKNOWN)); assertThatThrownBy(() -> globalFunctionCatalog.addFunctions(functions)) .isInstanceOf(IllegalStateException.class) diff --git a/core/trino-main/src/test/java/io/trino/metadata/TestingFunctionResolution.java b/core/trino-main/src/test/java/io/trino/metadata/TestingFunctionResolution.java index a79adfb3f9b4c..bb1746f7bdc8f 100644 --- a/core/trino-main/src/test/java/io/trino/metadata/TestingFunctionResolution.java +++ b/core/trino-main/src/test/java/io/trino/metadata/TestingFunctionResolution.java @@ -39,8 +39,8 @@ import static io.trino.SessionTestUtils.TEST_SESSION; import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.transaction.InMemoryTransactionManager.createTestTransactionManager; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.util.Objects.requireNonNull; public class TestingFunctionResolution diff --git a/core/trino-main/src/test/java/io/trino/operator/GroupByHashYieldAssertion.java b/core/trino-main/src/test/java/io/trino/operator/GroupByHashYieldAssertion.java index 298ca2f30931e..a02624fb6a384 100644 --- a/core/trino-main/src/test/java/io/trino/operator/GroupByHashYieldAssertion.java +++ b/core/trino-main/src/test/java/io/trino/operator/GroupByHashYieldAssertion.java @@ -87,6 +87,7 @@ public static GroupByHashYieldResult finishOperatorWithYieldingGroupByHash(List< new TestingGcMonitor(), EXECUTOR, SCHEDULED_EXECUTOR, + SCHEDULED_EXECUTOR, DataSize.of(512, MEGABYTE), new SpillSpaceTracker(DataSize.of(512, MEGABYTE))); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestDriver.java b/core/trino-main/src/test/java/io/trino/operator/TestDriver.java index d76f0ad55f5ab..8b3313b2c0d0b 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestDriver.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestDriver.java @@ -62,8 +62,10 @@ import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; import static io.trino.testing.TestingHandles.TEST_TABLE_HANDLE; import static io.trino.testing.TestingTaskContext.createTaskContext; +import static java.lang.Thread.sleep; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD; @@ -285,6 +287,53 @@ public void testUnblocksOnFinish() assertThat(blocked.isDone()).isTrue(); } + @Test + public void testUnblocksOnTimeout() + throws 
InterruptedException + { + List<Type> types = ImmutableList.of(VARCHAR, BIGINT, BIGINT); + driverContext.setBlockedTimeout(new Duration(70, MILLISECONDS)); + // Create a driver with four operators, one of which is blocked so that it does not move any page and + // returns an incomplete blocked future. + Operator operator1 = createSinkOperator(types, 1, "test1"); + BlockedOperator operator2 = createBlockedOperator(types, 2, "test2"); + Operator operator3 = createSinkOperator(types, 3, "test3"); + Operator operator4 = createSinkOperator(types, 4, "test3"); + Driver driver = Driver.createDriver(driverContext, operator1, operator2, operator3, operator4); + + ListenableFuture<Void> blocked = driver.processForDuration(new Duration(200, MILLISECONDS)); + assertThat(blocked.isDone()).isFalse(); + // wait for the blocked future to be timed out + sleep(100); + assertThat(blocked.isDone()).isTrue(); + // verify that the blocked operator is not cancelled or done due to timeout + assertThat(operator2.isCancelled()).isFalse(); + assertThat(operator2.isDone()).isFalse(); + } + + @Test + public void testUnblocksWhenBlockedOperatorIsUnblockedAndTimeoutIsSet() + { + List<Type> types = ImmutableList.of(VARCHAR, BIGINT, BIGINT); + driverContext.setBlockedTimeout(new Duration(100, MILLISECONDS)); + // Create a driver with four operators, one of which is blocked so that it does not move any page and + // returns an incomplete blocked future. + Operator operator1 = createSinkOperator(types, 1, "test1"); + BlockedOperator operator2 = createBlockedOperator(types, 2, "test2"); + Operator operator3 = createSinkOperator(types, 3, "test3"); + Operator operator4 = createSinkOperator(types, 4, "test3"); + Driver driver = Driver.createDriver(driverContext, operator1, operator2, operator3, operator4); + + ListenableFuture<Void> blocked = driver.processForDuration(new Duration(200, MILLISECONDS)); + assertThat(blocked.isDone()).isFalse(); + // unblock the blocked operator + operator2.setDone(); + // verify that the blocked future is done but is not cancelled + assertThat(operator2.isDone()).isTrue(); + assertThat(blocked.isDone()).isTrue(); + assertThat(operator2.isCancelled()).isFalse(); + } + @Test public void testBrokenOperatorAddSource() { @@ -336,10 +385,22 @@ private static Split newMockSplit() } private PageConsumerOperator createSinkOperator(List<Type> types) + { + return createSinkOperator(types, 1, "test"); + } + + private PageConsumerOperator createSinkOperator(List<Type> types, int operatorId, String planNodeId) + { + // materialize the output to catch some type errors + MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(driverContext.getSession(), types); + return new PageConsumerOperator(driverContext.addOperatorContext(operatorId, new PlanNodeId(planNodeId), "sink"), resultBuilder::page, Function.identity()); + } + + private BlockedOperator createBlockedOperator(List<Type> types, int operatorId, String planNodeId) { // materialize the output to catch some type errors MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(driverContext.getSession(), types); - return new PageConsumerOperator(driverContext.addOperatorContext(1, new PlanNodeId("test"), "sink"), resultBuilder::page, Function.identity()); + return new BlockedOperator(driverContext.addOperatorContext(operatorId, new PlanNodeId(planNodeId), "sink"), resultBuilder::page, Function.identity()); } private static class BrokenOperator @@ -477,6 +538,41 @@ void setFinished() } } + private static class BlockedOperator + extends PageConsumerOperator + { + private final 
SettableFuture blocked = SettableFuture.create(); + + public BlockedOperator( + OperatorContext operatorContext, + Consumer pageConsumer, + Function pagePreprocessor) + { + super(operatorContext, pageConsumer, pagePreprocessor); + } + + @Override + public ListenableFuture isBlocked() + { + return blocked; + } + + private void setDone() + { + blocked.set(null); + } + + private boolean isDone() + { + return blocked.isDone(); + } + + private boolean isCancelled() + { + return blocked.isCancelled(); + } + } + private static class AlwaysBlockedTableScanOperator extends TableScanOperator { diff --git a/core/trino-main/src/test/java/io/trino/operator/TestExchangeOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestExchangeOperator.java index bc6ed291249dc..01aa3b10e3bf4 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestExchangeOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestExchangeOperator.java @@ -60,10 +60,10 @@ import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @TestInstance(PER_CLASS) -@Execution(CONCURRENT) +@Execution(SAME_THREAD) public class TestExchangeOperator { private static final List TYPES = ImmutableList.of(VARCHAR); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankAccumulator.java b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankAccumulator.java index 7cf8f1f114f51..ca21f4cca5e5a 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankAccumulator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankAccumulator.java @@ -15,14 +15,12 @@ import io.trino.array.LongBigArray; import it.unimi.dsi.fastutil.longs.LongArrayList; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import static com.google.common.collect.Lists.cartesianProduct; import static java.lang.Math.min; import static org.assertj.core.api.Assertions.assertThat; @@ -43,27 +41,20 @@ public long hashCode(long rowId) } }; - @DataProvider - public static Object[][] parameters() - { - List topNs = Arrays.asList(1, 2, 3); - List valueCounts = Arrays.asList(0, 1, 2, 4, 8); - List groupCounts = Arrays.asList(1, 2, 3); - List drainWithRankings = Arrays.asList(true, false); - return to2DArray(cartesianProduct(topNs, valueCounts, groupCounts, drainWithRankings)); - } - - private static Object[][] to2DArray(List> nestedList) + @Test + public void testSinglePeerGroupInsert() { - Object[][] array = new Object[nestedList.size()][]; - for (int i = 0; i < nestedList.size(); i++) { - array[i] = nestedList.get(i).toArray(); + for (int topN : Arrays.asList(1, 2, 3)) { + for (int valueCount : Arrays.asList(0, 1, 2, 4, 8)) { + for (int groupCount : Arrays.asList(1, 2, 3)) { + testSinglePeerGroupInsert(topN, valueCount, groupCount, true); + testSinglePeerGroupInsert(topN, valueCount, groupCount, false); + } + } } - return array; } - @Test(dataProvider = "parameters") - public void testSinglePeerGroupInsert(int topN, long valueCount, long groupCount, boolean drainWithRanking) + private void testSinglePeerGroupInsert(int topN, long valueCount, long groupCount, 
boolean drainWithRanking) { List evicted = new LongArrayList(); GroupedTopNRankAccumulator accumulator = new GroupedTopNRankAccumulator(STRATEGY, topN, evicted::add); @@ -103,8 +94,20 @@ public void testSinglePeerGroupInsert(int topN, long valueCount, long groupCount } } - @Test(dataProvider = "parameters") - public void testIncreasingAllUniqueValues(int topN, long valueCount, long groupCount, boolean drainWithRanking) + @Test + public void testIncreasingAllUniqueValues() + { + for (int topN : Arrays.asList(1, 2, 3)) { + for (int valueCount : Arrays.asList(0, 1, 2, 4, 8)) { + for (int groupCount : Arrays.asList(1, 2, 3)) { + testIncreasingAllUniqueValues(topN, valueCount, groupCount, true); + testIncreasingAllUniqueValues(topN, valueCount, groupCount, false); + } + } + } + } + + private void testIncreasingAllUniqueValues(int topN, long valueCount, long groupCount, boolean drainWithRanking) { List evicted = new LongArrayList(); GroupedTopNRankAccumulator accumulator = new GroupedTopNRankAccumulator(STRATEGY, topN, evicted::add); @@ -144,8 +147,20 @@ public void testIncreasingAllUniqueValues(int topN, long valueCount, long groupC } } - @Test(dataProvider = "parameters") - public void testDecreasingAllUniqueValues(int topN, long valueCount, long groupCount, boolean drainWithRanking) + @Test + public void testDecreasingAllUniqueValues() + { + for (int topN : Arrays.asList(1, 2, 3)) { + for (int valueCount : Arrays.asList(0, 1, 2, 4, 8)) { + for (int groupCount : Arrays.asList(1, 2, 3)) { + testDecreasingAllUniqueValues(topN, valueCount, groupCount, true); + testDecreasingAllUniqueValues(topN, valueCount, groupCount, false); + } + } + } + } + + private void testDecreasingAllUniqueValues(int topN, long valueCount, long groupCount, boolean drainWithRanking) { List evicted = new LongArrayList(); GroupedTopNRankAccumulator accumulator = new GroupedTopNRankAccumulator(STRATEGY, topN, evicted::add); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankBuilder.java b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankBuilder.java index 5b5a67a8b7d56..4f6c6a0a43a0c 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankBuilder.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRankBuilder.java @@ -19,8 +19,7 @@ import io.trino.spi.type.TypeOperators; import io.trino.sql.gen.JoinCompiler; import io.trino.type.BlockTypeOperators; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; @@ -39,12 +38,6 @@ public class TestGroupedTopNRankBuilder { - @DataProvider - public static Object[][] produceRanking() - { - return new Object[][] {{true}, {false}}; - } - @Test public void testEmptyInput() { @@ -74,8 +67,14 @@ public long hashCode(Page page, int position) assertThat(groupedTopNBuilder.buildResult().hasNext()).isFalse(); } - @Test(dataProvider = "produceRanking") - public void testSingleGroupTopN(boolean produceRanking) + @Test + public void testSingleGroupTopN() + { + testSingleGroupTopN(true); + testSingleGroupTopN(false); + } + + private void testSingleGroupTopN(boolean produceRanking) { TypeOperators typeOperators = new TypeOperators(); BlockTypeOperators blockTypeOperators = new BlockTypeOperators(typeOperators); @@ -133,8 +132,14 @@ public void testSingleGroupTopN(boolean produceRanking) assertPageEquals(outputTypes, getOnlyElement(output), expected); } - @Test(dataProvider = 
"produceRanking") - public void testMultiGroupTopN(boolean produceRanking) + @Test + public void testMultiGroupTopN() + { + testMultiGroupTopN(true); + testMultiGroupTopN(false); + } + + private void testMultiGroupTopN(boolean produceRanking) { TypeOperators typeOperators = new TypeOperators(); BlockTypeOperators blockTypeOperators = new BlockTypeOperators(typeOperators); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRowNumberBuilder.java b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRowNumberBuilder.java index 06d1acce72194..a150a4e47dcf4 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRowNumberBuilder.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestGroupedTopNRowNumberBuilder.java @@ -18,8 +18,7 @@ import io.trino.spi.type.Type; import io.trino.spi.type.TypeOperators; import io.trino.sql.gen.JoinCompiler; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; @@ -36,19 +35,6 @@ public class TestGroupedTopNRowNumberBuilder { private static final TypeOperators TYPE_OPERATORS_CACHE = new TypeOperators(); - @DataProvider - public static Object[][] produceRowNumbers() - { - return new Object[][] {{true}, {false}}; - } - - @DataProvider - public static Object[][] pageRowCounts() - { - // make either page or row count > 1024 to expand the big arrays - return new Object[][] {{10000, 20}, {20, 10000}}; - } - @Test public void testEmptyInput() { @@ -64,8 +50,14 @@ public void testEmptyInput() assertThat(groupedTopNBuilder.buildResult().hasNext()).isFalse(); } - @Test(dataProvider = "produceRowNumbers") - public void testMultiGroupTopN(boolean produceRowNumbers) + @Test + public void testMultiGroupTopN() + { + testMultiGroupTopN(true); + testMultiGroupTopN(false); + } + + private void testMultiGroupTopN(boolean produceRowNumbers) { List types = ImmutableList.of(BIGINT, DOUBLE); List input = rowPagesBuilder(types) @@ -131,8 +123,14 @@ public void testMultiGroupTopN(boolean produceRowNumbers) } } - @Test(dataProvider = "produceRowNumbers") - public void testSingleGroupTopN(boolean produceRowNumbers) + @Test + public void testSingleGroupTopN() + { + testSingleGroupTopN(true); + testSingleGroupTopN(false); + } + + private void testSingleGroupTopN(boolean produceRowNumbers) { List types = ImmutableList.of(BIGINT, DOUBLE); List input = rowPagesBuilder(types) diff --git a/core/trino-main/src/test/java/io/trino/operator/TestHashAggregationOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestHashAggregationOperator.java index babf7dbdf044c..cdeb3e7487787 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestHashAggregationOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestHashAggregationOperator.java @@ -40,10 +40,10 @@ import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.MaterializedResult; import io.trino.testing.TestingTaskContext; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.io.IOException; import java.util.ArrayList; @@ -94,8 +94,11 @@ import static java.util.concurrent.Executors.newScheduledThreadPool; import 
static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestHashAggregationOperator { private static final TestingFunctionResolution FUNCTION_RESOLUTION = new TestingFunctionResolution(); @@ -107,58 +110,36 @@ public class TestHashAggregationOperator private static final int MAX_BLOCK_SIZE_IN_BYTES = 64 * 1024; - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); private final TypeOperators typeOperators = new TypeOperators(); private final JoinCompiler joinCompiler = new JoinCompiler(typeOperators); - private DummySpillerFactory spillerFactory; - @BeforeMethod - public void setUp() - { - executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - spillerFactory = new DummySpillerFactory(); - } - - @AfterMethod(alwaysRun = true) + @AfterAll public void tearDown() { - spillerFactory = null; executor.shutdownNow(); scheduledExecutor.shutdownNow(); } - @DataProvider(name = "hashEnabled") - public static Object[][] hashEnabled() - { - return new Object[][] {{true}, {false}}; - } - - @DataProvider(name = "hashEnabledAndMemoryLimitForMergeValues") - public static Object[][] hashEnabledAndMemoryLimitForMergeValuesProvider() + @Test + public void testHashAggregation() { - return new Object[][] { - {true, true, true, 8, Integer.MAX_VALUE}, - {true, true, false, 8, Integer.MAX_VALUE}, - {false, false, false, 0, 0}, - {false, true, true, 0, 0}, - {false, true, false, 0, 0}, - {false, true, true, 8, 0}, - {false, true, false, 8, 0}, - {false, true, true, 8, Integer.MAX_VALUE}, - {false, true, false, 8, Integer.MAX_VALUE}}; + testHashAggregation(true, true, true, 8, Integer.MAX_VALUE); + testHashAggregation(true, true, false, 8, Integer.MAX_VALUE); + testHashAggregation(false, false, false, 0, 0); + testHashAggregation(false, true, true, 0, 0); + testHashAggregation(false, true, false, 0, 0); + testHashAggregation(false, true, true, 8, 0); + testHashAggregation(false, true, false, 8, 0); + testHashAggregation(false, true, true, 8, Integer.MAX_VALUE); + testHashAggregation(false, true, false, 8, Integer.MAX_VALUE); } - @DataProvider - public Object[][] dataType() + private void testHashAggregation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) { - return new Object[][] {{VARCHAR}, {BIGINT}}; - } + DummySpillerFactory spillerFactory = new DummySpillerFactory(); - @Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues") - public void testHashAggregation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) - { // make operator produce multiple pages during finish phase int numberOfRows = 40_000; TestingAggregationFunction countVarcharColumn = 
FUNCTION_RESOLUTION.getAggregateFunction("count", fromTypes(VARCHAR)); @@ -215,9 +196,24 @@ public void testHashAggregation(boolean hashEnabled, boolean spillEnabled, boole .isTrue(); } - @Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues") - public void testHashAggregationWithGlobals(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) + @Test + public void testHashAggregationWithGlobals() + { + testHashAggregationWithGlobals(true, true, true, 8, Integer.MAX_VALUE); + testHashAggregationWithGlobals(true, true, false, 8, Integer.MAX_VALUE); + testHashAggregationWithGlobals(false, false, false, 0, 0); + testHashAggregationWithGlobals(false, true, true, 0, 0); + testHashAggregationWithGlobals(false, true, false, 0, 0); + testHashAggregationWithGlobals(false, true, true, 8, 0); + testHashAggregationWithGlobals(false, true, false, 8, 0); + testHashAggregationWithGlobals(false, true, true, 8, Integer.MAX_VALUE); + testHashAggregationWithGlobals(false, true, false, 8, Integer.MAX_VALUE); + } + + private void testHashAggregationWithGlobals(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) { + DummySpillerFactory spillerFactory = new DummySpillerFactory(); + TestingAggregationFunction countVarcharColumn = FUNCTION_RESOLUTION.getAggregateFunction("count", fromTypes(VARCHAR)); TestingAggregationFunction countBooleanColumn = FUNCTION_RESOLUTION.getAggregateFunction("count", fromTypes(BOOLEAN)); TestingAggregationFunction maxVarcharColumn = FUNCTION_RESOLUTION.getAggregateFunction("max", fromTypes(VARCHAR)); @@ -263,9 +259,24 @@ public void testHashAggregationWithGlobals(boolean hashEnabled, boolean spillEna assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected, hashEnabled, Optional.of(groupByChannels.size()), revokeMemoryWhenAddingPages); } - @Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues") - public void testHashAggregationMemoryReservation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) + @Test + public void testHashAggregationMemoryReservation() + { + testHashAggregationMemoryReservation(true, true, true, 8, Integer.MAX_VALUE); + testHashAggregationMemoryReservation(true, true, false, 8, Integer.MAX_VALUE); + testHashAggregationMemoryReservation(false, false, false, 0, 0); + testHashAggregationMemoryReservation(false, true, true, 0, 0); + testHashAggregationMemoryReservation(false, true, false, 0, 0); + testHashAggregationMemoryReservation(false, true, true, 8, 0); + testHashAggregationMemoryReservation(false, true, false, 8, 0); + testHashAggregationMemoryReservation(false, true, true, 8, Integer.MAX_VALUE); + testHashAggregationMemoryReservation(false, true, false, 8, Integer.MAX_VALUE); + } + + private void testHashAggregationMemoryReservation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) { + DummySpillerFactory spillerFactory = new DummySpillerFactory(); + TestingAggregationFunction arrayAggColumn = FUNCTION_RESOLUTION.getAggregateFunction("array_agg", fromTypes(BIGINT)); List hashChannels = Ints.asList(1); @@ -308,8 +319,19 @@ public void testHashAggregationMemoryReservation(boolean hashEnabled, boolean sp 
assertThat(getOnlyElement(operator.getOperatorContext().getNestedOperatorStats()).getRevocableMemoryReservation().toBytes()).isEqualTo(0); } - @Test(dataProvider = "hashEnabled", expectedExceptions = ExceededMemoryLimitException.class, expectedExceptionsMessageRegExp = "Query exceeded per-node memory limit of 10B.*") - public void testMemoryLimit(boolean hashEnabled) + @Test + public void testMemoryLimit() + { + assertThatThrownBy(() -> testMemoryLimit(true)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of 10B.*"); + + assertThatThrownBy(() -> testMemoryLimit(false)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of 10B.*"); + } + + private void testMemoryLimit(boolean hashEnabled) { TestingAggregationFunction maxVarcharColumn = FUNCTION_RESOLUTION.getAggregateFunction("max", fromTypes(VARCHAR)); @@ -347,9 +369,24 @@ public void testMemoryLimit(boolean hashEnabled) toPages(operatorFactory, driverContext, input); } - @Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues") - public void testHashBuilderResize(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) + @Test + public void testHashBuilderResize() + { + testHashBuilderResize(true, true, true, 8, Integer.MAX_VALUE); + testHashBuilderResize(true, true, false, 8, Integer.MAX_VALUE); + testHashBuilderResize(false, false, false, 0, 0); + testHashBuilderResize(false, true, true, 0, 0); + testHashBuilderResize(false, true, false, 0, 0); + testHashBuilderResize(false, true, true, 8, 0); + testHashBuilderResize(false, true, false, 8, 0); + testHashBuilderResize(false, true, true, 8, Integer.MAX_VALUE); + testHashBuilderResize(false, true, false, 8, Integer.MAX_VALUE); + } + + private void testHashBuilderResize(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) { + DummySpillerFactory spillerFactory = new DummySpillerFactory(); + BlockBuilder builder = VARCHAR.createBlockBuilder(null, 1, MAX_BLOCK_SIZE_IN_BYTES); VARCHAR.writeSlice(builder, Slices.allocate(200_000)); // this must be larger than MAX_BLOCK_SIZE_IN_BYTES, 64K builder.build(); @@ -388,7 +425,13 @@ public void testHashBuilderResize(boolean hashEnabled, boolean spillEnabled, boo toPages(operatorFactory, driverContext, input, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "dataType") + @Test + public void testMemoryReservationYield() + { + testMemoryReservationYield(VARCHAR); + testMemoryReservationYield(BIGINT); + } + public void testMemoryReservationYield(Type type) { List input = createPagesWithDistinctHashKeys(type, 6_000, 600); @@ -426,8 +469,19 @@ public void testMemoryReservationYield(Type type) assertThat(count).isEqualTo(6_000 * 600); } - @Test(dataProvider = "hashEnabled", expectedExceptions = ExceededMemoryLimitException.class, expectedExceptionsMessageRegExp = "Query exceeded per-node memory limit of 3MB.*") - public void testHashBuilderResizeLimit(boolean hashEnabled) + @Test + public void testHashBuilderResizeLimit() + { + assertThatThrownBy(() -> testHashBuilderResizeLimit(true)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of 3MB.*"); + + assertThatThrownBy(() -> testHashBuilderResizeLimit(false)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query 
exceeded per-node memory limit of 3MB.*"); + } + + private void testHashBuilderResizeLimit(boolean hashEnabled) { BlockBuilder builder = VARCHAR.createBlockBuilder(null, 1, MAX_BLOCK_SIZE_IN_BYTES); VARCHAR.writeSlice(builder, Slices.allocate(5_000_000)); // this must be larger than MAX_BLOCK_SIZE_IN_BYTES, 64K @@ -464,8 +518,14 @@ public void testHashBuilderResizeLimit(boolean hashEnabled) toPages(operatorFactory, driverContext, input); } - @Test(dataProvider = "hashEnabled") - public void testMultiSliceAggregationOutput(boolean hashEnabled) + @Test + public void testMultiSliceAggregationOutput() + { + testMultiSliceAggregationOutput(true); + testMultiSliceAggregationOutput(false); + } + + private void testMultiSliceAggregationOutput(boolean hashEnabled) { // estimate the number of entries required to create 1.5 pages of results // See InMemoryHashAggregationBuilder.buildTypes() @@ -499,8 +559,15 @@ public void testMultiSliceAggregationOutput(boolean hashEnabled) assertThat(toPages(operatorFactory, createDriverContext(), input).size()).isEqualTo(2); } - @Test(dataProvider = "hashEnabled") - public void testMultiplePartialFlushes(boolean hashEnabled) + @Test + public void testMultiplePartialFlushes() + throws Exception + { + testMultiplePartialFlushes(true); + testMultiplePartialFlushes(false); + } + + private void testMultiplePartialFlushes(boolean hashEnabled) throws Exception { List hashChannels = Ints.asList(0); @@ -584,6 +651,8 @@ public void testMultiplePartialFlushes(boolean hashEnabled) @Test public void testMergeWithMemorySpill() { + DummySpillerFactory spillerFactory = new DummySpillerFactory(); + RowPagesBuilder rowPagesBuilder = rowPagesBuilder(BIGINT); int smallPagesSpillThresholdSize = 150000; diff --git a/core/trino-main/src/test/java/io/trino/operator/TestHashSemiJoinOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestHashSemiJoinOperator.java index c43a6c412aa92..3171e6c59a7b5 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestHashSemiJoinOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestHashSemiJoinOperator.java @@ -25,10 +25,11 @@ import io.trino.sql.gen.JoinCompiler; import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.MaterializedResult; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -46,8 +47,12 @@ import static io.trino.testing.TestingTaskContext.createTaskContext; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@TestInstance(PER_METHOD) +@Execution(SAME_THREAD) public class TestHashSemiJoinOperator { private ExecutorService executor; @@ -55,7 +60,7 @@ public class TestHashSemiJoinOperator private TaskContext taskContext; private TypeOperators typeOperators; - @BeforeMethod + @BeforeEach public void setUp() { executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + 
"-%s")); @@ -64,27 +69,21 @@ public void setUp() typeOperators = new TypeOperators(); } - @AfterMethod(alwaysRun = true) + @AfterEach public void tearDown() { executor.shutdownNow(); scheduledExecutor.shutdownNow(); } - @DataProvider(name = "hashEnabledValues") - public static Object[][] hashEnabledValuesProvider() + @Test + public void testSemiJoin() { - return new Object[][] {{true}, {false}}; + testSemiJoin(true); + testSemiJoin(false); } - @DataProvider - public Object[][] dataType() - { - return new Object[][] {{VARCHAR}, {BIGINT}}; - } - - @Test(dataProvider = "hashEnabledValues") - public void testSemiJoin(boolean hashEnabled) + private void testSemiJoin(boolean hashEnabled) { DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -148,8 +147,14 @@ public void testSemiJoin(boolean hashEnabled) OperatorAssertion.assertOperatorEquals(joinOperatorFactory, driverContext, probeInput, expected, hashEnabled, ImmutableList.of(probeTypes.size())); } - @Test(dataProvider = "hashEnabledValues") - public void testSemiJoinOnVarcharType(boolean hashEnabled) + @Test + public void testSemiJoinOnVarcharType() + { + testSemiJoinOnVarcharType(true); + testSemiJoinOnVarcharType(false); + } + + private void testSemiJoinOnVarcharType(boolean hashEnabled) { DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -213,8 +218,14 @@ public void testSemiJoinOnVarcharType(boolean hashEnabled) OperatorAssertion.assertOperatorEquals(joinOperatorFactory, driverContext, probeInput, expected, hashEnabled, ImmutableList.of(probeTypes.size())); } - @Test(dataProvider = "hashEnabledValues") - public void testBuildSideNulls(boolean hashEnabled) + @Test + public void testBuildSideNulls() + { + testBuildSideNulls(true); + testBuildSideNulls(false); + } + + private void testBuildSideNulls(boolean hashEnabled) { DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -272,8 +283,14 @@ public void testBuildSideNulls(boolean hashEnabled) OperatorAssertion.assertOperatorEquals(joinOperatorFactory, driverContext, probeInput, expected, hashEnabled, ImmutableList.of(probeTypes.size())); } - @Test(dataProvider = "hashEnabledValues") - public void testProbeSideNulls(boolean hashEnabled) + @Test + public void testProbeSideNulls() + { + testProbeSideNulls(true); + testProbeSideNulls(false); + } + + private void testProbeSideNulls(boolean hashEnabled) { DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -331,8 +348,14 @@ public void testProbeSideNulls(boolean hashEnabled) OperatorAssertion.assertOperatorEquals(joinOperatorFactory, driverContext, probeInput, expected, hashEnabled, ImmutableList.of(probeTypes.size())); } - @Test(dataProvider = "hashEnabledValues") - public void testProbeAndBuildNulls(boolean hashEnabled) + @Test + public void testProbeAndBuildNulls() + { + testProbeAndBuildNulls(true); + testProbeAndBuildNulls(false); + } + + private void testProbeAndBuildNulls(boolean hashEnabled) { DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -391,8 +414,19 @@ public void testProbeAndBuildNulls(boolean hashEnabled) OperatorAssertion.assertOperatorEquals(joinOperatorFactory, driverContext, probeInput, expected, hashEnabled, ImmutableList.of(probeTypes.size())); } - @Test(dataProvider = "hashEnabledValues", expectedExceptions = ExceededMemoryLimitException.class, 
expectedExceptionsMessageRegExp = "Query exceeded per-node memory limit of.*") - public void testMemoryLimit(boolean hashEnabled) + @Test + public void testMemoryLimit() + { + assertThatThrownBy(() -> testMemoryLimit(true)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of.*"); + + assertThatThrownBy(() -> testMemoryLimit(false)) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of.*"); + } + + private void testMemoryLimit(boolean hashEnabled) { DriverContext driverContext = createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.ofBytes(100)) .addPipelineContext(0, true, true, false) diff --git a/core/trino-main/src/test/java/io/trino/operator/TestMarkDistinctOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestMarkDistinctOperator.java index 1657f51fb5d72..f79b8b979bcee 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestMarkDistinctOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestMarkDistinctOperator.java @@ -25,10 +25,10 @@ import io.trino.sql.gen.JoinCompiler; import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.MaterializedResult; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -51,47 +51,33 @@ import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestMarkDistinctOperator { - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; - private DriverContext driverContext; + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); private final TypeOperators typeOperators = new TypeOperators(); private final JoinCompiler joinCompiler = new JoinCompiler(typeOperators); - @BeforeMethod - public void setUp() - { - executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - driverContext = createTaskContext(executor, scheduledExecutor, TEST_SESSION) - .addPipelineContext(0, true, true, false) - .addDriverContext(); - } - - @AfterMethod(alwaysRun = true) + @AfterAll public void tearDown() { executor.shutdownNow(); scheduledExecutor.shutdownNow(); } - @DataProvider - public Object[][] dataType() + @Test + public void testMarkDistinct() { - return new Object[][] {{VARCHAR}, {BIGINT}}; + testMarkDistinct(true, newDriverContext()); + testMarkDistinct(false, newDriverContext()); } - @DataProvider(name = "hashEnabledValues") - public static Object[][] hashEnabledValuesProvider() - { - 
return new Object[][] {{true}, {false}}; - } - - @Test(dataProvider = "hashEnabledValues") - public void testMarkDistinct(boolean hashEnabled) + private void testMarkDistinct(boolean hashEnabled, DriverContext driverContext) { RowPagesBuilder rowPagesBuilder = rowPagesBuilder(hashEnabled, Ints.asList(0), BIGINT); List input = rowPagesBuilder @@ -116,8 +102,14 @@ public void testMarkDistinct(boolean hashEnabled) OperatorAssertion.assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected.build(), hashEnabled, Optional.of(1)); } - @Test(dataProvider = "hashEnabledValues") - public void testRleDistinctMask(boolean hashEnabled) + @Test + public void testRleDistinctMask() + { + testRleDistinctMask(true, newDriverContext()); + testRleDistinctMask(false, newDriverContext()); + } + + private void testRleDistinctMask(boolean hashEnabled, DriverContext driverContext) { RowPagesBuilder rowPagesBuilder = rowPagesBuilder(hashEnabled, Ints.asList(0), BIGINT); List inputs = rowPagesBuilder @@ -180,8 +172,14 @@ public void testRleDistinctMask(boolean hashEnabled) } } - @Test(dataProvider = "dataType") - public void testMemoryReservationYield(Type type) + @Test + public void testMemoryReservationYield() + { + testMemoryReservationYield(BIGINT); + testMemoryReservationYield(VARCHAR); + } + + private void testMemoryReservationYield(Type type) { List input = createPagesWithDistinctHashKeys(type, 6_000, 600); @@ -202,4 +200,11 @@ public void testMemoryReservationYield(Type type) } assertThat(count).isEqualTo(6_000 * 600); } + + private DriverContext newDriverContext() + { + return createTaskContext(executor, scheduledExecutor, TEST_SESSION) + .addPipelineContext(0, true, true, false) + .addDriverContext(); + } } diff --git a/core/trino-main/src/test/java/io/trino/operator/TestOrderByOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestOrderByOperator.java index ed23a1d2860dd..2fbe0ac967933 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestOrderByOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestOrderByOperator.java @@ -23,10 +23,10 @@ import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.MaterializedResult; import io.trino.testing.TestingTaskContext; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -53,45 +53,38 @@ import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestOrderByOperator { - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; - private DummySpillerFactory spillerFactory; + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); private final 
TypeOperators typeOperators = new TypeOperators(); - @DataProvider - public static Object[][] spillEnabled() + @AfterAll + public void tearDown() { - return new Object[][] { - {false, false, 0}, - {true, false, 8}, - {true, true, 8}, - {true, false, 0}, - {true, true, 0}}; + executor.shutdownNow(); + scheduledExecutor.shutdownNow(); } - @BeforeMethod - public void setUp() + @Test + public void testMultipleOutputPages() { - executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - spillerFactory = new DummySpillerFactory(); + testMultipleOutputPages(false, false, 0); + testMultipleOutputPages(true, false, 8); + testMultipleOutputPages(true, true, 8); + testMultipleOutputPages(true, false, 0); + testMultipleOutputPages(true, true, 0); } - @AfterMethod(alwaysRun = true) - public void tearDown() + private void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { - executor.shutdownNow(); - scheduledExecutor.shutdownNow(); - spillerFactory = null; - } + DummySpillerFactory spillerFactory = new DummySpillerFactory(); - @Test(dataProvider = "spillEnabled") - public void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) - { // make operator produce multiple pages during finish phase int numberOfRows = 80_000; List input = rowPagesBuilder(BIGINT, DOUBLE) @@ -129,8 +122,17 @@ public void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWh .isTrue(); } - @Test(dataProvider = "spillEnabled") - public void testSingleFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testSingleFieldKey() + { + testSingleFieldKey(false, false, 0); + testSingleFieldKey(true, false, 8); + testSingleFieldKey(true, true, 8); + testSingleFieldKey(true, false, 0); + testSingleFieldKey(true, true, 0); + } + + private void testSingleFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, DOUBLE) .row(1L, 0.1) @@ -150,7 +152,7 @@ public void testSingleFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAdd ImmutableList.of(ASC_NULLS_LAST), new PagesIndex.TestingFactory(false), spillEnabled, - Optional.of(spillerFactory), + Optional.of(new DummySpillerFactory()), new OrderingCompiler(typeOperators)); DriverContext driverContext = createDriverContext(memoryLimit); @@ -164,8 +166,17 @@ public void testSingleFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAdd assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testMultiFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testMultiFieldKey() + { + testMultiFieldKey(false, false, 0); + testMultiFieldKey(true, false, 8); + testMultiFieldKey(true, true, 8); + testMultiFieldKey(true, false, 0); + testMultiFieldKey(true, true, 0); + } + + private void testMultiFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, BIGINT) .row("a", 1L) @@ -185,7 +196,7 @@ public void testMultiFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddi ImmutableList.of(ASC_NULLS_LAST, DESC_NULLS_LAST), new PagesIndex.TestingFactory(false), spillEnabled, - 
Optional.of(spillerFactory), + Optional.of(new DummySpillerFactory()), new OrderingCompiler(typeOperators)); DriverContext driverContext = createDriverContext(memoryLimit); @@ -199,8 +210,17 @@ public void testMultiFieldKey(boolean spillEnabled, boolean revokeMemoryWhenAddi assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testReverseOrder(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testReverseOrder() + { + testReverseOrder(false, false, 0); + testReverseOrder(true, false, 8); + testReverseOrder(true, true, 8); + testReverseOrder(true, false, 0); + testReverseOrder(true, true, 0); + } + + private void testReverseOrder(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, DOUBLE) .row(1L, 0.1) @@ -220,7 +240,7 @@ public void testReverseOrder(boolean spillEnabled, boolean revokeMemoryWhenAddin ImmutableList.of(DESC_NULLS_LAST), new PagesIndex.TestingFactory(false), spillEnabled, - Optional.of(spillerFactory), + Optional.of(new DummySpillerFactory()), new OrderingCompiler(typeOperators)); DriverContext driverContext = createDriverContext(memoryLimit); @@ -259,7 +279,7 @@ public void testMemoryLimit() ImmutableList.of(ASC_NULLS_LAST), new PagesIndex.TestingFactory(false), false, - Optional.of(spillerFactory), + Optional.of(new DummySpillerFactory()), new OrderingCompiler(typeOperators)); assertThatThrownBy(() -> toPages(operatorFactory, driverContext, input)) diff --git a/core/trino-main/src/test/java/io/trino/operator/TestSimplePagesHashStrategy.java b/core/trino-main/src/test/java/io/trino/operator/TestSimplePagesHashStrategy.java index 379dcd0a1fcda..bb6957e05a7e1 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestSimplePagesHashStrategy.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestSimplePagesHashStrategy.java @@ -37,7 +37,7 @@ public class TestSimplePagesHashStrategy @Test public void testHashRowWithIntegerType() { - Block block = new IntArrayBlock(1, Optional.empty(), new int[]{1234}); + Block block = new IntArrayBlock(1, Optional.empty(), new int[] {1234}); SimplePagesHashStrategy strategy = createSimplePagesHashStrategy(INTEGER, ImmutableList.of(block)); Page page = new Page(block); @@ -51,9 +51,9 @@ public void testHashRowWithMapType() MapType mapType = new MapType(INTEGER, INTEGER, new TypeOperators()); Block block = mapType.createBlockFromKeyValue( Optional.empty(), - new int[]{0, 1}, - new IntArrayBlock(1, Optional.empty(), new int[]{1234}), - new IntArrayBlock(1, Optional.empty(), new int[]{5678})); + new int[] {0, 1}, + new IntArrayBlock(1, Optional.empty(), new int[] {1234}), + new IntArrayBlock(1, Optional.empty(), new int[] {5678})); SimplePagesHashStrategy strategy = createSimplePagesHashStrategy(mapType, ImmutableList.of(block)); Page page = new Page(block); @@ -67,9 +67,9 @@ public void testRowEqualsRowWithIntegerType() { SimplePagesHashStrategy strategy = createSimplePagesHashStrategy(INTEGER, ImmutableList.of()); - Page leftPage = new Page(new IntArrayBlock(1, Optional.empty(), new int[]{1234})); - Page rightPage1 = new Page(new IntArrayBlock(1, Optional.empty(), new int[]{1234})); - Page rightPage2 = new Page(new IntArrayBlock(1, Optional.empty(), new int[]{5678})); + Page leftPage = new Page(new IntArrayBlock(1, Optional.empty(), new int[] {1234})); + Page rightPage1 = new Page(new IntArrayBlock(1, Optional.empty(), new 
int[] {1234}));
+ Page rightPage2 = new Page(new IntArrayBlock(1, Optional.empty(), new int[] {5678}));
// This works because IntegerType is comparable.
assertThat(strategy.rowEqualsRow(0, leftPage, 0, rightPage1)).isTrue();
@@ -84,21 +84,21 @@ public void testRowEqualsRowWithMapType()
Page leftPage = new Page(mapType.createBlockFromKeyValue(
Optional.empty(),
- new int[]{0, 1},
- new IntArrayBlock(1, Optional.empty(), new int[]{1234}),
- new IntArrayBlock(1, Optional.empty(), new int[]{5678})));
+ new int[] {0, 1},
+ new IntArrayBlock(1, Optional.empty(), new int[] {1234}),
+ new IntArrayBlock(1, Optional.empty(), new int[] {5678})));
Page rightPage1 = new Page(mapType.createBlockFromKeyValue(
Optional.empty(),
- new int[]{0, 1},
- new IntArrayBlock(1, Optional.empty(), new int[]{1234}),
- new IntArrayBlock(1, Optional.empty(), new int[]{5678})));
+ new int[] {0, 1},
+ new IntArrayBlock(1, Optional.empty(), new int[] {1234}),
+ new IntArrayBlock(1, Optional.empty(), new int[] {5678})));
Page rightPage2 = new Page(mapType.createBlockFromKeyValue(
Optional.empty(),
- new int[]{0, 1},
- new IntArrayBlock(1, Optional.empty(), new int[]{1234}),
- new IntArrayBlock(1, Optional.empty(), new int[]{1234})));
+ new int[] {0, 1},
+ new IntArrayBlock(1, Optional.empty(), new int[] {1234}),
+ new IntArrayBlock(1, Optional.empty(), new int[] {1234})));
// This works because MapType is comparable.
assertThat(strategy.rowEqualsRow(0, leftPage, 0, rightPage1)).isTrue();
@@ -108,7 +108,7 @@ public void testRowEqualsRowWithMapType()
@Test
public void testCompareSortChannelPositionsWithIntegerType()
{
- Block block = new IntArrayBlock(3, Optional.empty(), new int[]{1234, 5678, 1234});
+ Block block = new IntArrayBlock(3, Optional.empty(), new int[] {1234, 5678, 1234});
SimplePagesHashStrategy strategy = createSimplePagesHashStrategy(INTEGER, ImmutableList.of(block));
// This works because IntegerType is orderable.
@@ -123,9 +123,9 @@ public void testCompareSortChannelPositionsWithMapType() MapType mapType = new MapType(INTEGER, INTEGER, new TypeOperators()); Block block = mapType.createBlockFromKeyValue( Optional.empty(), - new int[]{0, 1}, - new IntArrayBlock(1, Optional.empty(), new int[]{1234}), - new IntArrayBlock(1, Optional.empty(), new int[]{5678})); + new int[] {0, 1}, + new IntArrayBlock(1, Optional.empty(), new int[] {1234}), + new IntArrayBlock(1, Optional.empty(), new int[] {5678})); SimplePagesHashStrategy strategy = createSimplePagesHashStrategy(mapType, ImmutableList.of(block)); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestTopNPeerGroupLookup.java b/core/trino-main/src/test/java/io/trino/operator/TestTopNPeerGroupLookup.java index d88a5cb00c7ab..a2c6c192026c3 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestTopNPeerGroupLookup.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestTopNPeerGroupLookup.java @@ -13,13 +13,10 @@ */ package io.trino.operator; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.Arrays; -import java.util.List; -import static com.google.common.collect.Lists.cartesianProduct; import static org.assertj.core.api.Assertions.assertThat; public class TestTopNPeerGroupLookup @@ -41,28 +38,20 @@ public long hashCode(long rowId) private static final long UNMAPPED_GROUP_ID = Long.MIN_VALUE; private static final long DEFAULT_RETURN_VALUE = -1L; - @DataProvider - public static Object[][] parameters() + @Test + public void testCombinations() { - List expectedSizes = Arrays.asList(0, 1, 2, 3, 1_000); - List fillFactors = Arrays.asList(0.1f, 0.9f, 1f); - List totalGroupIds = Arrays.asList(1L, 10L); - List totalRowIds = Arrays.asList(1L, 1_000L); - - return to2DArray(cartesianProduct(expectedSizes, fillFactors, totalGroupIds, totalRowIds)); - } - - private static Object[][] to2DArray(List> nestedList) - { - Object[][] array = new Object[nestedList.size()][]; - for (int i = 0; i < nestedList.size(); i++) { - array[i] = nestedList.get(i).toArray(); + for (int expectedSize : Arrays.asList(0, 1, 2, 3, 1_000)) { + for (float fillFactor : Arrays.asList(0.1f, 0.9f, 1f)) { + testCombinations(expectedSize, fillFactor, 1L, 1L); + testCombinations(expectedSize, fillFactor, 10L, 1L); + testCombinations(expectedSize, fillFactor, 1L, 1_000L); + testCombinations(expectedSize, fillFactor, 10L, 1_000L); + } } - return array; } - @Test(dataProvider = "parameters") - public void testCombinations(int expectedSize, float fillFactor, long totalGroupIds, long totalRowIds) + private void testCombinations(int expectedSize, float fillFactor, long totalGroupIds, long totalRowIds) { TopNPeerGroupLookup lookup = new TopNPeerGroupLookup(expectedSize, fillFactor, HASH_STRATEGY, UNMAPPED_GROUP_ID, DEFAULT_RETURN_VALUE); diff --git a/core/trino-main/src/test/java/io/trino/operator/TestWindowOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestWindowOperator.java index f9c3e5f6f4aaa..1c55c5d36569e 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestWindowOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestWindowOperator.java @@ -38,10 +38,10 @@ import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.MaterializedResult; import io.trino.testing.TestingTaskContext; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import 
org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -72,8 +72,12 @@ import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestWindowOperator { private static final TypeOperators TYPE_OPERATORS_CACHE = new TypeOperators(); @@ -100,40 +104,30 @@ public class TestWindowOperator private static final List LEAD = ImmutableList.of( window(new ReflectionWindowFunctionSupplier(3, LeadFunction.class), VARCHAR, UNBOUNDED_FRAME, false, ImmutableList.of(), 1, 3, 4)); - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; - private DummySpillerFactory spillerFactory; + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - @BeforeMethod - public void setUp() - { - executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s")); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - spillerFactory = new DummySpillerFactory(); - } - - @AfterMethod(alwaysRun = true) + @AfterAll public void tearDown() { executor.shutdownNow(); scheduledExecutor.shutdownNow(); - spillerFactory = null; } - @DataProvider - public static Object[][] spillEnabled() + @Test + public void testMultipleOutputPages() { - return new Object[][] { - {false, false, 0}, - {true, false, 8}, - {true, true, 8}, - {true, false, 0}, - {true, true, 0}}; + testMultipleOutputPages(false, false, 0); + testMultipleOutputPages(true, false, 8); + testMultipleOutputPages(true, true, 8); + testMultipleOutputPages(true, false, 0); + testMultipleOutputPages(true, true, 0); } - @Test(dataProvider = "spillEnabled") - public void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + private void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { + DummySpillerFactory spillerFactory = new DummySpillerFactory(); + // make operator produce multiple pages during finish phase int numberOfRows = 80_000; List input = rowPagesBuilder(BIGINT, DOUBLE) @@ -147,6 +141,7 @@ public void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWh Ints.asList(), Ints.asList(0), ImmutableList.copyOf(new SortOrder[] {SortOrder.DESC_NULLS_FIRST}), + spillerFactory, spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -167,8 +162,17 @@ public void testMultipleOutputPages(boolean spillEnabled, boolean revokeMemoryWh .isTrue(); } - @Test(dataProvider = "spillEnabled") - public void testRowNumber(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testRowNumber() + { + testRowNumber(false, false, 0); + 
testRowNumber(true, false, 8); + testRowNumber(true, true, 8); + testRowNumber(true, false, 0); + testRowNumber(true, true, 0); + } + + private void testRowNumber(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, DOUBLE) .row(2L, 0.3) @@ -186,6 +190,7 @@ public void testRowNumber(boolean spillEnabled, boolean revokeMemoryWhenAddingPa Ints.asList(), Ints.asList(0), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -200,8 +205,17 @@ public void testRowNumber(boolean spillEnabled, boolean revokeMemoryWhenAddingPa assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testRowNumberPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testRowNumberPartition() + { + testRowNumberPartition(false, false, 0); + testRowNumberPartition(true, false, 8); + testRowNumberPartition(true, true, 8); + testRowNumberPartition(true, false, 0); + testRowNumberPartition(true, true, 0); + } + + private void testRowNumberPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, BIGINT, DOUBLE, BOOLEAN) .row("b", -1L, -0.1, true) @@ -219,6 +233,7 @@ public void testRowNumberPartition(boolean spillEnabled, boolean revokeMemoryWhe Ints.asList(0), Ints.asList(1), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -255,6 +270,7 @@ public void testRowNumberArbitrary() Ints.asList(), Ints.asList(), ImmutableList.copyOf(new SortOrder[] {}), + new DummySpillerFactory(), false); DriverContext driverContext = createDriverContext(); @@ -294,6 +310,7 @@ public void testRowNumberArbitraryWithSpill() Ints.asList(), Ints.asList(), ImmutableList.copyOf(new SortOrder[] {}), + new DummySpillerFactory(), true); DriverContext driverContext = createDriverContext(); @@ -311,7 +328,16 @@ public void testRowNumberArbitraryWithSpill() assertOperatorEquals(operatorFactory, driverContext, input, expected); } - @Test(dataProvider = "spillEnabled") + @Test + public void testDistinctPartitionAndPeers() + { + testDistinctPartitionAndPeers(false, false, 0); + testDistinctPartitionAndPeers(true, false, 8); + testDistinctPartitionAndPeers(true, true, 8); + testDistinctPartitionAndPeers(true, false, 0); + testDistinctPartitionAndPeers(true, true, 0); + } + public void testDistinctPartitionAndPeers(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(DOUBLE, DOUBLE) @@ -344,6 +370,7 @@ public void testDistinctPartitionAndPeers(boolean spillEnabled, boolean revokeMe Ints.asList(0), Ints.asList(1), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -372,35 +399,49 @@ public void testDistinctPartitionAndPeers(boolean spillEnabled, boolean revokeMe assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(expectedExceptions = ExceededMemoryLimitException.class, expectedExceptionsMessageRegExp = "Query exceeded per-node memory limit of 10B.*") + @Test public void testMemoryLimit() { - List 
input = rowPagesBuilder(BIGINT, DOUBLE) - .row(1L, 0.1) - .row(2L, 0.2) - .pageBreak() - .row(-1L, -0.1) - .row(4L, 0.4) - .build(); - - DriverContext driverContext = createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.ofBytes(10)) - .addPipelineContext(0, true, true, false) - .addDriverContext(); - - WindowOperatorFactory operatorFactory = createFactoryUnbounded( - ImmutableList.of(BIGINT, DOUBLE), - Ints.asList(1), - ROW_NUMBER, - Ints.asList(), - Ints.asList(0), - ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), - false); + assertThatThrownBy(() -> { + List input = rowPagesBuilder(BIGINT, DOUBLE) + .row(1L, 0.1) + .row(2L, 0.2) + .pageBreak() + .row(-1L, -0.1) + .row(4L, 0.4) + .build(); + + DriverContext driverContext = createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.ofBytes(10)) + .addPipelineContext(0, true, true, false) + .addDriverContext(); + + WindowOperatorFactory operatorFactory = createFactoryUnbounded( + ImmutableList.of(BIGINT, DOUBLE), + Ints.asList(1), + ROW_NUMBER, + Ints.asList(), + Ints.asList(0), + ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), + false); + + toPages(operatorFactory, driverContext, input); + }) + .isInstanceOf(ExceededMemoryLimitException.class) + .hasMessageMatching("Query exceeded per-node memory limit of 10B.*"); + } - toPages(operatorFactory, driverContext, input); + @Test + public void testFirstValuePartition() + { + testFirstValuePartition(false, false, 0); + testFirstValuePartition(true, false, 8); + testFirstValuePartition(true, true, 8); + testFirstValuePartition(true, false, 0); + testFirstValuePartition(true, true, 0); } - @Test(dataProvider = "spillEnabled") - public void testFirstValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + private void testFirstValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, VARCHAR, BIGINT, BOOLEAN, VARCHAR) .row("b", "A1", 1L, true, "") @@ -419,6 +460,7 @@ public void testFirstValuePartition(boolean spillEnabled, boolean revokeMemoryWh Ints.asList(0), Ints.asList(2), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -454,6 +496,7 @@ public void testClose() Ints.asList(0), Ints.asList(1), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), false); DriverContext driverContext = createDriverContext(1000); @@ -469,8 +512,17 @@ public void testClose() operator.close(); } - @Test(dataProvider = "spillEnabled") - public void testLastValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testLastValuePartition() + { + testLastValuePartition(false, false, 0); + testLastValuePartition(true, false, 8); + testLastValuePartition(true, true, 8); + testLastValuePartition(true, false, 0); + testLastValuePartition(true, true, 0); + } + + private void testLastValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, VARCHAR, BIGINT, BOOLEAN, VARCHAR) .row("b", "A1", 1L, true, "") @@ -490,6 +542,7 @@ public void testLastValuePartition(boolean spillEnabled, boolean revokeMemoryWhe Ints.asList(0), Ints.asList(2), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), 
spillEnabled); MaterializedResult expected = resultBuilder(driverContext.getSession(), VARCHAR, VARCHAR, BIGINT, BOOLEAN, VARCHAR) @@ -503,8 +556,17 @@ public void testLastValuePartition(boolean spillEnabled, boolean revokeMemoryWhe assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testNthValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testNthValuePartition() + { + testNthValuePartition(false, false, 0); + testNthValuePartition(true, false, 8); + testNthValuePartition(true, true, 8); + testNthValuePartition(true, false, 0); + testNthValuePartition(true, true, 0); + } + + private void testNthValuePartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, VARCHAR, BIGINT, BIGINT, BOOLEAN, VARCHAR) .row("b", "A1", 1L, 2L, true, "") @@ -523,6 +585,7 @@ public void testNthValuePartition(boolean spillEnabled, boolean revokeMemoryWhen Ints.asList(0), Ints.asList(2), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -538,8 +601,17 @@ public void testNthValuePartition(boolean spillEnabled, boolean revokeMemoryWhen assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testLagPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testLagPartition() + { + testLagPartition(false, false, 0); + testLagPartition(true, false, 8); + testLagPartition(true, true, 8); + testLagPartition(true, false, 0); + testLagPartition(true, true, 0); + } + + private void testLagPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, VARCHAR, BIGINT, BIGINT, VARCHAR, BOOLEAN, VARCHAR) .row("b", "A1", 1L, 1L, "D", true, "") @@ -558,6 +630,7 @@ public void testLagPartition(boolean spillEnabled, boolean revokeMemoryWhenAddin Ints.asList(0), Ints.asList(2), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -573,8 +646,17 @@ public void testLagPartition(boolean spillEnabled, boolean revokeMemoryWhenAddin assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testLeadPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testLeadPartition() + { + testLeadPartition(false, false, 0); + testLeadPartition(true, false, 8); + testLeadPartition(true, true, 8); + testLeadPartition(true, false, 0); + testLeadPartition(true, true, 0); + } + + private void testLeadPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(VARCHAR, VARCHAR, BIGINT, BIGINT, VARCHAR, BOOLEAN, VARCHAR) .row("b", "A1", 1L, 1L, "D", true, "") @@ -593,6 +675,7 @@ public void testLeadPartition(boolean spillEnabled, boolean revokeMemoryWhenAddi Ints.asList(0), Ints.asList(2), ImmutableList.copyOf(new SortOrder[] {SortOrder.ASC_NULLS_LAST}), + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ 
-608,8 +691,17 @@ public void testLeadPartition(boolean spillEnabled, boolean revokeMemoryWhenAddi assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testPartiallyPreGroupedPartitionWithEmptyInput(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testPartiallyPreGroupedPartitionWithEmptyInput() + { + testPartiallyPreGroupedPartitionWithEmptyInput(false, false, 0); + testPartiallyPreGroupedPartitionWithEmptyInput(true, false, 8); + testPartiallyPreGroupedPartitionWithEmptyInput(true, true, 8); + testPartiallyPreGroupedPartitionWithEmptyInput(true, false, 0); + testPartiallyPreGroupedPartitionWithEmptyInput(true, true, 0); + } + + private void testPartiallyPreGroupedPartitionWithEmptyInput(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, VARCHAR, BIGINT, VARCHAR) .pageBreak() @@ -625,6 +717,7 @@ public void testPartiallyPreGroupedPartitionWithEmptyInput(boolean spillEnabled, Ints.asList(3), ImmutableList.of(SortOrder.ASC_NULLS_LAST), 0, + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -634,8 +727,17 @@ public void testPartiallyPreGroupedPartitionWithEmptyInput(boolean spillEnabled, assertOperatorEquals(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testPartiallyPreGroupedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testPartiallyPreGroupedPartition() + { + testPartiallyPreGroupedPartition(false, false, 0); + testPartiallyPreGroupedPartition(true, false, 8); + testPartiallyPreGroupedPartition(true, true, 8); + testPartiallyPreGroupedPartition(true, false, 0); + testPartiallyPreGroupedPartition(true, true, 0); + } + + private void testPartiallyPreGroupedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, VARCHAR, BIGINT, VARCHAR) .pageBreak() @@ -659,6 +761,7 @@ public void testPartiallyPreGroupedPartition(boolean spillEnabled, boolean revok Ints.asList(3), ImmutableList.of(SortOrder.ASC_NULLS_LAST), 0, + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -674,8 +777,17 @@ public void testPartiallyPreGroupedPartition(boolean spillEnabled, boolean revok assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testFullyPreGroupedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testFullyPreGroupedPartition() + { + testFullyPreGroupedPartition(false, false, 0); + testFullyPreGroupedPartition(true, false, 8); + testFullyPreGroupedPartition(true, true, 8); + testFullyPreGroupedPartition(true, false, 0); + testFullyPreGroupedPartition(true, true, 0); + } + + private void testFullyPreGroupedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, VARCHAR, BIGINT, VARCHAR) .pageBreak() @@ -700,6 +812,7 @@ public void testFullyPreGroupedPartition(boolean spillEnabled, boolean revokeMem Ints.asList(3), ImmutableList.of(SortOrder.ASC_NULLS_LAST), 0, + new DummySpillerFactory(), spillEnabled); DriverContext 
driverContext = createDriverContext(memoryLimit); @@ -716,8 +829,17 @@ public void testFullyPreGroupedPartition(boolean spillEnabled, boolean revokeMem assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testFullyPreGroupedAndPartiallySortedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testFullyPreGroupedAndPartiallySortedPartition() + { + testFullyPreGroupedAndPartiallySortedPartition(false, false, 0); + testFullyPreGroupedAndPartiallySortedPartition(true, false, 8); + testFullyPreGroupedAndPartiallySortedPartition(true, true, 8); + testFullyPreGroupedAndPartiallySortedPartition(true, false, 0); + testFullyPreGroupedAndPartiallySortedPartition(true, true, 0); + } + + private void testFullyPreGroupedAndPartiallySortedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, VARCHAR, BIGINT, VARCHAR) .pageBreak() @@ -743,6 +865,7 @@ public void testFullyPreGroupedAndPartiallySortedPartition(boolean spillEnabled, Ints.asList(3, 2), ImmutableList.of(SortOrder.ASC_NULLS_LAST, SortOrder.ASC_NULLS_LAST), 1, + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -760,8 +883,17 @@ public void testFullyPreGroupedAndPartiallySortedPartition(boolean spillEnabled, assertOperatorEqualsIgnoreOrder(operatorFactory, driverContext, input, expected, revokeMemoryWhenAddingPages); } - @Test(dataProvider = "spillEnabled") - public void testFullyPreGroupedAndFullySortedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) + @Test + public void testFullyPreGroupedAndFullySortedPartition() + { + testFullyPreGroupedAndFullySortedPartition(false, false, 0); + testFullyPreGroupedAndFullySortedPartition(true, false, 8); + testFullyPreGroupedAndFullySortedPartition(true, true, 8); + testFullyPreGroupedAndFullySortedPartition(true, false, 0); + testFullyPreGroupedAndFullySortedPartition(true, true, 0); + } + + private void testFullyPreGroupedAndFullySortedPartition(boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimit) { List input = rowPagesBuilder(BIGINT, VARCHAR, BIGINT, VARCHAR) .pageBreak() @@ -787,6 +919,7 @@ public void testFullyPreGroupedAndFullySortedPartition(boolean spillEnabled, boo Ints.asList(3), ImmutableList.of(SortOrder.ASC_NULLS_LAST), 1, + new DummySpillerFactory(), spillEnabled); DriverContext driverContext = createDriverContext(memoryLimit); @@ -844,6 +977,7 @@ private WindowOperatorFactory createFactoryUnbounded( List partitionChannels, List sortChannels, List sortOrder, + SpillerFactory spillerFactory, boolean spillEnabled) { return createFactoryUnbounded( @@ -855,6 +989,7 @@ private WindowOperatorFactory createFactoryUnbounded( sortChannels, sortOrder, 0, + spillerFactory, spillEnabled); } @@ -867,6 +1002,7 @@ private WindowOperatorFactory createFactoryUnbounded( List sortChannels, List sortOrder, int preSortedChannelPrefix, + DummySpillerFactory spillerFactory, boolean spillEnabled) { return new WindowOperatorFactory( diff --git a/core/trino-main/src/test/java/io/trino/operator/TestingOperatorContext.java b/core/trino-main/src/test/java/io/trino/operator/TestingOperatorContext.java index db29308522379..677e71e6540c7 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestingOperatorContext.java +++ 
b/core/trino-main/src/test/java/io/trino/operator/TestingOperatorContext.java @@ -42,6 +42,7 @@ public static OperatorContext create(ScheduledExecutorService scheduledExecutor) taskContext, executor, scheduledExecutor, + scheduledExecutor, pipelineMemoryContext, false, false, @@ -51,6 +52,7 @@ public static OperatorContext create(ScheduledExecutorService scheduledExecutor) pipelineContext, executor, scheduledExecutor, + scheduledExecutor, pipelineMemoryContext, 0L); diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/AbstractTestApproximateCountDistinct.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/AbstractTestApproximateCountDistinct.java index a697a552f7e5d..6ee64e55a1c96 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/AbstractTestApproximateCountDistinct.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/AbstractTestApproximateCountDistinct.java @@ -21,8 +21,7 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.Collections; @@ -52,35 +51,35 @@ protected int getUniqueValuesCount() return 20000; } - @DataProvider(name = "provideStandardErrors") - public Object[][] provideStandardErrors() + @Test + public void testNoPositions() { - return new Object[][] { - {0.0230}, // 2k buckets - {0.0115}, // 8k buckets - }; + assertCount(ImmutableList.of(), 0.0230, 0); + assertCount(ImmutableList.of(), 0.0115, 0); } - @Test(dataProvider = "provideStandardErrors") - public void testNoPositions(double maxStandardError) + @Test + public void testSinglePosition() { - assertCount(ImmutableList.of(), maxStandardError, 0); + assertCount(ImmutableList.of(randomValue()), 0.0230, 1); + assertCount(ImmutableList.of(randomValue()), 0.0115, 1); } - @Test(dataProvider = "provideStandardErrors") - public void testSinglePosition(double maxStandardError) + @Test + public void testAllPositionsNull() { - assertCount(ImmutableList.of(randomValue()), maxStandardError, 1); + assertCount(Collections.nCopies(100, null), 0.0230, 0); + assertCount(Collections.nCopies(100, null), 0.0115, 0); } - @Test(dataProvider = "provideStandardErrors") - public void testAllPositionsNull(double maxStandardError) + @Test + public void testMixedNullsAndNonNulls() { - assertCount(Collections.nCopies(100, null), maxStandardError, 0); + testMixedNullsAndNonNulls(0.0230); + testMixedNullsAndNonNulls(0.0115); } - @Test(dataProvider = "provideStandardErrors") - public void testMixedNullsAndNonNulls(double maxStandardError) + private void testMixedNullsAndNonNulls(double maxStandardError) { int uniques = getUniqueValuesCount(); List baseline = createRandomSample(uniques, (int) (uniques * 1.5)); @@ -96,8 +95,14 @@ public void testMixedNullsAndNonNulls(double maxStandardError) assertCount(mixed, maxStandardError, estimateGroupByCount(baseline, maxStandardError)); } - @Test(dataProvider = "provideStandardErrors") - public void testMultiplePositions(double maxStandardError) + @Test + public void testMultiplePositions() + { + testMultiplePositions(0.0230); + testMultiplePositions(0.0115); + } + + private void testMultiplePositions(double maxStandardError) { DescriptiveStatistics stats = new DescriptiveStatistics(); @@ -116,8 +121,14 @@ public void testMultiplePositions(double maxStandardError) 
assertLessThan(stats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
- @Test(dataProvider = "provideStandardErrors")
- public void testMultiplePositionsPartial(double maxStandardError)
+ @Test
+ public void testMultiplePositionsPartial()
+ {
+ testMultiplePositionsPartial(0.0230);
+ testMultiplePositionsPartial(0.0115);
+ }
+
+ private void testMultiplePositionsPartial(double maxStandardError)
{
for (int i = 0; i < 100; ++i) {
int uniques = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestAggregationMaskCompiler.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestAggregationMaskCompiler.java
index 322028b2075f2..f9623ca027487 100644
--- a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestAggregationMaskCompiler.java
+++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestAggregationMaskCompiler.java
@@ -19,8 +19,7 @@ import io.trino.spi.block.IntArrayBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.block.ShortArrayBlock;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
+import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.Optional;
@@ -32,23 +31,24 @@ public class TestAggregationMaskCompiler
{
- @DataProvider
- public Object[][] maskBuilderSuppliers()
+ private static final Supplier<AggregationMaskBuilder> INTERPRETED_MASK_BUILDER_SUPPLIER = () -> new InterpretedAggregationMaskBuilder(1);
+ private static final Supplier<AggregationMaskBuilder> COMPILED_MASK_BUILDER_SUPPLIER = () -> {
+ try {
+ return generateAggregationMaskBuilder(1).newInstance();
+ }
+ catch (ReflectiveOperationException e) {
+ throw new RuntimeException(e);
+ }
+ };
+
+ @Test
+ public void testSupplier()
{
- Supplier<AggregationMaskBuilder> interpretedMaskBuilderSupplier = () -> new InterpretedAggregationMaskBuilder(1);
- Supplier<AggregationMaskBuilder> compiledMaskBuilderSupplier = () -> {
- try {
- return generateAggregationMaskBuilder(1).newInstance();
- }
- catch (ReflectiveOperationException e) {
- throw new RuntimeException(e);
- }
- };
- return new Object[][] {{compiledMaskBuilderSupplier}, {interpretedMaskBuilderSupplier}};
+ testSupplier(INTERPRETED_MASK_BUILDER_SUPPLIER);
+ testSupplier(COMPILED_MASK_BUILDER_SUPPLIER);
}
- @Test(dataProvider = "maskBuilderSuppliers")
- public void testSupplier(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
+ private void testSupplier(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
{
// each builder produced from a supplier could be completely independent
assertThat(maskBuilderSupplier.get()).isNotSameAs(maskBuilderSupplier.get());
@@ -74,8 +74,14 @@ public void testSupplier(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
.isSameAs(maskBuilder.buildAggregationMask(pageWithNulls, Optional.empty()).getSelectedPositions());
}
- @Test(dataProvider = "maskBuilderSuppliers")
- public void testUnsetNulls(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
+ @Test
+ public void testUnsetNulls()
+ {
+ testUnsetNulls(INTERPRETED_MASK_BUILDER_SUPPLIER);
+ testUnsetNulls(COMPILED_MASK_BUILDER_SUPPLIER);
+ }
+
+ private void testUnsetNulls(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
{
AggregationMaskBuilder maskBuilder = maskBuilderSupplier.get();
AggregationMask aggregationMask = maskBuilder.buildAggregationMask(buildSingleColumnPage(0), Optional.empty());
@@ -107,8 +113,14 @@ public void testUnsetNulls(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
}
}
- @Test(dataProvider = "maskBuilderSuppliers")
- public void testApplyMask(Supplier<AggregationMaskBuilder> maskBuilderSupplier)
+ @Test
+ public void testApplyMask()
+ {
+ testApplyMask(INTERPRETED_MASK_BUILDER_SUPPLIER);
+
testApplyMask(COMPILED_MASK_BUILDER_SUPPLIER); + } + + private void testApplyMask(Supplier maskBuilderSupplier) { AggregationMaskBuilder maskBuilder = maskBuilderSupplier.get(); @@ -135,8 +147,14 @@ public void testApplyMask(Supplier maskBuilderSupplier) } } - @Test(dataProvider = "maskBuilderSuppliers") - public void testApplyMaskNulls(Supplier maskBuilderSupplier) + @Test + public void testApplyMaskNulls() + { + testApplyMaskNulls(INTERPRETED_MASK_BUILDER_SUPPLIER); + testApplyMaskNulls(COMPILED_MASK_BUILDER_SUPPLIER); + } + + private void testApplyMaskNulls(Supplier maskBuilderSupplier) { AggregationMaskBuilder maskBuilder = maskBuilderSupplier.get(); diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateCountDistinctBoolean.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateCountDistinctBoolean.java index a766c9c019e56..ede51acce9688 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateCountDistinctBoolean.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateCountDistinctBoolean.java @@ -14,13 +14,10 @@ package io.trino.operator.aggregation; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.primitives.Booleans; import io.trino.spi.type.Type; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; -import java.util.List; import java.util.concurrent.ThreadLocalRandom; import static io.trino.spi.type.BooleanType.BOOLEAN; @@ -40,24 +37,15 @@ protected Object randomValue() return ThreadLocalRandom.current().nextBoolean(); } - @DataProvider(name = "inputSequences") - public Object[][] inputSequences() - { - return new Object[][] { - {true}, - {false}, - {true, false}, - {true, true, true}, - {false, false, false}, - {true, false, true, false}, - }; - } - - @Test(dataProvider = "inputSequences") - public void testNonEmptyInputs(boolean... 
inputSequence) + @Test + public void testNonEmptyInputs() { - List values = Booleans.asList(inputSequence); - assertCount(values, 0, distinctCount(values)); + assertCount(Booleans.asList(true), 0, 1); + assertCount(Booleans.asList(false), 0, 1); + assertCount(Booleans.asList(true, false), 0, 2); + assertCount(Booleans.asList(true, true, true), 0, 1); + assertCount(Booleans.asList(false, false, false), 0, 1); + assertCount(Booleans.asList(true, false, true, false), 0, 2); } @Test @@ -66,11 +54,6 @@ public void testNoInput() assertCount(ImmutableList.of(), 0, 0); } - private long distinctCount(List inputSequence) - { - return ImmutableSet.copyOf(inputSequence).size(); - } - @Override protected int getUniqueValuesCount() { diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateSetGenericBoolean.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateSetGenericBoolean.java index 624ba42722b5f..ff49acd651cbd 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateSetGenericBoolean.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestApproximateSetGenericBoolean.java @@ -14,11 +14,9 @@ package io.trino.operator.aggregation; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.primitives.Booleans; import io.trino.spi.type.Type; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.concurrent.ThreadLocalRandom; @@ -40,29 +38,15 @@ protected Object randomValue() return ThreadLocalRandom.current().nextBoolean(); } - @DataProvider(name = "inputSequences") - public Object[][] inputSequences() + @Test + public void testNonEmptyInputs() { - return new Object[][] { - {true}, - {false}, - {true, false}, - {true, true, true}, - {false, false, false}, - {true, false, true, false}, - }; - } - - @Test(dataProvider = "inputSequences") - public void testNonEmptyInputs(boolean... 
inputSequence) - { - List values = Booleans.asList(inputSequence); - assertCount(values, distinctCount(values)); - } - - private long distinctCount(List inputSequence) - { - return ImmutableSet.copyOf(inputSequence).size(); + assertCount(Booleans.asList(true), 1); + assertCount(Booleans.asList(false), 1); + assertCount(Booleans.asList(true, false), 2); + assertCount(Booleans.asList(true, true, true), 1); + assertCount(Booleans.asList(false, false, false), 1); + assertCount(Booleans.asList(true, false, true, false), 2); } @Override diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestDecimalAverageAggregation.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestDecimalAverageAggregation.java index a81785db935c0..8897405f6150e 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/TestDecimalAverageAggregation.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/TestDecimalAverageAggregation.java @@ -21,9 +21,7 @@ import io.trino.spi.type.DecimalType; import io.trino.spi.type.Decimals; import io.trino.spi.type.Int128; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.math.BigDecimal; import java.math.BigInteger; @@ -38,7 +36,6 @@ import static java.math.RoundingMode.HALF_UP; import static org.assertj.core.api.Assertions.assertThat; -@Test(singleThreaded = true) public class TestDecimalAverageAggregation { private static final BigInteger TWO = new BigInteger("2"); @@ -46,17 +43,11 @@ public class TestDecimalAverageAggregation private static final BigInteger TWO_HUNDRED = new BigInteger("200"); private static final DecimalType TYPE = createDecimalType(38, 0); - private LongDecimalWithOverflowAndLongState state; - - @BeforeMethod - public void setUp() - { - state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); - } - @Test public void testOverflow() { + LongDecimalWithOverflowAndLongState state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); + addToState(state, TWO.pow(126)); assertThat(state.getLong()).isEqualTo(1); @@ -69,12 +60,14 @@ public void testOverflow() assertThat(state.getOverflow()).isEqualTo(1); assertThat(getDecimal(state)).isEqualTo(Int128.valueOf(1L << 63, 0)); - assertAverageEquals(TWO.pow(126)); + assertAverageEquals(state, TWO.pow(126)); } @Test public void testUnderflow() { + LongDecimalWithOverflowAndLongState state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); + addToState(state, Decimals.MIN_UNSCALED_DECIMAL.toBigInteger()); assertThat(state.getLong()).isEqualTo(1); @@ -87,12 +80,14 @@ public void testUnderflow() assertThat(state.getOverflow()).isEqualTo(-1); assertThat(getDecimal(state)).isEqualTo(Int128.valueOf(0x698966AF4AF2770BL, 0xECEBBB8000000002L)); - assertAverageEquals(Decimals.MIN_UNSCALED_DECIMAL.toBigInteger()); + assertAverageEquals(state, Decimals.MIN_UNSCALED_DECIMAL.toBigInteger()); } @Test public void testUnderflowAfterOverflow() { + LongDecimalWithOverflowAndLongState state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); + addToState(state, TWO.pow(126)); addToState(state, TWO.pow(126)); addToState(state, TWO.pow(125)); @@ -107,12 +102,14 @@ public void testUnderflowAfterOverflow() assertThat(state.getOverflow()).isEqualTo(0); assertThat(getDecimal(state)).isEqualTo(Int128.valueOf(TWO.pow(125).negate())); - 
assertAverageEquals(TWO.pow(125).negate().divide(BigInteger.valueOf(6))); + assertAverageEquals(state, TWO.pow(125).negate().divide(BigInteger.valueOf(6))); } @Test public void testCombineOverflow() { + LongDecimalWithOverflowAndLongState state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); + addToState(state, TWO.pow(126)); addToState(state, TWO.pow(126)); @@ -133,12 +130,14 @@ public void testCombineOverflow() .add(TWO.pow(126)) .divide(BigInteger.valueOf(4)); - assertAverageEquals(expectedAverage); + assertAverageEquals(state, expectedAverage); } @Test public void testCombineUnderflow() { + LongDecimalWithOverflowAndLongState state = new LongDecimalWithOverflowAndLongStateFactory().createSingleState(); + addToState(state, TWO.pow(125).negate()); addToState(state, TWO.pow(126).negate()); @@ -160,14 +159,39 @@ public void testCombineUnderflow() .negate() .divide(BigInteger.valueOf(4)); - assertAverageEquals(expectedAverage); + assertAverageEquals(state, expectedAverage); } - @Test(dataProvider = "testNoOverflowDataProvider") - public void testNoOverflow(List numbers) + @Test + public void testNoOverflow() { - testNoOverflow(createDecimalType(38, 0), numbers); - testNoOverflow(createDecimalType(38, 2), numbers); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TEN.pow(37), ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TEN.pow(37).negate(), ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO, ONE)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(ZERO, ONE)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO.negate(), ONE.negate())); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(ONE.negate(), ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(ONE.negate(), ZERO, ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO.negate(), ZERO, ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO.negate(), ZERO)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO_HUNDRED, ONE_HUNDRED)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(ZERO, ONE_HUNDRED)); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(TWO_HUNDRED.negate(), ONE_HUNDRED.negate())); + testNoOverflow(createDecimalType(38, 0), ImmutableList.of(ONE_HUNDRED.negate(), ZERO)); + + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TEN.pow(37), ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TEN.pow(37).negate(), ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO, ONE)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(ZERO, ONE)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO.negate(), ONE.negate())); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(ONE.negate(), ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(ONE.negate(), ZERO, ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO.negate(), ZERO, ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO.negate(), ZERO)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO_HUNDRED, ONE_HUNDRED)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(ZERO, ONE_HUNDRED)); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(TWO_HUNDRED.negate(), ONE_HUNDRED.negate())); + testNoOverflow(createDecimalType(38, 2), ImmutableList.of(ONE_HUNDRED.negate(), ZERO)); } private void testNoOverflow(DecimalType type, List numbers) @@ -185,40 
+209,15 @@ private void testNoOverflow(DecimalType type, List numbers) assertThat(decodeBigDecimal(type, average(state, type))).isEqualTo(expectedAverage); } - @DataProvider - public static Object[][] testNoOverflowDataProvider() - { - return new Object[][] { - {ImmutableList.of(TEN.pow(37), ZERO)}, - {ImmutableList.of(TEN.pow(37).negate(), ZERO)}, - {ImmutableList.of(TWO, ONE)}, - {ImmutableList.of(ZERO, ONE)}, - {ImmutableList.of(TWO.negate(), ONE.negate())}, - {ImmutableList.of(ONE.negate(), ZERO)}, - {ImmutableList.of(ONE.negate(), ZERO, ZERO)}, - {ImmutableList.of(TWO.negate(), ZERO, ZERO)}, - {ImmutableList.of(TWO.negate(), ZERO)}, - {ImmutableList.of(TWO_HUNDRED, ONE_HUNDRED)}, - {ImmutableList.of(ZERO, ONE_HUNDRED)}, - {ImmutableList.of(TWO_HUNDRED.negate(), ONE_HUNDRED.negate())}, - {ImmutableList.of(ONE_HUNDRED.negate(), ZERO)} - }; - } - private static BigDecimal decodeBigDecimal(DecimalType type, Int128 average) { BigInteger unscaledVal = average.toBigInteger(); return new BigDecimal(unscaledVal, type.getScale(), new MathContext(type.getPrecision())); } - private void assertAverageEquals(BigInteger expectedAverage) - { - assertAverageEquals(expectedAverage, TYPE); - } - - private void assertAverageEquals(BigInteger expectedAverage, DecimalType type) + private void assertAverageEquals(LongDecimalWithOverflowAndLongState state, BigInteger expectedAverage) { - assertThat(average(state, type).toBigInteger()).isEqualTo(expectedAverage); + assertThat(average(state, TYPE).toBigInteger()).isEqualTo(expectedAverage); } private static void addToState(LongDecimalWithOverflowAndLongState state, BigInteger value) diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowAndLongStateSerializer.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowAndLongStateSerializer.java index 257d63587ba75..fbdafaa434bd9 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowAndLongStateSerializer.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowAndLongStateSerializer.java @@ -16,8 +16,7 @@ import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.VariableWidthBlockBuilder; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; @@ -25,8 +24,28 @@ public class TestLongDecimalWithOverflowAndLongStateSerializer { private static final LongDecimalWithOverflowAndLongStateFactory STATE_FACTORY = new LongDecimalWithOverflowAndLongStateFactory(); - @Test(dataProvider = "input") - public void testSerde(long low, long high, long overflow, long count, int expectedLength) + @Test + public void testSerde() + { + testSerde(3, 0, 0, 1, 1); + testSerde(3, 5, 0, 1, 2); + testSerde(3, 5, 7, 1, 4); + testSerde(3, 0, 0, 2, 3); + testSerde(3, 5, 0, 2, 4); + testSerde(3, 5, 7, 2, 4); + testSerde(3, 0, 7, 1, 3); + testSerde(3, 0, 7, 2, 3); + testSerde(0, 0, 0, 1, 1); + testSerde(0, 5, 0, 1, 2); + testSerde(0, 5, 7, 1, 4); + testSerde(0, 0, 0, 2, 3); + testSerde(0, 5, 0, 2, 4); + testSerde(0, 5, 7, 2, 4); + testSerde(0, 0, 7, 1, 3); + testSerde(0, 0, 7, 2, 3); + } + + private void testSerde(long low, long high, long overflow, long count, int expectedLength) { LongDecimalWithOverflowAndLongState state = STATE_FACTORY.createSingleState(); state.getDecimalArray()[0] = high; @@ 
-66,27 +85,4 @@ private LongDecimalWithOverflowAndLongState roundTrip(LongDecimalWithOverflowAnd serializer.deserialize(serialized, 0, outState); return outState; } - - @DataProvider - public Object[][] input() - { - return new Object[][] { - {3, 0, 0, 1, 1}, - {3, 5, 0, 1, 2}, - {3, 5, 7, 1, 4}, - {3, 0, 0, 2, 3}, - {3, 5, 0, 2, 4}, - {3, 5, 7, 2, 4}, - {3, 0, 7, 1, 3}, - {3, 0, 7, 2, 3}, - {0, 0, 0, 1, 1}, - {0, 5, 0, 1, 2}, - {0, 5, 7, 1, 4}, - {0, 0, 0, 2, 3}, - {0, 5, 0, 2, 4}, - {0, 5, 7, 2, 4}, - {0, 0, 7, 1, 3}, - {0, 0, 7, 2, 3} - }; - } } diff --git a/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowStateSerializer.java b/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowStateSerializer.java index 57638bd7bfd79..4c33f3016bfe5 100644 --- a/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowStateSerializer.java +++ b/core/trino-main/src/test/java/io/trino/operator/aggregation/state/TestLongDecimalWithOverflowStateSerializer.java @@ -16,8 +16,7 @@ import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.VariableWidthBlockBuilder; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; @@ -25,8 +24,20 @@ public class TestLongDecimalWithOverflowStateSerializer { private static final LongDecimalWithOverflowStateFactory STATE_FACTORY = new LongDecimalWithOverflowStateFactory(); - @Test(dataProvider = "input") - public void testSerde(long low, long high, long overflow, int expectedLength) + @Test + public void testSerde() + { + testSerde(3, 0, 0, 1); + testSerde(3, 5, 0, 2); + testSerde(3, 5, 7, 3); + testSerde(3, 0, 7, 3); + testSerde(0, 0, 0, 1); + testSerde(0, 5, 0, 2); + testSerde(0, 5, 7, 3); + testSerde(0, 0, 7, 3); + } + + private void testSerde(long low, long high, long overflow, int expectedLength) { LongDecimalWithOverflowState state = STATE_FACTORY.createSingleState(); state.getDecimalArray()[0] = high; @@ -66,19 +77,4 @@ private LongDecimalWithOverflowState roundTrip(LongDecimalWithOverflowState stat serializer.deserialize(serialized, 0, outState); return outState; } - - @DataProvider - public Object[][] input() - { - return new Object[][] { - {3, 0, 0, 1}, - {3, 5, 0, 2}, - {3, 5, 7, 3}, - {3, 0, 7, 3}, - {0, 0, 0, 1}, - {0, 5, 0, 2}, - {0, 5, 7, 3}, - {0, 0, 7, 3} - }; - } } diff --git a/core/trino-main/src/test/java/io/trino/operator/exchange/TestLocalExchange.java b/core/trino-main/src/test/java/io/trino/operator/exchange/TestLocalExchange.java index 92deb657a8fe4..fa35534e9ae42 100644 --- a/core/trino-main/src/test/java/io/trino/operator/exchange/TestLocalExchange.java +++ b/core/trino-main/src/test/java/io/trino/operator/exchange/TestLocalExchange.java @@ -41,9 +41,10 @@ import io.trino.sql.planner.PartitioningHandle; import io.trino.testing.TestingTransactionHandle; import io.trino.util.FinalizerService; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -73,8 +74,11 @@ import static io.trino.testing.TestingSession.testSessionBuilder; import static java.util.stream.IntStream.range; import 
static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@TestInstance(PER_METHOD) +@Execution(SAME_THREAD) public class TestLocalExchange { private static final List TYPES = ImmutableList.of(BIGINT); @@ -88,8 +92,9 @@ public class TestLocalExchange private final ConcurrentMap partitionManagers = new ConcurrentHashMap<>(); private NodePartitioningManager nodePartitioningManager; + private final PartitioningHandle customScalingPartitioningHandle = getCustomScalingPartitioningHandle(); - @BeforeMethod + @BeforeEach public void setUp() { NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory( @@ -332,6 +337,121 @@ public void testNoWriterScalingWhenOnlyBufferSizeLimitIsExceeded() }); } + @Test + public void testScalingWithTwoDifferentPartitions() + { + testScalingWithTwoDifferentPartitions(customScalingPartitioningHandle); + testScalingWithTwoDifferentPartitions(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testScalingWithTwoDifferentPartitions(PartitioningHandle partitioningHandle) + { + LocalExchange localExchange = new LocalExchange( + nodePartitioningManager, + testSessionBuilder() + .setSystemProperty(SKEWED_PARTITION_MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, "20kB") + .setSystemProperty(QUERY_MAX_MEMORY_PER_NODE, "256MB") + .build(), + 4, + partitioningHandle, + ImmutableList.of(0), + TYPES, + Optional.empty(), + DataSize.ofBytes(retainedSizeOfPages(2)), + TYPE_OPERATORS, + DataSize.of(10, KILOBYTE), + TOTAL_MEMORY_USED); + + run(localExchange, exchange -> { + assertThat(exchange.getBufferCount()).isEqualTo(4); + assertExchangeTotalBufferedBytes(exchange, 0); + + LocalExchangeSinkFactory sinkFactory = exchange.createSinkFactory(); + sinkFactory.noMoreSinkFactories(); + LocalExchangeSink sink = sinkFactory.createSink(); + assertSinkCanWrite(sink); + sinkFactory.close(); + + LocalExchangeSource sourceA = exchange.getNextSource(); + assertSource(sourceA, 0); + + LocalExchangeSource sourceB = exchange.getNextSource(); + assertSource(sourceB, 0); + + LocalExchangeSource sourceC = exchange.getNextSource(); + assertSource(sourceC, 0); + + LocalExchangeSource sourceD = exchange.getNextSource(); + assertSource(sourceD, 0); + + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(1, 2)); + sink.addPage(createSingleValuePage(1, 2)); + + // Two partitions are assigned to two different writers + assertSource(sourceA, 2); + assertSource(sourceB, 0); + assertSource(sourceC, 0); + assertSource(sourceD, 2); + + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + + // partition 0 is assigned to writer B after scaling. + assertSource(sourceA, 2); + assertSource(sourceB, 2); + assertSource(sourceC, 0); + assertSource(sourceD, 4); + + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + + // partition 0 is assigned to writer A after scaling. 
+ assertSource(sourceA, 3); + assertSource(sourceB, 4); + assertSource(sourceC, 0); + assertSource(sourceD, 5); + + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + sink.addPage(createSingleValuePage(0, 1000)); + + // partition 0 is assigned to writer C after scaling. + assertSource(sourceA, 4); + assertSource(sourceB, 5); + assertSource(sourceC, 1); + assertSource(sourceD, 6); + + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + + // partition 1 is assigned to writer B after scaling. + assertSource(sourceA, 5); + assertSource(sourceB, 8); + assertSource(sourceC, 1); + assertSource(sourceD, 6); + + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + sink.addPage(createSingleValuePage(1, 10000)); + + // partition 1 is assigned to writer C and D after scaling. + assertSource(sourceA, 6); + assertSource(sourceB, 9); + assertSource(sourceC, 2); + assertSource(sourceD, 7); + }); + } + @Test public void testScaledWriterRoundRobinExchangerWhenTotalMemoryUsedIsGreaterThanLimit() { @@ -424,8 +544,14 @@ public void testNoWriterScalingWhenOnlyWriterScalingMinDataProcessedLimitIsExcee }); } - @Test(dataProvider = "scalingPartitionHandles") - public void testScalingForSkewedWriters(PartitioningHandle partitioningHandle) + @Test + public void testScalingForSkewedWriters() + { + testScalingForSkewedWriters(customScalingPartitioningHandle); + testScalingForSkewedWriters(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testScalingForSkewedWriters(PartitioningHandle partitioningHandle) { LocalExchange localExchange = new LocalExchange( nodePartitioningManager, @@ -514,8 +640,14 @@ public void testScalingForSkewedWriters(PartitioningHandle partitioningHandle) }); } - @Test(dataProvider = "scalingPartitionHandles") - public void testNoScalingWhenDataWrittenIsLessThanMinFileSize(PartitioningHandle partitioningHandle) + @Test + public void testNoScalingWhenDataWrittenIsLessThanMinFileSize() + { + testNoScalingWhenDataWrittenIsLessThanMinFileSize(customScalingPartitioningHandle); + testNoScalingWhenDataWrittenIsLessThanMinFileSize(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testNoScalingWhenDataWrittenIsLessThanMinFileSize(PartitioningHandle partitioningHandle) { LocalExchange localExchange = new LocalExchange( nodePartitioningManager, @@ -578,8 +710,14 @@ public void testNoScalingWhenDataWrittenIsLessThanMinFileSize(PartitioningHandle }); } - @Test(dataProvider = "scalingPartitionHandles") - public void testNoScalingWhenBufferUtilizationIsLessThanLimit(PartitioningHandle partitioningHandle) + @Test + public void testNoScalingWhenBufferUtilizationIsLessThanLimit() + { + testNoScalingWhenBufferUtilizationIsLessThanLimit(customScalingPartitioningHandle); + testNoScalingWhenBufferUtilizationIsLessThanLimit(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testNoScalingWhenBufferUtilizationIsLessThanLimit(PartitioningHandle partitioningHandle) { LocalExchange localExchange = new LocalExchange( nodePartitioningManager, @@ -642,8 +780,14 @@ public void testNoScalingWhenBufferUtilizationIsLessThanLimit(PartitioningHandle }); } - @Test(dataProvider = "scalingPartitionHandles") - public void testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(PartitioningHandle 
partitioningHandle) + @Test + public void testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit() + { + testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(customScalingPartitioningHandle); + testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(PartitioningHandle partitioningHandle) { AtomicLong totalMemoryUsed = new AtomicLong(); LocalExchange localExchange = new LocalExchange( @@ -702,13 +846,13 @@ public void testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(PartitioningHandl sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); - // Scaling since total memory used is less than 10 MBs + // Scaling since total memory used is less than 14 MBs (20 MBs * 70%) assertSource(sourceA, 2); assertSource(sourceB, 2); assertSource(sourceC, 0); assertSource(sourceD, 4); - totalMemoryUsed.set(DataSize.of(13, MEGABYTE).toBytes()); + totalMemoryUsed.set(DataSize.of(15, MEGABYTE).toBytes()); sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); @@ -723,14 +867,21 @@ public void testNoScalingWhenTotalMemoryUsedIsGreaterThanLimit(PartitioningHandl }); } - @Test(dataProvider = "scalingPartitionHandles") - public void testNoScalingWhenMaxScaledPartitionsPerTaskIsSmall(PartitioningHandle partitioningHandle) + @Test + public void testDoNotUpdateScalingStateWhenMemoryIsAboveLimit() { + testDoNotUpdateScalingStateWhenMemoryIsAboveLimit(customScalingPartitioningHandle); + testDoNotUpdateScalingStateWhenMemoryIsAboveLimit(SCALED_WRITER_HASH_DISTRIBUTION); + } + + private void testDoNotUpdateScalingStateWhenMemoryIsAboveLimit(PartitioningHandle partitioningHandle) + { + AtomicLong totalMemoryUsed = new AtomicLong(); LocalExchange localExchange = new LocalExchange( nodePartitioningManager, testSessionBuilder() .setSystemProperty(SKEWED_PARTITION_MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, "20kB") - .setSystemProperty(QUERY_MAX_MEMORY_PER_NODE, "256MB") + .setSystemProperty(QUERY_MAX_MEMORY_PER_NODE, "20MB") .build(), 4, partitioningHandle, @@ -740,7 +891,7 @@ public void testNoScalingWhenMaxScaledPartitionsPerTaskIsSmall(PartitioningHandl DataSize.ofBytes(retainedSizeOfPages(2)), TYPE_OPERATORS, DataSize.of(10, KILOBYTE), - TOTAL_MEMORY_USED); + totalMemoryUsed::get); run(localExchange, exchange -> { assertThat(exchange.getBufferCount()).isEqualTo(4); @@ -775,60 +926,45 @@ public void testNoScalingWhenMaxScaledPartitionsPerTaskIsSmall(PartitioningHandl assertSource(sourceC, 0); assertSource(sourceD, 2); + totalMemoryUsed.set(DataSize.of(5, MEGABYTE).toBytes()); + sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); - // partition 0 is assigned to writer B after scaling. + // Scaling since total memory used is less than 14 MBs (20 MBs * 70%) assertSource(sourceA, 2); assertSource(sourceB, 2); assertSource(sourceC, 0); assertSource(sourceD, 4); + totalMemoryUsed.set(DataSize.of(15, MEGABYTE).toBytes()); + sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); sink.addPage(createSingleValuePage(0, 1000)); - // partition 0 is assigned to writer A after scaling. 
- assertSource(sourceA, 3); + // No scaling since total memory used is greater than 14 MBs (20 MBs * 70%) + assertSource(sourceA, 2); assertSource(sourceB, 4); assertSource(sourceC, 0); - assertSource(sourceD, 5); - - sink.addPage(createSingleValuePage(0, 1000)); - sink.addPage(createSingleValuePage(0, 1000)); - sink.addPage(createSingleValuePage(0, 1000)); - sink.addPage(createSingleValuePage(0, 1000)); - - // partition 0 is assigned to writer C after scaling. - assertSource(sourceA, 4); - assertSource(sourceB, 5); - assertSource(sourceC, 1); assertSource(sourceD, 6); - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); - - // partition 1 is assigned to writer B after scaling. - assertSource(sourceA, 6); - assertSource(sourceB, 7); - assertSource(sourceC, 1); - assertSource(sourceD, 6); - - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); - sink.addPage(createSingleValuePage(1, 10000)); + // Memory reduced due to closing of some writers + totalMemoryUsed.set(DataSize.of(13, MEGABYTE).toBytes()); - // no scaling will happen since we have scaled to maximum limit which is the number of writer count. - assertSource(sourceA, 8); - assertSource(sourceB, 9); - assertSource(sourceC, 1); - assertSource(sourceD, 6); + sink.addPage(createSingleValuePage(0, 10)); + sink.addPage(createSingleValuePage(0, 10)); + sink.addPage(createSingleValuePage(0, 10)); + sink.addPage(createSingleValuePage(0, 10)); + // No scaling since not enough data has been processed, and we are not considering data written + // when memory utilization is above the limit. 
+ assertSource(sourceA, 3); + assertSource(sourceB, 6); + assertSource(sourceC, 0); + assertSource(sourceD, 7); }); } @@ -1221,12 +1357,6 @@ public void writeUnblockWhenAllReadersFinishAndPagesConsumed() }); } - @DataProvider - public Object[][] scalingPartitionHandles() - { - return new Object[][] {{SCALED_WRITER_HASH_DISTRIBUTION}, {getCustomScalingPartitioningHandle()}}; - } - private PartitioningHandle getCustomScalingPartitioningHandle() { ConnectorPartitioningHandle connectorPartitioningHandle = new ConnectorPartitioningHandle() {}; @@ -1318,7 +1448,7 @@ private static void assertPartitionedRemovePage(LocalExchangeSource source, int Page page = source.removePage(); assertThat(page).isNotNull(); - LocalPartitionGenerator partitionGenerator = new LocalPartitionGenerator(createChannelsHashGenerator(TYPES, new int[]{0}, TYPE_OPERATORS), partitionCount); + LocalPartitionGenerator partitionGenerator = new LocalPartitionGenerator(createChannelsHashGenerator(TYPES, new int[] {0}, TYPE_OPERATORS), partitionCount); for (int position = 0; position < page.getPositionCount(); position++) { assertThat(partitionGenerator.getPartition(page, position)).isEqualTo(partition); } diff --git a/core/trino-main/src/test/java/io/trino/operator/join/JoinTestUtils.java b/core/trino-main/src/test/java/io/trino/operator/join/JoinTestUtils.java index 3f119282e95f8..514c2dcc5c6c9 100644 --- a/core/trino-main/src/test/java/io/trino/operator/join/JoinTestUtils.java +++ b/core/trino-main/src/test/java/io/trino/operator/join/JoinTestUtils.java @@ -318,14 +318,16 @@ public static class DummySpillerFactory private volatile boolean failSpill; private volatile boolean failUnspill; - public void failSpill() + public DummySpillerFactory failSpill() { failSpill = true; + return this; } - public void failUnspill() + public DummySpillerFactory failUnspill() { failUnspill = true; + return this; } @Override diff --git a/core/trino-main/src/test/java/io/trino/operator/join/TestHashJoinOperator.java b/core/trino-main/src/test/java/io/trino/operator/join/TestHashJoinOperator.java index 091b7f3b49735..e4ac6f8ec309e 100644 --- a/core/trino-main/src/test/java/io/trino/operator/join/TestHashJoinOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/join/TestHashJoinOperator.java @@ -62,20 +62,18 @@ import io.trino.testing.MaterializedResult; import io.trino.testing.TestingTaskContext; import io.trino.util.FinalizerService; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.OptionalInt; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -106,19 +104,21 @@ import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.VarcharType.VARCHAR; -import static java.lang.String.format; import static java.util.Arrays.asList; import static java.util.Collections.nCopies; import 
static java.util.Collections.singletonList; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.stream.Collectors.toList; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestHashJoinOperator { private static final int PARTITION_COUNT = 4; @@ -126,62 +126,37 @@ public class TestHashJoinOperator private static final PartitioningSpillerFactory PARTITIONING_SPILLER_FACTORY = new GenericPartitioningSpillerFactory(SINGLE_STREAM_SPILLER_FACTORY); private static final TypeOperators TYPE_OPERATORS = new TypeOperators(); - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; - private NodePartitioningManager nodePartitioningManager; - - @BeforeMethod - public void setUp() - { - // Before/AfterMethod is chosen here because the executor needs to be shutdown - // after every single test case to terminate outstanding threads, if any. - - // The line below is the same as newCachedThreadPool(daemonThreadsNamed(...)) except RejectionExecutionHandler. - // RejectionExecutionHandler is set to DiscardPolicy (instead of the default AbortPolicy) here. - // Otherwise, a large number of RejectedExecutionException will flood logging, resulting in Travis failure. - executor = new ThreadPoolExecutor( - 0, - Integer.MAX_VALUE, - 60L, - SECONDS, - new SynchronousQueue<>(), - daemonThreadsNamed("test-executor-%s"), - new ThreadPoolExecutor.DiscardPolicy()); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - - NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory( - new InMemoryNodeManager(), - new NodeSchedulerConfig().setIncludeCoordinator(true), - new NodeTaskMap(new FinalizerService()))); - nodePartitioningManager = new NodePartitioningManager( - nodeScheduler, - TYPE_OPERATORS, - CatalogServiceProvider.fail()); - } - - @AfterMethod(alwaysRun = true) + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed("test-executor-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); + private final NodePartitioningManager nodePartitioningManager = new NodePartitioningManager( + new NodeScheduler(new UniformNodeSelectorFactory( + new InMemoryNodeManager(), + new NodeSchedulerConfig().setIncludeCoordinator(true), + new NodeTaskMap(new FinalizerService()))), + TYPE_OPERATORS, + CatalogServiceProvider.fail()); + + @AfterAll public void tearDown() { executor.shutdownNow(); scheduledExecutor.shutdownNow(); } - @DataProvider(name = "hashJoinTestValues") - public static Object[][] hashJoinTestValuesProvider() + @Test + public void testInnerJoin() { - return new Object[][] { - {true, true, true}, - {true, true, false}, - {true, false, true}, - {true, false, false}, - {false, true, true}, - {false, true, false}, - {false, false, true}, - {false, false, false}}; + testInnerJoin(true, true, true); + 
testInnerJoin(true, true, false); + testInnerJoin(true, false, true); + testInnerJoin(true, false, false); + testInnerJoin(false, true, true); + testInnerJoin(false, true, false); + testInnerJoin(false, false, true); + testInnerJoin(false, false, false); } - @Test(dataProvider = "hashJoinTestValues") - public void testInnerJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + private void testInnerJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -382,90 +357,76 @@ private enum WhenSpill DURING_BUILD, AFTER_BUILD, DURING_USAGE, NEVER } - private enum WhenSpillFails - { - SPILL_BUILD, SPILL_JOIN, UNSPILL_BUILD, UNSPILL_JOIN - } - - @DataProvider - public Object[][] joinWithSpillValues() - { - return joinWithSpillParameters(true).stream() - .map(List::toArray) - .toArray(Object[][]::new); - } - - @DataProvider - public Object[][] joinWithFailingSpillValues() + @Test + public void testInnerJoinWithSpill() + throws Exception { - List> spillFailValues = Arrays.stream(WhenSpillFails.values()) - .map(ImmutableList::of) - .collect(toList()); - return product(joinWithSpillParameters(false), spillFailValues).stream() - .map(List::toArray) - .toArray(Object[][]::new); + for (boolean probeHashEnabled : ImmutableList.of(false, true)) { + // spill all + innerJoinWithSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.NEVER), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.DURING_BUILD), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.AFTER_BUILD), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.DURING_USAGE), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + + // spill one + innerJoinWithSpill(probeHashEnabled, concat(singletonList(WhenSpill.DURING_BUILD), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER)), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, concat(singletonList(WhenSpill.AFTER_BUILD), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER)), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, concat(singletonList(WhenSpill.DURING_USAGE), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER)), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + + innerJoinWithSpill(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.AFTER_BUILD), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER)), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + innerJoinWithSpill(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.DURING_USAGE), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER)), SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); + } } - private static List> joinWithSpillParameters(boolean allowNoSpill) + @Test + public void testInnerJoinWithFailingSpill() { - List> result = new ArrayList<>(); for (boolean probeHashEnabled : ImmutableList.of(false, true)) { - for (WhenSpill whenSpill : WhenSpill.values()) { - // spill all - if (allowNoSpill || whenSpill != WhenSpill.NEVER) { - result.add(ImmutableList.of(probeHashEnabled, nCopies(PARTITION_COUNT, whenSpill))); - } - - if (whenSpill != WhenSpill.NEVER) { - // spill one - result.add(ImmutableList.of(probeHashEnabled, 
concat(singletonList(whenSpill), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER)))); - } - } - - result.add(ImmutableList.of(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.AFTER_BUILD), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER)))); - result.add(ImmutableList.of(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.DURING_USAGE), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER)))); + // spill all + testInnerJoinWithFailingSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.DURING_USAGE)); + testInnerJoinWithFailingSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.DURING_BUILD)); + testInnerJoinWithFailingSpill(probeHashEnabled, nCopies(PARTITION_COUNT, WhenSpill.AFTER_BUILD)); + + // spill one + testInnerJoinWithFailingSpill(probeHashEnabled, concat(singletonList(WhenSpill.DURING_USAGE), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER))); + testInnerJoinWithFailingSpill(probeHashEnabled, concat(singletonList(WhenSpill.DURING_BUILD), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER))); + testInnerJoinWithFailingSpill(probeHashEnabled, concat(singletonList(WhenSpill.AFTER_BUILD), nCopies(PARTITION_COUNT - 1, WhenSpill.NEVER))); + + testInnerJoinWithFailingSpill(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.AFTER_BUILD), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER))); + testInnerJoinWithFailingSpill(probeHashEnabled, concat(asList(WhenSpill.DURING_BUILD, WhenSpill.DURING_USAGE), nCopies(PARTITION_COUNT - 2, WhenSpill.NEVER))); } - return result; } - @Test(dataProvider = "joinWithSpillValues") - public void testInnerJoinWithSpill(boolean probeHashEnabled, List<WhenSpill> whenSpill) - throws Exception + private void testInnerJoinWithFailingSpill(boolean probeHashEnabled, List<WhenSpill> whenSpill) { - innerJoinWithSpill(probeHashEnabled, whenSpill, SINGLE_STREAM_SPILLER_FACTORY, PARTITIONING_SPILLER_FACTORY); - } + assertThatThrownBy(() -> innerJoinWithSpill( + probeHashEnabled, + whenSpill, + new DummySpillerFactory().failSpill(), + new GenericPartitioningSpillerFactory(new DummySpillerFactory()))) + .isInstanceOf(RuntimeException.class) + .hasMessage("Spill failed"); - @Test(dataProvider = "joinWithFailingSpillValues") - public void testInnerJoinWithFailingSpill(boolean probeHashEnabled, List<WhenSpill> whenSpill, WhenSpillFails whenSpillFails) - { - DummySpillerFactory buildSpillerFactory = new DummySpillerFactory(); - DummySpillerFactory joinSpillerFactory = new DummySpillerFactory(); - PartitioningSpillerFactory partitioningSpillerFactory = new GenericPartitioningSpillerFactory(joinSpillerFactory); - - String expectedMessage; - switch (whenSpillFails) { - case SPILL_BUILD: - buildSpillerFactory.failSpill(); - expectedMessage = "Spill failed"; - break; - case SPILL_JOIN: - joinSpillerFactory.failSpill(); - expectedMessage = "Spill failed"; - break; - case UNSPILL_BUILD: - buildSpillerFactory.failUnspill(); - expectedMessage = "Unspill failed"; - break; - case UNSPILL_JOIN: - joinSpillerFactory.failUnspill(); - expectedMessage = "Unspill failed"; - break; - default: - throw new IllegalArgumentException(format("Unsupported option: %s", whenSpillFails)); - } - assertThatThrownBy(() -> innerJoinWithSpill(probeHashEnabled, whenSpill, buildSpillerFactory, partitioningSpillerFactory)) + assertThatThrownBy(() -> innerJoinWithSpill(probeHashEnabled, + whenSpill, + new DummySpillerFactory(), + new GenericPartitioningSpillerFactory(new DummySpillerFactory().failSpill()))) + .isInstanceOf(RuntimeException.class) + .hasMessage("Spill failed"); + + assertThatThrownBy(() -> 
innerJoinWithSpill(probeHashEnabled, + whenSpill, + new DummySpillerFactory().failUnspill(), + new GenericPartitioningSpillerFactory(new DummySpillerFactory()))) + .isInstanceOf(RuntimeException.class) + .hasMessage("Unspill failed"); + + assertThatThrownBy(() -> innerJoinWithSpill(probeHashEnabled, + whenSpill, + new DummySpillerFactory(), + new GenericPartitioningSpillerFactory(new DummySpillerFactory().failUnspill()))) .isInstanceOf(RuntimeException.class) - .hasMessage(expectedMessage); + .hasMessage("Unspill failed"); } private void innerJoinWithSpill(boolean probeHashEnabled, List whenSpill, SingleStreamSpillerFactory buildSpillerFactory, PartitioningSpillerFactory joinSpillerFactory) @@ -644,7 +605,8 @@ private static MaterializedResult getProperColumns(Operator joinOperator, List(), - daemonThreadsNamed("test-executor-%s"), - new ThreadPoolExecutor.DiscardPolicy()); - scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - - NodeScheduler nodeScheduler = new NodeScheduler(new UniformNodeSelectorFactory( - new InMemoryNodeManager(), - new NodeSchedulerConfig().setIncludeCoordinator(true), - new NodeTaskMap(new FinalizerService()))); - nodePartitioningManager = new NodePartitioningManager( - nodeScheduler, - TYPE_OPERATORS, - CatalogServiceProvider.fail()); - } - - @AfterMethod(alwaysRun = true) + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed("test-executor-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); + private final NodePartitioningManager nodePartitioningManager = new NodePartitioningManager( + new NodeScheduler(new UniformNodeSelectorFactory( + new InMemoryNodeManager(), + new NodeSchedulerConfig().setIncludeCoordinator(true), + new NodeTaskMap(new FinalizerService()))), + TYPE_OPERATORS, + CatalogServiceProvider.fail()); + + @AfterAll public void tearDown() { executor.shutdownNow(); scheduledExecutor.shutdownNow(); } - @Test(dataProvider = "hashJoinTestValues") - public void testInnerJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + @Test + public void testInnerJoin() + { + testInnerJoin(false, false, false); + testInnerJoin(false, false, true); + testInnerJoin(false, true, false); + testInnerJoin(false, true, true); + testInnerJoin(true, false, false); + testInnerJoin(true, false, true); + testInnerJoin(true, true, false); + testInnerJoin(true, true, true); + } + + private void testInnerJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -183,8 +171,20 @@ public void testInnerJoin(boolean parallelBuild, boolean probeHashEnabled, boole assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinRleProbeTestValues") - public void testInnerJoinWithRunLengthEncodedProbe(boolean withFilter, boolean probeHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithRunLengthEncodedProbe() + { + testInnerJoinWithRunLengthEncodedProbe(false, false, false); + testInnerJoinWithRunLengthEncodedProbe(false, false, true); + testInnerJoinWithRunLengthEncodedProbe(false, true, false); + testInnerJoinWithRunLengthEncodedProbe(false, true, true); + 
testInnerJoinWithRunLengthEncodedProbe(true, false, false); + testInnerJoinWithRunLengthEncodedProbe(true, false, true); + testInnerJoinWithRunLengthEncodedProbe(true, true, false); + testInnerJoinWithRunLengthEncodedProbe(true, true, true); + } + + private void testInnerJoinWithRunLengthEncodedProbe(boolean withFilter, boolean probeHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -252,14 +252,14 @@ private JoinOperatorInfo getJoinOperatorInfo(DriverContext driverContext) return (JoinOperatorInfo) getOnlyElement(driverContext.getOperatorStats()).getInfo(); } - @DataProvider(name = "hashJoinRleProbeTestValues") - public static Object[][] hashJoinRleProbeTestValuesProvider() + @Test + public void testUnwrapsLazyBlocks() { - return cartesianProduct(trueFalse(), trueFalse(), trueFalse()); + testUnwrapsLazyBlocks(false); + testUnwrapsLazyBlocks(true); } - @Test(dataProvider = "singleBigintLookupSourceProvider") - public void testUnwrapsLazyBlocks(boolean singleBigintLookupSource) + private void testUnwrapsLazyBlocks(boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext(); @@ -304,8 +304,14 @@ public void testUnwrapsLazyBlocks(boolean singleBigintLookupSource) assertThat(output.getBlock(1) instanceof LazyBlock).isFalse(); } - @Test(dataProvider = "singleBigintLookupSourceProvider") - public void testYield(boolean singleBigintLookupSource) + @Test + public void testYield() + { + testYield(false); + testYield(true); + } + + private void testYield(boolean singleBigintLookupSource) { // create a filter function that yields for every probe match // verify we will yield #match times totally @@ -375,8 +381,28 @@ public void testYield(boolean singleBigintLookupSource) assertThat(output.getPositionCount()).isEqualTo(entries); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithNullProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithNullProbe() + { + testInnerJoinWithNullProbe(false, false, false, false); + testInnerJoinWithNullProbe(false, false, false, true); + testInnerJoinWithNullProbe(false, false, true, false); + testInnerJoinWithNullProbe(false, false, true, true); + testInnerJoinWithNullProbe(false, true, false, false); + testInnerJoinWithNullProbe(false, true, false, true); + testInnerJoinWithNullProbe(false, true, true, false); + testInnerJoinWithNullProbe(false, true, true, true); + testInnerJoinWithNullProbe(true, false, false, false); + testInnerJoinWithNullProbe(true, false, false, true); + testInnerJoinWithNullProbe(true, false, true, false); + testInnerJoinWithNullProbe(true, false, true, true); + testInnerJoinWithNullProbe(true, true, false, false); + testInnerJoinWithNullProbe(true, true, false, true); + testInnerJoinWithNullProbe(true, true, true, false); + testInnerJoinWithNullProbe(true, true, true, true); + } + + private void testInnerJoinWithNullProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -415,8 +441,28 @@ public void testInnerJoinWithNullProbe(boolean parallelBuild, boolean probeHashE assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, 
getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithOutputSingleMatch(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithOutputSingleMatch() + { + testInnerJoinWithOutputSingleMatch(false, false, false, false); + testInnerJoinWithOutputSingleMatch(false, false, false, true); + testInnerJoinWithOutputSingleMatch(false, false, true, false); + testInnerJoinWithOutputSingleMatch(false, false, true, true); + testInnerJoinWithOutputSingleMatch(false, true, false, false); + testInnerJoinWithOutputSingleMatch(false, true, false, true); + testInnerJoinWithOutputSingleMatch(false, true, true, false); + testInnerJoinWithOutputSingleMatch(false, true, true, true); + testInnerJoinWithOutputSingleMatch(true, false, false, false); + testInnerJoinWithOutputSingleMatch(true, false, false, true); + testInnerJoinWithOutputSingleMatch(true, false, true, false); + testInnerJoinWithOutputSingleMatch(true, false, true, true); + testInnerJoinWithOutputSingleMatch(true, true, false, false); + testInnerJoinWithOutputSingleMatch(true, true, false, true); + testInnerJoinWithOutputSingleMatch(true, true, true, false); + testInnerJoinWithOutputSingleMatch(true, true, true, true); + } + + private void testInnerJoinWithOutputSingleMatch(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); // build factory @@ -451,8 +497,20 @@ public void testInnerJoinWithOutputSingleMatch(boolean parallelBuild, boolean pr assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithNullBuild(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithNullBuild() + { + testInnerJoinWithNullBuild(false, false, false); + testInnerJoinWithNullBuild(false, false, true); + testInnerJoinWithNullBuild(false, true, false); + testInnerJoinWithNullBuild(false, true, true); + testInnerJoinWithNullBuild(true, false, false); + testInnerJoinWithNullBuild(true, false, true); + testInnerJoinWithNullBuild(true, true, false); + testInnerJoinWithNullBuild(true, true, true); + } + + private void testInnerJoinWithNullBuild(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -491,8 +549,20 @@ public void testInnerJoinWithNullBuild(boolean parallelBuild, boolean probeHashE assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithNullOnBothSides(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithNullOnBothSides() + { + testInnerJoinWithNullOnBothSides(false, false, false); + testInnerJoinWithNullOnBothSides(false, false, true); + testInnerJoinWithNullOnBothSides(false, true, false); + testInnerJoinWithNullOnBothSides(false, true, true); + 
testInnerJoinWithNullOnBothSides(true, false, false); + testInnerJoinWithNullOnBothSides(true, false, true); + testInnerJoinWithNullOnBothSides(true, true, false); + testInnerJoinWithNullOnBothSides(true, true, true); + } + + private void testInnerJoinWithNullOnBothSides(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -532,8 +602,20 @@ public void testInnerJoinWithNullOnBothSides(boolean parallelBuild, boolean prob assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValues") - public void testProbeOuterJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + @Test + public void testProbeOuterJoin() + { + testProbeOuterJoin(false, false, false); + testProbeOuterJoin(false, false, true); + testProbeOuterJoin(false, true, false); + testProbeOuterJoin(false, true, true); + testProbeOuterJoin(true, false, false); + testProbeOuterJoin(true, false, true); + testProbeOuterJoin(true, true, false); + testProbeOuterJoin(true, true, true); + } + + private void testProbeOuterJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -578,8 +660,20 @@ public void testProbeOuterJoin(boolean parallelBuild, boolean probeHashEnabled, assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValues") - public void testProbeOuterJoinWithFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + @Test + public void testProbeOuterJoinWithFilterFunction() + { + testProbeOuterJoinWithFilterFunction(false, false, false); + testProbeOuterJoinWithFilterFunction(false, false, true); + testProbeOuterJoinWithFilterFunction(false, true, false); + testProbeOuterJoinWithFilterFunction(false, true, true); + testProbeOuterJoinWithFilterFunction(true, false, false); + testProbeOuterJoinWithFilterFunction(true, false, true); + testProbeOuterJoinWithFilterFunction(true, true, false); + testProbeOuterJoinWithFilterFunction(true, true, true); + } + + private void testProbeOuterJoinWithFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) { TaskContext taskContext = createTaskContext(); @@ -627,8 +721,28 @@ public void testProbeOuterJoinWithFilterFunction(boolean parallelBuild, boolean assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullProbe() + { + testOuterJoinWithNullProbe(false, false, false, false); + testOuterJoinWithNullProbe(false, false, false, true); + testOuterJoinWithNullProbe(false, false, true, false); + testOuterJoinWithNullProbe(false, false, true, true); + testOuterJoinWithNullProbe(false, true, false, false); + testOuterJoinWithNullProbe(false, true, false, true); + testOuterJoinWithNullProbe(false, true, true, false); + 
testOuterJoinWithNullProbe(false, true, true, true); + testOuterJoinWithNullProbe(true, false, false, false); + testOuterJoinWithNullProbe(true, false, false, true); + testOuterJoinWithNullProbe(true, false, true, false); + testOuterJoinWithNullProbe(true, false, true, true); + testOuterJoinWithNullProbe(true, true, false, false); + testOuterJoinWithNullProbe(true, true, false, true); + testOuterJoinWithNullProbe(true, true, true, false); + testOuterJoinWithNullProbe(true, true, true, true); + } + + private void testOuterJoinWithNullProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -669,8 +783,28 @@ public void testOuterJoinWithNullProbe(boolean parallelBuild, boolean probeHashE assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullProbeAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullProbeAndFilterFunction() + { + testOuterJoinWithNullProbeAndFilterFunction(false, false, false, false); + testOuterJoinWithNullProbeAndFilterFunction(false, false, false, true); + testOuterJoinWithNullProbeAndFilterFunction(false, false, true, false); + testOuterJoinWithNullProbeAndFilterFunction(false, false, true, true); + testOuterJoinWithNullProbeAndFilterFunction(false, true, false, false); + testOuterJoinWithNullProbeAndFilterFunction(false, true, false, true); + testOuterJoinWithNullProbeAndFilterFunction(false, true, true, false); + testOuterJoinWithNullProbeAndFilterFunction(false, true, true, true); + testOuterJoinWithNullProbeAndFilterFunction(true, false, false, false); + testOuterJoinWithNullProbeAndFilterFunction(true, false, false, true); + testOuterJoinWithNullProbeAndFilterFunction(true, false, true, false); + testOuterJoinWithNullProbeAndFilterFunction(true, false, true, true); + testOuterJoinWithNullProbeAndFilterFunction(true, true, false, false); + testOuterJoinWithNullProbeAndFilterFunction(true, true, false, true); + testOuterJoinWithNullProbeAndFilterFunction(true, true, true, false); + testOuterJoinWithNullProbeAndFilterFunction(true, true, true, true); + } + + private void testOuterJoinWithNullProbeAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -714,8 +848,28 @@ public void testOuterJoinWithNullProbeAndFilterFunction(boolean parallelBuild, b assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullBuild(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullBuild() + { + testOuterJoinWithNullBuild(false, false, false, false); + testOuterJoinWithNullBuild(false, false, false, true); + testOuterJoinWithNullBuild(false, false, true, false); + testOuterJoinWithNullBuild(false, false, true, true); + testOuterJoinWithNullBuild(false, true, false, 
false); + testOuterJoinWithNullBuild(false, true, false, true); + testOuterJoinWithNullBuild(false, true, true, false); + testOuterJoinWithNullBuild(false, true, true, true); + testOuterJoinWithNullBuild(true, false, false, false); + testOuterJoinWithNullBuild(true, false, false, true); + testOuterJoinWithNullBuild(true, false, true, false); + testOuterJoinWithNullBuild(true, false, true, true); + testOuterJoinWithNullBuild(true, true, false, false); + testOuterJoinWithNullBuild(true, true, false, true); + testOuterJoinWithNullBuild(true, true, true, false); + testOuterJoinWithNullBuild(true, true, true, true); + } + + private void testOuterJoinWithNullBuild(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -755,8 +909,28 @@ public void testOuterJoinWithNullBuild(boolean parallelBuild, boolean probeHashE assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullBuildAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullBuildAndFilterFunction() + { + testOuterJoinWithNullBuildAndFilterFunction(false, false, false, false); + testOuterJoinWithNullBuildAndFilterFunction(false, false, false, true); + testOuterJoinWithNullBuildAndFilterFunction(false, false, true, false); + testOuterJoinWithNullBuildAndFilterFunction(false, false, true, true); + testOuterJoinWithNullBuildAndFilterFunction(false, true, false, false); + testOuterJoinWithNullBuildAndFilterFunction(false, true, false, true); + testOuterJoinWithNullBuildAndFilterFunction(false, true, true, false); + testOuterJoinWithNullBuildAndFilterFunction(false, true, true, true); + testOuterJoinWithNullBuildAndFilterFunction(true, false, false, false); + testOuterJoinWithNullBuildAndFilterFunction(true, false, false, true); + testOuterJoinWithNullBuildAndFilterFunction(true, false, true, false); + testOuterJoinWithNullBuildAndFilterFunction(true, false, true, true); + testOuterJoinWithNullBuildAndFilterFunction(true, true, false, false); + testOuterJoinWithNullBuildAndFilterFunction(true, true, false, true); + testOuterJoinWithNullBuildAndFilterFunction(true, true, true, false); + testOuterJoinWithNullBuildAndFilterFunction(true, true, true, true); + } + + private void testOuterJoinWithNullBuildAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -800,8 +974,28 @@ public void testOuterJoinWithNullBuildAndFilterFunction(boolean parallelBuild, b assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullOnBothSides(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullOnBothSides() + { + testOuterJoinWithNullOnBothSides(false, false, false, false); + testOuterJoinWithNullOnBothSides(false, false, false, true); + 
testOuterJoinWithNullOnBothSides(false, false, true, false); + testOuterJoinWithNullOnBothSides(false, false, true, true); + testOuterJoinWithNullOnBothSides(false, true, false, false); + testOuterJoinWithNullOnBothSides(false, true, false, true); + testOuterJoinWithNullOnBothSides(false, true, true, false); + testOuterJoinWithNullOnBothSides(false, true, true, true); + testOuterJoinWithNullOnBothSides(true, false, false, false); + testOuterJoinWithNullOnBothSides(true, false, false, true); + testOuterJoinWithNullOnBothSides(true, false, true, false); + testOuterJoinWithNullOnBothSides(true, false, true, true); + testOuterJoinWithNullOnBothSides(true, true, false, false); + testOuterJoinWithNullOnBothSides(true, true, false, true); + testOuterJoinWithNullOnBothSides(true, true, true, false); + testOuterJoinWithNullOnBothSides(true, true, true, true); + } + + private void testOuterJoinWithNullOnBothSides(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -842,8 +1036,28 @@ public void testOuterJoinWithNullOnBothSides(boolean parallelBuild, boolean prob assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testOuterJoinWithNullOnBothSidesAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testOuterJoinWithNullOnBothSidesAndFilterFunction() + { + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, false, false, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, false, false, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, false, true, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, false, true, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, true, false, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, true, false, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, true, true, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(false, true, true, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, false, false, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, false, false, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, false, true, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, false, true, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, true, false, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, true, false, true); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, true, true, false); + testOuterJoinWithNullOnBothSidesAndFilterFunction(true, true, true, true); + } + + private void testOuterJoinWithNullOnBothSidesAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -888,8 +1102,16 @@ public void testOuterJoinWithNullOnBothSidesAndFilterFunction(boolean parallelBu assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "testMemoryLimitProvider") - public void 
testMemoryLimit(boolean parallelBuild, boolean buildHashEnabled) + @Test + public void testMemoryLimit() + { + testMemoryLimit(false, false); + testMemoryLimit(false, true); + testMemoryLimit(true, false); + testMemoryLimit(true, true); + } + + private void testMemoryLimit(boolean parallelBuild, boolean buildHashEnabled) { TaskContext taskContext = TestingTaskContext.createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.ofBytes(100)); @@ -903,8 +1125,28 @@ public void testMemoryLimit(boolean parallelBuild, boolean buildHashEnabled) .hasMessageMatching("Query exceeded per-node memory limit of.*"); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithEmptyLookupSource() + { + testInnerJoinWithEmptyLookupSource(false, false, false, false); + testInnerJoinWithEmptyLookupSource(false, false, false, true); + testInnerJoinWithEmptyLookupSource(false, false, true, false); + testInnerJoinWithEmptyLookupSource(false, false, true, true); + testInnerJoinWithEmptyLookupSource(false, true, false, false); + testInnerJoinWithEmptyLookupSource(false, true, false, true); + testInnerJoinWithEmptyLookupSource(false, true, true, false); + testInnerJoinWithEmptyLookupSource(false, true, true, true); + testInnerJoinWithEmptyLookupSource(true, false, false, false); + testInnerJoinWithEmptyLookupSource(true, false, false, true); + testInnerJoinWithEmptyLookupSource(true, false, true, false); + testInnerJoinWithEmptyLookupSource(true, false, true, true); + testInnerJoinWithEmptyLookupSource(true, true, false, false); + testInnerJoinWithEmptyLookupSource(true, true, false, true); + testInnerJoinWithEmptyLookupSource(true, true, true, false); + testInnerJoinWithEmptyLookupSource(true, true, true, true); + } + + private void testInnerJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -940,8 +1182,28 @@ public void testInnerJoinWithEmptyLookupSource(boolean parallelBuild, boolean pr assertThat(outputPage).isNull(); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testLookupOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testLookupOuterJoinWithEmptyLookupSource() + { + testLookupOuterJoinWithEmptyLookupSource(false, false, false, false); + testLookupOuterJoinWithEmptyLookupSource(false, false, false, true); + testLookupOuterJoinWithEmptyLookupSource(false, false, true, false); + testLookupOuterJoinWithEmptyLookupSource(false, false, true, true); + testLookupOuterJoinWithEmptyLookupSource(false, true, false, false); + testLookupOuterJoinWithEmptyLookupSource(false, true, false, true); + testLookupOuterJoinWithEmptyLookupSource(false, true, true, false); + testLookupOuterJoinWithEmptyLookupSource(false, true, true, true); + testLookupOuterJoinWithEmptyLookupSource(true, false, false, false); + testLookupOuterJoinWithEmptyLookupSource(true, false, false, true); + testLookupOuterJoinWithEmptyLookupSource(true, false, true, false); + testLookupOuterJoinWithEmptyLookupSource(true, false, true, true); + testLookupOuterJoinWithEmptyLookupSource(true, true, false, false); + 
testLookupOuterJoinWithEmptyLookupSource(true, true, false, true); + testLookupOuterJoinWithEmptyLookupSource(true, true, true, false); + testLookupOuterJoinWithEmptyLookupSource(true, true, true, true); + } + + private void testLookupOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -977,8 +1239,28 @@ public void testLookupOuterJoinWithEmptyLookupSource(boolean parallelBuild, bool assertThat(outputPage).isNull(); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testProbeOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testProbeOuterJoinWithEmptyLookupSource() + { + testProbeOuterJoinWithEmptyLookupSource(false, false, false, false); + testProbeOuterJoinWithEmptyLookupSource(false, false, false, true); + testProbeOuterJoinWithEmptyLookupSource(false, false, true, false); + testProbeOuterJoinWithEmptyLookupSource(false, false, true, true); + testProbeOuterJoinWithEmptyLookupSource(false, true, false, false); + testProbeOuterJoinWithEmptyLookupSource(false, true, false, true); + testProbeOuterJoinWithEmptyLookupSource(false, true, true, false); + testProbeOuterJoinWithEmptyLookupSource(false, true, true, true); + testProbeOuterJoinWithEmptyLookupSource(true, false, false, false); + testProbeOuterJoinWithEmptyLookupSource(true, false, false, true); + testProbeOuterJoinWithEmptyLookupSource(true, false, true, false); + testProbeOuterJoinWithEmptyLookupSource(true, false, true, true); + testProbeOuterJoinWithEmptyLookupSource(true, true, false, false); + testProbeOuterJoinWithEmptyLookupSource(true, true, false, true); + testProbeOuterJoinWithEmptyLookupSource(true, true, true, false); + testProbeOuterJoinWithEmptyLookupSource(true, true, true, true); + } + + private void testProbeOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -1023,8 +1305,28 @@ public void testProbeOuterJoinWithEmptyLookupSource(boolean parallelBuild, boole assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testFullOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testFullOuterJoinWithEmptyLookupSource() + { + testFullOuterJoinWithEmptyLookupSource(false, false, false, false); + testFullOuterJoinWithEmptyLookupSource(false, false, false, true); + testFullOuterJoinWithEmptyLookupSource(false, false, true, false); + testFullOuterJoinWithEmptyLookupSource(false, false, true, true); + testFullOuterJoinWithEmptyLookupSource(false, true, false, false); + testFullOuterJoinWithEmptyLookupSource(false, true, false, true); + testFullOuterJoinWithEmptyLookupSource(false, true, true, false); + testFullOuterJoinWithEmptyLookupSource(false, true, true, true); + testFullOuterJoinWithEmptyLookupSource(true, false, false, false); + testFullOuterJoinWithEmptyLookupSource(true, false, false, true); + testFullOuterJoinWithEmptyLookupSource(true, false, true, false); + 
testFullOuterJoinWithEmptyLookupSource(true, false, true, true); + testFullOuterJoinWithEmptyLookupSource(true, true, false, false); + testFullOuterJoinWithEmptyLookupSource(true, true, false, true); + testFullOuterJoinWithEmptyLookupSource(true, true, true, false); + testFullOuterJoinWithEmptyLookupSource(true, true, true, true); + } + + private void testFullOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -1069,8 +1371,28 @@ public void testFullOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolea assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public void testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) + @Test + public void testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe() + { + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, false, false, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, false, false, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, false, true, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, false, true, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, true, false, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, true, false, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, true, true, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(false, true, true, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, false, false, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, false, false, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, false, true, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, false, true, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, true, false, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, true, false, true); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, true, true, false); + testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(true, true, true, true); + } + + private void testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled, boolean singleBigintLookupSource) { TaskContext taskContext = createTaskContext(); @@ -1109,8 +1431,21 @@ public void testInnerJoinWithNonEmptyLookupSourceAndEmptyProbe(boolean parallelB assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages)); } - @Test(dataProvider = "hashJoinTestValues") - public void testInnerJoinWithBlockingLookupSourceAndEmptyProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + @Test + public void testInnerJoinWithBlockingLookupSourceAndEmptyProbe() + throws Exception + { + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(false, false, false); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(false, false, true); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(false, true, false); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(false, true, true); 
+ testInnerJoinWithBlockingLookupSourceAndEmptyProbe(true, false, false); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(true, false, true); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(true, true, false); + testInnerJoinWithBlockingLookupSourceAndEmptyProbe(true, true, true); + } + + private void testInnerJoinWithBlockingLookupSourceAndEmptyProbe(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception { // join that waits for build side to be collected @@ -1145,8 +1480,21 @@ public void testInnerJoinWithBlockingLookupSourceAndEmptyProbe(boolean parallelB } } - @Test(dataProvider = "hashJoinTestValues") - public void testInnerJoinWithBlockingLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) + @Test + public void testInnerJoinWithBlockingLookupSource() + throws Exception + { + testInnerJoinWithBlockingLookupSource(false, false, false); + testInnerJoinWithBlockingLookupSource(false, false, true); + testInnerJoinWithBlockingLookupSource(false, true, false); + testInnerJoinWithBlockingLookupSource(false, true, true); + testInnerJoinWithBlockingLookupSource(true, false, false); + testInnerJoinWithBlockingLookupSource(true, false, true); + testInnerJoinWithBlockingLookupSource(true, true, false); + testInnerJoinWithBlockingLookupSource(true, true, true); + } + + private void testInnerJoinWithBlockingLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) throws Exception { RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR)); @@ -1296,39 +1644,6 @@ private OperatorFactory createJoinOperatorFactoryWithBlockingLookupSource(TaskCo return joinOperatorFactory; } - @DataProvider(name = "hashJoinTestValues") - public static Object[][] hashJoinTestValuesProvider() - { - return DataProviders.cartesianProduct( - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}); - } - - @DataProvider - public static Object[][] testMemoryLimitProvider() - { - return DataProviders.cartesianProduct( - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}); - } - - @DataProvider(name = "singleBigintLookupSourceProvider") - public static Object[][] singleBigintLookupSourceProvider() - { - return new Object[][] {{true}, {false}}; - } - - @DataProvider(name = "hashJoinTestValuesAndsingleBigintLookupSourceProvider") - public static Object[][] hashJoinTestValuesAndsingleBigintLookupSourceProvider() - { - return DataProviders.cartesianProduct( - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}, - new Object[][] {{true}, {false}}); - } - private TaskContext createTaskContext() { return TestingTaskContext.createTaskContext(executor, scheduledExecutor, TEST_SESSION); diff --git a/core/trino-main/src/test/java/io/trino/operator/output/TestPagePartitioner.java b/core/trino-main/src/test/java/io/trino/operator/output/TestPagePartitioner.java index 444e029656335..898f5802e0f6b 100644 --- a/core/trino-main/src/test/java/io/trino/operator/output/TestPagePartitioner.java +++ b/core/trino-main/src/test/java/io/trino/operator/output/TestPagePartitioner.java @@ -42,7 +42,6 @@ import io.trino.spi.block.RunLengthEncodedBlock; import io.trino.spi.block.TestingBlockEncodingSerde; import io.trino.spi.predicate.NullableValue; -import io.trino.spi.type.AbstractType; import io.trino.spi.type.ArrayType; import io.trino.spi.type.Decimals; import 
io.trino.spi.type.TimestampType; @@ -50,11 +49,10 @@ import io.trino.sql.planner.plan.PlanNodeId; import io.trino.testing.TestingTaskContext; import io.trino.type.BlockTypeOperators; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.ArrayList; import java.util.Collection; @@ -98,8 +96,11 @@ import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(SAME_THREAD) public class TestPagePartitioner { private static final DataSize MAX_MEMORY = DataSize.of(50, MEGABYTE); @@ -111,36 +112,21 @@ public class TestPagePartitioner private static final PagesSerdeFactory PAGES_SERDE_FACTORY = new PagesSerdeFactory(new TestingBlockEncodingSerde(), false); private static final PageDeserializer PAGE_DESERIALIZER = PAGES_SERDE_FACTORY.createDeserializer(Optional.empty()); - private ExecutorService executor; - private ScheduledExecutorService scheduledExecutor; - private TestOutputBuffer outputBuffer; + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-executor-%s")); + private final ScheduledExecutorService scheduledExecutor = newScheduledThreadPool(1, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - @BeforeClass - public void setUpClass() - { - executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-executor-%s")); - scheduledExecutor = newScheduledThreadPool(1, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s")); - } - - @AfterClass(alwaysRun = true) + @AfterAll public void tearDownClass() { executor.shutdownNow(); - executor = null; scheduledExecutor.shutdownNow(); - scheduledExecutor = null; - } - - @BeforeMethod - public void setUp() - { - outputBuffer = new TestOutputBuffer(); } @Test public void testOutputForEmptyPage() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page = new Page(createLongsBlock(ImmutableList.of())); pagePartitioner.partitionPage(page, operatorContext()); @@ -156,10 +142,18 @@ private OperatorContext operatorContext() .addOperatorContext(0, new PlanNodeId("plan-node-0"), PartitionedOutputOperator.class.getSimpleName()); } - @Test(dataProvider = "partitioningMode") - public void testOutputEqualsInput(PartitioningMode partitioningMode) + @Test + public void testOutputEqualsInput() + { + testOutputEqualsInput(PartitioningMode.ROW_WISE); + testOutputEqualsInput(PartitioningMode.COLUMNAR); + } + + private void testOutputEqualsInput(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page = new 
Page(createLongSequenceBlock(0, POSITIONS_PER_PAGE)); List expected = readLongs(Stream.of(page), 0); @@ -169,10 +163,18 @@ public void testOutputEqualsInput(PartitioningMode partitioningMode) assertThat(partitioned).containsExactlyInAnyOrderElementsOf(expected); // order is different due to 2 partitions joined } - @Test(dataProvider = "partitioningMode") - public void testOutputForPageWithNoBlockPartitionFunction(PartitioningMode partitioningMode) + @Test + public void testOutputForPageWithNoBlockPartitionFunction() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT) + testOutputForPageWithNoBlockPartitionFunction(PartitioningMode.ROW_WISE); + testOutputForPageWithNoBlockPartitionFunction(PartitioningMode.COLUMNAR); + } + + private void testOutputForPageWithNoBlockPartitionFunction(PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT) .withPartitionFunction(new BucketPartitionFunction( ROUND_ROBIN.createBucketFunction(null, false, PARTITION_COUNT, null), IntStream.range(0, PARTITION_COUNT).toArray())) @@ -188,10 +190,18 @@ public void testOutputForPageWithNoBlockPartitionFunction(PartitioningMode parti assertThat(partition1).containsExactly(1L, 3L, 5L, 7L); } - @Test(dataProvider = "partitioningMode") - public void testOutputForMultipleSimplePages(PartitioningMode partitioningMode) + @Test + public void testOutputForMultipleSimplePages() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + testOutputForMultipleSimplePages(PartitioningMode.ROW_WISE); + testOutputForMultipleSimplePages(PartitioningMode.COLUMNAR); + } + + private void testOutputForMultipleSimplePages(PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page1 = new Page(createLongSequenceBlock(0, POSITIONS_PER_PAGE)); Page page2 = new Page(createLongSequenceBlock(1, POSITIONS_PER_PAGE)); Page page3 = new Page(createLongSequenceBlock(2, POSITIONS_PER_PAGE)); @@ -203,10 +213,17 @@ public void testOutputForMultipleSimplePages(PartitioningMode partitioningMode) assertThat(partitioned).containsExactlyInAnyOrderElementsOf(expected); // order is different due to 2 partitions joined } - @Test(dataProvider = "partitioningMode") - public void testOutputForSimplePageWithReplication(PartitioningMode partitioningMode) + @Test + public void testOutputForSimplePageWithReplication() + { + testOutputForSimplePageWithReplication(PartitioningMode.ROW_WISE); + testOutputForSimplePageWithReplication(PartitioningMode.COLUMNAR); + } + + private void testOutputForSimplePageWithReplication(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).replicate().build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).replicate().build(); Page page = new Page(createLongsBlock(0L, 1L, 2L, 3L, null)); processPages(pagePartitioner, partitioningMode, page); @@ -217,10 +234,17 @@ public void testOutputForSimplePageWithReplication(PartitioningMode partitioning assertThat(partition1).containsExactly(0L, 1L, 3L); // position 0 copied to all partitions } - @Test(dataProvider = "partitioningMode") - public void testOutputForSimplePageWithNullChannel(PartitioningMode partitioningMode) + @Test + public void testOutputForSimplePageWithNullChannel() + { + 
testOutputForSimplePageWithNullChannel(PartitioningMode.ROW_WISE); + testOutputForSimplePageWithNullChannel(PartitioningMode.COLUMNAR); + } + + private void testOutputForSimplePageWithNullChannel(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).withNullChannel(0).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).withNullChannel(0).build(); Page page = new Page(createLongsBlock(0L, 1L, 2L, 3L, null)); processPages(pagePartitioner, partitioningMode, page); @@ -231,10 +255,17 @@ public void testOutputForSimplePageWithNullChannel(PartitioningMode partitioning assertThat(partition1).containsExactlyInAnyOrder(1L, 3L, null); // null copied to all partitions } - @Test(dataProvider = "partitioningMode") - public void testOutputForSimplePageWithPartitionConstant(PartitioningMode partitioningMode) + @Test + public void testOutputForSimplePageWithPartitionConstant() + { + testOutputForSimplePageWithPartitionConstant(PartitioningMode.ROW_WISE); + testOutputForSimplePageWithPartitionConstant(PartitioningMode.COLUMNAR); + } + + private void testOutputForSimplePageWithPartitionConstant(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT) + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT) .withPartitionConstants(ImmutableList.of(Optional.of(new NullableValue(BIGINT, 1L)))) .withPartitionChannels(-1) .build(); @@ -249,10 +280,17 @@ public void testOutputForSimplePageWithPartitionConstant(PartitioningMode partit assertThat(partition1).containsExactlyElementsOf(allValues); } - @Test(dataProvider = "partitioningMode") - public void testOutputForSimplePageWithPartitionConstantAndHashBlock(PartitioningMode partitioningMode) + @Test + public void testOutputForSimplePageWithPartitionConstantAndHashBlock() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT) + testOutputForSimplePageWithPartitionConstantAndHashBlock(PartitioningMode.ROW_WISE); + testOutputForSimplePageWithPartitionConstantAndHashBlock(PartitioningMode.COLUMNAR); + } + + private void testOutputForSimplePageWithPartitionConstantAndHashBlock(PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT) .withPartitionConstants(ImmutableList.of(Optional.empty(), Optional.of(new NullableValue(BIGINT, 1L)))) .withPartitionChannels(0, -1) // use first block and constant block at index 1 as input to partitionFunction .withHashChannels(0, 1) // use both channels to calculate partition (a+b) mod 2 @@ -267,10 +305,17 @@ public void testOutputForSimplePageWithPartitionConstantAndHashBlock(Partitionin assertThat(partition1).containsExactly(0L, 2L); } - @Test(dataProvider = "partitioningMode") - public void testPartitionPositionsWithRleNotNull(PartitioningMode partitioningMode) + @Test + public void testPartitionPositionsWithRleNotNull() + { + testPartitionPositionsWithRleNotNull(PartitioningMode.ROW_WISE); + testPartitionPositionsWithRleNotNull(PartitioningMode.COLUMNAR); + } + + private void testPartitionPositionsWithRleNotNull(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT, BIGINT).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT, BIGINT).build(); 
Page page = new Page(createRepeatedValuesBlock(0, POSITIONS_PER_PAGE), createLongSequenceBlock(0, POSITIONS_PER_PAGE)); processPages(pagePartitioner, partitioningMode, page); @@ -282,10 +327,17 @@ public void testPartitionPositionsWithRleNotNull(PartitioningMode partitioningMo assertThat(outputBuffer.getEnqueuedDeserialized(1)).isEmpty(); } - @Test(dataProvider = "partitioningMode") - public void testPartitionPositionsWithRleNotNullWithReplication(PartitioningMode partitioningMode) + @Test + public void testPartitionPositionsWithRleNotNullWithReplication() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT, BIGINT).replicate().build(); + testPartitionPositionsWithRleNotNullWithReplication(PartitioningMode.ROW_WISE); + testPartitionPositionsWithRleNotNullWithReplication(PartitioningMode.COLUMNAR); + } + + private void testPartitionPositionsWithRleNotNullWithReplication(PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT, BIGINT).replicate().build(); Page page = new Page(createRepeatedValuesBlock(0, POSITIONS_PER_PAGE), createLongSequenceBlock(0, POSITIONS_PER_PAGE)); processPages(pagePartitioner, partitioningMode, page); @@ -296,10 +348,17 @@ public void testPartitionPositionsWithRleNotNullWithReplication(PartitioningMode assertThat(partition1).containsExactly(0L); // position 0 copied to all partitions } - @Test(dataProvider = "partitioningMode") - public void testPartitionPositionsWithRleNullWithNullChannel(PartitioningMode partitioningMode) + @Test + public void testPartitionPositionsWithRleNullWithNullChannel() + { + testPartitionPositionsWithRleNullWithNullChannel(PartitioningMode.ROW_WISE); + testPartitionPositionsWithRleNullWithNullChannel(PartitioningMode.COLUMNAR); + } + + private void testPartitionPositionsWithRleNullWithNullChannel(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT, BIGINT).withNullChannel(0).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT, BIGINT).withNullChannel(0).build(); Page page = new Page(RunLengthEncodedBlock.create(createLongsBlock((Long) null), POSITIONS_PER_PAGE), createLongSequenceBlock(0, POSITIONS_PER_PAGE)); processPages(pagePartitioner, partitioningMode, page); @@ -310,10 +369,17 @@ public void testPartitionPositionsWithRleNullWithNullChannel(PartitioningMode pa assertThat(partition1).containsExactlyElementsOf(readLongs(Stream.of(page), 1)); } - @Test(dataProvider = "partitioningMode") - public void testOutputForDictionaryBlock(PartitioningMode partitioningMode) + @Test + public void testOutputForDictionaryBlock() + { + testOutputForDictionaryBlock(PartitioningMode.ROW_WISE); + testOutputForDictionaryBlock(PartitioningMode.COLUMNAR); + } + + private void testOutputForDictionaryBlock(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page = new Page(createLongDictionaryBlock(0, 10)); // must have at least 10 position to have non-trivial dict processPages(pagePartitioner, partitioningMode, page); @@ -324,10 +390,17 @@ public void testOutputForDictionaryBlock(PartitioningMode partitioningMode) assertThat(partition1).containsExactlyElementsOf(nCopies(5, 1L)); } - @Test(dataProvider = 
"partitioningMode") - public void testOutputForOneValueDictionaryBlock(PartitioningMode partitioningMode) + @Test + public void testOutputForOneValueDictionaryBlock() + { + testOutputForOneValueDictionaryBlock(PartitioningMode.ROW_WISE); + testOutputForOneValueDictionaryBlock(PartitioningMode.COLUMNAR); + } + + private void testOutputForOneValueDictionaryBlock(PartitioningMode partitioningMode) { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page = new Page(DictionaryBlock.create(4, createLongsBlock(0), new int[] {0, 0, 0, 0})); processPages(pagePartitioner, partitioningMode, page); @@ -338,10 +411,17 @@ public void testOutputForOneValueDictionaryBlock(PartitioningMode partitioningMo assertThat(partition1).isEmpty(); } - @Test(dataProvider = "partitioningMode") - public void testOutputForViewDictionaryBlock(PartitioningMode partitioningMode) + @Test + public void testOutputForViewDictionaryBlock() { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).build(); + testOutputForViewDictionaryBlock(PartitioningMode.ROW_WISE); + testOutputForViewDictionaryBlock(PartitioningMode.COLUMNAR); + } + + private void testOutputForViewDictionaryBlock(PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); Page page = new Page(DictionaryBlock.create(4, createLongSequenceBlock(4, 8), new int[] {1, 0, 3, 2})); processPages(pagePartitioner, partitioningMode, page); @@ -352,10 +432,48 @@ public void testOutputForViewDictionaryBlock(PartitioningMode partitioningMode) assertThat(partition1).containsExactlyInAnyOrder(5L, 7L); } - @Test(dataProvider = "typesWithPartitioningMode") - public void testOutputForSimplePageWithType(Type type, PartitioningMode partitioningMode) - { - PagePartitioner pagePartitioner = pagePartitioner(BIGINT, type).build(); + @Test + public void testOutputForSimplePageWithType() + { + testOutputForSimplePageWithType(BIGINT, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(BOOLEAN, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(INTEGER, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(createCharType(10), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(createUnboundedVarcharType(), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(DOUBLE, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(SMALLINT, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(TINYINT, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(UUID, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(VARBINARY, PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(createDecimalType(1), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(createDecimalType(Decimals.MAX_SHORT_PRECISION + 1), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(new ArrayType(BIGINT), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(TimestampType.createTimestampType(9), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(TimestampType.createTimestampType(3), PartitioningMode.ROW_WISE); + testOutputForSimplePageWithType(IPADDRESS, PartitioningMode.ROW_WISE); + + testOutputForSimplePageWithType(BIGINT, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(BOOLEAN, PartitioningMode.COLUMNAR); 
+ testOutputForSimplePageWithType(INTEGER, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(createCharType(10), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(createUnboundedVarcharType(), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(DOUBLE, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(SMALLINT, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(TINYINT, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(UUID, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(VARBINARY, PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(createDecimalType(1), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(createDecimalType(Decimals.MAX_SHORT_PRECISION + 1), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(new ArrayType(BIGINT), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(TimestampType.createTimestampType(9), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(TimestampType.createTimestampType(3), PartitioningMode.COLUMNAR); + testOutputForSimplePageWithType(IPADDRESS, PartitioningMode.COLUMNAR); + } + + private void testOutputForSimplePageWithType(Type type, PartitioningMode partitioningMode) + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT, type).build(); Page page = new Page( createLongSequenceBlock(0, POSITIONS_PER_PAGE), // partition block createBlockForType(type, POSITIONS_PER_PAGE)); @@ -367,18 +485,83 @@ public void testOutputForSimplePageWithType(Type type, PartitioningMode partitio assertThat(partitioned).containsExactlyInAnyOrderElementsOf(expected); // order is different due to 2 partitions joined } - @Test(dataProvider = "types") - public void testOutputWithMixedRowWiseAndColumnarPartitioning(Type type) + @Test + public void testOutputWithMixedRowWiseAndColumnarPartitioning() + { + testOutputEqualsInput(BIGINT, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(BOOLEAN, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(INTEGER, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(createCharType(10), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(createUnboundedVarcharType(), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(DOUBLE, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(SMALLINT, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(TINYINT, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(UUID, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(VARBINARY, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(createDecimalType(1), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(createDecimalType(Decimals.MAX_SHORT_PRECISION + 1), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(new ArrayType(BIGINT), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(TimestampType.createTimestampType(9), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(TimestampType.createTimestampType(3), PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + testOutputEqualsInput(IPADDRESS, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); + + testOutputEqualsInput(BIGINT, 
PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(BOOLEAN, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(INTEGER, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(createCharType(10), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(createUnboundedVarcharType(), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(DOUBLE, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(SMALLINT, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(TINYINT, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(UUID, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(VARBINARY, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(createDecimalType(1), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(createDecimalType(Decimals.MAX_SHORT_PRECISION + 1), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(new ArrayType(BIGINT), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(TimestampType.createTimestampType(9), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(TimestampType.createTimestampType(3), PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testOutputEqualsInput(IPADDRESS, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + } + + @Test + public void testOutputBytesWhenReused() + { + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).build(); + OperatorContext operatorContext = operatorContext(); + + Page page = new Page(createLongsBlock(1, 1, 1, 1, 1, 1)); + + pagePartitioner.partitionPage(page, operatorContext); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(0); + pagePartitioner.prepareForRelease(operatorContext); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(page.getSizeInBytes()); + // release again with no additional input, size should not change + pagePartitioner.prepareForRelease(operatorContext); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(page.getSizeInBytes()); + + pagePartitioner.partitionPage(page, operatorContext); + pagePartitioner.prepareForRelease(operatorContext); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(page.getSizeInBytes() * 2); + + pagePartitioner.close(); + List output = outputBuffer.getEnqueued(); + // only a single page was flushed after the partitioner is closed, all output bytes were reported eagerly on release + assertThat(output.size()).isEqualTo(1); + } + + @Test + public void testMemoryReleased() { - testOutputEqualsInput(type, PartitioningMode.COLUMNAR, PartitioningMode.ROW_WISE); - testOutputEqualsInput(type, PartitioningMode.ROW_WISE, PartitioningMode.COLUMNAR); + testMemoryReleased(PartitioningMode.ROW_WISE); + testMemoryReleased(PartitioningMode.COLUMNAR); } - @Test(dataProvider = "partitioningMode") - public void testMemoryReleased(PartitioningMode partitioningMode) + private void testMemoryReleased(PartitioningMode partitioningMode) { AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext(); - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).withMemoryContext(memoryContext).build(); + TestOutputBuffer outputBuffer = new 
TestOutputBuffer(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).withMemoryContext(memoryContext).build(); Page page = new Page(createLongsBlock(0L, 1L, 2L, 3L, null)); processPages(pagePartitioner, partitioningMode, page); @@ -386,13 +569,20 @@ public void testMemoryReleased(PartitioningMode partitioningMode) assertThat(memoryContext.getBytes()).isEqualTo(0); } - @Test(dataProvider = "partitioningMode") - public void testMemoryReleasedOnFailure(PartitioningMode partitioningMode) + @Test + public void testMemoryReleasedOnFailure() + { + testMemoryReleasedOnFailure(PartitioningMode.ROW_WISE); + testMemoryReleasedOnFailure(PartitioningMode.COLUMNAR); + } + + private void testMemoryReleasedOnFailure(PartitioningMode partitioningMode) { AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext(); RuntimeException exception = new RuntimeException(); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); outputBuffer.throwOnEnqueue(exception); - PagePartitioner pagePartitioner = pagePartitioner(BIGINT).withMemoryContext(memoryContext).build(); + PagePartitioner pagePartitioner = pagePartitioner(outputBuffer, BIGINT).withMemoryContext(memoryContext).build(); Page page = new Page(createLongsBlock(0L, 1L, 2L, 3L, null)); partitioningMode.partitionPage(pagePartitioner, page); @@ -403,7 +593,8 @@ public void testMemoryReleasedOnFailure(PartitioningMode partitioningMode) private void testOutputEqualsInput(Type type, PartitioningMode mode1, PartitioningMode mode2) { - PagePartitionerBuilder pagePartitionerBuilder = pagePartitioner(BIGINT, type, type); + TestOutputBuffer outputBuffer = new TestOutputBuffer(); + PagePartitionerBuilder pagePartitionerBuilder = pagePartitioner(outputBuffer, BIGINT, type, type); PagePartitioner pagePartitioner = pagePartitionerBuilder.build(); Page input = new Page( createLongSequenceBlock(0, POSITIONS_PER_PAGE), // partition block @@ -422,48 +613,6 @@ private void testOutputEqualsInput(Type type, PartitioningMode mode1, Partitioni outputBuffer.clear(); } - @DataProvider(name = "partitioningMode") - public static Object[][] partitioningMode() - { - return new Object[][] {{PartitioningMode.ROW_WISE}, {PartitioningMode.COLUMNAR}}; - } - - @DataProvider(name = "types") - public static Object[][] types() - { - return getTypes().stream().map(type -> new Object[] {type}).toArray(Object[][]::new); - } - - @DataProvider(name = "typesWithPartitioningMode") - public static Object[][] typesWithPartitioningMode() - { - return getTypes().stream() - .flatMap(type -> Stream.of(PartitioningMode.values()) - .map(partitioningMode -> new Object[] {type, partitioningMode})) - .toArray(Object[][]::new); - } - - private static ImmutableList getTypes() - { - return ImmutableList.of( - BIGINT, - BOOLEAN, - INTEGER, - createCharType(10), - createUnboundedVarcharType(), - DOUBLE, - SMALLINT, - TINYINT, - UUID, - VARBINARY, - createDecimalType(1), - createDecimalType(Decimals.MAX_SHORT_PRECISION + 1), - new ArrayType(BIGINT), - TimestampType.createTimestampType(9), - TimestampType.createTimestampType(3), - IPADDRESS); - } - private static Block createBlockForType(Type type, int positionsPerPage) { return createRandomBlockForType(type, positionsPerPage, 0.2F); @@ -500,17 +649,17 @@ private static List readChannel(Stream pages, int channel, Type ty return unmodifiableList(result); } - private PagePartitionerBuilder pagePartitioner(Type... types) + private PagePartitionerBuilder pagePartitioner(TestOutputBuffer outputBuffer, Type... 
types) { - return pagePartitioner(ImmutableList.copyOf(types)); + return pagePartitioner(ImmutableList.copyOf(types), outputBuffer); } - private PagePartitionerBuilder pagePartitioner(List types) + private PagePartitionerBuilder pagePartitioner(List types, TestOutputBuffer outputBuffer) { - return pagePartitioner().withTypes(types); + return pagePartitioner(outputBuffer).withTypes(types); } - private PagePartitionerBuilder pagePartitioner() + private PagePartitionerBuilder pagePartitioner(TestOutputBuffer outputBuffer) { return new PagePartitionerBuilder(executor, scheduledExecutor, outputBuffer); } diff --git a/core/trino-main/src/test/java/io/trino/operator/output/TestPartitionedOutputOperator.java b/core/trino-main/src/test/java/io/trino/operator/output/TestPartitionedOutputOperator.java index a8adb0ddbab23..9dd54666e942b 100644 --- a/core/trino-main/src/test/java/io/trino/operator/output/TestPartitionedOutputOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/output/TestPartitionedOutputOperator.java @@ -68,7 +68,10 @@ public void testOperatorContextStats() partitionedOutputOperator.addInput(page); OperatorContext operatorContext = partitionedOutputOperator.getOperatorContext(); - assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(page.getSizeInBytes()); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(0); assertThat(operatorContext.getOutputPositions().getTotalCount()).isEqualTo(page.getPositionCount()); + + partitionedOutputOperator.finish(); + assertThat(operatorContext.getOutputDataSize().getTotalCount()).isEqualTo(page.getSizeInBytes()); } } diff --git a/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppender.java b/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppender.java index a41bd26a53341..01ece57858d9a 100644 --- a/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppender.java +++ b/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppender.java @@ -40,12 +40,9 @@ import io.trino.spi.type.VarbinaryType; import io.trino.spi.type.VarcharType; import io.trino.type.BlockTypeOperators; -import io.trino.type.UnknownType; import it.unimi.dsi.fastutil.ints.IntArrayList; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; -import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.function.Function; @@ -83,72 +80,97 @@ public class TestPositionsAppender { private static final PositionsAppenderFactory POSITIONS_APPENDER_FACTORY = new PositionsAppenderFactory(new BlockTypeOperators()); - @Test(dataProvider = "types") - public void testMixedBlockTypes(TestType type) - { - List input = ImmutableList.of( - input(emptyBlock(type)), - input(nullBlock(type, 3), 0, 2), - input(notNullBlock(type, 3), 1, 2), - input(partiallyNullBlock(type, 4), 0, 1, 2, 3), - input(partiallyNullBlock(type, 4)), // empty position list - input(rleBlock(type, 4), 0, 2), - input(rleBlock(type, 2), 0, 1), // rle all positions - input(nullRleBlock(type, 4), 1, 2), - input(dictionaryBlock(type, 4, 2, 0), 0, 3), // dict not null - input(dictionaryBlock(type, 8, 4, 0.5F), 1, 3, 5), // dict mixed - input(dictionaryBlock(type, 8, 4, 1), 1, 3, 5), // dict null - input(rleBlock(dictionaryBlock(type, 1, 2, 0), 3), 2), // rle -> dict - input(rleBlock(dictionaryBlock(notNullBlock(type, 2), new int[] {1}), 3), 2), // rle -> dict with position 0 mapped to > 0 - 
input(rleBlock(dictionaryBlock(rleBlock(type, 4), 1), 3), 1), // rle -> dict -> rle - input(dictionaryBlock(dictionaryBlock(type, 5, 4, 0.5F), 3), 2), // dict -> dict - input(dictionaryBlock(dictionaryBlock(dictionaryBlock(type, 5, 4, 0.5F), 3), 3), 2), // dict -> dict -> dict - input(dictionaryBlock(rleBlock(type, 4), 3), 0, 2), // dict -> rle - input(notNullBlock(type, 4).getRegion(2, 2), 0, 1), // not null block with offset - input(partiallyNullBlock(type, 4).getRegion(2, 2), 0, 1), // nullable block with offset - input(rleBlock(notNullBlock(type, 4).getRegion(2, 1), 3), 1)); // rle block with offset - - testAppend(type, input); + @Test + public void testMixedBlockTypes() + { + for (TestType type : TestType.values()) { + List input = ImmutableList.of( + input(emptyBlock(type)), + input(nullBlock(type, 3), 0, 2), + input(notNullBlock(type, 3), 1, 2), + input(partiallyNullBlock(type, 4), 0, 1, 2, 3), + input(partiallyNullBlock(type, 4)), // empty position list + input(rleBlock(type, 4), 0, 2), + input(rleBlock(type, 2), 0, 1), // rle all positions + input(nullRleBlock(type, 4), 1, 2), + input(dictionaryBlock(type, 4, 2, 0), 0, 3), // dict not null + input(dictionaryBlock(type, 8, 4, 0.5F), 1, 3, 5), // dict mixed + input(dictionaryBlock(type, 8, 4, 1), 1, 3, 5), // dict null + input(rleBlock(dictionaryBlock(type, 1, 2, 0), 3), 2), // rle -> dict + input(rleBlock(dictionaryBlock(notNullBlock(type, 2), new int[] {1}), 3), 2), // rle -> dict with position 0 mapped to > 0 + input(rleBlock(dictionaryBlock(rleBlock(type, 4), 1), 3), 1), // rle -> dict -> rle + input(dictionaryBlock(dictionaryBlock(type, 5, 4, 0.5F), 3), 2), // dict -> dict + input(dictionaryBlock(dictionaryBlock(dictionaryBlock(type, 5, 4, 0.5F), 3), 3), 2), // dict -> dict -> dict + input(dictionaryBlock(rleBlock(type, 4), 3), 0, 2), // dict -> rle + input(notNullBlock(type, 4).getRegion(2, 2), 0, 1), // not null block with offset + input(partiallyNullBlock(type, 4).getRegion(2, 2), 0, 1), // nullable block with offset + input(rleBlock(notNullBlock(type, 4).getRegion(2, 1), 3), 1)); // rle block with offset + + testAppend(type, input); + } } - @Test(dataProvider = "types") - public void testNullRle(TestType type) + @Test + public void testNullRle() { - testNullRle(type.getType(), nullBlock(type, 2)); - testNullRle(type.getType(), nullRleBlock(type, 2)); - testNullRle(type.getType(), createRandomBlockForType(type, 4, 0.5f)); + for (TestType type : TestType.values()) { + testNullRle(type.getType(), nullBlock(type, 2)); + testNullRle(type.getType(), nullRleBlock(type, 2)); + testNullRle(type.getType(), createRandomBlockForType(type, 4, 0.5f)); + } } - @Test(dataProvider = "types") - public void testRleSwitchToFlat(TestType type) - { - List inputs = ImmutableList.of( - input(rleBlock(type, 3), 0, 1), - input(notNullBlock(type, 2), 0, 1)); - testAppend(type, inputs); + @Test + public void testRleSwitchToFlat() + { + for (TestType type : TestType.values()) { + List inputs = ImmutableList.of( + input(rleBlock(type, 3), 0, 1), + input(notNullBlock(type, 2), 0, 1)); + testAppend(type, inputs); + + List dictionaryInputs = ImmutableList.of( + input(rleBlock(type, 3), 0, 1), + input(dictionaryBlock(type, 2, 4, 0), 0, 1)); + testAppend(type, dictionaryInputs); + } + } - List dictionaryInputs = ImmutableList.of( - input(rleBlock(type, 3), 0, 1), - input(dictionaryBlock(type, 2, 4, 0), 0, 1)); - testAppend(type, dictionaryInputs); + @Test + public void testFlatAppendRle() + { + for (TestType type : TestType.values()) { + List inputs = 
ImmutableList.of( + input(notNullBlock(type, 2), 0, 1), + input(rleBlock(type, 3), 0, 1)); + testAppend(type, inputs); + + List dictionaryInputs = ImmutableList.of( + input(dictionaryBlock(type, 2, 4, 0), 0, 1), + input(rleBlock(type, 3), 0, 1)); + testAppend(type, dictionaryInputs); + } } - @Test(dataProvider = "types") - public void testFlatAppendRle(TestType type) + @Test + public void testMultipleRleBlocksWithDifferentValues() { - List inputs = ImmutableList.of( - input(notNullBlock(type, 2), 0, 1), - input(rleBlock(type, 3), 0, 1)); - testAppend(type, inputs); - - List dictionaryInputs = ImmutableList.of( - input(dictionaryBlock(type, 2, 4, 0), 0, 1), - input(rleBlock(type, 3), 0, 1)); - testAppend(type, dictionaryInputs); + testMultipleRleBlocksWithDifferentValues(TestType.BIGINT, createLongsBlock(0), createLongsBlock(1)); + testMultipleRleBlocksWithDifferentValues(TestType.BOOLEAN, createBooleansBlock(true), createBooleansBlock(false)); + testMultipleRleBlocksWithDifferentValues(TestType.INTEGER, createIntsBlock(0), createIntsBlock(1)); + testMultipleRleBlocksWithDifferentValues(TestType.CHAR_10, createStringsBlock("0"), createStringsBlock("1")); + testMultipleRleBlocksWithDifferentValues(TestType.VARCHAR, createStringsBlock("0"), createStringsBlock("1")); + testMultipleRleBlocksWithDifferentValues(TestType.DOUBLE, createDoublesBlock(0.0), createDoublesBlock(1.0)); + testMultipleRleBlocksWithDifferentValues(TestType.SMALLINT, createSmallintsBlock(0), createSmallintsBlock(1)); + testMultipleRleBlocksWithDifferentValues(TestType.TINYINT, createTinyintsBlock(0), createTinyintsBlock(1)); + testMultipleRleBlocksWithDifferentValues(TestType.VARBINARY, createSlicesBlock(Slices.allocate(Long.BYTES)), createSlicesBlock(Slices.allocate(Long.BYTES).getOutput().appendLong(1).slice())); + testMultipleRleBlocksWithDifferentValues(TestType.LONG_DECIMAL, createLongDecimalsBlock("0"), createLongDecimalsBlock("1")); + testMultipleRleBlocksWithDifferentValues(TestType.ARRAY_BIGINT, createArrayBigintBlock(ImmutableList.of(ImmutableList.of(0L))), createArrayBigintBlock(ImmutableList.of(ImmutableList.of(1L)))); + testMultipleRleBlocksWithDifferentValues(TestType.LONG_TIMESTAMP, createLongTimestampBlock(createTimestampType(9), new LongTimestamp(0, 0)), createLongTimestampBlock(createTimestampType(9), new LongTimestamp(1, 0))); + testMultipleRleBlocksWithDifferentValues(TestType.VARCHAR_WITH_TEST_BLOCK, adapt(createStringsBlock("0")), adapt(createStringsBlock("1"))); } - @Test(dataProvider = "differentValues") - public void testMultipleRleBlocksWithDifferentValues(TestType type, Block value1, Block value2) + private void testMultipleRleBlocksWithDifferentValues(TestType type, Block value1, Block value2) { List input = ImmutableList.of( input(rleBlock(value1, 3), 0, 1), @@ -156,44 +178,30 @@ public void testMultipleRleBlocksWithDifferentValues(TestType type, Block value1 testAppend(type, input); } - @DataProvider(name = "differentValues") - public static Object[][] differentValues() + @Test + public void testMultipleRleWithTheSameValueProduceRle() { - return new Object[][] - { - {TestType.BIGINT, createLongsBlock(0), createLongsBlock(1)}, - {TestType.BOOLEAN, createBooleansBlock(true), createBooleansBlock(false)}, - {TestType.INTEGER, createIntsBlock(0), createIntsBlock(1)}, - {TestType.CHAR_10, createStringsBlock("0"), createStringsBlock("1")}, - {TestType.VARCHAR, createStringsBlock("0"), createStringsBlock("1")}, - {TestType.DOUBLE, createDoublesBlock(0.0), createDoublesBlock(1.0)}, - {TestType.SMALLINT, 
createSmallintsBlock(0), createSmallintsBlock(1)}, - {TestType.TINYINT, createTinyintsBlock(0), createTinyintsBlock(1)}, - {TestType.VARBINARY, createSlicesBlock(Slices.allocate(Long.BYTES)), createSlicesBlock(Slices.allocate(Long.BYTES).getOutput().appendLong(1).slice())}, - {TestType.LONG_DECIMAL, createLongDecimalsBlock("0"), createLongDecimalsBlock("1")}, - {TestType.ARRAY_BIGINT, createArrayBigintBlock(ImmutableList.of(ImmutableList.of(0L))), createArrayBigintBlock(ImmutableList.of(ImmutableList.of(1L)))}, - {TestType.LONG_TIMESTAMP, createLongTimestampBlock(createTimestampType(9), new LongTimestamp(0, 0)), - createLongTimestampBlock(createTimestampType(9), new LongTimestamp(1, 0))}, - {TestType.VARCHAR_WITH_TEST_BLOCK, adapt(createStringsBlock("0")), adapt(createStringsBlock("1"))} - }; - } + for (TestType type : TestType.values()) { + UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); - @Test(dataProvider = "types") - public void testMultipleRleWithTheSameValueProduceRle(TestType type) - { - UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + Block value = notNullBlock(type, 1); + positionsAppender.append(allPositions(3), rleBlock(value, 3)); + positionsAppender.append(allPositions(2), rleBlock(value, 2)); - Block value = notNullBlock(type, 1); - positionsAppender.append(allPositions(3), rleBlock(value, 3)); - positionsAppender.append(allPositions(2), rleBlock(value, 2)); + Block actual = positionsAppender.build(); + assertThat(actual.getPositionCount()).isEqualTo(5); + assertInstanceOf(actual, RunLengthEncodedBlock.class); + } + } - Block actual = positionsAppender.build(); - assertThat(actual.getPositionCount()).isEqualTo(5); - assertInstanceOf(actual, RunLengthEncodedBlock.class); + @Test + public void testRleAppendForComplexTypeWithNullElement() + { + testRleAppendForComplexTypeWithNullElement(TestType.ROW_BIGINT_VARCHAR, RowBlock.fromFieldBlocks(1, new Block[] {nullBlock(BIGINT, 1), nullBlock(VARCHAR, 1)})); + testRleAppendForComplexTypeWithNullElement(TestType.ARRAY_BIGINT, ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, 1}, nullBlock(BIGINT, 1))); } - @Test(dataProvider = "complexTypesWithNullElementBlock") - public void testRleAppendForComplexTypeWithNullElement(TestType type, Block value) + private void testRleAppendForComplexTypeWithNullElement(TestType type, Block value) { checkArgument(value.getPositionCount() == 1); UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); @@ -207,31 +215,35 @@ public void testRleAppendForComplexTypeWithNullElement(TestType type, Block valu assertBlockEquals(type.getType(), actual, RunLengthEncodedBlock.create(value, 5)); } - @Test(dataProvider = "types") - public void testRleAppendedWithSinglePositionDoesNotProduceRle(TestType type) + @Test + public void testRleAppendedWithSinglePositionDoesNotProduceRle() { - UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + for (TestType type : TestType.values()) { + UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); - Block value = notNullBlock(type, 1); - positionsAppender.append(allPositions(3), rleBlock(value, 3)); - positionsAppender.append(allPositions(2), rleBlock(value, 
2)); - positionsAppender.append(0, rleBlock(value, 2)); + Block value = notNullBlock(type, 1); + positionsAppender.append(allPositions(3), rleBlock(value, 3)); + positionsAppender.append(allPositions(2), rleBlock(value, 2)); + positionsAppender.append(0, rleBlock(value, 2)); - Block actual = positionsAppender.build(); - assertThat(actual.getPositionCount()).isEqualTo(6); - assertThat(actual instanceof RunLengthEncodedBlock) - .describedAs(actual.getClass().getSimpleName()) - .isFalse(); + Block actual = positionsAppender.build(); + assertThat(actual.getPositionCount()).isEqualTo(6); + assertThat(actual instanceof RunLengthEncodedBlock) + .describedAs(actual.getClass().getSimpleName()) + .isFalse(); + } } - @Test(dataProvider = "types") - public static void testMultipleTheSameDictionariesProduceDictionary(TestType type) + @Test + public void testMultipleTheSameDictionariesProduceDictionary() { - UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + for (TestType type : TestType.values()) { + UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); - testMultipleTheSameDictionariesProduceDictionary(type, positionsAppender); - // test if appender can accept different dictionary after a build - testMultipleTheSameDictionariesProduceDictionary(type, positionsAppender); + testMultipleTheSameDictionariesProduceDictionary(type, positionsAppender); + // test if appender can accept different dictionary after a build + testMultipleTheSameDictionariesProduceDictionary(type, positionsAppender); + } } private static void testMultipleTheSameDictionariesProduceDictionary(TestType type, UnnestingPositionsAppender positionsAppender) @@ -246,91 +258,98 @@ private static void testMultipleTheSameDictionariesProduceDictionary(TestType ty assertThat(((DictionaryBlock) actual).getDictionary()).isEqualTo(dictionary); } - @Test(dataProvider = "types") - public void testDictionarySwitchToFlat(TestType type) + @Test + public void testDictionarySwitchToFlat() { - List inputs = ImmutableList.of( - input(dictionaryBlock(type, 3, 4, 0), 0, 1), - input(notNullBlock(type, 2), 0, 1)); - testAppend(type, inputs); + for (TestType type : TestType.values()) { + List inputs = ImmutableList.of( + input(dictionaryBlock(type, 3, 4, 0), 0, 1), + input(notNullBlock(type, 2), 0, 1)); + testAppend(type, inputs); + } } - @Test(dataProvider = "types") - public void testFlatAppendDictionary(TestType type) + @Test + public void testFlatAppendDictionary() { - List inputs = ImmutableList.of( - input(notNullBlock(type, 2), 0, 1), - input(dictionaryBlock(type, 3, 4, 0), 0, 1)); - testAppend(type, inputs); + for (TestType type : TestType.values()) { + List inputs = ImmutableList.of( + input(notNullBlock(type, 2), 0, 1), + input(dictionaryBlock(type, 3, 4, 0), 0, 1)); + testAppend(type, inputs); + } } - @Test(dataProvider = "types") - public void testDictionaryAppendDifferentDictionary(TestType type) + @Test + public void testDictionaryAppendDifferentDictionary() { - List dictionaryInputs = ImmutableList.of( - input(dictionaryBlock(type, 3, 4, 0), 0, 1), - input(dictionaryBlock(type, 2, 4, 0), 0, 1)); - testAppend(type, dictionaryInputs); + for (TestType type : TestType.values()) { + List dictionaryInputs = ImmutableList.of( + input(dictionaryBlock(type, 3, 4, 0), 0, 1), + input(dictionaryBlock(type, 2, 4, 0), 0, 1)); + testAppend(type, dictionaryInputs); + } } - @Test(dataProvider = 
"types") - public void testDictionarySingleThenFlat(TestType type) + @Test + public void testDictionarySingleThenFlat() { - BlockView firstInput = input(dictionaryBlock(type, 1, 4, 0), 0); - BlockView secondInput = input(dictionaryBlock(type, 2, 4, 0), 0, 1); - UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); - long initialRetainedSize = positionsAppender.getRetainedSizeInBytes(); + for (TestType type : TestType.values()) { + BlockView firstInput = input(dictionaryBlock(type, 1, 4, 0), 0); + BlockView secondInput = input(dictionaryBlock(type, 2, 4, 0), 0, 1); + UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + long initialRetainedSize = positionsAppender.getRetainedSizeInBytes(); - firstInput.positions().forEach((int position) -> positionsAppender.append(position, firstInput.block())); - positionsAppender.append(secondInput.positions(), secondInput.block()); + firstInput.positions().forEach((int position) -> positionsAppender.append(position, firstInput.block())); + positionsAppender.append(secondInput.positions(), secondInput.block()); - assertBuildResult(type, ImmutableList.of(firstInput, secondInput), positionsAppender, initialRetainedSize); + assertBuildResult(type, ImmutableList.of(firstInput, secondInput), positionsAppender, initialRetainedSize); + } } - @Test(dataProvider = "types") - public void testConsecutiveBuilds(TestType type) - { - UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + @Test + public void testConsecutiveBuilds() + { + for (TestType type : TestType.values()) { + UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(type.getType(), 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); + + // empty block + positionsAppender.append(positions(), emptyBlock(type)); + assertThat(positionsAppender.build().getPositionCount()).isEqualTo(0); + + Block block = createRandomBlockForType(type, 2, 0.5f); + // append only null position + int nullPosition = block.isNull(0) ? 
0 : 1; + positionsAppender.append(positions(nullPosition), block); + Block actualNullBlock = positionsAppender.build(); + assertThat(actualNullBlock.getPositionCount()).isEqualTo(1); + assertThat(actualNullBlock.isNull(0)).isTrue(); + + // append null and not null position + positionsAppender.append(allPositions(2), block); + assertBlockEquals(type.getType(), positionsAppender.build(), block); + + // append not null rle + Block rleBlock = rleBlock(type, 10); + positionsAppender.append(allPositions(10), rleBlock); + assertBlockEquals(type.getType(), positionsAppender.build(), rleBlock); + + // append null rle + Block nullRleBlock = nullRleBlock(type, 10); + positionsAppender.append(allPositions(10), nullRleBlock); + assertBlockEquals(type.getType(), positionsAppender.build(), nullRleBlock); + + // append dictionary + Block dictionaryBlock = dictionaryBlock(type, 10, 5, 0); + positionsAppender.append(allPositions(10), dictionaryBlock); + assertBlockEquals(type.getType(), positionsAppender.build(), dictionaryBlock); + + // just build to confirm appender was reset + assertThat(positionsAppender.build().getPositionCount()).isEqualTo(0); + } + } - // empty block - positionsAppender.append(positions(), emptyBlock(type)); - assertThat(positionsAppender.build().getPositionCount()).isEqualTo(0); - - Block block = createRandomBlockForType(type, 2, 0.5f); - // append only null position - int nullPosition = block.isNull(0) ? 0 : 1; - positionsAppender.append(positions(nullPosition), block); - Block actualNullBlock = positionsAppender.build(); - assertThat(actualNullBlock.getPositionCount()).isEqualTo(1); - assertThat(actualNullBlock.isNull(0)).isTrue(); - - // append null and not null position - positionsAppender.append(allPositions(2), block); - assertBlockEquals(type.getType(), positionsAppender.build(), block); - - // append not null rle - Block rleBlock = rleBlock(type, 10); - positionsAppender.append(allPositions(10), rleBlock); - assertBlockEquals(type.getType(), positionsAppender.build(), rleBlock); - - // append null rle - Block nullRleBlock = nullRleBlock(type, 10); - positionsAppender.append(allPositions(10), nullRleBlock); - assertBlockEquals(type.getType(), positionsAppender.build(), nullRleBlock); - - // append dictionary - Block dictionaryBlock = dictionaryBlock(type, 10, 5, 0); - positionsAppender.append(allPositions(10), dictionaryBlock); - assertBlockEquals(type.getType(), positionsAppender.build(), dictionaryBlock); - - // just build to confirm appender was reset - assertThat(positionsAppender.build().getPositionCount()).isEqualTo(0); - } - - // testcase for jit bug described https://github.com/trinodb/trino/issues/12821. - // this test needs to be run first (hence the lowest priority) as the test order - // influences jit compilation, making this problem to not occur if other tests are run first. 
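Sketch, not part of this patch: TestNG's priority attribute has no direct JUnit 5 equivalent. If the run-first workaround described in the removed comment above were still needed, one option would be JUnit 5's @TestMethodOrder with @Order (standard org.junit.jupiter.api types; the class and method names below are illustrative only):

import org.junit.jupiter.api.MethodOrderer.OrderAnnotation;
import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;

@TestMethodOrder(OrderAnnotation.class)
class OrderSensitiveExample
{
    @Test
    @Order(1) // lowest value runs first, mirroring the intent of TestNG's priority = Integer.MIN_VALUE
    void runsFirst() {}

    @Test
    @Order(2)
    void runsAfter() {}
}

As written, the patch simply drops the ordering hint together with the comment.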
- @Test(priority = Integer.MIN_VALUE) + @Test public void testSliceRle() { UnnestingPositionsAppender positionsAppender = POSITIONS_APPENDER_FACTORY.create(VARCHAR, 10, DEFAULT_MAX_PAGE_SIZE_IN_BYTES); @@ -362,23 +381,6 @@ public void testRowWithNestedFields() assertBlockEquals(type, actual, rowBLock); } - @DataProvider(name = "complexTypesWithNullElementBlock") - public static Object[][] complexTypesWithNullElementBlock() - { - return new Object[][] { - {TestType.ROW_BIGINT_VARCHAR, RowBlock.fromFieldBlocks(1, new Block[] {nullBlock(BIGINT, 1), nullBlock(VARCHAR, 1)})}, - {TestType.ARRAY_BIGINT, ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, 1}, nullBlock(BIGINT, 1))}}; - } - - @DataProvider(name = "types") - public static Object[][] types() - { - return Arrays.stream(TestType.values()) - .filter(testType -> testType != TestType.UNKNOWN) - .map(type -> new Object[] {type}) - .toArray(Object[][]::new); - } - private static ValueBlock singleValueBlock(String value) { BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, 1); @@ -577,8 +579,8 @@ private enum TestType LONG_TIMESTAMP(createTimestampType(9)), ROW_BIGINT_VARCHAR(anonymousRow(BigintType.BIGINT, VarcharType.VARCHAR)), ARRAY_BIGINT(new ArrayType(BigintType.BIGINT)), - VARCHAR_WITH_TEST_BLOCK(VarcharType.VARCHAR, adaptation()), - UNKNOWN(UnknownType.UNKNOWN); + VARCHAR_WITH_TEST_BLOCK(VarcharType.VARCHAR, adaptation()); +// UNKNOWN(UnknownType.UNKNOWN); private final Type type; private final Function blockAdaptation; diff --git a/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppenderPageBuilder.java b/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppenderPageBuilder.java index ca4cf5f5125fc..197e59f951a19 100644 --- a/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppenderPageBuilder.java +++ b/core/trino-main/src/test/java/io/trino/operator/output/TestPositionsAppenderPageBuilder.java @@ -16,14 +16,20 @@ import io.airlift.slice.Slices; import io.trino.spi.Page; import io.trino.spi.block.Block; +import io.trino.spi.block.DictionaryBlock; import io.trino.spi.block.RunLengthEncodedBlock; +import io.trino.spi.block.ValueBlock; +import io.trino.spi.predicate.Utils; import io.trino.type.BlockTypeOperators; import it.unimi.dsi.fastutil.ints.IntArrayList; import org.junit.jupiter.api.Test; import java.util.List; +import java.util.Optional; +import static io.trino.block.BlockAssertions.createRandomBlockForType; import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.lang.Math.toIntExact; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -44,7 +50,7 @@ public void testFullOnPositionCountLimit() Block rleBlock = RunLengthEncodedBlock.create(VARCHAR, Slices.utf8Slice("test"), 10); Page inputPage = new Page(rleBlock); - IntArrayList positions = IntArrayList.wrap(new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); + IntArrayList positions = IntArrayList.wrap(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); // Append 32760 positions, just less than MAX_POSITION_COUNT assertEquals(32768, PositionsAppenderPageBuilder.MAX_POSITION_COUNT, "expected MAX_POSITION_COUNT to be 32768"); for (int i = 0; i < 3276; i++) { @@ -79,7 +85,7 @@ public void testFullOnDirectSizeInBytes() Block rleBlock = RunLengthEncodedBlock.create(VARCHAR, Slices.utf8Slice("test"), 10); Page inputPage = new Page(rleBlock); - IntArrayList positions = 
IntArrayList.wrap(new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); + IntArrayList positions = IntArrayList.wrap(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); pageBuilder.appendToOutputPartition(inputPage, positions); // 10 positions inserted, size in bytes is still the same since we're in RLE mode but direct size is 10x sizeAccumulator = pageBuilder.computeAppenderSizes(); @@ -102,4 +108,59 @@ public void testFullOnDirectSizeInBytes() assertEquals(120, result.getPositionCount(), "result positions should be below the 8192 maximum"); assertTrue(result.getBlock(0) instanceof RunLengthEncodedBlock, "result block is RLE encoded"); } + + @Test + public void testFlushUsefulDictionariesOnRelease() + { + int maxPageBytes = 100; + int maxDirectSize = 1000; + PositionsAppenderPageBuilder pageBuilder = PositionsAppenderPageBuilder.withMaxPageSize( + maxPageBytes, + maxDirectSize, + List.of(VARCHAR), + new PositionsAppenderFactory(new BlockTypeOperators())); + + Block valueBlock = Utils.nativeValueToBlock(VARCHAR, Slices.utf8Slice("test")); + Block dictionaryBlock = DictionaryBlock.create(10, valueBlock, new int[10]); + Page inputPage = new Page(dictionaryBlock); + + pageBuilder.appendToOutputPartition(inputPage, IntArrayList.wrap(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); + // Dictionary mode appender should report the size of the ID's, but doesn't currently track + // the per-position size at all because it would be inefficient + assertEquals(Integer.BYTES * 10, pageBuilder.getSizeInBytes()); + assertFalse(pageBuilder.isFull()); + + Optional flushedPage = pageBuilder.flushOrFlattenBeforeRelease(); + assertTrue(flushedPage.isPresent(), "pageBuilder should force flush the dictionary"); + assertTrue(flushedPage.get().getBlock(0) instanceof DictionaryBlock, "result should be dictionary encoded"); + } + + @Test + public void testFlattenUnhelpfulDictionariesOnRelease() + { + // Create unhelpful dictionary wrapping + Block valueBlock = createRandomBlockForType(VARCHAR, 10, 0.25f); + Block dictionaryBlock = DictionaryBlock.create(10, valueBlock, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); + Page inputPage = new Page(dictionaryBlock); + + // Ensure the builder allows the entire value block to be inserted without being full + int maxPageBytes = toIntExact(valueBlock.getSizeInBytes() * 10); + int maxDirectSize = maxPageBytes * 10; + PositionsAppenderPageBuilder pageBuilder = PositionsAppenderPageBuilder.withMaxPageSize( + maxPageBytes, + maxDirectSize, + List.of(VARCHAR), + new PositionsAppenderFactory(new BlockTypeOperators())); + + pageBuilder.appendToOutputPartition(inputPage, IntArrayList.wrap(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); + assertEquals(Integer.BYTES * 10, pageBuilder.getSizeInBytes()); + assertFalse(pageBuilder.isFull()); + + assertEquals(Optional.empty(), pageBuilder.flushOrFlattenBeforeRelease(), "pageBuilder should not force a flush"); + assertFalse(pageBuilder.isFull()); + assertEquals(valueBlock.getSizeInBytes(), pageBuilder.getSizeInBytes(), "pageBuilder should have transitioned to direct mode"); + + Page result = pageBuilder.build(); + assertTrue(result.getBlock(0) instanceof ValueBlock, "result should not be a dictionary block"); + } } diff --git a/core/trino-main/src/test/java/io/trino/operator/output/TestSkewedPartitionRebalancer.java b/core/trino-main/src/test/java/io/trino/operator/output/TestSkewedPartitionRebalancer.java index 5a61bf221287e..548f25b6da200 100644 --- a/core/trino-main/src/test/java/io/trino/operator/output/TestSkewedPartitionRebalancer.java +++ 
b/core/trino-main/src/test/java/io/trino/operator/output/TestSkewedPartitionRebalancer.java @@ -32,7 +32,6 @@ class TestSkewedPartitionRebalancer { private static final long MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD = DataSize.of(1, MEGABYTE).toBytes(); private static final long MIN_DATA_PROCESSED_REBALANCE_THRESHOLD = DataSize.of(50, MEGABYTE).toBytes(); - private static final int MAX_REBALANCED_PARTITIONS = 30; @Test void testRebalanceWithSkewness() @@ -43,8 +42,7 @@ void testRebalanceWithSkewness() 3, 3, MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction(new TestPartitionFunction(partitionCount), rebalancer); rebalancer.addPartitionRowCount(0, 1000); @@ -104,8 +102,7 @@ void testRebalanceWithoutSkewness() 3, 2, MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction(new TestPartitionFunction(partitionCount), rebalancer); rebalancer.addPartitionRowCount(0, 1000); @@ -136,8 +133,7 @@ void testNoRebalanceWhenDataWrittenIsLessThanTheRebalanceLimit() 3, 3, MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction(new TestPartitionFunction(partitionCount), rebalancer); rebalancer.addPartitionRowCount(0, 1000); @@ -166,8 +162,7 @@ void testNoRebalanceWhenDataWrittenByThePartitionIsLessThanWriterScalingMinDataP 3, 3, minPartitionDataProcessedRebalanceThreshold, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction(new TestPartitionFunction(partitionCount), rebalancer); rebalancer.addPartitionRowCount(0, 1000); @@ -195,8 +190,7 @@ void testRebalancePartitionToSingleTaskInARebalancingLoop() 3, 3, MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction(new TestPartitionFunction(partitionCount), rebalancer); rebalancer.addPartitionRowCount(0, 1000); @@ -240,8 +234,7 @@ public void testConsiderSkewedPartitionOnlyWithinACycle() 3, 1, MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - MAX_REBALANCED_PARTITIONS); + MIN_DATA_PROCESSED_REBALANCE_THRESHOLD); SkewedPartitionFunction function = new SkewedPartitionFunction( new TestPartitionFunction(partitionCount), rebalancer); @@ -280,70 +273,6 @@ public void testConsiderSkewedPartitionOnlyWithinACycle() .containsExactly(ImmutableList.of(0, 2), ImmutableList.of(1), ImmutableList.of(2, 0)); } - @Test - public void testRebalancePartitionWithMaxRebalancedPartitionsPerTask() - { - int partitionCount = 3; - SkewedPartitionRebalancer rebalancer = new SkewedPartitionRebalancer( - partitionCount, - 3, - 3, - MIN_PARTITION_DATA_PROCESSED_REBALANCE_THRESHOLD, - MIN_DATA_PROCESSED_REBALANCE_THRESHOLD, - 2); - SkewedPartitionFunction function = new SkewedPartitionFunction( - new TestPartitionFunction(partitionCount), - rebalancer); - - rebalancer.addPartitionRowCount(0, 1000); - rebalancer.addPartitionRowCount(1, 1000); 
- rebalancer.addPartitionRowCount(2, 1000); - rebalancer.addDataProcessed(DataSize.of(40, MEGABYTE).toBytes()); - - // rebalancing will only happen to single task even though two tasks are available - rebalancer.rebalance(); - - assertThat(getPartitionPositions(function, 17)) - .containsExactly( - new IntArrayList(ImmutableList.of(0, 3, 6, 9, 12, 15)), - new IntArrayList(ImmutableList.of(1, 4, 7, 10, 13, 16)), - new IntArrayList(ImmutableList.of(2, 5, 8, 11, 14))); - assertThat(rebalancer.getPartitionAssignments()) - .containsExactly(ImmutableList.of(0), ImmutableList.of(1), ImmutableList.of(2)); - - rebalancer.addPartitionRowCount(0, 1000); - rebalancer.addPartitionRowCount(1, 1000); - rebalancer.addPartitionRowCount(2, 1000); - rebalancer.addDataProcessed(DataSize.of(20, MEGABYTE).toBytes()); - // Rebalancing will happen since we crossed the data processed limit. - // Part0 -> Task1 (Bucket1), Part1 -> Task0 (Bucket1) - rebalancer.rebalance(); - - assertThat(getPartitionPositions(function, 17)) - .containsExactly( - new IntArrayList(ImmutableList.of(0, 4, 6, 10, 12, 16)), - new IntArrayList(ImmutableList.of(1, 3, 7, 9, 13, 15)), - new IntArrayList(ImmutableList.of(2, 5, 8, 11, 14))); - assertThat(rebalancer.getPartitionAssignments()) - .containsExactly(ImmutableList.of(0, 1), ImmutableList.of(1, 0), ImmutableList.of(2)); - - rebalancer.addPartitionRowCount(0, 1000); - rebalancer.addPartitionRowCount(1, 1000); - rebalancer.addPartitionRowCount(2, 1000); - rebalancer.addDataProcessed(DataSize.of(200, MEGABYTE).toBytes()); - - // No rebalancing will happen since we crossed the max rebalanced partitions limit. - rebalancer.rebalance(); - - assertThat(getPartitionPositions(function, 17)) - .containsExactly( - new IntArrayList(ImmutableList.of(0, 4, 6, 10, 12, 16)), - new IntArrayList(ImmutableList.of(1, 3, 7, 9, 13, 15)), - new IntArrayList(ImmutableList.of(2, 5, 8, 11, 14))); - assertThat(rebalancer.getPartitionAssignments()) - .containsExactly(ImmutableList.of(0, 1), ImmutableList.of(1, 0), ImmutableList.of(2)); - } - private static List> getPartitionPositions(PartitionFunction function, int maxPosition) { List> partitionPositions = new ArrayList<>(); diff --git a/core/trino-main/src/test/java/io/trino/operator/project/TestDictionaryAwarePageProjection.java b/core/trino-main/src/test/java/io/trino/operator/project/TestDictionaryAwarePageProjection.java index bdcc3daca7a7a..069feb0b1169e 100644 --- a/core/trino-main/src/test/java/io/trino/operator/project/TestDictionaryAwarePageProjection.java +++ b/core/trino-main/src/test/java/io/trino/operator/project/TestDictionaryAwarePageProjection.java @@ -23,11 +23,13 @@ import io.trino.spi.block.LazyBlock; import io.trino.spi.block.LongArrayBlock; import io.trino.spi.block.RunLengthEncodedBlock; +import io.trino.spi.block.ValueBlock; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.type.Type; -import org.testng.annotations.AfterClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.Arrays; import java.util.concurrent.ScheduledExecutorService; @@ -44,21 +46,16 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.assertj.core.api.Fail.fail; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static 
org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestDictionaryAwarePageProjection { private static final ScheduledExecutorService executor = newSingleThreadScheduledExecutor(daemonThreadsNamed("TestDictionaryAwarePageProjection-%s")); - @DataProvider(name = "forceYield") - public static Object[][] forceYieldAndProduceLazyBlock() - { - return new Object[][] { - {true, false}, - {false, true}, - {false, false}}; - } - - @AfterClass(alwaysRun = true) + @AfterAll public void tearDown() { executor.shutdownNow(); @@ -73,54 +70,66 @@ public void testDelegateMethods() assertThat(projection.getType()).isEqualTo(BIGINT); } - @Test(dataProvider = "forceYield") - public void testSimpleBlock(boolean forceYield, boolean produceLazyBlock) + @Test + public void testSimpleBlock() { - Block block = createLongSequenceBlock(0, 100); - testProject(block, block.getClass(), forceYield, produceLazyBlock); + ValueBlock block = createLongSequenceBlock(0, 100); + testProject(block, block.getClass(), true, false); + testProject(block, block.getClass(), false, true); + testProject(block, block.getClass(), false, false); } - @Test(dataProvider = "forceYield") - public void testRleBlock(boolean forceYield, boolean produceLazyBlock) + @Test + public void testRleBlock() { Block value = createLongSequenceBlock(42, 43); RunLengthEncodedBlock block = (RunLengthEncodedBlock) RunLengthEncodedBlock.create(value, 100); - testProject(block, RunLengthEncodedBlock.class, forceYield, produceLazyBlock); + testProject(block, RunLengthEncodedBlock.class, true, false); + testProject(block, RunLengthEncodedBlock.class, false, true); + testProject(block, RunLengthEncodedBlock.class, false, false); } - @Test(dataProvider = "forceYield") - public void testRleBlockWithFailure(boolean forceYield, boolean produceLazyBlock) + @Test + public void testRleBlockWithFailure() { Block value = createLongSequenceBlock(-43, -42); RunLengthEncodedBlock block = (RunLengthEncodedBlock) RunLengthEncodedBlock.create(value, 100); - testProjectFails(block, RunLengthEncodedBlock.class, forceYield, produceLazyBlock); + testProjectFails(block, RunLengthEncodedBlock.class, true, false); + testProjectFails(block, RunLengthEncodedBlock.class, false, true); + testProjectFails(block, RunLengthEncodedBlock.class, false, false); } - @Test(dataProvider = "forceYield") - public void testDictionaryBlock(boolean forceYield, boolean produceLazyBlock) + @Test + public void testDictionaryBlock() { Block block = createDictionaryBlock(10, 100); - testProject(block, DictionaryBlock.class, forceYield, produceLazyBlock); + testProject(block, DictionaryBlock.class, true, false); + testProject(block, DictionaryBlock.class, false, true); + testProject(block, DictionaryBlock.class, false, false); } - @Test(dataProvider = "forceYield") - public void testDictionaryBlockWithFailure(boolean forceYield, boolean produceLazyBlock) + @Test + public void testDictionaryBlockWithFailure() { Block block = createDictionaryBlockWithFailure(10, 100); - testProjectFails(block, DictionaryBlock.class, forceYield, produceLazyBlock); + testProjectFails(block, DictionaryBlock.class, true, false); + testProjectFails(block, DictionaryBlock.class, false, true); + testProjectFails(block, DictionaryBlock.class, false, false); } - @Test(dataProvider = "forceYield") - public void testDictionaryBlockProcessingWithUnusedFailure(boolean forceYield, boolean produceLazyBlock) + @Test + public void testDictionaryBlockProcessingWithUnusedFailure() { 
Block block = createDictionaryBlockWithUnusedEntries(10, 100); // failures in the dictionary processing will cause a fallback to normal columnar processing - testProject(block, LongArrayBlock.class, forceYield, produceLazyBlock); + testProject(block, LongArrayBlock.class, true, false); + testProject(block, LongArrayBlock.class, false, true); + testProject(block, LongArrayBlock.class, false, false); } @Test @@ -136,8 +145,15 @@ public void testDictionaryProcessingIgnoreYield() testProjectFastReturnIgnoreYield(block, projection, false); } - @Test(dataProvider = "forceYield") - public void testDictionaryProcessingEnableDisable(boolean forceYield, boolean produceLazyBlock) + @Test + public void testDictionaryProcessingEnableDisable() + { + testDictionaryProcessingEnableDisable(true, false); + testDictionaryProcessingEnableDisable(false, true); + testDictionaryProcessingEnableDisable(false, false); + } + + private void testDictionaryProcessingEnableDisable(boolean forceYield, boolean produceLazyBlock) { DictionaryAwarePageProjection projection = createProjection(produceLazyBlock); diff --git a/core/trino-main/src/test/java/io/trino/operator/scalar/TestDateTimeFunctions.java b/core/trino-main/src/test/java/io/trino/operator/scalar/TestDateTimeFunctions.java index 0e7fd5af99718..9af8140ed98e5 100644 --- a/core/trino-main/src/test/java/io/trino/operator/scalar/TestDateTimeFunctions.java +++ b/core/trino-main/src/test/java/io/trino/operator/scalar/TestDateTimeFunctions.java @@ -130,6 +130,10 @@ public void testFromUnixTime() assertThat(assertions.function("from_unixtime", "980172245.888")) .matches("TIMESTAMP '2001-01-22 03:04:05.888 Pacific/Apia'"); + + assertTrinoExceptionThrownBy(assertions.function("from_unixtime", "123456789123456789")::evaluate) + .hasErrorCode(INVALID_FUNCTION_ARGUMENT) + .hasMessage("Millis overflow: 9223372036854775807"); } @Test @@ -197,6 +201,10 @@ public void testFromUnixTimeNanos() assertThat(assertions.function("from_unixtime_nanos", "DECIMAL '-12345678900123456789.500'")) .matches("TIMESTAMP '1578-10-13 17:18:03.876543210 Pacific/Apia'"); + + assertTrinoExceptionThrownBy(assertions.function("from_unixtime_nanos", "DECIMAL '123456789123456789000000000'")::evaluate) + .hasErrorCode(INVALID_FUNCTION_ARGUMENT) + .hasMessage("long overflow"); } @Test @@ -214,6 +222,11 @@ public void testFromUnixTimeWithOffset() assertTrinoExceptionThrownBy(assertions.function("from_unixtime", "0", "-100", "100")::evaluate) .hasErrorCode(INVALID_FUNCTION_ARGUMENT); + + // test millisecond overflow + assertTrinoExceptionThrownBy(assertions.function("from_unixtime", "123456789123456789", "1", "1")::evaluate) + .hasErrorCode(INVALID_FUNCTION_ARGUMENT) + .hasMessage("Millis overflow: 9223372036854775807"); } @Test @@ -236,6 +249,10 @@ public void testFromUnixTimeWithTimeZone() assertThat(assertions.function("from_unixtime", "7200", "'America/Los_Angeles'")) .matches("TIMESTAMP '1969-12-31 18:00:00.000 America/Los_Angeles'"); + + assertTrinoExceptionThrownBy(assertions.function("from_unixtime", "123456789123456789", "'Asia/Kolkata'")::evaluate) + .hasErrorCode(INVALID_FUNCTION_ARGUMENT) + .hasMessage("Millis overflow: 9223372036854775807"); } @Test @@ -262,6 +279,10 @@ public void testFromISO8601() assertThat(assertions.function("from_iso8601_date", "'2001-08-22'")) .matches("DATE '2001-08-22'"); + + assertTrinoExceptionThrownBy(assertions.function("from_iso8601_timestamp", "'115023-03-21T10:45:30.00Z'")::evaluate) + .hasErrorCode(INVALID_FUNCTION_ARGUMENT) + .hasMessage("Millis overflow: 
3567614928330000"); } @Test diff --git a/core/trino-main/src/test/java/io/trino/operator/scalar/TestScalarValidation.java b/core/trino-main/src/test/java/io/trino/operator/scalar/TestScalarValidation.java index 3e4927d963877..e73030b32fa14 100644 --- a/core/trino-main/src/test/java/io/trino/operator/scalar/TestScalarValidation.java +++ b/core/trino-main/src/test/java/io/trino/operator/scalar/TestScalarValidation.java @@ -24,15 +24,19 @@ import io.trino.spi.type.StandardTypes; import io.trino.spi.type.Type; import jakarta.annotation.Nullable; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; @SuppressWarnings("UtilityClassWithoutPrivateConstructor") public class TestScalarValidation { - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Parametric class method .* is annotated with @ScalarFunction") + @Test public void testBogusParametricMethodAnnotation() { - extractParametricScalar(BogusParametricMethodAnnotation.class); + assertThatThrownBy(() -> extractParametricScalar(BogusParametricMethodAnnotation.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Parametric class method .* is annotated with @ScalarFunction"); } @ScalarFunction @@ -42,20 +46,24 @@ public static final class BogusParametricMethodAnnotation public static void bad() {} } - @Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "Parametric class .* does not have any annotated methods") + @Test public void testNoParametricMethods() { - extractParametricScalar(NoParametricMethods.class); + assertThatThrownBy(() -> extractParametricScalar(NoParametricMethods.class)) + .isInstanceOf(TrinoException.class) + .hasMessageMatching("Parametric class .* does not have any annotated methods"); } @SuppressWarnings("EmptyClass") @ScalarFunction public static final class NoParametricMethods {} - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* is missing @SqlType annotation") + @Test public void testMethodMissingReturnAnnotation() { - extractScalars(MethodMissingReturnAnnotation.class); + assertThatThrownBy(() -> extractScalars(MethodMissingReturnAnnotation.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* is missing @SqlType annotation"); } public static final class MethodMissingReturnAnnotation @@ -64,10 +72,12 @@ public static final class MethodMissingReturnAnnotation public static void bad() {} } - @Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "Method .* annotated with @SqlType is missing @ScalarFunction or @ScalarOperator") + @Test public void testMethodMissingScalarAnnotation() { - extractScalars(MethodMissingScalarAnnotation.class); + assertThatThrownBy(() -> extractScalars(MethodMissingScalarAnnotation.class)) + .isInstanceOf(TrinoException.class) + .hasMessageMatching("Method .* annotated with @SqlType is missing @ScalarFunction or @ScalarOperator"); } public static final class MethodMissingScalarAnnotation @@ -77,10 +87,12 @@ public static final class MethodMissingScalarAnnotation public static void bad() {} } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* has wrapper return type Long but is missing @SqlNullable") + @Test public void testPrimitiveWrapperReturnWithoutNullable() { - extractScalars(PrimitiveWrapperReturnWithoutNullable.class); + 
assertThatThrownBy(() -> extractScalars(PrimitiveWrapperReturnWithoutNullable.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* has wrapper return type Long but is missing @SqlNullable"); } public static final class PrimitiveWrapperReturnWithoutNullable @@ -93,10 +105,12 @@ public static Long bad() } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* annotated with @SqlNullable has primitive return type long") + @Test public void testPrimitiveReturnWithNullable() { - extractScalars(PrimitiveReturnWithNullable.class); + assertThatThrownBy(() -> extractScalars(PrimitiveReturnWithNullable.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* annotated with @SqlNullable has primitive return type long"); } public static final class PrimitiveReturnWithNullable @@ -110,10 +124,12 @@ public static long bad() } } - @Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "A parameter with USE_NULL_FLAG or RETURN_NULL_ON_NULL convention must not use wrapper type. Found in method .*") + @Test public void testPrimitiveWrapperParameterWithoutNullable() { - extractScalars(PrimitiveWrapperParameterWithoutNullable.class); + assertThatThrownBy(() -> extractScalars(PrimitiveWrapperParameterWithoutNullable.class)) + .isInstanceOf(TrinoException.class) + .hasMessageMatching("A parameter with USE_NULL_FLAG or RETURN_NULL_ON_NULL convention must not use wrapper type. Found in method .*"); } public static final class PrimitiveWrapperParameterWithoutNullable @@ -126,10 +142,12 @@ public static long bad(@SqlType(StandardTypes.BOOLEAN) Boolean boxed) } } - @Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "Method .* has parameter with primitive type double annotated with @SqlNullable") + @Test public void testPrimitiveParameterWithNullable() { - extractScalars(PrimitiveParameterWithNullable.class); + assertThatThrownBy(() -> extractScalars(PrimitiveParameterWithNullable.class)) + .isInstanceOf(TrinoException.class) + .hasMessageMatching("Method .* has parameter with primitive type double annotated with @SqlNullable"); } public static final class PrimitiveParameterWithNullable @@ -142,10 +160,12 @@ public static long bad(@SqlNullable @SqlType(StandardTypes.DOUBLE) double primit } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* is missing @SqlType annotation for parameter") + @Test public void testParameterWithoutType() { - extractScalars(ParameterWithoutType.class); + assertThatThrownBy(() -> extractScalars(ParameterWithoutType.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* is missing @SqlType annotation for parameter"); } public static final class ParameterWithoutType @@ -158,10 +178,12 @@ public static long bad(long missing) } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* annotated with @ScalarFunction must be public") + @Test public void testNonPublicAnnnotatedMethod() { - extractScalars(NonPublicAnnnotatedMethod.class); + assertThatThrownBy(() -> extractScalars(NonPublicAnnnotatedMethod.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* annotated with @ScalarFunction must be public"); } public static final class NonPublicAnnnotatedMethod @@ -174,10 +196,12 @@ private static long bad() } } - @Test(expectedExceptions = 
IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* is annotated with @Nullable but not @SqlNullable") + @Test public void testMethodWithLegacyNullable() { - extractScalars(MethodWithLegacyNullable.class); + assertThatThrownBy(() -> extractScalars(MethodWithLegacyNullable.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* is annotated with @Nullable but not @SqlNullable"); } public static final class MethodWithLegacyNullable @@ -191,10 +215,12 @@ public static Long bad() } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* has @IsNull parameter that does not follow a @SqlType parameter") + @Test public void testParameterWithConnectorAndIsNull() { - extractScalars(ParameterWithConnectorAndIsNull.class); + assertThatThrownBy(() -> extractScalars(ParameterWithConnectorAndIsNull.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* has @IsNull parameter that does not follow a @SqlType parameter"); } public static final class ParameterWithConnectorAndIsNull @@ -207,10 +233,12 @@ public static long bad(ConnectorSession session, @IsNull boolean isNull) } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* has @IsNull parameter that does not follow a @SqlType parameter") + @Test public void testParameterWithOnlyIsNull() { - extractScalars(ParameterWithOnlyIsNull.class); + assertThatThrownBy(() -> extractScalars(ParameterWithOnlyIsNull.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* has @IsNull parameter that does not follow a @SqlType parameter"); } public static final class ParameterWithOnlyIsNull @@ -223,10 +251,12 @@ public static long bad(@IsNull boolean isNull) } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* has non-boolean parameter with @IsNull") + @Test public void testParameterWithNonBooleanIsNull() { - extractScalars(ParameterWithNonBooleanIsNull.class); + assertThatThrownBy(() -> extractScalars(ParameterWithNonBooleanIsNull.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* has non-boolean parameter with @IsNull"); } public static final class ParameterWithNonBooleanIsNull @@ -239,10 +269,12 @@ public static long bad(@SqlType(StandardTypes.BIGINT) long value, @IsNull int is } } - @Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "A parameter with USE_NULL_FLAG or RETURN_NULL_ON_NULL convention must not use wrapper type. Found in method .*") + @Test public void testParameterWithBoxedPrimitiveIsNull() { - extractScalars(ParameterWithBoxedPrimitiveIsNull.class); + assertThatThrownBy(() -> extractScalars(ParameterWithBoxedPrimitiveIsNull.class)) + .isInstanceOf(TrinoException.class) + .hasMessageMatching("A parameter with USE_NULL_FLAG or RETURN_NULL_ON_NULL convention must not use wrapper type. 
Found in method .*"); } public static final class ParameterWithBoxedPrimitiveIsNull @@ -255,10 +287,12 @@ public static long bad(@SqlType(StandardTypes.BIGINT) Long value, @IsNull boolea } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Method .* has @IsNull parameter that has other annotations") + @Test public void testParameterWithOtherAnnotationsWithIsNull() { - extractScalars(ParameterWithOtherAnnotationsWithIsNull.class); + assertThatThrownBy(() -> extractScalars(ParameterWithOtherAnnotationsWithIsNull.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Method .* has @IsNull parameter that has other annotations"); } public static final class ParameterWithOtherAnnotationsWithIsNull @@ -271,10 +305,12 @@ public static long bad(@SqlType(StandardTypes.BIGINT) long value, @IsNull @SqlNu } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Expected type parameter to only contain A-Z and 0-9 \\(starting with A-Z\\), but got bad on method .*") + @Test public void testNonUpperCaseTypeParameters() { - extractScalars(TypeParameterWithNonUpperCaseAnnotation.class); + assertThatThrownBy(() -> extractScalars(TypeParameterWithNonUpperCaseAnnotation.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Expected type parameter to only contain A-Z and 0-9 \\(starting with A-Z\\), but got bad on method .*"); } public static final class TypeParameterWithNonUpperCaseAnnotation @@ -288,10 +324,12 @@ public static long bad(@TypeParameter("array(bad)") Type type, @SqlType(Standard } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Expected type parameter to only contain A-Z and 0-9 \\(starting with A-Z\\), but got 1E on method .*") + @Test public void testLeadingNumericTypeParameters() { - extractScalars(TypeParameterWithLeadingNumbers.class); + assertThatThrownBy(() -> extractScalars(TypeParameterWithLeadingNumbers.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Expected type parameter to only contain A-Z and 0-9 \\(starting with A-Z\\), but got 1E on method .*"); } public static final class TypeParameterWithLeadingNumbers @@ -305,10 +343,12 @@ public static long bad(@TypeParameter("array(1E)") Type type, @SqlType(StandardT } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Expected type parameter not to take parameters, but got 'e' on method .*") + @Test public void testNonPrimitiveTypeParameters() { - extractScalars(TypeParameterWithNonPrimitiveAnnotation.class); + assertThatThrownBy(() -> extractScalars(TypeParameterWithNonPrimitiveAnnotation.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Expected type parameter not to take parameters, but got 'e' on method .*"); } public static final class TypeParameterWithNonPrimitiveAnnotation @@ -357,10 +397,12 @@ public void testValidTypeParametersForConstructors() extractParametricScalar(ConstructorWithValidTypeParameters.class); } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Expected type parameter not to take parameters, but got 'k' on method .*") + @Test public void testInvalidTypeParametersForConstructors() { - extractParametricScalar(ConstructorWithInvalidTypeParameters.class); + assertThatThrownBy(() -> extractParametricScalar(ConstructorWithInvalidTypeParameters.class)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Expected type parameter not to take parameters, but got 'k' on method .*"); } private static void extractParametricScalar(Class clazz) diff --git a/core/trino-main/src/test/java/io/trino/security/TestAccessControlManager.java b/core/trino-main/src/test/java/io/trino/security/TestAccessControlManager.java index 7545f5d5227e5..961353ef7d3fc 100644 --- a/core/trino-main/src/test/java/io/trino/security/TestAccessControlManager.java +++ b/core/trino-main/src/test/java/io/trino/security/TestAccessControlManager.java @@ -67,8 +67,8 @@ import static io.trino.spi.security.AccessDeniedException.denySelectTable; import static io.trino.testing.TestingEventListenerManager.emptyEventListenerManager; import static io.trino.testing.TestingHandles.TEST_CATALOG_NAME; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.transaction.InMemoryTransactionManager.createTestTransactionManager; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.nio.file.Files.createTempFile; import static java.nio.file.StandardOpenOption.CREATE; import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING; diff --git a/core/trino-main/src/test/java/io/trino/security/TestFileBasedSystemAccessControl.java b/core/trino-main/src/test/java/io/trino/security/TestFileBasedSystemAccessControl.java index 010c930212bfc..2cb2f6533d9d5 100644 --- a/core/trino-main/src/test/java/io/trino/security/TestFileBasedSystemAccessControl.java +++ b/core/trino-main/src/test/java/io/trino/security/TestFileBasedSystemAccessControl.java @@ -50,8 +50,8 @@ import static io.trino.spi.security.PrincipalType.USER; import static io.trino.spi.security.Privilege.SELECT; import static io.trino.testing.TestingEventListenerManager.emptyEventListenerManager; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.transaction.InMemoryTransactionManager.createTestTransactionManager; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.lang.Thread.sleep; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; diff --git a/core/trino-main/src/test/java/io/trino/server/TestHttpRequestSessionContextFactory.java b/core/trino-main/src/test/java/io/trino/server/TestHttpRequestSessionContextFactory.java index 6e62951548188..8407e30b8b23c 100644 --- a/core/trino-main/src/test/java/io/trino/server/TestHttpRequestSessionContextFactory.java +++ b/core/trino-main/src/test/java/io/trino/server/TestHttpRequestSessionContextFactory.java @@ -40,12 +40,6 @@ public class TestHttpRequestSessionContextFactory { - private static final HttpRequestSessionContextFactory SESSION_CONTEXT_FACTORY = new HttpRequestSessionContextFactory( - new PreparedStatementEncoder(new ProtocolConfig()), - createTestMetadataManager(), - ImmutableSet::of, - new AllowAllAccessControl()); - @Test public void testSessionContext() { @@ -76,9 +70,8 @@ private static void assertSessionContext(ProtocolHeaders protocolHeaders) .put(protocolHeaders.requestExtraCredential(), "test.token.abc=xyz") .build()); - SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext( + SessionContext context = sessionContextFactory(protocolHeaders).createSessionContext( headers, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.empty()); assertThat(context.getSource().orElse(null)).isEqualTo("testSource"); @@ -118,31 
+111,27 @@ private static void assertMappedUser(ProtocolHeaders protocolHeaders) MultivaluedMap userHeaders = new GuavaMultivaluedMap<>(ImmutableListMultimap.of(protocolHeaders.requestUser(), "testUser")); MultivaluedMap emptyHeaders = new MultivaluedHashMap<>(); - SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext( + SessionContext context = sessionContextFactory(protocolHeaders).createSessionContext( userHeaders, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.empty()); assertThat(context.getIdentity()).isEqualTo(Identity.forUser("testUser").withGroups(ImmutableSet.of("testUser")).build()); - context = SESSION_CONTEXT_FACTORY.createSessionContext( + context = sessionContextFactory(protocolHeaders).createSessionContext( emptyHeaders, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.of(Identity.forUser("mappedUser").withGroups(ImmutableSet.of("test")).build())); assertThat(context.getIdentity()).isEqualTo(Identity.forUser("mappedUser").withGroups(ImmutableSet.of("test", "mappedUser")).build()); - context = SESSION_CONTEXT_FACTORY.createSessionContext( + context = sessionContextFactory(protocolHeaders).createSessionContext( userHeaders, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.of(Identity.ofUser("mappedUser"))); assertThat(context.getIdentity()).isEqualTo(Identity.forUser("testUser").withGroups(ImmutableSet.of("testUser")).build()); assertThatThrownBy( - () -> SESSION_CONTEXT_FACTORY.createSessionContext( + () -> sessionContextFactory(protocolHeaders).createSessionContext( emptyHeaders, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.empty())) .isInstanceOf(WebApplicationException.class) @@ -171,12 +160,22 @@ private static void assertPreparedStatementsHeaderDoesNotParse(ProtocolHeaders p .build()); assertThatThrownBy( - () -> SESSION_CONTEXT_FACTORY.createSessionContext( + () -> sessionContextFactory(protocolHeaders).createSessionContext( headers, - Optional.of(protocolHeaders.getProtocolName()), Optional.of("testRemote"), Optional.empty())) .isInstanceOf(WebApplicationException.class) .hasMessageMatching("Invalid " + protocolHeaders.requestPreparedStatement() + " header: line 1:1: mismatched input 'abcdefg'. 
Expecting: .*"); } + + private static HttpRequestSessionContextFactory sessionContextFactory(ProtocolHeaders headers) + { + return new HttpRequestSessionContextFactory( + new PreparedStatementEncoder(new ProtocolConfig()), + createTestMetadataManager(), + ImmutableSet::of, + new AllowAllAccessControl(), + new ProtocolConfig() + .setAlternateHeaderName(headers.getProtocolName())); + } } diff --git a/core/trino-main/src/test/java/io/trino/server/TestQuerySessionSupplier.java b/core/trino-main/src/test/java/io/trino/server/TestQuerySessionSupplier.java index 7cb8c3fd41bce..f75523215f747 100644 --- a/core/trino-main/src/test/java/io/trino/server/TestQuerySessionSupplier.java +++ b/core/trino-main/src/test/java/io/trino/server/TestQuerySessionSupplier.java @@ -71,12 +71,13 @@ public class TestQuerySessionSupplier new PreparedStatementEncoder(new ProtocolConfig()), createTestMetadataManager(), ImmutableSet::of, - new AllowAllAccessControl()); + new AllowAllAccessControl(), + new ProtocolConfig()); @Test public void testCreateSession() { - SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(TEST_HEADERS, Optional.empty(), Optional.of("testRemote"), Optional.empty()); + SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(TEST_HEADERS, Optional.of("testRemote"), Optional.empty()); QuerySessionSupplier sessionSupplier = createSessionSupplier(new SqlEnvironmentConfig()); Session session = sessionSupplier.createSession(new QueryId("test_query_id"), Span.getInvalid(), context); @@ -106,14 +107,14 @@ public void testCreateSession() public void testEmptyClientTags() { MultivaluedMap headers1 = new GuavaMultivaluedMap<>(ImmutableListMultimap.of(TRINO_HEADERS.requestUser(), "testUser")); - SessionContext context1 = SESSION_CONTEXT_FACTORY.createSessionContext(headers1, Optional.empty(), Optional.of("remoteAddress"), Optional.empty()); + SessionContext context1 = SESSION_CONTEXT_FACTORY.createSessionContext(headers1, Optional.of("remoteAddress"), Optional.empty()); assertThat(context1.getClientTags()).isEqualTo(ImmutableSet.of()); MultivaluedMap headers2 = new GuavaMultivaluedMap<>(ImmutableListMultimap.builder() .put(TRINO_HEADERS.requestUser(), "testUser") .put(TRINO_HEADERS.requestClientTags(), "") .build()); - SessionContext context2 = SESSION_CONTEXT_FACTORY.createSessionContext(headers2, Optional.empty(), Optional.of("remoteAddress"), Optional.empty()); + SessionContext context2 = SESSION_CONTEXT_FACTORY.createSessionContext(headers2, Optional.of("remoteAddress"), Optional.empty()); assertThat(context2.getClientTags()).isEqualTo(ImmutableSet.of()); } @@ -124,11 +125,11 @@ public void testClientCapabilities() .put(TRINO_HEADERS.requestUser(), "testUser") .put(TRINO_HEADERS.requestClientCapabilities(), "foo, bar") .build()); - SessionContext context1 = SESSION_CONTEXT_FACTORY.createSessionContext(headers1, Optional.empty(), Optional.of("remoteAddress"), Optional.empty()); + SessionContext context1 = SESSION_CONTEXT_FACTORY.createSessionContext(headers1, Optional.of("remoteAddress"), Optional.empty()); assertThat(context1.getClientCapabilities()).isEqualTo(ImmutableSet.of("foo", "bar")); MultivaluedMap headers2 = new GuavaMultivaluedMap<>(ImmutableListMultimap.of(TRINO_HEADERS.requestUser(), "testUser")); - SessionContext context2 = SESSION_CONTEXT_FACTORY.createSessionContext(headers2, Optional.empty(), Optional.of("remoteAddress"), Optional.empty()); + SessionContext context2 = SESSION_CONTEXT_FACTORY.createSessionContext(headers2, Optional.of("remoteAddress"), 
Optional.empty()); assertThat(context2.getClientCapabilities()).isEqualTo(ImmutableSet.of()); } @@ -139,7 +140,7 @@ public void testInvalidTimeZone() .put(TRINO_HEADERS.requestUser(), "testUser") .put(TRINO_HEADERS.requestTimeZone(), "unknown_timezone") .build()); - SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(headers, Optional.empty(), Optional.of("remoteAddress"), Optional.empty()); + SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(headers, Optional.of("remoteAddress"), Optional.empty()); QuerySessionSupplier sessionSupplier = createSessionSupplier(new SqlEnvironmentConfig()); assertThatThrownBy(() -> sessionSupplier.createSession(new QueryId("test_query_id"), Span.getInvalid(), context)) .isInstanceOf(TrinoException.class) @@ -230,7 +231,7 @@ public void testDefaultCatalogAndSchema() private static Session createSession(ListMultimap headers, SqlEnvironmentConfig config) { MultivaluedMap headerMap = new GuavaMultivaluedMap<>(headers); - SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(headerMap, Optional.empty(), Optional.of("testRemote"), Optional.empty()); + SessionContext context = SESSION_CONTEXT_FACTORY.createSessionContext(headerMap, Optional.of("testRemote"), Optional.empty()); QuerySessionSupplier sessionSupplier = createSessionSupplier(config); return sessionSupplier.createSession(new QueryId("test_query_id"), Span.getInvalid(), context); } diff --git a/core/trino-main/src/test/java/io/trino/server/security/TestResourceSecurity.java b/core/trino-main/src/test/java/io/trino/server/security/TestResourceSecurity.java index c1d7139e5e02d..2452cfc8958ec 100644 --- a/core/trino-main/src/test/java/io/trino/server/security/TestResourceSecurity.java +++ b/core/trino-main/src/test/java/io/trino/server/security/TestResourceSecurity.java @@ -58,9 +58,10 @@ import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import javax.crypto.SecretKey; @@ -122,7 +123,11 @@ import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.MINUTES; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestResourceSecurity { private static final String LOCALHOST_KEYSTORE = Resources.getResource("cert/localhost.pem").getPath(); @@ -164,7 +169,7 @@ public class TestResourceSecurity private OkHttpClient client; private Path passwordConfigDummy; - @BeforeClass + @BeforeAll public void setup() throws IOException { @@ -784,8 +789,16 @@ public HttpCookie getNonceCookie() } } - @Test(dataProvider = "groups") - public void testOAuth2Groups(Optional> groups) + @Test + public void testOAuth2Groups() + throws Exception + { + testOAuth2Groups(Optional.empty()); + testOAuth2Groups(Optional.of(ImmutableSet.of())); + testOAuth2Groups(Optional.of(ImmutableSet.of("admin", "public"))); + } + + private void testOAuth2Groups(Optional> groups) throws Exception { try (TokenServer tokenServer = new TokenServer(Optional.empty()); @@ -855,18 +868,15 @@ public List loadForRequest(HttpUrl url) } } - @DataProvider(name = 
"groups") - public static Object[][] groups() + @Test + public void testJwtAndOAuth2AuthenticatorsSeparation() + throws Exception { - return new Object[][] { - {Optional.empty()}, - {Optional.of(ImmutableSet.of())}, - {Optional.of(ImmutableSet.of("admin", "public"))} - }; + testJwtAndOAuth2AuthenticatorsSeparation("jwt,oauth2"); + testJwtAndOAuth2AuthenticatorsSeparation("oauth2,jwt"); } - @Test(dataProvider = "authenticators") - public void testJwtAndOAuth2AuthenticatorsSeparation(String authenticators) + private void testJwtAndOAuth2AuthenticatorsSeparation(String authenticators) throws Exception { TestingHttpServer jwkServer = createTestingJwkServer(); @@ -914,15 +924,6 @@ public void testJwtAndOAuth2AuthenticatorsSeparation(String authenticators) } } - @DataProvider(name = "authenticators") - public static Object[][] authenticators() - { - return new Object[][] { - {"jwt,oauth2"}, - {"oauth2,jwt"} - }; - } - @Test public void testJwtWithRefreshTokensForOAuth2Enabled() throws Exception @@ -1185,7 +1186,8 @@ public TestResource(AccessControl accessControl) new PreparedStatementEncoder(new ProtocolConfig()), createTestMetadataManager(), user -> ImmutableSet.of(), - accessControl); + accessControl, + new ProtocolConfig()); } @ResourceSecurity(AUTHENTICATED_USER) @@ -1206,7 +1208,7 @@ public jakarta.ws.rs.core.Response webUiIdentity(@Context HttpServletRequest ser public jakarta.ws.rs.core.Response echoIdentity(HttpServletRequest servletRequest, HttpHeaders httpHeaders) { - Identity identity = sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, Optional.empty()); + Identity identity = sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders); return jakarta.ws.rs.core.Response.ok() .header("user", identity.getUser()) .header("principal", identity.getPrincipal().map(Principal::getName).orElse(null)) diff --git a/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestJweTokenSerializer.java b/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestJweTokenSerializer.java index adfeb76d2c9a5..918cd48e784ef 100644 --- a/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestJweTokenSerializer.java +++ b/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestJweTokenSerializer.java @@ -18,8 +18,7 @@ import io.jsonwebtoken.ExpiredJwtException; import io.jsonwebtoken.Jwts; import io.trino.server.security.oauth2.TokenPairSerializer.TokenPair; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.net.URI; import java.security.GeneralSecurityException; @@ -60,24 +59,29 @@ public void testSerialization() assertThat(deserializedTokenPair.refreshToken()).isEqualTo(Optional.of("refresh_token")); } - @Test(dataProvider = "wrongSecretsProvider") - public void testDeserializationWithWrongSecret(String encryptionSecret, String decryptionSecret) + @Test + public void testDeserializationWithWrongSecret() { - assertThatThrownBy(() -> assertRoundTrip(Optional.ofNullable(encryptionSecret), Optional.ofNullable(decryptionSecret))) + assertThatThrownBy(() -> assertRoundTrip(Optional.of(randomEncodedSecret()), Optional.of(randomEncodedSecret()))) .isInstanceOf(RuntimeException.class) .hasMessageContaining("decryption failed: Tag mismatch"); - } - @DataProvider - public Object[][] wrongSecretsProvider() - { - return new Object[][]{ - {randomEncodedSecret(), randomEncodedSecret()}, - {randomEncodedSecret(16), randomEncodedSecret(24)}, - {null, null}, // This 
will generate two different secret keys - {null, randomEncodedSecret()}, - {randomEncodedSecret(), null} - }; + assertThatThrownBy(() -> assertRoundTrip(Optional.of(randomEncodedSecret(16)), Optional.of(randomEncodedSecret(24)))) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("decryption failed: Tag mismatch"); + + // This will generate two different secret keys + assertThatThrownBy(() -> assertRoundTrip(Optional.empty(), Optional.empty())) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("decryption failed: Tag mismatch"); + + assertThatThrownBy(() -> assertRoundTrip(Optional.empty(), Optional.of(randomEncodedSecret()))) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("decryption failed: Tag mismatch"); + + assertThatThrownBy(() -> assertRoundTrip(Optional.of(randomEncodedSecret()), Optional.empty())) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("decryption failed: Tag mismatch"); } @Test diff --git a/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestOidcDiscovery.java b/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestOidcDiscovery.java index bac63b99be72d..ee2871dcf2847 100644 --- a/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestOidcDiscovery.java +++ b/core/trino-main/src/test/java/io/trino/server/security/oauth2/TestOidcDiscovery.java @@ -29,8 +29,7 @@ import jakarta.servlet.http.HttpServlet; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.net.URI; @@ -49,8 +48,15 @@ public class TestOidcDiscovery { - @Test(dataProvider = "staticConfiguration") - public void testStaticConfiguration(Optional accessTokenPath, Optional userinfoPath) + @Test + public void testStaticConfiguration() + throws Exception + { + testStaticConfiguration(Optional.empty(), Optional.empty()); + testStaticConfiguration(Optional.of("/access-token-issuer"), Optional.of("/userinfo")); + } + + private void testStaticConfiguration(Optional accessTokenPath, Optional userinfoPath) throws Exception { try (MetadataServer metadataServer = new MetadataServer(ImmutableMap.of("/jwks.json", "jwk/jwk-public.json"))) { @@ -72,17 +78,16 @@ public void testStaticConfiguration(Optional accessTokenPath, Optional accessTokenIssuer, Optional userinfoUrl) + private void testOidcDiscovery(String configuration, Optional accessTokenIssuer, Optional userinfoUrl) throws Exception { try (MetadataServer metadataServer = new MetadataServer( @@ -100,16 +105,6 @@ public void testOidcDiscovery(String configuration, Optional accessToken } } - @DataProvider(name = "oidcDiscovery") - public static Object[][] oidcDiscovery() - { - return new Object[][] { - {"openid-configuration.json", Optional.empty(), Optional.of("/connect/userinfo")}, - {"openid-configuration-without-userinfo.json", Optional.empty(), Optional.empty()}, - {"openid-configuration-with-access-token-issuer.json", Optional.of("http://access-token-issuer.com/adfs/services/trust"), Optional.of("/connect/userinfo")}, - }; - } - @Test public void testIssuerCheck() { diff --git a/core/trino-main/src/test/java/io/trino/server/ui/TestWebUi.java b/core/trino-main/src/test/java/io/trino/server/ui/TestWebUi.java index 838fd26d7f953..5494ac654c52f 100644 --- a/core/trino-main/src/test/java/io/trino/server/ui/TestWebUi.java +++ 
b/core/trino-main/src/test/java/io/trino/server/ui/TestWebUi.java @@ -100,7 +100,7 @@ import static io.jsonwebtoken.security.Keys.hmacShaKeyFor; import static io.trino.client.OkHttpUtil.setupSsl; import static io.trino.metadata.MetadataManager.createTestMetadataManager; -import static io.trino.server.HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY; +import static io.trino.server.ServletSecurityUtils.authenticatedIdentity; import static io.trino.server.security.ResourceSecurity.AccessType.WEB_UI; import static io.trino.server.security.jwt.JwtUtil.newJwtBuilder; import static io.trino.server.security.oauth2.OAuth2CallbackResource.CALLBACK_ENDPOINT; @@ -423,14 +423,15 @@ public TestResource(AccessControl accessControl) new PreparedStatementEncoder(new ProtocolConfig()), createTestMetadataManager(), ImmutableSet::of, - accessControl); + accessControl, + new ProtocolConfig()); } @ResourceSecurity(WEB_UI) @GET public jakarta.ws.rs.core.Response echoToken(@Context HttpServletRequest servletRequest, @Context HttpHeaders httpHeaders) { - Identity identity = sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders, Optional.empty()); + Identity identity = sessionContextFactory.extractAuthorizedIdentity(servletRequest, httpHeaders); return jakarta.ws.rs.core.Response.ok() .header("user", identity.getUser()) .build(); @@ -1399,7 +1400,7 @@ private static class AuthenticatedIdentityCapturingFilter public synchronized void filter(ContainerRequestContext request) throws IOException { - Optional identity = Optional.ofNullable((Identity) request.getProperty(AUTHENTICATED_IDENTITY)); + Optional identity = authenticatedIdentity(request); if (identity.map(Identity::getUser).filter(not(""::equals)).isPresent()) { if (authenticatedIdentity == null) { authenticatedIdentity = identity.get(); diff --git a/core/trino-main/src/test/java/io/trino/spiller/TestFileSingleStreamSpillerFactory.java b/core/trino-main/src/test/java/io/trino/spiller/TestFileSingleStreamSpillerFactory.java index c53d8bfcf75da..1d4d283bf0e24 100644 --- a/core/trino-main/src/test/java/io/trino/spiller/TestFileSingleStreamSpillerFactory.java +++ b/core/trino-main/src/test/java/io/trino/spiller/TestFileSingleStreamSpillerFactory.java @@ -218,7 +218,7 @@ public void testCacheInvalidatedOnBadDisk() // Set second spiller path to read-only after initialization to emulate a disk failing during runtime setPosixFilePermissions(spillPath2.toPath(), ImmutableSet.of(PosixFilePermission.OWNER_READ)); - assertThatThrownBy(() -> { getUnchecked(singleStreamSpiller2.spill(page)); }) + assertThatThrownBy(() -> getUnchecked(singleStreamSpiller2.spill(page))) .isInstanceOf(com.google.common.util.concurrent.UncheckedExecutionException.class) .hasMessageContaining("Failed to spill pages"); spillers.add(singleStreamSpiller2); diff --git a/core/trino-main/src/test/java/io/trino/sql/ExpressionTestUtils.java b/core/trino-main/src/test/java/io/trino/sql/ExpressionTestUtils.java index f289a9e287dc0..5be0d4ab4667c 100644 --- a/core/trino-main/src/test/java/io/trino/sql/ExpressionTestUtils.java +++ b/core/trino-main/src/test/java/io/trino/sql/ExpressionTestUtils.java @@ -43,10 +43,7 @@ import static io.trino.sql.analyzer.SemanticExceptions.semanticException; import static io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; -import static io.trino.transaction.TransactionBuilder.transaction; -import static org.testng.internal.EclipseInterface.ASSERT_LEFT; -import static 
org.testng.internal.EclipseInterface.ASSERT_MIDDLE; -import static org.testng.internal.EclipseInterface.ASSERT_RIGHT; +import static io.trino.testing.TransactionBuilder.transaction; public final class ExpressionTestUtils { @@ -73,7 +70,7 @@ private static void failNotEqual(Object actual, Object expected, String message) if (message != null) { formatted = message + " "; } - throw new AssertionError(formatted + ASSERT_LEFT + expected + ASSERT_MIDDLE + actual + ASSERT_RIGHT); + throw new AssertionError(formatted + " expected [" + expected + "] but found [" + actual + "]"); } public static Expression createExpression(Session session, String expression, TransactionManager transactionManager, PlannerContext plannerContext, TypeProvider symbolTypes) diff --git a/core/trino-main/src/test/java/io/trino/sql/TestExpressionInterpreter.java b/core/trino-main/src/test/java/io/trino/sql/TestExpressionInterpreter.java index 70fa383e6af33..71b8383a61a6c 100644 --- a/core/trino-main/src/test/java/io/trino/sql/TestExpressionInterpreter.java +++ b/core/trino-main/src/test/java/io/trino/sql/TestExpressionInterpreter.java @@ -71,8 +71,8 @@ import static io.trino.sql.ExpressionUtils.rewriteIdentifiersToSymbolReferences; import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; -import static io.trino.transaction.TransactionBuilder.transaction; import static io.trino.type.DateTimes.scaleEpochMillisToMicros; import static io.trino.type.IntervalDayTimeType.INTERVAL_DAY_TIME; import static java.lang.String.format; diff --git a/core/trino-main/src/test/java/io/trino/sql/analyzer/TestAnalyzer.java b/core/trino-main/src/test/java/io/trino/sql/analyzer/TestAnalyzer.java index fb8f3dac141fd..8478d856e9c2f 100644 --- a/core/trino-main/src/test/java/io/trino/sql/analyzer/TestAnalyzer.java +++ b/core/trino-main/src/test/java/io/trino/sql/analyzer/TestAnalyzer.java @@ -118,6 +118,7 @@ import static io.trino.spi.StandardErrorCode.COLUMN_NOT_FOUND; import static io.trino.spi.StandardErrorCode.COLUMN_TYPE_UNKNOWN; import static io.trino.spi.StandardErrorCode.DUPLICATE_COLUMN_NAME; +import static io.trino.spi.StandardErrorCode.DUPLICATE_COLUMN_OR_PATH_NAME; import static io.trino.spi.StandardErrorCode.DUPLICATE_NAMED_QUERY; import static io.trino.spi.StandardErrorCode.DUPLICATE_PARAMETER_NAME; import static io.trino.spi.StandardErrorCode.DUPLICATE_PROPERTY; @@ -143,6 +144,7 @@ import static io.trino.spi.StandardErrorCode.INVALID_PARTITION_BY; import static io.trino.spi.StandardErrorCode.INVALID_PATH; import static io.trino.spi.StandardErrorCode.INVALID_PATTERN_RECOGNITION_FUNCTION; +import static io.trino.spi.StandardErrorCode.INVALID_PLAN; import static io.trino.spi.StandardErrorCode.INVALID_PROCESSING_MODE; import static io.trino.spi.StandardErrorCode.INVALID_RANGE; import static io.trino.spi.StandardErrorCode.INVALID_RECURSIVE_REFERENCE; @@ -160,6 +162,7 @@ import static io.trino.spi.StandardErrorCode.MISSING_GROUP_BY; import static io.trino.spi.StandardErrorCode.MISSING_ORDER_BY; import static io.trino.spi.StandardErrorCode.MISSING_OVER; +import static io.trino.spi.StandardErrorCode.MISSING_PATH_NAME; import static io.trino.spi.StandardErrorCode.MISSING_ROW_PATTERN; import static io.trino.spi.StandardErrorCode.MISSING_SCHEMA_NAME; import static 
io.trino.spi.StandardErrorCode.MISSING_VARIABLE_DEFINITIONS; @@ -199,8 +202,8 @@ import static io.trino.testing.TestingAccessControlManager.privilege; import static io.trino.testing.TestingEventListenerManager.emptyEventListenerManager; import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; -import static io.trino.transaction.TransactionBuilder.transaction; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.lang.String.format; import static java.util.Collections.emptyList; @@ -6738,11 +6741,541 @@ public void testTableFunctionRequiredColumns() } @Test - public void testJsonTable() + public void testJsonTableColumnTypes() { - assertFails("SELECT * FROM JSON_TABLE('[1, 2, 3]', 'lax $[2]' COLUMNS(o FOR ORDINALITY))") + // ordinality column + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS( + o FOR ORDINALITY)) + """); + + // regular column + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + id BIGINT + PATH 'lax $[1]' + DEFAULT 0 ON EMPTY + ERROR ON ERROR)) + """); + + // formatted column + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + id VARBINARY + FORMAT JSON ENCODING UTF16 + PATH 'lax $[1]' + WITHOUT WRAPPER + OMIT QUOTES + EMPTY ARRAY ON EMPTY + NULL ON ERROR)) + """); + + // nested columns + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + NESTED PATH 'lax $[*]' AS nested_path COLUMNS ( + o FOR ORDINALITY, + id BIGINT PATH 'lax $[1]'))) + """); + } + + @Test + public void testJsonTableColumnAndPathNameUniqueness() + { + // root path is named + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' AS root_path + COLUMNS( + o FOR ORDINALITY)) + """); + + // nested path is named + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + NESTED PATH 'lax $[*]' AS nested_path COLUMNS ( + o FOR ORDINALITY))) + """); + + // root and nested paths are named + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $[*]' AS nested_path COLUMNS ( + o FOR ORDINALITY))) + """); + + // duplicate path name + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS some_path + COLUMNS( + NESTED PATH 'lax $[*]' AS some_path COLUMNS ( + o FOR ORDINALITY))) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 6:35: All column and path names in JSON_TABLE invocation must be unique"); + + // duplicate column name + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS( + id FOR ORDINALITY, + id BIGINT)) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 7:9: All column and path names in JSON_TABLE invocation must be unique"); + + // column and path names are the same + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' AS some_name + COLUMNS( + some_name FOR ORDINALITY)) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 6:9: All column and path names in JSON_TABLE invocation must be unique"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + NESTED PATH 'lax $[*]' AS some_name COLUMNS ( + some_name FOR ORDINALITY))) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 7:13: All column and path 
names in JSON_TABLE invocation must be unique"); + + // duplicate name is deeply nested + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS( + NESTED PATH 'lax $[*]' AS some_name COLUMNS ( + NESTED PATH 'lax $' AS another_name COLUMNS ( + NESTED PATH 'lax $' AS yet_another_name COLUMNS ( + some_name FOR ORDINALITY))))) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 9:21: All column and path names in JSON_TABLE invocation must be unique"); + } + + @Test + public void testJsonTableColumnAndPathNameIdentifierSemantics() + { + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' AS some_name + COLUMNS( + Some_Name FOR ORDINALITY)) + """) + .hasErrorCode(DUPLICATE_COLUMN_OR_PATH_NAME) + .hasMessage("line 6:9: All column and path names in JSON_TABLE invocation must be unique"); + + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' AS some_name + COLUMNS( + "some_name" FOR ORDINALITY)) + """); + } + + @Test + public void testJsonTableOutputColumns() + { + analyze(""" + SELECT a, b, c, d, e + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS( + a FOR ORDINALITY, + b BIGINT, + c VARBINARY FORMAT JSON ENCODING UTF16, + NESTED PATH 'lax $[*]' COLUMNS ( + d FOR ORDINALITY, + e BIGINT))) + """); + } + + @Test + public void testImplicitJsonPath() + { + // column name: Ab + // canonical name: AB + // implicit path: lax $."AB" + // resolved member accessor: $.AB + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS(Ab BIGINT)) + """); + + // column name: Ab + // canonical name: Ab + // implicit path: lax $."Ab" + // resolved member accessor: $.Ab + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS("Ab" BIGINT)) + """); + + // column name: ? + // canonical name: ? + // implicit path: lax $."?" + // resolved member accessor: $.? + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS("?" BIGINT)) + """); + + // column name: " + // canonical name: " + // implicit path: lax $."""" + // resolved member accessor $." + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS("\"\"" BIGINT)) + """); + } + + @Test + public void testJsonTableSpecificPlan() + { + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS(id BIGINT) + PLAN (root_path)) + """) + .hasErrorCode(MISSING_PATH_NAME) + .hasMessage("line 3:5: All JSON paths must be named when specific plan is given"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' AS root_path + COLUMNS(id BIGINT) + PLAN (root_path UNION another_path)) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 6:11: JSON_TABLE plan must either be a single path name or it must be rooted in parent-child relationship (OUTER or INNER)"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS(id BIGINT) + PLAN (another_path)) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 6:11: JSON_TABLE plan should contain all JSON paths available at each level of nesting. 
Paths not included: ROOT_PATH"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' COLUMNS(id BIGINT)) + PLAN (root_path OUTER another_path)) + """) + .hasErrorCode(MISSING_PATH_NAME) + .hasMessage("line 6:21: All JSON paths must be named when specific plan is given"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' AS nested_path_1 COLUMNS(id_1 BIGINT), + NESTED PATH 'lax $' AS nested_path_2 COLUMNS(id_2 BIGINT)) + PLAN (root_path OUTER (nested_path_1 CROSS another_path))) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 8:11: JSON_TABLE plan should contain all JSON paths available at each level of nesting. Paths not included: NESTED_PATH_2"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' AS nested_path_1 COLUMNS(id_1 BIGINT), + NESTED PATH 'lax $' AS nested_path_2 COLUMNS(id_2 BIGINT)) + PLAN (root_path OUTER (nested_path_1 CROSS another_path CROSS nested_path_2))) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 8:11: JSON_TABLE plan includes unavailable JSON path names: ANOTHER_PATH"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' AS nested_path_1 COLUMNS(id_1 BIGINT), + NESTED PATH 'lax $' AS nested_path_2 COLUMNS( + id_2 BIGINT, + NESTED PATH 'lax $' AS nested_path_3 COLUMNS(id_3 BIGINT))) + PLAN (root_path OUTER (nested_path_1 CROSS (nested_path_2 UNION nested_path_3)))) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 10:11: JSON_TABLE plan includes unavailable JSON path names: NESTED_PATH_3"); // nested_path_3 is on another nesting level + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' AS nested_path_1 COLUMNS(id_1 BIGINT), + NESTED PATH 'lax $' AS nested_path_2 COLUMNS(id_2 BIGINT)) + PLAN (root_path OUTER (nested_path_1 CROSS (nested_path_2 UNION nested_path_1)))) + """) + .hasErrorCode(INVALID_PLAN) + .hasMessage("line 8:69: Duplicate reference to JSON path name in sibling plan: NESTED_PATH_1"); + + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' AS nested_path_1 COLUMNS(id_1 BIGINT), + NESTED PATH 'lax $' AS nested_path_2 COLUMNS( + id_2 BIGINT, + NESTED PATH 'lax $' AS nested_path_3 COLUMNS(id_3 BIGINT))) + PLAN (root_path OUTER (nested_path_1 CROSS (nested_path_2 INNER nested_path_3)))) + """); + } + + @Test + public void testJsonTableDefaultPlan() + { + analyze(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS(id BIGINT) + PLAN DEFAULT(CROSS, INNER)) + """); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $' COLUMNS(id BIGINT)) + PLAN DEFAULT(OUTER, UNION)) + """) + .hasErrorCode(MISSING_PATH_NAME) + .hasMessage("line 6:21: All nested JSON paths must be named when default plan is given"); + } + + @Test + public void tstJsonTableInJoin() + { + analyze(""" + SELECT * + FROM t1, t2, JSON_TABLE('[1, 2, 3]', 'lax $[2]' COLUMNS(o FOR ORDINALITY)) + """); + + // join condition + analyze(""" + SELECT * + FROM t1 + LEFT JOIN + JSON_TABLE('[1, 2, 3]', 'lax $[2]' COLUMNS(o FOR ORDINALITY)) + ON TRUE + """); + + assertFails(""" + SELECT * + FROM t1 + RIGHT JOIN + JSON_TABLE('[1, 2, 3]', 'lax $[2]' COLUMNS(o FOR ORDINALITY)) t + ON t.o > t1.a + """) 
.hasErrorCode(NOT_SUPPORTED) - .hasMessage("line 1:15: JSON_TABLE is not yet supported"); + .hasMessage("line 5:12: RIGHT JOIN involving JSON_TABLE is only supported with condition ON TRUE"); + + // correlation in context item + analyze(""" + SELECT * + FROM t6 + LEFT JOIN + JSON_TABLE(b, 'lax $[2]' COLUMNS(o FOR ORDINALITY)) + ON TRUE + """); + + // correlation in default value + analyze(""" + SELECT * + FROM t6 + LEFT JOIN + JSON_TABLE('[1, 2, 3]', 'lax $[2]' COLUMNS(x BIGINT DEFAULT a ON EMPTY)) + ON TRUE + """); + + // correlation in path parameter + analyze(""" + SELECT * + FROM t6 + LEFT JOIN + JSON_TABLE('[1, 2, 3]', 'lax $[2]' PASSING a AS parameter_name COLUMNS(o FOR ORDINALITY)) + ON TRUE + """); + + // invalid correlation in right join + assertFails(""" + SELECT * + FROM t6 + RIGHT JOIN + JSON_TABLE('[1, 2, 3]', 'lax $[2]' PASSING a AS parameter_name COLUMNS(o FOR ORDINALITY)) + ON TRUE + """) + .hasErrorCode(INVALID_COLUMN_REFERENCE) + .hasMessage("line 4:48: LATERAL reference not allowed in RIGHT JOIN"); + } + + @Test + public void testSubqueryInJsonTable() + { + analyze(""" + SELECT * + FROM JSON_TABLE( + (SELECT '[1, 2, 3]'), + 'lax $[2]' PASSING (SELECT 1) AS parameter_name + COLUMNS( + x BIGINT DEFAULT (SELECT 2) ON EMPTY)) + """); + } + + @Test + public void testAggregationInJsonTable() + { + assertFails(""" + SELECT * + FROM JSON_TABLE( + CAST(sum(1) AS varchar), + 'lax $' PASSING 2 AS parameter_name + COLUMNS( + x BIGINT DEFAULT 3 ON EMPTY DEFAULT 4 ON ERROR)) + """) + .hasErrorCode(EXPRESSION_NOT_SCALAR) + .hasMessage("line 3:5: JSON_TABLE input expression cannot contain aggregations, window functions or grouping operations: [sum(1)]"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '1', + 'lax $' PASSING avg(2) AS parameter_name + COLUMNS( + x BIGINT DEFAULT 3 ON EMPTY DEFAULT 4 ON ERROR)) + """) + .hasErrorCode(EXPRESSION_NOT_SCALAR) + .hasMessage("line 4:21: JSON_TABLE path parameter cannot contain aggregations, window functions or grouping operations: [avg(2)]"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '1', + 'lax $' PASSING 2 AS parameter_name + COLUMNS( + x BIGINT DEFAULT min(3) ON EMPTY DEFAULT 4 ON ERROR)) + """) + .hasErrorCode(EXPRESSION_NOT_SCALAR) + .hasMessage("line 6:26: default expression for JSON_TABLE column cannot contain aggregations, window functions or grouping operations: [min(3)]"); + + assertFails(""" + SELECT * + FROM JSON_TABLE( + '1', + 'lax $' PASSING 2 AS parameter_name + COLUMNS( + x BIGINT DEFAULT 3 ON EMPTY DEFAULT max(4) ON ERROR)) + """) + .hasErrorCode(EXPRESSION_NOT_SCALAR) + .hasMessage("line 6:45: default expression for JSON_TABLE column cannot contain aggregations, window functions or grouping operations: [max(4)]"); + } + + @Test + public void testAliasJsonTable() + { + analyze(""" + SELECT t.y + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS(x BIGINT)) t(y) + """); + + analyze(""" + SELECT t.x + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $[2]' + COLUMNS(x BIGINT)) t + """); } @Test @@ -6847,9 +7380,14 @@ public void setup() Optional.of("comment"), Identity.ofUser("user"), ImmutableList.of(), - Optional.empty(), - ImmutableMap.of()); - inSetupTransaction(session -> metadata.createMaterializedView(session, new QualifiedObjectName(TPCH_CATALOG, "s1", "mv1"), materializedViewData1, false, true)); + Optional.empty()); + inSetupTransaction(session -> metadata.createMaterializedView( + session, + new QualifiedObjectName(TPCH_CATALOG, "s1", "mv1"), + materializedViewData1, + ImmutableMap.of(), + false, + true)); // 
valid view referencing table in same schema ViewDefinition viewData1 = new ViewDefinition( @@ -6971,8 +7509,8 @@ public void setup() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t1")), - ImmutableMap.of()), + Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t1"))), + ImmutableMap.of(), false, false)); ViewDefinition viewDefinition = new ViewDefinition( @@ -7024,8 +7562,8 @@ public void setup() Identity.ofUser("some user"), ImmutableList.of(), // t3 has a, b column and hidden column x - Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t3")), - ImmutableMap.of()), + Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t3"))), + ImmutableMap.of(), false, false)); testingConnectorMetadata.markMaterializedViewIsFresh(freshMaterializedView.asSchemaTableName()); @@ -7043,8 +7581,8 @@ public void setup() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2")), - ImmutableMap.of()), + Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2"))), + ImmutableMap.of(), false, false)); testingConnectorMetadata.markMaterializedViewIsFresh(freshMaterializedViewMismatchedColumnCount.asSchemaTableName()); @@ -7062,8 +7600,8 @@ public void setup() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2")), - ImmutableMap.of()), + Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2"))), + ImmutableMap.of(), false, false)); testingConnectorMetadata.markMaterializedViewIsFresh(freshMaterializedMismatchedColumnName.asSchemaTableName()); @@ -7081,8 +7619,8 @@ public void setup() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2")), - ImmutableMap.of()), + Optional.of(new CatalogSchemaTableName(TPCH_CATALOG, "s1", "t2"))), + ImmutableMap.of(), false, false)); testingConnectorMetadata.markMaterializedViewIsFresh(freshMaterializedMismatchedColumnType.asSchemaTableName()); diff --git a/core/trino-main/src/test/java/io/trino/sql/gen/TestInCodeGenerator.java b/core/trino-main/src/test/java/io/trino/sql/gen/TestInCodeGenerator.java index 2fcaa99882b5c..9bfb5af900252 100644 --- a/core/trino-main/src/test/java/io/trino/sql/gen/TestInCodeGenerator.java +++ b/core/trino-main/src/test/java/io/trino/sql/gen/TestInCodeGenerator.java @@ -43,9 +43,9 @@ public class TestInCodeGenerator public void testInteger() { List values = new ArrayList<>(); - values.add(constant(Integer.MIN_VALUE, INTEGER)); - values.add(constant(Integer.MAX_VALUE, INTEGER)); - values.add(constant(3, INTEGER)); + values.add(constant((long) Integer.MIN_VALUE, INTEGER)); + values.add(constant((long) Integer.MAX_VALUE, INTEGER)); + values.add(constant(3L, INTEGER)); assertThat(checkSwitchGenerationCase(INTEGER, values)).isEqualTo(DIRECT_SWITCH); values.add(constant(null, INTEGER)); @@ -55,11 +55,11 @@ public void testInteger() Collections.singletonList(constant(12345678901234.0, DOUBLE)))); assertThat(checkSwitchGenerationCase(INTEGER, values)).isEqualTo(DIRECT_SWITCH); - values.add(constant(6, BIGINT)); - values.add(constant(7, BIGINT)); + values.add(constant(6L, BIGINT)); + values.add(constant(7L, BIGINT)); assertThat(checkSwitchGenerationCase(INTEGER, values)).isEqualTo(DIRECT_SWITCH); - values.add(constant(8, INTEGER)); + values.add(constant(8L, INTEGER)); 
assertThat(checkSwitchGenerationCase(INTEGER, values)).isEqualTo(SET_CONTAINS); } @@ -130,9 +130,9 @@ public void testDouble() public void testVarchar() { List values = new ArrayList<>(); - values.add(constant(Slices.utf8Slice("1"), DOUBLE)); - values.add(constant(Slices.utf8Slice("2"), DOUBLE)); - values.add(constant(Slices.utf8Slice("3"), DOUBLE)); + values.add(constant(Slices.utf8Slice("1"), VARCHAR)); + values.add(constant(Slices.utf8Slice("2"), VARCHAR)); + values.add(constant(Slices.utf8Slice("3"), VARCHAR)); assertThat(checkSwitchGenerationCase(VARCHAR, values)).isEqualTo(HASH_SWITCH); values.add(constant(null, VARCHAR)); diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/AbstractPredicatePushdownTest.java b/core/trino-main/src/test/java/io/trino/sql/planner/AbstractPredicatePushdownTest.java index 5d6ebc84b220d..476b697ffc14f 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/AbstractPredicatePushdownTest.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/AbstractPredicatePushdownTest.java @@ -13,6 +13,7 @@ */ package io.trino.sql.planner; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.trino.Session; import io.trino.sql.planner.assertions.BasePlanTest; @@ -39,7 +40,9 @@ import static io.trino.sql.planner.assertions.PlanMatchPattern.semiJoin; import static io.trino.sql.planner.assertions.PlanMatchPattern.tableScan; import static io.trino.sql.planner.assertions.PlanMatchPattern.values; +import static io.trino.sql.planner.plan.JoinNode.Type.INNER; import static io.trino.sql.planner.plan.JoinNode.Type.LEFT; +import static io.trino.sql.tree.BooleanLiteral.TRUE_LITERAL; public abstract class AbstractPredicatePushdownTest extends BasePlanTest @@ -424,33 +427,31 @@ public void testRemovesRedundantTableScanPredicate() @Test public void testTablePredicateIsExtracted() { + PlanMatchPattern ordersTableScan = tableScan("orders", ImmutableMap.of("ORDERSTATUS", "orderstatus")); + if (enableDynamicFiltering) { + ordersTableScan = filter(TRUE_LITERAL, ordersTableScan); + } assertPlan( "SELECT * FROM orders, nation WHERE orderstatus = CAST(nation.name AS varchar(1)) AND orderstatus BETWEEN 'A' AND 'O'", anyTree( node(JoinNode.class, + ordersTableScan, anyTree( filter("CAST(NAME AS varchar(1)) IN ('F', 'O')", tableScan( "nation", - ImmutableMap.of("NAME", "name")))), - anyTree( - tableScan( - "orders", - ImmutableMap.of("ORDERSTATUS", "orderstatus")))))); + ImmutableMap.of("NAME", "name"))))))); assertPlan( "SELECT * FROM orders JOIN nation ON orderstatus = CAST(nation.name AS varchar(1))", anyTree( node(JoinNode.class, + ordersTableScan, anyTree( filter("CAST(NAME AS varchar(1)) IN ('F', 'O', 'P')", tableScan( "nation", - ImmutableMap.of("NAME", "name")))), - anyTree( - tableScan( - "orders", - ImmutableMap.of("ORDERSTATUS", "orderstatus")))))); + ImmutableMap.of("NAME", "name"))))))); } @Test @@ -458,14 +459,74 @@ public void testOnlyNullPredicateIsPushDownThroughJoinFilters() { assertPlan( """ - WITH t(a) AS (VALUES 'a', 'b') - SELECT * - FROM t t1 JOIN t t2 ON true - WHERE t1.a = 'aa' - """, + WITH t(a) AS (VALUES 'a', 'b') + SELECT * + FROM t t1 JOIN t t2 ON true + WHERE t1.a = 'aa' + """, output(values("field", "field_0"))); } + @Test + public void testSimplifyNonInferrableInheritedPredicate() + { + assertPlan("SELECT * FROM (SELECT * FROM nation WHERE nationkey = regionkey AND regionkey = 5) a, nation b WHERE a.nationkey = b.nationkey AND a.nationkey + 11 > 15", + output( + join(INNER, builder -> 
builder + .equiCriteria(ImmutableList.of()) + .left( + filter("((L_NATIONKEY = L_REGIONKEY) AND (L_REGIONKEY = BIGINT '5'))", + tableScan("nation", ImmutableMap.of("L_NATIONKEY", "nationkey", "L_REGIONKEY", "regionkey")))) + .right( + anyTree( + filter("R_NATIONKEY = BIGINT '5'", + tableScan("nation", ImmutableMap.of("R_NATIONKEY", "nationkey")))))))); + } + + @Test + public void testDoesNotCreatePredicateFromInferredPredicate() + { + assertPlan("SELECT * FROM (SELECT *, nationkey + 1 as nationkey2 FROM nation) a JOIN nation b ON a.nationkey2 = b.nationkey", + output( + join(INNER, builder -> builder + .equiCriteria("L_NATIONKEY2", "R_NATIONKEY") + .left( + project(ImmutableMap.of("L_NATIONKEY2", expression("L_NATIONKEY + BIGINT '1'")), + tableScan("nation", ImmutableMap.of("L_NATIONKEY", "nationkey")))) + .right( + anyTree( + tableScan("nation", ImmutableMap.of("R_NATIONKEY", "nationkey"))))))); + + assertPlan("SELECT * FROM (SELECT * FROM nation WHERE nationkey = 5) a JOIN (SELECT * FROM nation WHERE nationkey = 5) b ON a.nationkey = b.nationkey", + output( + join(INNER, builder -> builder + .equiCriteria(ImmutableList.of()) + .left( + filter("L_NATIONKEY = BIGINT '5'", + tableScan("nation", ImmutableMap.of("L_NATIONKEY", "nationkey")))) + .right( + anyTree( + filter("R_NATIONKEY = BIGINT '5'", + tableScan("nation", ImmutableMap.of("R_NATIONKEY", "nationkey")))))))); + } + + @Test + public void testSimplifiesStraddlingPredicate() + { + assertPlan("SELECT * FROM (SELECT * FROM NATION WHERE nationkey = 5) a JOIN nation b ON a.nationkey = b.nationkey AND a.nationkey = a.regionkey + b.regionkey", + output( + filter("L_REGIONKEY + R_REGIONKEY = BIGINT '5'", + join(INNER, builder -> builder + .equiCriteria(ImmutableList.of()) + .left( + filter("L_NATIONKEY = BIGINT '5'", + tableScan("nation", ImmutableMap.of("L_NATIONKEY", "nationkey", "L_REGIONKEY", "regionkey")))) + .right( + anyTree( + filter("R_NATIONKEY = BIGINT '5'", + tableScan("nation", ImmutableMap.of("R_NATIONKEY", "nationkey", "R_REGIONKEY", "regionkey"))))))))); + } + protected Session noSemiJoinRewrite() { return Session.builder(getQueryRunner().getDefaultSession()) diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/JsonTablePlanComparator.java b/core/trino-main/src/test/java/io/trino/sql/planner/JsonTablePlanComparator.java new file mode 100644 index 0000000000000..8e34adfd2f2dd --- /dev/null +++ b/core/trino-main/src/test/java/io/trino/sql/planner/JsonTablePlanComparator.java @@ -0,0 +1,125 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.sql.planner; + +import io.trino.operator.table.json.JsonTableColumn; +import io.trino.operator.table.json.JsonTableOrdinalityColumn; +import io.trino.operator.table.json.JsonTablePlanCross; +import io.trino.operator.table.json.JsonTablePlanLeaf; +import io.trino.operator.table.json.JsonTablePlanNode; +import io.trino.operator.table.json.JsonTablePlanSingle; +import io.trino.operator.table.json.JsonTablePlanUnion; +import io.trino.operator.table.json.JsonTableQueryColumn; +import io.trino.operator.table.json.JsonTableValueColumn; + +import java.util.Comparator; +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public class JsonTablePlanComparator +{ + private JsonTablePlanComparator() {} + + public static Comparator planComparator() + { + return (actual, expected) -> { + requireNonNull(actual, "actual is null"); + requireNonNull(expected, "expected is null"); + return compare(actual, expected) ? 0 : -1; + }; + } + + private static boolean compare(JsonTablePlanNode left, JsonTablePlanNode right) + { + if (left == right) { + return true; + } + if (left.getClass() != right.getClass()) { + return false; + } + if (left instanceof JsonTablePlanLeaf leftPlan) { + JsonTablePlanLeaf rightPlan = (JsonTablePlanLeaf) right; + return leftPlan.path().equals(rightPlan.path()) && + compareColumns(leftPlan.columns(), rightPlan.columns()); + } + if (left instanceof JsonTablePlanSingle leftPlan) { + JsonTablePlanSingle rightPlan = (JsonTablePlanSingle) right; + return leftPlan.path().equals(rightPlan.path()) && + compareColumns(leftPlan.columns(), rightPlan.columns()) && + leftPlan.outer() == rightPlan.outer() && + compare(leftPlan.child(), rightPlan.child()); + } + List leftSiblings; + List rightSiblings; + if (left instanceof JsonTablePlanCross leftPlan) { + leftSiblings = leftPlan.siblings(); + rightSiblings = ((JsonTablePlanCross) right).siblings(); + } + else { + leftSiblings = ((JsonTablePlanUnion) left).siblings(); + rightSiblings = ((JsonTablePlanUnion) right).siblings(); + } + if (leftSiblings.size() != rightSiblings.size()) { + return false; + } + for (int i = 0; i < leftSiblings.size(); i++) { + if (!compare(leftSiblings.get(i), rightSiblings.get(i))) { + return false; + } + } + return true; + } + + private static boolean compareColumns(List leftColumns, List rightColumns) + { + if (leftColumns.size() != rightColumns.size()) { + return false; + } + for (int i = 0; i < leftColumns.size(); i++) { + if (!compareColumn(leftColumns.get(i), rightColumns.get(i))) { + return false; + } + } + return true; + } + + private static boolean compareColumn(JsonTableColumn left, JsonTableColumn right) + { + if (left.getClass() != right.getClass()) { + return false; + } + if (left instanceof JsonTableOrdinalityColumn leftColumn) { + return leftColumn.outputIndex() == ((JsonTableOrdinalityColumn) right).outputIndex(); + } + if (left instanceof JsonTableQueryColumn leftColumn) { + JsonTableQueryColumn rightColumn = (JsonTableQueryColumn) right; + return leftColumn.outputIndex() == rightColumn.outputIndex() && + leftColumn.function().equals(rightColumn.function()) && + leftColumn.path().equals(rightColumn.path()) && + leftColumn.wrapperBehavior() == rightColumn.wrapperBehavior() && + leftColumn.emptyBehavior() == rightColumn.emptyBehavior() && + leftColumn.errorBehavior() == rightColumn.errorBehavior(); + } + JsonTableValueColumn leftColumn = (JsonTableValueColumn) left; + JsonTableValueColumn rightColumn = (JsonTableValueColumn) right; + return 
leftColumn.outputIndex() == rightColumn.outputIndex() && + leftColumn.function().equals(rightColumn.function()) && + leftColumn.path().equals(rightColumn.path()) && + leftColumn.emptyBehavior() == rightColumn.emptyBehavior() && + leftColumn.emptyDefaultInput() == rightColumn.emptyDefaultInput() && + leftColumn.errorBehavior() == rightColumn.errorBehavior() && + leftColumn.errorDefaultInput() == rightColumn.errorDefaultInput(); + } +} diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestAddDynamicFilterSource.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestAddDynamicFilterSource.java index 77335e39a2141..b0ffa8505aada 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestAddDynamicFilterSource.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestAddDynamicFilterSource.java @@ -225,24 +225,24 @@ public void testCrossJoinInequality() filter("O_ORDERKEY BETWEEN L_ORDERKEY AND L_PARTKEY", join(INNER, builder -> builder .dynamicFilter(ImmutableList.of( - new DynamicFilterPattern("O_ORDERKEY", GREATER_THAN_OR_EQUAL, "L_ORDERKEY"), - new DynamicFilterPattern("O_ORDERKEY", LESS_THAN_OR_EQUAL, "L_PARTKEY"))) + new DynamicFilterPattern("L_ORDERKEY", LESS_THAN_OR_EQUAL, "O_ORDERKEY"), + new DynamicFilterPattern("L_PARTKEY", GREATER_THAN_OR_EQUAL, "O_ORDERKEY"))) .left( filter( TRUE_LITERAL, - tableScan("orders", ImmutableMap.of("O_ORDERKEY", "orderkey")))) + tableScan("lineitem", ImmutableMap.of("L_ORDERKEY", "orderkey", "L_PARTKEY", "partkey")))) .right( exchange( LOCAL, exchange( REMOTE, - node( - DynamicFilterSourceNode.class, - tableScan("lineitem", ImmutableMap.of("L_ORDERKEY", "orderkey", "L_PARTKEY", "partkey")))))))))); + node(DynamicFilterSourceNode.class, + tableScan("orders", ImmutableMap.of("O_ORDERKEY", "orderkey")))))))))); // TODO: Add support for dynamic filters in the below case assertDistributedPlan( "SELECT o.orderkey FROM orders o, lineitem l WHERE o.orderkey >= l.orderkey AND o.orderkey <= l.partkey - 1", + withJoinDistributionType(PARTITIONED), anyTree( filter("O_ORDERKEY >= L_ORDERKEY AND O_ORDERKEY <= expr", join(INNER, builder -> builder diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestConnectorExpressionTranslator.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestConnectorExpressionTranslator.java index fa755e6a50ca5..278a67a4fb85b 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestConnectorExpressionTranslator.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestConnectorExpressionTranslator.java @@ -86,8 +86,9 @@ import static io.trino.sql.planner.ConnectorExpressionTranslator.translate; import static io.trino.sql.planner.TestingPlannerContext.PLANNER_CONTEXT; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.type.JoniRegexpType.JONI_REGEXP; +import static io.trino.type.JsonPathType.JSON_PATH; import static io.trino.type.LikeFunctions.likePattern; import static io.trino.type.LikePatternType.LIKE_PATTERN; import static java.nio.charset.StandardCharsets.UTF_8; @@ -455,6 +456,24 @@ public void testTranslateRegularExpression() }); } + @Test + void testTranslateJsonPath() + { + // JSON path type is considered implementation detail of the engine and is not exposed to connectors + // within ConnectorExpression. Instead, it is replaced with a varchar pattern. 
+ assertTranslationRoundTrips( + BuiltinFunctionCallBuilder.resolve(PLANNER_CONTEXT.getMetadata()) + .setName("json_extract_scalar") + .addArgument(VARCHAR_TYPE, new SymbolReference("varchar_symbol_1")) + .addArgument(JSON_PATH, new Cast(new StringLiteral("$.path"), toSqlType(JSON_PATH))) + .build(), + new Call( + VARCHAR_TYPE, + new FunctionName("json_extract_scalar"), + List.of(new Variable("varchar_symbol_1", VARCHAR_TYPE), + new Constant(utf8Slice("$.path"), createVarcharType(6))))); + } + @Test public void testTranslateIn() { diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestDeleteAndInsertMergeProcessor.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestDeleteAndInsertMergeProcessor.java index 3a144d52b8b74..ad0ac8eed07db 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestDeleteAndInsertMergeProcessor.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestDeleteAndInsertMergeProcessor.java @@ -104,10 +104,10 @@ public void testUpdateAndDeletedMerge() Page inputPage = makePageFromBlocks( 5, Optional.of(rowIdNulls), - new Block[]{ - new LongArrayBlock(5, Optional.of(rowIdNulls), new long[]{2, 0, 1, 2, 2}), // TransactionId - new LongArrayBlock(5, Optional.of(rowIdNulls), new long[]{0, 0, 3, 1, 2}), // rowId - new IntArrayBlock(5, Optional.of(rowIdNulls), new int[]{536870912, 0, 536870912, 536870912, 536870912})}, // bucket + new Block[] { + new LongArrayBlock(5, Optional.of(rowIdNulls), new long[] {2, 0, 1, 2, 2}), // TransactionId + new LongArrayBlock(5, Optional.of(rowIdNulls), new long[] {0, 0, 3, 1, 2}), // rowId + new IntArrayBlock(5, Optional.of(rowIdNulls), new int[] {536870912, 0, 536870912, 536870912, 536870912})}, // bucket new Block[] { // customer makeVarcharArrayBlock("Aaron", "Carol", "Dave", "Dave", "Ed"), @@ -145,9 +145,9 @@ public void testAnotherMergeCase() 5, Optional.of(rowIdNulls), new Block[] { - new LongArrayBlock(5, Optional.of(rowIdNulls), new long[]{2, 0, 1, 2, 2}), // TransactionId - new LongArrayBlock(5, Optional.of(rowIdNulls), new long[]{0, 0, 3, 1, 2}), // rowId - new IntArrayBlock(5, Optional.of(rowIdNulls), new int[]{536870912, 0, 536870912, 536870912, 536870912})}, // bucket + new LongArrayBlock(5, Optional.of(rowIdNulls), new long[] {2, 0, 1, 2, 2}), // TransactionId + new LongArrayBlock(5, Optional.of(rowIdNulls), new long[] {0, 0, 3, 1, 2}), // rowId + new IntArrayBlock(5, Optional.of(rowIdNulls), new int[] {536870912, 0, 536870912, 536870912, 536870912})}, // bucket new Block[] { // customer makeVarcharArrayBlock("Aaron", "Carol", "Dave", "Dave", "Ed"), diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestDomainTranslator.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestDomainTranslator.java index 4bad8335dab96..0259b1d52ff71 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestDomainTranslator.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestDomainTranslator.java @@ -98,7 +98,7 @@ import static io.trino.sql.tree.ComparisonExpression.Operator.LESS_THAN_OR_EQUAL; import static io.trino.sql.tree.ComparisonExpression.Operator.NOT_EQUAL; import static io.trino.testing.TestingConnectorSession.SESSION; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.type.ColorType.COLOR; import static io.trino.type.LikeFunctions.LIKE_FUNCTION_NAME; import static io.trino.type.LikeFunctions.LIKE_PATTERN_FUNCTION_NAME; diff --git 
a/core/trino-main/src/test/java/io/trino/sql/planner/TestEffectivePredicateExtractor.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestEffectivePredicateExtractor.java index 32a3e4bc03213..1244478cc0ed8 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestEffectivePredicateExtractor.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestEffectivePredicateExtractor.java @@ -76,8 +76,10 @@ import io.trino.testing.TestingSession; import io.trino.testing.TestingTransactionHandle; import io.trino.transaction.TestingTransactionManager; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.Arrays; import java.util.Collection; @@ -111,12 +113,15 @@ import static io.trino.sql.tree.BooleanLiteral.TRUE_LITERAL; import static io.trino.sql.tree.ComparisonExpression.Operator.EQUAL; import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.tests.BogusType.BOGUS; -import static io.trino.transaction.TransactionBuilder.transaction; import static io.trino.type.UnknownType.UNKNOWN; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@TestInstance(PER_METHOD) +@Execution(SAME_THREAD) public class TestEffectivePredicateExtractor { private static final Symbol A = new Symbol("a"); @@ -176,7 +181,7 @@ public TableProperties getTableProperties(Session session, TableHandle handle) private TableScanNode baseTableScan; private ExpressionIdentityNormalizer expressionNormalizer; - @BeforeMethod + @BeforeEach public void setUp() { scanAssignments = ImmutableMap.builder() diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestEqualityInference.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestEqualityInference.java index b3174c7a48a7b..2c03203834d54 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestEqualityInference.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestEqualityInference.java @@ -65,6 +65,29 @@ public class TestEqualityInference private final TestingFunctionResolution functionResolution = new TestingFunctionResolution(); private final Metadata metadata = functionResolution.getMetadata(); + @Test + public void testDoesNotInferRedundantStraddlingPredicates() + { + EqualityInference inference = new EqualityInference( + metadata, + equals("a1", "b1"), + equals(add(nameReference("a1"), number(1)), number(0)), + equals(nameReference("a2"), add(nameReference("a1"), number(2))), + equals(nameReference("a1"), add("a3", "b3")), + equals(nameReference("b2"), add("a4", "b4"))); + EqualityInference.EqualityPartition partition = inference.generateEqualitiesPartitionedBy(symbols("a1", "a2", "a3", "a4")); + assertThat(partition.getScopeEqualities()).containsExactly( + equals(number(0), add(nameReference("a1"), number(1))), + equals(nameReference("a2"), add(nameReference("a1"), number(2)))); + assertThat(partition.getScopeComplementEqualities()).containsExactly( + equals(number(0), add(nameReference("b1"), number(1)))); + // there shouldn't be equality a2 = b1 + 1 as it can be derived from a2 = a1 + 1, a1 = b1 + 
assertThat(partition.getScopeStraddlingEqualities()).containsExactly( + equals("a1", "b1"), + equals(nameReference("a1"), add("a3", "b3")), + equals(nameReference("b2"), add("a4", "b4"))); + } + @Test public void testTransitivity() { diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestJsonTable.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestJsonTable.java new file mode 100644 index 0000000000000..4631acea7921b --- /dev/null +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestJsonTable.java @@ -0,0 +1,549 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.sql.planner; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import io.trino.execution.warnings.WarningCollector; +import io.trino.json.ir.IrJsonPath; +import io.trino.metadata.ResolvedFunction; +import io.trino.metadata.TestingFunctionResolution; +import io.trino.operator.table.json.JsonTable; +import io.trino.operator.table.json.JsonTablePlanCross; +import io.trino.operator.table.json.JsonTablePlanLeaf; +import io.trino.operator.table.json.JsonTablePlanNode; +import io.trino.operator.table.json.JsonTablePlanSingle; +import io.trino.operator.table.json.JsonTablePlanUnion; +import io.trino.operator.table.json.JsonTableQueryColumn; +import io.trino.operator.table.json.JsonTableValueColumn; +import io.trino.sql.planner.assertions.BasePlanTest; +import io.trino.sql.planner.optimizations.PlanNodeSearcher; +import io.trino.sql.planner.plan.TableFunctionNode; +import io.trino.sql.tree.JsonQuery; +import io.trino.sql.tree.JsonValue; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.Test; + +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.execution.querystats.PlanOptimizersStatsCollector.createPlanOptimizersStatsCollector; +import static io.trino.operator.scalar.json.JsonQueryFunction.JSON_QUERY_FUNCTION_NAME; +import static io.trino.operator.scalar.json.JsonValueFunction.JSON_VALUE_FUNCTION_NAME; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.TinyintType.TINYINT; +import static io.trino.sql.analyzer.ExpressionAnalyzer.JSON_NO_PARAMETERS_ROW_TYPE; +import static io.trino.sql.analyzer.TypeSignatureProvider.fromTypes; +import static io.trino.sql.planner.JsonTablePlanComparator.planComparator; +import static io.trino.sql.planner.LogicalPlanner.Stage.CREATED; +import static io.trino.sql.planner.PathNodes.contextVariable; +import static io.trino.sql.planner.PathNodes.literal; +import static io.trino.sql.planner.PathNodes.memberAccessor; +import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree; +import static io.trino.sql.planner.assertions.PlanMatchPattern.expression; +import static io.trino.sql.planner.assertions.PlanMatchPattern.project; +import static 
io.trino.sql.planner.assertions.PlanMatchPattern.strictOutput; +import static io.trino.sql.planner.assertions.PlanMatchPattern.tableFunction; +import static io.trino.sql.planner.assertions.PlanMatchPattern.values; +import static io.trino.sql.planner.assertions.TableFunctionMatcher.TableArgumentValue.Builder.tableArgument; +import static io.trino.type.Json2016Type.JSON_2016; +import static io.trino.type.TestJsonPath2016TypeSerialization.JSON_PATH_2016; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestJsonTable + extends BasePlanTest +{ + private static final ResolvedFunction JSON_VALUE_FUNCTION = new TestingFunctionResolution().resolveFunction( + JSON_VALUE_FUNCTION_NAME, + fromTypes(JSON_2016, JSON_PATH_2016, JSON_NO_PARAMETERS_ROW_TYPE, TINYINT, BIGINT, TINYINT, BIGINT)); + + private static final ResolvedFunction JSON_QUERY_FUNCTION = new TestingFunctionResolution().resolveFunction( + JSON_QUERY_FUNCTION_NAME, + fromTypes(JSON_2016, JSON_PATH_2016, JSON_NO_PARAMETERS_ROW_TYPE, TINYINT, TINYINT, TINYINT)); + + @Test + public void testJsonTableInitialPlan() + { + assertPlan( + """ + SELECT * + FROM (SELECT '[1, 2, 3]', 4) t(json_col, int_col), JSON_TABLE( + json_col, + 'lax $' AS root_path PASSING int_col AS id, '[ala]' FORMAT JSON AS name + COLUMNS( + bigint_col BIGINT DEFAULT 5 ON EMPTY DEFAULT int_col ON ERROR, + varchar_col VARCHAR FORMAT JSON ERROR ON ERROR) + EMPTY ON ERROR) + """, + CREATED, + strictOutput(// left-side columns first, json_table columns next + ImmutableList.of("json_col", "int_col", "bigint_col", "formatted_varchar_col"), + anyTree( + project( + ImmutableMap.of("formatted_varchar_col", expression("\"$json_to_varchar\"(varchar_col, tinyint '1', false)")), + tableFunction(builder -> builder + .name("$json_table") + .addTableArgument( + "$input", + tableArgument(0) + .rowSemantics() + .passThroughColumns() + .passThroughSymbols(ImmutableSet.of("json_col", "int_col"))) + .properOutputs(ImmutableList.of("bigint_col", "varchar_col")), + project( + ImmutableMap.of( + "context_item", expression("\"$varchar_to_json\"(json_col_coerced, false)"), // apply input function to context item + "parameters_row", expression("CAST(ROW (int_col, \"$varchar_to_json\"(name_coerced, false)) AS ROW(ID integer, NAME json2016))")), // apply input function to formatted path parameter and gather path parameters in a row + project(// coerce context item, path parameters and default expressions + ImmutableMap.of( + "name_coerced", expression("CAST(name AS VARCHAR)"), // cast formatted path parameter to VARCHAR for the input function + "default_value_coerced", expression("CAST(default_value AS BIGINT)"), // cast default value to BIGINT to match declared return type for the column + "json_col_coerced", expression("CAST(json_col AS VARCHAR)"), // cast context item to VARCHAR for the input function + "int_col_coerced", expression("CAST(int_col AS BIGINT)")), // cast default value to BIGINT to match declared return type for the column + project(// pre-project context item, path parameters and default expressions + ImmutableMap.of( + "name", expression("'[ala]'"), + "default_value", expression("5")), + anyTree( + project( + ImmutableMap.of( + "json_col", expression("'[1, 2, 3]'"), + "int_col", expression("4")), + values(1))))))))))); + } + + @Test + public void testImplicitColumnPath() + { + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + first_col BIGINT, + "Second_Col" BIGINT, + "_""_'_?_" 
BIGINT)) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "FIRST_COL"))), + valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "Second_Col"))), + valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "_\"_'_?_")))))); + } + + @Test + public void testExplicitColumnPath() + { + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + first_col BIGINT PATH 'lax $.a', + "Second_Col" BIGINT PATH 'lax $.B', + "_""_'_?_" BIGINT PATH 'lax false')) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "a"))), + valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "B"))), + valueColumn(2, new IrJsonPath(true, literal(BOOLEAN, false)))))); + } + + @Test + public void testColumnOutputIndex() + { + // output indexes follow the declaration order: [a, b, c, d] + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT, + NESTED PATH 'lax $.x' COLUMNS( + b BIGINT, + NESTED PATH 'lax $.y' COLUMNS( + c BIGINT)), + d BIGINT)) + """, + new JsonTablePlanSingle( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "A"))), + valueColumn(3, new IrJsonPath(true, memberAccessor(contextVariable(), "D")))), + true, + new JsonTablePlanSingle( + new IrJsonPath(true, memberAccessor(contextVariable(), "x")), + ImmutableList.of(valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "B")))), + true, + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "y")), + ImmutableList.of(valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "C")))))))); + } + + @Test + public void testColumnBehavior() + { + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT, + b BIGINT NULL ON EMPTY ERROR ON ERROR, + c BIGINT DEFAULT 1 ON EMPTY DEFAULT 2 ON ERROR, + d VARCHAR FORMAT JSON, + e VARCHAR FORMAT JSON WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY ERROR ON ERROR, + f VARCHAR FORMAT JSON OMIT QUOTES EMPTY ARRAY ON EMPTY EMPTY OBJECT ON ERROR)) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn( + 0, + new IrJsonPath(true, memberAccessor(contextVariable(), "A")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.NULL, + -1), + valueColumn( + 1, + new IrJsonPath(true, memberAccessor(contextVariable(), "B")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.ERROR, + -1), + valueColumn( + 2, + new IrJsonPath(true, memberAccessor(contextVariable(), "C")), + JsonValue.EmptyOrErrorBehavior.DEFAULT, + 2, + JsonValue.EmptyOrErrorBehavior.DEFAULT, + 3), + queryColumn( + 3, + new IrJsonPath(true, memberAccessor(contextVariable(), "D")), + JsonQuery.ArrayWrapperBehavior.WITHOUT, + JsonQuery.EmptyOrErrorBehavior.NULL, + JsonQuery.EmptyOrErrorBehavior.NULL), + queryColumn( + 4, + new IrJsonPath(true, memberAccessor(contextVariable(), "E")), + JsonQuery.ArrayWrapperBehavior.CONDITIONAL, + JsonQuery.EmptyOrErrorBehavior.NULL, + JsonQuery.EmptyOrErrorBehavior.ERROR), + queryColumn( + 5, + new 
IrJsonPath(true, memberAccessor(contextVariable(), "F")), + JsonQuery.ArrayWrapperBehavior.WITHOUT, + JsonQuery.EmptyOrErrorBehavior.EMPTY_ARRAY, + JsonQuery.EmptyOrErrorBehavior.EMPTY_OBJECT)))); + } + + @Test + public void testInheritedErrorBehavior() + { + // the column has no explicit error behavior, and json_table has no explicit error behavior. The default behavior for column is NULL ON ERROR. + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT)) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn( + 0, + new IrJsonPath(true, memberAccessor(contextVariable(), "A")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.NULL, + -1)))); + + // the column has no explicit error behavior, and json_table has explicit ERROR ON ERROR. The default behavior for column is ERROR ON ERROR. + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT) + ERROR ON ERROR) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn( + 0, + new IrJsonPath(true, memberAccessor(contextVariable(), "A")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.ERROR, + -1)))); + + // the column has no explicit error behavior, and json_table has explicit EMPTY ON ERROR. The default behavior for column is NULL ON ERROR. + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT) + EMPTY ON ERROR) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn( + 0, + new IrJsonPath(true, memberAccessor(contextVariable(), "A")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.NULL, + -1)))); + + // the column has explicit NULL ON ERROR behavior, and json_table has no explicit ERROR ON ERROR. The behavior for column is the one explicitly specified. 
+ assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + a BIGINT NULL ON ERROR) + ERROR ON ERROR) + """, + new JsonTablePlanLeaf( + new IrJsonPath(true, contextVariable()), + ImmutableList.of( + valueColumn( + 0, + new IrJsonPath(true, memberAccessor(contextVariable(), "A")), + JsonValue.EmptyOrErrorBehavior.NULL, + -1, + JsonValue.EmptyOrErrorBehavior.NULL, + -1)))); + } + + @Test + public void testImplicitDefaultPlan() + { + // implicit plan settings are OUTER, UNION + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $.a' COLUMNS(col_1 BIGINT), + NESTED PATH 'lax $.b' COLUMNS( + NESTED PATH 'lax $.c' COLUMNS(col_2 BIGINT), + NESTED PATH 'lax $.d' COLUMNS(col_3 BIGINT)), + NESTED PATH 'lax $.e' COLUMNS(col_4 BIGINT))) + """, + new JsonTablePlanSingle( + new IrJsonPath(true, contextVariable()), + ImmutableList.of(), + true, + new JsonTablePlanUnion(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "a")), + ImmutableList.of(valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_1"))))), + new JsonTablePlanSingle( + new IrJsonPath(true, memberAccessor(contextVariable(), "b")), + ImmutableList.of(), + true, + new JsonTablePlanUnion(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "c")), + ImmutableList.of(valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_2"))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "d")), + ImmutableList.of(valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_3")))))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "e")), + ImmutableList.of(valueColumn(3, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_4"))))))))); + } + + @Test + public void testExplicitDefaultPlan() + { + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $.a' AS a COLUMNS(col_1 BIGINT), + NESTED PATH 'lax $.b' AS b COLUMNS( + NESTED PATH 'lax $.c' AS c COLUMNS(col_2 BIGINT), + NESTED PATH 'lax $.d' AS d COLUMNS(col_3 BIGINT)), + NESTED PATH 'lax $.e' AS e COLUMNS(col_4 BIGINT)) + PLAN DEFAULT (INNER, CROSS)) + """, + new JsonTablePlanSingle( + new IrJsonPath(true, contextVariable()), + ImmutableList.of(), + false, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "a")), + ImmutableList.of(valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_1"))))), + new JsonTablePlanSingle( + new IrJsonPath(true, memberAccessor(contextVariable(), "b")), + ImmutableList.of(), + false, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "c")), + ImmutableList.of(valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_2"))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "d")), + ImmutableList.of(valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_3")))))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "e")), + ImmutableList.of(valueColumn(3, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_4"))))))))); + + 
assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $.a' AS a COLUMNS(col_1 BIGINT), + NESTED PATH 'lax $.b' AS b COLUMNS( + NESTED PATH 'lax $.c' AS c COLUMNS(col_2 BIGINT), + NESTED PATH 'lax $.d' AS d COLUMNS(col_3 BIGINT)), + NESTED PATH 'lax $.e' AS e COLUMNS(col_4 BIGINT)) + PLAN DEFAULT (CROSS)) + """, + new JsonTablePlanSingle( + new IrJsonPath(true, contextVariable()), + ImmutableList.of(), + true, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "a")), + ImmutableList.of(valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_1"))))), + new JsonTablePlanSingle( + new IrJsonPath(true, memberAccessor(contextVariable(), "b")), + ImmutableList.of(), + true, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "c")), + ImmutableList.of(valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_2"))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "d")), + ImmutableList.of(valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_3")))))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "e")), + ImmutableList.of(valueColumn(3, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_4"))))))))); + } + + @Test + public void testSpecificPlan() + { + assertJsonTablePlan( + """ + SELECT * + FROM (SELECT 1, 2, 3), JSON_TABLE( + '[1, 2, 3]', + 'lax $' AS root_path + COLUMNS( + NESTED PATH 'lax $.a' AS a COLUMNS(col_1 BIGINT), + NESTED PATH 'lax $.b' AS b COLUMNS( + NESTED PATH 'lax $.c' AS c COLUMNS(col_2 BIGINT), + NESTED PATH 'lax $.d' AS d COLUMNS(col_3 BIGINT)), + NESTED PATH 'lax $.e' AS e COLUMNS(col_4 BIGINT)) + PLAN (ROOT_PATH INNER (((B OUTER (D CROSS C)) UNION E) CROSS A))) + """, + new JsonTablePlanSingle( + new IrJsonPath(true, contextVariable()), + ImmutableList.of(), + false, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanUnion(ImmutableList.of( + new JsonTablePlanSingle( + new IrJsonPath(true, memberAccessor(contextVariable(), "b")), + ImmutableList.of(), + true, + new JsonTablePlanCross(ImmutableList.of( + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "d")), + ImmutableList.of(valueColumn(2, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_3"))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "c")), + ImmutableList.of(valueColumn(1, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_2")))))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "e")), + ImmutableList.of(valueColumn(3, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_4"))))))), + new JsonTablePlanLeaf( + new IrJsonPath(true, memberAccessor(contextVariable(), "a")), + ImmutableList.of(valueColumn(0, new IrJsonPath(true, memberAccessor(contextVariable(), "COL_1"))))))))); + } + + private static JsonTableValueColumn valueColumn(int outputIndex, IrJsonPath path) + { + return valueColumn(outputIndex, path, JsonValue.EmptyOrErrorBehavior.NULL, -1, JsonValue.EmptyOrErrorBehavior.NULL, -1); + } + + private static JsonTableValueColumn valueColumn(int outputIndex, IrJsonPath path, JsonValue.EmptyOrErrorBehavior emptyBehavior, int emptyDefaultInput, JsonValue.EmptyOrErrorBehavior errorBehavior, int 
errorDefaultInput) + { + return new JsonTableValueColumn(outputIndex, JSON_VALUE_FUNCTION, path, emptyBehavior.ordinal(), emptyDefaultInput, errorBehavior.ordinal(), errorDefaultInput); + } + + private static JsonTableQueryColumn queryColumn(int outputIndex, IrJsonPath path, JsonQuery.ArrayWrapperBehavior wrapperBehavior, JsonQuery.EmptyOrErrorBehavior emptyBehavior, JsonQuery.EmptyOrErrorBehavior errorBehavior) + { + return new JsonTableQueryColumn(outputIndex, JSON_QUERY_FUNCTION, path, wrapperBehavior.ordinal(), emptyBehavior.ordinal(), errorBehavior.ordinal()); + } + + private void assertJsonTablePlan(@Language("SQL") String sql, JsonTablePlanNode expectedPlan) + { + try { + getQueryRunner().inTransaction(transactionSession -> { + Plan queryPlan = getQueryRunner().createPlan(transactionSession, sql, ImmutableList.of(), CREATED, WarningCollector.NOOP, createPlanOptimizersStatsCollector()); + TableFunctionNode tableFunctionNode = getOnlyElement(PlanNodeSearcher.searchFrom(queryPlan.getRoot()).where(TableFunctionNode.class::isInstance).findAll()); + JsonTablePlanNode actualPlan = ((JsonTable.JsonTableFunctionHandle) tableFunctionNode.getHandle().getFunctionHandle()).processingPlan(); + assertThat(actualPlan) + .usingComparator(planComparator()) + .isEqualTo(expectedPlan); + return null; + }); + } + catch (Throwable e) { + e.addSuppressed(new Exception("Query: " + sql)); + throw e; + } + } +} diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestLiteralEncoder.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestLiteralEncoder.java index 05ce1f93e8173..ce290fac07f5d 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestLiteralEncoder.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestLiteralEncoder.java @@ -72,7 +72,7 @@ import static io.trino.sql.ExpressionUtils.isEffectivelyLiteral; import static io.trino.sql.SqlFormatter.formatSql; import static io.trino.sql.planner.TestingPlannerContext.PLANNER_CONTEXT; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.type.CodePointsType.CODE_POINTS; import static io.trino.type.JoniRegexpType.JONI_REGEXP; import static io.trino.type.JsonPathType.JSON_PATH; diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java index bf20f8dc6edff..55f5099d848ec 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestLogicalPlanner.java @@ -397,24 +397,24 @@ public void testInnerInequalityJoinWithEquiJoinConjuncts() anyTree( anyNot(FilterNode.class, join(INNER, builder -> builder - .equiCriteria("O_SHIPPRIORITY", "L_LINENUMBER") + .equiCriteria("L_LINENUMBER", "O_SHIPPRIORITY") .filter("O_ORDERKEY < L_ORDERKEY") .dynamicFilter( ImmutableList.of( - new DynamicFilterPattern("O_SHIPPRIORITY", EQUAL, "L_LINENUMBER"), - new DynamicFilterPattern("O_ORDERKEY", LESS_THAN, "L_ORDERKEY"))) + new DynamicFilterPattern("L_LINENUMBER", EQUAL, "O_SHIPPRIORITY"), + new DynamicFilterPattern("L_ORDERKEY", GREATER_THAN, "O_ORDERKEY"))) .left( filter(TRUE_LITERAL, - tableScan("orders", + tableScan("lineitem", ImmutableMap.of( - "O_SHIPPRIORITY", "shippriority", - "O_ORDERKEY", "orderkey")))) + "L_LINENUMBER", "linenumber", + "L_ORDERKEY", "orderkey")))) .right( anyTree( - tableScan("lineitem", + tableScan("orders", ImmutableMap.of( - 
"L_LINENUMBER", "linenumber", - "L_ORDERKEY", "orderkey")))))))); + "O_SHIPPRIORITY", "shippriority", + "O_ORDERKEY", "orderkey")))))))); } @Test @@ -439,13 +439,13 @@ public void testJoin() assertPlan("SELECT o.orderkey FROM orders o, lineitem l WHERE l.orderkey = o.orderkey", anyTree( join(INNER, builder -> builder - .equiCriteria("ORDERS_OK", "LINEITEM_OK") + .equiCriteria("LINEITEM_OK", "ORDERS_OK") .left( anyTree( - tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey")))) + tableScan("lineitem", ImmutableMap.of("LINEITEM_OK", "orderkey")))) .right( anyTree( - tableScan("lineitem", ImmutableMap.of("LINEITEM_OK", "orderkey"))))))); + tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey"))))))); } @Test @@ -454,13 +454,13 @@ public void testJoinWithOrderBySameKey() assertPlan("SELECT o.orderkey FROM orders o, lineitem l WHERE l.orderkey = o.orderkey ORDER BY l.orderkey ASC, o.orderkey ASC", anyTree( join(INNER, builder -> builder - .equiCriteria("ORDERS_OK", "LINEITEM_OK") + .equiCriteria("LINEITEM_OK", "ORDERS_OK") .left( anyTree( - tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey")))) + tableScan("lineitem", ImmutableMap.of("LINEITEM_OK", "orderkey")))) .right( anyTree( - tableScan("lineitem", ImmutableMap.of("LINEITEM_OK", "orderkey"))))))); + tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey"))))))); } @Test @@ -902,19 +902,19 @@ public void testStreamingAggregationOverJoin() assertPlan("SELECT o.orderkey, count(*) FROM orders o, lineitem l WHERE o.orderkey=l.orderkey GROUP BY 1", anyTree( aggregation( - singleGroupingSet("o_orderkey"), + singleGroupingSet("l_orderkey"), ImmutableMap.of(Optional.empty(), functionCall("count", ImmutableList.of())), - ImmutableList.of("o_orderkey"), // streaming + ImmutableList.of("l_orderkey"), // streaming Optional.empty(), SINGLE, join(INNER, builder -> builder - .equiCriteria("o_orderkey", "l_orderkey") + .equiCriteria("l_orderkey", "o_orderkey") .left( anyTree( - tableScan("orders", ImmutableMap.of("o_orderkey", "orderkey")))) + tableScan("lineitem", ImmutableMap.of("l_orderkey", "orderkey")))) .right( anyTree( - tableScan("lineitem", ImmutableMap.of("l_orderkey", "orderkey")))))))); + tableScan("orders", ImmutableMap.of("o_orderkey", "orderkey")))))))); // left join -> streaming aggregation assertPlan("SELECT o.orderkey, count(*) FROM orders o LEFT JOIN lineitem l ON o.orderkey=l.orderkey GROUP BY 1", @@ -1324,7 +1324,9 @@ public void testUsesDistributedJoinIfNaturallyPartitionedOnProbeSymbols() // replicated join is preserved if probe side is single node assertPlanWithSession( "SELECT * FROM (VALUES 1, 2, 3) t(a), region r WHERE r.regionkey = t.a", - broadcastJoin, + Session.builder(broadcastJoin) + .setSystemProperty(JOIN_REORDERING_STRATEGY, JoinReorderingStrategy.NONE.name()) + .build(), false, anyTree( node(JoinNode.class, @@ -1433,18 +1435,18 @@ public void testFilteringSemiJoinRewriteToInnerJoin() "SELECT custkey FROM orders WHERE custkey IN (SELECT custkey FROM customer)", any( join(INNER, builder -> builder - .equiCriteria("CUSTOMER_CUSTKEY", "ORDER_CUSTKEY") + .equiCriteria("ORDER_CUSTKEY", "CUSTOMER_CUSTKEY") .left( + anyTree( + tableScan("orders", ImmutableMap.of("ORDER_CUSTKEY", "custkey")))) + .right( aggregation( singleGroupingSet("CUSTOMER_CUSTKEY"), ImmutableMap.of(), Optional.empty(), FINAL, anyTree( - tableScan("customer", ImmutableMap.of("CUSTOMER_CUSTKEY", "custkey"))))) - .right( - anyTree( - tableScan("orders", ImmutableMap.of("ORDER_CUSTKEY", "custkey"))))))); + tableScan("customer", 
ImmutableMap.of("CUSTOMER_CUSTKEY", "custkey")))))))); } @Test @@ -1691,14 +1693,14 @@ public void testLimitPushdownThroughUnionNesting() { assertPlan( """ - SELECT col FROM ( - SELECT nationkey FROM nation - UNION ALL - SELECT nationkey FROM nation - UNION ALL - SELECT nationkey FROM nation - ) AS t(col) - LIMIT 2""", + SELECT col FROM ( + SELECT nationkey FROM nation + UNION ALL + SELECT nationkey FROM nation + UNION ALL + SELECT nationkey FROM nation + ) AS t(col) + LIMIT 2""", output( limit( 2, @@ -1861,13 +1863,19 @@ public void testRemoveRedundantFilter() "ON orders.orderstatus = t2.s", any( join(INNER, builder -> builder - .equiCriteria("expr", "ORDER_STATUS") - .left(anyTree(values(ImmutableList.of("expr"), ImmutableList.of(ImmutableList.of(new StringLiteral("O")), ImmutableList.of(new StringLiteral("F")))))) + .equiCriteria("ORDER_STATUS", "expr") + .left( + filter(TRUE_LITERAL, + strictConstrainedTableScan( + "orders", + ImmutableMap.of("ORDER_STATUS", "orderstatus", "ORDER_KEY", "orderkey"), + ImmutableMap.of("orderstatus", multipleValues(createVarcharType(1), ImmutableList.of(utf8Slice("F"), utf8Slice("O"))))))) .right( - exchange(strictConstrainedTableScan( - "orders", - ImmutableMap.of("ORDER_STATUS", "orderstatus", "ORDER_KEY", "orderkey"), - ImmutableMap.of("orderstatus", multipleValues(createVarcharType(1), ImmutableList.of(utf8Slice("F"), utf8Slice("O")))))))))); + filter( + "expr IN ('F', 'O')", + values( + ImmutableList.of("expr"), + ImmutableList.of(ImmutableList.of(new StringLiteral("O")), ImmutableList.of(new StringLiteral("F"))))))))); } @Test @@ -1956,16 +1964,16 @@ public void testMergeProjectWithValues() "ON orders.orderstatus = t2.s", anyTree( join(INNER, builder -> builder - .equiCriteria("expr", "ORDER_STATUS") + .equiCriteria("ORDER_STATUS", "expr") .left( - filter("expr IN ('F', 'O')", - values(ImmutableList.of("expr"), ImmutableList.of(ImmutableList.of(new StringLiteral("O")), ImmutableList.of(new StringLiteral("F")))))) - .right( - exchange( + filter(TRUE_LITERAL, strictConstrainedTableScan( "orders", ImmutableMap.of("ORDER_STATUS", "orderstatus", "ORDER_KEY", "orderkey"), - ImmutableMap.of("orderstatus", multipleValues(createVarcharType(1), ImmutableList.of(utf8Slice("F"), utf8Slice("O")))))))))); + ImmutableMap.of("orderstatus", multipleValues(createVarcharType(1), ImmutableList.of(utf8Slice("F"), utf8Slice("O"))))))) + .right( + filter("expr IN ('F', 'O')", + values(ImmutableList.of("expr"), ImmutableList.of(ImmutableList.of(new StringLiteral("O")), ImmutableList.of(new StringLiteral("F"))))))))); // Constraint for the table is derived, based on constant values in the other branch of the join. // It is not accepted by the connector, and remains in form of a filter over TableScan. 
@@ -1976,18 +1984,18 @@ public void testMergeProjectWithValues() "ON orders.orderkey = t2.s", anyTree( join(INNER, builder -> builder - .equiCriteria("expr", "ORDER_KEY") + .equiCriteria("ORDER_KEY", "expr") .left( filter( - "expr IN (BIGINT '1', BIGINT '2')", - values(ImmutableList.of("expr"), ImmutableList.of(ImmutableList.of(new GenericLiteral("BIGINT", "1")), ImmutableList.of(new GenericLiteral("BIGINT", "2")))))) - .right( - anyTree(filter( "ORDER_KEY IN (BIGINT '1', BIGINT '2')", strictConstrainedTableScan( "orders", ImmutableMap.of("ORDER_STATUS", "orderstatus", "ORDER_KEY", "orderkey"), - ImmutableMap.of()))))))); + ImmutableMap.of()))) + .right( + filter( + "expr IN (BIGINT '1', BIGINT '2')", + values(ImmutableList.of("expr"), ImmutableList.of(ImmutableList.of(new GenericLiteral("BIGINT", "1")), ImmutableList.of(new GenericLiteral("BIGINT", "2"))))))))); } @Test diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestMaterializedViews.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestMaterializedViews.java index 253be3ac8a244..0b5e5f98059a6 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestMaterializedViews.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestMaterializedViews.java @@ -134,13 +134,13 @@ protected LocalQueryRunner createLocalQueryRunner() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TEST_CATALOG_NAME, SCHEMA, "storage_table")), - ImmutableMap.of()); + Optional.of(new CatalogSchemaTableName(TEST_CATALOG_NAME, SCHEMA, "storage_table"))); queryRunner.inTransaction(session -> { metadata.createMaterializedView( session, freshMaterializedView, materializedViewDefinition, + ImmutableMap.of(), false, false); return null; @@ -153,6 +153,7 @@ protected LocalQueryRunner createLocalQueryRunner() session, notFreshMaterializedView, materializedViewDefinition, + ImmutableMap.of(), false, false); return null; @@ -167,14 +168,14 @@ protected LocalQueryRunner createLocalQueryRunner() Optional.empty(), Identity.ofUser("some user"), ImmutableList.of(), - Optional.of(new CatalogSchemaTableName(TEST_CATALOG_NAME, SCHEMA, "storage_table_with_casts")), - ImmutableMap.of()); + Optional.of(new CatalogSchemaTableName(TEST_CATALOG_NAME, SCHEMA, "storage_table_with_casts"))); QualifiedObjectName materializedViewWithCasts = new QualifiedObjectName(TEST_CATALOG_NAME, SCHEMA, "materialized_view_with_casts"); queryRunner.inTransaction(session -> { metadata.createMaterializedView( session, materializedViewWithCasts, materializedViewDefinitionWithCasts, + ImmutableMap.of(), false, false); return null; @@ -186,6 +187,7 @@ protected LocalQueryRunner createLocalQueryRunner() session, new QualifiedObjectName(TEST_CATALOG_NAME, SCHEMA, "stale_materialized_view_with_casts"), materializedViewDefinitionWithCasts, + ImmutableMap.of(), false, false); return null; diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestPlanFragmentPartitionCount.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestPlanFragmentPartitionCount.java index ddcae1df4180e..5aeffcb41522d 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestPlanFragmentPartitionCount.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestPlanFragmentPartitionCount.java @@ -42,7 +42,7 @@ import static io.trino.sql.planner.plan.JoinNode.Type.INNER; import static io.trino.testing.TestingHandles.TEST_CATALOG_NAME; import static io.trino.testing.TestingSession.testSessionBuilder; -import static 
io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdown.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdown.java index 9ff43423109f0..934be3f14f777 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdown.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdown.java @@ -155,7 +155,7 @@ public void testNonDeterministicPredicateDoesNotPropagateFromFilteringSideToSour semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", true, anyTree( tableScan("lineitem", ImmutableMap.of( - "LINE_ORDER_KEY", "orderkey"))), + "LINE_ORDER_KEY", "orderkey"))), node(ExchangeNode.class, filter("ORDERS_ORDER_KEY = CAST(random(5) AS bigint)", tableScan("orders", ImmutableMap.of("ORDERS_ORDER_KEY", "orderkey"))))))); @@ -168,15 +168,14 @@ public void testNonStraddlingJoinExpression() "SELECT * FROM orders JOIN lineitem ON orders.orderkey = lineitem.orderkey AND cast(lineitem.linenumber AS varchar) = '2'", anyTree( join(INNER, builder -> builder - .equiCriteria("ORDERS_OK", "LINEITEM_OK") + .equiCriteria("LINEITEM_OK", "ORDERS_OK") .left( - anyTree( - tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey")))) + filter("cast(LINEITEM_LINENUMBER as varchar) = VARCHAR '2'", + tableScan("lineitem", ImmutableMap.of( + "LINEITEM_OK", "orderkey", + "LINEITEM_LINENUMBER", "linenumber")))) .right( anyTree( - filter("cast(LINEITEM_LINENUMBER as varchar) = VARCHAR '2'", - tableScan("lineitem", ImmutableMap.of( - "LINEITEM_OK", "orderkey", - "LINEITEM_LINENUMBER", "linenumber")))))))); + tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey"))))))); } } diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdownWithoutDynamicFilter.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdownWithoutDynamicFilter.java index b6ded5e0c49f0..1c4e627e47429 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdownWithoutDynamicFilter.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestPredicatePushdownWithoutDynamicFilter.java @@ -163,14 +163,14 @@ public void testNonStraddlingJoinExpression() "SELECT * FROM orders JOIN lineitem ON orders.orderkey = lineitem.orderkey AND cast(lineitem.linenumber AS varchar) = '2'", anyTree( join(INNER, builder -> builder - .equiCriteria("ORDERS_OK", "LINEITEM_OK") + .equiCriteria("LINEITEM_OK", "ORDERS_OK") .left( - tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey"))) + filter("cast(LINEITEM_LINENUMBER as varchar) = VARCHAR '2'", + tableScan("lineitem", ImmutableMap.of( + "LINEITEM_OK", "orderkey", + "LINEITEM_LINENUMBER", "linenumber")))) .right( anyTree( - filter("cast(LINEITEM_LINENUMBER as varchar) = VARCHAR '2'", - tableScan("lineitem", ImmutableMap.of( - "LINEITEM_OK", "orderkey", - "LINEITEM_LINENUMBER", "linenumber")))))))); + tableScan("orders", ImmutableMap.of("ORDERS_OK", "orderkey"))))))); } } diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestQuantifiedComparison.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestQuantifiedComparison.java index e85181388c774..6261d4ab2a7d5 100644 --- 
a/core/trino-main/src/test/java/io/trino/sql/planner/TestQuantifiedComparison.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestQuantifiedComparison.java @@ -38,9 +38,9 @@ public void testQuantifiedComparisonEqualsAny() String query = "SELECT orderkey, custkey FROM orders WHERE orderkey = ANY (VALUES ROW(CAST(5 as BIGINT)), ROW(CAST(3 as BIGINT)))"; assertPlan(query, anyTree( join(INNER, builder -> builder - .equiCriteria("Y", "X") - .left(anyTree(values(ImmutableMap.of("Y", 0)))) - .right(anyTree(tableScan("orders", ImmutableMap.of("X", "orderkey"))))))); + .equiCriteria("X", "Y") + .left(anyTree(tableScan("orders", ImmutableMap.of("X", "orderkey")))) + .right(anyTree(values(ImmutableMap.of("Y", 0))))))); } @Test diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/TestingPlannerContext.java b/core/trino-main/src/test/java/io/trino/sql/planner/TestingPlannerContext.java index a7e8f9bc4bded..f2e8d9647efd6 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/TestingPlannerContext.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/TestingPlannerContext.java @@ -119,7 +119,10 @@ public PlannerContext build() types.forEach(typeRegistry::addType); parametricTypes.forEach(typeRegistry::addParametricType); - GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog(); + GlobalFunctionCatalog globalFunctionCatalog = new GlobalFunctionCatalog( + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }, + () -> { throw new UnsupportedOperationException(); }); globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(featuresConfig, typeOperators, new BlockTypeOperators(typeOperators), UNKNOWN)); functionBundles.forEach(globalFunctionCatalog::addFunctions); diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/assertions/BasePushdownPlanTest.java b/core/trino-main/src/test/java/io/trino/sql/planner/assertions/BasePushdownPlanTest.java index 325b4a5d26317..9ba223af6a669 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/assertions/BasePushdownPlanTest.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/assertions/BasePushdownPlanTest.java @@ -29,7 +29,7 @@ public abstract class BasePushdownPlanTest { protected Optional getTableHandle(Session session, QualifiedObjectName objectName) { - return getQueryRunner().inTransaction(session, transactionSession -> { return getQueryRunner().getMetadata().getTableHandle(transactionSession, objectName); }); + return getQueryRunner().inTransaction(session, transactionSession -> getQueryRunner().getMetadata().getTableHandle(transactionSession, objectName)); } protected Map getColumnHandles(Session session, QualifiedObjectName tableName) diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestCanonicalizeExpressionRewriter.java b/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestCanonicalizeExpressionRewriter.java index cfa726c2bbad3..9680dec3d94a3 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestCanonicalizeExpressionRewriter.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/iterative/rule/TestCanonicalizeExpressionRewriter.java @@ -39,8 +39,8 @@ import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; import static io.trino.sql.planner.iterative.rule.CanonicalizeExpressionRewriter.rewrite; +import static 
io.trino.testing.TransactionBuilder.transaction; import static io.trino.transaction.InMemoryTransactionManager.createTestTransactionManager; -import static io.trino.transaction.TransactionBuilder.transaction; public class TestCanonicalizeExpressionRewriter { diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/optimizations/TestExpressionEquivalence.java b/core/trino-main/src/test/java/io/trino/sql/planner/optimizations/TestExpressionEquivalence.java index cdc9179eab7b6..076705032a2e8 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/optimizations/TestExpressionEquivalence.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/optimizations/TestExpressionEquivalence.java @@ -41,7 +41,7 @@ import static io.trino.sql.planner.SymbolsExtractor.extractUnique; import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; import static io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static java.util.function.Function.identity; import static java.util.stream.Collectors.toMap; diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/planprinter/TestCounterBasedAnonymizer.java b/core/trino-main/src/test/java/io/trino/sql/planner/planprinter/TestCounterBasedAnonymizer.java index d343eceee6236..6380bffab53e8 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/planprinter/TestCounterBasedAnonymizer.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/planprinter/TestCounterBasedAnonymizer.java @@ -22,7 +22,6 @@ import io.trino.sql.tree.DoubleLiteral; import io.trino.sql.tree.GenericLiteral; import io.trino.sql.tree.IntervalLiteral; -import io.trino.sql.tree.Literal; import io.trino.sql.tree.LogicalExpression; import io.trino.sql.tree.LongLiteral; import io.trino.sql.tree.NullLiteral; @@ -30,8 +29,7 @@ import io.trino.sql.tree.SymbolReference; import io.trino.sql.tree.TimeLiteral; import io.trino.sql.tree.TimestampLiteral; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.Optional; @@ -64,34 +62,51 @@ public void testSymbolReferenceAnonymization() .isEqualTo("((\"symbol_1\" > 'long_literal_1') AND (\"symbol_2\" < 'long_literal_2') AND (\"symbol_3\" = 'long_literal_3'))"); } - @Test(dataProvider = "literals") - public void testLiteralAnonymization(Literal actual, String expected) + @Test + public void testLiteralAnonymization() { CounterBasedAnonymizer anonymizer = new CounterBasedAnonymizer(); - assertThat(anonymizer.anonymize(actual)).isEqualTo(expected); - } - @DataProvider - public static Object[][] literals() - { - return new Object[][] { - {new BinaryLiteral("DEF321"), "'binary_literal_1'"}, - {new StringLiteral("abc"), "'string_literal_1'"}, - {new GenericLiteral("bigint", "1"), "'bigint_literal_1'"}, - {new CharLiteral("a"), "'char_literal_1'"}, - {new DecimalLiteral("123"), "'decimal_literal_1'"}, - {new DoubleLiteral(String.valueOf(6554)), "'double_literal_1'"}, - {new DoubleLiteral(String.valueOf(Double.MAX_VALUE)), "'double_literal_1'"}, - {new LongLiteral(String.valueOf(6554)), "'long_literal_1'"}, - {new LongLiteral(String.valueOf(Long.MAX_VALUE)), "'long_literal_1'"}, - {new BooleanLiteral("true"), "true"}, - {new TimeLiteral("03:04:05"), "'time_literal_1'"}, - {new TimestampLiteral("2012-10-31 01:00 UTC"), "'timestamp_literal_1'"}, - {new 
NullLiteral(), "null"}, - { - new IntervalLiteral("33", IntervalLiteral.Sign.POSITIVE, IntervalLiteral.IntervalField.DAY, Optional.empty()), - "'interval_literal_1'" - } - }; + assertThat(anonymizer.anonymize(new BinaryLiteral("DEF321"))) + .isEqualTo("'binary_literal_1'"); + + assertThat(anonymizer.anonymize(new StringLiteral("abc"))) + .isEqualTo("'string_literal_2'"); + + assertThat(anonymizer.anonymize(new GenericLiteral("bigint", "1"))) + .isEqualTo("'bigint_literal_3'"); + + assertThat(anonymizer.anonymize(new CharLiteral("a"))) + .isEqualTo("'char_literal_4'"); + + assertThat(anonymizer.anonymize(new DecimalLiteral("123"))) + .isEqualTo("'decimal_literal_5'"); + + assertThat(anonymizer.anonymize(new DoubleLiteral(String.valueOf(6554)))) + .isEqualTo("'double_literal_6'"); + + assertThat(anonymizer.anonymize(new DoubleLiteral(String.valueOf(Double.MAX_VALUE)))) + .isEqualTo("'double_literal_7'"); + + assertThat(anonymizer.anonymize(new LongLiteral(String.valueOf(6554)))) + .isEqualTo("'long_literal_8'"); + + assertThat(anonymizer.anonymize(new LongLiteral(String.valueOf(Long.MAX_VALUE)))) + .isEqualTo("'long_literal_9'"); + + assertThat(anonymizer.anonymize(new BooleanLiteral("true"))) + .isEqualTo("true"); + + assertThat(anonymizer.anonymize(new TimeLiteral("03:04:05"))) + .isEqualTo("'time_literal_10'"); + + assertThat(anonymizer.anonymize(new TimestampLiteral("2012-10-31 01:00 UTC"))) + .isEqualTo("'timestamp_literal_11'"); + + assertThat(anonymizer.anonymize(new NullLiteral())) + .isEqualTo("null"); + + assertThat(anonymizer.anonymize(new IntervalLiteral("33", IntervalLiteral.Sign.POSITIVE, IntervalLiteral.IntervalField.DAY, Optional.empty()))) + .isEqualTo("'interval_literal_12'"); } } diff --git a/core/trino-main/src/test/java/io/trino/sql/planner/sanity/TestValidateScaledWritersUsage.java b/core/trino-main/src/test/java/io/trino/sql/planner/sanity/TestValidateScaledWritersUsage.java index b980fe60c51eb..0012f7cd62181 100644 --- a/core/trino-main/src/test/java/io/trino/sql/planner/sanity/TestValidateScaledWritersUsage.java +++ b/core/trino-main/src/test/java/io/trino/sql/planner/sanity/TestValidateScaledWritersUsage.java @@ -39,10 +39,11 @@ import io.trino.sql.planner.plan.TableScanNode; import io.trino.testing.LocalQueryRunner; import io.trino.testing.TestingTransactionHandle; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.Optional; @@ -55,10 +56,20 @@ import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; import static io.trino.testing.TestingHandles.createTestCatalogHandle; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public class TestValidateScaledWritersUsage extends BasePlanTest { + private static final PartitioningHandle CUSTOM_HANDLE = new PartitioningHandle( + Optional.of(TEST_CATALOG_HANDLE), + Optional.of(new ConnectorTransactionHandle() { }), + new ConnectorPartitioningHandle() { }, + true); + private LocalQueryRunner queryRunner; private PlannerContext plannerContext; private PlanBuilder planBuilder; 
@@ -67,7 +78,7 @@ public class TestValidateScaledWritersUsage private CatalogHandle catalog; private SchemaTableName schemaTableName; - @BeforeClass + @BeforeAll public void setup() { schemaTableName = new SchemaTableName("any", "any"); @@ -85,7 +96,7 @@ public void setup() tableScanNode = planBuilder.tableScan(nationTableHandle, ImmutableList.of(symbol), ImmutableMap.of(symbol, nationkeyColumnHandle)); } - @AfterClass(alwaysRun = true) + @AfterAll public void tearDown() { queryRunner.close(); @@ -104,8 +115,15 @@ private MockConnectorFactory createConnectorFactory(String name) .build(); } - @Test(dataProvider = "scaledWriterPartitioningHandles") - public void testScaledWritersUsedAndTargetSupportsIt(PartitioningHandle scaledWriterPartitionHandle) + @Test + public void testScaledWritersUsedAndTargetSupportsIt() + { + testScaledWritersUsedAndTargetSupportsIt(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION); + testScaledWritersUsedAndTargetSupportsIt(SCALED_WRITER_HASH_DISTRIBUTION); + testScaledWritersUsedAndTargetSupportsIt(CUSTOM_HANDLE); + } + + private void testScaledWritersUsedAndTargetSupportsIt(PartitioningHandle scaledWriterPartitionHandle) { PlanNode tableWriterSource = planBuilder.exchange(ex -> ex @@ -125,8 +143,15 @@ public void testScaledWritersUsedAndTargetSupportsIt(PartitioningHandle scaledWr validatePlan(root); } - @Test(dataProvider = "scaledWriterPartitioningHandles") - public void testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(PartitioningHandle scaledWriterPartitionHandle) + @Test + public void testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask() + { + testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION); + testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(SCALED_WRITER_HASH_DISTRIBUTION); + testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(CUSTOM_HANDLE); + } + + private void testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(PartitioningHandle scaledWriterPartitionHandle) { PlanNode tableWriterSource = planBuilder.exchange(ex -> ex @@ -149,8 +174,15 @@ public void testScaledWritersUsedAndTargetDoesNotSupportScalingPerTask(Partition .hasMessage("The scaled writer per task partitioning scheme is set but writer target catalog:INSTANCE doesn't support it"); } - @Test(dataProvider = "scaledWriterPartitioningHandles") - public void testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(PartitioningHandle scaledWriterPartitionHandle) + @Test + public void testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks() + { + testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION); + testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(SCALED_WRITER_HASH_DISTRIBUTION); + testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(CUSTOM_HANDLE); + } + + private void testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(PartitioningHandle scaledWriterPartitionHandle) { PlanNode tableWriterSource = planBuilder.exchange(ex -> ex @@ -173,8 +205,15 @@ public void testScaledWritersUsedAndTargetDoesNotSupportScalingAcrossTasks(Parti .hasMessage("The scaled writer across tasks partitioning scheme is set but writer target catalog:INSTANCE doesn't support it"); } - @Test(dataProvider = "scaledWriterPartitioningHandles") - public void testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition(PartitioningHandle scaledWriterPartitionHandle) + @Test + public void 
testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition() + { + testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION); + testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition(SCALED_WRITER_HASH_DISTRIBUTION); + testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition(CUSTOM_HANDLE); + } + + private void testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartition(PartitioningHandle scaledWriterPartitionHandle) { PlanNode tableWriterSource = planBuilder.exchange(ex -> ex @@ -202,8 +241,15 @@ public void testScaledWriterUsedAndTargetDoesNotSupportMultipleWritersPerPartiti } } - @Test(dataProvider = "scaledWriterPartitioningHandles") - public void testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition(PartitioningHandle scaledWriterPartitionHandle) + @Test + public void testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition() + { + testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION); + testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition(SCALED_WRITER_HASH_DISTRIBUTION); + testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition(CUSTOM_HANDLE); + } + + private void testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMultipleWritersPerPartition(PartitioningHandle scaledWriterPartitionHandle) { PlanNode tableWriterSource = planBuilder.exchange(ex -> ex @@ -237,20 +283,6 @@ public void testScaledWriterWithMultipleSourceExchangesAndTargetDoesNotSupportMu } } - @DataProvider - public Object[][] scaledWriterPartitioningHandles() - { - return new Object[][] { - {SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION}, - {SCALED_WRITER_HASH_DISTRIBUTION}, - {new PartitioningHandle( - Optional.of(TEST_CATALOG_HANDLE), - Optional.of(new ConnectorTransactionHandle() {}), - new ConnectorPartitioningHandle() {}, - true)} - }; - } - private void validatePlan(PlanNode root) { queryRunner.inTransaction(session -> { diff --git a/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java b/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java index e50954ddbba7d..72705779f319f 100644 --- a/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java +++ b/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java @@ -64,7 +64,7 @@ import static io.trino.sql.query.QueryAssertions.QueryAssert.newQueryAssert; import static io.trino.testing.TestingHandles.TEST_CATALOG_NAME; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; @@ -571,7 +571,10 @@ public QueryAssert isNotFullyPushedDown(PlanMatchPattern retainedSubplan) /** * Verifies join query is not fully pushed down by containing JOIN node. 
+ * + * @deprecated because the method is not tested in BaseQueryAssertionsTest yet */ + @Deprecated + @CanIgnoreReturnValue public QueryAssert joinIsNotFullyPushedDown() { @@ -580,6 +583,7 @@ public QueryAssert joinIsNotFullyPushedDown() .whereIsInstanceOfAny(JoinNode.class) .findFirst() .isEmpty()) { + // TODO show the plan when the assertion fails (like hasPlan()) and add negative test coverage in BaseQueryAssertionsTest throw new IllegalStateException("Join node should be present in explain plan, when pushdown is not applied"); } }); diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestColumnMask.java b/core/trino-main/src/test/java/io/trino/sql/query/TestColumnMask.java index e1d6cc3420354..bce26d1224945 100644 --- a/core/trino-main/src/test/java/io/trino/sql/query/TestColumnMask.java +++ b/core/trino-main/src/test/java/io/trino/sql/query/TestColumnMask.java @@ -122,8 +122,7 @@ public TestColumnMask() Optional.of(Duration.ZERO), Optional.empty(), Optional.of(VIEW_OWNER), - ImmutableList.of(), - ImmutableMap.of()); + ImmutableList.of()); ConnectorMaterializedViewDefinition freshMaterializedView = new ConnectorMaterializedViewDefinition( "SELECT * FROM local.tiny.nation", @@ -138,8 +137,7 @@ public TestColumnMask() Optional.of(Duration.ZERO), Optional.empty(), Optional.of(VIEW_OWNER), - ImmutableList.of(), - ImmutableMap.of()); + ImmutableList.of()); ConnectorMaterializedViewDefinition materializedViewWithCasts = new ConnectorMaterializedViewDefinition( "SELECT nationkey, cast(name as varchar(1)) as name, regionkey, comment FROM local.tiny.nation", @@ -154,8 +152,7 @@ public TestColumnMask() Optional.of(Duration.ZERO), Optional.empty(), Optional.of(VIEW_OWNER), - ImmutableList.of(), - ImmutableMap.of()); + ImmutableList.of()); MockConnectorFactory mock = MockConnectorFactory.builder() .withGetColumns(schemaTableName -> { diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestJsonTable.java b/core/trino-main/src/test/java/io/trino/sql/query/TestJsonTable.java new file mode 100644 index 0000000000000..c5f15ed662057 --- /dev/null +++ b/core/trino-main/src/test/java/io/trino/sql/query/TestJsonTable.java @@ -0,0 +1,867 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.sql.query; + +import io.trino.Session; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static com.google.common.io.BaseEncoding.base16; +import static io.trino.spi.StandardErrorCode.PATH_EVALUATION_ERROR; +import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; +import static java.nio.charset.StandardCharsets.UTF_16LE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@TestInstance(PER_CLASS) +public class TestJsonTable +{ + private QueryAssertions assertions; + + @BeforeAll + public void init() + { + assertions = new QueryAssertions(); + } + + @AfterAll + public void teardown() + { + assertions.close(); + assertions = null; + } + + @Test + public void testSimple() + { + assertThat(assertions.query(""" + SELECT first, last + FROM (SELECT '{"a" : [1, 2, 3], "b" : [4, 5, 6]}') t(json_col), JSON_TABLE( + json_col, + 'lax $.a' + COLUMNS( + first bigint PATH 'lax $[0]', + last bigint PATH 'lax $[last]')) + """)) + .matches("VALUES (BIGINT '1', BIGINT '3')"); + + assertThat(assertions.query(""" + SELECT * + FROM + (SELECT '{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}') t(json_col), + JSON_TABLE( + json_col, + 'lax $.a' AS "path_a" + COLUMNS( + NESTED PATH 'lax $.b[*]' AS "path_b" + COLUMNS (c1 integer PATH 'lax $ * 10'), + NESTED PATH 'lax $.c' AS "path_c" + COLUMNS ( + NESTED PATH 'lax $[0][*]' AS "path_d" COLUMNS (c2 integer PATH 'lax $ * 100'), + NESTED PATH 'lax $[last][*]' AS "path_e" COLUMNS (c3 integer PATH 'lax $ * 1000'))) + PLAN ("path_a" OUTER ("path_b" UNION ("path_c" INNER ("path_d" CROSS "path_e"))))) + """)) + .matches(""" + VALUES + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', 10, CAST(null AS integer), CAST(null AS integer)), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', 20, null, null), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', 30, null, null), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 400, 7000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 400, 8000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 400, 9000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 500, 7000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 500, 8000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 500, 9000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 600, 7000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 600, 8000), + ('{"a" : {"b" : [1, 2, 3], "c" : [[4, 5, 6], [7, 8, 9]]}}', null, 600, 9000) + """); + } + + @Test + public void testSubqueries() + { + // test subqueries in: context item, value of path parameter "index", empty default, error default + assertThat(assertions.query(""" + SELECT empty_default, error_default + FROM (SELECT '[[1, 2, 3], [4, 5, 6]]') t(json_col), JSON_TABLE( + (SELECT json_col), + 'lax $[$index]' PASSING (SELECT 0) AS "index" + COLUMNS( + empty_default bigint PATH 'lax $[-42]' DEFAULT (SELECT -42) ON EMPTY, + error_default bigint PATH 'strict $[42]' DEFAULT (SELECT 42) ON ERROR)) + """)) + .matches("VALUES (BIGINT '-42', BIGINT '42')"); + } + + @Test + public void testCorrelation() + { + // test correlation in: context item, value of path parameter "index", empty default, 
error default + assertThat(assertions.query(""" + SELECT empty_default, error_default + FROM (SELECT '[[1, 2, 3], [4, 5, 6]]', 0, -42, 42) t(json_col, index_col, empty_default_col, error_default_col), + JSON_TABLE( + json_col, + 'lax $[$index]' PASSING index_col AS "index" + COLUMNS( + empty_default bigint PATH 'lax $[-42]' DEFAULT empty_default_col ON EMPTY, + error_default bigint PATH 'strict $[42]' DEFAULT error_default_col ON ERROR)) + """)) + .matches("VALUES (BIGINT '-42', BIGINT '42')"); + } + + @Test + public void testParameters() + { + // test parameters in: context item, value of path parameter "index", empty default, error default + Session session = Session.builder(assertions.getDefaultSession()) + .addPreparedStatement( + "my_query", + """ + SELECT empty_default, error_default + FROM JSON_TABLE( + ?, + 'lax $[$index]' PASSING ? AS "index" + COLUMNS( + empty_default bigint PATH 'lax $[-42]' DEFAULT ? ON EMPTY, + error_default bigint PATH 'strict $[42]' DEFAULT ? ON ERROR)) + """) + .build(); + assertThat(assertions.query(session, "EXECUTE my_query USING '[[1, 2, 3], [4, 5, 6]]', 0, -42, 42")) + .matches("VALUES (BIGINT '-42', BIGINT '42')"); + } + + @Test + public void testOutputLayout() + { + // first the columns from the left side of the join (json_col, index_col, empty_default_col, error_default_col), next the json_table columns (empty_default, error_default) + assertThat(assertions.query(""" + SELECT * + FROM (SELECT '[[1, 2, 3], [4, 5, 6]]', 0, -42, 42) t(json_col, index_col, empty_default_col, error_default_col), + JSON_TABLE( + json_col, + 'lax $[$index]' PASSING index_col AS "index" + COLUMNS( + empty_default bigint PATH 'lax $[-42]' DEFAULT empty_default_col * 2 ON EMPTY, + error_default bigint PATH 'strict $[42]' DEFAULT error_default_col * 2 ON ERROR)) + """)) + .matches("VALUES ('[[1, 2, 3], [4, 5, 6]]', 0, -42, 42, BIGINT '-84', BIGINT '84')"); + + // json_table columns in order of declaration + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "p" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $' AS "p1" + COLUMNS ( + b varchar(1) PATH 'lax "B"', + NESTED PATH 'lax $' AS "p2 "COLUMNS ( + c varchar(1) PATH 'lax "C"', + d varchar(1) PATH 'lax "D"'), + e varchar(1) PATH 'lax "E"'), + f varchar(1) PATH 'lax "F"', + NESTED PATH 'lax $' AS "p3" + COLUMNS (g varchar(1) PATH 'lax "G"'), + h varchar(1) PATH 'lax "H"') + PLAN DEFAULT (CROSS)) + """)) + .matches("VALUES ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H')"); + } + + @Test + public void testJoinTypes() + { + // implicit CROSS join + assertThat(assertions.query(""" + SELECT * + FROM (VALUES ('[1, 2, 3]'), ('[4, 5, 6, 7, 8]')) t(json_col), + JSON_TABLE( + json_col, + 'lax $[4]' + COLUMNS(a integer PATH 'lax $')) + """)) + .matches("VALUES ('[4, 5, 6, 7, 8]', 8)"); + + // INNER join + assertThat(assertions.query(""" + SELECT * + FROM (VALUES ('[1, 2, 3]'), ('[4, 5, 6, 7, 8]')) t(json_col) + INNER JOIN + JSON_TABLE( + json_col, + 'lax $[4]' + COLUMNS(a integer PATH 'lax $')) + ON TRUE + """)) + .matches("VALUES ('[4, 5, 6, 7, 8]', 8)"); + + // LEFT join + assertThat(assertions.query(""" + SELECT * + FROM (VALUES ('[1, 2, 3]'), ('[4, 5, 6, 7, 8]')) t(json_col) + LEFT JOIN + JSON_TABLE( + json_col, + 'lax $[4]' + COLUMNS(a integer PATH 'lax $')) + ON TRUE + """)) + .matches(""" + VALUES + ('[1, 2, 3]', CAST(null AS integer)), + ('[4, 5, 6, 7, 8]', 8) + """); + + // RIGHT join is effectively INNER. 
Correlation is not allowed in RIGHT join + assertThat(assertions.query(""" + SELECT * + FROM (VALUES 1) t(x) + RIGHT JOIN + JSON_TABLE( + '[1, 2, 3]', + 'lax $[4]' + COLUMNS(a integer PATH 'lax $')) + ON TRUE + """)) + .returnsEmptyResult(); + + // FULL join. Correlation is not allowed in FULL join + assertThat(assertions.query(""" + SELECT * + FROM (VALUES 1) t(x) + FULL JOIN + JSON_TABLE( + '[1, 2, 3]', + 'lax $[4]' + COLUMNS(a integer PATH 'lax $')) + ON TRUE + """)) + .matches("VALUES (1, CAST(null AS integer))"); + } + + @Test + public void testParentChildRelationship() + { + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path" + COLUMNS (b varchar(1) PATH 'lax "B"')) + PLAN ("root_path" OUTER "nested_path")) + """)) + .matches("VALUES ('A', CAST(null AS varchar(1)))"); + + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path" + COLUMNS (b varchar(1) PATH 'lax "B"')) + PLAN ("root_path" INNER "nested_path")) + """)) + .returnsEmptyResult(); + + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[[], [1]]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS ( + b varchar(1) PATH 'lax "B"', + NESTED PATH 'lax $[*]' AS "nested_path_2" + COLUMNS( + c varchar(1) PATH 'lax "C"'))) + PLAN ("root_path" OUTER ("nested_path_1" OUTER "nested_path_2"))) + """)) + .matches(""" + VALUES + ('A', 'B', CAST(null AS varchar(1))), + ('A', 'B', 'C') + """); + + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[[], [1]]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS ( + b varchar(1) PATH 'lax "B"', + NESTED PATH 'lax $[*]' AS "nested_path_2" + COLUMNS( + c varchar(1) PATH 'lax "C"'))) + PLAN ("root_path" OUTER ("nested_path_1" INNER "nested_path_2"))) + """)) + .matches("VALUES ('A', 'B', 'C')"); + + // intermediately nested path returns empty sequence + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS ( + b varchar(1) PATH 'lax "B"', + NESTED PATH 'lax $' AS "nested_path_2" + COLUMNS( + c varchar(1) PATH 'lax "C"'))) + PLAN ("root_path" OUTER ("nested_path_1" INNER "nested_path_2"))) + """)) + .matches("VALUES ('A', CAST(null AS varchar(1)), CAST(null AS varchar(1)))"); + } + + @Test + public void testSiblingsRelationship() + { + // each sibling produces 1 row + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $' AS "nested_path_b" + COLUMNS (b varchar(1) PATH 'lax "B"'), + NESTED PATH 'lax $' AS "nested_path_c" + COLUMNS (c varchar(1) PATH 'lax "C"'), + NESTED PATH 'lax $' AS "nested_path_d" + COLUMNS (d varchar(1) PATH 'lax "D"')) + PLAN ("root_path" INNER ("nested_path_c" UNION ("nested_path_d" CROSS "nested_path_b")))) + """)) + .matches(""" + VALUES + ('A', CAST(null AS varchar(1)), 'C', CAST(null AS varchar(1))), + ('A', 'B', CAST(null AS varchar(1)), 'D') + """); + + // each sibling produces 2 rows + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[10, 1000]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 
'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS (b integer PATH 'lax $ * 1'), + NESTED PATH 'lax $[*]' AS "nested_path_2" + COLUMNS (c integer PATH 'lax $ * 2'), + NESTED PATH 'lax $[*]' AS "nested_path_3" + COLUMNS (d integer PATH 'lax $ * 3')) + PLAN ("root_path" INNER ("nested_path_2" UNION ("nested_path_3" CROSS "nested_path_1")))) + """)) + .matches(""" + VALUES + ('A', CAST(null AS integer), 20, CAST(null AS integer)), + ('A', null, 2000, null), + ('A', 10, null, 30), + ('A', 10, null, 3000), + ('A', 1000, null, 30), + ('A', 1000, null, 3000) + """); + + // one sibling produces empty result -- CROSS result is empty + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[10, 1000]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS (b integer PATH 'lax $ * 1'), + NESTED PATH 'lax $[42]' AS "nested_path_2" + COLUMNS (c integer PATH 'lax $ * 2')) + PLAN ("root_path" INNER ("nested_path_1" CROSS "nested_path_2"))) + """)) + .returnsEmptyResult(); + + // one sibling produces empty result -- UNION result contains the other sibling's result + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[10, 1000]', + 'lax $' AS "root_path" + COLUMNS( + a varchar(1) PATH 'lax "A"', + NESTED PATH 'lax $[*]' AS "nested_path_1" + COLUMNS (b integer PATH 'lax $ * 1'), + NESTED PATH 'lax $[42]' AS "nested_path_2" + COLUMNS (c integer PATH 'lax $ * 2')) + PLAN ("root_path" INNER ("nested_path_1" UNION "nested_path_2"))) + """)) + .matches(""" + VALUES + ('A', 10, CAST(null AS integer)), + ('A', 1000, null) + """); + } + + @Test + public void testImplicitColumnPath() + { + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '{"A" : 42, "b" : true}', + 'lax $' + COLUMNS( + a integer, + "b" boolean)) + """)) + .matches("VALUES (42, true)"); + + // the implicit column path is 'lax $.C'. 
It produces empty sequence, so the ON EMPTY clause determines the result + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '{"A" : 42, "b" : true}', + 'lax $' + COLUMNS(c varchar (5) DEFAULT 'empty' ON EMPTY DEFAULT 'error' ON ERROR)) + """)) + .matches("VALUES 'empty'"); + } + + @Test + public void testRootPathErrorHandling() + { + // error during root path evaluation handled according to top level EMPTY ON ERROR clause + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'strict $[42]' + COLUMNS(a integer PATH 'lax 1') + EMPTY ON ERROR) + """)) + .returnsEmptyResult(); + + // error during root path evaluation handled according to top level ON ERROR clause which defaults to EMPTY ON ERROR + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'strict $[42]' + COLUMNS(a integer PATH 'lax 1')) + """)) + .returnsEmptyResult(); + + // error during root path evaluation handled according to top level ERROR ON ERROR clause + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'strict $[42]' + COLUMNS(a integer PATH 'lax 1') + ERROR ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: structural error: invalid array subscript for empty array"); + } + + @Test + public void testNestedPathErrorHandling() + { + // error during nested path evaluation handled according to top level EMPTY ON ERROR clause + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a integer PATH 'lax 1', + NESTED PATH 'strict $[42]' AS "nested_path" + COLUMNS(b integer PATH 'lax 2')) + PLAN DEFAULT(INNER) + EMPTY ON ERROR) + """)) + .returnsEmptyResult(); + + // error during nested path evaluation handled according to top level ON ERROR clause which defaults to EMPTY ON ERROR + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a integer PATH 'lax 1', + NESTED PATH 'strict $[42]' AS "nested_path" + COLUMNS(b integer PATH 'lax 2')) + PLAN DEFAULT(INNER)) + """)) + .returnsEmptyResult(); + + // error during nested path evaluation handled according to top level ERROR ON ERROR clause + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' AS "root_path" + COLUMNS( + a integer PATH 'lax 1', + NESTED PATH 'strict $[42]' AS "nested_path" + COLUMNS(b integer PATH 'lax 2')) + PLAN DEFAULT(INNER) + ERROR ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: structural error: invalid array subscript for empty array"); + } + + @Test + public void testColumnPathErrorHandling() + { + // error during column path evaluation handled according to column's ERROR ON ERROR clause + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' + COLUMNS(a integer PATH 'strict $[42]' ERROR ON ERROR) + EMPTY ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: structural error: invalid array subscript for empty array"); + + // error during column path evaluation handled according to column's ON ERROR clause which defaults to NULL ON ERROR + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' + COLUMNS(a integer PATH 'strict $[42]') + EMPTY ON ERROR) + """)) + .matches("VALUES CAST(null as integer)"); + + // error during column path evaluation handled according to column's ON ERROR clause which 
defaults to ERROR ON ERROR because the top level error behavior is ERROR ON ERROR + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[]', + 'lax $' + COLUMNS(a integer PATH 'strict $[42]') + ERROR ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: structural error: invalid array subscript for empty array"); + } + + @Test + public void testEmptyInput() + { + assertThat(assertions.query(""" + SELECT * + FROM (SELECT '[]' WHERE rand() > 1) t(json_col), + JSON_TABLE( + json_col, + 'lax $' + COLUMNS(a integer PATH 'lax 1')) + """)) + .returnsEmptyResult(); + } + + @Test + public void testNullInput() + { + // if input is null, json_table returns empty result + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + CAST (null AS varchar), + 'lax $' + COLUMNS(a integer PATH 'lax 1')) + """)) + .returnsEmptyResult(); + + assertThat(assertions.query(""" + SELECT * + FROM (VALUES (CAST(null AS varchar)), (CAST(null AS varchar)), (CAST(null AS varchar))) t(json_col), + JSON_TABLE( + json_col, + 'lax $' + COLUMNS(a integer PATH 'lax 1')) + """)) + .returnsEmptyResult(); + + assertThat(assertions.query(""" + SELECT * + FROM (VALUES (CAST(null AS varchar)), (CAST(null AS varchar)), (CAST(null AS varchar))) t(json_col), + JSON_TABLE( + json_col, + 'lax $' + COLUMNS( + NESTED PATH 'lax $' + COLUMNS(a integer PATH 'lax 1'))) + """)) + .returnsEmptyResult(); + + // null as formatted input evaluates to empty sequence. json_table returns empty result + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + CAST (null AS varchar) FORMAT JSON, + 'lax $' + COLUMNS(a varchar FORMAT JSON PATH 'lax $')) + """)) + .returnsEmptyResult(); + } + + @Test + public void testNullPathParameter() + { + // null as SQL-value parameter "index" is evaluated to a JSON null, and causes type mismatch + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM (SELECT '[1, 2, 3]', CAST(null AS integer)) t(json_col, index_col), + JSON_TABLE( + json_col, + 'lax $[$index]' PASSING index_col AS "index" + COLUMNS(a integer PATH 'lax 1') + ERROR ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: invalid item type. 
Expected: NUMBER, actual: NULL"); + + // null as JSON (formatted) parameter "index" evaluates to empty sequence, and causes type mismatch + assertTrinoExceptionThrownBy(() -> assertions.query(""" + SELECT * + FROM (SELECT '[1, 2, 3]', CAST(null AS varchar)) t(json_col, index_col), + JSON_TABLE( + json_col, + 'lax $[$index]' PASSING index_col FORMAT JSON AS "index" + COLUMNS(a integer PATH 'lax 1') + ERROR ON ERROR) + """)) + .hasErrorCode(PATH_EVALUATION_ERROR) + .hasMessage("path evaluation failed: array subscript 'from' value must be singleton numeric"); + } + + @Test + public void testNullDefaultValue() + { + assertThat(assertions.query(""" + SELECT a + FROM (SELECT null) t(empty_default), + JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a integer PATH 'lax $[42]' DEFAULT empty_default ON EMPTY DEFAULT -1 ON ERROR)) + """)) + .matches("VALUES CAST(null AS integer)"); + + assertThat(assertions.query(""" + SELECT a + FROM (SELECT null) t(error_default), + JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a integer PATH 'strict $[42]' DEFAULT -1 ON EMPTY DEFAULT error_default ON ERROR)) + """)) + .matches("VALUES CAST(null AS integer)"); + } + + @Test + public void testValueColumnCoercion() + { + // returned value cast to declared type + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a real PATH 'lax $[last]')) + """)) + .matches("VALUES REAL '3'"); + + // default value cast to declared type + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a real PATH 'lax $[42]' DEFAULT 42 ON EMPTY)) + """)) + .matches("VALUES REAL '42'"); + + // default ON EMPTY value is null. It is cast to declared type + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a real PATH 'lax $[42]')) + """)) + .matches("VALUES CAST(null AS REAL)"); + + // default value cast to declared type + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a real PATH 'strict $[42]' DEFAULT 42 ON ERROR)) + """)) + .matches("VALUES REAL '42'"); + + // default ON ERROR value is null. 
It is cast to declared type + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[1, 2, 3]', + 'lax $' + COLUMNS(a real PATH 'strict $[42]')) + """)) + .matches("VALUES CAST(null AS REAL)"); + } + + @Test + public void testQueryColumnFormat() + { + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[{"a" : true}]', + 'lax $' + COLUMNS(a varchar(50) FORMAT JSON PATH 'lax $[0]')) + """)) + .matches("VALUES CAST('{\"a\":true}' AS VARCHAR(50))"); + + String varbinaryLiteral = "X'" + base16().encode("{\"a\":true}".getBytes(UTF_16LE)) + "'"; + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[{"a" : true}]', + 'lax $' + COLUMNS(a varbinary FORMAT JSON ENCODING UTF16 PATH 'lax $[0]')) + """)) + .matches("VALUES " + varbinaryLiteral); + + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[{"a" : true}]', + 'lax $' + COLUMNS(a char(50) FORMAT JSON PATH 'lax $[42]' EMPTY OBJECT ON EMPTY)) + """)) + .matches("VALUES CAST('{}' AS CHAR(50))"); + + varbinaryLiteral = "X'" + base16().encode("[]".getBytes(UTF_16LE)) + "'"; + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[{"a" : true}]', + 'lax $' + COLUMNS(a varbinary FORMAT JSON ENCODING UTF16 PATH 'strict $[42]' EMPTY ARRAY ON ERROR)) + """)) + .matches("VALUES " + varbinaryLiteral); + + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '[{"a" : true}]', + 'lax $' + COLUMNS(a varbinary FORMAT JSON ENCODING UTF16 PATH 'lax $[42]' NULL ON EMPTY)) + """)) + .matches("VALUES CAST(null AS VARBINARY)"); + } + + @Test + public void testOrdinalityColumn() + { + assertThat(assertions.query(""" + SELECT * + FROM JSON_TABLE( + '["a", "b", "c", "d", "e", "f", "g", "h"]', + 'lax $[*]' AS "root_path" + COLUMNS( + o FOR ORDINALITY, + x varchar(1) PATH 'lax $')) + """)) + .matches(""" + VALUES + (BIGINT '1', 'a'), + (2, 'b'), + (3, 'c'), + (4, 'd'), + (5, 'e'), + (6, 'f'), + (7, 'g'), + (8, 'h') + """); + + assertThat(assertions.query(""" + SELECT * + FROM (VALUES + ('[["a", "b"], ["c", "d"], ["e", "f"]]'), + ('[["g", "h"], ["i", "j"], ["k", "l"]]')) t(json_col), + JSON_TABLE( + json_col, + 'lax $' AS "root_path" + COLUMNS( + o FOR ORDINALITY, + NESTED PATH 'lax $[0][*]' AS "nested_path_1" + COLUMNS ( + x1 varchar PATH 'lax $', + o1 FOR ORDINALITY), + NESTED PATH 'lax $[1][*]' AS "nested_path_2" + COLUMNS ( + x2 varchar PATH 'lax $', + o2 FOR ORDINALITY), + NESTED PATH 'lax $[2][*]' AS "nested_path_3" + COLUMNS ( + x3 varchar PATH 'lax $', + o3 FOR ORDINALITY)) + PLAN ("root_path" INNER ("nested_path_2" UNION ("nested_path_3" CROSS "nested_path_1")))) + """)) + .matches(""" + VALUES + ('[["a", "b"], ["c", "d"], ["e", "f"]]', BIGINT '1', VARCHAR 'a', BIGINT '1', CAST(null AS varchar), CAST(null AS bigint), VARCHAR 'e', BIGINT '1'), + ('[["a", "b"], ["c", "d"], ["e", "f"]]', 1, 'a', 1, null, null, 'f', 2), + ('[["a", "b"], ["c", "d"], ["e", "f"]]', 1, 'b', 2, null, null, 'e', 1), + ('[["a", "b"], ["c", "d"], ["e", "f"]]', 1, 'b', 2, null, null, 'f', 2), + ('[["a", "b"], ["c", "d"], ["e", "f"]]', 1, null, null, 'c', 1, null, null), + ('[["a", "b"], ["c", "d"], ["e", "f"]]', 1, null, null, 'd', 2, null, null), + + ('[["g", "h"], ["i", "j"], ["k", "l"]]', 1, VARCHAR 'g', BIGINT '1', CAST(null AS varchar), CAST(null AS bigint), VARCHAR 'k', BIGINT '1'), + ('[["g", "h"], ["i", "j"], ["k", "l"]]', 1, 'g', 1, null, null, 'l', 2), + ('[["g", "h"], ["i", "j"], ["k", "l"]]', 1, 'h', 2, null, null, 'k', 1), + ('[["g", "h"], ["i", "j"], ["k", "l"]]', 1, 'h', 2, null, null, 'l', 2), + ('[["g", 
"h"], ["i", "j"], ["k", "l"]]', 1, null, null, 'i', 1, null, null), + ('[["g", "h"], ["i", "j"], ["k", "l"]]', 1, null, null, 'j', 2, null, null) + """); + } +} diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestWindow.java b/core/trino-main/src/test/java/io/trino/sql/query/TestWindow.java index 9996aae42631e..0545ffc9eb2ac 100644 --- a/core/trino-main/src/test/java/io/trino/sql/query/TestWindow.java +++ b/core/trino-main/src/test/java/io/trino/sql/query/TestWindow.java @@ -36,7 +36,7 @@ public void teardown() } @Test - @Timeout(2) + @Timeout(5) public void testManyFunctionsWithSameWindow() { assertThat(assertions.query(""" diff --git a/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlFunctions.java b/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlFunctions.java index 8807b83c3c398..e87783adf57ac 100644 --- a/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlFunctions.java +++ b/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlFunctions.java @@ -49,8 +49,8 @@ import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.transaction.InMemoryTransactionManager.createTestTransactionManager; -import static io.trino.transaction.TransactionBuilder.transaction; import static io.trino.type.UnknownType.UNKNOWN; import static java.lang.Math.floor; import static org.assertj.core.api.Assertions.assertThat; diff --git a/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlRoutineAnalyzer.java b/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlRoutineAnalyzer.java index 4d8c325926481..e0cddbc75523d 100644 --- a/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlRoutineAnalyzer.java +++ b/core/trino-main/src/test/java/io/trino/sql/routine/TestSqlRoutineAnalyzer.java @@ -33,8 +33,8 @@ import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; import static io.trino.sql.planner.TestingPlannerContext.plannerContextBuilder; import static io.trino.testing.TestingSession.testSession; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; -import static io.trino.transaction.TransactionBuilder.transaction; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.from; diff --git a/core/trino-main/src/test/java/io/trino/testing/TestTestingMetadata.java b/core/trino-main/src/test/java/io/trino/testing/TestTestingMetadata.java index 8b5bad533ce56..d188741497da5 100644 --- a/core/trino-main/src/test/java/io/trino/testing/TestTestingMetadata.java +++ b/core/trino-main/src/test/java/io/trino/testing/TestTestingMetadata.java @@ -43,7 +43,7 @@ private void testRenameMaterializedView(String source, String target) SchemaTableName newName = schemaTableName("schema", target); TestingMetadata metadata = new TestingMetadata(); ConnectorMaterializedViewDefinition viewDefinition = someMaterializedView(); - metadata.createMaterializedView(SESSION, initialName, viewDefinition, false, false); + metadata.createMaterializedView(SESSION, initialName, viewDefinition, ImmutableMap.of(), false, false); metadata.renameMaterializedView(SESSION, initialName, newName); @@ -62,7 +62,6 @@ private static ConnectorMaterializedViewDefinition someMaterializedView() Optional.of(Duration.ZERO), 
Optional.empty(), Optional.of("owner"), - ImmutableList.of(), - ImmutableMap.of()); + ImmutableList.of()); } } diff --git a/core/trino-main/src/test/java/io/trino/type/TestJsonPath2016TypeSerialization.java b/core/trino-main/src/test/java/io/trino/type/TestJsonPath2016TypeSerialization.java index 7e28a1efc9ebf..70cbf37a7d6c1 100644 --- a/core/trino-main/src/test/java/io/trino/type/TestJsonPath2016TypeSerialization.java +++ b/core/trino-main/src/test/java/io/trino/type/TestJsonPath2016TypeSerialization.java @@ -71,7 +71,7 @@ public class TestJsonPath2016TypeSerialization { - private static final Type JSON_PATH_2016 = new JsonPath2016Type(new TypeDeserializer(TESTING_TYPE_MANAGER), new TestingBlockEncodingSerde()); + public static final Type JSON_PATH_2016 = new JsonPath2016Type(new TypeDeserializer(TESTING_TYPE_MANAGER), new TestingBlockEncodingSerde()); private static final RecursiveComparisonConfiguration COMPARISON_CONFIGURATION = RecursiveComparisonConfiguration.builder().withStrictTypeChecking(true).build(); @Test diff --git a/core/trino-main/src/test/java/io/trino/dispatcher/TestDecoratingListeningExecutorService.java b/core/trino-main/src/test/java/io/trino/util/TestDecoratingListeningExecutorService.java similarity index 97% rename from core/trino-main/src/test/java/io/trino/dispatcher/TestDecoratingListeningExecutorService.java rename to core/trino-main/src/test/java/io/trino/util/TestDecoratingListeningExecutorService.java index f942a64f73109..ab45726cb3a0b 100644 --- a/core/trino-main/src/test/java/io/trino/dispatcher/TestDecoratingListeningExecutorService.java +++ b/core/trino-main/src/test/java/io/trino/util/TestDecoratingListeningExecutorService.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.dispatcher; +package io.trino.util; import com.google.common.util.concurrent.ListeningExecutorService; import org.junit.jupiter.api.Test; diff --git a/core/trino-main/src/test/java/io/trino/version/TestEmbedVersion.java b/core/trino-main/src/test/java/io/trino/util/TestEmbedVersion.java similarity index 99% rename from core/trino-main/src/test/java/io/trino/version/TestEmbedVersion.java rename to core/trino-main/src/test/java/io/trino/util/TestEmbedVersion.java index 678ccd9faf6d7..82c8345d46260 100644 --- a/core/trino-main/src/test/java/io/trino/version/TestEmbedVersion.java +++ b/core/trino-main/src/test/java/io/trino/util/TestEmbedVersion.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.version; +package io.trino.util; import org.junit.jupiter.api.Test; diff --git a/core/trino-main/src/test/java/io/trino/util/TestLongLong2LongOpenCustomBigHashMap.java b/core/trino-main/src/test/java/io/trino/util/TestLongLong2LongOpenCustomBigHashMap.java index e51ccfa23b876..b4a1b0164dbae 100644 --- a/core/trino-main/src/test/java/io/trino/util/TestLongLong2LongOpenCustomBigHashMap.java +++ b/core/trino-main/src/test/java/io/trino/util/TestLongLong2LongOpenCustomBigHashMap.java @@ -13,8 +13,7 @@ */ package io.trino.util; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.List; @@ -38,14 +37,16 @@ public boolean equals(long a1, long a2, long b1, long b2) } }; - @DataProvider - public static Object[][] nullKeyValues() + @Test + public void testBasicOps() { - return new Object[][] {{0L, 0L}, {1L, 1L}, {-1L, -1L}, {0L, -1L}}; + testBasicOps(0L, 0L); + testBasicOps(1L, 1L); + testBasicOps(-1L, -1L); + testBasicOps(0L, -1L); } - @Test(dataProvider = "nullKeyValues") - public void testBasicOps(long nullKey1, long nullKey2) + private void testBasicOps(long nullKey1, long nullKey2) { int expected = 100_000; LongLong2LongOpenCustomBigHashMap map = new LongLong2LongOpenCustomBigHashMap(expected, DEFAULT_STRATEGY, nullKey1, nullKey2); @@ -101,8 +102,16 @@ public void testBasicOps(long nullKey1, long nullKey2) } } - @Test(dataProvider = "nullKeyValues") - public void testHashCollision(long nullKey1, long nullKey2) + @Test + public void testHashCollision() + { + testHashCollision(0L, 0L); + testHashCollision(1L, 1L); + testHashCollision(-1L, -1L); + testHashCollision(0L, -1L); + } + + private void testHashCollision(long nullKey1, long nullKey2) { LongLong2LongOpenCustomBigHashMap.HashStrategy collisionHashStrategy = new LongLong2LongOpenCustomBigHashMap.HashStrategy() { @@ -168,8 +177,16 @@ public boolean equals(long a1, long a2, long b1, long b2) } } - @Test(dataProvider = "nullKeyValues") - public void testRehash(long nullKey1, long nullKey2) + @Test + public void testRehash() + { + testRehash(0L, 0L); + testRehash(1L, 1L); + testRehash(-1L, -1L); + testRehash(0L, -1L); + } + + private void testRehash(long nullKey1, long nullKey2) { int initialCapacity = 1; LongLong2LongOpenCustomBigHashMap map = new LongLong2LongOpenCustomBigHashMap(initialCapacity, DEFAULT_STRATEGY, nullKey1, nullKey2); diff --git a/core/trino-parser/pom.xml b/core/trino-parser/pom.xml index fa2fe342cbccd..b1b7fed9685d5 100644 --- a/core/trino-parser/pom.xml +++ b/core/trino-parser/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java b/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java index e3be63349577b..b15687e98550e 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java +++ b/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java @@ -40,13 +40,13 @@ public final class PathParser { private final BaseErrorListener errorListener; - public PathParser(Location startLocation) + public static PathParser withRelativeErrorLocation(Location startLocation) { requireNonNull(startLocation, "startLocation is null"); int pathStartLine = startLocation.line(); int pathStartColumn = startLocation.column(); - this.errorListener = new BaseErrorListener() + return new PathParser(new BaseErrorListener() { @Override public void 
syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String message, RecognitionException e) @@ -58,7 +58,26 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int int columnInQuery = line == 1 ? pathStartColumn + 1 + charPositionInLine : charPositionInLine + 1; throw new ParsingException(message, e, lineInQuery, columnInQuery); } - }; + }); + } + + public static PathParser withFixedErrorLocation(Location location) + { + requireNonNull(location, "location is null"); + + return new PathParser(new BaseErrorListener() + { + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String message, RecognitionException e) + { + throw new ParsingException(message, e, location.line, location.column); + } + }); + } + + private PathParser(BaseErrorListener errorListener) + { + this.errorListener = requireNonNull(errorListener, "errorListener is null"); } public PathNode parseJsonPath(String path) diff --git a/core/trino-parser/src/main/java/io/trino/sql/tree/FunctionCall.java b/core/trino-parser/src/main/java/io/trino/sql/tree/FunctionCall.java index a24d0787a0195..8103fde855d1f 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/tree/FunctionCall.java +++ b/core/trino-parser/src/main/java/io/trino/sql/tree/FunctionCall.java @@ -58,7 +58,7 @@ public FunctionCall( super(location); requireNonNull(name, "name is null"); requireNonNull(window, "window is null"); - window.ifPresent(node -> checkArgument(node instanceof WindowReference || node instanceof WindowSpecification, "unexpected window: " + node.getClass().getSimpleName())); + window.ifPresent(node -> checkArgument(node instanceof WindowReference || node instanceof WindowSpecification, "unexpected window: %s", node.getClass().getSimpleName())); requireNonNull(filter, "filter is null"); requireNonNull(orderBy, "orderBy is null"); requireNonNull(nullTreatment, "nullTreatment is null"); diff --git a/core/trino-parser/src/main/java/io/trino/sql/tree/PlanSiblings.java b/core/trino-parser/src/main/java/io/trino/sql/tree/PlanSiblings.java index 844eb4ebb8dcb..46b7a91de48ed 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/tree/PlanSiblings.java +++ b/core/trino-parser/src/main/java/io/trino/sql/tree/PlanSiblings.java @@ -33,7 +33,7 @@ public PlanSiblings(NodeLocation location, SiblingsPlanType type, List= 2, "sibling plan must contain at least two siblings, actual: " + siblings.size()); + checkArgument(siblings.size() >= 2, "sibling plan must contain at least two siblings, actual: %s", siblings.size()); } public SiblingsPlanType getType() diff --git a/core/trino-parser/src/main/java/io/trino/sql/tree/SkipTo.java b/core/trino-parser/src/main/java/io/trino/sql/tree/SkipTo.java index 4b4e8c2bdf19c..1742e476fc8ed 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/tree/SkipTo.java +++ b/core/trino-parser/src/main/java/io/trino/sql/tree/SkipTo.java @@ -107,8 +107,8 @@ private SkipTo(Optional location, Position position, Optional location, Identifier name, Window super(location); requireNonNull(name, "name is null"); requireNonNull(window, "window is null"); - checkArgument(window instanceof WindowReference || window instanceof WindowSpecification, "unexpected window: " + window.getClass().getSimpleName()); + checkArgument(window instanceof WindowReference || window instanceof WindowSpecification, "unexpected window: %s", window.getClass().getSimpleName()); this.name = name; this.window = window; diff --git 
a/core/trino-parser/src/test/java/io/trino/sql/jsonpath/TestPathParser.java b/core/trino-parser/src/test/java/io/trino/sql/jsonpath/TestPathParser.java index f294b2299d69c..b645c75c0b3e6 100644 --- a/core/trino-parser/src/test/java/io/trino/sql/jsonpath/TestPathParser.java +++ b/core/trino-parser/src/test/java/io/trino/sql/jsonpath/TestPathParser.java @@ -75,7 +75,7 @@ public class TestPathParser { - private static final PathParser PATH_PARSER = new PathParser(new Location(1, 0)); + private static final PathParser PATH_PARSER = PathParser.withRelativeErrorLocation(new Location(1, 0)); private static final RecursiveComparisonConfiguration COMPARISON_CONFIGURATION = RecursiveComparisonConfiguration.builder().withStrictTypeChecking(true).build(); @Test diff --git a/core/trino-server-main/pom.xml b/core/trino-server-main/pom.xml index 247c271598537..ea33c72a0c0e4 100644 --- a/core/trino-server-main/pom.xml +++ b/core/trino-server-main/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/core/trino-server-main/src/main/java/io/trino/server/TrinoServer.java b/core/trino-server-main/src/main/java/io/trino/server/TrinoServer.java index 8cdd002f28666..b53af87c41ed5 100644 --- a/core/trino-server-main/src/main/java/io/trino/server/TrinoServer.java +++ b/core/trino-server-main/src/main/java/io/trino/server/TrinoServer.java @@ -29,8 +29,8 @@ public static void main(String[] args) String javaVersion = nullToEmpty(StandardSystemProperty.JAVA_VERSION.value()); String majorVersion = javaVersion.split("\\D", 2)[0]; Integer major = Ints.tryParse(majorVersion); - if (major == null || major < 17) { - System.err.println(format("ERROR: Trino requires Java 17+ (found %s)", javaVersion)); + if (major == null || major < 21) { + System.err.println(format("ERROR: Trino requires Java 21+ (found %s)", javaVersion)); System.exit(100); } diff --git a/core/trino-server-rpm/pom.xml b/core/trino-server-rpm/pom.xml index 29c66bd4f0fdd..fde598314adbe 100644 --- a/core/trino-server-rpm/pom.xml +++ b/core/trino-server-rpm/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/core/trino-server-rpm/src/main/rpm/preinstall b/core/trino-server-rpm/src/main/rpm/preinstall index 0bf1e4a8fae4a..dbbf425b2c6cb 100644 --- a/core/trino-server-rpm/src/main/rpm/preinstall +++ b/core/trino-server-rpm/src/main/rpm/preinstall @@ -22,7 +22,7 @@ check_if_correct_java_version() { # candidate for JAVA_HOME). JAVA_VERSION=$(java_version "$1") JAVA_MAJOR=$(echo "$JAVA_VERSION" | cut -d'.' -f1) - if [ "$JAVA_MAJOR" -ge "17" ]; then + if [ "$JAVA_MAJOR" -ge "21" ]; then echo "$1" >/tmp/trino-rpm-install-java-home return 0 else @@ -34,10 +34,6 @@ check_if_correct_java_version() { if ! check_if_correct_java_version "$JAVA_HOME"; then java_found=false for candidate in \ - /usr/lib/jvm/java-17-* \ - /usr/lib/jvm/zulu-17 \ - /usr/lib/jvm/temurin-17 \ - /usr/lib/jvm/temurin-17-* \ /usr/lib/jvm/java-21-* \ /usr/lib/jvm/zulu-21 \ /usr/lib/jvm/temurin-21 \ @@ -61,7 +57,7 @@ if [ "$java_found" = false ]; then +======================================================================+ | Error: Required Java version could not be found | +----------------------------------------------------------------------+ -| JDK 17 was not detected. | +| JDK 21 was not detected. | | Recommended JDK distribution is Eclipse Temurin. 
| | Installation guide: https://adoptium.net/installation/linux/ | | | diff --git a/core/trino-server-rpm/src/test/java/io/trino/server/rpm/ServerIT.java b/core/trino-server-rpm/src/test/java/io/trino/server/rpm/ServerIT.java index 37b6dbb0b1f24..2b52768005263 100644 --- a/core/trino-server-rpm/src/test/java/io/trino/server/rpm/ServerIT.java +++ b/core/trino-server-rpm/src/test/java/io/trino/server/rpm/ServerIT.java @@ -56,7 +56,6 @@ public ServerIT() @Test public void testInstall() { - testInstall("17"); testInstall("21"); } @@ -107,7 +106,6 @@ private void testInstall(String javaVersion) public void testUninstall() throws Exception { - testUninstall("17"); testUninstall("21"); } diff --git a/core/trino-server/pom.xml b/core/trino-server/pom.xml index b538b06c7047a..c396e07ec6909 100644 --- a/core/trino-server/pom.xml +++ b/core/trino-server/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/core/trino-server/src/main/provisio/trino.xml b/core/trino-server/src/main/provisio/trino.xml index 7cceff9360dda..9041aba020074 100644 --- a/core/trino-server/src/main/provisio/trino.xml +++ b/core/trino-server/src/main/provisio/trino.xml @@ -114,7 +114,7 @@ - + @@ -218,6 +218,12 @@ + + + + + + diff --git a/core/trino-server/src/test/java/io/trino/server/TestDummy.java b/core/trino-server/src/test/java/io/trino/server/TestDummy.java new file mode 100644 index 0000000000000..b560df431cb69 --- /dev/null +++ b/core/trino-server/src/test/java/io/trino/server/TestDummy.java @@ -0,0 +1,22 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.server; + +import org.junit.jupiter.api.Test; + +public class TestDummy +{ + @Test + public void buildRequiresTestToExist() {} +} diff --git a/core/trino-spi/pom.xml b/core/trino-spi/pom.xml index 1f76552d4fc17..6817dfaf7dd98 100644 --- a/core/trino-spi/pom.xml +++ b/core/trino-spi/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -212,6 +212,20 @@ + + java.method.numberOfParametersChanged + method void io.trino.spi.connector.ConnectorMaterializedViewDefinition::<init>(java.lang.String, java.util.Optional<io.trino.spi.connector.CatalogSchemaTableName>, java.util.Optional<java.lang.String>, java.util.Optional<java.lang.String>, java.util.List<io.trino.spi.connector.ConnectorMaterializedViewDefinition.Column>, java.util.Optional<java.time.Duration>, java.util.Optional<java.lang.String>, java.util.Optional<java.lang.String>, java.util.List<io.trino.spi.connector.CatalogSchemaName>, java.util.Map<java.lang.String, java.lang.Object>) + method void io.trino.spi.connector.ConnectorMaterializedViewDefinition::<init>(java.lang.String, java.util.Optional<io.trino.spi.connector.CatalogSchemaTableName>, java.util.Optional<java.lang.String>, java.util.Optional<java.lang.String>, java.util.List<io.trino.spi.connector.ConnectorMaterializedViewDefinition.Column>, java.util.Optional<java.time.Duration>, java.util.Optional<java.lang.String>, java.util.Optional<java.lang.String>, java.util.List<io.trino.spi.connector.CatalogSchemaName>) + + + java.method.removed + method java.util.Map<java.lang.String, java.lang.Object> io.trino.spi.connector.ConnectorMaterializedViewDefinition::getProperties() + + + java.method.numberOfParametersChanged + method void io.trino.spi.connector.ConnectorMetadata::createMaterializedView(io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.SchemaTableName, io.trino.spi.connector.ConnectorMaterializedViewDefinition, boolean, boolean) + method void io.trino.spi.connector.ConnectorMetadata::createMaterializedView(io.trino.spi.connector.ConnectorSession, io.trino.spi.connector.SchemaTableName, io.trino.spi.connector.ConnectorMaterializedViewDefinition, java.util.Map<java.lang.String, java.lang.Object>, boolean, boolean) + diff --git a/core/trino-spi/src/main/java/io/trino/spi/StandardErrorCode.java b/core/trino-spi/src/main/java/io/trino/spi/StandardErrorCode.java index 9500f3e4a66b0..4751394a433dd 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/StandardErrorCode.java +++ b/core/trino-spi/src/main/java/io/trino/spi/StandardErrorCode.java @@ -148,6 +148,9 @@ public enum StandardErrorCode INVALID_CATALOG_PROPERTY(124, USER_ERROR), CATALOG_UNAVAILABLE(125, USER_ERROR), MISSING_RETURN(126, USER_ERROR), + DUPLICATE_COLUMN_OR_PATH_NAME(127, USER_ERROR), + MISSING_PATH_NAME(128, USER_ERROR), + INVALID_PLAN(129, USER_ERROR), GENERIC_INTERNAL_ERROR(65536, INTERNAL_ERROR), TOO_MANY_REQUESTS_FAILED(65537, INTERNAL_ERROR), diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java index 69c3902fd5e48..132aaca0761b1 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/DictionaryBlock.java @@ -420,6 +420,11 @@ public Block getRegion(int positionOffset, int length) public Block copyRegion(int position, int length) { checkValidRegion(positionCount, position, length); + if (length == 0) { + // explicit support for case when length == 0 which might 
otherwise fail + // on getId(position) if position == positionCount + return dictionary.copyRegion(0, 0); + } // Avoid repeated volatile reads to the uniqueIds field int uniqueIds = this.uniqueIds; if (length <= 1 || (uniqueIds == dictionary.getPositionCount() && isSequentialIds)) { diff --git a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java index 59aca4f3b550f..58d819995e589 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java +++ b/core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockBuilder.java @@ -32,7 +32,7 @@ public class VariableWidthBlockBuilder implements BlockBuilder { private static final int INSTANCE_SIZE = instanceSize(VariableWidthBlockBuilder.class); - private static final Block NULL_VALUE_BLOCK = new VariableWidthBlock(0, 1, EMPTY_SLICE, new int[]{0, 0}, new boolean[]{true}); + private static final Block NULL_VALUE_BLOCK = new VariableWidthBlock(0, 1, EMPTY_SLICE, new int[] {0, 0}, new boolean[] {true}); private static final int SIZE_IN_BYTES_PER_POSITION = Integer.BYTES + Byte.BYTES; private final BlockBuilderStatus blockBuilderStatus; diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMaterializedViewDefinition.java b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMaterializedViewDefinition.java index 98a429d3dceef..d7ebd0097f493 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMaterializedViewDefinition.java +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMaterializedViewDefinition.java @@ -17,7 +17,6 @@ import java.time.Duration; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.StringJoiner; @@ -37,7 +36,6 @@ public class ConnectorMaterializedViewDefinition private final Optional comment; private final Optional owner; private final List path; - private final Map properties; public ConnectorMaterializedViewDefinition( String originalSql, @@ -48,8 +46,7 @@ public ConnectorMaterializedViewDefinition( Optional gracePeriod, Optional comment, Optional owner, - List path, - Map properties) + List path) { this.originalSql = requireNonNull(originalSql, "originalSql is null"); this.storageTable = requireNonNull(storageTable, "storageTable is null"); @@ -61,7 +58,6 @@ public ConnectorMaterializedViewDefinition( this.comment = requireNonNull(comment, "comment is null"); this.owner = requireNonNull(owner, "owner is null"); this.path = List.copyOf(path); - this.properties = requireNonNull(properties, "properties are null"); if (catalog.isEmpty() && schema.isPresent()) { throw new IllegalArgumentException("catalog must be present if schema is present"); @@ -116,11 +112,6 @@ public List getPath() return path; } - public Map getProperties() - { - return properties; - } - @Override public String toString() { @@ -133,9 +124,8 @@ public String toString() gracePeriod.ifPresent(value -> joiner.add("gracePeriod=" + gracePeriod)); comment.ifPresent(value -> joiner.add("comment=" + value)); joiner.add("owner=" + owner); - joiner.add("properties=" + properties); joiner.add(path.stream().map(CatalogSchemaName::toString).collect(joining(", ", "path=(", ")"))); - return getClass().getSimpleName() + joiner.toString(); + return getClass().getSimpleName() + joiner; } @Override @@ -156,14 +146,13 @@ public boolean equals(Object o) Objects.equals(gracePeriod, that.gracePeriod) && 
Objects.equals(comment, that.comment) && Objects.equals(owner, that.owner) && - Objects.equals(path, that.path) && - Objects.equals(properties, that.properties); + Objects.equals(path, that.path); } @Override public int hashCode() { - return Objects.hash(originalSql, storageTable, catalog, schema, columns, gracePeriod, comment, owner, path, properties); + return Objects.hash(originalSql, storageTable, catalog, schema, columns, gracePeriod, comment, owner, path); } public static final class Column diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java index 3a006f8a0a84e..5f0bdc7602b50 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java @@ -1661,7 +1661,13 @@ default void validateScan(ConnectorSession session, ConnectorTableHandle handle) * * @throws TrinoException with {@code ALREADY_EXISTS} if the object already exists and {@param ignoreExisting} is not set */ - default void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + default void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { throw new TrinoException(NOT_SUPPORTED, "This connector does not support creating materialized views"); } @@ -1709,6 +1715,11 @@ default Optional getMaterializedView(Connec return Optional.empty(); } + default Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition materializedViewDefinition) + { + throw new TrinoException(NOT_SUPPORTED, "This connector does not support materialized views"); + } + /** * The method is used by the engine to determine if a materialized view is current with respect to the tables it depends on. * diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorPageSink.java b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorPageSink.java index b7fc9e2897ad8..952eef2aece46 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorPageSink.java +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorPageSink.java @@ -61,6 +61,14 @@ default long getValidationCpuNanos() */ CompletableFuture appendPage(Page page); + /** + * Closes the idle partition writers that have not received any data since the last time this + * method is called. This method is called periodically based on some + * data written threshold by the TableWriterOperator. It is needed to avoid high memory + * usage due to stale partitions kept in memory during partitioned writes. 
+ */ + default void closeIdleWriters() {} + /** * Notifies the connector that no more pages will be appended and returns * connector-specific information that will be sent to the coordinator to diff --git a/core/trino-spi/src/main/java/io/trino/spi/type/BooleanType.java b/core/trino-spi/src/main/java/io/trino/spi/type/BooleanType.java index d2195f2c16190..2c8442f26ec1c 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/type/BooleanType.java +++ b/core/trino-spi/src/main/java/io/trino/spi/type/BooleanType.java @@ -64,7 +64,7 @@ public static Block wrapByteArrayAsBooleanBlockWithoutNulls(byte[] booleansAsByt public static Block createBlockForSingleNonNullValue(boolean value) { byte byteValue = value ? (byte) 1 : 0; - return new ByteArrayBlock(1, Optional.empty(), new byte[]{byteValue}); + return new ByteArrayBlock(1, Optional.empty(), new byte[] {byteValue}); } private BooleanType() diff --git a/core/trino-spi/src/test/java/io/trino/spi/block/BenchmarkCopyPositions.java b/core/trino-spi/src/test/java/io/trino/spi/block/BenchmarkCopyPositions.java index c66eade9807be..10166979629ff 100644 --- a/core/trino-spi/src/test/java/io/trino/spi/block/BenchmarkCopyPositions.java +++ b/core/trino-spi/src/test/java/io/trino/spi/block/BenchmarkCopyPositions.java @@ -102,7 +102,7 @@ public void setup() else if (type.equals("ROW(BIGINT)")) { Optional rowIsNull = nullsAllowed ? Optional.of(generateIsNull(POSITIONS)) : Optional.empty(); LongArrayBlock randomLongArrayBlock = new LongArrayBlock(POSITIONS, rowIsNull, new Random(SEED).longs().limit(POSITIONS).toArray()); - block = RowBlock.fromNotNullSuppressedFieldBlocks(POSITIONS, rowIsNull, new Block[]{randomLongArrayBlock}); + block = RowBlock.fromNotNullSuppressedFieldBlocks(POSITIONS, rowIsNull, new Block[] {randomLongArrayBlock}); } } diff --git a/core/trino-spi/src/test/java/io/trino/spi/block/TestColumnarMap.java b/core/trino-spi/src/test/java/io/trino/spi/block/TestColumnarMap.java index 7a625a7f2c316..2b4db2558b214 100644 --- a/core/trino-spi/src/test/java/io/trino/spi/block/TestColumnarMap.java +++ b/core/trino-spi/src/test/java/io/trino/spi/block/TestColumnarMap.java @@ -37,7 +37,7 @@ public class TestColumnarMap { private static final TypeOperators TYPE_OPERATORS = new TypeOperators(); private static final MapType MAP_TYPE = new MapType(VARCHAR, VARCHAR, TYPE_OPERATORS); - private static final int[] MAP_SIZES = new int[]{16, 0, 13, 1, 2, 11, 4, 7}; + private static final int[] MAP_SIZES = new int[] {16, 0, 13, 1, 2, 11, 4, 7}; @Test public void test() diff --git a/core/trino-spi/src/test/java/io/trino/spi/block/TestLazyBlock.java b/core/trino-spi/src/test/java/io/trino/spi/block/TestLazyBlock.java index 30d27f8a34ee9..f0d7293a01ea7 100644 --- a/core/trino-spi/src/test/java/io/trino/spi/block/TestLazyBlock.java +++ b/core/trino-spi/src/test/java/io/trino/spi/block/TestLazyBlock.java @@ -66,7 +66,7 @@ public void testNestedGetLoadedBlock() List actualNotifications = new ArrayList<>(); Block arrayBlock = new IntArrayBlock(2, Optional.empty(), new int[] {0, 1}); LazyBlock lazyArrayBlock = new LazyBlock(2, () -> arrayBlock); - Block rowBlock = RowBlock.fromFieldBlocks(2, new Block[]{lazyArrayBlock}); + Block rowBlock = RowBlock.fromFieldBlocks(2, new Block[] {lazyArrayBlock}); LazyBlock lazyBlock = new LazyBlock(2, () -> rowBlock); LazyBlock.listenForLoads(lazyBlock, actualNotifications::add); diff --git a/docs/.vale/Vocab/Base/accept.txt b/docs/.vale/config/vocabularies/Base/accept.txt similarity index 100% rename from 
docs/.vale/Vocab/Base/accept.txt rename to docs/.vale/config/vocabularies/Base/accept.txt diff --git a/docs/build b/docs/build index 7cbb83a6980ba..5a72bbdbcf576 100755 --- a/docs/build +++ b/docs/build @@ -8,5 +8,5 @@ test -t 1 && OPTS='-it' || OPTS='' SPHINX_IMAGE=${SPHINX_IMAGE:-ghcr.io/trinodb/build/sphinx:7} -docker run --rm $OPTS -e TRINO_VERSION -u $(id -u):$(id -g) -v "$PWD":/docs $SPHINX_IMAGE \ +docker run --security-opt label:disable --rm $OPTS -e TRINO_VERSION -u $(id -u):$(id -g) -v "$PWD":/docs $SPHINX_IMAGE \ sphinx-build -q -j auto -b html -W -d target/doctrees src/main/sphinx target/html diff --git a/docs/pom.xml b/docs/pom.xml index fb14778bf5b9a..bc732e679b0a8 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT trino-docs diff --git a/docs/src/main/sphinx/admin/fault-tolerant-execution.md b/docs/src/main/sphinx/admin/fault-tolerant-execution.md index 864b8e4425450..ab085a86d79b0 100644 --- a/docs/src/main/sphinx/admin/fault-tolerant-execution.md +++ b/docs/src/main/sphinx/admin/fault-tolerant-execution.md @@ -80,6 +80,10 @@ execution on a Trino cluster: - Enable compression of spooling data. Setting to `true` is recommended when using an [exchange manager](fte-exchange-manager). - ``false`` +* - `fault-tolerant-execution.exchange-encryption-enabled` + - Enable encryption of spooling data, see [Encryption](fte-encryption) for details. + Setting this property to false is not recommended if Trino processes sensitive data. + - ``true`` ::: (fte-retry-policy)= @@ -145,6 +149,14 @@ with a `TASK` retry policy for large batch queries, separate from another cluster that handles short queries. ::: +(fte-encryption)= +## Encryption + +Trino encrypts data before spooling it to storage. This prevents access to query +data by anyone besides the Trino cluster that wrote it, including administrators +of the storage system. A new encryption key is randomly generated for every +exchange with every query, and keys are discarded once a query is completed. + ## Advanced configuration You can further configure fault-tolerant execution with the following @@ -449,7 +461,11 @@ the property may be configured for: - AWS S3, GCS * - `exchange.s3.endpoint` - S3 storage endpoint server if using an S3-compatible storage system that - is not AWS. If using AWS S3, this can be ignored. If using GCS, set it + is not AWS. If using AWS S3, this can be ignored unless HTTPS is required + by an AWS bucket policy. If TLS is required, then this property can be + set to an https endpoint such as ``https://s3.us-east-1.amazonaws.com``. + Note that TLS is redundant due to {ref}`automatic encryption `. + If using GCS, set it to `https://storage.googleapis.com`. - - Any S3-compatible storage diff --git a/docs/src/main/sphinx/admin/properties-query-management.md b/docs/src/main/sphinx/admin/properties-query-management.md index 9ca758e13e71b..83612aa93aeae 100644 --- a/docs/src/main/sphinx/admin/properties-query-management.md +++ b/docs/src/main/sphinx/admin/properties-query-management.md @@ -14,7 +14,7 @@ application, such as the CLI, before it abandons and cancels its work. - **Default value:** `phased` - **Session property:** `execution_policy` -Configures the algorithm to organize the processing of all of the +Configures the algorithm to organize the processing of all the stages of a query. 
You can use the following execution policies: - `phased` schedules stages in a sequence to avoid blockages because of @@ -69,7 +69,7 @@ such as joins, aggregations, partitioned window functions and others. The maximum number of tasks that will take part in writing data during `INSERT`, `CREATE TABLE AS SELECT` and `EXECUTE` queries. -The limit is only applicable when `redistribute-writes` or `scale-writers` is be enabled. +The limit is only applicable when `redistribute-writes` or `scale-writers` is enabled. ## `query.low-memory-killer.policy` @@ -97,11 +97,11 @@ Configures the behavior to handle killing running tasks in the event of low memory availability. Supports the following values: - `none` - Do not kill any tasks in the event of low memory. -- `total-reservation-on-blocked-nodes` - Kill the tasks which are part of the queries - which has task retries enabled and are currently using the most memory specifically +- `total-reservation-on-blocked-nodes` - Kill the tasks that are part of the queries + which have task retries enabled and are currently using the most memory specifically on nodes that are now out of memory. -- `least-waste` - Kill the tasks which are part of the queries - which has task retries enabled and use significant amount of memory on nodes +- `least-waste` - Kill the tasks that are part of the queries + which have task retries enabled and use significant amount of memory on nodes which are now out of memory. This policy avoids killing tasks which are already executing for a long time, so significant amount of work is not wasted. @@ -116,7 +116,7 @@ Only applies for queries with task level retries enabled (`retry-policy=TASK`) The amount of time a query is allowed to recover between running out of memory and being killed, if `query.low-memory-killer.policy` or -`task.low-memory-killer.policy` is set to value differnt than `none`. +`task.low-memory-killer.policy` is set to value different from `none`. ## `query.max-execution-time` @@ -156,7 +156,7 @@ and may not terminate immediately. The maximum allowed time for a query to be processed on the cluster, before it is terminated. The time includes time for analysis and planning, but also -time spend in a queue waiting, so essentially this is the time allowed for a +time spent in a queue waiting, so essentially this is the time allowed for a query to exist since creation. ## `query.max-scan-physical-bytes` @@ -179,7 +179,7 @@ generates more stages than this it will get killed with error `QUERY_HAS_TOO_MANY_STAGES`. 
:::{warning} -Setting this to a high value can cause queries with large number of +Setting this to a high value can cause queries with a large number of stages to introduce instability in the cluster causing unrelated queries to get killed with `REMOTE_TASK_ERROR` and the message `Max requests queued per destination exceeded for HttpDestination ...` diff --git a/docs/src/main/sphinx/connector.md b/docs/src/main/sphinx/connector.md index d8dd8253d6da1..3c8dc201e7f09 100644 --- a/docs/src/main/sphinx/connector.md +++ b/docs/src/main/sphinx/connector.md @@ -29,6 +29,7 @@ MariaDB Memory MongoDB MySQL +OpenSearch Oracle Phoenix Pinot diff --git a/docs/src/main/sphinx/connector/elasticsearch.md b/docs/src/main/sphinx/connector/elasticsearch.md index 337c638b42efd..2fc3834fd29ed 100644 --- a/docs/src/main/sphinx/connector/elasticsearch.md +++ b/docs/src/main/sphinx/connector/elasticsearch.md @@ -8,7 +8,7 @@ The Elasticsearch Connector allows access to [Elasticsearch](https://www.elastic This document describes how to setup the Elasticsearch Connector to run SQL queries against Elasticsearch. :::{note} -Elasticsearch (6.6.0 or later) or OpenSearch (1.1.0 or later) is required. +Elasticsearch (7.x or 8.x) is required. OpenSearch is supported through the dedicated [OpenSearch](/connector/opensearch) connector. ::: ## Configuration @@ -112,6 +112,9 @@ The allowed configuration values are: * - `elasticsearch.tls.truststore-password` - The key password for the trust store specified by `elasticsearch.tls.truststore-path`. +* - `elasticsearch.tls.verify-hostnames` + - Flag to determine if the hostnames in the certificates must be verified. Defaults + to `true`. ::: (elasticesearch-type-mapping)= diff --git a/docs/src/main/sphinx/connector/hive.md b/docs/src/main/sphinx/connector/hive.md index 9697b8a927919..9982e5d5c3148 100644 --- a/docs/src/main/sphinx/connector/hive.md +++ b/docs/src/main/sphinx/connector/hive.md @@ -644,13 +644,13 @@ type conversions. * - `CHAR` - narrowing conversions for `CHAR` * - `TINYINT` - - `VARCHAR`, `SMALLINT`, `INTEGER`, `BIGINT`, `DOUBLE` + - `VARCHAR`, `SMALLINT`, `INTEGER`, `BIGINT`, `DOUBLE`, `DECIMAL` * - `SMALLINT` - - `VARCHAR`, `INTEGER`, `BIGINT`, `DOUBLE` + - `VARCHAR`, `INTEGER`, `BIGINT`, `DOUBLE`, `DECIMAL` * - `INTEGER` - - `VARCHAR`, `BIGINT`, `DOUBLE` + - `VARCHAR`, `BIGINT`, `DOUBLE`, `DECIMAL` * - `BIGINT` - - `VARCHAR`, `DOUBLE` + - `VARCHAR`, `DOUBLE`, `DECIMAL` * - `REAL` - `DOUBLE`, `DECIMAL` * - `DOUBLE` @@ -658,6 +658,8 @@ type conversions. * - `DECIMAL` - `DOUBLE`, `REAL`, `VARCHAR`, `TINYINT`, `SMALLINT`, `INTEGER`, `BIGINT`, as well as narrowing and widening conversions for `DECIMAL` +* - `DATE` + - `VARCHAR` * - `TIMESTAMP` - `VARCHAR`, `DATE` ::: diff --git a/docs/src/main/sphinx/connector/hudi.md b/docs/src/main/sphinx/connector/hudi.md index 8289e6fb6eb1f..2f8be98a3f3d7 100644 --- a/docs/src/main/sphinx/connector/hudi.md +++ b/docs/src/main/sphinx/connector/hudi.md @@ -82,6 +82,15 @@ Additionally, following configuration properties can be set depending on the use - Maximum number of metastore data objects per transaction in the Hive metastore cache. - `2000` +* - `hudi.query-partition-filter-required` + - Set to `true` to force a query to use a partition column in the filter condition. + The equivalent catalog session property is `query_partition_filter_required`. + Enabling this property causes query failures if the partition column used + in the filter condition doesn't effectively reduce the number of data files read. 
+ Example: Complex filter expressions such as `id = 1 OR part_key = '100'` + or `CAST(part_key AS INTEGER) % 2 = 0` are not recognized as partition filters, + and queries using such expressions fail if the property is set to `true`. + - `false` ::: diff --git a/docs/src/main/sphinx/connector/metastores.md b/docs/src/main/sphinx/connector/metastores.md index 950e2248055cd..2fc7612e3ba48 100644 --- a/docs/src/main/sphinx/connector/metastores.md +++ b/docs/src/main/sphinx/connector/metastores.md @@ -353,8 +353,8 @@ iceberg.catalog.type=rest iceberg.rest-catalog.uri=http://iceberg-with-rest:8181 ``` -The REST catalog does not support {doc}`views` or -{doc}`materialized views`. +The REST catalog does not support [view management](sql-view-management) or +[materialized view management](sql-materialized-view-management). (iceberg-jdbc-catalog)= @@ -390,8 +390,8 @@ iceberg.jdbc-catalog.connection-password=test iceberg.jdbc-catalog.default-warehouse-dir=s3://bucket ``` -The JDBC catalog does not support {doc}`views` or -{doc}`materialized views`. +The JDBC catalog does not support [view management](sql-view-management) or +[materialized view management](sql-materialized-view-management). (iceberg-nessie-catalog)= @@ -439,6 +439,9 @@ iceberg.nessie-catalog.uri=https://localhost:19120/api/v1 iceberg.nessie-catalog.default-warehouse-dir=/tmp ``` +The Nessie catalog does not support [view management](sql-view-management) or +[materialized view management](sql-materialized-view-management). + (partition-projection)= ## Access tables with Athena partition projection metadata diff --git a/docs/src/main/sphinx/connector/mongodb.md b/docs/src/main/sphinx/connector/mongodb.md index ff66ceaef6780..5edcf63b64512 100644 --- a/docs/src/main/sphinx/connector/mongodb.md +++ b/docs/src/main/sphinx/connector/mongodb.md @@ -59,6 +59,7 @@ The following configuration properties are available: | `mongodb.write-concern` | The write concern | | `mongodb.required-replica-set` | The required replica set name | | `mongodb.cursor-batch-size` | The number of elements to return in a batch | +| `mongodb.allow-local-scheduling` | Assign MongoDB splits to a specific worker | ### `mongodb.connection-url` @@ -203,6 +204,15 @@ Do not use a batch size of `1`. This property is optional; the default is `0`. +### `mongodb.allow-local-scheduling` + +Set the value of this property to `true` if Trino and MongoDB share the same +cluster, and specific MongoDB splits should be processed on the same worker and +MongoDB node. Note that a shared deployment is not recommended, and enabling +this property can lead to resource contention. + +This property is optional, and defaults to false. + (table-definition-label)= ## Table definition diff --git a/docs/src/main/sphinx/connector/object-storage-file-formats.md b/docs/src/main/sphinx/connector/object-storage-file-formats.md index 45e9f59c34e52..b47be16835fc3 100644 --- a/docs/src/main/sphinx/connector/object-storage-file-formats.md +++ b/docs/src/main/sphinx/connector/object-storage-file-formats.md @@ -91,6 +91,11 @@ with Parquet files performed by supported object storage connectors: catalog session property is `parquet_use_column_index`. Only supported by the Delta Lake and Hive connectors. - `true` +* - `parquet.ignore-statistics` + - Ignore statistics from Parquet to allow querying files with corrupted or + incorrect statistics. The equivalent catalog session property is + `parquet_ignore_statistics`. 
+ - `false` * - `parquet.max-read-block-row-count` - Sets the maximum number of rows read in a batch. The equivalent catalog session property is named `parquet_max_read_block_row_count` and supported diff --git a/docs/src/main/sphinx/connector/opensearch.md b/docs/src/main/sphinx/connector/opensearch.md new file mode 100644 index 0000000000000..d50a20aad45af --- /dev/null +++ b/docs/src/main/sphinx/connector/opensearch.md @@ -0,0 +1,496 @@ +# OpenSearch connector + +```{raw} html + +``` + +The OpenSearch connector allows access to [OpenSearch](https://opensearch.org/) +data from Trino. This document describes how to configure a catalog with the +OpenSearch connector to run SQL queries against OpenSearch. + +## Requirements + +- OpenSearch 1.1.0 or higher. +- Network access from the Trino coordinator and workers to the OpenSearch nodes. + +## Configuration + +To configure the OpenSearch connector, create a catalog properties file +`etc/catalog/example.properties` with the following content, replacing the +properties as appropriate for your setup: + +```text +connector.name=opensearch +opensearch.host=search.example.com +opensearch.port=9200 +opensearch.default-schema-name=default +``` + +The following table details all general configuration properties: + +:::{list-table} OpenSearch configuration properties +:widths: 35, 55, 10 +:header-rows: 1 + +* - Property name + - Description + - Default +* - `opensearch.host` + - The comma-separated list of host names of the OpenSearch cluster. This + property is required. + - +* - `opensearch.port` + - Port to use to connect to OpenSearch. + - `9200` +* - `opensearch.default-schema-name` + - The schema that contains all tables defined without a qualifying schema + name. + - `default` +* - `opensearch.scroll-size` + - Sets the maximum number of hits that can be returned with each [OpenSearch + scroll request](https://opensearch.org/docs/latest/api-reference/scroll/). + - `1000` +* - `opensearch.scroll-timeout` + - [Duration](prop-type-duration) for OpenSearch to keep the search context + alive for scroll requests. + - `1m` +* - `opensearch.request-timeout` + - Timeout [duration](prop-type-duration) for all OpenSearch requests. + - `10s` +* - `opensearch.connect-timeout` + - Timeout [duration](prop-type-duration) for all OpenSearch connection + attempts. + - `1s` +* - `opensearch.backoff-init-delay` + - The minimum [duration](prop-type-duration) between backpressure retry + attempts for a single request to OpenSearch. Setting it too low can + overwhelm an already struggling cluster. + - `500ms` +* - `opensearch.backoff-max-delay` + - The maximum [duration](prop-type-duration) between backpressure retry + attempts for a single request. + - `20s` +* - `opensearch.max-retry-time` + - The maximum [duration](prop-type-duration) across all retry attempts for a + single request. + - `20s` +* - `opensearch.node-refresh-interval` + - [Duration](prop-type-duration) between requests to refresh the list of + available OpenSearch nodes. + - `1m` +* - `opensearch.ignore-publish-address` + - Disable using the address published by the OpenSearch API to connect for + queries. Defaults to `false`. Some deployments map OpenSearch ports to a + random public port and enabling this property can help in these cases. + - +::: + +### Authentication + +The connection to OpenSearch can use AWS or password authentication. + +To enable AWS authentication and authorization using IAM policies, the +`opensearch.security` option must be set to `AWS`. 
Additionally, the +following options must be configured: + +:::{list-table} +:widths: 40, 60 +:header-rows: 1 + +* - Property name + - Description +* - `opensearch.aws.region` + - AWS region of the OpenSearch endpoint. This option is required. +* - `opensearch.aws.access-key` + - AWS access key to use to connect to the OpenSearch domain. If not set, the + default AWS credentials provider chain is used. +* - `opensearch.aws.secret-key` + - AWS secret key to use to connect to the OpenSearch domain. If not set, the + default AWS credentials provider chain is used. +* - `opensearch.aws.iam-role` + - Optional ARN of an IAM role to assume to connect to OpenSearch. Note that + the configured IAM user must be able to assume this role. +* - `opensearch.aws.external-id` + - Optional external ID to pass while assuming an AWS IAM role. +::: + +To enable password authentication, the `opensearch.security` option must be set +to `PASSWORD`. Additionally the following options must be configured: + +:::{list-table} +:widths: 45, 55 +:header-rows: 1 + +* - Property name + - Description +* - `opensearch.auth.user` + - User name to use to connect to OpenSearch. +* - `opensearch.auth.password` + - Password to use to connect to OpenSearch. +::: + +### Connection security with TLS + +The connector provides additional security options to connect to OpenSearch +clusters with TLS enabled. + +If your cluster uses globally-trusted certificates, you only need to +enable TLS. If you require custom configuration for certificates, the connector +supports key stores and trust stores in PEM or Java Key Store (JKS) format. + +The available configuration values are listed in the following table: + +:::{list-table} TLS configuration properties +:widths: 40, 60 +:header-rows: 1 + +* - Property name + - Description +* - `opensearch.tls.enabled` + - Enable TLS security. Defaults to `false`. +* - `opensearch.tls.keystore-path` + - The path to the [PEM](/security/inspect-pem) or [JKS](/security/inspect-jks) + key store. +* - `opensearch.tls.truststore-path` + - The path to [PEM](/security/inspect-pem) or [JKS](/security/inspect-jks) + trust store. +* - `opensearch.tls.keystore-password` + - The password for the key store specified by + `opensearch.tls.keystore-path`. +* - `opensearch.tls.truststore-password` + - The password for the trust store specified by + `opensearch.tls.truststore-path`. +* - `opensearch.tls.verify-hostnames` + - Flag to determine if the hostnames in the certificates must be verified. + Defaults to `true`. +::: + +(opensearch-type-mapping)= +## Type mapping + +Because Trino and OpenSearch each support types that the other does not, the +connector [maps some types](type-mapping-overview) when reading data. + +### OpenSearch type to Trino type mapping + +The connector maps OpenSearch types to the corresponding Trino types +according to the following table: + +:::{list-table} OpenSearch type to Trino type mapping +:widths: 30, 30, 50 +:header-rows: 1 + +* - OpenSearch type + - Trino type + - Notes +* - `BOOLEAN` + - `BOOLEAN` + - +* - `DOUBLE` + - `DOUBLE` + - +* - `FLOAT` + - `REAL` + - +* - `BYTE` + - `TINYINT` + - +* - `SHORT` + - `SMALLINT` + - +* - `INTEGER` + - `INTEGER` + - +* - `LONG` + - `BIGINT` + - +* - `KEYWORD` + - `VARCHAR` + - +* - `TEXT` + - `VARCHAR` + - +* - `DATE` + - `TIMESTAMP` + - For more information, see [](opensearch-date-types). +* - `IPADDRESS` + - `IP` + - +::: + +No other types are supported. 
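+
+For example, with a hypothetical index named `logs` whose mapping declares
+`keyword`, `long`, and `integer` fields, the table exposed through a catalog
+named `example` uses the Trino types from the preceding table:
+
+```sql
+-- Illustrative sketch: catalog, schema, index, and field names are assumptions
+DESCRIBE example.default.logs;
+-- message     -> varchar  (OpenSearch keyword or text)
+-- bytes_sent  -> bigint   (OpenSearch long)
+-- status_code -> integer  (OpenSearch integer)
+```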
+ +(opensearch-array-types)= +### Array types + +Fields in OpenSearch can contain [zero or more +values](https://opensearch.org/docs/latest/field-types/supported-field-types/date/#custom-formats), +but there is no dedicated array type. To indicate a field contains an array, it +can be annotated in a Trino-specific structure in the +[\_meta](https://opensearch.org/docs/latest/field-types/index/#get-a-mapping) +section of the index mapping in OpenSearch. + +For example, you can have an OpenSearch index that contains documents with the +following structure: + +```json +{ + "array_string_field": ["trino","the","lean","machine-ohs"], + "long_field": 314159265359, + "id_field": "564e6982-88ee-4498-aa98-df9e3f6b6109", + "timestamp_field": "1987-09-17T06:22:48.000Z", + "object_field": { + "array_int_field": [86,75,309], + "int_field": 2 + } +} +``` + +The array fields of this structure can be defined by using the following command +to add the field property definition to the `_meta.trino` property of the target +index mapping with OpenSearch available at `search.example.com:9200`: + +```shell +curl --request PUT \ + --url search.example.com:9200/doc/_mapping \ + --header 'content-type: application/json' \ + --data ' +{ + "_meta": { + "trino":{ + "array_string_field":{ + "isArray":true + }, + "object_field":{ + "array_int_field":{ + "isArray":true + } + }, + } + } +}' +``` + +:::{note} +It is not allowed to use `asRawJson` and `isArray` flags simultaneously for the same column. +::: + +(opensearch-date-types)= +### Date types + +The OpenSearch connector supports only the default `date` type. All other +OpenSearch [date] formats including [built-in date formats] and [custom date +formats] are not supported. Dates with the [format] property are ignored. + +### Raw JSON transform + +Documents in OpenSearch can include more complex structures that are not +represented in the mapping. For example, a single `keyword` field can have +widely different content including a single `keyword` value, an array, or a +multidimensional `keyword` array with any level of nesting + +The following command configures `array_string_field` mapping with OpenSearch +available at `search.example.com:9200`: + +```shell +curl --request PUT \ + --url search.example.com:9200/doc/_mapping \ + --header 'content-type: application/json' \ + --data ' +{ + "properties": { + "array_string_field":{ + "type": "keyword" + } + } +}' +``` + +All the following documents are legal for OpenSearch with `array_string_field` +mapping: + +```json +[ + { + "array_string_field": "trino" + }, + { + "array_string_field": ["trino","is","the","best"] + }, + { + "array_string_field": ["trino",["is","the","best"]] + }, + { + "array_string_field": ["trino",["is",["the","best"]]] + } +] +``` + +See the [OpenSearch array +documentation](https://opensearch.org/docs/latest/field-types/supported-field-types/index/#arrays) +for more details. + +Further, OpenSearch supports types, such as [k-NN +vector](https://opensearch.org/docs/latest/field-types/supported-field-types/knn-vector/), +that are not supported in Trino. These and other types can cause parsing +exceptions for users that use of these types in OpenSearch. To manage all of +these scenarios, you can transform fields to raw JSON by annotating it in a +Trino-specific structure in the +[\_meta](https://opensearch.org/docs/latest/field-types/index/) section of the +OpenSearch index mapping. 
This indicates to Trino that the field, and all nested +fields beneath, must be cast to a `VARCHAR` field that contains the raw JSON +content. These fields can be defined by using the following command to add the +field property definition to the `_meta.trino` property of the target index +mapping. + +```shell +curl --request PUT \ + --url search.example.com:9200/doc/_mapping \ + --header 'content-type: application/json' \ + --data ' +{ + "_meta": { + "trino":{ + "array_string_field":{ + "asRawJson":true + } + } + } +}' +``` + +The preceding configuration causes Trino to return the `array_string_field` +field as a `VARCHAR` containing raw JSON. You can parse these fields with the +[built-in JSON functions](/functions/json). + +:::{note} +It is not allowed to use `asRawJson` and `isArray` flags simultaneously for the same column. +::: + +## Special columns + +The following hidden columns are available: + +:::{list-table} +:widths: 25, 75 +:header-rows: 1 + +* - Column + - Description +* - `_id` + - The OpenSearch document ID. +* - `_score` + - The document score returned by the OpenSearch query. +* - `_source` + - The source of the original document. +::: + +(opensearch-sql-support)= +## SQL support + +The connector provides [globally available](sql-globally-available) and +[read operation](sql-read-operations) statements to access data and +metadata in the OpenSearch catalog. + +## Table functions + +The connector provides specific [table functions](/functions/table) to +access OpenSearch. + +(opensearch-raw-query-function)= +### `raw_query(varchar) -> table` + +The `raw_query` function allows you to query the underlying database directly +using the [OpenSearch Query +DSL](https://opensearch.org/docs/latest/query-dsl/index/) syntax. The full DSL +query is pushed down and processed in OpenSearch. This can be useful for +accessing native features which are not available in Trino, or for improving +query performance in situations where running a query natively may be faster. + +```{include} query-passthrough-warning.fragment +``` + +The `raw_query` function requires three parameters: + +- `schema`: The schema in the catalog that the query is to be executed on. +- `index`: The index in OpenSearch to search. +- `query`: The query to execute, written in [OpenSearch Query DSL](https://opensearch.org/docs/latest/query-dsl). + +Once executed, the query returns a single row containing the resulting JSON +payload returned by OpenSearch. + +For example, query the `example` catalog and use the `raw_query` table function +to search for documents in the `orders` index where the country name is +`ALGERIA` as defined as a JSON-formatted query matcher and passed to the +`raw_query` table function in the `query` parameter: + +```sql +SELECT + * +FROM + TABLE( + example.system.raw_query( + schema => 'sales', + index => 'orders', + query => '{ + "query": { + "match": { + "name": "ALGERIA" + } + } + }' + ) + ); +``` + +```{include} query-table-function-ordering.fragment +``` + +## Performance + +The connector includes a number of performance improvements, detailed in the +following sections. + +### Parallel data access + +The connector requests data from multiple nodes of the OpenSearch cluster for +query processing in parallel. 
+ +### Predicate push down + +The connector supports [predicate push down](predicate-pushdown) for the +following data types: + +:::{list-table} +:widths: 50, 50 +:header-rows: 1 + +* - OpenSearch + - Trino +* - `boolean` + - `BOOLEAN` +* - `double` + - `DOUBLE` +* - `float` + - `REAL` +* - `byte` + - `TINYINT` +* - `short` + - `SMALLINT` +* - `integer` + - `INTEGER` +* - `long` + - `BIGINT` +* - `keyword` + - `VARCHAR` +* - `date` + - `TIMESTAMP` +::: + +No other data types are supported for predicate push down. + +[built-in date formats]: https://opensearch.org/docs/latest/field-types/supported-field-types/date/#custom-formats +[custom date formats]: https://opensearch.org/docs/latest/field-types/supported-field-types/date/#custom-formats +[date]: https://opensearch.org/docs/latest/field-types/supported-field-types/date/ +[format]: https://opensearch.org/docs/latest/query-dsl/term/range/#format +[full text query]: https://opensearch.org/docs/latest/query-dsl/full-text/query-string/ diff --git a/docs/src/main/sphinx/connector/oracle.md b/docs/src/main/sphinx/connector/oracle.md index ccbfb9a483741..b823e642dfef6 100644 --- a/docs/src/main/sphinx/connector/oracle.md +++ b/docs/src/main/sphinx/connector/oracle.md @@ -18,7 +18,7 @@ like Oracle and Hive, or different Oracle database instances. To connect to Oracle, you need: -- Oracle 12 or higher. +- Oracle 19 or higher. - Network access from the Trino coordinator and workers to Oracle. Port 1521 is the default port. @@ -43,7 +43,7 @@ to the JDBC driver. The Oracle connector uses the Oracle JDBC Thin driver, and the syntax of the URL may be different depending on your Oracle configuration. For example, the connection URL is different if you are connecting to an Oracle SID or an Oracle service name. See the [Oracle -Database JDBC driver documentation](https://docs.oracle.com/en/database/oracle/oracle-database/21/jjdbc/data-sources-and-URLs.html#GUID-088B1600-C6C2-4F19-A020-2DAF8FE1F1C3) +Database JDBC driver documentation](https://docs.oracle.com/en/database/oracle/oracle-database/19/jjdbc/data-sources-and-URLs.html) for more information. The `connection-user` and `connection-password` are typically required and @@ -54,7 +54,7 @@ properties files. :::{note} Oracle does not expose metadata comment via `REMARKS` column by default in JDBC driver. You can enable it using `oracle.remarks-reporting.enabled` -config option. See [Additional Oracle Performance Extensions](https://docs.oracle.com/en/database/oracle/oracle-database/19/jjdbc/performance-extensions.html#GUID-96A38C6D-A288-4E0B-9F03-E711C146632B) +config option. See [Additional Oracle Performance Extensions](https://docs.oracle.com/en/database/oracle/oracle-database/19/jjdbc/performance-extensions.html) for more details. ::: diff --git a/docs/src/main/sphinx/connector/snowflake.md b/docs/src/main/sphinx/connector/snowflake.md index 579764534e230..1ca16df602f2b 100644 --- a/docs/src/main/sphinx/connector/snowflake.md +++ b/docs/src/main/sphinx/connector/snowflake.md @@ -71,8 +71,6 @@ Trino supports the following Snowflake data types: | `date` | `date` | | `time` | `time` | | `timestampntz` | `timestamp` | -| `timestamptz` | `timestampTZ` | -| `timestampltz` | `timestampTZ` | Complete list of [Snowflake data types](https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html). 
diff --git a/docs/src/main/sphinx/functions/aggregate.md b/docs/src/main/sphinx/functions/aggregate.md index fc59ce710c20c..109a25a1a5770 100644 --- a/docs/src/main/sphinx/functions/aggregate.md +++ b/docs/src/main/sphinx/functions/aggregate.md @@ -180,7 +180,7 @@ Synopsis: ``` LISTAGG( expression [, separator] [ON OVERFLOW overflow_behaviour]) - WITHIN GROUP (ORDER BY sort_item, [...]) + WITHIN GROUP (ORDER BY sort_item, [...]) [FILTER (WHERE condition)] ``` If `separator` is not specified, the empty string will be used as `separator`. @@ -213,7 +213,7 @@ of omitted non-null values in case that the length of the output of the function exceeds `1048576` bytes: ``` -SELECT LISTAGG(value, ',' ON OVERFLOW TRUNCATE '.....' WITH COUNT) WITHIN GROUP (ORDER BY value) +SELECT listagg(value, ',' ON OVERFLOW TRUNCATE '.....' WITH COUNT) WITHIN GROUP (ORDER BY value) FROM (VALUES 'a', 'b', 'c') t(value); ``` @@ -222,7 +222,7 @@ If not specified, the truncation filler string is by default `'...'`. This aggregation function can be also used in a scenario involving grouping: ``` -SELECT id, LISTAGG(value, ',') WITHIN GROUP (ORDER BY o) csv_value +SELECT id, listagg(value, ',') WITHIN GROUP (ORDER BY o) csv_value FROM (VALUES (100, 1, 'a'), (200, 3, 'c'), @@ -241,7 +241,39 @@ results in: 200 | b,c ``` -The current implementation of `LISTAGG` function does not support window frames. +This aggregation function supports +[filtering during aggregation](aggregate-function-filtering-during-aggregation) +for scenarios where the aggregation for the data not matching the filter +condition still needs to show up in the output: + +``` +SELECT + country, + listagg(city, ',') + WITHIN GROUP (ORDER BY population DESC) + FILTER (WHERE population >= 10_000_000) megacities +FROM (VALUES + ('India', 'Bangalore', 13_700_000), + ('India', 'Chennai', 12_200_000), + ('India', 'Ranchi', 1_547_000), + ('Austria', 'Vienna', 1_897_000), + ('Poland', 'Warsaw', 1_765_000) +) t(country, city, population) +GROUP BY country +ORDER BY country; +``` + +results in: + +```text + country | megacities +---------+------------------- + Austria | NULL + India | Bangalore,Chennai + Poland | NULL +``` + +The current implementation of `listagg` function does not support window frames. ::: :::{function} max(x) -> [same as input] diff --git a/docs/src/main/sphinx/functions/conditional.md b/docs/src/main/sphinx/functions/conditional.md index 3e07ed29347c3..4ed0716aa6645 100644 --- a/docs/src/main/sphinx/functions/conditional.md +++ b/docs/src/main/sphinx/functions/conditional.md @@ -1,7 +1,6 @@ # Conditional expressions (case-expression)= - ## CASE The standard SQL `CASE` expression has two forms. @@ -52,8 +51,11 @@ SELECT a, b, END ``` -(if-function)= +SQL routines can use [`CASE` statements](/routines/case) that use a slightly +different syntax from the CASE expressions. Specifically note the requirements +for terminating each clause with a semicolon `;` and the usage of `END CASE`. +(if-expression)= ## IF The `IF` expression has two forms, one supplying only a @@ -93,6 +95,10 @@ SELECT FROM tpch.sf1.orders; ``` +SQL routines can use [`IF` statements](/routines/if) that use a slightly +different syntax from `IF` expressions. Specifically note the requirement +for terminating each clause with a semicolon `;` and the usage of `END IF`. 
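+
+As an illustrative sketch, the following shows the same logic written once with
+the `IF` expression and once as an `IF` statement inside a routine body; the
+`classify` function name and the threshold are assumptions for the example:
+
+```sql
+-- IF expression: a single expression, no semicolons and no END IF
+SELECT IF(population > 10_000_000, 'megacity', 'city') AS category
+FROM (VALUES 13_700_000, 1_897_000) t(population);
+
+-- IF statement in a SQL routine: each clause ends with ; and the block with END IF
+FUNCTION classify(population BIGINT)
+RETURNS VARCHAR
+BEGIN
+  IF population > 10_000_000 THEN
+    RETURN 'megacity';
+  END IF;
+  RETURN 'city';
+END
+```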
+ (coalesce-function)= ## COALESCE diff --git a/docs/src/main/sphinx/functions/conversion.md b/docs/src/main/sphinx/functions/conversion.md index 7de546ab7ed8b..8fb09e16c005b 100644 --- a/docs/src/main/sphinx/functions/conversion.md +++ b/docs/src/main/sphinx/functions/conversion.md @@ -22,7 +22,7 @@ Like {func}`cast`, but returns null if the cast fails. ## Formatting :::{function} format(format, args...) -> varchar -Returns a formatted string using the specified [format string](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Formatter.html#syntax) +Returns a formatted string using the specified [format string](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/Formatter.html#syntax) and arguments: ``` diff --git a/docs/src/main/sphinx/functions/geospatial.md b/docs/src/main/sphinx/functions/geospatial.md index 448bc8f2e2027..19473464ada1e 100644 --- a/docs/src/main/sphinx/functions/geospatial.md +++ b/docs/src/main/sphinx/functions/geospatial.md @@ -174,7 +174,8 @@ Returns the closure of the combinatorial boundary of this geometry. :::{function} ST_Buffer(Geometry, distance) -> Geometry Returns the geometry that represents all points whose distance from the specified geometry -is less than or equal to the specified distance. +is less than or equal to the specified distance. If the points of the geometry are extremely +close together (``delta < 1e-8``), this might return an empty geometry. ::: :::{function} ST_Difference(Geometry, Geometry) -> Geometry diff --git a/docs/src/main/sphinx/functions/list-by-topic.md b/docs/src/main/sphinx/functions/list-by-topic.md index d55aafbcba6bc..20e4b1d15e491 100644 --- a/docs/src/main/sphinx/functions/list-by-topic.md +++ b/docs/src/main/sphinx/functions/list-by-topic.md @@ -158,10 +158,11 @@ For more details, see {doc}`comparison` For more details, see {doc}`conditional` -- {ref}`coalesce ` -- {ref}`if ` -- {ref}`nullif ` -- {ref}`try ` +- [case](case-expression) +- [coalesce](coalesce-function) +- [if](if-expression) +- [nullif](nullif-function) +- [try](try-function) ## Conversion diff --git a/docs/src/main/sphinx/functions/list.md b/docs/src/main/sphinx/functions/list.md index e69699448ed77..823c252821fdd 100644 --- a/docs/src/main/sphinx/functions/list.md +++ b/docs/src/main/sphinx/functions/list.md @@ -200,7 +200,7 @@ ## I -- [if](if-function) +- [if](if-expression) - {func}`index` - {func}`infinity` - {func}`intersection_cardinality` diff --git a/docs/src/main/sphinx/functions/regexp.md b/docs/src/main/sphinx/functions/regexp.md index cbc853e0ecc7c..e78939d96eb43 100644 --- a/docs/src/main/sphinx/functions/regexp.md +++ b/docs/src/main/sphinx/functions/regexp.md @@ -184,6 +184,6 @@ SELECT regexp_split('1a 2b 14m', '\s*[a-z]+\s*'); -- [1, 2, 14, ] ``` ::: -[capturing group number]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/Pattern.html#gnumber -[capturing groups]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/Pattern.html#cg -[java pattern]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/Pattern.html +[capturing group number]: https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/regex/Pattern.html#gnumber +[capturing groups]: https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/regex/Pattern.html#cg +[java pattern]: https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/regex/Pattern.html diff --git a/docs/src/main/sphinx/installation/deployment.md 
b/docs/src/main/sphinx/installation/deployment.md index e7f723c5ede7f..b3139babc7d5d 100644 --- a/docs/src/main/sphinx/installation/deployment.md +++ b/docs/src/main/sphinx/installation/deployment.md @@ -35,19 +35,15 @@ ### Java runtime environment -Trino requires a 64-bit version of Java 17, with a minimum required version of 17.0.3. -Earlier major versions such as Java 8 or Java 11 do not work. -Newer major versions such as Java 18 or 19, are not supported -- they may work, but are not tested. +Trino requires a 64-bit version of Java 21, with a minimum required version of 21.0.1. +Earlier major versions such as Java 8, Java 11 or Java 17 do not work. +Newer major versions such as Java 22 are not supported -- they may work, but are not tested. We recommend using the Eclipse Temurin OpenJDK distribution from [Adoptium](https://adoptium.net/) as the JDK for Trino, as Trino is tested against that distribution. Eclipse Temurin is also the JDK used by the [Trino Docker image](https://hub.docker.com/r/trinodb/trino). -If you are using Java 17 or 18, the JVM must be configured to use UTF-8 as the default charset by -adding `-Dfile.encoding=UTF-8` to `etc/jvm.config`. Starting with Java 19, the Java default -charset is UTF-8, so this configuration is not needed. - (requirements-python)= ### Python diff --git a/docs/src/main/sphinx/language/types.md b/docs/src/main/sphinx/language/types.md index e4acfe02a0ce3..08ccd5583b150 100644 --- a/docs/src/main/sphinx/language/types.md +++ b/docs/src/main/sphinx/language/types.md @@ -154,13 +154,20 @@ before any Unicode character usage with 4 digits. In the examples above with 6 digits require usage of the plus symbol before the code. For example, you need to use `\+01F600` for a grinning face emoji. +Single quotes in string literals can be escaped by using another single quote: +`'I am big, it''s the pictures that got small!'` + ### `CHAR` Fixed length character data. A `CHAR` type without length specified has a default length of 1. A `CHAR(x)` value always has `x` characters. For example, casting `dog` to `CHAR(7)` adds 4 implicit trailing spaces. Leading and trailing spaces are included in comparisons of `CHAR` values. As a result, two character values with different lengths (`CHAR(x)` and -`CHAR(y)` where `x != y`) will never be equal. +`CHAR(y)` where `x != y`) will never be equal. As with `VARCHAR`, a single quote in a `CHAR` +literal can be escaped with another single quote: +```sql +SELECT CHAR 'All right, Mr. DeMille, I''m ready for my close-up.' 
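+-- returns the value: All right, Mr. DeMille, I'm ready for my close-up.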
+``` Example type definitions: `char`, `char(20)` diff --git a/docs/src/main/sphinx/release.md b/docs/src/main/sphinx/release.md index 056cfe9d7f9b7..1457ac89a4a3c 100644 --- a/docs/src/main/sphinx/release.md +++ b/docs/src/main/sphinx/release.md @@ -1,12 +1,12 @@ # Release notes (releases-2023)= - ## 2023 ```{toctree} :maxdepth: 1 +release/release-435 release/release-434 release/release-433 release/release-432 @@ -39,7 +39,6 @@ release/release-406 ``` (releases-2022)= - ## 2022 ```{toctree} @@ -86,7 +85,6 @@ release/release-368 ``` (releases-2021)= - ## 2021 ```{toctree} @@ -112,7 +110,6 @@ release/release-351 ``` (releases-2020)= - ## 2020 ```{toctree} @@ -144,7 +141,6 @@ release/release-328 ``` (releases-2019)= - ## 2019 ```{toctree} diff --git a/docs/src/main/sphinx/release/release-434.md b/docs/src/main/sphinx/release/release-434.md index 2c62dcd348ee6..c7c3c4d2488d6 100644 --- a/docs/src/main/sphinx/release/release-434.md +++ b/docs/src/main/sphinx/release/release-434.md @@ -67,7 +67,7 @@ * Add support for separate metadata caching configuration for schemas, tables, and metadata. ({issue}`19859`) -## MySQl connector +## MySQL connector * Add support for separate metadata caching configuration for schemas, tables, and metadata. ({issue}`19859`) diff --git a/docs/src/main/sphinx/release/release-435.md b/docs/src/main/sphinx/release/release-435.md new file mode 100644 index 0000000000000..ccb6aef6d5e89 --- /dev/null +++ b/docs/src/main/sphinx/release/release-435.md @@ -0,0 +1,82 @@ +# Release 435 (13 Dec 2023) + +## General + +* Add support for the `json_table` table function. ({issue}`18017`) +* Reduce coordinator memory usage. ({issue}`20018`, {issue}`20022`) +* Increase reliability and memory consumption of inserts. ({issue}`20040`) +* Fix incorrect results for `LIKE` with some strings containing repeated + substrings. ({issue}`20089`) +* Fix coordinator memory leak. ({issue}`20023`) +* Fix possible query failure for `MERGE` queries when `retry-policy` set to + `TASK` and `query.determine-partition-count-for-write-enabled` set to `true`. + ({issue}`19979`) +* Prevent hanging query processing with `retry.policy` set to `TASK` when a + worker node died. ({issue}`18603 `) +* Fix query failure when reading array columns. ({issue}`20065`) + +## Delta Lake connector + +* {{breaking}} Remove support for registering external tables with + `CREATE TABLE` and the `location` table property. Use the + `register_table` procedure as replacement. The property + `delta.legacy-create-table-with-existing-location.enabled` is + also removed. ({issue}`17016`) +* Improve query planning performance on Delta Lake tables. ({issue}`19795`) +* Ensure AWS access keys are used for connections to the AWS Security Token + Service. ({issue}`19982`) +* Reduce memory usage for inserts into partitioned tables. ({issue}`19649`) +* Improve reliability when reading from GCS. ({issue}`20003`) +* Fix failure when reading ORC data. ({issue}`19935`) + +## Elasticsearch connector + +* Ensure certificate validation is skipped when + `elasticsearch.tls.verify-hostnames` is `false`. ({issue}`20076`) + +## Hive connector + +* Add support for columns that changed from integer types to `decimal` type. ({issue}`19931`) +* Add support for columns that changed from `date` to `varchar` type. ({issue}`19500`) +* Rename `presto_version` table property to `trino_version`. ({issue}`19967`) +* Rename `presto_query_id` table property to `trino_query_id`. 
({issue}`19967`) +* Ensure AWS access keys are used for connections to the AWS Security Token + Service. ({issue}`19982`) +* Improve query planning time on Hive tables without statistics. ({issue}`20034`) +* Reduce memory usage for inserts into partitioned tables. ({issue}`19649`) +* Improve reliability when reading from GCS. ({issue}`20003`) +* Fix failure when reading ORC data. ({issue}`19935`) + +## Hudi connector + +* Ensure AWS access keys are used for connections to the AWS Security Token + Service. ({issue}`19982`) +* Improve reliability when reading from GCS. ({issue}`20003`) +* Fix failure when reading ORC data. ({issue}`19935`) + +## Iceberg connector + +* Fix incorrect removal of statistics files when executing + `remove_orphan_files`. ({issue}`19965`) +* Ensure AWS access keys are used for connections to the AWS Security Token + Service. ({issue}`19982`) +* Improve performance of metadata queries involving materialized views. ({issue}`19939`) +* Reduce memory usage for inserts into partitioned tables. ({issue}`19649`) +* Improve reliability when reading from GCS. ({issue}`20003`) +* Fix failure when reading ORC data. ({issue}`19935`) + +## Ignite connector + +* Improve performance of queries involving `OR` with `IS NULL`, `IS NOT NULL` + predicates, or involving `NOT` expression by pushing predicate computation to + the Ignite database. ({issue}`19453`) + +## MongoDB connector + +* Allow configuration to use local scheduling of MongoDB splits with + `mongodb.allow-local-scheduling`. ({issue}`20078`) + +## SQL Server connector + +* Fix incorrect results when reading dates between `1582-10-05` and + `1582-10-14`. ({issue}`20005`) diff --git a/docs/src/main/sphinx/routines/case.md b/docs/src/main/sphinx/routines/case.md index 926c1df925f3d..f7264d08b0962 100644 --- a/docs/src/main/sphinx/routines/case.md +++ b/docs/src/main/sphinx/routines/case.md @@ -60,3 +60,4 @@ documentation](/routines/examples). ## See also * [](/routines/introduction) +* [Conditional expressions using `CASE`](case-expression) diff --git a/docs/src/main/sphinx/routines/examples.md b/docs/src/main/sphinx/routines/examples.md index 7ed95d24d7986..f586b2988b7c4 100644 --- a/docs/src/main/sphinx/routines/examples.md +++ b/docs/src/main/sphinx/routines/examples.md @@ -369,3 +369,464 @@ BEGIN RETURN r; END ``` + +## Date string parsing example + +This example routine parses a date string of type `VARCHAR` into `TIMESTAMP WITH +TIME ZONE`. Date strings are commonly represented by ISO 8601 standard, such as +`2023-12-01`, `2023-12-01T23`. Date strings are also often represented in the +`YYYYmmdd` and `YYYYmmddHH` format, such as `20230101` and `2023010123`. Hive +tables can use this format to represent day and hourly partitions, for example +`/day=20230101`, `/hour=2023010123`. + +This routine parses date strings in a best-effort fashion and can be used as a +replacement for date string manipulation functions such as `date`, `date_parse`, +`from_iso8601_date`, and `from_iso8601_timestamp`. + +Note that the routine defaults the time value to `00:00:00.000` and the time +zone to the session time zone. 
+ + +```sql +FUNCTION from_date_string(date_string VARCHAR) +RETURNS TIMESTAMP WITH TIME ZONE +BEGIN + IF date_string like '%-%' THEN -- ISO 8601 + RETURN from_iso8601_timestamp(date_string); + ELSEIF length(date_string) = 8 THEN -- YYYYmmdd + RETURN date_parse(date_string, '%Y%m%d'); + ELSEIF length(date_string) = 10 THEN -- YYYYmmddHH + RETURN date_parse(date_string, '%Y%m%d%H'); + END IF; + RETURN NULL; +END +``` + +Following are a couple of example invocations with result and explanation: + +```sql +SELECT from_date_string('2023-01-01'); -- 2023-01-01 00:00:00.000 UTC (using the ISO 8601 format) +SELECT from_date_string('2023-01-01T23'); -- 2023-01-01 23:00:00.000 UTC (using the ISO 8601 format) +SELECT from_date_string('2023-01-01T23:23:23'); -- 2023-01-01 23:23:23.000 UTC (using the ISO 8601 format) +SELECT from_date_string('20230101'); -- 2023-01-01 00:00:00.000 UTC (using the YYYYmmdd format) +SELECT from_date_string('2023010123'); -- 2023-01-01 23:00:00.000 UTC (using the YYYYmmddHH format) +SELECT from_date_string(NULL); -- NULL (handles NULL string) +SELECT from_date_string('abc'); -- NULL (not matched to any format) +``` + +## Truncating long strings + +This example routine `strtrunc` truncates strings longer than 60 characters, +leaving the first 30 and the last 25 characters, and cutting out extra +characters in the middle. + +```sql +FUNCTION strtrunc(input VARCHAR) +RETURNS VARCHAR +RETURN + CASE WHEN length(input) > 60 + THEN substr(input, 1, 30) || ' ... ' || substr(input, length(input) - 25) + ELSE input + END; +``` + +The preceding declaration is very compact and consists of only one complex +statement with a [`CASE` expression](case-expression) and multiple function +calls. It can therefore define the complete logic in the `RETURN` clause. + +The following statement shows the same capability within the routine itself. +Note the duplicate `RETURN` inside and outside the `CASE` statement and the +required `END CASE;`. The second `RETURN` statement is required, because a +routine must end with a `RETURN` statement. As a result the `ELSE` clause can be +omitted. + +```sql +FUNCTION strtrunc(input VARCHAR) +RETURNS VARCHAR +BEGIN + CASE WHEN length(input) > 60 + THEN + RETURN substr(input, 1, 30) || ' ... ' || substr(input, length(input) - 25); + ELSE + RETURN input; + END CASE; + RETURN input; +END; +``` + +The next example changes over from a `CASE` to an `IF` statement, and avoids the +duplicate `RETURN`: + +```sql +FUNCTION strtrunc(input VARCHAR) +RETURNS VARCHAR +BEGIN + IF length(input) > 60 THEN + RETURN substr(input, 1, 30) || ' ... ' || substr(input, length(input) - 25); + END IF; + RETURN input; +END; +``` + +All the preceding examples create the same output. 
Following is an example query +which generates long strings to truncate: + +```sql +WITH +data AS ( + SELECT substring('strtrunc truncates strings longer than 60 characters, leaving the prefix and suffix visible', 1, s.num) AS value + FROM table(sequence(start=>40, stop=>80, step=>5)) AS s(num) +) +SELECT + data.value + , strtrunc(data.value) AS truncated +FROM data +ORDER BY data.value; +``` + +The preceding query produces the following output with all variants of the +routine: + +``` + value | truncated +----------------------------------------------------------------------------------+--------------------------------------------------------------- + strtrunc truncates strings longer than 6 | strtrunc truncates strings longer than 6 + strtrunc truncates strings longer than 60 cha | strtrunc truncates strings longer than 60 cha + strtrunc truncates strings longer than 60 characte | strtrunc truncates strings longer than 60 characte + strtrunc truncates strings longer than 60 characters, l | strtrunc truncates strings longer than 60 characters, l + strtrunc truncates strings longer than 60 characters, leavin | strtrunc truncates strings longer than 60 characters, leavin + strtrunc truncates strings longer than 60 characters, leaving the | strtrunc truncates strings lon ... 60 characters, leaving the + strtrunc truncates strings longer than 60 characters, leaving the pref | strtrunc truncates strings lon ... aracters, leaving the pref + strtrunc truncates strings longer than 60 characters, leaving the prefix an | strtrunc truncates strings lon ... ers, leaving the prefix an + strtrunc truncates strings longer than 60 characters, leaving the prefix and suf | strtrunc truncates strings lon ... leaving the prefix and suf +``` + +A possible improvement is to introduce parameters for the total length. + +## Formatting bytes + +Trino includes a built-in `format_number()` function. However it is using units +that don't work well with bytes. The following `format_data_size` routine can +format large values of bytes into a human readable string. + +```sql +FUNCTION format_data_size(input BIGINT) +RETURNS VARCHAR + BEGIN + DECLARE value DOUBLE DEFAULT CAST(input AS DOUBLE); + DECLARE result BIGINT; + DECLARE base INT DEFAULT 1024; + DECLARE unit VARCHAR DEFAULT 'B'; + DECLARE format VARCHAR; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'kB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'MB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'GB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'TB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'PB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'EB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'ZB'; + END IF; + IF abs(value) >= base THEN + SET value = value / base; + SET unit = 'YB'; + END IF; + IF abs(value) < 10 THEN + SET format = '%.2f'; + ELSEIF abs(value) < 100 THEN + SET format = '%.1f'; + ELSE + SET format = '%.0f'; + END IF; + RETURN format(format, value) || unit; + END; +``` + +Below is a query to show how it formats a wide range of values. 
+ +```sql +WITH +data AS ( + SELECT CAST(pow(10, s.p) AS BIGINT) AS num + FROM table(sequence(start=>1, stop=>18)) AS s(p) + UNION ALL + SELECT -CAST(pow(10, s.p) AS BIGINT) AS num + FROM table(sequence(start=>1, stop=>18)) AS s(p) +) +SELECT + data.num + , format_data_size(data.num) AS formatted +FROM data +ORDER BY data.num; +``` + +The preceding query produces the following output: + +``` + num | formatted +----------------------+----------- + -1000000000000000000 | -888PB + -100000000000000000 | -88.8PB + -10000000000000000 | -8.88PB + -1000000000000000 | -909TB + -100000000000000 | -90.9TB + -10000000000000 | -9.09TB + -1000000000000 | -931GB + -100000000000 | -93.1GB + -10000000000 | -9.31GB + -1000000000 | -954MB + -100000000 | -95.4MB + -10000000 | -9.54MB + -1000000 | -977kB + -100000 | -97.7kB + -10000 | -9.77kB + -1000 | -1000B + -100 | -100B + -10 | -10.0B + 0 | 0.00B + 10 | 10.0B + 100 | 100B + 1000 | 1000B + 10000 | 9.77kB + 100000 | 97.7kB + 1000000 | 977kB + 10000000 | 9.54MB + 100000000 | 95.4MB + 1000000000 | 954MB + 10000000000 | 9.31GB + 100000000000 | 93.1GB + 1000000000000 | 931GB + 10000000000000 | 9.09TB + 100000000000000 | 90.9TB + 1000000000000000 | 909TB + 10000000000000000 | 8.88PB + 100000000000000000 | 88.8PB + 1000000000000000000 | 888PB +``` + + +## Charts + +Trino already has a built-in `bar()` [color function](/functions/color), but +it's using ANSI escape codes to output colors, and thus is only usable for +displaying results in a terminal. The following example shows a similar +function, that only uses ASCII characters. + +```sql +FUNCTION ascii_bar(value DOUBLE) +RETURNS VARCHAR +BEGIN + DECLARE max_width DOUBLE DEFAULT 40.0; + RETURN array_join( + repeat('█', + greatest(0, CAST(floor(max_width * value) AS integer) - 1)), '') + || ARRAY[' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'][cast((value % (cast(1 as double) / max_width)) * max_width * 8 + 1 as int)]; +END; +``` + +It can be used to visualize a value. 
+ +```sql +WITH +data AS ( + SELECT + cast(s.num as double) / 100.0 AS x, + sin(cast(s.num as double) / 100.0) AS y + FROM table(sequence(start=>0, stop=>314, step=>10)) AS s(num) +) +SELECT + data.x, + round(data.y, 4) AS y, + ascii_bar(data.y) AS chart +FROM data +ORDER BY data.x; +``` + +The preceding query produces the following output: + +```text + x | y | chart +-----+--------+----------------------------------------- + 0.0 | 0.0 | + 0.1 | 0.0998 | ███ + 0.2 | 0.1987 | ███████ + 0.3 | 0.2955 | ██████████▉ + 0.4 | 0.3894 | ██████████████▋ + 0.5 | 0.4794 | ██████████████████▏ + 0.6 | 0.5646 | █████████████████████▋ + 0.7 | 0.6442 | ████████████████████████▊ + 0.8 | 0.7174 | ███████████████████████████▊ + 0.9 | 0.7833 | ██████████████████████████████▍ + 1.0 | 0.8415 | ████████████████████████████████▋ + 1.1 | 0.8912 | ██████████████████████████████████▋ + 1.2 | 0.932 | ████████████████████████████████████▎ + 1.3 | 0.9636 | █████████████████████████████████████▌ + 1.4 | 0.9854 | ██████████████████████████████████████▍ + 1.5 | 0.9975 | ██████████████████████████████████████▉ + 1.6 | 0.9996 | ███████████████████████████████████████ + 1.7 | 0.9917 | ██████████████████████████████████████▋ + 1.8 | 0.9738 | ██████████████████████████████████████ + 1.9 | 0.9463 | ████████████████████████████████████▉ + 2.0 | 0.9093 | ███████████████████████████████████▍ + 2.1 | 0.8632 | █████████████████████████████████▌ + 2.2 | 0.8085 | ███████████████████████████████▍ + 2.3 | 0.7457 | ████████████████████████████▉ + 2.4 | 0.6755 | ██████████████████████████ + 2.5 | 0.5985 | ███████████████████████ + 2.6 | 0.5155 | ███████████████████▋ + 2.7 | 0.4274 | ████████████████▏ + 2.8 | 0.335 | ████████████▍ + 2.9 | 0.2392 | ████████▋ + 3.0 | 0.1411 | ████▋ + 3.1 | 0.0416 | ▋ +``` + +It's also possible to draw more compacted charts. Following is a function +drawing vertical bars: + +```sql +FUNCTION vertical_bar(value DOUBLE) +RETURNS VARCHAR +RETURN ARRAY[' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'][cast(value * 8 + 1 as int)]; +``` + +It can be used to draw a distribution of values, in a single column. 
+ +```sql +WITH +measurements(sensor_id, recorded_at, value) AS ( + VALUES + ('A', date '2023-01-01', 5.0) + , ('A', date '2023-01-03', 7.0) + , ('A', date '2023-01-04', 15.0) + , ('A', date '2023-01-05', 14.0) + , ('A', date '2023-01-08', 10.0) + , ('A', date '2023-01-09', 1.0) + , ('A', date '2023-01-10', 7.0) + , ('A', date '2023-01-11', 8.0) + , ('B', date '2023-01-03', 2.0) + , ('B', date '2023-01-04', 3.0) + , ('B', date '2023-01-05', 2.5) + , ('B', date '2023-01-07', 2.75) + , ('B', date '2023-01-09', 4.0) + , ('B', date '2023-01-10', 1.5) + , ('B', date '2023-01-11', 1.0) +), +days AS ( + SELECT date_add('day', s.num, date '2023-01-01') AS day + -- table function arguments need to be constant but range could be calculated + -- using: SELECT date_diff('day', max(recorded_at), min(recorded_at)) FROM measurements + FROM table(sequence(start=>0, stop=>10)) AS s(num) +), +sensors(id) AS (VALUES ('A'), ('B')), +normalized AS ( + SELECT + sensors.id AS sensor_id, + days.day, + value, + value / max(value) OVER (PARTITION BY sensor_id) AS normalized + FROM days + CROSS JOIN sensors + LEFT JOIN measurements m ON day = recorded_at AND m.sensor_id = sensors.id +) +SELECT + sensor_id, + min(day) AS start, + max(day) AS stop, + count(value) AS num_values, + min(value) AS min_value, + max(value) AS max_value, + avg(value) AS avg_value, + array_join(array_agg(coalesce(vertical_bar(normalized), ' ') ORDER BY day), '') AS distribution +FROM normalized +WHERE sensor_id IS NOT NULL +GROUP BY sensor_id +ORDER BY sensor_id; +``` + +The preceding query produces the following output: + +```text + sensor_id | start | stop | num_values | min_value | max_value | avg_value | distribution +-----------+------------+------------+------------+-----------+-----------+-----------+-------------- + A | 2023-01-01 | 2023-01-11 | 8 | 1.00 | 15.00 | 8.38 | ▃ ▄█▇ ▅▁▄▄ + B | 2023-01-01 | 2023-01-11 | 7 | 1.00 | 4.00 | 2.39 | ▄▆▅ ▆ █▃▂ +``` + +## Top-N + +Trino already has a built-in [aggregate function](/functions/aggregate) called +`approx_most_frequent()`, that can calculate most frequently occurring values. +It returns a map with values as keys and number of occurrences as values. Maps +are not ordered, so when displayed, the entries can change places on subsequent +runs of the same query, and readers must still compare all frequencies to find +the one most frequent value. The following is a routine returns ordered results +as a string. 
+ +```sql +FUNCTION format_topn(input map) +RETURNS VARCHAR +NOT DETERMINISTIC +BEGIN + DECLARE freq_separator VARCHAR DEFAULT '='; + DECLARE entry_separator VARCHAR DEFAULT ', '; + RETURN array_join(transform( + reverse(array_sort(transform( + transform( + map_entries(input), + r -> cast(r AS row(key varchar, value bigint)) + ), + r -> cast(row(r.value, r.key) AS row(value bigint, key varchar))) + )), + r -> r.key || freq_separator || cast(r.value as varchar)), + entry_separator); +END; +``` + +Following is an example query to count generated strings: + +```sql +WITH +data AS ( + SELECT lpad('', 3, chr(65+(s.num / 3))) AS value + FROM table(sequence(start=>1, stop=>10)) AS s(num) +), +aggregated AS ( + SELECT + array_agg(data.value ORDER BY data.value) AS all_values, + approx_most_frequent(3, data.value, 1000) AS top3 + FROM data +) +SELECT + a.all_values, + a.top3, + format_topn(a.top3) AS top3_formatted +FROM aggregated a; +``` + +The preceding query produces the following result: + +```text + all_values | top3 | top3_formatted +----------------------------------------------------+-----------------------+--------------------- + [AAA, AAA, BBB, BBB, BBB, CCC, CCC, CCC, DDD, DDD] | {AAA=2, CCC=3, BBB=3} | CCC=3, BBB=3, AAA=2 +``` diff --git a/docs/src/main/sphinx/routines/if.md b/docs/src/main/sphinx/routines/if.md index a02c9e659dc25..264beec66b522 100644 --- a/docs/src/main/sphinx/routines/if.md +++ b/docs/src/main/sphinx/routines/if.md @@ -45,3 +45,4 @@ documentation](/routines/examples). ## See also * [](/routines/introduction) +* [Conditional expressions using `IF`](if-expression) diff --git a/docs/src/main/sphinx/routines/introduction.md b/docs/src/main/sphinx/routines/introduction.md index ab3377ba75a18..f444c29374a2f 100644 --- a/docs/src/main/sphinx/routines/introduction.md +++ b/docs/src/main/sphinx/routines/introduction.md @@ -55,8 +55,13 @@ SELECT abs(-10); -- -20, not 10! ## Catalog routines You can store a routine in the context of a catalog, if the connector used in -the catalog supports routine storage. In this scenario, the following commands -can be used: +the catalog supports routine storage. The following connectors support catalog +routine storage: + +* [](/connector/hive) +* [](/connector/memory) + +In this scenario, the following commands can be used: * [](/sql/create-function) to create and store a routine. * [](/sql/drop-function) to remove a routine. @@ -153,8 +158,8 @@ terms of memory and processing. Take the following considerations into account when writing and running SQL routines: * Some checks for the runtime behavior of routines are in place. For example, - routines that use take longer to process than a hardcoded threshold are automatically - terminated. + routines that take longer to process than a hardcoded threshold are + automatically terminated. * Avoid creation of arrays in a looping construct. Each iteration creates a separate new array with all items and copies the data for each modification, leaving the prior array in memory for automated clean up later. Use a [lambda diff --git a/docs/src/main/sphinx/security/tls.md b/docs/src/main/sphinx/security/tls.md index 775a20f4e61b5..f46caa1fcdc3a 100644 --- a/docs/src/main/sphinx/security/tls.md +++ b/docs/src/main/sphinx/security/tls.md @@ -26,8 +26,8 @@ using TLS 1.2 and TLS 1.3 certificates. The server rejects TLS 1.1, TLS 1.0, and all SSL format certificates. The Trino server does not specify a set of supported ciphers, instead deferring -to the defaults set by the JVM version in use. 
The documentation for Java 17 -lists its [supported cipher suites](https://docs.oracle.com/en/java/javase/17/security/oracle-providers.html#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2__SUNJSSE_CIPHER_SUITES). +to the defaults set by the JVM version in use. The documentation for Java 21 +lists its [supported cipher suites](https://docs.oracle.com/en/java/javase/21/security/oracle-providers.html#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2__SUNJSSE_CIPHER_SUITES). Run the following two-line code on the same JVM from the same vendor as configured on the coordinator to determine that JVM's default cipher list. @@ -56,7 +56,7 @@ considered in conjunction with your organization's security managers. Using a different suite may require downloading and installing a different SunJCE implementation package. Some locales may have export restrictions on cipher suites. See the discussion in Java documentation that begins with [Customizing -the Encryption Algorithm Providers](https://docs.oracle.com/en/java/javase/17/security/java-secure-socket-extension-jsse-reference-guide.html#GUID-316FB978-7588-442E-B829-B4973DB3B584). +the Encryption Algorithm Providers](https://docs.oracle.com/en/java/javase/21/security/java-secure-socket-extension-jsse-reference-guide.html#GUID-316FB978-7588-442E-B829-B4973DB3B584). :::{note} If you manage the coordinator's direct TLS implementatation, monitor the CPU diff --git a/docs/src/main/sphinx/static/img/opensearch.png b/docs/src/main/sphinx/static/img/opensearch.png new file mode 100644 index 0000000000000..113d451b34673 Binary files /dev/null and b/docs/src/main/sphinx/static/img/opensearch.png differ diff --git a/lib/trino-array/pom.xml b/lib/trino-array/pom.xml index b3a566e618fbd..38862568d8672 100644 --- a/lib/trino-array/pom.xml +++ b/lib/trino-array/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-cache/pom.xml b/lib/trino-cache/pom.xml index 63848a176b006..7bc80bb2204d4 100644 --- a/lib/trino-cache/pom.xml +++ b/lib/trino-cache/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableCache.java b/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableCache.java index 0c589503bea79..ce3788a3b8bba 100644 --- a/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableCache.java +++ b/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableCache.java @@ -360,7 +360,7 @@ private static Integer newInteger(int value) /** * Test that the loader is invoked only once for concurrent invocations of {{@link LoadingCache#get(Object, Callable)} with equal keys. - * This is a behavior of Guava Cache as well. While this is necessarily desirable behavior (see + * This is a behavior of Guava Cache as well. While this is not necessarily desirable behavior (see * https://github.com/trinodb/trino/issues/11067), * the test exists primarily to document current state and support discussion, should the current state change. 
*/ diff --git a/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableLoadingCache.java b/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableLoadingCache.java index fea26d863d688..bb2e2f72f06a8 100644 --- a/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableLoadingCache.java +++ b/lib/trino-cache/src/test/java/io/trino/cache/TestEvictableLoadingCache.java @@ -411,7 +411,7 @@ public Map loadAll(Iterablehttps://github.com/trinodb/trino/issues/11067), * the test exists primarily to document current state and support discussion, should the current state change. */ diff --git a/lib/trino-filesystem-azure/pom.xml b/lib/trino-filesystem-azure/pom.xml index f637bfa330179..63e155f093a79 100644 --- a/lib/trino-filesystem-azure/pom.xml +++ b/lib/trino-filesystem-azure/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureFileSystem.java b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureFileSystem.java index 702b68a38a3c9..40c75f9424601 100644 --- a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureFileSystem.java +++ b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureFileSystem.java @@ -53,6 +53,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.trino.filesystem.azure.AzureUtils.handleAzureException; +import static io.trino.filesystem.azure.AzureUtils.isFileNotFoundException; import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; import static java.util.UUID.randomUUID; @@ -123,6 +124,9 @@ public void deleteFile(Location location) client.delete(); } catch (RuntimeException e) { + if (isFileNotFoundException(e)) { + return; + } throw handleAzureException(e, "deleting file", azureLocation); } } diff --git a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureOutputFile.java b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureOutputFile.java index 17840e85388a3..261601c197e71 100644 --- a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureOutputFile.java +++ b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureOutputFile.java @@ -71,13 +71,6 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) return createOutputStream(memoryContext, true); } - @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) - throws IOException - { - return create(memoryContext); - } - private AzureOutputStream createOutputStream(AggregatedMemoryContext memoryContext, boolean overwrite) throws IOException { diff --git a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureUtils.java b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureUtils.java index ef0c01c299f54..2117af82671f7 100644 --- a/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureUtils.java +++ b/lib/trino-filesystem-azure/src/main/java/io/trino/filesystem/azure/AzureUtils.java @@ -28,15 +28,8 @@ private AzureUtils() {} public static IOException handleAzureException(RuntimeException exception, String action, AzureLocation location) throws IOException { - if (exception instanceof BlobStorageException blobStorageException) { - if (BlobErrorCode.BLOB_NOT_FOUND.equals(blobStorageException.getErrorCode())) { - throw withCause(new 
FileNotFoundException(location.toString()), exception); - } - } - if (exception instanceof DataLakeStorageException dataLakeStorageException) { - if ("PathNotFound".equals(dataLakeStorageException.getErrorCode())) { - throw withCause(new FileNotFoundException(location.toString()), exception); - } + if (isFileNotFoundException(exception)) { + throw withCause(new FileNotFoundException(location.toString()), exception); } if (exception instanceof AzureException) { throw new IOException("Azure service error %s file: %s".formatted(action, location), exception); @@ -44,6 +37,17 @@ public static IOException handleAzureException(RuntimeException exception, Strin throw new IOException("Error %s file: %s".formatted(action, location), exception); } + public static boolean isFileNotFoundException(RuntimeException exception) + { + if (exception instanceof BlobStorageException blobStorageException) { + return BlobErrorCode.BLOB_NOT_FOUND.equals(blobStorageException.getErrorCode()); + } + if (exception instanceof DataLakeStorageException dataLakeStorageException) { + return "PathNotFound" .equals(dataLakeStorageException.getErrorCode()); + } + return false; + } + private static T withCause(T throwable, Throwable cause) { throwable.initCause(cause); diff --git a/lib/trino-filesystem-gcs/pom.xml b/lib/trino-filesystem-gcs/pom.xml index 33acd16414f3b..a5732aeffb7ee 100644 --- a/lib/trino-filesystem-gcs/pom.xml +++ b/lib/trino-filesystem-gcs/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -98,6 +98,11 @@ configuration + + io.airlift + slice + + io.airlift units @@ -123,6 +128,11 @@ jakarta.validation-api + + org.threeten + threetenbp + + io.trino trino-spi @@ -141,6 +151,12 @@ test + + io.airlift + testing + test + + io.trino trino-filesystem diff --git a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystem.java b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystem.java index 3b7f2383cf4c4..c971925b34914 100644 --- a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystem.java +++ b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystem.java @@ -45,7 +45,7 @@ import static com.google.cloud.storage.Storage.BlobListOption.pageSize; import static com.google.common.collect.Iterables.partition; import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.trino.filesystem.gcs.GcsUtils.getBlobOrThrow; +import static io.trino.filesystem.gcs.GcsUtils.getBlob; import static io.trino.filesystem.gcs.GcsUtils.handleGcsException; import static java.util.Objects.requireNonNull; @@ -99,8 +99,7 @@ public void deleteFile(Location location) { GcsLocation gcsLocation = new GcsLocation(location); checkIsValidFile(gcsLocation); - Blob blob = getBlobOrThrow(storage, gcsLocation); - blob.delete(); + getBlob(storage, gcsLocation).ifPresent(Blob::delete); } @Override @@ -112,7 +111,8 @@ public void deleteFiles(Collection locations) for (List locationBatch : partition(locations, batchSize)) { StorageBatch batch = storage.batch(); for (Location location : locationBatch) { - batch.delete(getBlobOrThrow(storage, new GcsLocation(location)).getBlobId()); + getBlob(storage, new GcsLocation(location)) + .ifPresent(blob -> batch.delete(blob.getBlobId())); } batchFutures.add(executorService.submit(batch::submit)); } diff --git a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystemConfig.java 
b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystemConfig.java index f71b4997f6956..b5c8652ccdd3e 100644 --- a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystemConfig.java +++ b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsFileSystemConfig.java @@ -18,10 +18,15 @@ import io.airlift.configuration.ConfigSecuritySensitive; import io.airlift.configuration.validation.FileExists; import io.airlift.units.DataSize; +import io.airlift.units.Duration; +import io.airlift.units.MinDuration; import jakarta.annotation.Nullable; +import jakarta.validation.constraints.AssertTrue; import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotNull; +import java.util.concurrent.TimeUnit; + import static com.google.common.base.Preconditions.checkState; import static io.airlift.units.DataSize.Unit.MEGABYTE; @@ -37,6 +42,12 @@ public class GcsFileSystemConfig private boolean useGcsAccessToken; private String jsonKey; private String jsonKeyFilePath; + private int maxRetries = 20; + private double backoffScaleFactor = 2.0; + private Duration maxRetryTime = new Duration(20, TimeUnit.SECONDS); + private Duration minBackoffDelay = new Duration(10, TimeUnit.MILLISECONDS); + // Note: there is no benefit to setting this much higher as the rpc quota is 1x per second: https://cloud.google.com/storage/docs/retry-strategy#java + private Duration maxBackoffDelay = new Duration(1100, TimeUnit.MILLISECONDS); @NotNull public DataSize getReadBlockSize() @@ -148,14 +159,92 @@ public GcsFileSystemConfig setJsonKeyFilePath(String jsonKeyFilePath) return this; } + @Min(0) + public int getMaxRetries() + { + return maxRetries; + } + + @Config("gcs.client.max-retries") + @ConfigDescription("Maximum number of RPC attempts") + public GcsFileSystemConfig setMaxRetries(int maxRetries) + { + this.maxRetries = maxRetries; + return this; + } + + @Min(1) + public double getBackoffScaleFactor() + { + return backoffScaleFactor; + } + + @Config("gcs.client.backoff-scale-factor") + @ConfigDescription("Scale factor for RPC retry delay") + public GcsFileSystemConfig setBackoffScaleFactor(double backoffScaleFactor) + { + this.backoffScaleFactor = backoffScaleFactor; + return this; + } + + @NotNull + public Duration getMaxRetryTime() + { + return maxRetryTime; + } + + @Config("gcs.client.max-retry-time") + @ConfigDescription("Total time limit for an RPC to be retried") + public GcsFileSystemConfig setMaxRetryTime(Duration maxRetryTime) + { + this.maxRetryTime = maxRetryTime; + return this; + } + + @NotNull + @MinDuration("0ms") + public Duration getMinBackoffDelay() + { + return minBackoffDelay; + } + + @Config("gcs.client.min-backoff-delay") + @ConfigDescription("Minimum delay between RPC retries") + public GcsFileSystemConfig setMinBackoffDelay(Duration minBackoffDelay) + { + this.minBackoffDelay = minBackoffDelay; + return this; + } + + @NotNull + @MinDuration("0ms") + public Duration getMaxBackoffDelay() + { + return maxBackoffDelay; + } + + @Config("gcs.client.max-backoff-delay") + @ConfigDescription("Maximum delay between RPC retries.") + public GcsFileSystemConfig setMaxBackoffDelay(Duration maxBackoffDelay) + { + this.maxBackoffDelay = maxBackoffDelay; + return this; + } + + @AssertTrue(message = "gcs.client.min-backoff-delay must be less than or equal to gcs.client.max-backoff-delay") + public boolean isRetryDelayValid() + { + return minBackoffDelay.compareTo(maxBackoffDelay) <= 0; + } + public void validate() { - // This cannot be normal 
validation, as it would make it impossible to write TestHiveGcsConfig.testExplicitPropertyMappings + // This cannot be normal validation, as it would make it impossible to write TestGcsFileSystemConfig.testExplicitPropertyMappings if (useGcsAccessToken) { - checkState(jsonKey == null, "Cannot specify 'hive.gcs.json-key' when 'hive.gcs.use-access-token' is set"); - checkState(jsonKeyFilePath == null, "Cannot specify 'hive.gcs.json-key-file-path' when 'hive.gcs.use-access-token' is set"); + checkState(jsonKey == null, "Cannot specify 'gcs.json-key' when 'gcs.use-access-token' is set"); + checkState(jsonKeyFilePath == null, "Cannot specify 'gcs.json-key-file-path' when 'gcs.use-access-token' is set"); } - checkState(jsonKey == null || jsonKeyFilePath == null, "'hive.gcs.json-key' and 'hive.gcs.json-key-file-path' cannot be both set"); + checkState(jsonKey == null || jsonKeyFilePath == null, "'gcs.json-key' and 'gcs.json-key-file-path' cannot be both set"); } } diff --git a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsOutputFile.java b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsOutputFile.java index 12344c9a0f98b..0131299daebda 100644 --- a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsOutputFile.java +++ b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsOutputFile.java @@ -18,6 +18,7 @@ import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.Storage.BlobTargetOption; +import io.airlift.slice.Slice; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoOutputFile; import io.trino.memory.context.AggregatedMemoryContext; @@ -64,10 +65,24 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) + public void createExclusive(Slice content, AggregatedMemoryContext memoryContext) throws IOException { - return create(memoryContext); + try { + if (getBlob(storage, location).isPresent()) { + throw new FileAlreadyExistsException("File %s already exists".formatted(location)); + } + storage.create( + BlobInfo.newBuilder(BlobId.of(location.bucket(), location.path())).build(), + content.getBytes(), + DOES_NOT_EXIST_TARGET_OPTION); + } + catch (FileAlreadyExistsException e) { + throw e; + } + catch (RuntimeException e) { + throw handleGcsException(e, "writing file", location); + } } private OutputStream createOutputStream(AggregatedMemoryContext memoryContext, boolean overwrite) diff --git a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsStorageFactory.java b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsStorageFactory.java index 176d45061fdac..bdb85f82d7e27 100644 --- a/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsStorageFactory.java +++ b/lib/trino-filesystem-gcs/src/main/java/io/trino/filesystem/gcs/GcsStorageFactory.java @@ -13,6 +13,7 @@ */ package io.trino.filesystem.gcs; +import com.google.api.gax.retrying.RetrySettings; import com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; @@ -20,6 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Inject; import io.trino.spi.security.ConnectorIdentity; +import org.threeten.bp.Duration; import java.io.ByteArrayInputStream; import java.io.FileInputStream; @@ -29,6 +31,7 @@ import java.util.List; import java.util.Optional; +import 
static com.google.cloud.storage.StorageRetryStrategy.getUniformStorageRetryStrategy; import static com.google.common.base.Strings.nullToEmpty; import static java.nio.charset.StandardCharsets.UTF_8; @@ -39,6 +42,11 @@ public class GcsStorageFactory private final String projectId; private final boolean useGcsAccessToken; private final Optional jsonGoogleCredential; + private final int maxRetries; + private final double backoffScaleFactor; + private final Duration maxRetryTime; + private final Duration minBackoffDelay; + private final Duration maxBackoffDelay; @Inject public GcsStorageFactory(GcsFileSystemConfig config) @@ -62,6 +70,12 @@ else if (jsonKeyFilePath != null) { else { jsonGoogleCredential = Optional.empty(); } + this.maxRetries = config.getMaxRetries(); + this.backoffScaleFactor = config.getBackoffScaleFactor(); + // To avoid name collision by importing io.airlift.Duration + this.maxRetryTime = Duration.ofMillis(config.getMaxRetryTime().toMillis()); + this.minBackoffDelay = Duration.ofMillis(config.getMinBackoffDelay().toMillis()); + this.maxBackoffDelay = Duration.ofMillis(config.getMaxBackoffDelay().toMillis()); } public Storage create(ConnectorIdentity identity) @@ -81,7 +95,20 @@ public Storage create(ConnectorIdentity identity) if (projectId != null) { storageOptionsBuilder.setProjectId(projectId); } - return storageOptionsBuilder.setCredentials(credentials).build().getService(); + // Note: without uniform strategy we cannot retry idempotent operations. + // The trino-filesystem api does not violate the conditions for idempotency, see https://cloud.google.com/storage/docs/retry-strategy#java for details. + return storageOptionsBuilder + .setCredentials(credentials) + .setStorageRetryStrategy(getUniformStorageRetryStrategy()) + .setRetrySettings(RetrySettings.newBuilder() + .setMaxAttempts(maxRetries + 1) + .setRetryDelayMultiplier(backoffScaleFactor) + .setTotalTimeout(maxRetryTime) + .setInitialRetryDelay(minBackoffDelay) + .setMaxRetryDelay(maxBackoffDelay) + .build()) + .build() + .getService(); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/AbstractTestGcsFileSystem.java b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/AbstractTestGcsFileSystem.java index b830f893a9043..ec0ae9836f5b6 100644 --- a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/AbstractTestGcsFileSystem.java +++ b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/AbstractTestGcsFileSystem.java @@ -125,6 +125,12 @@ protected void verifyFileSystemIsEmpty() assertThat(storage.list(bucket).iterateAll()).isEmpty(); } + @Override + protected final boolean supportsCreateExclusive() + { + return true; + } + @Override protected final boolean supportsRenameFile() { diff --git a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemConfig.java b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemConfig.java index 2c6dd4a9992ba..d5b640f8f738a 100644 --- a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemConfig.java +++ b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemConfig.java @@ -15,6 +15,8 @@ import com.google.common.collect.ImmutableMap; import io.airlift.units.DataSize; +import io.airlift.units.Duration; +import jakarta.validation.constraints.AssertTrue; import org.junit.jupiter.api.Test; import java.io.IOException; @@ -25,6 +27,10 @@ import static 
io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; +import static io.airlift.testing.ValidationAssertions.assertFailsValidation; +import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestGcsFileSystemConfig @@ -33,14 +39,19 @@ public class TestGcsFileSystemConfig void testDefaults() { assertRecordedDefaults(recordDefaults(GcsFileSystemConfig.class) - .setReadBlockSize(DataSize.of(2, DataSize.Unit.MEGABYTE)) - .setWriteBlockSize(DataSize.of(16, DataSize.Unit.MEGABYTE)) + .setReadBlockSize(DataSize.of(2, MEGABYTE)) + .setWriteBlockSize(DataSize.of(16, MEGABYTE)) .setPageSize(100) .setBatchSize(100) .setProjectId(null) .setUseGcsAccessToken(false) .setJsonKey(null) - .setJsonKeyFilePath(null)); + .setJsonKeyFilePath(null) + .setMaxRetries(20) + .setBackoffScaleFactor(2.0) + .setMaxRetryTime(new Duration(20, SECONDS)) + .setMinBackoffDelay(new Duration(10, MILLISECONDS)) + .setMaxBackoffDelay(new Duration(1100, MILLISECONDS))); } @Test @@ -58,17 +69,27 @@ void testExplicitPropertyMappings() .put("gcs.use-access-token", "true") .put("gcs.json-key", "{}") .put("gcs.json-key-file-path", jsonKeyFile.toString()) + .put("gcs.client.max-retries", "10") + .put("gcs.client.backoff-scale-factor", "3.0") + .put("gcs.client.max-retry-time", "10s") + .put("gcs.client.min-backoff-delay", "20ms") + .put("gcs.client.max-backoff-delay", "20ms") .buildOrThrow(); GcsFileSystemConfig expected = new GcsFileSystemConfig() - .setReadBlockSize(DataSize.of(51, DataSize.Unit.MEGABYTE)) - .setWriteBlockSize(DataSize.of(52, DataSize.Unit.MEGABYTE)) + .setReadBlockSize(DataSize.of(51, MEGABYTE)) + .setWriteBlockSize(DataSize.of(52, MEGABYTE)) .setPageSize(10) .setBatchSize(11) .setProjectId("project") .setUseGcsAccessToken(true) .setJsonKey("{}") - .setJsonKeyFilePath(jsonKeyFile.toString()); + .setJsonKeyFilePath(jsonKeyFile.toString()) + .setMaxRetries(10) + .setBackoffScaleFactor(3.0) + .setMaxRetryTime(new Duration(10, SECONDS)) + .setMinBackoffDelay(new Duration(20, MILLISECONDS)) + .setMaxBackoffDelay(new Duration(20, MILLISECONDS)); assertFullMapping(properties, expected); } @@ -80,20 +101,29 @@ public void testValidation() .setUseGcsAccessToken(true) .setJsonKey("{}}")::validate) .isInstanceOf(IllegalStateException.class) - .hasMessage("Cannot specify 'hive.gcs.json-key' when 'hive.gcs.use-access-token' is set"); + .hasMessage("Cannot specify 'gcs.json-key' when 'gcs.use-access-token' is set"); assertThatThrownBy( new GcsFileSystemConfig() .setUseGcsAccessToken(true) .setJsonKeyFilePath("/dev/null")::validate) .isInstanceOf(IllegalStateException.class) - .hasMessage("Cannot specify 'hive.gcs.json-key-file-path' when 'hive.gcs.use-access-token' is set"); + .hasMessage("Cannot specify 'gcs.json-key-file-path' when 'gcs.use-access-token' is set"); assertThatThrownBy( new GcsFileSystemConfig() .setJsonKey("{}") .setJsonKeyFilePath("/dev/null")::validate) .isInstanceOf(IllegalStateException.class) - .hasMessage("'hive.gcs.json-key' and 'hive.gcs.json-key-file-path' cannot be both set"); + .hasMessage("'gcs.json-key' and 'gcs.json-key-file-path' cannot be both set"); + + assertFailsValidation( + new GcsFileSystemConfig() + .setJsonKey("{}") + 
.setMinBackoffDelay(new Duration(20, MILLISECONDS)) + .setMaxBackoffDelay(new Duration(19, MILLISECONDS)), + "retryDelayValid", + "gcs.client.min-backoff-delay must be less than or equal to gcs.client.max-backoff-delay", + AssertTrue.class); } } diff --git a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemGcs.java b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemGcs.java index b9a056e487fc0..00f08df91377d 100644 --- a/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemGcs.java +++ b/lib/trino-filesystem-gcs/src/test/java/io/trino/filesystem/gcs/TestGcsFileSystemGcs.java @@ -13,10 +13,16 @@ */ package io.trino.filesystem.gcs; +import io.trino.filesystem.TrinoOutputFile; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import java.io.IOException; +import java.io.OutputStream; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThatNoException; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestGcsFileSystemGcs @@ -28,4 +34,20 @@ void setup() { initialize(getRequiredEnvironmentVariable("GCP_CREDENTIALS_KEY")); } + + @Test + void testCreateFileRetry() + { + // Note: this test is meant to expose flakiness + // Without retries it may fail non-deterministically. + // Retries are enabled in the default GcsFileSystemConfig. + // In practice this may happen between 7 and 20 retries. + assertThatNoException().isThrownBy(() -> { + for (int i = 1; i <= 30; i++) { + TrinoOutputFile outputFile = getFileSystem().newOutputFile(getRootLocation().appendPath("testFile")); + try (OutputStream out = outputFile.createOrOverwrite()) { + out.write("test".getBytes(UTF_8)); + } + }}); + } } diff --git a/lib/trino-filesystem-manager/pom.xml b/lib/trino-filesystem-manager/pom.xml index 902503eab98e9..e96fd45aef0df 100644 --- a/lib/trino-filesystem-manager/pom.xml +++ b/lib/trino-filesystem-manager/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-filesystem-s3/pom.xml b/lib/trino-filesystem-s3/pom.xml index b8c1165d8d114..1f9645ef738e6 100644 --- a/lib/trino-filesystem-s3/pom.xml +++ b/lib/trino-filesystem-s3/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3OutputFile.java b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3OutputFile.java index a388bcb6d2874..5a6bf934c1b86 100644 --- a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3OutputFile.java +++ b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3OutputFile.java @@ -18,7 +18,6 @@ import io.trino.memory.context.AggregatedMemoryContext; import software.amazon.awssdk.services.s3.S3Client; -import java.io.IOException; import java.io.OutputStream; import static java.util.Objects.requireNonNull; @@ -51,13 +50,6 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) return new S3OutputStream(memoryContext, client, context, location); } - @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) - throws IOException - { - throw new IOException("S3 does not support exclusive create"); - } - @Override public Location location() { diff --git a/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java 
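Editor's note: the gcs.client.* properties introduced in GcsFileSystemConfig above control the GCS client's retry policy. Below is a minimal sketch of how those settings fit together, using only the setters and the isRetryDelayValid() constraint added in this diff; the class name GcsRetryConfigExample and the chosen values are illustrative, not part of the change.

import io.airlift.units.Duration;
import io.trino.filesystem.gcs.GcsFileSystemConfig;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;

public class GcsRetryConfigExample
{
    public static void main(String[] args)
    {
        // Equivalent catalog properties (from the @Config annotations above):
        // gcs.client.max-retries, gcs.client.backoff-scale-factor,
        // gcs.client.max-retry-time, gcs.client.min-backoff-delay, gcs.client.max-backoff-delay
        GcsFileSystemConfig config = new GcsFileSystemConfig()
                .setMaxRetries(10)
                .setBackoffScaleFactor(3.0)
                .setMaxRetryTime(new Duration(10, SECONDS))
                .setMinBackoffDelay(new Duration(20, MILLISECONDS))
                .setMaxBackoffDelay(new Duration(1, SECONDS));

        // The @AssertTrue constraint requires min-backoff-delay <= max-backoff-delay
        if (!config.isRetryDelayValid()) {
            throw new IllegalArgumentException("gcs.client.min-backoff-delay must be less than or equal to gcs.client.max-backoff-delay");
        }
    }
}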
b/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java index b755f5a403dc8..56b17fae8d825 100644 --- a/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java +++ b/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java @@ -82,7 +82,7 @@ protected final Location getRootLocation() } @Override - protected final boolean supportsCreateExclusive() + protected boolean isCreateExclusive() { return false; } @@ -93,12 +93,6 @@ protected final boolean supportsRenameFile() return false; } - @Override - protected final boolean deleteFileFailsIfNotExists() - { - return false; - } - @Override protected final void verifyFileSystemIsEmpty() { diff --git a/lib/trino-filesystem/pom.xml b/lib/trino-filesystem/pom.xml index 7f78e77bcd86b..9b01a4bb67280 100644 --- a/lib/trino-filesystem/pom.xml +++ b/lib/trino-filesystem/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -58,6 +58,12 @@ provided + + io.airlift + concurrent + test + + io.airlift junit-extensions diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoFileSystem.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoFileSystem.java index 9692154a39d02..f4633964fa450 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoFileSystem.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoFileSystem.java @@ -78,10 +78,10 @@ public interface TrinoFileSystem /** * Deletes the specified file. The file location path cannot be empty, and must not end with - * a slash or whitespace. If the file is a director, an exception is raised. + * a slash or whitespace. If the file is a director, an exception is raised. If the file does + * not exist, this method is a noop. * * @throws IllegalArgumentException if location is not valid for this file system - * @throws IOException if the file does not exist (optional) or was not deleted */ void deleteFile(Location location) throws IOException; @@ -90,9 +90,9 @@ void deleteFile(Location location) * Delete specified files. This operation is not required to be atomic, so if an error * occurs, all, some, or, none of the files may be deleted. This operation may be faster than simply * looping over the locations as some file systems support batch delete operations natively. + * If a file does not exist, it is ignored. * * @throws IllegalArgumentException if location is not valid for this file system - * @throws IOException if a file does not exist (optional) or was not deleted */ default void deleteFiles(Collection locations) throws IOException diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoOutputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoOutputFile.java index eea75c0291fb9..2f20f3d1fba4f 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoOutputFile.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/TrinoOutputFile.java @@ -14,6 +14,7 @@ package io.trino.filesystem; +import io.airlift.slice.Slice; import io.trino.memory.context.AggregatedMemoryContext; import java.io.IOException; @@ -35,10 +36,13 @@ default OutputStream createOrOverwrite() return createOrOverwrite(newSimpleAggregatedMemoryContext()); } - default OutputStream createExclusive() + /** + * Create file exclusively and atomically with specified contents. 
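Editor's note: the javadoc above describes the new createExclusive(Slice) entry point. A short caller-side sketch under the contract in this diff: content is supplied up front as a Slice rather than streamed, a concurrent winner surfaces as FileAlreadyExistsException, and file systems without support throw UnsupportedOperationException. The fileSystem and location values are placeholders.

import io.airlift.slice.Slices;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;

import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;

class CreateExclusiveExample
{
    // Writes the whole file atomically and exclusively in one call
    static boolean tryWriteOnce(TrinoFileSystem fileSystem, Location location, String content)
            throws IOException
    {
        try {
            fileSystem.newOutputFile(location).createExclusive(Slices.utf8Slice(content));
            return true;
        }
        catch (FileAlreadyExistsException e) {
            return false; // another writer created the file first
        }
    }
}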
+ */ + default void createExclusive(Slice content) throws IOException { - return createExclusive(newSimpleAggregatedMemoryContext()); + createExclusive(content, newSimpleAggregatedMemoryContext()); } OutputStream create(AggregatedMemoryContext memoryContext) @@ -47,8 +51,14 @@ OutputStream create(AggregatedMemoryContext memoryContext) OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) throws IOException; - OutputStream createExclusive(AggregatedMemoryContext memoryContext) - throws IOException; + /** + * Create file exclusively and atomically with specified contents. + */ + default void createExclusive(Slice content, AggregatedMemoryContext memoryContext) + throws IOException + { + throw new UnsupportedOperationException("createExclusive not supported by " + getClass()); + } Location location(); } diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalFileSystem.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalFileSystem.java index a6fa596124c5a..dc5069260d8d1 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalFileSystem.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalFileSystem.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.file.FileVisitResult; import java.nio.file.Files; +import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; @@ -34,6 +35,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.trino.filesystem.local.LocalUtils.handleException; import static java.nio.file.LinkOption.NOFOLLOW_LINKS; +import static java.util.UUID.randomUUID; /** * A hierarchical file system for testing. 
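Editor's note: the TrinoFileSystem javadoc changes earlier in this diff relax deleteFile and deleteFiles to ignore missing files. A brief caller-side sketch of the new idempotent-cleanup behavior; the fileSystem and location values are placeholders.

import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;

import java.io.IOException;
import java.util.List;

class DeleteExample
{
    // Under the relaxed contract, callers no longer need to guard cleanup
    // against FileNotFoundException for files that are already gone
    static void cleanup(TrinoFileSystem fileSystem, Location location)
            throws IOException
    {
        fileSystem.deleteFile(location);           // no-op if the file does not exist
        fileSystem.deleteFile(location);           // safe to repeat
        fileSystem.deleteFiles(List.of(location)); // missing entries are ignored
    }
}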
@@ -46,7 +48,7 @@ public class LocalFileSystem public LocalFileSystem(Path rootPath) { this.rootPath = rootPath; - checkArgument(Files.isDirectory(rootPath), "root is not a directory"); + checkArgument(Files.isDirectory(rootPath), "root is not a directory: %s", rootPath); } @Override @@ -75,6 +77,8 @@ public void deleteFile(Location location) try { Files.delete(filePath); } + catch (NoSuchFileException ignored) { + } catch (IOException e) { throw handleException(location, e); } @@ -223,7 +227,23 @@ public Set listDirectories(Location location) public Optional createTemporaryDirectory(Location targetPath, String temporaryPrefix, String relativePrefix) throws IOException { - throw new IOException("Local file system does not support creating temporary directories"); + // allow for absolute or relative temporary prefix + Location temporary; + if (temporaryPrefix.startsWith("/")) { + String prefix = temporaryPrefix; + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + temporary = Location.of("local:///").appendPath(prefix); + } + else { + temporary = targetPath.appendPath(temporaryPrefix); + } + + temporary = temporary.appendPath(randomUUID().toString()); + + createDirectory(temporary); + return Optional.of(temporary); } private Path toFilePath(Location location) diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalOutputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalOutputFile.java index 2af7daf0602da..87deb5fd11fff 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalOutputFile.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalOutputFile.java @@ -72,13 +72,6 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } } - @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) - throws IOException - { - return create(memoryContext); - } - @Override public Location location() { diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java index 1b07a632fa695..33f5135ce32b1 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java @@ -23,7 +23,6 @@ import io.trino.filesystem.TrinoOutputFile; import io.trino.filesystem.memory.MemoryOutputFile.OutputBlob; -import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.FileAlreadyExistsException; import java.util.Iterator; @@ -96,9 +95,7 @@ public void overwriteBlob(Slice data) public void deleteFile(Location location) throws IOException { - if (blobs.remove(toBlobKey(location)) == null) { - throw new FileNotFoundException(location.toString()); - } + blobs.remove(toBlobKey(location)); } @Override diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputStream.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputStream.java index 573093a9a14af..05830d68aa037 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputStream.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputStream.java @@ -20,6 +20,7 @@ import java.io.IOException; +import static java.util.Objects.checkFromIndexSize; import static java.util.Objects.requireNonNull; class MemoryInputStream @@ -78,6 +79,7 @@ public int 
read(byte[] destination, int destinationIndex, int length) throws IOException { ensureOpen(); + checkFromIndexSize(destinationIndex, length, destination.length); return input.read(destination, destinationIndex, length); } diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryOutputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryOutputFile.java index b937377f3ca45..7d4373242e165 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryOutputFile.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryOutputFile.java @@ -64,10 +64,10 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) + public void createExclusive(Slice content, AggregatedMemoryContext memoryContext) throws IOException { - return create(memoryContext); + outputBlob.createBlob(content); } @Override diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/tracing/TracingOutputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/tracing/TracingOutputFile.java index de0123b21b2a9..89125f49d102d 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/tracing/TracingOutputFile.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/tracing/TracingOutputFile.java @@ -13,6 +13,7 @@ */ package io.trino.filesystem.tracing; +import io.airlift.slice.Slice; import io.opentelemetry.api.trace.Span; import io.opentelemetry.api.trace.Tracer; import io.trino.filesystem.Location; @@ -58,13 +59,13 @@ public OutputStream createOrOverwrite() } @Override - public OutputStream createExclusive() + public void createExclusive(Slice content) throws IOException { Span span = tracer.spanBuilder("OutputFile.createExclusive") .setAttribute(FileSystemAttributes.FILE_LOCATION, toString()) .startSpan(); - return withTracing(span, () -> delegate.createExclusive()); + withTracing(span, () -> delegate.createExclusive(content)); } @Override @@ -88,13 +89,13 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) + public void createExclusive(Slice content, AggregatedMemoryContext memoryContext) throws IOException { Span span = tracer.spanBuilder("OutputFile.createExclusive") .setAttribute(FileSystemAttributes.FILE_LOCATION, toString()) .startSpan(); - return withTracing(span, () -> delegate.createExclusive(memoryContext)); + withTracing(span, () -> delegate.createExclusive(content, memoryContext)); } @Override diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java index b11890c86b0f9..ee5a7c03b8b7d 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java @@ -17,7 +17,6 @@ import com.google.common.io.ByteStreams; import com.google.common.io.Closer; import io.airlift.slice.Slice; -import io.airlift.slice.Slices; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -40,10 +39,19 @@ import java.util.Optional; import java.util.Set; import java.util.UUID; - +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutorService; +import 
java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; + +import static io.airlift.slice.Slices.EMPTY_SLICE; +import static io.airlift.slice.Slices.wrappedBuffer; import static java.lang.Math.min; +import static java.nio.charset.StandardCharsets.US_ASCII; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @@ -63,17 +71,23 @@ public abstract class AbstractTestTrinoFileSystem protected abstract void verifyFileSystemIsEmpty(); - protected boolean supportsCreateExclusive() + /** + * Specifies whether implementation {@link TrinoOutputFile#create()} is exclusive. + */ + protected boolean isCreateExclusive() { return true; } - protected boolean supportsRenameFile() + /** + * Specifies whether implementation supports {@link TrinoOutputFile#createExclusive(Slice)}. + */ + protected boolean supportsCreateExclusive() { - return true; + return false; } - protected boolean deleteFileFailsIfNotExists() + protected boolean supportsRenameFile() { return true; } @@ -231,7 +245,7 @@ public void testInputFile() // write a 16 MB file try (OutputStream outputStream = tempBlob.outputFile().create()) { byte[] bytes = new byte[4]; - Slice slice = Slices.wrappedBuffer(bytes); + Slice slice = wrappedBuffer(bytes); for (int i = 0; i < 4 * MEGABYTE; i++) { slice.setInt(0, i); outputStream.write(bytes); @@ -244,7 +258,7 @@ public void testInputFile() try (TrinoInputStream inputStream = inputFile.newStream()) { byte[] bytes = new byte[4]; - Slice slice = Slices.wrappedBuffer(bytes); + Slice slice = wrappedBuffer(bytes); // read int at a time for (int intPosition = 0; intPosition < 4 * MEGABYTE; intPosition++) { @@ -377,6 +391,13 @@ public void testInputFile() assertThat(inputStream.getPosition()).isEqualTo(fileSize + 100); } + assertThatThrownBy(() -> inputStream.read(new byte[1], -1, 0)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> inputStream.read(new byte[1], 0, -1)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> inputStream.read(new byte[1], 1, 3)) + .isInstanceOf(IndexOutOfBoundsException.class); + // verify all the methods throw after close inputStream.close(); assertThatThrownBy(inputStream::available) @@ -398,14 +419,14 @@ public void testInputFile() try (TrinoInput trinoInput = inputFile.newInput()) { byte[] bytes = new byte[4 * 10]; - Slice slice = Slices.wrappedBuffer(bytes); + Slice slice = wrappedBuffer(bytes); // positioned read trinoInput.readFully(0, bytes, 0, bytes.length); for (int i = 0; i < 10; i++) { assertThat(slice.getInt(i * 4)).isEqualTo(i); } - assertThat(trinoInput.readFully(0, bytes.length)).isEqualTo(Slices.wrappedBuffer(bytes)); + assertThat(trinoInput.readFully(0, bytes.length)).isEqualTo(wrappedBuffer(bytes)); trinoInput.readFully(0, bytes, 2, bytes.length - 2); for (int i = 0; i < 9; i++) { @@ -416,7 +437,7 @@ public void testInputFile() for (int i = 0; i < 10; i++) { assertThat(slice.getInt(i * 4)).isEqualTo(i + MEGABYTE / 4); } - assertThat(trinoInput.readFully(MEGABYTE, bytes.length)).isEqualTo(Slices.wrappedBuffer(bytes)); + assertThat(trinoInput.readFully(MEGABYTE, bytes.length)).isEqualTo(wrappedBuffer(bytes)); assertThatThrownBy(() -> 
trinoInput.readFully(fileSize - bytes.length + 1, bytes, 0, bytes.length)) .isInstanceOf(IOException.class) .hasMessageContaining(tempBlob.location().toString()); @@ -428,7 +449,7 @@ public void testInputFile() assertThat(slice.getInt(i * 4)).isEqualTo(totalPositions - 10 + i); } - assertThat(trinoInput.readTail(bytes.length)).isEqualTo(Slices.wrappedBuffer(bytes)); + assertThat(trinoInput.readTail(bytes.length)).isEqualTo(wrappedBuffer(bytes)); trinoInput.readTail(bytes, 2, bytes.length - 2); for (int i = 0; i < 9; i++) { @@ -479,7 +500,7 @@ void testOutputFile() outputStream.write("initial".getBytes(UTF_8)); } - if (supportsCreateExclusive()) { + if (isCreateExclusive()) { // re-create without overwrite is an error assertThatThrownBy(outputFile::create) .isInstanceOf(FileAlreadyExistsException.class) @@ -489,9 +510,16 @@ void testOutputFile() assertThat(tempBlob.read()).isEqualTo("initial"); // re-create exclusive is an error - assertThatThrownBy(outputFile::createExclusive) - .isInstanceOf(FileAlreadyExistsException.class) - .hasMessageContaining(tempBlob.location().toString()); + if (supportsCreateExclusive()) { + assertThatThrownBy(() -> outputFile.createExclusive(EMPTY_SLICE)) + .isInstanceOf(FileAlreadyExistsException.class) + .hasMessageContaining(tempBlob.location().toString()); + } + else { + assertThatThrownBy(() -> outputFile.createExclusive(EMPTY_SLICE)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageStartingWith("createExclusive not supported"); + } // verify nothing changed assertThat(tempBlob.read()).isEqualTo("initial"); @@ -506,9 +534,16 @@ void testOutputFile() assertThat(tempBlob.read()).isEqualTo("replaced"); // create exclusive is an error - assertThatThrownBy(outputFile::createExclusive) - .isInstanceOf(IOException.class) - .hasMessageContaining("does not support exclusive create"); + if (supportsCreateExclusive()) { + assertThatThrownBy(() -> outputFile.createExclusive(EMPTY_SLICE)) + .isInstanceOf(FileAlreadyExistsException.class) + .hasMessageContaining(tempBlob.location().toString()); + } + else { + assertThatThrownBy(() -> outputFile.createExclusive(EMPTY_SLICE)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageStartingWith("createExclusive not supported"); + } } // overwrite file @@ -521,6 +556,53 @@ void testOutputFile() } } + @Test + void testCreateExclusiveIsAtomic() + throws Exception + { + if (!supportsCreateExclusive()) { + return; + } + + int timeoutSeconds = 20; + ExecutorService executor = Executors.newCachedThreadPool(io.airlift.concurrent.Threads.daemonThreadsNamed("testCreateExclusiveIsAtomic-%s")); + AtomicBoolean finishing = new AtomicBoolean(false); + try (TempBlob tempBlob = randomBlobLocation("outputFile")) { + TrinoFileSystem fileSystem = getFileSystem(); + Slice content = wrappedBuffer("a".repeat(MEGABYTE).getBytes(US_ASCII)); + + fileSystem.deleteFile(tempBlob.location()); + CyclicBarrier barrier = new CyclicBarrier(2); + Future write = executor.submit(() -> { + barrier.await(timeoutSeconds, SECONDS); + fileSystem.newOutputFile(tempBlob.location()).createExclusive(content); + return null; + }); + Future read = executor.submit(() -> { + TrinoInputFile inputFile = fileSystem.newInputFile(tempBlob.location()); + assertThat(inputFile.exists()).as("inputFile.exists()").isFalse(); + barrier.await(timeoutSeconds, SECONDS); + + while (!finishing.get()) { + try (TrinoInput input = inputFile.newInput()) { + return input.readFully(0, content.length()); + } + catch (FileNotFoundException expected) { + } + } + throw 
new RuntimeException("File not created"); + }); + + assertThat(read.get(timeoutSeconds, SECONDS)).as("read content").isEqualTo(content); + write.get(timeoutSeconds, SECONDS); + } + finally { + finishing.set(true); + executor.shutdownNow(); + assertThat(executor.awaitTermination(timeoutSeconds, SECONDS)).as("executor terminated").isTrue(); + } + } + @Test void testOutputStreamByteAtATime() throws IOException @@ -671,16 +753,8 @@ void testDeleteFile() .hasMessageContaining(createLocation("foo/").toString()); try (TempBlob tempBlob = randomBlobLocation("delete")) { - if (deleteFileFailsIfNotExists()) { - // deleting a non-existent file is an error - assertThatThrownBy(() -> getFileSystem().deleteFile(tempBlob.location())) - .isInstanceOf(FileNotFoundException.class) - .hasMessageContaining(tempBlob.location().toString()); - } - else { - // deleting a non-existent file is a no-op - getFileSystem().deleteFile(tempBlob.location()); - } + // deleting a non-existent file is a no-op + getFileSystem().deleteFile(tempBlob.location()); tempBlob.createOrOverwrite("delete me"); diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/TrackingFileSystemFactory.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/TrackingFileSystemFactory.java index c9f4bae6e02ce..195b3eede7576 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/TrackingFileSystemFactory.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/TrackingFileSystemFactory.java @@ -14,6 +14,7 @@ package io.trino.filesystem; import com.google.common.collect.ImmutableMap; +import io.airlift.slice.Slice; import io.trino.memory.context.AggregatedMemoryContext; import io.trino.spi.security.ConnectorIdentity; @@ -306,11 +307,11 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) + public void createExclusive(Slice content, AggregatedMemoryContext memoryContext) throws IOException { tracker.accept(OUTPUT_FILE_CREATE_EXCLUSIVE); - return delegate.createExclusive(memoryContext); + delegate.createExclusive(content, memoryContext); } @Override diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/memory/TestMemoryFileSystem.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/memory/TestMemoryFileSystem.java index a1014dc898097..b11aaeac7f62a 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/memory/TestMemoryFileSystem.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/memory/TestMemoryFileSystem.java @@ -44,6 +44,12 @@ protected boolean isHierarchical() return false; } + @Override + protected boolean supportsCreateExclusive() + { + return true; + } + @Override protected TrinoFileSystem getFileSystem() { diff --git a/lib/trino-geospatial-toolkit/pom.xml b/lib/trino-geospatial-toolkit/pom.xml index 181ce54f0bf31..c3d1211654ef9 100644 --- a/lib/trino-geospatial-toolkit/pom.xml +++ b/lib/trino-geospatial-toolkit/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-hdfs/pom.xml b/lib/trino-hdfs/pom.xml index 70fec6c06ea4d..820fb5c8372dd 100644 --- a/lib/trino-hdfs/pom.xml +++ b/lib/trino-hdfs/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -13,9 +13,6 @@ ${project.parent.basedir} - - **/TestFileSystemCache.java @@ -261,12 +258,6 @@ testcontainers test - - - org.testng - testng - test - @@ -301,7 +292,7 @@ maven-surefire-plugin - 
${isolatedJvmTests} + **/TestFileSystemCache.java **/TestTrinoS3FileSystemAwsS3.java @@ -319,7 +310,7 @@ maven-surefire-plugin - ${isolatedJvmTests} + **/TestFileSystemCache.java false 1 diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java index e1ae9400f4614..c7f8d83fd759a 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java @@ -114,9 +114,8 @@ public void deleteFile(Location location) } return null; } - catch (FileNotFoundException e) { - stats.getDeleteFileCalls().recordException(e); - throw new FileNotFoundException(location.toString()); + catch (FileNotFoundException ignored) { + return null; } catch (IOException e) { stats.getDeleteFileCalls().recordException(e); diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java index d09b28baeb563..b913425283011 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java @@ -14,6 +14,7 @@ package io.trino.filesystem.hdfs; import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem; +import io.airlift.slice.Slice; import io.airlift.stats.TimeStat; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoOutputFile; @@ -22,8 +23,7 @@ import io.trino.hdfs.HdfsEnvironment; import io.trino.hdfs.MemoryAwareFileSystem; import io.trino.hdfs.authentication.GenericExceptionAction; -import io.trino.hdfs.gcs.GcsExclusiveOutputStream; -import io.trino.hdfs.s3.TrinoS3FileSystem; +import io.trino.hdfs.gcs.GcsAtomicOutputStream; import io.trino.memory.context.AggregatedMemoryContext; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -70,18 +70,18 @@ public OutputStream createOrOverwrite(AggregatedMemoryContext memoryContext) } @Override - public OutputStream createExclusive(AggregatedMemoryContext memoryContext) + public void createExclusive(Slice content, AggregatedMemoryContext memoryContext) throws IOException { Path file = hadoopPath(location); FileSystem fileSystem = getRawFileSystem(environment.getFileSystem(context, file)); - if (fileSystem instanceof TrinoS3FileSystem) { - throw new IOException("S3 does not support exclusive create"); - } if (fileSystem instanceof GoogleHadoopFileSystem) { - return new GcsExclusiveOutputStream(environment, context, file); + GcsAtomicOutputStream atomicOutputStream = new GcsAtomicOutputStream(environment, context, file); + atomicOutputStream.write(content.getBytes()); + atomicOutputStream.close(); + return; } - return create(memoryContext); + throw new UnsupportedOperationException("createExclusive not supported for " + fileSystem); } private OutputStream create(boolean overwrite, AggregatedMemoryContext memoryContext) diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsTrinoInputStream.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsTrinoInputStream.java index 8e1fa57b06e4b..f5374dc751a66 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsTrinoInputStream.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsTrinoInputStream.java @@ -21,6 +21,7 @@ import java.io.IOException; import static io.trino.filesystem.hdfs.HdfsFileSystem.withCause; +import static 
java.util.Objects.checkFromIndexSize; import static java.util.Objects.requireNonNull; class HdfsTrinoInputStream @@ -96,6 +97,7 @@ public int read(byte[] b, int off, int len) throws IOException { ensureOpen(); + checkFromIndexSize(off, len, b.length); try { return stream.read(b, off, len); } diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java index 215e6c0d4b286..21b4877ff704a 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java @@ -16,6 +16,8 @@ import com.google.cloud.hadoop.util.AccessTokenProvider; import org.apache.hadoop.conf.Configuration; +import java.time.Instant; + import static com.google.common.base.Strings.nullToEmpty; import static java.util.concurrent.TimeUnit.HOURS; @@ -29,7 +31,7 @@ public class GcsAccessTokenProvider @Override public AccessToken getAccessToken() { - return new AccessToken(nullToEmpty(config.get(GCS_ACCESS_TOKEN_CONF)), EXPIRATION_TIME_MILLISECONDS); + return new AccessToken(nullToEmpty(config.get(GCS_ACCESS_TOKEN_CONF)), Instant.now().plusMillis(EXPIRATION_TIME_MILLISECONDS)); } @Override diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsExclusiveOutputStream.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java similarity index 93% rename from lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsExclusiveOutputStream.java rename to lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java index b53e0b4e082da..3ee56648304f5 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsExclusiveOutputStream.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java @@ -24,14 +24,14 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; -public class GcsExclusiveOutputStream +public class GcsAtomicOutputStream extends ByteArrayOutputStream { private final Storage storage; private final Path path; private boolean closed; - public GcsExclusiveOutputStream(HdfsEnvironment environment, HdfsContext context, Path path) + public GcsAtomicOutputStream(HdfsEnvironment environment, HdfsContext context, Path path) { this.storage = environment.createGcsStorage(context, path); this.path = path; diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java index 2579685950fc6..55384769f2592 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java @@ -13,14 +13,14 @@ */ package io.trino.hdfs.gcs; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.auth.oauth2.GoogleCredential; import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.HttpTransport; import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.json.jackson2.JacksonFactory; import com.google.cloud.hadoop.repackaged.gcs.com.google.api.services.storage.Storage; +import com.google.cloud.hadoop.repackaged.gcs.com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.CredentialFactory; import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HttpTransportFactory; import 
com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.RetryHttpInitializer; +import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.RetryHttpInitializerOptions; import com.google.inject.Inject; import io.trino.hdfs.HdfsContext; import io.trino.hdfs.HdfsEnvironment; @@ -31,10 +31,10 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.time.Duration; import java.util.Optional; import static com.google.cloud.hadoop.fs.gcs.TrinoGoogleHadoopFileSystemConfiguration.getGcsOptionsBuilder; +import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.CLOUD_PLATFORM_SCOPE; import static com.google.common.base.Strings.nullToEmpty; import static io.trino.hdfs.gcs.GcsConfigurationProvider.GCS_OAUTH_KEY; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; @@ -46,7 +46,7 @@ public class GcsStorageFactory private static final String APPLICATION_NAME = "Trino"; private final boolean useGcsAccessToken; - private final Optional jsonGoogleCredential; + private final Optional jsonGoogleCredential; @Inject public GcsStorageFactory(HiveGcsConfig hiveGcsConfig) @@ -58,12 +58,12 @@ public GcsStorageFactory(HiveGcsConfig hiveGcsConfig) String jsonKeyFilePath = hiveGcsConfig.getJsonKeyFilePath(); if (jsonKey != null) { try (InputStream inputStream = new ByteArrayInputStream(jsonKey.getBytes(UTF_8))) { - jsonGoogleCredential = Optional.of(GoogleCredential.fromStream(inputStream).createScoped(CredentialFactory.DEFAULT_SCOPES)); + jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE)); } } else if (jsonKeyFilePath != null) { try (FileInputStream inputStream = new FileInputStream(jsonKeyFilePath)) { - jsonGoogleCredential = Optional.of(GoogleCredential.fromStream(inputStream).createScoped(CredentialFactory.DEFAULT_SCOPES)); + jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE)); } } else { @@ -76,22 +76,23 @@ public Storage create(HdfsEnvironment environment, HdfsContext context, Path pat try { GoogleCloudStorageOptions gcsOptions = getGcsOptionsBuilder(environment.getConfiguration(context, path)).build(); HttpTransport httpTransport = HttpTransportFactory.createHttpTransport( - gcsOptions.getTransportType(), gcsOptions.getProxyAddress(), gcsOptions.getProxyUsername(), - gcsOptions.getProxyPassword(), - Duration.ofMillis(gcsOptions.getHttpRequestReadTimeout())); - GoogleCredential credential; + gcsOptions.getProxyPassword()); + GoogleCredentials credential; if (useGcsAccessToken) { String accessToken = nullToEmpty(context.getIdentity().getExtraCredentials().get(GCS_OAUTH_KEY)); try (ByteArrayInputStream inputStream = new ByteArrayInputStream(accessToken.getBytes(UTF_8))) { - credential = GoogleCredential.fromStream(inputStream).createScoped(CredentialFactory.DEFAULT_SCOPES); + credential = GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE); } } else { credential = jsonGoogleCredential.orElseThrow(() -> new IllegalStateException("GCS credentials not configured")); } - return new Storage.Builder(httpTransport, JacksonFactory.getDefaultInstance(), new RetryHttpInitializer(credential, APPLICATION_NAME)) + return new Storage.Builder(httpTransport, JacksonFactory.getDefaultInstance(), new RetryHttpInitializer(credential, RetryHttpInitializerOptions.builder() + .setReadTimeout(gcsOptions.getHttpRequestReadTimeout()) + 
.setMaxRequestRetries(gcsOptions.getMaxHttpRequestRetries()) + .build())) .setApplicationName(APPLICATION_NAME) .build(); } diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java index 2cb3ba0eb57e7..4e979bf2e1a3e 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java @@ -29,9 +29,11 @@ import java.util.Optional; import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_CONFIG_PREFIX; -import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX; -import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX; -import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX; +import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialsConfiguration.SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX; +import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.ACCESS_TOKEN_PROVIDER_SUFFIX; +import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AUTHENTICATION_TYPE_SUFFIX; +import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType.ACCESS_TOKEN_PROVIDER; +import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType.SERVICE_ACCOUNT_JSON_KEYFILE; import static java.nio.file.attribute.PosixFilePermission.OWNER_READ; import static java.nio.file.attribute.PosixFilePermission.OWNER_WRITE; @@ -72,12 +74,12 @@ public void initializeConfiguration(Configuration config) if (useGcsAccessToken) { // use oauth token to authenticate with Google Cloud Storage - config.setBoolean(GCS_CONFIG_PREFIX + ENABLE_SERVICE_ACCOUNTS_SUFFIX.getKey(), false); - config.setClass(GCS_CONFIG_PREFIX + ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX.getKey(), GcsAccessTokenProvider.class, AccessTokenProvider.class); + config.setEnum(GCS_CONFIG_PREFIX + AUTHENTICATION_TYPE_SUFFIX.getKey(), ACCESS_TOKEN_PROVIDER); + config.setClass(GCS_CONFIG_PREFIX + ACCESS_TOKEN_PROVIDER_SUFFIX.getKey(), GcsAccessTokenProvider.class, AccessTokenProvider.class); } else if (jsonKeyFilePath != null) { // use service account key file - config.setBoolean(GCS_CONFIG_PREFIX + ENABLE_SERVICE_ACCOUNTS_SUFFIX.getKey(), true); + config.setEnum(GCS_CONFIG_PREFIX + AUTHENTICATION_TYPE_SUFFIX.getKey(), SERVICE_ACCOUNT_JSON_KEYFILE); config.set(GCS_CONFIG_PREFIX + SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX.getKey(), jsonKeyFilePath); } } diff --git a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java index 45f3d49f372e8..25fe9a824169c 100644 --- a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java +++ b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java @@ -109,13 +109,7 @@ protected Location getRootLocation() } @Override - protected final boolean supportsCreateExclusive() - { - return false; - } - - @Override - protected final boolean deleteFileFailsIfNotExists() + protected boolean isCreateExclusive() { return false; } diff --git 
a/lib/trino-hdfs/src/test/java/io/trino/hdfs/TestCachingSetup.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/TestCachingSetup.java new file mode 100644 index 0000000000000..2b1492130cc88 --- /dev/null +++ b/lib/trino-hdfs/src/test/java/io/trino/hdfs/TestCachingSetup.java @@ -0,0 +1,140 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hdfs; + +import com.google.common.collect.ImmutableMap; +import com.qubole.rubix.core.CachingFileSystem; +import io.airlift.testing.TempFile; +import io.opentelemetry.api.OpenTelemetry; +import io.trino.filesystem.hdfs.HdfsFileSystemManager; +import io.trino.testing.TestingNodeManager; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.api.parallel.Execution; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; + +@TestInstance(PER_CLASS) +@Execution(SAME_THREAD) +class TestCachingSetup +{ + @BeforeEach + @AfterEach + public void deinitializeRubix() + { + // revert static Rubix initialization done by other tests + CachingFileSystem.deinitialize(); + } + + @Test + public void testS3SecurityMappingAndHiveCachingMutuallyExclusive(@TempDir Path tempDirectory) + throws IOException + { + try (TempFile mappingConfig = new TempFile()) { + assertThatThrownBy(() -> createFileSystemManager( + ImmutableMap.builder() + .put("hive.s3.security-mapping.config-file", mappingConfig.path().toString()) + .put("hive.cache.enabled", "true") + .put("hive.cache.location", tempDirectory.toString()) + .buildOrThrow())) + .hasMessageContaining("S3 security mapping is not compatible with Hive caching"); + } + } + + @Test + public void testGcsAccessTokenAndHiveCachingMutuallyExclusive(@TempDir Path tempDirectory) + { + assertThatThrownBy(() -> createFileSystemManager( + ImmutableMap.builder() + .put("hive.gcs.use-access-token", "true") + .put("hive.cache.enabled", "true") + .put("hive.cache.location", tempDirectory.toString()) + .buildOrThrow())) + .hasMessageContaining("Use of GCS access token is not compatible with Hive caching"); + } + + @Test + public void testHdfsImpersonationAndHiveCachingMutuallyExclusive(@TempDir Path tempDirectory) + { + assertThatThrownBy(() -> createFileSystemManager( + ImmutableMap.builder() + .put("hive.hdfs.impersonation.enabled", "true") + .put("hive.cache.enabled", "true") + .put("hive.cache.location", tempDirectory.toString()) + .buildOrThrow())) + .hasMessageContaining("HDFS impersonation is not compatible with Hive caching"); + } + + @Test + public void testRubixCache(@TempDir Path tempDirectory) + { + createFileSystemManager( + ImmutableMap.builder() + .put("hive.cache.enabled", "true") + 
.put("hive.cache.location", tempDirectory.toString()) + .buildOrThrow()); + } + + @Test + public void testRubixCacheWithNonExistingCacheDirectory() + { + assertThatThrownBy(() -> createFileSystemManager( + ImmutableMap.builder() + .put("hive.cache.enabled", "true") + .put("hive.cache.start-server-on-coordinator", "true") + .put("hive.cache.location", "/tmp/non/existing/directory") + .buildOrThrow())) + .hasMessageContaining("None of the cache parent directories exists"); + + assertThatThrownBy(() -> createFileSystemManager( + ImmutableMap.builder() + .put("hive.cache.enabled", "true") + .put("hive.cache.start-server-on-coordinator", "true") + .buildOrThrow())) + .hasMessageContaining("caching directories were not provided"); + + // cache directories should not be required when cache is not explicitly started on coordinator + createFileSystemManager( + ImmutableMap.builder() + .put("hive.cache.enabled", "true") + .buildOrThrow()); + } + + private static void createFileSystemManager(Map config) + { + HdfsFileSystemManager manager = new HdfsFileSystemManager( + ImmutableMap.builder() + .putAll(config) + .put("boostrap.quiet", "true") + .buildOrThrow(), + true, + true, + true, + "test", + new TestingNodeManager(), + OpenTelemetry.noop()); + manager.configure(); + manager.create(); + manager.stop(); + } +} diff --git a/lib/trino-hive-formats/pom.xml b/lib/trino-hive-formats/pom.xml index a96634b81a87b..459979155d653 100644 --- a/lib/trino-hive-formats/pom.xml +++ b/lib/trino-hive-formats/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java index 2d3f1bfb4acc1..365f421ed7cc8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/avro/AvroPageDataReader.java @@ -208,11 +208,29 @@ private static BlockBuildingDecoder createBlockBuildingDecoderForAction(Resolver yield new ReaderUnionCoercedIntoRowBlockBuildingDecoder((Resolver.ReaderUnion) action, typeManager); } } - case ERROR -> throw new AvroTypeException("Resolution action returned with error " + action); + case ERROR -> new TypeErrorThrower((Resolver.ErrorAction) action); case SKIP -> throw new IllegalStateException("Skips filtered by row step"); }; } + private static class TypeErrorThrower + extends BlockBuildingDecoder + { + private final Resolver.ErrorAction action; + + public TypeErrorThrower(Resolver.ErrorAction action) + { + this.action = requireNonNull(action, "action is null"); + } + + @Override + protected void decodeIntoBlock(Decoder decoder, BlockBuilder builder) + throws IOException + { + throw new IOException(new AvroTypeException("Resolution action returned with error " + action)); + } + } + // Different plugins may have different Avro Schema to Type mappings // that are currently transforming GenericDatumReader returned objects into their target type during the record reading process // This block building decoder allows plugin writers to port that code directly and use within this reader diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/simple/SimpleDeserializer.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/simple/SimpleDeserializer.java index 16ef43157d67d..ec1943f29ba78 100644 --- 
a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/simple/SimpleDeserializer.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/simple/SimpleDeserializer.java @@ -81,7 +81,8 @@ public void deserialize(LineBuffer lineBuffer, PageBuilder builder) throws IOException { builder.declarePosition(); - Slice line = Slices.wrappedBuffer(lineBuffer.getBuffer(), 0, lineBuffer.getLength()); + byte[] buffer = lineBuffer.getBuffer(); + Slice line = Slices.wrappedBuffer(buffer, 0, lineBuffer.getLength()); int offset = 0; int length = line.length(); @@ -90,7 +91,7 @@ public void deserialize(LineBuffer lineBuffer, PageBuilder builder) int elementOffset = offset; int fieldIndex = 0; while (offset < end) { - byte currentByte = line.getByte(offset); + byte currentByte = buffer[offset]; if (currentByte == separator) { decodeElementValueInto(fieldIndex, builder, line, elementOffset, offset - elementOffset); elementOffset = offset + 1; @@ -100,7 +101,7 @@ public void deserialize(LineBuffer lineBuffer, PageBuilder builder) break; } } - else if (isEscapeByte(currentByte)) { + else if (escapeByte != null && currentByte == escapeByte) { // ignore the char after escape_char offset++; } @@ -143,9 +144,4 @@ private boolean isNullSequence(Slice slice, int offset, int length) { return nullSequence.equals(0, nullSequence.length(), slice, offset, length); } - - private boolean isEscapeByte(byte currentByte) - { - return escapeByte != null && currentByte == escapeByte; - } } diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroBase.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroBase.java index 81426283e0126..988dfdff7c599 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroBase.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroBase.java @@ -160,15 +160,15 @@ public abstract class TestAvroBase ALL_TYPES_GENERIC_RECORD = new GenericData.Record(ALL_TYPES_RECORD_SCHEMA); ALL_TYPES_GENERIC_RECORD.put("aBoolean", true); - allTypeBlocks.add(new ByteArrayBlock(1, Optional.empty(), new byte[]{1})); + allTypeBlocks.add(new ByteArrayBlock(1, Optional.empty(), new byte[] {1})); ALL_TYPES_GENERIC_RECORD.put("aInt", 42); - allTypeBlocks.add(new IntArrayBlock(1, Optional.empty(), new int[]{42})); + allTypeBlocks.add(new IntArrayBlock(1, Optional.empty(), new int[] {42})); ALL_TYPES_GENERIC_RECORD.put("aLong", 3400L); - allTypeBlocks.add(new LongArrayBlock(1, Optional.empty(), new long[]{3400L})); + allTypeBlocks.add(new LongArrayBlock(1, Optional.empty(), new long[] {3400L})); ALL_TYPES_GENERIC_RECORD.put("aFloat", 3.14f); - allTypeBlocks.add(new IntArrayBlock(1, Optional.empty(), new int[]{floatToIntBits(3.14f)})); + allTypeBlocks.add(new IntArrayBlock(1, Optional.empty(), new int[] {floatToIntBits(3.14f)})); ALL_TYPES_GENERIC_RECORD.put("aDouble", 9.81); - allTypeBlocks.add(new LongArrayBlock(1, Optional.empty(), new long[]{doubleToLongBits(9.81)})); + allTypeBlocks.add(new LongArrayBlock(1, Optional.empty(), new long[] {doubleToLongBits(9.81)})); ALL_TYPES_GENERIC_RECORD.put("aString", A_STRING_VALUE); allTypeBlocks.add(new VariableWidthBlock(1, Slices.utf8Slice(A_STRING_VALUE), new int[] {0, Slices.utf8Slice(A_STRING_VALUE).length()}, Optional.empty())); ALL_TYPES_GENERIC_RECORD.put("aBytes", A_BYTES_VALUE); diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java 
b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java index bcc88c9068b9c..2234c0a3bed4a 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/avro/TestAvroPageDataReaderWithoutTypeManager.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import io.airlift.slice.Slice; import io.trino.filesystem.TrinoInputFile; import io.trino.spi.Page; @@ -335,4 +336,38 @@ public void testCoercionOfUnionToStruct() assertThat(totalRecords).isEqualTo(3); } } + + @Test + public void testRead3UnionWith2UnionDataWith2Union() + throws IOException, AvroTypeException + { + Schema twoUnion = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT)); + Schema threeUnion = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT), Schema.create(Schema.Type.STRING)); + + Schema twoUnionRecord = SchemaBuilder.builder() + .record("aRecord") + .fields() + .name("aField") + .type(twoUnion) + .noDefault() + .endRecord(); + + Schema threeUnionRecord = SchemaBuilder.builder() + .record("aRecord") + .fields() + .name("aField") + .type(threeUnion) + .noDefault() + .endRecord(); + + // write a file with the 3 union schema, using 2 union data + TrinoInputFile inputFile = createWrittenFileWithData(threeUnionRecord, ImmutableList.copyOf(Iterables.transform(new RandomData(twoUnionRecord, 1000), object -> (GenericRecord) object))); + + //read the file with the 2 union schema and ensure that no error thrown + try (AvroFileReader avroFileReader = new AvroFileReader(inputFile, twoUnionRecord, NoOpAvroTypeManager.INSTANCE)) { + while (avroFileReader.hasNext()) { + assertThat(avroFileReader.next()).isNotNull(); + } + } + } } diff --git a/lib/trino-ignite-patched/pom.xml b/lib/trino-ignite-patched/pom.xml index 3b3c7d32d77e5..6090d4cc14e06 100644 --- a/lib/trino-ignite-patched/pom.xml +++ b/lib/trino-ignite-patched/pom.xml @@ -6,7 +6,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-matching/pom.xml b/lib/trino-matching/pom.xml index c53931f7d6a90..754d7186b84c2 100644 --- a/lib/trino-matching/pom.xml +++ b/lib/trino-matching/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-memory-context/pom.xml b/lib/trino-memory-context/pom.xml index 63f297577970d..9141fc4be12c0 100644 --- a/lib/trino-memory-context/pom.xml +++ b/lib/trino-memory-context/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-orc/pom.xml b/lib/trino-orc/pom.xml index c18b8d87eda0b..931885bba9025 100644 --- a/lib/trino-orc/pom.xml +++ b/lib/trino-orc/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java b/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java index 27550fe1ce6c9..8d4b0260d8ca6 100644 --- a/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java +++ b/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java @@ -54,7 +54,7 @@ public static ColumnReader createColumnReader( return new TimeColumnReader(type, column, memoryContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName())); } if (type 
instanceof UuidType) { - checkArgument(column.getColumnType() == BINARY, "UUID type can only be read from BINARY column but got " + column); + checkArgument(column.getColumnType() == BINARY, "UUID type can only be read from BINARY column but got %s", column); checkArgument( "UUID".equals(column.getAttributes().get(ICEBERG_BINARY_TYPE)), "Expected ORC column for UUID data to be annotated with %s=UUID: %s", diff --git a/lib/trino-orc/src/main/java/io/trino/orc/stream/OrcInputStream.java b/lib/trino-orc/src/main/java/io/trino/orc/stream/OrcInputStream.java index 3f37981fc5e48..cd0b6bf0b36f6 100644 --- a/lib/trino-orc/src/main/java/io/trino/orc/stream/OrcInputStream.java +++ b/lib/trino-orc/src/main/java/io/trino/orc/stream/OrcInputStream.java @@ -227,7 +227,9 @@ public void seekToCheckpoint(long checkpoint) int decompressedOffset = decodeDecompressedOffset(checkpoint); // if checkpoint is within the current buffer, seek locally int currentDecompressedBufferOffset = decodeDecompressedOffset(lastCheckpoint); - if (current != null && compressedOffset == decodeCompressedBlockOffset(lastCheckpoint) && decompressedOffset < currentDecompressedBufferOffset + current.length()) { + if (current != null && compressedOffset == decodeCompressedBlockOffset(lastCheckpoint) + && decompressedOffset >= currentDecompressedBufferOffset + && decompressedOffset < currentDecompressedBufferOffset + current.length()) { current.setPosition(decompressedOffset - currentDecompressedBufferOffset); return; } diff --git a/lib/trino-orc/src/test/java/io/trino/orc/stream/TestLongStreamV2.java b/lib/trino-orc/src/test/java/io/trino/orc/stream/TestLongStreamV2.java index b8a4106b07289..8333dc3ab29fe 100644 --- a/lib/trino-orc/src/test/java/io/trino/orc/stream/TestLongStreamV2.java +++ b/lib/trino-orc/src/test/java/io/trino/orc/stream/TestLongStreamV2.java @@ -33,7 +33,7 @@ public class TestLongStreamV2 extends AbstractTestValueStream { @Test - public void test() + public void testLargeValue() throws IOException { List> groups = new ArrayList<>(); @@ -47,6 +47,21 @@ public void test() testWriteValue(groups); } + @Test + public void testSmallValue() + throws IOException + { + List> groups = new ArrayList<>(); + for (int groupIndex = 0; groupIndex < 22; groupIndex++) { + List group = new ArrayList<>(); + for (int i = 0; i < 1_000_000; i++) { + group.add((long) (groupIndex * 0 + i)); + } + groups.add(group); + } + testWriteValue(groups); + } + @Override protected LongOutputStreamV2 createValueOutputStream() { diff --git a/lib/trino-parquet/pom.xml b/lib/trino-parquet/pom.xml index 4bf90122f230b..92fc733c8c558 100644 --- a/lib/trino-parquet/pom.xml +++ b/lib/trino-parquet/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java index 6dcc6b0743ffc..09fedb188ae61 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java @@ -62,6 +62,7 @@ import java.util.Optional; import java.util.function.Function; +import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; import static io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp; import static 
io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp; @@ -763,6 +764,15 @@ public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics // To be safe, we just keep the record by returning false. return false; } + + @Override + public String toString() + { + return toStringHelper(this) + .add("columnDescriptor", columnDescriptor) + .add("columnDomain", columnDomain) + .toString(); + } } private static class ColumnIndexValueConverter diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/NestedColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/NestedColumnReader.java index 6870ce3122a9b..2ceff847f3264 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/NestedColumnReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/NestedColumnReader.java @@ -507,8 +507,8 @@ private void readFlatPageV1(DataPageV1 page) int maxDefinitionLevel = field.getDefinitionLevel(); int maxRepetitionLevel = field.getRepetitionLevel(); - checkArgument(maxDefinitionLevel == 0 || definitionEncoding == RLE, "Invalid definition level encoding: " + definitionEncoding); - checkArgument(maxRepetitionLevel == 0 || repetitionEncoding == RLE, "Invalid repetition level encoding: " + definitionEncoding); + checkArgument(maxDefinitionLevel == 0 || definitionEncoding == RLE, "Invalid definition level encoding: %s", definitionEncoding); + checkArgument(maxRepetitionLevel == 0 || repetitionEncoding == RLE, "Invalid repetition level encoding: %s", repetitionEncoding); repetitionLevelDecoder = levelsDecoderProvider.create(maxRepetitionLevel); if (maxRepetitionLevel > 0) { diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java index aa9633f2763be..d6884c3b14146 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java @@ -306,7 +306,9 @@ private boolean advanceToNextRowGroup() long rowCount = currentGroupRowRanges.getRowCount(); columnIndexRowsFiltered += currentGroupRowCount - rowCount; if (rowCount == 0) { - return false; + // Filters on multiple columns with page indexes may yield non-overlapping row ranges and eliminate the entire row group. + // Advance to next row group to ensure that we don't return a null Page and close the page source before all row groups are processed + return advanceToNextRowGroup(); } currentGroupRowCount = rowCount; } diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatColumnReader.java index 45347567ead26..5d2b190b620e7 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatColumnReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatColumnReader.java @@ -280,7 +280,7 @@ private void readFlatPageV1(DataPageV1 page) Slice buffer = page.getSlice(); ParquetEncoding definitionEncoding = page.getDefinitionLevelEncoding(); - checkArgument(isNonNull() || definitionEncoding == RLE, "Invalid definition level encoding: " + definitionEncoding); + checkArgument(isNonNull() || definitionEncoding == RLE, "Invalid definition level encoding: %s", definitionEncoding); int alreadyRead = 0; if (definitionEncoding == RLE) { // Definition levels are skipped from file when the max definition level is 0 as the bit-width required to store them is 0. 
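
Reviewer note on the `ParquetReader.advanceToNextRowGroup()` hunk above: when page-index filtering on several columns leaves a row group with zero matching rows, returning `false` would be read as end-of-data and the page source would close before the remaining row groups were visited. Below is a minimal, hypothetical sketch of the intended control flow; the class and record names are illustrative and are not Trino's actual reader API.

```java
import java.util.List;

// Hypothetical sketch, not Trino's ParquetReader: a row group whose filtered row
// range is empty is skipped instead of being treated as end-of-data.
public class RowGroupAdvanceSketch
{
    public record RowGroup(long filteredRowCount) {}

    private final List<RowGroup> rowGroups;
    private int currentRowGroup = -1;

    public RowGroupAdvanceSketch(List<RowGroup> rowGroups)
    {
        this.rowGroups = rowGroups;
    }

    public boolean advanceToNextRowGroup()
    {
        currentRowGroup++;
        if (currentRowGroup >= rowGroups.size()) {
            // all row groups processed
            return false;
        }
        if (rowGroups.get(currentRowGroup).filteredRowCount() == 0) {
            // the whole row group was eliminated by page-index filtering;
            // keep advancing instead of signalling end-of-data
            return advanceToNextRowGroup();
        }
        return true;
    }

    public static void main(String[] args)
    {
        RowGroupAdvanceSketch sketch = new RowGroupAdvanceSketch(List.of(new RowGroup(0), new RowGroup(2387)));
        System.out.println(sketch.advanceToNextRowGroup()); // true: the empty first group is skipped
    }
}
```

A plain loop would work equally well; the essential point, exercised by `testEmptyRowRangesWithColumnIndex` further down, is that an eliminated row group must not terminate iteration.
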
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatDefinitionLevelDecoder.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatDefinitionLevelDecoder.java index de629401fb4af..84fcc6ac85ea9 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatDefinitionLevelDecoder.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/flat/FlatDefinitionLevelDecoder.java @@ -39,7 +39,7 @@ interface DefinitionLevelDecoderProvider static FlatDefinitionLevelDecoder getFlatDefinitionLevelDecoder(int maxDefinitionLevel) { - checkArgument(maxDefinitionLevel >= 0 && maxDefinitionLevel <= 1, "Invalid max definition level: " + maxDefinitionLevel); + checkArgument(maxDefinitionLevel >= 0 && maxDefinitionLevel <= 1, "Invalid max definition level: %s", maxDefinitionLevel); if (maxDefinitionLevel == 0) { return new ZeroDefinitionLevelDecoder(); } diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java index d494770fe64a2..13edc85f59972 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java @@ -44,15 +44,15 @@ public static T visit(Type type, ParquetTypeVisitor visitor) LogicalTypeAnnotation annotation = group.getLogicalTypeAnnotation(); if (LogicalTypeAnnotation.listType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), - "Invalid list: top-level group is repeated: " + group); + "Invalid list: top-level group is repeated: %s", group); checkArgument(group.getFieldCount() == 1, - "Invalid list: does not contain single repeated field: " + group); + "Invalid list: does not contain single repeated field: %s", group); GroupType repeatedElement = group.getFields().get(0).asGroupType(); checkArgument(repeatedElement.isRepetition(REPEATED), "Invalid list: inner group is not repeated"); checkArgument(repeatedElement.getFieldCount() <= 1, - "Invalid list: repeated group is not a single field: " + group); + "Invalid list: repeated group is not a single field: %s", group); visitor.fieldNames.push(repeatedElement.getName()); try { @@ -69,9 +69,9 @@ public static T visit(Type type, ParquetTypeVisitor visitor) } if (LogicalTypeAnnotation.mapType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), - "Invalid map: top-level group is repeated: " + group); + "Invalid map: top-level group is repeated: %s", group); checkArgument(group.getFieldCount() == 1, - "Invalid map: does not contain single repeated field: " + group); + "Invalid map: does not contain single repeated field: %s", group); GroupType repeatedKeyValue = group.getType(0).asGroupType(); checkArgument(repeatedKeyValue.isRepetition(REPEATED), diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java b/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java index 47dcbd151d251..a12422bfbbe7e 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java @@ -17,6 +17,7 @@ import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.trino.memory.context.AggregatedMemoryContext; +import io.trino.parquet.predicate.TupleDomainParquetPredicate; import io.trino.parquet.reader.ParquetReader; import io.trino.parquet.reader.RowGroupInfo; import 
io.trino.parquet.writer.ParquetSchemaConverter; @@ -27,13 +28,15 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.LongArrayBlock; import io.trino.spi.block.RowBlock; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.MapType; import io.trino.spi.type.Type; import io.trino.spi.type.TypeOperators; +import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.format.CompressionCodec; -import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.MessageColumnIO; +import org.apache.parquet.schema.MessageType; import org.joda.time.DateTimeZone; import java.io.ByteArrayOutputStream; @@ -42,6 +45,7 @@ import java.io.UncheckedIOException; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Random; @@ -50,12 +54,16 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.parquet.ParquetTypeUtils.constructField; import static io.trino.parquet.ParquetTypeUtils.getColumnIO; +import static io.trino.parquet.ParquetTypeUtils.getDescriptors; import static io.trino.parquet.ParquetTypeUtils.getParquetEncoding; import static io.trino.parquet.ParquetTypeUtils.lookupColumnByName; +import static io.trino.parquet.predicate.PredicateUtils.buildPredicate; +import static io.trino.parquet.predicate.PredicateUtils.getFilteredRowGroups; import static io.trino.spi.block.ArrayBlock.fromElementBlock; import static io.trino.spi.block.MapBlock.fromKeyValueBlock; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.TypeUtils.writeNativeValue; +import static java.util.Locale.ENGLISH; import static org.joda.time.DateTimeZone.UTC; public class ParquetTestUtils @@ -101,9 +109,22 @@ public static ParquetReader createParquetReader( List types, List columnNames) throws IOException + { + return createParquetReader(input, parquetMetadata, memoryContext, types, columnNames, TupleDomain.all()); + } + + public static ParquetReader createParquetReader( + ParquetDataSource input, + ParquetMetadata parquetMetadata, + AggregatedMemoryContext memoryContext, + List types, + List columnNames, + TupleDomain predicate) + throws IOException { org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - MessageColumnIO messageColumnIO = getColumnIO(fileMetaData.getSchema(), fileMetaData.getSchema()); + MessageType fileSchema = fileMetaData.getSchema(); + MessageColumnIO messageColumnIO = getColumnIO(fileSchema, fileSchema); ImmutableList.Builder columnFields = ImmutableList.builder(); for (int i = 0; i < types.size(); i++) { columnFields.add(new Column( @@ -113,25 +134,35 @@ public static ParquetReader createParquetReader( lookupColumnByName(messageColumnIO, columnNames.get(i))) .orElseThrow())); } - long nextStart = 0; - ImmutableList.Builder rowGroupInfoBuilder = ImmutableList.builder(); - for (BlockMetaData block : parquetMetadata.getBlocks()) { - rowGroupInfoBuilder.add(new RowGroupInfo(block, nextStart, Optional.empty())); - nextStart += block.getRowCount(); - } + Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); + TupleDomain parquetTupleDomain = predicate.transformKeys( + columnName -> descriptorsByPath.get(ImmutableList.of(columnName.toLowerCase(ENGLISH)))); + TupleDomainParquetPredicate parquetPredicate = buildPredicate(fileSchema, parquetTupleDomain, descriptorsByPath, UTC); + ParquetReaderOptions 
options = new ParquetReaderOptions(); + List rowGroups = getFilteredRowGroups( + 0, + input.getEstimatedSize(), + input, + parquetMetadata.getBlocks(), + ImmutableList.of(parquetTupleDomain), + ImmutableList.of(parquetPredicate), + descriptorsByPath, + UTC, + 1000, + options); return new ParquetReader( Optional.ofNullable(fileMetaData.getCreatedBy()), columnFields.build(), - rowGroupInfoBuilder.build(), + rowGroups, input, UTC, memoryContext, - new ParquetReaderOptions(), + options, exception -> { throwIfUnchecked(exception); return new RuntimeException(exception); }, - Optional.empty(), + Optional.of(parquetPredicate), Optional.empty()); } diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java b/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java index 206c6161249f0..0e6e7ada80179 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java @@ -46,8 +46,7 @@ import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Types; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.io.ByteArrayOutputStream; import java.math.BigInteger; @@ -402,20 +401,19 @@ public void testDate() .withMessage("Malformed Parquet file. Corrupted statistics for column \"[] required int32 DateColumn\": [min: 200, max: 100, num_nulls: 0] [testFile]"); } - @DataProvider - public Object[][] timestampPrecision() + @Test + public void testTimestampInt96() + throws ParquetCorruptionException { LocalDateTime baseTime = LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123456789); - return new Object[][] { - {3, baseTime, baseTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli() * MICROSECONDS_PER_MILLISECOND}, - // note the rounding of micros - {6, baseTime, baseTime.atZone(ZoneOffset.UTC).toInstant().getEpochSecond() * MICROSECONDS_PER_SECOND + 123457}, - {9, baseTime, longTimestamp(9, baseTime)} - }; + + testTimestampInt96(3, baseTime, baseTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli() * MICROSECONDS_PER_MILLISECOND); + // note the rounding of micros + testTimestampInt96(6, baseTime, baseTime.atZone(ZoneOffset.UTC).toInstant().getEpochSecond() * MICROSECONDS_PER_SECOND + 123457); + testTimestampInt96(9, baseTime, longTimestamp(9, baseTime)); } - @Test(dataProvider = "timestampPrecision") - public void testTimestampInt96(int precision, LocalDateTime baseTime, Object baseDomainValue) + private void testTimestampInt96(int precision, LocalDateTime baseTime, Object baseDomainValue) throws ParquetCorruptionException { ColumnDescriptor columnDescriptor = createColumnDescriptor(INT96, "TimestampColumn"); @@ -426,8 +424,35 @@ public void testTimestampInt96(int precision, LocalDateTime baseTime, Object bas assertThat(getDomain(columnDescriptor, timestampType, 10, timestampColumnStats(baseTime.minusSeconds(10), baseTime), ID, UTC)).isEqualTo(create(ValueSet.all(timestampType), false)); } - @Test(dataProvider = "testTimestampInt64DataProvider") - public void testTimestampInt64(TimeUnit timeUnit, int precision, LocalDateTime baseTime, Object baseDomainValue) + @Test + public void testTimestampInt64() + throws ParquetCorruptionException + { + LocalDateTime baseTime = LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123456789); + Object millisExpectedValue = 
baseTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli() * MICROSECONDS_PER_MILLISECOND; + // note the rounding of micros + Object microsExpectedValue = baseTime.atZone(ZoneOffset.UTC).toInstant().getEpochSecond() * MICROSECONDS_PER_SECOND + 123457; + Object nanosExpectedValue = longTimestamp(9, baseTime); + + Object nanosTruncatedToMillisExpectedValue = longTimestamp( + 9, + LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123000000)); + Object nanosTruncatedToMicrosExpectedValue = longTimestamp( + 9, + LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123457000)); + + testTimestampInt64(TimeUnit.MILLIS, 3, baseTime, millisExpectedValue); + testTimestampInt64(TimeUnit.MICROS, 3, baseTime, millisExpectedValue); + testTimestampInt64(TimeUnit.NANOS, 3, baseTime, millisExpectedValue); + testTimestampInt64(TimeUnit.MILLIS, 6, baseTime, millisExpectedValue); + testTimestampInt64(TimeUnit.MICROS, 6, baseTime, microsExpectedValue); + testTimestampInt64(TimeUnit.NANOS, 6, baseTime, microsExpectedValue); + testTimestampInt64(TimeUnit.MILLIS, 9, baseTime, nanosTruncatedToMillisExpectedValue); + testTimestampInt64(TimeUnit.MICROS, 9, baseTime, nanosTruncatedToMicrosExpectedValue); + testTimestampInt64(TimeUnit.NANOS, 9, baseTime, nanosExpectedValue); + } + + private void testTimestampInt64(TimeUnit timeUnit, int precision, LocalDateTime baseTime, Object baseDomainValue) throws ParquetCorruptionException { int parquetPrecision; @@ -449,7 +474,7 @@ public void testTimestampInt64(TimeUnit timeUnit, int precision, LocalDateTime b .as(LogicalTypeAnnotation.timestampType(false, timeUnit)) .named("TimestampColumn"); - ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[]{}, type, 0, 0); + ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {}, type, 0, 0); TimestampType timestampType = createTimestampType(precision); assertThat(getDomain(columnDescriptor, timestampType, 0, null, ID, UTC)).isEqualTo(all(timestampType)); LocalDateTime maxTime = baseTime.plus(Duration.ofMillis(50)); @@ -471,34 +496,6 @@ else if (baseDomainValue instanceof LongTimestamp longTimestamp) { assertThat(getDomain(columnDescriptor, timestampType, 10, longColumnStats(minValue, maxValue), ID, UTC)).isEqualTo(create(ValueSet.ofRanges(range(timestampType, baseDomainValue, true, maxDomainValue, true)), false)); } - @DataProvider - public Object[][] testTimestampInt64DataProvider() - { - LocalDateTime baseTime = LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123456789); - Object millisExpectedValue = baseTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli() * MICROSECONDS_PER_MILLISECOND; - // note the rounding of micros - Object microsExpectedValue = baseTime.atZone(ZoneOffset.UTC).toInstant().getEpochSecond() * MICROSECONDS_PER_SECOND + 123457; - Object nanosExpectedValue = longTimestamp(9, baseTime); - - Object nanosTruncatedToMillisExpectedValue = longTimestamp( - 9, - LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123000000)); - Object nanosTruncatedToMicrosExpectedValue = longTimestamp( - 9, - LocalDateTime.of(1970, 1, 19, 10, 28, 52, 123457000)); - return new Object[][] { - {TimeUnit.MILLIS, 3, baseTime, millisExpectedValue}, - {TimeUnit.MICROS, 3, baseTime, millisExpectedValue}, - {TimeUnit.NANOS, 3, baseTime, millisExpectedValue}, - {TimeUnit.MILLIS, 6, baseTime, millisExpectedValue}, - {TimeUnit.MICROS, 6, baseTime, microsExpectedValue}, - {TimeUnit.NANOS, 6, baseTime, microsExpectedValue}, - {TimeUnit.MILLIS, 9, baseTime, nanosTruncatedToMillisExpectedValue}, - {TimeUnit.MICROS, 9, baseTime, 
nanosTruncatedToMicrosExpectedValue}, - {TimeUnit.NANOS, 9, baseTime, nanosExpectedValue}, - }; - } - private static long toEpochWithPrecision(LocalDateTime time, int precision) { long scaledEpochSeconds = time.toEpochSecond(ZoneOffset.UTC) * (long) Math.pow(10, precision); @@ -549,8 +546,16 @@ public void testVarcharMatchesWithStatistics() .isEqualTo(Optional.of(ImmutableList.of(column))); } - @Test(dataProvider = "typeForParquetInt32") - public void testIntegerMatchesWithStatistics(Type typeForParquetInt32) + @Test + public void testIntegerMatchesWithStatistics() + throws ParquetCorruptionException + { + testIntegerMatchesWithStatistics(INTEGER); + testIntegerMatchesWithStatistics(SMALLINT); + testIntegerMatchesWithStatistics(TINYINT); + } + + private void testIntegerMatchesWithStatistics(Type typeForParquetInt32) throws ParquetCorruptionException { ColumnDescriptor column = createColumnDescriptor(INT32, "Test column"); @@ -566,16 +571,6 @@ public void testIntegerMatchesWithStatistics(Type typeForParquetInt32) .isEqualTo(typeForParquetInt32 != INTEGER); // stats invalid for smallint/tinyint } - @DataProvider - public Object[][] typeForParquetInt32() - { - return new Object[][] { - {INTEGER}, - {SMALLINT}, - {TINYINT}, - }; - } - @Test public void testBigintMatchesWithStatistics() throws ParquetCorruptionException @@ -724,7 +719,7 @@ public void testColumnIndexWithNoNullsCount() private ColumnDescriptor createColumnDescriptor(PrimitiveTypeName typeName, String columnName) { - return new ColumnDescriptor(new String[]{}, new PrimitiveType(REQUIRED, typeName, columnName), 0, 0); + return new ColumnDescriptor(new String[] {}, new PrimitiveType(REQUIRED, typeName, columnName), 0, 0); } private TupleDomain getEffectivePredicate(ColumnDescriptor column, VarcharType type, Slice value) diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReaderMemoryUsage.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java similarity index 63% rename from lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReaderMemoryUsage.java rename to lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java index fab5d62849861..3acfd1f0cdf84 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReaderMemoryUsage.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java @@ -14,6 +14,8 @@ package io.trino.parquet.reader; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; import io.airlift.units.DataSize; import io.trino.memory.context.AggregatedMemoryContext; import io.trino.parquet.ParquetDataSource; @@ -21,23 +23,35 @@ import io.trino.parquet.writer.ParquetWriterOptions; import io.trino.spi.Page; import io.trino.spi.block.LazyBlock; +import io.trino.spi.metrics.Count; +import io.trino.spi.metrics.Metric; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.predicate.ValueSet; import io.trino.spi.type.Type; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.junit.jupiter.api.Test; +import java.io.File; import java.io.IOException; +import java.net.URISyntaxException; +import java.time.LocalDate; import java.util.List; +import java.util.Map; import java.util.Optional; import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; import static 
io.trino.parquet.ParquetTestUtils.createParquetReader; import static io.trino.parquet.ParquetTestUtils.generateInputPages; import static io.trino.parquet.ParquetTestUtils.writeParquetFile; +import static io.trino.parquet.reader.ParquetReader.COLUMN_INDEX_ROWS_FILTERED; import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DateType.DATE; import static io.trino.spi.type.IntegerType.INTEGER; import static org.assertj.core.api.Assertions.assertThat; -public class TestParquetReaderMemoryUsage +public class TestParquetReader { @Test public void testColumnReaderMemoryUsage() @@ -98,4 +112,39 @@ public void testColumnReaderMemoryUsage() reader.close(); assertThat(memoryContext.getBytes()).isEqualTo(0); } + + @Test + public void testEmptyRowRangesWithColumnIndex() + throws URISyntaxException, IOException + { + List columnNames = ImmutableList.of("l_shipdate", "l_commitdate"); + List types = ImmutableList.of(DATE, DATE); + + ParquetDataSource dataSource = new FileParquetDataSource( + new File(Resources.getResource("lineitem_sorted_by_shipdate/data.parquet").toURI()), + new ParquetReaderOptions()); + ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty()); + assertThat(parquetMetadata.getBlocks().size()).isEqualTo(2); + // The predicate and the file are prepared so that page indexes will result in non-overlapping row ranges and eliminate the entire first row group + // while the second row group still has to be read + TupleDomain predicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + "l_shipdate", Domain.multipleValues(DATE, ImmutableList.of(LocalDate.of(1993, 1, 1).toEpochDay(), LocalDate.of(1997, 1, 1).toEpochDay())), + "l_commitdate", Domain.create(ValueSet.ofRanges(Range.greaterThan(DATE, LocalDate.of(1995, 1, 1).toEpochDay())), false))); + + try (ParquetReader reader = createParquetReader(dataSource, parquetMetadata, newSimpleAggregatedMemoryContext(), types, columnNames, predicate)) { + Page page = reader.nextPage(); + int rowsRead = 0; + while (page != null) { + rowsRead += page.getPositionCount(); + page = reader.nextPage(); + } + assertThat(rowsRead).isEqualTo(2387); + Map> metrics = reader.getMetrics().getMetrics(); + assertThat(metrics).containsKey(COLUMN_INDEX_ROWS_FILTERED); + // Column index should filter at least the first row group + assertThat(((Count) metrics.get(COLUMN_INDEX_ROWS_FILTERED)).getTotal()) + .isGreaterThanOrEqualTo(parquetMetadata.getBlocks().get(0).getRowCount()); + } + } } diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestNullsDecoder.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestNullsDecoder.java index 8003acbe7bdaf..ef420d9e8050e 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestNullsDecoder.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestNullsDecoder.java @@ -16,17 +16,13 @@ import io.airlift.slice.Slices; import org.apache.parquet.bytes.HeapByteBufferAllocator; import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.Arrays; import java.util.Random; -import java.util.stream.Stream; import static io.trino.parquet.reader.TestData.generateMixedData; -import static io.trino.testing.DataProviders.cartesianProduct; -import static io.trino.testing.DataProviders.toDataProvider; import static java.lang.Math.min; 
import static org.assertj.core.api.Assertions.assertThat; @@ -48,69 +44,69 @@ public class TestNullsDecoder MIXED_RANDOM_AND_GROUPED_ARRAY = generateMixedData(r, N, MAX_MIXED_GROUP_SIZE); } - @Test(dataProvider = "dataSets") - public void testDecoding(NullValuesProvider nullValuesProvider, int batchSize) + @Test + public void testDecoding() throws IOException { - boolean[] values = nullValuesProvider.getPositions(); - byte[] encoded = encode(values); - NullsDecoder decoder = new NullsDecoder(); - decoder.init(Slices.wrappedBuffer(encoded)); - boolean[] result = new boolean[N]; - int nonNullCount = 0; - for (int i = 0; i < N; i += batchSize) { - nonNullCount += decoder.readNext(result, i, min(batchSize, N - i)); + for (NullValuesProvider nullValuesProvider : NullValuesProvider.values()) { + for (int batchSize : Arrays.asList(1, 3, 16, 100, 1000)) { + boolean[] values = nullValuesProvider.getPositions(); + byte[] encoded = encode(values); + NullsDecoder decoder = new NullsDecoder(); + decoder.init(Slices.wrappedBuffer(encoded)); + boolean[] result = new boolean[N]; + int nonNullCount = 0; + for (int i = 0; i < N; i += batchSize) { + nonNullCount += decoder.readNext(result, i, min(batchSize, N - i)); + } + // Parquet encodes whether value exists, Trino whether value is null + boolean[] byteResult = flip(result); + assertThat(byteResult).containsExactly(values); + + int expectedNonNull = nonNullCount(values); + assertThat(nonNullCount).isEqualTo(expectedNonNull); + } } - // Parquet encodes whether value exists, Trino whether value is null - boolean[] byteResult = flip(result); - assertThat(byteResult).containsExactly(values); - - int expectedNonNull = nonNullCount(values); - assertThat(nonNullCount).isEqualTo(expectedNonNull); } - @Test(dataProvider = "dataSets") - public void testSkippedDecoding(NullValuesProvider nullValuesProvider, int batchSize) + @Test + public void testSkippedDecoding() throws IOException { - boolean[] values = nullValuesProvider.getPositions(); - byte[] encoded = encode(values); - NullsDecoder decoder = new NullsDecoder(); - decoder.init(Slices.wrappedBuffer(encoded)); - int nonNullCount = 0; - int numberOfBatches = (N + batchSize - 1) / batchSize; - Random random = new Random(batchSize * 0xFFFFFFFFL * N); - int skippedBatches = random.nextInt(numberOfBatches); - int alreadyRead = 0; - for (int i = 0; i < skippedBatches; i++) { - int chunkSize = min(batchSize, N - alreadyRead); - nonNullCount += decoder.skip(chunkSize); - alreadyRead += chunkSize; - } - assertThat(nonNullCount).isEqualTo(nonNullCount(values, alreadyRead)); - - boolean[] result = new boolean[N - alreadyRead]; - boolean[] expected = Arrays.copyOfRange(values, alreadyRead, values.length); - int offset = 0; - while (alreadyRead < N) { - int chunkSize = min(batchSize, N - alreadyRead); - nonNullCount += decoder.readNext(result, offset, chunkSize); - alreadyRead += chunkSize; - offset += chunkSize; + for (NullValuesProvider nullValuesProvider : NullValuesProvider.values()) { + for (int batchSize : Arrays.asList(1, 3, 16, 100, 1000)) { + boolean[] values = nullValuesProvider.getPositions(); + byte[] encoded = encode(values); + NullsDecoder decoder = new NullsDecoder(); + decoder.init(Slices.wrappedBuffer(encoded)); + int nonNullCount = 0; + int numberOfBatches = (N + batchSize - 1) / batchSize; + Random random = new Random(batchSize * 0xFFFFFFFFL * N); + int skippedBatches = random.nextInt(numberOfBatches); + int alreadyRead = 0; + for (int i = 0; i < skippedBatches; i++) { + int chunkSize = min(batchSize, N - 
alreadyRead); + nonNullCount += decoder.skip(chunkSize); + alreadyRead += chunkSize; + } + assertThat(nonNullCount).isEqualTo(nonNullCount(values, alreadyRead)); + + boolean[] result = new boolean[N - alreadyRead]; + boolean[] expected = Arrays.copyOfRange(values, alreadyRead, values.length); + int offset = 0; + while (alreadyRead < N) { + int chunkSize = min(batchSize, N - alreadyRead); + nonNullCount += decoder.readNext(result, offset, chunkSize); + alreadyRead += chunkSize; + offset += chunkSize; + } + // Parquet encodes whether value exists, Trino whether value is null + boolean[] byteResult = flip(result); + assertThat(byteResult).containsExactly(expected); + + assertThat(nonNullCount).isEqualTo(nonNullCount(values)); + } } - // Parquet encodes whether value exists, Trino whether value is null - boolean[] byteResult = flip(result); - assertThat(byteResult).containsExactly(expected); - - assertThat(nonNullCount).isEqualTo(nonNullCount(values)); - } - - @DataProvider(name = "dataSets") - public static Object[][] dataSets() - { - return cartesianProduct( - Arrays.stream(NullValuesProvider.values()).collect(toDataProvider()), - Stream.of(1, 3, 16, 100, 1000).collect(toDataProvider())); } private enum NullValuesProvider diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/NullsProvider.java b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/NullsProvider.java index f0b3836264689..304ae28eafcba 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/NullsProvider.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/NullsProvider.java @@ -13,14 +13,9 @@ */ package io.trino.parquet.writer; -import org.testng.annotations.DataProvider; - import java.util.Arrays; import java.util.Optional; import java.util.Random; -import java.util.stream.Stream; - -import static io.trino.testing.DataProviders.toDataProvider; enum NullsProvider { @@ -98,11 +93,4 @@ Optional getNulls(int positionCount, Optional forcedNulls) } return Optional.of(nullPositions); } - - @DataProvider - public static Object[][] nullsProviders() - { - return Stream.of(NullsProvider.values()) - .collect(toDataProvider()); - } } diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestDefinitionLevelWriter.java b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestDefinitionLevelWriter.java index 3cab34ede5f5f..5eda372804ef4 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestDefinitionLevelWriter.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestDefinitionLevelWriter.java @@ -20,7 +20,7 @@ import io.trino.spi.block.ColumnarMap; import io.trino.spi.block.LongArrayBlock; import io.trino.spi.block.RowBlock; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.Optional; @@ -42,105 +42,113 @@ public class TestDefinitionLevelWriter { private static final int POSITIONS = 8096; - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWritePrimitiveDefinitionLevels(NullsProvider nullsProvider) + @Test + public void testWritePrimitiveDefinitionLevels() { - Block block = new LongArrayBlock(POSITIONS, nullsProvider.getNulls(POSITIONS), new long[POSITIONS]); - int maxDefinitionLevel = 3; - // Write definition levels for all positions - assertDefinitionLevels(block, ImmutableList.of(), maxDefinitionLevel); + for (NullsProvider nullsProvider : NullsProvider.values()) { + Block block = new LongArrayBlock(POSITIONS, 
nullsProvider.getNulls(POSITIONS), new long[POSITIONS]); + int maxDefinitionLevel = 3; + // Write definition levels for all positions + assertDefinitionLevels(block, ImmutableList.of(), maxDefinitionLevel); - // Write definition levels for all positions one-at-a-time - assertDefinitionLevels(block, nCopies(block.getPositionCount(), 1), maxDefinitionLevel); + // Write definition levels for all positions one-at-a-time + assertDefinitionLevels(block, nCopies(block.getPositionCount(), 1), maxDefinitionLevel); - // Write definition levels for all positions with different group sizes - assertDefinitionLevels(block, generateGroupSizes(block.getPositionCount()), maxDefinitionLevel); + // Write definition levels for all positions with different group sizes + assertDefinitionLevels(block, generateGroupSizes(block.getPositionCount()), maxDefinitionLevel); + } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteRowDefinitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteRowDefinitionLevels() { - RowBlock rowBlock = createRowBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - List fields = getNullSuppressedRowFieldsFromBlock(rowBlock); - int fieldMaxDefinitionLevel = 2; - // Write definition levels for all positions - for (int field = 0; field < fields.size(); field++) { - assertDefinitionLevels(rowBlock, fields, ImmutableList.of(), field, fieldMaxDefinitionLevel); - } + for (NullsProvider nullsProvider : NullsProvider.values()) { + RowBlock rowBlock = createRowBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + List fields = getNullSuppressedRowFieldsFromBlock(rowBlock); + int fieldMaxDefinitionLevel = 2; + // Write definition levels for all positions + for (int field = 0; field < fields.size(); field++) { + assertDefinitionLevels(rowBlock, fields, ImmutableList.of(), field, fieldMaxDefinitionLevel); + } - // Write definition levels for all positions one-at-a-time - for (int field = 0; field < fields.size(); field++) { - assertDefinitionLevels( - rowBlock, - fields, - nCopies(rowBlock.getPositionCount(), 1), - field, - fieldMaxDefinitionLevel); - } + // Write definition levels for all positions one-at-a-time + for (int field = 0; field < fields.size(); field++) { + assertDefinitionLevels( + rowBlock, + fields, + nCopies(rowBlock.getPositionCount(), 1), + field, + fieldMaxDefinitionLevel); + } - // Write definition levels for all positions with different group sizes - for (int field = 0; field < fields.size(); field++) { - assertDefinitionLevels( - rowBlock, - fields, - generateGroupSizes(rowBlock.getPositionCount()), - field, - fieldMaxDefinitionLevel); + // Write definition levels for all positions with different group sizes + for (int field = 0; field < fields.size(); field++) { + assertDefinitionLevels( + rowBlock, + fields, + generateGroupSizes(rowBlock.getPositionCount()), + field, + fieldMaxDefinitionLevel); + } } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteArrayDefinitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteArrayDefinitionLevels() { - Block arrayBlock = createArrayBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - ColumnarArray columnarArray = toColumnarArray(arrayBlock); - int maxDefinitionLevel = 3; - // Write definition levels for all positions - assertDefinitionLevels( - columnarArray, - ImmutableList.of(), - maxDefinitionLevel); + for (NullsProvider nullsProvider : NullsProvider.values()) { + Block arrayBlock = 
createArrayBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + ColumnarArray columnarArray = toColumnarArray(arrayBlock); + int maxDefinitionLevel = 3; + // Write definition levels for all positions + assertDefinitionLevels( + columnarArray, + ImmutableList.of(), + maxDefinitionLevel); - // Write definition levels for all positions one-at-a-time - assertDefinitionLevels( - columnarArray, - nCopies(columnarArray.getPositionCount(), 1), - maxDefinitionLevel); + // Write definition levels for all positions one-at-a-time + assertDefinitionLevels( + columnarArray, + nCopies(columnarArray.getPositionCount(), 1), + maxDefinitionLevel); - // Write definition levels for all positions with different group sizes - assertDefinitionLevels( - columnarArray, - generateGroupSizes(columnarArray.getPositionCount()), - maxDefinitionLevel); + // Write definition levels for all positions with different group sizes + assertDefinitionLevels( + columnarArray, + generateGroupSizes(columnarArray.getPositionCount()), + maxDefinitionLevel); + } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteMapDefinitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteMapDefinitionLevels() { - Block mapBlock = createMapBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - ColumnarMap columnarMap = toColumnarMap(mapBlock); - int keysMaxDefinitionLevel = 2; - int valuesMaxDefinitionLevel = 3; - // Write definition levels for all positions - assertDefinitionLevels( - columnarMap, - ImmutableList.of(), - keysMaxDefinitionLevel, - valuesMaxDefinitionLevel); + for (NullsProvider nullsProvider : NullsProvider.values()) { + Block mapBlock = createMapBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + ColumnarMap columnarMap = toColumnarMap(mapBlock); + int keysMaxDefinitionLevel = 2; + int valuesMaxDefinitionLevel = 3; + // Write definition levels for all positions + assertDefinitionLevels( + columnarMap, + ImmutableList.of(), + keysMaxDefinitionLevel, + valuesMaxDefinitionLevel); - // Write definition levels for all positions one-at-a-time - assertDefinitionLevels( - columnarMap, - nCopies(columnarMap.getPositionCount(), 1), - keysMaxDefinitionLevel, - valuesMaxDefinitionLevel); + // Write definition levels for all positions one-at-a-time + assertDefinitionLevels( + columnarMap, + nCopies(columnarMap.getPositionCount(), 1), + keysMaxDefinitionLevel, + valuesMaxDefinitionLevel); - // Write definition levels for all positions with different group sizes - assertDefinitionLevels( - columnarMap, - generateGroupSizes(columnarMap.getPositionCount()), - keysMaxDefinitionLevel, - valuesMaxDefinitionLevel); + // Write definition levels for all positions with different group sizes + assertDefinitionLevels( + columnarMap, + generateGroupSizes(columnarMap.getPositionCount()), + keysMaxDefinitionLevel, + valuesMaxDefinitionLevel); + } } private static void assertDefinitionLevels(Block block, List writePositionCounts, int maxDefinitionLevel) diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestRepetitionLevelWriter.java b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestRepetitionLevelWriter.java index 4f38866c003a0..40780a38a29ea 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestRepetitionLevelWriter.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestRepetitionLevelWriter.java @@ -22,7 +22,7 @@ import io.trino.spi.block.RowBlock; import io.trino.spi.type.MapType; import 
io.trino.spi.type.TypeOperators; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.Iterator; import java.util.List; @@ -49,91 +49,99 @@ public class TestRepetitionLevelWriter { private static final int POSITIONS = 1024; - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteRowRepetitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteRowRepetitionLevels() { - // Using an array of row blocks for testing as Structs don't have a repetition level by themselves - Optional valueIsNull = RANDOM_NULLS.getNulls(POSITIONS); - int[] arrayOffsets = generateOffsets(valueIsNull, POSITIONS); - int rowBlockPositions = arrayOffsets[POSITIONS]; - RowBlock rowBlock = createRowBlock(nullsProvider.getNulls(rowBlockPositions), rowBlockPositions); - ArrayBlock arrayBlock = fromElementBlock(POSITIONS, valueIsNull, arrayOffsets, rowBlock); + for (NullsProvider nullsProvider : NullsProvider.values()) { + // Using an array of row blocks for testing as Structs don't have a repetition level by themselves + Optional valueIsNull = RANDOM_NULLS.getNulls(POSITIONS); + int[] arrayOffsets = generateOffsets(valueIsNull, POSITIONS); + int rowBlockPositions = arrayOffsets[POSITIONS]; + RowBlock rowBlock = createRowBlock(nullsProvider.getNulls(rowBlockPositions), rowBlockPositions); + ArrayBlock arrayBlock = fromElementBlock(POSITIONS, valueIsNull, arrayOffsets, rowBlock); - ColumnarArray columnarArray = toColumnarArray(arrayBlock); - Block row = columnarArray.getElementsBlock(); - List nullSuppressedFields = getNullSuppressedRowFieldsFromBlock(row); - // Write Repetition levels for all positions - for (int fieldIndex = 0; fieldIndex < nullSuppressedFields.size(); fieldIndex++) { - Block field = nullSuppressedFields.get(fieldIndex); - assertRepetitionLevels(columnarArray, row, field, ImmutableList.of()); - assertRepetitionLevels(columnarArray, row, field, ImmutableList.of()); + ColumnarArray columnarArray = toColumnarArray(arrayBlock); + Block row = columnarArray.getElementsBlock(); + List nullSuppressedFields = getNullSuppressedRowFieldsFromBlock(row); + // Write Repetition levels for all positions + for (int fieldIndex = 0; fieldIndex < nullSuppressedFields.size(); fieldIndex++) { + Block field = nullSuppressedFields.get(fieldIndex); + assertRepetitionLevels(columnarArray, row, field, ImmutableList.of()); + assertRepetitionLevels(columnarArray, row, field, ImmutableList.of()); - // Write Repetition levels for all positions one-at-a-time - assertRepetitionLevels( - columnarArray, - row, - field, - nCopies(columnarArray.getPositionCount(), 1)); + // Write Repetition levels for all positions one-at-a-time + assertRepetitionLevels( + columnarArray, + row, + field, + nCopies(columnarArray.getPositionCount(), 1)); - // Write Repetition levels for all positions with different group sizes - assertRepetitionLevels( - columnarArray, - row, - field, - generateGroupSizes(columnarArray.getPositionCount())); + // Write Repetition levels for all positions with different group sizes + assertRepetitionLevels( + columnarArray, + row, + field, + generateGroupSizes(columnarArray.getPositionCount())); + } } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteArrayRepetitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteArrayRepetitionLevels() { - Block arrayBlock = createArrayBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - ColumnarArray columnarArray = 
toColumnarArray(arrayBlock); - // Write Repetition levels for all positions - assertRepetitionLevels(columnarArray, ImmutableList.of()); + for (NullsProvider nullsProvider : NullsProvider.values()) { + Block arrayBlock = createArrayBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + ColumnarArray columnarArray = toColumnarArray(arrayBlock); + // Write Repetition levels for all positions + assertRepetitionLevels(columnarArray, ImmutableList.of()); - // Write Repetition levels for all positions one-at-a-time - assertRepetitionLevels(columnarArray, nCopies(columnarArray.getPositionCount(), 1)); + // Write Repetition levels for all positions one-at-a-time + assertRepetitionLevels(columnarArray, nCopies(columnarArray.getPositionCount(), 1)); - // Write Repetition levels for all positions with different group sizes - assertRepetitionLevels(columnarArray, generateGroupSizes(columnarArray.getPositionCount())); + // Write Repetition levels for all positions with different group sizes + assertRepetitionLevels(columnarArray, generateGroupSizes(columnarArray.getPositionCount())); + } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testWriteMapRepetitionLevels(NullsProvider nullsProvider) + @Test + public void testWriteMapRepetitionLevels() { - Block mapBlock = createMapBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - ColumnarMap columnarMap = toColumnarMap(mapBlock); - // Write Repetition levels for all positions - assertRepetitionLevels(columnarMap, ImmutableList.of()); + for (NullsProvider nullsProvider : NullsProvider.values()) { + Block mapBlock = createMapBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + ColumnarMap columnarMap = toColumnarMap(mapBlock); + // Write Repetition levels for all positions + assertRepetitionLevels(columnarMap, ImmutableList.of()); - // Write Repetition levels for all positions one-at-a-time - assertRepetitionLevels(columnarMap, nCopies(columnarMap.getPositionCount(), 1)); + // Write Repetition levels for all positions one-at-a-time + assertRepetitionLevels(columnarMap, nCopies(columnarMap.getPositionCount(), 1)); - // Write Repetition levels for all positions with different group sizes - assertRepetitionLevels(columnarMap, generateGroupSizes(columnarMap.getPositionCount())); + // Write Repetition levels for all positions with different group sizes + assertRepetitionLevels(columnarMap, generateGroupSizes(columnarMap.getPositionCount())); + } } - @Test(dataProviderClass = NullsProvider.class, dataProvider = "nullsProviders") - public void testNestedStructRepetitionLevels(NullsProvider nullsProvider) + @Test + public void testNestedStructRepetitionLevels() { - RowBlock rowBlock = createNestedRowBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); - List fieldBlocks = getNullSuppressedRowFieldsFromBlock(rowBlock); + for (NullsProvider nullsProvider : NullsProvider.values()) { + RowBlock rowBlock = createNestedRowBlock(nullsProvider.getNulls(POSITIONS), POSITIONS); + List fieldBlocks = getNullSuppressedRowFieldsFromBlock(rowBlock); - for (int field = 0; field < fieldBlocks.size(); field++) { - Block fieldBlock = fieldBlocks.get(field); - ColumnarMap columnarMap = toColumnarMap(fieldBlock); - for (Block mapElements : ImmutableList.of(columnarMap.getKeysBlock(), columnarMap.getValuesBlock())) { - ColumnarArray columnarArray = toColumnarArray(mapElements); + for (int field = 0; field < fieldBlocks.size(); field++) { + Block fieldBlock = fieldBlocks.get(field); + ColumnarMap columnarMap = toColumnarMap(fieldBlock); 
+ for (Block mapElements : ImmutableList.of(columnarMap.getKeysBlock(), columnarMap.getValuesBlock())) { + ColumnarArray columnarArray = toColumnarArray(mapElements); - // Write Repetition levels for all positions - assertRepetitionLevels(rowBlock, columnarMap, columnarArray, ImmutableList.of()); + // Write Repetition levels for all positions + assertRepetitionLevels(rowBlock, columnarMap, columnarArray, ImmutableList.of()); - // Write Repetition levels for all positions one-at-a-time - assertRepetitionLevels(rowBlock, columnarMap, columnarArray, nCopies(rowBlock.getPositionCount(), 1)); + // Write Repetition levels for all positions one-at-a-time + assertRepetitionLevels(rowBlock, columnarMap, columnarArray, nCopies(rowBlock.getPositionCount(), 1)); - // Write Repetition levels for all positions with different group sizes - assertRepetitionLevels(rowBlock, columnarMap, columnarArray, generateGroupSizes(rowBlock.getPositionCount())); + // Write Repetition levels for all positions with different group sizes + assertRepetitionLevels(rowBlock, columnarMap, columnarArray, generateGroupSizes(rowBlock.getPositionCount())); + } } } } diff --git a/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/README.md b/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/README.md new file mode 100644 index 0000000000000..d1b04fb3ad9d1 --- /dev/null +++ b/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/README.md @@ -0,0 +1,11 @@ +This file was prepared to contain parquet page indexes and two small row-groups with many small pages per row-group. +Generating it required using a Trino release <= 422, because the new Trino parquet writer does not support writing page indexes yet. +

+```sql +set session hive.parquet_writer_batch_size=10; +set session hive.parquet_writer_page_size='10Kb'; +set session hive.parquet_writer_block_size='1MB'; +set session hive.parquet_optimized_writer_enabled=false; + +create table lineitem with (format='parquet', sorted_by=array['l_shipdate'], bucketed_by=array['l_shipdate'], bucket_count=1) as select * from tpch.tiny.lineitem; +``` diff --git a/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/data.parquet b/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/data.parquet new file mode 100644 index 0000000000000..82fd0166124d8 Binary files /dev/null and b/lib/trino-parquet/src/test/resources/lineitem_sorted_by_shipdate/data.parquet differ diff --git a/lib/trino-phoenix5-patched/pom.xml b/lib/trino-phoenix5-patched/pom.xml index b5d69023d0ef0..8954ec63d723b 100644 --- a/lib/trino-phoenix5-patched/pom.xml +++ b/lib/trino-phoenix5-patched/pom.xml @@ -6,7 +6,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/lib/trino-plugin-toolkit/pom.xml b/lib/trino-plugin-toolkit/pom.xml index 74332aa4cb8e0..01cd65a8fe6c4 100644 --- a/lib/trino-plugin-toolkit/pom.xml +++ b/lib/trino-plugin-toolkit/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -187,12 +187,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java index dfbed636f83b6..05547e0d26f5c 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java +++ 
b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java @@ -1089,10 +1089,16 @@ public void validateScan(ConnectorSession session, ConnectorTableHandle handle) } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - delegate.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + delegate.createMaterializedView(session, viewName, definition, properties, replace, ignoreExisting); } } @@ -1128,6 +1134,14 @@ public Optional getMaterializedView(Connect } } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition viewDefinition) + { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + return delegate.getMaterializedViewProperties(session, viewName, viewDefinition); + } + } + @Override public MaterializedViewFreshness getMaterializedViewFreshness(ConnectorSession session, SchemaTableName name) { diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorPageSink.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorPageSink.java index 6b399d2d40ded..a4f76d74b3d28 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorPageSink.java +++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorPageSink.java @@ -69,6 +69,14 @@ public CompletableFuture appendPage(Page page) } } + @Override + public void closeIdleWriters() + { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + delegate.closeIdleWriters(); + } + } + @Override public CompletableFuture> finish() { diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRewriter.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRewriter.java index ce9f18743e43a..472eeb0e98cef 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRewriter.java +++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRewriter.java @@ -87,6 +87,9 @@ private Optional rewrite( ConnectorExpression expression, RewriteContext context) { + if (!rule.isEnabled(context.getSession())) { + return Optional.empty(); + } Capture expressionCapture = newCapture(); Pattern pattern = rule.getPattern().capturedAs(expressionCapture); Iterator matches = pattern.match(expression, context).iterator(); diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRule.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRule.java index 2dcbb25e6f133..c0ecc31663a5e 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRule.java +++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressionRule.java @@ 
-27,6 +27,11 @@ public interface ConnectorExpressionRule { + default boolean isEnabled(ConnectorSession session) + { + return true; + } + Pattern getPattern(); Optional rewrite(ExpressionType expression, Captures captures, RewriteContext context); diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressions.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressions.java index 01fc9a38cf61e..71d587247f6ff 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressions.java +++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/expression/ConnectorExpressions.java @@ -14,21 +14,36 @@ package io.trino.plugin.base.expression; import com.google.common.collect.ImmutableList; +import com.google.common.graph.SuccessorsFunction; +import com.google.common.graph.Traverser; import io.trino.spi.expression.Call; import io.trino.spi.expression.ConnectorExpression; +import io.trino.spi.expression.Variable; import java.util.Arrays; import java.util.List; +import java.util.stream.Stream; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; +import static com.google.common.collect.Streams.stream; import static io.trino.spi.expression.Constant.TRUE; import static io.trino.spi.expression.StandardFunctions.AND_FUNCTION_NAME; import static io.trino.spi.type.BooleanType.BOOLEAN; +import static java.util.Objects.requireNonNull; public final class ConnectorExpressions { private ConnectorExpressions() {} + public static List extractVariables(ConnectorExpression expression) + { + return preOrder(expression) + .filter(Variable.class::isInstance) + .map(Variable.class::cast) + .collect(toImmutableList()); + } + public static List extractConjuncts(ConnectorExpression expression) { ImmutableList.Builder resultBuilder = ImmutableList.builder(); @@ -38,6 +53,10 @@ public static List extractConjuncts(ConnectorExpression exp private static void extractConjuncts(ConnectorExpression expression, ImmutableList.Builder resultBuilder) { + if (expression.equals(TRUE)) { + // Skip useless conjuncts. 
+ return; + } if (expression instanceof Call call) { if (AND_FUNCTION_NAME.equals(call.getFunctionName())) { for (ConnectorExpression argument : call.getArguments()) { @@ -64,4 +83,11 @@ public static ConnectorExpression and(List expressions) } return getOnlyElement(expressions); } + + private static Stream preOrder(ConnectorExpression expression) + { + return stream( + Traverser.forTree((SuccessorsFunction) ConnectorExpression::getChildren) + .depthFirstPreOrder(requireNonNull(expression, "expression is null"))); + } } diff --git a/lib/trino-record-decoder/pom.xml b/lib/trino-record-decoder/pom.xml index 5e7a0548029b0..8c838108c22c1 100644 --- a/lib/trino-record-decoder/pom.xml +++ b/lib/trino-record-decoder/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -165,12 +165,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/lib/trino-record-decoder/src/test/java/io/trino/decoder/util/DecoderTestUtil.java b/lib/trino-record-decoder/src/test/java/io/trino/decoder/util/DecoderTestUtil.java index 31b3a13dedea2..61d5d190b74ae 100644 --- a/lib/trino-record-decoder/src/test/java/io/trino/decoder/util/DecoderTestUtil.java +++ b/lib/trino-record-decoder/src/test/java/io/trino/decoder/util/DecoderTestUtil.java @@ -22,7 +22,7 @@ import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertEquals; +import static org.assertj.core.data.Offset.offset; public final class DecoderTestUtil { @@ -55,7 +55,7 @@ public static void checkValue(Map decod { FieldValueProvider provider = decodedRow.get(handle); assertThat(provider).isNotNull(); - assertEquals(provider.getDouble(), value, 0.0001); + assertThat(provider.getDouble()).isCloseTo(value, offset(0.0001)); } public static void checkValue(Map decodedRow, DecoderColumnHandle handle, boolean value) diff --git a/plugin/trino-accumulo-iterators/pom.xml b/plugin/trino-accumulo-iterators/pom.xml index dbe4589d0ac4e..43ac42b4a82cf 100644 --- a/plugin/trino-accumulo-iterators/pom.xml +++ b/plugin/trino-accumulo-iterators/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-accumulo/pom.xml b/plugin/trino-accumulo/pom.xml index 5d07c63c19277..8cf27ac319e85 100644 --- a/plugin/trino-accumulo/pom.xml +++ b/plugin/trino-accumulo/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -267,6 +267,12 @@ + + io.trino + trino-testing-containers + test + + io.trino trino-testing-services @@ -314,12 +320,6 @@ testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-accumulo/src/test/java/io/trino/plugin/accumulo/TestingAccumuloServer.java b/plugin/trino-accumulo/src/test/java/io/trino/plugin/accumulo/TestingAccumuloServer.java index f8a6049019c48..9854ac0966c0f 100644 --- a/plugin/trino-accumulo/src/test/java/io/trino/plugin/accumulo/TestingAccumuloServer.java +++ b/plugin/trino-accumulo/src/test/java/io/trino/plugin/accumulo/TestingAccumuloServer.java @@ -14,6 +14,7 @@ package io.trino.plugin.accumulo; import io.trino.testing.TestingProperties; +import io.trino.testing.containers.junit.ReportLeakedContainers; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.Connector; @@ -63,6 +64,7 @@ private TestingAccumuloServer() // TODO Change this class to not be a singleton // https://github.com/trinodb/trino/issues/5842 
accumuloContainer.start(); + ReportLeakedContainers.ignoreContainerId(accumuloContainer.getContainerId()); } public String getInstanceName() diff --git a/plugin/trino-atop/pom.xml b/plugin/trino-atop/pom.xml index 032fc348bb4f0..208167a5f3033 100644 --- a/plugin/trino-atop/pom.xml +++ b/plugin/trino-atop/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-base-jdbc/pom.xml b/plugin/trino-base-jdbc/pom.xml index 7d9e8a828e8e1..2952976342510 100644 --- a/plugin/trino-base-jdbc/pom.xml +++ b/plugin/trino-base-jdbc/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -275,12 +275,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java index a8150d6245634..42d6731d52cc9 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java @@ -524,6 +524,35 @@ protected static Optional getAdditionalPredicate(List

implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + try (Connection connection = this.connectionFactory.openConnection(session)) { + return Optional.of(queryBuilder.prepareJoinQuery( + this, + session, + connection, + joinType, + leftSource, + leftProjections, + rightSource, + rightProjections, + joinConditions)); + } + catch (SQLException e) { + throw new TrinoException(JDBC_ERROR, e); + } + } + + @Deprecated + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -540,7 +569,7 @@ public Optional implementJoin( } try (Connection connection = this.connectionFactory.openConnection(session)) { - return Optional.of(queryBuilder.prepareJoinQuery( + return Optional.of(queryBuilder.legacyPrepareJoinQuery( this, session, connection, diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java index 032d9895ca1bf..ecea7b964a4eb 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java @@ -281,6 +281,20 @@ public CallableStatement buildProcedure(ConnectorSession session, Connection con @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + return delegate.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -290,7 +304,7 @@ public Optional implementJoin( Map leftAssignments, JoinStatistics statistics) { - return delegate.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return delegate.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); } @Override diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java index cee54f86096c2..7d0d7f6d6b089 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java @@ -73,6 +73,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; @@ -94,6 +95,7 @@ import static io.trino.plugin.jdbc.JdbcErrorCode.JDBC_NON_TRANSIENT_ERROR; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.isAggregationPushdownEnabled; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.isComplexExpressionPushdown; +import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.isComplexJoinPushdownEnabled; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.isJoinPushdownEnabled; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.isTopNPushdownEnabled; 
import static io.trino.plugin.jdbc.JdbcWriteSessionProperties.isNonTransactionalInsert; @@ -436,6 +438,120 @@ static JdbcColumnHandle createSyntheticAggregationColumn(AggregateFunction aggre .build(); } + @Override + public Optional> applyJoin( + ConnectorSession session, + JoinType joinType, + ConnectorTableHandle left, + ConnectorTableHandle right, + ConnectorExpression joinCondition, + Map leftAssignments, + Map rightAssignments, + JoinStatistics statistics) + { + if (!isComplexJoinPushdownEnabled(session)) { + // Fallback to the old join pushdown code + return JdbcMetadata.super.applyJoin( + session, + joinType, + left, + right, + joinCondition, + leftAssignments, + rightAssignments, + statistics); + } + + if (isTableHandleForProcedure(left) || isTableHandleForProcedure(right)) { + return Optional.empty(); + } + + if (!isJoinPushdownEnabled(session)) { + return Optional.empty(); + } + + JdbcTableHandle leftHandle = flushAttributesAsQuery(session, (JdbcTableHandle) left); + JdbcTableHandle rightHandle = flushAttributesAsQuery(session, (JdbcTableHandle) right); + + if (!leftHandle.getAuthorization().equals(rightHandle.getAuthorization())) { + return Optional.empty(); + } + int nextSyntheticColumnId = max(leftHandle.getNextSyntheticColumnId(), rightHandle.getNextSyntheticColumnId()); + + ImmutableMap.Builder newLeftColumnsBuilder = ImmutableMap.builder(); + OptionalInt maxColumnNameLength = jdbcClient.getMaxColumnNameLength(session); + for (JdbcColumnHandle column : jdbcClient.getColumns(session, leftHandle)) { + newLeftColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId, maxColumnNameLength)); + nextSyntheticColumnId++; + } + Map newLeftColumns = newLeftColumnsBuilder.buildOrThrow(); + + ImmutableMap.Builder newRightColumnsBuilder = ImmutableMap.builder(); + for (JdbcColumnHandle column : jdbcClient.getColumns(session, rightHandle)) { + newRightColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId, maxColumnNameLength)); + nextSyntheticColumnId++; + } + Map newRightColumns = newRightColumnsBuilder.buildOrThrow(); + + Map assignments = ImmutableMap.builder() + .putAll(leftAssignments.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> newLeftColumns.get((JdbcColumnHandle) entry.getValue())))) + .putAll(rightAssignments.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> newRightColumns.get((JdbcColumnHandle) entry.getValue())))) + .buildOrThrow(); + + ImmutableList.Builder joinConditions = ImmutableList.builder(); + for (ConnectorExpression conjunct : extractConjuncts(joinCondition)) { + Optional converted = jdbcClient.convertPredicate(session, conjunct, assignments); + if (converted.isEmpty()) { + return Optional.empty(); + } + joinConditions.add(converted.get()); + } + + Optional joinQuery = jdbcClient.implementJoin( + session, + joinType, + asPreparedQuery(leftHandle), + newLeftColumns.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getColumnName())), + asPreparedQuery(rightHandle), + newRightColumns.entrySet().stream() + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getColumnName())), + joinConditions.build(), + statistics); + + if (joinQuery.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(new JoinApplicationResult<>( + new JdbcTableHandle( + new JdbcQueryRelationHandle(joinQuery.get()), + TupleDomain.all(), + ImmutableList.of(), + Optional.empty(), + OptionalLong.empty(), + Optional.of( + 
ImmutableList.builder() + .addAll(newLeftColumns.values()) + .addAll(newRightColumns.values()) + .build()), + leftHandle.getAllReferencedTables().flatMap(leftReferencedTables -> + rightHandle.getAllReferencedTables().map(rightReferencedTables -> + ImmutableSet.builder() + .addAll(leftReferencedTables) + .addAll(rightReferencedTables) + .build())), + nextSyntheticColumnId, + leftHandle.getAuthorization(), + leftHandle.getUpdateAssignments()), + ImmutableMap.copyOf(newLeftColumns), + ImmutableMap.copyOf(newRightColumns), + precalculateStatisticsForPushdown)); + } + + @Deprecated @Override public Optional> applyJoin( ConnectorSession session, @@ -488,16 +604,16 @@ public Optional> applyJoin( jdbcJoinConditions.add(new JdbcJoinCondition(leftColumn.get(), joinCondition.getOperator(), rightColumn.get())); } - Optional joinQuery = jdbcClient.implementJoin( + Optional joinQuery = jdbcClient.legacyImplementJoin( session, joinType, asPreparedQuery(leftHandle), asPreparedQuery(rightHandle), jdbcJoinConditions.build(), newRightColumns.entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getColumnName())), + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getColumnName())), newLeftColumns.entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getColumnName())), + .collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getColumnName())), statistics); if (joinQuery.isEmpty()) { diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultQueryBuilder.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultQueryBuilder.java index dfd3b06743295..bd06719f9ce1d 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultQueryBuilder.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultQueryBuilder.java @@ -112,6 +112,43 @@ public PreparedQuery prepareSelectQuery( @Override public PreparedQuery prepareJoinQuery( + JdbcClient client, + ConnectorSession session, + Connection connection, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions) + { + // Joins with no conditions are not pushed down, so it is a safe assumption and simplifies the code here + verify(!joinConditions.isEmpty(), "joinConditions is empty"); + + String query = format( + // The subquery aliases (`l` and `r`) are needed by some databases, but are not needed for expressions + // The joinConditions and output columns are aliased to use unique names. 
+ "SELECT * FROM (SELECT %s FROM (%s) l) l %s (SELECT %s FROM (%s) r) r ON %s", + formatProjections(client, leftProjections), + leftSource.getQuery(), + formatJoinType(joinType), + formatProjections(client, rightProjections), + rightSource.getQuery(), + joinConditions.stream() + .map(ParameterizedExpression::expression) + .collect(joining(") AND (", "(", ")"))); + List parameters = ImmutableList.builder() + .addAll(leftSource.getParameters()) + .addAll(rightSource.getParameters()) + .addAll(joinConditions.stream() + .flatMap(expression -> expression.parameters().stream()) + .iterator()) + .build(); + return new PreparedQuery(query, parameters); + } + + @Override + public PreparedQuery legacyPrepareJoinQuery( JdbcClient client, ConnectorSession session, Connection connection, @@ -296,6 +333,13 @@ protected String buildJoinColumn(JdbcClient client, JdbcColumnHandle columnHandl return client.quoted(columnHandle.getColumnName()); } + protected String formatProjections(JdbcClient client, Map projections) + { + return projections.entrySet().stream() + .map(entry -> format("%s AS %s", client.quoted(entry.getKey().getColumnName()), client.quoted(entry.getValue()))) + .collect(joining(", ")); + } + protected String formatAssignments(JdbcClient client, String relationAlias, Map assignments) { return assignments.entrySet().stream() diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java index 2f784634a44b0..3f672896d56c1 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java @@ -208,6 +208,20 @@ public CallableStatement buildProcedure(ConnectorSession session, Connection con @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + return delegate().implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -217,7 +231,7 @@ public Optional implementJoin( Map leftAssignments, JoinStatistics statistics) { - return delegate().implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return delegate().legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); } @Override diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java index 38e6f7e801141..cb2ff550a8e8f 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java @@ -124,6 +124,17 @@ CallableStatement buildProcedure(ConnectorSession session, Connection connection throws SQLException; Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics); + + @Deprecated + Optional legacyImplementJoin( 
ConnectorSession session, JoinType joinType, PreparedQuery leftSource, diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataConfig.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataConfig.java index 039195481a86e..d6896c157a1ab 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataConfig.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataConfig.java @@ -30,6 +30,7 @@ public class JdbcMetadataConfig * in terms of performance and money due to an increased network traffic. */ private boolean joinPushdownEnabled; + private boolean complexJoinPushdownEnabled = true; private boolean aggregationPushdownEnabled = true; private boolean topNPushdownEnabled = true; @@ -67,6 +68,19 @@ public JdbcMetadataConfig setJoinPushdownEnabled(boolean joinPushdownEnabled) return this; } + public boolean isComplexJoinPushdownEnabled() + { + return complexJoinPushdownEnabled; + } + + @Config("join-pushdown.with-expressions") + @ConfigDescription("Enable join pushdown with complex expressions") + public JdbcMetadataConfig setComplexJoinPushdownEnabled(boolean complexJoinPushdownEnabled) + { + this.complexJoinPushdownEnabled = complexJoinPushdownEnabled; + return this; + } + public boolean isAggregationPushdownEnabled() { return aggregationPushdownEnabled; diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataSessionProperties.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataSessionProperties.java index d4ae2a0b5b12b..96476cce488e8 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataSessionProperties.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcMetadataSessionProperties.java @@ -33,6 +33,7 @@ public class JdbcMetadataSessionProperties { public static final String COMPLEX_EXPRESSION_PUSHDOWN = "complex_expression_pushdown"; public static final String JOIN_PUSHDOWN_ENABLED = "join_pushdown_enabled"; + public static final String COMPLEX_JOIN_PUSHDOWN_ENABLED = "complex_join_pushdown_enabled"; public static final String AGGREGATION_PUSHDOWN_ENABLED = "aggregation_pushdown_enabled"; public static final String TOPN_PUSHDOWN_ENABLED = "topn_pushdown_enabled"; public static final String DOMAIN_COMPACTION_THRESHOLD = "domain_compaction_threshold"; @@ -54,6 +55,11 @@ public JdbcMetadataSessionProperties(JdbcMetadataConfig jdbcMetadataConfig, @Max "Enable join pushdown", jdbcMetadataConfig.isJoinPushdownEnabled(), false)) + .add(booleanProperty( + COMPLEX_JOIN_PUSHDOWN_ENABLED, + "Enable join pushdown with non-comparison expressions", + jdbcMetadataConfig.isComplexJoinPushdownEnabled(), + false)) .add(booleanProperty( AGGREGATION_PUSHDOWN_ENABLED, "Enable aggregation pushdown", @@ -89,6 +95,11 @@ public static boolean isJoinPushdownEnabled(ConnectorSession session) return session.getProperty(JOIN_PUSHDOWN_ENABLED, Boolean.class); } + public static boolean isComplexJoinPushdownEnabled(ConnectorSession session) + { + return session.getProperty(COMPLEX_JOIN_PUSHDOWN_ENABLED, Boolean.class); + } + public static boolean isAggregationPushdownEnabled(ConnectorSession session) { return session.getProperty(AGGREGATION_PUSHDOWN_ENABLED, Boolean.class); diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/QueryBuilder.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/QueryBuilder.java index 52cb58e80cb59..9c6fbf7c4ca95 100644 --- 
a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/QueryBuilder.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/QueryBuilder.java @@ -42,6 +42,17 @@ PreparedQuery prepareSelectQuery( Optional additionalPredicate); PreparedQuery prepareJoinQuery( + JdbcClient client, + ConnectorSession session, + Connection connection, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions); + + PreparedQuery legacyPrepareJoinQuery( JdbcClient client, ConnectorSession session, Connection connection, diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/TracingDataSource.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/TracingDataSource.java index c9a1a91dca7fb..453d6956c0a6d 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/TracingDataSource.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/TracingDataSource.java @@ -25,6 +25,7 @@ import java.util.Properties; import java.util.logging.Logger; +import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; public class TracingDataSource @@ -70,7 +71,9 @@ public JdbcDataSource(Driver driver, String connectionUrl, Properties properties public Connection getConnection() throws SQLException { - return driver.connect(connectionUrl, properties); + Connection connection = driver.connect(connectionUrl, properties); + checkState(connection != null, "Driver returned null connection, make sure the connection URL '%s' is valid for the driver %s", connectionUrl, driver); + return connection; } @Override diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ComparisonOperator.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ComparisonOperator.java new file mode 100644 index 0000000000000..f9fa748442890 --- /dev/null +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ComparisonOperator.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.jdbc.expression; + +import io.trino.spi.expression.FunctionName; + +import java.util.Map; +import java.util.stream.Stream; + +import static com.google.common.base.Verify.verifyNotNull; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static io.trino.spi.expression.StandardFunctions.EQUAL_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.GREATER_THAN_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.GREATER_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.IS_DISTINCT_FROM_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.LESS_THAN_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.LESS_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME; +import static io.trino.spi.expression.StandardFunctions.NOT_EQUAL_OPERATOR_FUNCTION_NAME; +import static java.util.Objects.requireNonNull; +import static java.util.function.Function.identity; + +public enum ComparisonOperator +{ + EQUAL(EQUAL_OPERATOR_FUNCTION_NAME, "="), + NOT_EQUAL(NOT_EQUAL_OPERATOR_FUNCTION_NAME, "<>"), + LESS_THAN(LESS_THAN_OPERATOR_FUNCTION_NAME, "<"), + LESS_THAN_OR_EQUAL(LESS_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME, "<="), + GREATER_THAN(GREATER_THAN_OPERATOR_FUNCTION_NAME, ">"), + GREATER_THAN_OR_EQUAL(GREATER_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME, ">="), + IS_DISTINCT_FROM(IS_DISTINCT_FROM_OPERATOR_FUNCTION_NAME, "IS DISTINCT FROM"), + /**/; + + private final FunctionName functionName; + private final String operator; + + private static final Map OPERATOR_BY_FUNCTION_NAME = Stream.of(values()) + .collect(toImmutableMap(ComparisonOperator::getFunctionName, identity())); + + ComparisonOperator(FunctionName functionName, String operator) + { + this.functionName = requireNonNull(functionName, "functionName is null"); + this.operator = requireNonNull(operator, "operator is null"); + } + + public FunctionName getFunctionName() + { + return functionName; + } + + public String getOperator() + { + return operator; + } + + public static ComparisonOperator forFunctionName(FunctionName functionName) + { + return verifyNotNull(OPERATOR_BY_FUNCTION_NAME.get(functionName), "Function name not recognized: %s", functionName); + } +} diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/GenericRewrite.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/GenericRewrite.java index f562ff2f1491b..0917737ad9633 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/GenericRewrite.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/GenericRewrite.java @@ -17,11 +17,13 @@ import io.trino.matching.Captures; import io.trino.plugin.base.expression.ConnectorExpressionRule; import io.trino.plugin.jdbc.QueryParameter; +import io.trino.spi.connector.ConnectorSession; import io.trino.spi.expression.ConnectorExpression; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -35,16 +37,24 @@ public class GenericRewrite // Matches words in the `rewritePattern` private static final Pattern REWRITE_TOKENS = Pattern.compile("(? 
condition; private final ExpressionPattern expressionPattern; private final String rewritePattern; - public GenericRewrite(Map> typeClasses, String expressionPattern, String rewritePattern) + public GenericRewrite(Map> typeClasses, Predicate condition, String expressionPattern, String rewritePattern) { + this.condition = requireNonNull(condition, "condition is null"); ExpressionMappingParser parser = new ExpressionMappingParser(typeClasses); this.expressionPattern = parser.createExpressionPattern(expressionPattern); this.rewritePattern = requireNonNull(rewritePattern, "rewritePattern is null"); } + @Override + public boolean isEnabled(ConnectorSession session) + { + return condition.test(session); + } + @Override public io.trino.matching.Pattern getPattern() { diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/JdbcConnectorExpressionRewriterBuilder.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/JdbcConnectorExpressionRewriterBuilder.java index 8256ff3fa077b..22bb050358e87 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/JdbcConnectorExpressionRewriterBuilder.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/JdbcConnectorExpressionRewriterBuilder.java @@ -13,14 +13,17 @@ */ package io.trino.plugin.jdbc.expression; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.trino.plugin.base.expression.ConnectorExpressionRewriter; import io.trino.plugin.base.expression.ConnectorExpressionRule; +import io.trino.spi.connector.ConnectorSession; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.function.Function; +import java.util.function.Predicate; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -64,17 +67,16 @@ public JdbcConnectorExpressionRewriterBuilder withTypeClass(String typeClass, Se return this; } - public ExpressionMapping map(String expressionPattern) + public ExpectSourceExpression when(Predicate condition) { - return new ExpressionMapping<>() - { - @Override - public JdbcConnectorExpressionRewriterBuilder to(String rewritePattern) - { - rules.add(new GenericRewrite(typeClasses, expressionPattern, rewritePattern)); - return JdbcConnectorExpressionRewriterBuilder.this; - } - }; + return new GenericRewriteBuilder() + .when(condition); + } + + public ExpectRewriteTarget map(String expressionPattern) + { + return new GenericRewriteBuilder() + .map(expressionPattern); } public ConnectorExpressionRewriter build() @@ -82,8 +84,40 @@ public ConnectorExpressionRewriter build() return new ConnectorExpressionRewriter<>(rules.build()); } - public interface ExpressionMapping + public interface ExpectSourceExpression + { + ExpectRewriteTarget map(String expressionPattern); + } + + public interface ExpectRewriteTarget + { + JdbcConnectorExpressionRewriterBuilder to(String rewritePattern); + } + + private class GenericRewriteBuilder + implements ExpectSourceExpression, ExpectRewriteTarget { - Continuation to(String rewritePattern); + private Predicate condition = session -> true; + private String expressionPattern; + + GenericRewriteBuilder when(Predicate condition) + { + this.condition = requireNonNull(condition, "condition is null"); + return this; + } + + @Override + public ExpectRewriteTarget map(String expressionPattern) + { + this.expressionPattern = requireNonNull(expressionPattern, "expressionPattern is 
null"); + return this; + } + + @Override + public JdbcConnectorExpressionRewriterBuilder to(String rewritePattern) + { + rules.add(new GenericRewrite(ImmutableMap.copyOf(typeClasses), condition, expressionPattern, rewritePattern)); + return JdbcConnectorExpressionRewriterBuilder.this; + } } } diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteCaseSensitiveComparison.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteCaseSensitiveComparison.java new file mode 100644 index 0000000000000..2ff0c1c8ad701 --- /dev/null +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteCaseSensitiveComparison.java @@ -0,0 +1,94 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.jdbc.expression; + +import com.google.common.collect.ImmutableList; +import io.trino.matching.Capture; +import io.trino.matching.Captures; +import io.trino.matching.Pattern; +import io.trino.plugin.base.expression.ConnectorExpressionRule; +import io.trino.plugin.jdbc.JdbcColumnHandle; +import io.trino.plugin.jdbc.QueryParameter; +import io.trino.spi.expression.Call; +import io.trino.spi.expression.FunctionName; +import io.trino.spi.expression.Variable; +import io.trino.spi.type.VarcharType; + +import java.util.Optional; +import java.util.Set; + +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static io.trino.matching.Capture.newCapture; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.argument; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.argumentCount; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.call; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.functionName; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.type; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.variable; +import static io.trino.plugin.jdbc.CaseSensitivity.CASE_SENSITIVE; +import static io.trino.spi.type.BooleanType.BOOLEAN; + +public class RewriteCaseSensitiveComparison + implements ConnectorExpressionRule +{ + private static final Capture LEFT = newCapture(); + private static final Capture RIGHT = newCapture(); + + private final Pattern pattern; + + public RewriteCaseSensitiveComparison(Set enabledOperators) + { + Set functionNames = enabledOperators.stream() + .map(ComparisonOperator::getFunctionName) + .collect(toImmutableSet()); + + pattern = call() + .with(type().equalTo(BOOLEAN)) + .with(functionName().matching(functionNames::contains)) + .with(argumentCount().equalTo(2)) + .with(argument(0).matching(variable().with(type().matching(VarcharType.class::isInstance)).capturedAs(LEFT))) + .with(argument(1).matching(variable().with(type().matching(VarcharType.class::isInstance)).capturedAs(RIGHT))); + } + + @Override + public Pattern getPattern() + { + return pattern; + } + + @Override + public Optional rewrite(Call expression, Captures 
captures, RewriteContext context) + { + ComparisonOperator comparison = ComparisonOperator.forFunctionName(expression.getFunctionName()); + Variable firstArgument = captures.get(LEFT); + Variable secondArgument = captures.get(RIGHT); + + if (!isCaseSensitive(firstArgument, context) || !isCaseSensitive(secondArgument, context)) { + return Optional.empty(); + } + return context.defaultRewrite(firstArgument).flatMap(first -> + context.defaultRewrite(secondArgument).map(second -> + new ParameterizedExpression( + "(%s) %s (%s)".formatted(first.expression(), comparison.getOperator(), second.expression()), + ImmutableList.builder() + .addAll(first.parameters()) + .addAll(second.parameters()) + .build()))); + } + + private static boolean isCaseSensitive(Variable variable, RewriteContext context) + { + return ((JdbcColumnHandle) context.getAssignment(variable.getName())).getJdbcTypeHandle().getCaseSensitivity().equals(Optional.of(CASE_SENSITIVE)); + } +} diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteComparison.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteComparison.java index 10db5e84ab6c3..2231ca86e33b6 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteComparison.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/RewriteComparison.java @@ -23,14 +23,10 @@ import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.FunctionName; -import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.stream.Stream; import static com.google.common.base.Verify.verify; -import static com.google.common.base.Verify.verifyNotNull; -import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.trino.matching.Capture.newCapture; import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.argument; @@ -39,17 +35,8 @@ import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.expression; import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.functionName; import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.type; -import static io.trino.spi.expression.StandardFunctions.EQUAL_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.GREATER_THAN_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.GREATER_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.IS_DISTINCT_FROM_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.LESS_THAN_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.LESS_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME; -import static io.trino.spi.expression.StandardFunctions.NOT_EQUAL_OPERATOR_FUNCTION_NAME; import static io.trino.spi.type.BooleanType.BOOLEAN; import static java.lang.String.format; -import static java.util.Objects.requireNonNull; -import static java.util.function.Function.identity; public class RewriteComparison implements ConnectorExpressionRule @@ -57,45 +44,6 @@ public class RewriteComparison private static final Capture LEFT = newCapture(); private static final Capture RIGHT = newCapture(); - public enum ComparisonOperator - { - EQUAL(EQUAL_OPERATOR_FUNCTION_NAME, "="), - NOT_EQUAL(NOT_EQUAL_OPERATOR_FUNCTION_NAME, "<>"), - LESS_THAN(LESS_THAN_OPERATOR_FUNCTION_NAME, "<"), - 
LESS_THAN_OR_EQUAL(LESS_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME, "<="), - GREATER_THAN(GREATER_THAN_OPERATOR_FUNCTION_NAME, ">"), - GREATER_THAN_OR_EQUAL(GREATER_THAN_OR_EQUAL_OPERATOR_FUNCTION_NAME, ">="), - IS_DISTINCT_FROM(IS_DISTINCT_FROM_OPERATOR_FUNCTION_NAME, "IS DISTINCT FROM"), - /**/; - - private final FunctionName functionName; - private final String operator; - - private static final Map OPERATOR_BY_FUNCTION_NAME = Stream.of(values()) - .collect(toImmutableMap(ComparisonOperator::getFunctionName, identity())); - - ComparisonOperator(FunctionName functionName, String operator) - { - this.functionName = requireNonNull(functionName, "functionName is null"); - this.operator = requireNonNull(operator, "operator is null"); - } - - private FunctionName getFunctionName() - { - return functionName; - } - - private String getOperator() - { - return operator; - } - - private static ComparisonOperator forFunctionName(FunctionName functionName) - { - return verifyNotNull(OPERATOR_BY_FUNCTION_NAME.get(functionName), "Function name not recognized: %s", functionName); - } - } - private final Pattern pattern; public RewriteComparison(Set enabledOperators) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java index d07bcfa5178c1..339cf17df0f55 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java @@ -229,6 +229,19 @@ public CallableStatement buildProcedure(ConnectorSession session, Connection con @Override public Optional implementJoin(ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + return stats.getImplementJoin().wrap(() -> delegate().implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics)); + } + + @Override + public Optional legacyImplementJoin(ConnectorSession session, JoinType joinType, PreparedQuery leftSource, PreparedQuery rightSource, @@ -237,7 +250,7 @@ public Optional implementJoin(ConnectorSession session, Map leftAssignments, JoinStatistics statistics) { - return stats.getImplementJoin().wrap(() -> delegate().implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); + return stats.getImplementJoin().wrap(() -> delegate().legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } @Override diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java index e2ea22c7d46a2..63ab5bebabdf7 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java @@ -14,6 +14,7 @@ package io.trino.plugin.jdbc; import com.google.common.collect.ImmutableList; +import io.airlift.log.Logger; import io.airlift.units.Duration; import io.trino.Session; import io.trino.spi.QueryId; @@ -63,6 +64,7 @@ import static io.trino.SystemSessionProperties.MARK_DISTINCT_STRATEGY; import static 
io.trino.plugin.jdbc.JdbcDynamicFilteringSessionProperties.DYNAMIC_FILTERING_ENABLED; import static io.trino.plugin.jdbc.JdbcDynamicFilteringSessionProperties.DYNAMIC_FILTERING_WAIT_TIMEOUT; +import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.COMPLEX_JOIN_PUSHDOWN_ENABLED; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.DOMAIN_COMPACTION_THRESHOLD; import static io.trino.plugin.jdbc.JdbcMetadataSessionProperties.JOIN_PUSHDOWN_ENABLED; import static io.trino.plugin.jdbc.JoinOperator.FULL_JOIN; @@ -124,6 +126,8 @@ public abstract class BaseJdbcConnectorTest extends BaseConnectorTest { + private static final Logger log = Logger.get(BaseJdbcConnectorTest.class); + private final ExecutorService executor = newCachedThreadPool(daemonThreadsNamed(getClass().getName())); protected abstract SqlExecutor onRemoteDatabase(); @@ -1179,71 +1183,44 @@ public void testJoinPushdownDisabled() .joinIsNotFullyPushedDown(); } - /** - * Verify !SUPPORTS_JOIN_PUSHDOWN declaration is true. - */ @Test - public void verifySupportsJoinPushdownDeclaration() + public void testJoinPushdown() { - if (hasBehavior(SUPPORTS_JOIN_PUSHDOWN)) { - // Covered by testJoinPushdown - return; - } + Session session = joinPushdownEnabled(getSession()); - assertThat(query(joinPushdownEnabled(getSession()), "SELECT r.name, n.name FROM nation n JOIN region r ON n.regionkey = r.regionkey")) - .joinIsNotFullyPushedDown(); - } - - /** - * Verify !SUPPORTS_JOIN_PUSHDOWN_WITH_FULL_JOIN declaration is true. - */ - @Test - public void verifySupportsJoinPushdownWithFullJoinDeclaration() - { - if (hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_FULL_JOIN)) { - // Covered by testJoinPushdown + if (!hasBehavior(SUPPORTS_JOIN_PUSHDOWN)) { + assertThat(query(session, "SELECT r.name, n.name FROM nation n JOIN region r ON n.regionkey = r.regionkey")) + .joinIsNotFullyPushedDown(); return; } - assertThat(query(joinPushdownEnabled(getSession()), "SELECT r.name, n.name FROM nation n FULL JOIN region r ON n.regionkey = r.regionkey")) - .joinIsNotFullyPushedDown(); - } + try (TestTable nationLowercaseTable = new TestTable( + // If a connector supports Join pushdown, but does not allow CTAS, we need to make the table creation here overridable. + getQueryRunner()::execute, + "nation_lowercase", + "AS SELECT nationkey, lower(name) name, regionkey FROM nation")) { + for (JoinOperator joinOperator : JoinOperator.values()) { + log.info("Testing joinOperator=%s", joinOperator); + + if (joinOperator == FULL_JOIN && !hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_FULL_JOIN)) { + assertThat(query(session, "SELECT r.name, n.name FROM nation n FULL JOIN region r ON n.regionkey = r.regionkey")) + .joinIsNotFullyPushedDown(); + continue; + } - @Test - public void testJoinPushdown() - { - for (JoinOperator joinOperator : JoinOperator.values()) { - Session session = joinPushdownEnabled(getSession()); + // Disable DF here for the sake of negative test cases' expected plan. With DF enabled, some operators return in DF's FilterNode and some do not. 
+ Session withoutDynamicFiltering = Session.builder(session) + .setSystemProperty("enable_dynamic_filtering", "false") + .build(); - if (!hasBehavior(SUPPORTS_JOIN_PUSHDOWN)) { - assertThat(query(session, "SELECT r.name, n.name FROM nation n JOIN region r ON n.regionkey = r.regionkey")) - .joinIsNotFullyPushedDown(); - return; - } + String notDistinctOperator = "IS NOT DISTINCT FROM"; + List nonEqualities = Stream.concat( + Stream.of(JoinCondition.Operator.values()) + .filter(operator -> operator != JoinCondition.Operator.EQUAL) + .map(JoinCondition.Operator::getValue), + Stream.of(notDistinctOperator)) + .collect(toImmutableList()); - if (joinOperator == FULL_JOIN && !hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_FULL_JOIN)) { - // Covered by verifySupportsJoinPushdownWithFullJoinDeclaration - return; - } - - // Disable DF here for the sake of negative test cases' expected plan. With DF enabled, some operators return in DF's FilterNode and some do not. - Session withoutDynamicFiltering = Session.builder(session) - .setSystemProperty("enable_dynamic_filtering", "false") - .build(); - - String notDistinctOperator = "IS NOT DISTINCT FROM"; - List nonEqualities = Stream.concat( - Stream.of(JoinCondition.Operator.values()) - .filter(operator -> operator != JoinCondition.Operator.EQUAL) - .map(JoinCondition.Operator::getValue), - Stream.of(notDistinctOperator)) - .collect(toImmutableList()); - - try (TestTable nationLowercaseTable = new TestTable( - // If a connector supports Join pushdown, but does not allow CTAS, we need to make the table creation here overridable. - getQueryRunner()::execute, - "nation_lowercase", - "AS SELECT nationkey, lower(name) name, regionkey FROM nation")) { // basic case assertThat(query(session, format("SELECT r.name, n.name FROM nation n %s region r ON n.regionkey = r.regionkey", joinOperator))).isFullyPushedDown(); @@ -1273,17 +1250,18 @@ public void testJoinPushdown() assertJoinConditionallyPushedDown( withoutDynamicFiltering, format("SELECT r.name, n.name FROM nation n %s region r ON n.regionkey %s r.regionkey", joinOperator, operator), - expectJoinPushdown(operator) && expectJoinPushdowOnInequalityOperator(joinOperator)); + expectJoinPushdown(operator) && expectJoinPushdownOnInequalityOperator(joinOperator)); // varchar inequality predicate assertJoinConditionallyPushedDown( withoutDynamicFiltering, format("SELECT n.name, nl.name FROM nation n %s %s nl ON n.name %s nl.name", joinOperator, nationLowercaseTable.getName(), operator), - expectVarcharJoinPushdown(operator) && expectJoinPushdowOnInequalityOperator(joinOperator)); + expectVarcharJoinPushdown(operator) && expectJoinPushdownOnInequalityOperator(joinOperator)); } // inequality along with an equality, which constitutes an equi-condition and allows filter to remain as part of the Join for (String operator : nonEqualities) { + log.info("Testing [joinOperator=%s] operator=%s on number", joinOperator, operator); assertJoinConditionallyPushedDown( session, format("SELECT n.name, c.name FROM nation n %s customer c ON n.nationkey = c.nationkey AND n.regionkey %s c.custkey", joinOperator, operator), @@ -1292,6 +1270,7 @@ public void testJoinPushdown() // varchar inequality along with an equality, which constitutes an equi-condition and allows filter to remain as part of the Join for (String operator : nonEqualities) { + log.info("Testing [joinOperator=%s] operator=%s on varchar", joinOperator, operator); assertJoinConditionallyPushedDown( session, format("SELECT n.name, nl.name FROM nation n %s %s nl ON n.regionkey = 
nl.regionkey AND n.name %s nl.name", joinOperator, nationLowercaseTable.getName(), operator), @@ -1347,6 +1326,28 @@ public void testJoinPushdown() } } + @Test + public void testComplexJoinPushdown() + { + String catalog = getSession().getCatalog().orElseThrow(); + Session session = joinPushdownEnabled(getSession()); + String query = "SELECT n.name, o.orderstatus FROM nation n JOIN orders o ON n.regionkey = o.orderkey AND n.nationkey + o.custkey - 3 = 0"; + + // The join cannot be pushed down without "complex join pushdown" + assertThat(query( + Session.builder(session) + .setCatalogSessionProperty(catalog, COMPLEX_JOIN_PUSHDOWN_ENABLED, "false") + .build(), + query)) + .joinIsNotFullyPushedDown(); + + // The join can be pushed down + assertJoinConditionallyPushedDown( + session, + query, + hasBehavior(SUPPORTS_JOIN_PUSHDOWN) && hasBehavior(SUPPORTS_PREDICATE_ARITHMETIC_EXPRESSION_PUSHDOWN)); + } + @Test public void testExplainAnalyzePhysicalReadWallTime() { @@ -1373,11 +1374,17 @@ protected QueryAssert assertJoinConditionallyPushedDown( @Language("SQL") String query, boolean condition) { - QueryAssert queryAssert = assertThat(query(session, query)); - if (condition) { - return queryAssert.isFullyPushedDown(); + try { + QueryAssert queryAssert = assertThat(query(session, query)); + if (condition) { + return queryAssert.isFullyPushedDown(); + } + return queryAssert.joinIsNotFullyPushedDown(); + } + catch (Throwable e) { + e.addSuppressed(new Exception("Query: " + query)); + throw e; } - return queryAssert.joinIsNotFullyPushedDown(); } protected void assertConditionallyOrderedPushedDown( @@ -1386,36 +1393,33 @@ protected void assertConditionallyOrderedPushedDown( boolean condition, PlanMatchPattern otherwiseExpected) { - QueryAssert queryAssert = assertThat(query(session, query)).ordered(); - if (condition) { - queryAssert.isFullyPushedDown(); + try { + QueryAssert queryAssert = assertThat(query(session, query)).ordered(); + if (condition) { + queryAssert.isFullyPushedDown(); + } + else { + queryAssert.isNotFullyPushedDown(otherwiseExpected); + } } - else { - queryAssert.isNotFullyPushedDown(otherwiseExpected); + catch (Throwable e) { + e.addSuppressed(new Exception("Query: " + query)); + throw e; } } protected boolean expectJoinPushdown(String operator) { if ("IS NOT DISTINCT FROM".equals(operator)) { - // TODO (https://github.com/trinodb/trino/issues/6967) support join pushdown for IS NOT DISTINCT FROM - return false; - } - switch (toJoinConditionOperator(operator)) { - case EQUAL: - case NOT_EQUAL: - case LESS_THAN: - case LESS_THAN_OR_EQUAL: - case GREATER_THAN: - case GREATER_THAN_OR_EQUAL: - return true; - case IS_DISTINCT_FROM: - return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM); + return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM); } - throw new AssertionError(); // unreachable + return switch (toJoinConditionOperator(operator)) { + case EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL -> true; + case IS_DISTINCT_FROM -> hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM); + }; } - protected boolean expectJoinPushdowOnInequalityOperator(JoinOperator joinOperator) + protected boolean expectJoinPushdownOnInequalityOperator(JoinOperator joinOperator) { // Currently no pushdown as inequality predicate is removed from Join to maintain Cross Join and Filter as separate nodes return joinOperator != JOIN; @@ -1424,22 +1428,13 @@ protected boolean expectJoinPushdowOnInequalityOperator(JoinOperator joinOperato private boolean 
expectVarcharJoinPushdown(String operator) { if ("IS NOT DISTINCT FROM".equals(operator)) { - // TODO (https://github.com/trinodb/trino/issues/6967) support join pushdown for IS NOT DISTINCT FROM - return false; - } - switch (toJoinConditionOperator(operator)) { - case EQUAL: - case NOT_EQUAL: - return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_EQUALITY); - case LESS_THAN: - case LESS_THAN_OR_EQUAL: - case GREATER_THAN: - case GREATER_THAN_OR_EQUAL: - return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_INEQUALITY); - case IS_DISTINCT_FROM: - return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM) && hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_EQUALITY); + return hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM) && hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_EQUALITY); } - throw new AssertionError(); // unreachable + return switch (toJoinConditionOperator(operator)) { + case EQUAL, NOT_EQUAL -> hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_EQUALITY); + case LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL -> hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_INEQUALITY); + case IS_DISTINCT_FROM -> hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_DISTINCT_FROM) && hasBehavior(SUPPORTS_JOIN_PUSHDOWN_WITH_VARCHAR_EQUALITY); + }; } private JoinCondition.Operator toJoinConditionOperator(String operator) @@ -2055,8 +2050,8 @@ public void testJoinPushdownWithLongIdentifiers() try (TestTable left = new TestTable(getQueryRunner()::execute, "test_long_id_l", format("(%s BIGINT)", validColumnName)); TestTable right = new TestTable(getQueryRunner()::execute, "test_long_id_r", format("(%s BIGINT)", validColumnName))) { assertThat(query(joinPushdownEnabled(getSession()), """ - SELECT l.%1$s, r.%1$s - FROM %2$s l JOIN %3$s r ON l.%1$s = r.%1$s""".formatted(validColumnName, left.getName(), right.getName()))) + SELECT l.%1$s, r.%1$s + FROM %2$s l JOIN %3$s r ON l.%1$s = r.%1$s""".formatted(validColumnName, left.getName(), right.getName()))) .isFullyPushedDown(); } } diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcQueryBuilder.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcQueryBuilder.java index 0c04b64d4ad7e..170df828714ba 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcQueryBuilder.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcQueryBuilder.java @@ -194,41 +194,41 @@ public void testNormalBuildSql() { TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.builder() .put(columns.get(0), Domain.create(SortedRangeSet.copyOf(BIGINT, - ImmutableList.of( - Range.equal(BIGINT, 128L), - Range.equal(BIGINT, 180L), - Range.equal(BIGINT, 233L), - Range.lessThan(BIGINT, 25L), - Range.range(BIGINT, 66L, true, 96L, true), - Range.greaterThan(BIGINT, 192L))), + ImmutableList.of( + Range.equal(BIGINT, 128L), + Range.equal(BIGINT, 180L), + Range.equal(BIGINT, 233L), + Range.lessThan(BIGINT, 25L), + Range.range(BIGINT, 66L, true, 96L, true), + Range.greaterThan(BIGINT, 192L))), false)) .put(columns.get(1), Domain.create(SortedRangeSet.copyOf(DOUBLE, - ImmutableList.of( - Range.equal(DOUBLE, 200011.0), - Range.equal(DOUBLE, 200014.0), - Range.equal(DOUBLE, 200017.0), - Range.equal(DOUBLE, 200116.5), - Range.range(DOUBLE, 200030.0, true, 200036.0, true), - Range.range(DOUBLE, 200048.0, true, 200099.0, true))), + ImmutableList.of( + Range.equal(DOUBLE, 200011.0), + Range.equal(DOUBLE, 200014.0), + Range.equal(DOUBLE, 200017.0), + 
Range.equal(DOUBLE, 200116.5), + Range.range(DOUBLE, 200030.0, true, 200036.0, true), + Range.range(DOUBLE, 200048.0, true, 200099.0, true))), false)) .put(columns.get(7), Domain.create(SortedRangeSet.copyOf(TINYINT, - ImmutableList.of( - Range.range(TINYINT, 60L, true, 70L, false), - Range.range(TINYINT, 52L, true, 55L, false))), + ImmutableList.of( + Range.range(TINYINT, 60L, true, 70L, false), + Range.range(TINYINT, 52L, true, 55L, false))), false)) .put(columns.get(8), Domain.create(SortedRangeSet.copyOf(SMALLINT, - ImmutableList.of( - Range.range(SMALLINT, -75L, true, -68L, true), - Range.range(SMALLINT, -200L, true, -100L, false))), + ImmutableList.of( + Range.range(SMALLINT, -75L, true, -68L, true), + Range.range(SMALLINT, -200L, true, -100L, false))), false)) .put(columns.get(9), Domain.create(SortedRangeSet.copyOf(INTEGER, - ImmutableList.of( - Range.equal(INTEGER, 80L), - Range.equal(INTEGER, 96L), - Range.lessThan(INTEGER, 0L))), + ImmutableList.of( + Range.equal(INTEGER, 80L), + Range.equal(INTEGER, 96L), + Range.lessThan(INTEGER, 0L))), false)) .put(columns.get(2), Domain.create(SortedRangeSet.copyOf(BOOLEAN, - ImmutableList.of(Range.equal(BOOLEAN, true))), + ImmutableList.of(Range.equal(BOOLEAN, true))), false)) .buildOrThrow()); @@ -309,10 +309,10 @@ public void testBuildSqlWithFloat() { TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( columns.get(10), Domain.create(SortedRangeSet.copyOf(REAL, - ImmutableList.of( - Range.equal(REAL, (long) floatToRawIntBits(100.0f + 0)), - Range.equal(REAL, (long) floatToRawIntBits(100.008f + 0)), - Range.equal(REAL, (long) floatToRawIntBits(100.0f + 14)))), + ImmutableList.of( + Range.equal(REAL, (long) floatToRawIntBits(100.0f + 0)), + Range.equal(REAL, (long) floatToRawIntBits(100.008f + 0)), + Range.equal(REAL, (long) floatToRawIntBits(100.0f + 14)))), false))); Connection connection = database.getConnection(); @@ -343,10 +343,10 @@ public void testBuildSqlWithVarchar() { TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( columns.get(3), Domain.create(SortedRangeSet.copyOf(VARCHAR, - ImmutableList.of( - Range.range(VARCHAR, utf8Slice("test_str_700"), true, utf8Slice("test_str_702"), false), - Range.equal(VARCHAR, utf8Slice("test_str_180")), - Range.equal(VARCHAR, utf8Slice("test_str_196")))), + ImmutableList.of( + Range.range(VARCHAR, utf8Slice("test_str_700"), true, utf8Slice("test_str_702"), false), + Range.equal(VARCHAR, utf8Slice("test_str_180")), + Range.equal(VARCHAR, utf8Slice("test_str_196")))), false))); Connection connection = database.getConnection(); @@ -379,10 +379,10 @@ public void testBuildSqlWithChar() CharType charType = CharType.createCharType(0); TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( columns.get(11), Domain.create(SortedRangeSet.copyOf(charType, - ImmutableList.of( - Range.range(charType, utf8Slice("test_str_700"), true, utf8Slice("test_str_702"), false), - Range.equal(charType, utf8Slice("test_str_180")), - Range.equal(charType, utf8Slice("test_str_196")))), + ImmutableList.of( + Range.range(charType, utf8Slice("test_str_700"), true, utf8Slice("test_str_702"), false), + Range.equal(charType, utf8Slice("test_str_180")), + Range.equal(charType, utf8Slice("test_str_196")))), false))); Connection connection = database.getConnection(); @@ -419,16 +419,16 @@ public void testBuildSqlWithDateTime() { TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( columns.get(4), Domain.create(SortedRangeSet.copyOf(DATE, - 
ImmutableList.of( - Range.range(DATE, toDays(2016, 6, 7), true, toDays(2016, 6, 17), false), - Range.equal(DATE, toDays(2016, 6, 3)), - Range.equal(DATE, toDays(2016, 10, 21)))), + ImmutableList.of( + Range.range(DATE, toDays(2016, 6, 7), true, toDays(2016, 6, 17), false), + Range.equal(DATE, toDays(2016, 6, 3)), + Range.equal(DATE, toDays(2016, 10, 21)))), false), columns.get(5), Domain.create(SortedRangeSet.copyOf(TIME_MILLIS, - ImmutableList.of( - Range.range(TIME_MILLIS, toTimeRepresentation(6, 12, 23), false, toTimeRepresentation(8, 23, 37), true), - Range.equal(TIME_MILLIS, toTimeRepresentation(2, 3, 4)), - Range.equal(TIME_MILLIS, toTimeRepresentation(20, 23, 37)))), + ImmutableList.of( + Range.range(TIME_MILLIS, toTimeRepresentation(6, 12, 23), false, toTimeRepresentation(8, 23, 37), true), + Range.equal(TIME_MILLIS, toTimeRepresentation(2, 3, 4)), + Range.equal(TIME_MILLIS, toTimeRepresentation(20, 23, 37)))), false))); Connection connection = database.getConnection(); @@ -472,10 +472,10 @@ public void testBuildSqlWithTimestamp() { TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( columns.get(6), Domain.create(SortedRangeSet.copyOf(TIMESTAMP_MILLIS, - ImmutableList.of( - Range.equal(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 6, 3, 0, 23, 37)), - Range.equal(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 10, 19, 16, 23, 37)), - Range.range(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 6, 7, 8, 23, 37), false, toTrinoTimestamp(2016, 6, 9, 12, 23, 37), true))), + ImmutableList.of( + Range.equal(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 6, 3, 0, 23, 37)), + Range.equal(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 10, 19, 16, 23, 37)), + Range.range(TIMESTAMP_MILLIS, toTrinoTimestamp(2016, 6, 7, 8, 23, 37), false, toTrinoTimestamp(2016, 6, 9, 12, 23, 37), true))), false))); Connection connection = database.getConnection(); @@ -512,6 +512,39 @@ public void testBuildJoinSql() Connection connection = database.getConnection(); PreparedQuery preparedQuery = queryBuilder.prepareJoinQuery( + jdbcClient, + SESSION, + connection, + JoinType.INNER, + new PreparedQuery("SELECT * FROM \"test_table\"", List.of()), + ImmutableMap.of(columns.get(2), "name1", columns.get(7), "lcol7"), + new PreparedQuery("SELECT * FROM \"test_table\"", List.of()), + ImmutableMap.of(columns.get(3), "name2", columns.get(8), "rcol8"), + List.of(new ParameterizedExpression("\"lcol7\" = \"rcol8\"", List.of()))); + try (PreparedStatement preparedStatement = queryBuilder.prepareStatement(jdbcClient, SESSION, connection, preparedQuery, Optional.empty())) { + assertThat(preparedQuery.getQuery()).isEqualTo(""" + SELECT * FROM \ + (SELECT "col_2" AS "name1", "col_7" AS "lcol7" FROM (SELECT * FROM "test_table") l) l \ + INNER JOIN \ + (SELECT "col_3" AS "name2", "col_8" AS "rcol8" FROM (SELECT * FROM "test_table") r) r \ + ON ("lcol7" = "rcol8")"""); + long count = 0; + try (ResultSet resultSet = preparedStatement.executeQuery()) { + while (resultSet.next()) { + count++; + } + } + assertThat(count).isEqualTo(8); + } + } + + @Test + public void testBuildJoinSqlLegacy() + throws SQLException + { + Connection connection = database.getConnection(); + + PreparedQuery preparedQuery = queryBuilder.legacyPrepareJoinQuery( jdbcClient, SESSION, connection, diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcMetadataConfig.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcMetadataConfig.java index ed09ca49e4ba3..e86c88d7a3fcb 100644 --- 
a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcMetadataConfig.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcMetadataConfig.java @@ -30,6 +30,7 @@ public void testDefaults() assertRecordedDefaults(recordDefaults(JdbcMetadataConfig.class) .setComplexExpressionPushdownEnabled(true) .setJoinPushdownEnabled(false) + .setComplexJoinPushdownEnabled(true) .setAggregationPushdownEnabled(true) .setTopNPushdownEnabled(true) .setDomainCompactionThreshold(32)); @@ -41,6 +42,7 @@ public void testExplicitPropertyMappings() Map properties = ImmutableMap.builder() .put("complex-expression-pushdown.enabled", "false") .put("join-pushdown.enabled", "true") + .put("join-pushdown.with-expressions", "false") .put("aggregation-pushdown.enabled", "false") .put("domain-compaction-threshold", "42") .put("topn-pushdown.enabled", "false") @@ -49,6 +51,7 @@ public void testExplicitPropertyMappings() JdbcMetadataConfig expected = new JdbcMetadataConfig() .setComplexExpressionPushdownEnabled(false) .setJoinPushdownEnabled(true) + .setComplexJoinPushdownEnabled(false) .setAggregationPushdownEnabled(false) .setTopNPushdownEnabled(false) .setDomainCompactionThreshold(42); diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcTableProperties.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcTableProperties.java index 8c69032467c7b..8dc5ab420eaf0 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcTableProperties.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestJdbcTableProperties.java @@ -56,7 +56,7 @@ public Map getTableProperties(ConnectorSession session, JdbcTabl @Test public void testGetTablePropertiesIsNotCalledForSelect() { - onGetTableProperties = () -> { fail("Unexpected call of: getTableProperties"); }; + onGetTableProperties = () -> fail("Unexpected call of: getTableProperties"); assertUpdate("CREATE TABLE copy_of_nation AS SELECT * FROM nation", 25); assertQuerySucceeds("SELECT * FROM copy_of_nation"); assertQuerySucceeds("SELECT nationkey FROM copy_of_nation"); @@ -66,7 +66,7 @@ public void testGetTablePropertiesIsNotCalledForSelect() public void testGetTablePropertiesIsCalled() { AtomicInteger counter = new AtomicInteger(); - onGetTableProperties = () -> { counter.incrementAndGet(); }; + onGetTableProperties = () -> counter.incrementAndGet(); assertQuerySucceeds("SHOW CREATE TABLE nation"); assertThat(counter.get()).isOne(); } diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestGenericRewrite.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestGenericRewrite.java index 2c9653790656b..67f313b3069c9 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestGenericRewrite.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestGenericRewrite.java @@ -41,7 +41,7 @@ public class TestGenericRewrite @Test public void testRewriteCall() { - GenericRewrite rewrite = new GenericRewrite(Map.of(), "add(foo: decimal(p, s), bar: bigint): decimal(rp, rs)", "foo + bar::decimal(rp,rs)"); + GenericRewrite rewrite = new GenericRewrite(Map.of(), session -> true, "add(foo: decimal(p, s), bar: bigint): decimal(rp, rs)", "foo + bar::decimal(rp,rs)"); ConnectorExpression expression = new Call( createDecimalType(21, 2), new FunctionName("add"), @@ -58,7 +58,7 @@ public void testRewriteCall() public void testRewriteCallWithTypeClass() { Map> typeClasses = 
Map.of("integer_class", Set.of("integer", "bigint")); - GenericRewrite rewrite = new GenericRewrite(typeClasses, "add(foo: integer_class, bar: bigint): integer_class", "foo + bar"); + GenericRewrite rewrite = new GenericRewrite(typeClasses, session -> true, "add(foo: integer_class, bar: bigint): integer_class", "foo + bar"); assertThat(apply(rewrite, new Call( BIGINT, diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestRewriteComparison.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestRewriteComparison.java index 80e70a2e60073..57504cb16b75a 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestRewriteComparison.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/expression/TestRewriteComparison.java @@ -26,7 +26,7 @@ public class TestRewriteComparison public void testOperatorEnumsInSync() { assertThat( - Stream.of(RewriteComparison.ComparisonOperator.values()) + Stream.of(ComparisonOperator.values()) .map(Enum::name)) .containsExactlyInAnyOrder( Stream.of(ComparisonExpression.Operator.values()) diff --git a/plugin/trino-bigquery/pom.xml b/plugin/trino-bigquery/pom.xml index 2e339e1349cea..fc7493cc7c795 100644 --- a/plugin/trino-bigquery/pom.xml +++ b/plugin/trino-bigquery/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -41,12 +41,6 @@ conscrypt-openjdk-uber 2.5.2 - - - org.threeten - threetenbp - 1.6.8 - @@ -490,12 +484,6 @@ 5.6.0 test - - - org.testng - testng - test - @@ -545,7 +533,7 @@ - cloud-tests + cloud-tests-1 false @@ -557,12 +545,6 @@ **/TestBigQueryAvroConnectorTest.java - **/TestBigQueryWithDifferentProjectIdConnectorSmokeTest.java - **/TestBigQueryMetadataCaching.java - **/TestBigQueryAvroTypeMapping.java - **/TestBigQueryMetadata.java - **/TestBigQueryInstanceCleaner.java - **/TestBigQueryWithProxyConnectorSmokeTest.java @@ -595,7 +577,7 @@ - cloud-tests-arrow-and-fte + cloud-tests-2 false @@ -610,8 +592,14 @@ **/TestBigQueryArrowConnectorSmokeTest.java + **/TestBigQueryWithDifferentProjectIdConnectorSmokeTest.java + **/TestBigQueryWithProxyConnectorSmokeTest.java **/TestBigQueryArrowTypeMapping.java + **/TestBigQueryAvroTypeMapping.java + **/TestBigQueryMetadataCaching.java + **/TestBigQueryMetadata.java **/TestBigQuery*FailureRecoveryTest.java + **/TestBigQueryInstanceCleaner.java diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryMetadata.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryMetadata.java index fd5e88a633ee7..f0e6341005f60 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryMetadata.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryMetadata.java @@ -723,10 +723,16 @@ public ConnectorMergeTableHandle beginMerge(ConnectorSession session, ConnectorT } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { // TODO Fix BaseBigQueryFailureRecoveryTest when implementing this method - ConnectorMetadata.super.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + ConnectorMetadata.super.createMaterializedView(session, viewName, 
definition, properties, replace, ignoreExisting); } @Override diff --git a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/BaseBigQueryConnectorTest.java b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/BaseBigQueryConnectorTest.java index 22dc474133079..96f3ffe23d6ba 100644 --- a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/BaseBigQueryConnectorTest.java +++ b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/BaseBigQueryConnectorTest.java @@ -154,35 +154,6 @@ private void testPredicatePushdown(@Language("SQL") String inputLiteral, @Langua } } - @Test - public void testCreateTableSupportedType() - { - testCreateTableSupportedType("boolean", "boolean"); - testCreateTableSupportedType("tinyint", "bigint"); - testCreateTableSupportedType("smallint", "bigint"); - testCreateTableSupportedType("integer", "bigint"); - testCreateTableSupportedType("bigint", "bigint"); - testCreateTableSupportedType("double", "double"); - testCreateTableSupportedType("decimal", "decimal(38,9)"); - testCreateTableSupportedType("date", "date"); - testCreateTableSupportedType("time with time zone", "time(6)"); - testCreateTableSupportedType("timestamp(6)", "timestamp(6)"); - testCreateTableSupportedType("timestamp(6) with time zone", "timestamp(6) with time zone"); - testCreateTableSupportedType("varchar", "varchar"); - testCreateTableSupportedType("varchar(65535)", "varchar"); - testCreateTableSupportedType("varbinary", "varbinary"); - testCreateTableSupportedType("array(bigint)", "array(bigint)"); - testCreateTableSupportedType("row(x bigint, y double)", "row(x bigint, y double)"); - testCreateTableSupportedType("row(x array(bigint))", "row(x array(bigint))"); - } - - private void testCreateTableSupportedType(String createType, String expectedType) - { - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_create_table_supported_type_" + createType.replaceAll("[^a-zA-Z0-9]", ""), format("(col1 %s)", createType))) { - assertThat(computeScalar("SELECT data_type FROM information_schema.columns WHERE table_name = '" + table.getName() + "' AND column_name = 'col1'")).isEqualTo(expectedType); - } - } - @Test public void testCreateTableUnsupportedType() { @@ -837,6 +808,13 @@ protected OptionalInt maxSchemaNameLength() return OptionalInt.of(1024); } + @Override + @Test + public void testCreateSchemaWithLongName() + { + abort("Dropping schema with long name causes BigQuery to return code 500"); + } + @Override protected void verifySchemaNameLengthFailurePermissible(Throwable e) { diff --git a/plugin/trino-blackhole/pom.xml b/plugin/trino-blackhole/pom.xml index d76dc1989adc1..cd9ac64f101f1 100644 --- a/plugin/trino-blackhole/pom.xml +++ b/plugin/trino-blackhole/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-cassandra/pom.xml b/plugin/trino-cassandra/pom.xml index 16c5f45ada009..5aa4d6099a687 100644 --- a/plugin/trino-cassandra/pom.xml +++ b/plugin/trino-cassandra/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,7 +15,6 @@ ${project.parent.basedir} 1.5.1 - instances @@ -251,12 +250,6 @@ testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-clickhouse/pom.xml b/plugin/trino-clickhouse/pom.xml index 90082d9a937bb..b36e0422613b7 100644 --- a/plugin/trino-clickhouse/pom.xml +++ b/plugin/trino-clickhouse/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -199,11 +199,5 @@ 
testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml index fe8c8910cde77..8132ca79a53ca 100644 --- a/plugin/trino-delta-lake/pom.xml +++ b/plugin/trino-delta-lake/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java index b4f6c3e86c4b6..e5b2e8aef0b01 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java @@ -100,11 +100,12 @@ public abstract class AbstractDeltaLakePageSink private final DeltaLakeWriterStats stats; private final String trinoVersion; private final long targetMaxFileSize; - + private final long idleWriterMinFileSize; private long writtenBytes; private long memoryUsage; private final List closedWriterRollbackActions = new ArrayList<>(); + private final List activeWriters = new ArrayList<>(); protected final ImmutableList.Builder dataFileInfos = ImmutableList.builder(); private final DeltaLakeParquetSchemaMapping parquetSchemaMapping; @@ -190,6 +191,7 @@ public AbstractDeltaLakePageSink( this.trinoVersion = requireNonNull(trinoVersion, "trinoVersion is null"); this.targetMaxFileSize = DeltaLakeSessionProperties.getTargetMaxFileSize(session); + this.idleWriterMinFileSize = DeltaLakeSessionProperties.getIdleWriterMinFileSize(session); } protected abstract void processSynthesizedColumn(DeltaLakeColumnHandle column); @@ -312,6 +314,7 @@ private void writePage(Page page) } DeltaLakeWriter writer = writers.get(index); + verify(writer != null, "Expected writer at index %s", index); long currentWritten = writer.getWrittenBytes(); long currentMemory = writer.getMemoryUsage(); @@ -320,6 +323,22 @@ private void writePage(Page page) writtenBytes += writer.getWrittenBytes() - currentWritten; memoryUsage += writer.getMemoryUsage() - currentMemory; + // Mark this writer as active (i.e. 
not idle) + activeWriters.set(index, true); + } + } + + @Override + public void closeIdleWriters() + { + for (int writerIndex = 0; writerIndex < writers.size(); writerIndex++) { + DeltaLakeWriter writer = writers.get(writerIndex); + if (activeWriters.get(writerIndex) || writer == null || writer.getWrittenBytes() <= idleWriterMinFileSize) { + activeWriters.set(writerIndex, false); + continue; + } + LOG.debug("Closing writer %s with %s bytes written", writerIndex, writer.getWrittenBytes()); + closeWriter(writerIndex); } } @@ -334,6 +353,7 @@ private int[] getWriterIndexes(Page page) // expand writers list to new size while (writers.size() <= pageIndexer.getMaxIndex()) { writers.add(null); + activeWriters.add(false); } // create missing writers for (int position = 0; position < page.getPositionCount(); position++) { @@ -374,7 +394,6 @@ private int[] getWriterIndexes(Page page) memoryUsage += writer.getMemoryUsage(); } verify(writers.size() == pageIndexer.getMaxIndex() + 1); - verify(!writers.contains(null)); return writerIndexes; } @@ -387,6 +406,9 @@ private String getRelativeFilePath(Optional partitionName, String fileNa protected void closeWriter(int writerIndex) { DeltaLakeWriter writer = writers.get(writerIndex); + if (writer == null) { + return; + } long currentWritten = writer.getWrittenBytes(); long currentMemory = writer.getMemoryUsage(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java index 87ff4190d60fb..99d1ff1c8d275 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java @@ -76,6 +76,7 @@ public class DeltaLakeConfig private boolean deleteSchemaLocationsFallback; private String parquetTimeZone = TimeZone.getDefault().getID(); private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE); + private DataSize idleWriterMinFileSize = DataSize.of(16, MEGABYTE); private boolean uniqueTableLocation = true; private boolean registerTableProcedureEnabled; private boolean projectionPushdownEnabled = true; @@ -271,13 +272,13 @@ public long getDefaultCheckpointWritingInterval() return defaultCheckpointWritingInterval; } - public boolean isCheckpointPartitionFilterEnabled() + public boolean isCheckpointFilteringEnabled() { return checkpointFilteringEnabled; } @Config("delta.checkpoint-filtering.enabled") - public DeltaLakeConfig setCheckpointPartitionFilterEnabled(boolean checkpointFilteringEnabled) + public DeltaLakeConfig setCheckpointFilteringEnabled(boolean checkpointFilteringEnabled) { this.checkpointFilteringEnabled = checkpointFilteringEnabled; return this; @@ -450,6 +451,20 @@ public DeltaLakeConfig setTargetMaxFileSize(DataSize targetMaxFileSize) return this; } + @NotNull + public DataSize getIdleWriterMinFileSize() + { + return idleWriterMinFileSize; + } + + @Config("delta.idle-writer-min-file-size") + @ConfigDescription("Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine") + public DeltaLakeConfig setIdleWriterMinFileSize(DataSize idleWriterMinFileSize) + { + this.idleWriterMinFileSize = idleWriterMinFileSize; + return this; + } + public boolean isUniqueTableLocation() { return uniqueTableLocation; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeInputInfo.java 
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeInputInfo.java index e65b1cf0c3a3e..9bc5fc99fe577 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeInputInfo.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeInputInfo.java @@ -21,11 +21,13 @@ public class DeltaLakeInputInfo { private final boolean partitioned; + private final long version; @JsonCreator - public DeltaLakeInputInfo(@JsonProperty("partitioned") boolean partitioned) + public DeltaLakeInputInfo(@JsonProperty("partitioned") boolean partitioned, @JsonProperty("version") long version) { this.partitioned = partitioned; + this.version = version; } @JsonProperty @@ -34,6 +36,12 @@ public boolean isPartitioned() return partitioned; } + @JsonProperty + public long getVersion() + { + return version; + } + @Override public boolean equals(Object o) { @@ -43,12 +51,12 @@ public boolean equals(Object o) if (!(o instanceof DeltaLakeInputInfo that)) { return false; } - return partitioned == that.partitioned; + return partitioned == that.partitioned && version == that.version; } @Override public int hashCode() { - return Objects.hash(partitioned); + return Objects.hash(partitioned, version); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 49a639f1c0fba..5fc2f5defa035 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -965,13 +965,6 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe Table table = buildTable(session, schemaTableName, location, external); - // Ensure the table has queryId set. This is relied on for exception handling - String queryId = session.getQueryId(); - verify( - getQueryId(table).orElseThrow(() -> new IllegalArgumentException("Query id is not present")).equals(queryId), - "Table '%s' does not have correct query id set", - table); - PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); // As a precaution, clear the caches statisticsAccess.invalidateCache(schemaTableName, Optional.of(location)); @@ -1232,12 +1225,6 @@ public Optional finishCreateTable( SchemaTableName schemaTableName = schemaTableName(schemaName, tableName); Table table = buildTable(session, schemaTableName, location, handle.isExternal()); - // Ensure the table has queryId set. 
This is relied on for exception handling - String queryId = session.getQueryId(); - verify( - getQueryId(table).orElseThrow(() -> new IllegalArgumentException("Query id is not present")).equals(queryId), - "Table '%s' does not have correct query id set", - table); ColumnMappingMode columnMappingMode = handle.getColumnMappingMode(); String schemaString = handle.getSchemaString(); @@ -2362,8 +2349,9 @@ private void cleanupFailedWrite(ConnectorSession session, String tableLocation, @Override public Optional getInfo(ConnectorTableHandle table) { - boolean isPartitioned = !((DeltaLakeTableHandle) table).getMetadataEntry().getLowercasePartitionColumns().isEmpty(); - return Optional.of(new DeltaLakeInputInfo(isPartitioned)); + DeltaLakeTableHandle handle = (DeltaLakeTableHandle) table; + boolean isPartitioned = !handle.getMetadataEntry().getLowercasePartitionColumns().isEmpty(); + return Optional.of(new DeltaLakeInputInfo(isPartitioned, handle.getReadVersion())); } @Override diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java index 4a56b213cd53d..0882ad8100a79 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java @@ -86,7 +86,9 @@ import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockRowCount; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockSize; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetSmallFileThreshold; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetIgnoreStatistics; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetUseColumnIndex; +import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate; import static io.trino.plugin.deltalake.delete.DeletionVectors.readDeletionVectors; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.getColumnMappingMode; @@ -180,6 +182,12 @@ public ConnectorPageSource createPageSource( if (filteredSplitPredicate.isNone()) { return new EmptyPageSource(); } + Map partitionColumnDomains = filteredSplitPredicate.getDomains().orElseThrow().entrySet().stream() + .filter(entry -> entry.getKey().getColumnType() == DeltaLakeColumnType.PARTITION_KEY) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + if (!partitionMatchesPredicate(split.getPartitionKeys(), partitionColumnDomains)) { + return new EmptyPageSource(); + } if (filteredSplitPredicate.isAll() && split.getStart() == 0 && split.getLength() == split.getFileSize() && split.getFileRowCount().isPresent() && @@ -204,7 +212,8 @@ public ConnectorPageSource createPageSource( ParquetReaderOptions options = parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) .withSmallFileThreshold(getParquetSmallFileThreshold(session)) - .withUseColumnIndex(isParquetUseColumnIndex(session)); + .withUseColumnIndex(isParquetUseColumnIndex(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)); Map parquetFieldIdToName = columnMappingMode == ColumnMappingMode.ID ? 
loadParquetIdAndNameMapping(inputFile, options) : ImmutableMap.of(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java index 8cfb02a3988ba..065ac2e187fad 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java @@ -56,9 +56,11 @@ public final class DeltaLakeSessionProperties private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count"; private static final String PARQUET_SMALL_FILE_THRESHOLD = "parquet_small_file_threshold"; private static final String PARQUET_USE_COLUMN_INDEX = "parquet_use_column_index"; + private static final String PARQUET_IGNORE_STATISTICS = "parquet_ignore_statistics"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size"; + private static final String IDLE_WRITER_MIN_FILE_SIZE = "idle_writer_min_file_size"; private static final String COMPRESSION_CODEC = "compression_codec"; // This property is not supported by Delta Lake and exists solely for technical reasons. @Deprecated @@ -130,6 +132,11 @@ public DeltaLakeSessionProperties( "Use Parquet column index", parquetReaderConfig.isUseColumnIndex(), false), + booleanProperty( + PARQUET_IGNORE_STATISTICS, + "Ignore statistics from Parquet to allow querying files with corrupted or incorrect statistics", + parquetReaderConfig.isIgnoreStatistics(), + false), dataSizeProperty( PARQUET_WRITER_BLOCK_SIZE, "Parquet: Writer block size", @@ -150,6 +157,11 @@ public DeltaLakeSessionProperties( "Target maximum size of written files; the actual size may be larger", deltaLakeConfig.getTargetMaxFileSize(), false), + dataSizeProperty( + IDLE_WRITER_MIN_FILE_SIZE, + "Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine", + deltaLakeConfig.getIdleWriterMinFileSize(), + false), enumProperty( TIMESTAMP_PRECISION, "Internal Delta Lake connector property", @@ -201,7 +213,7 @@ public DeltaLakeSessionProperties( booleanProperty( CHECKPOINT_FILTERING_ENABLED, "Use filter in checkpoint reader", - deltaLakeConfig.isCheckpointPartitionFilterEnabled(), + deltaLakeConfig.isCheckpointFilteringEnabled(), false)); } @@ -251,6 +263,11 @@ public static boolean isParquetUseColumnIndex(ConnectorSession session) return session.getProperty(PARQUET_USE_COLUMN_INDEX, Boolean.class); } + public static boolean isParquetIgnoreStatistics(ConnectorSession session) + { + return session.getProperty(PARQUET_IGNORE_STATISTICS, Boolean.class); + } + public static DataSize getParquetWriterBlockSize(ConnectorSession session) { return session.getProperty(PARQUET_WRITER_BLOCK_SIZE, DataSize.class); @@ -266,6 +283,11 @@ public static long getTargetMaxFileSize(ConnectorSession session) return session.getProperty(TARGET_MAX_FILE_SIZE, DataSize.class).toBytes(); } + public static long getIdleWriterMinFileSize(ConnectorSession session) + { + return session.getProperty(IDLE_WRITER_MIN_FILE_SIZE, DataSize.class).toBytes(); + } + public static Duration getDynamicFilteringWaitTimeout(ConnectorSession session) { return session.getProperty(DYNAMIC_FILTERING_WAIT_TIMEOUT, Duration.class); diff 
--git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java index 8f6f48f247db3..cb9e031ea0a36 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java @@ -14,7 +14,6 @@ package io.trino.plugin.deltalake; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; import io.airlift.units.DataSize; import io.trino.filesystem.Location; @@ -46,6 +45,7 @@ import java.net.URI; import java.net.URLDecoder; import java.time.Instant; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -54,6 +54,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; @@ -70,8 +71,6 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; -import static java.util.stream.Collectors.counting; -import static java.util.stream.Collectors.groupingBy; public class DeltaLakeSplitManager implements ConnectorSplitManager @@ -177,7 +176,10 @@ private Stream getSplits( MetadataEntry metadataEntry = tableHandle.getMetadataEntry(); boolean isOptimize = tableHandle.isOptimize(); - Set>> partitionsWithAtMostOneFile = isOptimize ? findPartitionsWithAtMostOneFile(validDataFiles) : ImmutableSet.of(); + if (isOptimize) { + checkArgument(maxScannedFileSizeInBytes.isPresent(), "maxScannedFileSizeInBytes must be provided when performing OPTIMIZE"); + validDataFiles = filterValidDataFilesForOptimize(validDataFiles, maxScannedFileSizeInBytes.get()); + } Set predicatedColumnNames = Stream.concat( nonPartitionConstraint.getDomains().orElseThrow().keySet().stream(), @@ -210,11 +212,6 @@ private Stream getSplits( return Stream.empty(); } - // no need to rewrite small file that is the only one in its partition - if (isOptimize && partitionsWithAtMostOneFile.contains(addAction.getCanonicalPartitionValues()) && maxScannedFileSizeInBytes.isPresent() && addAction.getSize() < maxScannedFileSizeInBytes.get()) { - return Stream.empty(); - } - Map enforcedDomains = enforcedPartitionConstraint.getDomains().orElseThrow(); if (!partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), enforcedDomains)) { return Stream.empty(); @@ -253,12 +250,28 @@ private Stream getSplits( }); } - private Set>> findPartitionsWithAtMostOneFile(List addFileEntries) + private static List filterValidDataFilesForOptimize(List validDataFiles, long maxScannedFileSizeInBytes) { - return addFileEntries.stream().collect(groupingBy(AddFileEntry::getCanonicalPartitionValues, counting())).entrySet().stream() - .filter(entry -> entry.getValue() <= 1) - .map(Map.Entry::getKey) - .collect(toImmutableSet()); + // Value being present is a pending file (potentially the only one) for a given partition. + // Value being empty is a tombstone, indicates that there were in the stream previously at least 2 files selected for processing for a given partition. 
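[Editorial sketch, not part of the change.] The OPTIMIZE planning change in this hunk replaces the findPartitionsWithAtMostOneFile pre-scan with a single pass over the valid data files, using the pending-entry/tombstone map described in the comment above: the first small file per partition is held back, the second causes both to be emitted and leaves an empty-Optional tombstone, later files are emitted directly, and a partition that only ever sees one small file is skipped. A standalone illustration of the same pattern, with generic FileEntry and filterForOptimize names rather than the Trino types:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Optional;

    public class OptimizeCandidateFilter
    {
        record FileEntry(String partition, long size) {}

        static List<FileEntry> filterForOptimize(List<FileEntry> files, long maxSizeBytes)
        {
            // Optional as a map value mirrors the pendingAddFileEntriesMap in the diff:
            // present = one pending file for the partition, empty = tombstone (two or more already emitted)
            Map<String, Optional<FileEntry>> pending = new HashMap<>();
            List<FileEntry> result = new ArrayList<>();
            for (FileEntry file : files) {
                if (file.size() >= maxSizeBytes) {
                    continue; // files at or above the threshold are never rewritten
                }
                Optional<FileEntry> previous = pending.get(file.partition());
                if (previous == null) {
                    pending.put(file.partition(), Optional.of(file)); // first small file: hold back
                }
                else if (previous.isPresent()) {
                    result.add(previous.get()); // second small file: emit both
                    result.add(file);
                    pending.put(file.partition(), Optional.empty()); // tombstone
                }
                else {
                    result.add(file); // third and later: emit directly
                }
            }
            return result;
        }

        public static void main(String[] args)
        {
            List<FileEntry> files = List.of(
                    new FileEntry("p=1", 10), new FileEntry("p=1", 20),
                    new FileEntry("p=2", 30), // only small file in its partition: skipped
                    new FileEntry("p=1", 40));
            System.out.println(filterForOptimize(files, 1_000_000));
        }
    }

The held-back single file per partition is exactly the "no need to rewrite small file that is the only one in its partition" case that the removed code handled with a separate grouping pass.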
+ Map>, Optional> pendingAddFileEntriesMap = new HashMap<>(); + return validDataFiles.stream() + .filter(addFileEntry -> addFileEntry.getSize() < maxScannedFileSizeInBytes) + .flatMap(addFileEntry -> { + Map> canonicalPartitionValues = addFileEntry.getCanonicalPartitionValues(); + if (pendingAddFileEntriesMap.containsKey(canonicalPartitionValues)) { + Optional alreadyQueuedAddFileEntry = pendingAddFileEntriesMap.get(canonicalPartitionValues); + if (alreadyQueuedAddFileEntry.isEmpty()) { + return Stream.of(addFileEntry); + } + pendingAddFileEntriesMap.put(canonicalPartitionValues, Optional.empty()); + return Stream.of(alreadyQueuedAddFileEntry.get(), addFileEntry); + } + + pendingAddFileEntriesMap.put(canonicalPartitionValues, Optional.of(addFileEntry)); + return Stream.empty(); + }) + .collect(toImmutableList()); } private static boolean mayAnyDataColumnProjected(DeltaLakeTableHandle tableHandle) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitSource.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitSource.java index c4bb1da90ecd7..964361376ca95 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitSource.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitSource.java @@ -123,8 +123,8 @@ public CompletableFuture getNextBatch(int maxSize) .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); List filteredSplits = splits.stream() .map(DeltaLakeSplit.class::cast) - .filter(split -> split.getStatisticsPredicate().overlaps(dynamicFilterPredicate) && - partitionMatchesPredicate(split.getPartitionKeys(), partitionColumnDomains)) + .filter(split -> partitionMatchesPredicate(split.getPartitionKeys(), partitionColumnDomains) && + split.getStatisticsPredicate().overlaps(dynamicFilterPredicate)) .collect(toImmutableList()); if (recordScannedFiles) { filteredSplits.forEach(split -> scannedFilePaths.add(((DeltaLakeSplit) split).getPath())); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java index 7c27b8d151cd3..9fa930f2297f2 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java @@ -350,7 +350,7 @@ public Block apply(Block block) RowBlock rowBlock = (RowBlock) runLengthEncodedBlock.getValue(); RowBlock newRowBlock = RowBlock.fromNotNullSuppressedFieldBlocks( 1, - rowBlock.isNull(0) ? Optional.of(new boolean[]{true}) : Optional.empty(), + rowBlock.isNull(0) ? 
Optional.of(new boolean[] {true}) : Optional.empty(), coerceFields(rowBlock.getFieldBlocks())); return RunLengthEncodedBlock.create(newRowBlock, runLengthEncodedBlock.getPositionCount()); } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java index d5cf1f482538e..d18e32462a177 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java @@ -47,6 +47,7 @@ import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockRowCount; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockSize; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetIgnoreStatistics; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetUseColumnIndex; import static io.trino.plugin.deltalake.functions.tablechanges.TableChangesFileType.CDF_FILE; import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; @@ -175,7 +176,8 @@ private static DeltaLakePageSource createDeltaLakePageSource( parquetReaderOptions = parquetReaderOptions .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) - .withUseColumnIndex(isParquetUseColumnIndex(session)); + .withUseColumnIndex(isParquetUseColumnIndex(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)); List splitColumns = switch (split.fileType()) { case CDF_FILE -> ImmutableList.builder().addAll(handle.columns()) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java index df3a1c1a8f150..854512b93352c 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java @@ -63,7 +63,7 @@ public List getAllTables(String databaseName) // it would be nice to filter out non-Delta tables; however, we can not call // metastore.getTablesWithParameter(schema, TABLE_PROVIDER_PROP, TABLE_PROVIDER_VALUE), because that property // contains a dot and must be compared case-insensitive - return delegate.getAllTables(databaseName); + return delegate.getTables(databaseName); } @Override diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java index de3c38e216cb2..76e3a3e892b57 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java @@ -41,12 +41,10 @@ import java.util.Optional; import static com.google.common.base.Strings.isNullOrEmpty; -import static com.google.common.base.Verify.verify; import static 
io.trino.plugin.base.util.Procedures.checkProcedureArgument; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE; import static io.trino.plugin.deltalake.DeltaLakeMetadata.buildTable; -import static io.trino.plugin.deltalake.DeltaLakeMetadata.getQueryId; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR; @@ -177,12 +175,6 @@ private void doRegisterTable( throw new TrinoException(DELTA_LAKE_INVALID_TABLE, "Failed to access table location: " + tableLocation, e); } - // Ensure the table has queryId set. This is relied on for exception handling - String queryId = session.getQueryId(); - verify( - getQueryId(table).orElseThrow(() -> new IllegalArgumentException("Query id is not present")).equals(queryId), - "Table '%s' does not have correct query id set", - table); metastore.createTable(session, table, principalPrivileges); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/AddFileEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/AddFileEntry.java index 5b6c96d3a31c6..ed7a4d646b9e1 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/AddFileEntry.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/AddFileEntry.java @@ -61,10 +61,35 @@ public AddFileEntry( @JsonProperty("parsedStats") Optional parsedStats, @JsonProperty("tags") @Nullable Map tags, @JsonProperty("deletionVector") Optional deletionVector) + { + this( + path, + partitionValues, + canonicalizePartitionValues(partitionValues), + size, + modificationTime, + dataChange, + stats, + parsedStats, + tags, + deletionVector); + } + + public AddFileEntry( + String path, + Map partitionValues, + Map> canonicalPartitionValues, + long size, + long modificationTime, + boolean dataChange, + Optional stats, + Optional parsedStats, + @Nullable Map tags, + Optional deletionVector) { this.path = path; - this.partitionValues = partitionValues; - this.canonicalPartitionValues = canonicalizePartitionValues(partitionValues); + this.partitionValues = requireNonNull(partitionValues, "partitionValues is null"); + this.canonicalPartitionValues = requireNonNull(canonicalPartitionValues, "canonicalPartitionValues is null"); this.size = size; this.modificationTime = modificationTime; this.dataChange = dataChange; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java index f308865e90b45..5d46748db19b5 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java @@ -236,7 +236,7 @@ public MetadataEntry getMetadataEntry(TableSnapshot tableSnapshot, ConnectorSess @Deprecated public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, ConnectorSession session) { - return retrieveActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), Optional.empty(), session); + return 
retrieveActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), alwaysTrue(), session); } public List getActiveFiles( @@ -247,13 +247,13 @@ public List getActiveFiles( Optional> projectedColumns, ConnectorSession session) { - Optional> addStatsMinMaxColumnFilter = Optional.of(alwaysFalse()); + Predicate addStatsMinMaxColumnFilter = alwaysFalse(); if (projectedColumns.isPresent()) { Set baseColumnNames = projectedColumns.get().stream() .filter(DeltaLakeColumnHandle::isBaseColumn) // Only base column stats are supported .map(DeltaLakeColumnHandle::getColumnName) .collect(toImmutableSet()); - addStatsMinMaxColumnFilter = Optional.of(baseColumnNames::contains); + addStatsMinMaxColumnFilter = baseColumnNames::contains; } return retrieveActiveFiles(tableSnapshot, metadataEntry, protocolEntry, partitionConstraint, addStatsMinMaxColumnFilter, session); } @@ -263,7 +263,7 @@ private List retrieveActiveFiles( MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, - Optional> addStatsMinMaxColumnFilter, + Predicate addStatsMinMaxColumnFilter, ConnectorSession session) { try { @@ -299,7 +299,7 @@ private List retrieveActiveFiles( } } - List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), Optional.of(alwaysTrue()), session); + List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), alwaysTrue(), session); return new DeltaLakeDataFileCacheEntry(tableSnapshot.getVersion(), activeFiles); }); return cacheEntry.getActiveFiles(); @@ -314,7 +314,7 @@ private List loadActiveFiles( MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, - Optional> addStatsMinMaxColumnFilter, + Predicate addStatsMinMaxColumnFilter, ConnectorSession session) { List transactions = tableSnapshot.getTransactions(); @@ -327,7 +327,7 @@ private List loadActiveFiles( fileFormatDataSourceStats, Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), partitionConstraint, - addStatsMinMaxColumnFilter)) { + Optional.of(addStatsMinMaxColumnFilter))) { return activeAddEntries(checkpointEntries, transactions) .filter(partitionConstraint.isAll() ? 
addAction -> true diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java index 9b90eaf50930c..9e88bfd940296 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java @@ -21,6 +21,7 @@ import io.airlift.log.Logger; import io.trino.filesystem.TrinoInputFile; import io.trino.parquet.Column; +import io.trino.parquet.Field; import io.trino.parquet.ParquetReaderOptions; import io.trino.plugin.deltalake.DeltaHiveTypeTranslator; import io.trino.plugin.deltalake.DeltaLakeColumnHandle; @@ -72,10 +73,11 @@ import java.util.function.Predicate; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.MoreCollectors.onlyElement; +import static com.google.common.collect.MoreCollectors.toOptional; import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate; @@ -83,6 +85,7 @@ import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.isDeletionVectorEnabled; import static io.trino.plugin.deltalake.transactionlog.TransactionLogAccess.columnsWithStats; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.START_OF_MODERN_ERA_EPOCH_DAY; +import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.canonicalizePartitionValues; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.COMMIT; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.METADATA; @@ -132,19 +135,28 @@ public String getColumnName() private final String checkpointPath; private final ConnectorSession session; private final ParquetPageSource pageSource; - private final Map parquetFields; private final MapType stringMap; private final ArrayType stringList; private final Queue nextEntries; - private final List extractors; + private final List extractors; private final boolean checkpointRowStatisticsWritingEnabled; private final TupleDomain partitionConstraint; + private final Optional txnType; + private final Optional addType; + private final Optional addPartitionValuesType; + private final Optional addDeletionVectorType; + private final Optional addParsedStatsFieldType; + private final Optional removeType; + private final Optional metadataType; + private final Optional protocolType; + private final Optional commitType; + private MetadataEntry metadataEntry; private ProtocolEntry protocolEntry; + private boolean deletionVectorsEnabled; private List schema; private List columnsWithMinMaxStats; private Page page; - private long pageIndex; private int pagePosition; public CheckpointEntryIterator( @@ -171,20 +183,13 @@ public 
CheckpointEntryIterator( this.partitionConstraint = requireNonNull(partitionConstraint, "partitionConstraint is null"); requireNonNull(addStatsMinMaxColumnFilter, "addStatsMinMaxColumnFilter is null"); checkArgument(!fields.isEmpty(), "fields is empty"); - Map extractors = ImmutableMap.builder() - .put(TRANSACTION, this::buildTxnEntry) - .put(ADD, this::buildAddEntry) - .put(REMOVE, this::buildRemoveEntry) - .put(METADATA, this::buildMetadataEntry) - .put(PROTOCOL, this::buildProtocolEntry) - .put(COMMIT, this::buildCommitInfoEntry) - .buildOrThrow(); // ADD requires knowing the metadata in order to figure out the Parquet schema if (fields.contains(ADD)) { checkArgument(metadataEntry.isPresent(), "Metadata entry must be provided when reading ADD entries from Checkpoint files"); this.metadataEntry = metadataEntry.get(); checkArgument(protocolEntry.isPresent(), "Protocol entry must be provided when reading ADD entries from Checkpoint files"); this.protocolEntry = protocolEntry.get(); + deletionVectorsEnabled = isDeletionVectorEnabled(this.metadataEntry, this.protocolEntry); checkArgument(addStatsMinMaxColumnFilter.isPresent(), "addStatsMinMaxColumnFilter must be provided when reading ADD entries from Checkpoint files"); this.schema = extractSchema(this.metadataEntry, this.protocolEntry, typeManager); this.columnsWithMinMaxStats = columnsWithStats(schema, this.metadataEntry.getOriginalPartitionColumns()); @@ -200,6 +205,10 @@ public CheckpointEntryIterator( HiveColumnHandle column = buildColumnHandle(field, checkpointSchemaManager, this.metadataEntry, this.protocolEntry, addStatsMinMaxColumnFilter).toHiveColumnHandle(); columnsBuilder.add(column); disjunctDomainsBuilder.add(buildTupleDomainColumnHandle(field, column)); + if (field == ADD) { + Type addEntryPartitionValuesType = checkpointSchemaManager.getAddEntryPartitionValuesType(); + columnsBuilder.add(new DeltaLakeColumnHandle("add", addEntryPartitionValuesType, OptionalInt.empty(), "add", addEntryPartitionValuesType, REGULAR, Optional.empty()).toHiveColumnHandle()); + } } ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource( @@ -220,12 +229,69 @@ public CheckpointEntryIterator( this.pageSource = (ParquetPageSource) pageSource.get(); this.nextEntries = new ArrayDeque<>(); - // The size between parquetFields and extractors may not match when the requested field doesn't exist in Parquet file - this.parquetFields = this.pageSource.getColumnFields().stream() - .collect(toImmutableMap(Column::name, e -> e.field().getType())); this.extractors = fields.stream() - .map(field -> requireNonNull(extractors.get(field), "No extractor found for field " + field)) + .map(this::createCheckpointFieldExtractor) .collect(toImmutableList()); + txnType = getParquetType(fields, TRANSACTION); + addType = getAddParquetTypeContainingField(fields, "path"); + addPartitionValuesType = getAddParquetTypeContainingField(fields, "partitionValues"); + addDeletionVectorType = addType.flatMap(type -> getOptionalFieldType(type, "deletionVector")); + addParsedStatsFieldType = addType.flatMap(type -> getOptionalFieldType(type, "stats_parsed")); + removeType = getParquetType(fields, REMOVE); + metadataType = getParquetType(fields, METADATA); + protocolType = getParquetType(fields, PROTOCOL); + commitType = getParquetType(fields, COMMIT); + } + + private static Optional getOptionalFieldType(RowType type, String fieldName) + { + return type.getFields().stream() + .filter(field -> field.getName().orElseThrow().equals(fieldName)) + .collect(toOptional()) + 
.map(RowType.Field::getType) + .map(RowType.class::cast); + } + + private Optional getAddParquetTypeContainingField(Set fields, String fieldName) + { + return fields.contains(ADD) ? + this.pageSource.getColumnFields().stream() + .filter(column -> column.name().equals(ADD.getColumnName()) && + column.field().getType() instanceof RowType rowType && + rowType.getFields().stream().map(RowType.Field::getName).filter(Optional::isPresent).flatMap(Optional::stream).anyMatch(fieldName::equals)) + // The field even if it was requested might not exist in Parquet file + .collect(toOptional()) + .map(Column::field) + .map(Field::getType) + .map(RowType.class::cast) + : Optional.empty(); + } + + private Optional getParquetType(Set fields, EntryType field) + { + return fields.contains(field) ? getParquetType(field.getColumnName()).map(RowType.class::cast) : Optional.empty(); + } + + private Optional getParquetType(String columnName) + { + return pageSource.getColumnFields().stream() + .filter(column -> column.name().equals(columnName)) + // The field even if it was requested may not exist in Parquet file + .collect(toOptional()) + .map(Column::field) + .map(Field::getType); + } + + private CheckpointFieldExtractor createCheckpointFieldExtractor(EntryType entryType) + { + return switch (entryType) { + case TRANSACTION -> (session, pagePosition, blocks) -> buildTxnEntry(session, pagePosition, blocks[0]); + case ADD -> new AddFileEntryExtractor(); + case REMOVE -> (session, pagePosition, blocks) -> buildRemoveEntry(session, pagePosition, blocks[0]); + case METADATA -> (session, pagePosition, blocks) -> buildMetadataEntry(session, pagePosition, blocks[0]); + case PROTOCOL -> (session, pagePosition, blocks) -> buildProtocolEntry(session, pagePosition, blocks[0]); + case COMMIT -> (session, pagePosition, blocks) -> buildCommitInfoEntry(session, pagePosition, blocks[0]); + }; } private DeltaLakeColumnHandle buildColumnHandle( @@ -237,7 +303,7 @@ private DeltaLakeColumnHandle buildColumnHandle( { Type type = switch (entryType) { case TRANSACTION -> schemaManager.getTxnEntryType(); - case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, addStatsMinMaxColumnFilter.orElseThrow(), true, true); + case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, addStatsMinMaxColumnFilter.orElseThrow(), true, true, false); case REMOVE -> schemaManager.getRemoveEntryType(); case METADATA -> schemaManager.getMetadataEntryType(); case PROTOCOL -> schemaManager.getProtocolEntryType(true, true); @@ -314,13 +380,13 @@ private static HiveColumnHandle toPartitionValuesParsedField(HiveColumnHandle ad addColumn.getComment()); } - private DeltaLakeTransactionLogEntry buildCommitInfoEntry(ConnectorSession session, Block block, int pagePosition) + private DeltaLakeTransactionLogEntry buildCommitInfoEntry(ConnectorSession session, int pagePosition, Block block) { log.debug("Building commitInfo entry from %s pagePosition %d", block, pagePosition); if (block.isNull(pagePosition)) { return null; } - RowType type = (RowType) parquetFields.get("commitinfo"); + RowType type = commitType.orElseThrow(); int commitInfoFields = 12; int jobFields = 5; int notebookFields = 1; @@ -370,13 +436,13 @@ private DeltaLakeTransactionLogEntry buildCommitInfoEntry(ConnectorSession sessi return DeltaLakeTransactionLogEntry.commitInfoEntry(result); } - private DeltaLakeTransactionLogEntry buildProtocolEntry(ConnectorSession session, Block block, int pagePosition) + private DeltaLakeTransactionLogEntry 
buildProtocolEntry(ConnectorSession session, int pagePosition, Block block) { log.debug("Building protocol entry from %s pagePosition %d", block, pagePosition); if (block.isNull(pagePosition)) { return null; } - RowType type = (RowType) parquetFields.get("protocol"); + RowType type = protocolType.orElseThrow(); int minProtocolFields = 2; int maxProtocolFields = 4; SqlRow protocolEntryRow = block.getObject(pagePosition, SqlRow.class); @@ -397,13 +463,13 @@ private DeltaLakeTransactionLogEntry buildProtocolEntry(ConnectorSession session return DeltaLakeTransactionLogEntry.protocolEntry(result); } - private DeltaLakeTransactionLogEntry buildMetadataEntry(ConnectorSession session, Block block, int pagePosition) + private DeltaLakeTransactionLogEntry buildMetadataEntry(ConnectorSession session, int pagePosition, Block block) { log.debug("Building metadata entry from %s pagePosition %d", block, pagePosition); if (block.isNull(pagePosition)) { return null; } - RowType type = (RowType) parquetFields.get("metadata"); + RowType type = metadataType.orElseThrow(); int metadataFields = 8; int formatFields = 2; SqlRow metadataEntryRow = block.getObject(pagePosition, SqlRow.class); @@ -436,13 +502,13 @@ private DeltaLakeTransactionLogEntry buildMetadataEntry(ConnectorSession session return DeltaLakeTransactionLogEntry.metadataEntry(result); } - private DeltaLakeTransactionLogEntry buildRemoveEntry(ConnectorSession session, Block block, int pagePosition) + private DeltaLakeTransactionLogEntry buildRemoveEntry(ConnectorSession session, int pagePosition, Block block) { log.debug("Building remove entry from %s pagePosition %d", block, pagePosition); if (block.isNull(pagePosition)) { return null; } - RowType type = (RowType) parquetFields.get("remove"); + RowType type = removeType.orElseThrow(); int removeFields = 3; SqlRow removeEntryRow = block.getObject(pagePosition, SqlRow.class); log.debug("Block %s has %s fields", block, removeEntryRow.getFieldCount()); @@ -459,56 +525,76 @@ private DeltaLakeTransactionLogEntry buildRemoveEntry(ConnectorSession session, return DeltaLakeTransactionLogEntry.removeFileEntry(result); } - private DeltaLakeTransactionLogEntry buildAddEntry(ConnectorSession session, Block block, int pagePosition) + private class AddFileEntryExtractor + implements CheckpointFieldExtractor { - log.debug("Building add entry from %s pagePosition %d", block, pagePosition); - if (block.isNull(pagePosition)) { - return null; - } - RowType type = (RowType) parquetFields.get("add"); - boolean deletionVectorsEnabled = isDeletionVectorEnabled(metadataEntry, protocolEntry); - SqlRow addEntryRow = block.getObject(pagePosition, SqlRow.class); - log.debug("Block %s has %s fields", block, addEntryRow.getFieldCount()); - CheckpointFieldReader add = new CheckpointFieldReader(session, addEntryRow, type); - - String path = add.getString("path"); - Map partitionValues = add.getMap(stringMap, "partitionValues"); - long size = add.getLong("size"); - long modificationTime = add.getLong("modificationTime"); - boolean dataChange = add.getBoolean("dataChange"); - - Optional deletionVector = Optional.empty(); - if (deletionVectorsEnabled) { - deletionVector = Optional.ofNullable(add.getRow("deletionVector")) - .map(row -> { - RowType.Field deletionVectorField = type.getFields().stream().filter(field -> field.getName().orElseThrow().equals("deletionVector")).collect(onlyElement()); - return parseDeletionVectorFromParquet(session, row, (RowType) deletionVectorField.getType()); - }); - } - - Optional parsedStats = 
Optional.ofNullable(add.getRow("stats_parsed")).map(row -> { - RowType.Field parsedStatsField = type.getFields().stream().filter(field -> field.getName().orElseThrow().equals("stats_parsed")).collect(onlyElement()); - return parseStatisticsFromParquet(session, row, (RowType) parsedStatsField.getType()); - }); - Optional stats = Optional.empty(); - if (parsedStats.isEmpty()) { - stats = Optional.ofNullable(add.getString("stats")); - } - - Map tags = add.getMap(stringMap, "tags"); - AddFileEntry result = new AddFileEntry( - path, - partitionValues, - size, - modificationTime, - dataChange, - stats, - parsedStats, - tags, - deletionVector); + @Nullable + @Override + public DeltaLakeTransactionLogEntry getEntry(ConnectorSession session, int pagePosition, Block... blocks) + { + checkState(blocks.length == getRequiredChannels(), "Unexpected amount of blocks: %s", blocks.length); + Block addBlock = blocks[0]; + Block addPartitionValuesBlock = blocks[1]; + log.debug("Building add entry from %s pagePosition %d", addBlock, pagePosition); + if (addBlock.isNull(pagePosition)) { + return null; + } - log.debug("Result: %s", result); - return DeltaLakeTransactionLogEntry.addFileEntry(result); + checkState(!addPartitionValuesBlock.isNull(pagePosition), "Inconsistent blocks provided while building the add file entry"); + SqlRow addPartitionValuesRow = addPartitionValuesBlock.getObject(pagePosition, SqlRow.class); + CheckpointFieldReader addPartitionValuesReader = new CheckpointFieldReader(session, addPartitionValuesRow, addPartitionValuesType.orElseThrow()); + Map partitionValues = addPartitionValuesReader.getMap(stringMap, "partitionValues"); + Map> canonicalPartitionValues = canonicalizePartitionValues(partitionValues); + if (!partitionConstraint.isAll() && !partitionMatchesPredicate(canonicalPartitionValues, partitionConstraint.getDomains().orElseThrow())) { + return null; + } + + // Materialize from Parquet the information needed to build the AddEntry instance + addBlock = addBlock.getLoadedBlock(); + SqlRow addEntryRow = addBlock.getObject(pagePosition, SqlRow.class); + log.debug("Block %s has %s fields", addBlock, addEntryRow.getFieldCount()); + CheckpointFieldReader addReader = new CheckpointFieldReader(session, addEntryRow, addType.orElseThrow()); + + String path = addReader.getString("path"); + long size = addReader.getLong("size"); + long modificationTime = addReader.getLong("modificationTime"); + boolean dataChange = addReader.getBoolean("dataChange"); + + Optional deletionVector = Optional.empty(); + if (deletionVectorsEnabled) { + deletionVector = Optional.ofNullable(addReader.getRow("deletionVector")) + .map(row -> parseDeletionVectorFromParquet(session, row, addDeletionVectorType.orElseThrow())); + } + + Optional parsedStats = Optional.ofNullable(addReader.getRow("stats_parsed")) + .map(row -> parseStatisticsFromParquet(session, row, addParsedStatsFieldType.orElseThrow())); + Optional stats = Optional.empty(); + if (parsedStats.isEmpty()) { + stats = Optional.ofNullable(addReader.getString("stats")); + } + + Map tags = addReader.getMap(stringMap, "tags"); + AddFileEntry result = new AddFileEntry( + path, + partitionValues, + canonicalPartitionValues, + size, + modificationTime, + dataChange, + stats, + parsedStats, + tags, + deletionVector); + + log.debug("Result: %s", result); + return DeltaLakeTransactionLogEntry.addFileEntry(result); + } + + @Override + public int getRequiredChannels() + { + return 2; + } } private DeletionVectorEntry parseDeletionVectorFromParquet(ConnectorSession 
session, SqlRow row, RowType type) @@ -612,13 +698,13 @@ private Map parseNullCount(SqlRow row, List addStatsMinMaxColumnFilter, boolean requireWriteStatsAsJson, - boolean requireWriteStatsAsStruct) + boolean requireWriteStatsAsStruct, + boolean usePartitionValues) { List allColumns = extractSchema(metadataEntry, protocolEntry, typeManager); List minMaxColumns = columnsWithStats(metadataEntry, protocolEntry, typeManager); @@ -158,7 +159,9 @@ public RowType getAddEntryType( MapType stringMap = (MapType) typeManager.getType(TypeSignature.mapType(VARCHAR.getTypeSignature(), VARCHAR.getTypeSignature())); ImmutableList.Builder addFields = ImmutableList.builder(); addFields.add(RowType.field("path", VARCHAR)); - addFields.add(RowType.field("partitionValues", stringMap)); + if (usePartitionValues) { + addFields.add(RowType.field("partitionValues", stringMap)); + } addFields.add(RowType.field("size", BIGINT)); addFields.add(RowType.field("modificationTime", BIGINT)); addFields.add(RowType.field("dataChange", BOOLEAN)); @@ -183,6 +186,15 @@ public RowType getAddEntryType( return RowType.from(addFields.build()); } + public RowType getAddEntryPartitionValuesType() + { + ImmutableList.Builder addFields = ImmutableList.builder(); + MapType stringMap = (MapType) typeManager.getType(TypeSignature.mapType(VARCHAR.getTypeSignature(), VARCHAR.getTypeSignature())); + addFields.add(RowType.field("partitionValues", stringMap)); + + return RowType.from(addFields.build()); + } + private static RowType.Field buildNullCountType(Optional columnName, Type columnType) { if (columnType instanceof RowType rowType) { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java index 0cc8c1041431d..0ca4c2e5efdc3 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java @@ -120,7 +120,8 @@ public void write(CheckpointEntries entries, TrinoOutputFile outputFile) entries.getProtocolEntry(), alwaysTrue(), writeStatsAsJson, - writeStatsAsStruct); + writeStatsAsStruct, + true); RowType removeEntryType = checkpointSchemaManager.getRemoveEntryType(); List columnNames = ImmutableList.of( diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/GcsTransactionLogSynchronizer.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/GcsTransactionLogSynchronizer.java index 4c373e9de2237..e2a70cb542b04 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/GcsTransactionLogSynchronizer.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/writer/GcsTransactionLogSynchronizer.java @@ -20,9 +20,9 @@ import io.trino.spi.connector.ConnectorSession; import java.io.IOException; -import java.io.OutputStream; import java.io.UncheckedIOException; +import static io.airlift.slice.Slices.wrappedBuffer; import static java.util.Objects.requireNonNull; public class GcsTransactionLogSynchronizer @@ -42,8 +42,8 @@ public GcsTransactionLogSynchronizer(TrinoFileSystemFactory fileSystemFactory) public void write(ConnectorSession session, String clusterId, Location newLogEntryPath, byte[] entryContents) { TrinoFileSystem fileSystem = 
fileSystemFactory.create(session); - try (OutputStream outputStream = fileSystem.newOutputFile(newLogEntryPath).createExclusive()) { - outputStream.write(entryContents); + try { + fileSystem.newOutputFile(newLogEntryPath).createExclusive(wrappedBuffer(entryContents)); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java index 20b28c4392006..659c19caf2e9a 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java @@ -51,6 +51,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static com.google.common.base.Strings.repeat; import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; @@ -188,12 +189,13 @@ protected QueryRunner createQueryRunner() registerTableFromResources(table.tableName(), table.resourcePath(), queryRunner); }); - queryRunner.installPlugin(new TestingHivePlugin()); + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"))); queryRunner.createCatalog( "hive", "hive", ImmutableMap.builder() + .put("hive.metastore", "thrift") .put("hive.metastore.uri", "thrift://" + hiveHadoop.getHiveMetastoreEndpoint()) .put("hive.allow-drop-table", "true") .putAll(hiveStorageConfiguration()) @@ -390,6 +392,37 @@ public void testOptimizeRewritesTable() } } + @Test + public void testOptimizeTableWithSmallFileAndLargeFiles() + { + String tableName = "test_optimize_rewrites_table_with_small_and_large_file" + randomNameSuffix(); + String tableLocation = getLocationForTable(bucketName, tableName); + assertUpdate("CREATE TABLE " + tableName + " (key integer, value varchar) WITH (location = '" + tableLocation + "')"); + try { + // Adds a small file of size < 1 kB + assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'one')", 1); + // Adds other "large" files of size greater than 1 kB + assertUpdate("INSERT INTO " + tableName + " VALUES (2, '" + repeat("two", 1000) + "')", 1); + assertUpdate("INSERT INTO " + tableName + " VALUES (3, '" + repeat("three", 1000) + "')", 1); + + Set initialFiles = getActiveFiles(tableName); + assertThat(initialFiles).hasSize(3); + + for (int i = 0; i < 3; i++) { + computeActual("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE (file_size_threshold => '1kB')"); + Set filesAfterOptimize = getActiveFiles(tableName); + assertThat(filesAfterOptimize) + .containsExactlyInAnyOrderElementsOf(initialFiles); + } + assertQuery( + "SELECT * FROM " + tableName, + "VALUES (1, 'one'), (2, '%s'), (3, '%s')".formatted(repeat("two", 1000), repeat("three", 1000))); + } + finally { + assertUpdate("DROP TABLE " + tableName); + } + } + @Test public void testOptimizeRewritesPartitionedTable() { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeRegisterTableProcedureTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeRegisterTableProcedureTest.java index 8de5cbe0da438..a7773413ee122 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeRegisterTableProcedureTest.java +++ 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeRegisterTableProcedureTest.java @@ -15,8 +15,12 @@ import com.google.common.collect.ImmutableMap; import io.trino.Session; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.deltalake.metastore.TestingDeltaLakeMetastoreModule; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.spi.security.ConnectorIdentity; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; @@ -24,9 +28,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import java.io.File; import java.io.IOException; -import java.net.URI; import java.nio.file.Path; import java.util.Map; import java.util.Optional; @@ -34,11 +36,11 @@ import java.util.regex.Pattern; import static com.google.common.base.Verify.verify; -import static com.google.common.io.MoreFiles.deleteDirectoryContents; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.trino.plugin.deltalake.DeltaLakeConnectorFactory.CONNECTOR_NAME; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogJsonEntryPath; import static io.trino.testing.TestingNames.randomNameSuffix; @@ -51,7 +53,6 @@ public abstract class BaseDeltaLakeRegisterTableProcedureTest extends AbstractTestQueryFramework { - protected static final String CATALOG_NAME = "delta_lake"; protected static final String SCHEMA = "test_delta_lake_register_table_" + randomNameSuffix(); private Path dataDirectory; @@ -62,7 +63,7 @@ protected QueryRunner createQueryRunner() throws Exception { Session session = testSessionBuilder() - .setCatalog(CATALOG_NAME) + .setCatalog(DELTA_CATALOG) .setSchema(SCHEMA) .build(); DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); @@ -70,14 +71,14 @@ protected QueryRunner createQueryRunner() this.dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data"); this.metastore = createTestMetastore(dataDirectory); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); Map connectorProperties = ImmutableMap.builder() .put("delta.unique-table-location", "true") .put("delta.register-table-procedure.enabled", "true") .buildOrThrow(); - queryRunner.createCatalog(CATALOG_NAME, CONNECTOR_NAME, connectorProperties); + queryRunner.createCatalog(DELTA_CATALOG, CONNECTOR_NAME, connectorProperties); queryRunner.execute("CREATE SCHEMA " + SCHEMA); return queryRunner; @@ -170,7 +171,7 @@ public void testRegisterTableWithDifferentTableName() metastore.dropTable(SCHEMA, tableName, false); String tableNameNew = "test_register_table_with_different_table_name_new_" + randomNameSuffix(); - assertQuerySucceeds(format("CALL %s.system.register_table('%s', '%s', '%s')", CATALOG_NAME, SCHEMA, tableNameNew, tableLocation)); + 
assertQuerySucceeds(format("CALL %s.system.register_table('%s', '%s', '%s')", DELTA_CATALOG, SCHEMA, tableNameNew, tableLocation)); String showCreateTableNew = (String) computeScalar("SHOW CREATE TABLE " + tableNameNew); assertThat(showCreateTableOld).isEqualTo(showCreateTableNew.replaceFirst(tableNameNew, tableName)); @@ -183,7 +184,7 @@ public void testRegisterTableWithDifferentTableName() public void testRegisterTableWithTrailingSpaceInLocation() { String tableName = "test_register_table_with_trailing_space_" + randomNameSuffix(); - String tableLocationWithTrailingSpace = dataDirectory.toUri() + "/" + tableName + " "; + String tableLocationWithTrailingSpace = "local:///" + tableName + " "; assertQuerySucceeds(format("CREATE TABLE %s WITH (location = '%s') AS SELECT 1 AS a, 'INDIA' AS b, true AS c", tableName, tableLocationWithTrailingSpace)); assertQuery("SELECT * FROM " + tableName, "VALUES (1, 'INDIA', true)"); @@ -228,14 +229,18 @@ public void testRegisterTableWithInvalidDeltaTable() String tableNameNew = "test_register_table_with_no_transaction_log_new_" + randomNameSuffix(); // Delete files under transaction log directory and put an invalid log file to verify register_table call fails - String transactionLogDir = URI.create(getTransactionLogDir(tableLocation)).getPath(); - deleteDirectoryContents(Path.of(transactionLogDir), ALLOW_INSECURE); - new File("/" + getTransactionLogJsonEntryPath(transactionLogDir, 0).path()).createNewFile(); + DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner(); + TrinoFileSystem fileSystem = TestingDeltaLakeUtils.getConnectorService(queryRunner, TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + fileSystem.deleteDirectory(Location.of(tableLocation)); + fileSystem.newOutputFile(getTransactionLogJsonEntryPath(getTransactionLogDir(tableLocation), 0)) + .create() + .close(); assertQueryFails(format("CALL system.register_table('%s', '%s', '%s')", SCHEMA, tableNameNew, tableLocation), ".*Metadata not found in transaction log for (.*)"); - deleteRecursively(Path.of(URI.create(tableLocation).getPath()), ALLOW_INSECURE); + fileSystem.deleteDirectory(Location.of(tableLocation)); metastore.dropTable(SCHEMA, tableName, false); } @@ -252,12 +257,15 @@ public void testRegisterTableWithNoTransactionLog() String tableNameNew = "test_register_table_with_no_transaction_log_new_" + randomNameSuffix(); // Delete files under transaction log directory to verify register_table call fails - deleteDirectoryContents(Path.of(URI.create(getTransactionLogDir(tableLocation)).getPath()), ALLOW_INSECURE); + DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner(); + TrinoFileSystem fileSystem = TestingDeltaLakeUtils.getConnectorService(queryRunner, TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + fileSystem.deleteDirectory(Location.of(tableLocation)); assertQueryFails(format("CALL system.register_table('%s', '%s', '%s')", SCHEMA, tableNameNew, tableLocation), ".*No transaction log found in location (.*)"); - deleteRecursively(Path.of(URI.create(tableLocation).getPath()), ALLOW_INSECURE); + fileSystem.deleteDirectory(Location.of(tableLocation)); metastore.dropTable(SCHEMA, tableName, false); } @@ -363,7 +371,7 @@ private String getTableComment(String tableName) { return (String) computeScalar(format( "SELECT comment FROM system.metadata.table_comments WHERE catalog_name = '%s' AND schema_name = '%s' AND table_name = '%s'", - CATALOG_NAME, + DELTA_CATALOG, SCHEMA, tableName)); } diff 
--git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeSharedMetastoreViewsTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeSharedMetastoreViewsTest.java index 209c8c0629ac6..d2b416aefb037 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeSharedMetastoreViewsTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeSharedMetastoreViewsTest.java @@ -62,13 +62,13 @@ protected QueryRunner createQueryRunner() .build(); DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); - this.dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data"); + this.dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("shared_data"); this.metastore = createTestMetastore(dataDirectory); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); queryRunner.createCatalog(DELTA_CATALOG_NAME, "delta_lake"); - queryRunner.installPlugin(new TestingHivePlugin(metastore)); + queryRunner.installPlugin(new TestingHivePlugin(dataDirectory, metastore)); ImmutableMap hiveProperties = ImmutableMap.builder() .put("hive.allow-drop-table", "true") diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeTableWithCustomLocation.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeTableWithCustomLocation.java index 8c5f0130ad56e..f339634512297 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeTableWithCustomLocation.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeTableWithCustomLocation.java @@ -36,7 +36,6 @@ public abstract class BaseDeltaLakeTableWithCustomLocation extends AbstractTestQueryFramework { protected static final String SCHEMA = "test_tables_with_custom_location" + randomNameSuffix(); - protected static final String CATALOG_NAME = "delta_with_custom_location"; protected File metastoreDir; protected HiveMetastore metastore; diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java index 552b30658d629..8afdc0eca0c11 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java @@ -114,7 +114,7 @@ public DistributedQueryRunner build() queryRunner.installPlugin(new TpcdsPlugin()); queryRunner.createCatalog("tpcds", "tpcds"); - queryRunner.installPlugin(new TestingDeltaLakePlugin()); + queryRunner.installPlugin(new TestingDeltaLakePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data"))); queryRunner.createCatalog(catalogName, CONNECTOR_NAME, deltaProperties.buildOrThrow()); return queryRunner; diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestCloseIdleWriters.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestCloseIdleWriters.java new file mode 100644 index 0000000000000..865bdf9d55eb2 --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestCloseIdleWriters.java @@ -0,0 +1,103 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake; + +import com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.QueryRunner; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.Test; + +import java.nio.file.Files; +import java.nio.file.Path; + +import static io.trino.SystemSessionProperties.IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD; +import static io.trino.SystemSessionProperties.SCALE_WRITERS; +import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT; +import static io.trino.SystemSessionProperties.TASK_MIN_WRITER_COUNT; +import static io.trino.SystemSessionProperties.TASK_SCALE_WRITERS_ENABLED; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestCloseIdleWriters + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + Path metastoreDirectory = Files.createTempDirectory(DELTA_CATALOG); + metastoreDirectory.toFile().deleteOnExit(); + DistributedQueryRunner queryRunner = DeltaLakeQueryRunner.builder() + .setCatalogName(DELTA_CATALOG) + .setNodeCount(1) + // Set the target max file size to 100GB so that we don't close writers due to file size in append + // page. + .setDeltaProperties(ImmutableMap.of( + "hive.metastore", "file", + "hive.metastore.catalog.dir", metastoreDirectory.toUri().toString(), + "delta.target-max-file-size", "100GB", + "delta.idle-writer-min-file-size", "0.1MB")) + .build(); + queryRunner.execute("CREATE SCHEMA IF NOT EXISTS tpch"); + return queryRunner; + } + + @Test + public void testCloseIdleWriters() + { + String tableName = "task_close_idle_writers_" + randomNameSuffix(); + try { + // Create a table with two partitions (0 and 1). Using the order by trick we will write the partitions in + // this order 0, 1, and then again 0. This way we are sure that during partition 1 write there will + // be an idle writer for partition 0. Additionally, during second partition 0 write, there will be an idle + // writer for partition 1. 
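        // As a rough sketch (assuming the seven TPC-H ship modes and the CASE mapping in the SQL below),
        // ORDER BY shipmode produces roughly this sequence of partition values while writing:
        //   AIR, FOB              -> shipmodeVal 0   (a writer for partition 0 opens)
        //   MAIL, RAIL, REG AIR   -> shipmodeVal 1   (the partition 0 writer sits idle)
        //   SHIP, TRUCK           -> shipmodeVal 0   (the partition 1 writer sits idle, partition 0 resumes)
        // IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD is presumably the minimum amount of data an idle writer must have
        // written before it is eligible to be closed; setting it very low below lets those idle writers be closed,
        // which is what produces the extra files asserted on at the end of this test.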
+ @Language("SQL") String createTableSql = """ + CREATE TABLE %s WITH (partitioned_by = ARRAY['shipmodeVal']) + AS SELECT orderkey, partkey, suppkey, linenumber, quantity, extendedprice, + discount, tax, returnflag, linestatus, commitdate, receiptdate, shipinstruct, + comment, shipdate, + CASE + WHEN shipmode IN ('AIR', 'FOB', 'SHIP', 'TRUCK') THEN 0 + WHEN shipmode IN ('MAIL', 'RAIL', 'REG AIR') THEN 1 + ELSE 2 + END AS shipmodeVal + FROM tpch.tiny.lineitem + ORDER BY shipmode + LIMIT 60174 + """.formatted(tableName); + + // Disable all kind of scaling and set idle writer threshold to 10MB + assertUpdate( + Session.builder(getSession()) + .setSystemProperty(SCALE_WRITERS, "false") + .setSystemProperty(TASK_SCALE_WRITERS_ENABLED, "false") + .setSystemProperty(TASK_MAX_WRITER_COUNT, "1") + .setSystemProperty(TASK_MIN_WRITER_COUNT, "1") + .setSystemProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, "0.1MB") + .build(), + createTableSql, + 60174); + long files = (long) computeScalar("SELECT count(DISTINCT \"$path\") FROM " + tableName); + // There should more than 2 files since we triggered close idle writers. + assertThat(files).isGreaterThan(2); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + } + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 2d74baea49f71..0d649b1ed22ef 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -309,8 +309,8 @@ private void testOptimizeWithColumnMappingMode(String columnMappingMode) // Verify optimized parquet file contains the expected physical id and name TrinoInputFile inputFile = new LocalInputFile(tableLocation.resolve(addFileEntry.getPath()).toFile()); ParquetMetadata parquetMetadata = MetadataReader.readFooter( - new TrinoParquetDataSource(inputFile, new ParquetReaderOptions(), new FileFormatDataSourceStats()), - Optional.empty()); + new TrinoParquetDataSource(inputFile, new ParquetReaderOptions(), new FileFormatDataSourceStats()), + Optional.empty()); FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); PrimitiveType physicalType = getOnlyElement(fileMetaData.getSchema().getColumns().iterator()).getPrimitiveType(); assertThat(physicalType.getName()).isEqualTo(physicalName); @@ -590,23 +590,23 @@ private void testDeltaTimestampNtz(ZoneId sessionZone) assertThat(query(session, "SELECT * FROM " + tableName)) .matches(""" - VALUES - NULL, - TIMESTAMP '-9999-12-31 23:59:59.999999', - TIMESTAMP '-0001-01-01 00:00:00', - TIMESTAMP '0000-01-01 00:00:00', - TIMESTAMP '1582-10-05 00:00:00', - TIMESTAMP '1582-10-14 23:59:59.999999', - TIMESTAMP '2020-12-31 01:02:03.123456', - TIMESTAMP '9999-12-31 23:59:59.999999' - """); + VALUES + NULL, + TIMESTAMP '-9999-12-31 23:59:59.999999', + TIMESTAMP '-0001-01-01 00:00:00', + TIMESTAMP '0000-01-01 00:00:00', + TIMESTAMP '1582-10-05 00:00:00', + TIMESTAMP '1582-10-14 23:59:59.999999', + TIMESTAMP '2020-12-31 01:02:03.123456', + TIMESTAMP '9999-12-31 23:59:59.999999' + """); assertQuery( "SHOW STATS FOR " + tableName, """ - VALUES - ('x', null, null, 0.125, null, null, null), - (null, null, null, null, 8.0, null, null) - """); + VALUES + ('x', null, null, 0.125, null, null, null), + (null, null, null, null, 8.0, null, null) + """); // Verify the connector can insert into tables created by Databricks 
assertUpdate(session, "INSERT INTO " + tableName + " VALUES TIMESTAMP '2023-01-02 03:04:05.123456'", 1); @@ -833,11 +833,11 @@ private void testTimestampNtzPartitioned(ZoneId sessionZone) assertQuery( "SHOW STATS FOR " + tableName, """ - VALUES - ('id', null, null, 0.0, null, 1, 8), - ('part', null, 7.0, 0.125, null, null, null), - (null, null, null, null, 8.0, null, null) - """); + VALUES + ('id', null, null, 0.0, null, 1, 8), + ('part', null, 7.0, 0.125, null, null, null), + (null, null, null, null, 8.0, null, null) + """); // Verify the connector can insert into tables created by Databricks assertUpdate(session, "INSERT INTO " + tableName + " VALUES (9, TIMESTAMP '2023-01-02 03:04:05.123456')", 1); @@ -997,22 +997,22 @@ public void testStatsWithMinMaxValuesAsNulls() assertQuery( "SELECT * FROM stats_with_minmax_nulls", """ - VALUES - (0, 1), - (1, 2), - (3, 4), - (3, 7), - (NULL, NULL), - (NULL, NULL) - """); + VALUES + (0, 1), + (1, 2), + (3, 4), + (3, 7), + (NULL, NULL), + (NULL, NULL) + """); assertQuery( "SHOW STATS FOR stats_with_minmax_nulls", """ - VALUES - ('id', null, null, 0.3333333333333333, null, 0, 3), - ('id2', null, null, 0.3333333333333333, null, 1, 7), - (null, null, null, null, 6.0, null, null) - """); + VALUES + ('id', null, null, 0.3333333333333333, null, 0, 3), + ('id2', null, null, 0.3333333333333333, null, 1, 7), + (null, null, null, null, 6.0, null, null) + """); } /** diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java index 4a6306dc524f8..10213ebb2765a 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java @@ -27,6 +27,7 @@ import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; import static io.airlift.units.DataSize.Unit.GIGABYTE; +import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.plugin.hive.util.TestHiveUtil.nonDefaultTimeZone; import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.HOURS; @@ -53,7 +54,7 @@ public void testDefaults() .setMaxPartitionsPerWriter(100) .setUnsafeWritesEnabled(false) .setDefaultCheckpointWritingInterval(10) - .setCheckpointPartitionFilterEnabled(false) + .setCheckpointFilteringEnabled(false) .setCheckpointRowStatisticsWritingEnabled(true) .setVacuumMinRetention(new Duration(7, DAYS)) .setHiveCatalogName(null) @@ -66,6 +67,7 @@ public void testDefaults() .setParquetTimeZone(TimeZone.getDefault().getID()) .setPerTransactionMetastoreCacheMaximumSize(1000) .setTargetMaxFileSize(DataSize.of(1, GIGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(16, MEGABYTE)) .setUniqueTableLocation(true) .setRegisterTableProcedureEnabled(false) .setProjectionPushdownEnabled(true) @@ -103,6 +105,7 @@ public void testExplicitPropertyMappings() .put("delta.delete-schema-locations-fallback", "true") .put("delta.parquet.time-zone", nonDefaultTimeZone().getID()) .put("delta.target-max-file-size", "2 GB") + .put("delta.idle-writer-min-file-size", "1MB") .put("delta.unique-table-location", "false") .put("delta.register-table-procedure.enabled", "true") .put("delta.projection-pushdown-enabled", "false") @@ -125,7 +128,7 @@ public void testExplicitPropertyMappings() .setUnsafeWritesEnabled(true) 
.setDefaultCheckpointWritingInterval(15) .setCheckpointRowStatisticsWritingEnabled(false) - .setCheckpointPartitionFilterEnabled(true) + .setCheckpointFilteringEnabled(true) .setVacuumMinRetention(new Duration(13, HOURS)) .setHiveCatalogName("hive") .setDynamicFilteringWaitTimeout(new Duration(30, MINUTES)) @@ -137,6 +140,7 @@ public void testExplicitPropertyMappings() .setParquetTimeZone(nonDefaultTimeZone().getID()) .setPerTransactionMetastoreCacheMaximumSize(500) .setTargetMaxFileSize(DataSize.of(2, GIGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(1, MEGABYTE)) .setUniqueTableLocation(false) .setRegisterTableProcedureEnabled(true) .setProjectionPushdownEnabled(false) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java index fc51ec0c76394..3b0b4f0233afd 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java @@ -22,6 +22,7 @@ import io.trino.execution.QueryInfo; import io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.sql.planner.plan.FilterNode; import io.trino.sql.planner.plan.TableDeleteNode; @@ -41,7 +42,6 @@ import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; -import java.nio.file.Path; import java.time.ZonedDateTime; import java.util.List; import java.util.Map; @@ -60,7 +60,6 @@ import static io.trino.plugin.deltalake.DeltaLakeMetadata.CHANGE_DATA_FEED_COLUMN_NAMES; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey; import static io.trino.spi.type.VarcharType.VARCHAR; @@ -89,7 +88,6 @@ public class TestDeltaLakeConnectorTest protected final String bucketName = "test-bucket-" + randomNameSuffix(); protected MinioClient minioClient; - protected HiveMetastore metastore; @Override protected QueryRunner createQueryRunner() @@ -105,8 +103,6 @@ protected QueryRunner createQueryRunner() .setSchema(SCHEMA) .build()) .build(); - Path metastoreDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("file-metastore"); - metastore = createTestingFileHiveMetastore(metastoreDirectory.toFile()); try { queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog("tpch", "tpch"); @@ -114,7 +110,7 @@ protected QueryRunner createQueryRunner() queryRunner.installPlugin(new DeltaLakePlugin()); queryRunner.createCatalog(DELTA_CATALOG, DeltaLakeConnectorFactory.CONNECTOR_NAME, ImmutableMap.builder() .put("hive.metastore", "file") - .put("hive.metastore.catalog.dir", metastoreDirectory.toString()) + .put("hive.metastore.catalog.dir", queryRunner.getCoordinator().getBaseDataDir().resolve("file-metastore").toString()) .put("hive.metastore.disable-location-checks", "true") .put("hive.s3.aws-access-key", MINIO_ACCESS_KEY) .put("hive.s3.aws-secret-key", MINIO_SECRET_KEY) @@ -3390,6 +3386,10 @@ public 
void testTrinoCacheInvalidatedOnCreateTable() assertUpdate("CREATE TABLE " + tableName + "(id, boolean, tinyint) WITH (location = '" + tableLocation + "') AS " + initialValues, 5); assertThat(query("SELECT * FROM " + tableName)).matches(initialValues); + DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner(); + HiveMetastore metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + metastore.dropTable(SCHEMA, tableName, false); for (String file : minioClient.listObjects(bucketName, SCHEMA + "/" + tableName)) { minioClient.removeObject(bucketName, file); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 6470b9978c2f8..bf2467effc310 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -93,7 +93,8 @@ protected DistributedQueryRunner createQueryRunner() queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog("tpch", "tpch"); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.empty(), Optional.of(trackingFileSystemFactory), EMPTY_MODULE)); + Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data"); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.empty(), Optional.of(trackingFileSystemFactory), EMPTY_MODULE)); queryRunner.createCatalog( "delta_lake", "delta_lake", diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java index 8b006a851dc3a..2392455a57b21 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java @@ -475,7 +475,7 @@ public void testGetInputInfoForPartitionedTable() ImmutableList.of(BIGINT_COLUMN_1)); deltaLakeMetadata.createTable(SESSION, tableMetadata, false); DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) deltaLakeMetadata.getTableHandle(SESSION, tableMetadata.getTable()); - assertThat(deltaLakeMetadata.getInfo(tableHandle)).isEqualTo(Optional.of(new DeltaLakeInputInfo(true))); + assertThat(deltaLakeMetadata.getInfo(tableHandle)).isEqualTo(Optional.of(new DeltaLakeInputInfo(true, 0))); deltaLakeMetadata.cleanupQuery(SESSION); } @@ -488,7 +488,7 @@ public void testGetInputInfoForUnPartitionedTable() ImmutableList.of()); deltaLakeMetadata.createTable(SESSION, tableMetadata, false); DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) deltaLakeMetadata.getTableHandle(SESSION, tableMetadata.getTable()); - assertThat(deltaLakeMetadata.getInfo(tableHandle)).isEqualTo(Optional.of(new DeltaLakeInputInfo(false))); + assertThat(deltaLakeMetadata.getInfo(tableHandle)).isEqualTo(Optional.of(new DeltaLakeInputInfo(false, 0))); deltaLakeMetadata.cleanupQuery(SESSION); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeNodeLocalDynamicSplitPruning.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeNodeLocalDynamicSplitPruning.java new file mode 100644 index 0000000000000..df81d9e5a5364 --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeNodeLocalDynamicSplitPruning.java @@ -0,0 +1,384 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import io.airlift.testing.TempFile; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoOutputFile; +import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.filesystem.local.LocalInputFile; +import io.trino.filesystem.local.LocalOutputFile; +import io.trino.metadata.TableHandle; +import io.trino.parquet.writer.ParquetSchemaConverter; +import io.trino.parquet.writer.ParquetWriter; +import io.trino.parquet.writer.ParquetWriterOptions; +import io.trino.plugin.deltalake.transactionlog.MetadataEntry; +import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; +import io.trino.plugin.hive.FileFormatDataSourceStats; +import io.trino.plugin.hive.HiveTransactionHandle; +import io.trino.plugin.hive.parquet.ParquetReaderConfig; +import io.trino.plugin.hive.parquet.ParquetWriterConfig; +import io.trino.spi.Page; +import io.trino.spi.SplitWeight; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.DynamicFilter; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.predicate.ValueSet; +import io.trino.spi.type.DecimalType; +import io.trino.spi.type.SqlDecimal; +import io.trino.testing.TestingConnectorSession; +import org.apache.parquet.format.CompressionCodec; +import org.joda.time.DateTimeZone; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.file.Files; +import java.time.LocalDate; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalInt; +import java.util.Set; +import java.util.concurrent.CompletableFuture; + +import static io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY; +import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.Decimals.writeShortDecimal; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; +import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; +import static java.util.concurrent.CompletableFuture.completedFuture; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestDeltaLakeNodeLocalDynamicSplitPruning +{ + private static final ParquetReaderConfig 
PARQUET_READER_CONFIG = new ParquetReaderConfig(); + private static final ParquetWriterConfig PARQUET_WRITER_CONFIG = new ParquetWriterConfig(); + + @Test + public void testDynamicSplitPruningOnUnpartitionedTable() + throws IOException + { + String keyColumnName = "a_integer"; + DeltaLakeColumnHandle keyColumnHandle = new DeltaLakeColumnHandle(keyColumnName, INTEGER, OptionalInt.empty(), keyColumnName, INTEGER, REGULAR, Optional.empty()); + int keyColumnValue = 42; + String dataColumnName = "a_varchar"; + String dataColumnValue = "hello world"; + DeltaLakeColumnHandle dataColumnHandle = new DeltaLakeColumnHandle(dataColumnName, VARCHAR, OptionalInt.empty(), dataColumnName, VARCHAR, REGULAR, Optional.empty()); + ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter( + ImmutableList.of(INTEGER, VARCHAR), + ImmutableList.of(keyColumnName, dataColumnName), + false, + false); + + DeltaLakeConfig deltaLakeConfig = new DeltaLakeConfig(); + HiveTransactionHandle transaction = new HiveTransactionHandle(false); + try (TempFile file = new TempFile()) { + Files.delete(file.path()); + + TrinoOutputFile outputFile = new LocalOutputFile(file.file()); + TrinoInputFile inputFile = new LocalInputFile(file.file()); + + try (ParquetWriter writer = createParquetWriter(outputFile, schemaConverter)) { + BlockBuilder keyBuilder = INTEGER.createBlockBuilder(null, 1); + INTEGER.writeLong(keyBuilder, keyColumnValue); + BlockBuilder dataBuilder = VARCHAR.createBlockBuilder(null, 1); + VARCHAR.writeString(dataBuilder, dataColumnValue); + writer.write(new Page(keyBuilder.build(), dataBuilder.build())); + } + + DeltaLakeSplit split = new DeltaLakeSplit( + inputFile.location().toString(), + 0, + inputFile.length(), + inputFile.length(), + Optional.empty(), + 0, + Optional.empty(), + SplitWeight.standard(), + TupleDomain.all(), + ImmutableMap.of()); + + MetadataEntry metadataEntry = new MetadataEntry( + "id", + "name", + "description", + new MetadataEntry.Format("provider", ImmutableMap.of()), + "{\"type\":\"struct\",\"fields\":[{\"name\":\"a_integer\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a_varchar\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}", + ImmutableList.of(), + ImmutableMap.of(), + 0); + TableHandle tableHandle = new TableHandle( + TEST_CATALOG_HANDLE, + new DeltaLakeTableHandle( + "test_schema_name", + "unpartitioned_table", + true, + "test_location", + metadataEntry, + new ProtocolEntry(1, 2, Optional.empty(), Optional.empty()), + TupleDomain.all(), + TupleDomain.all(), + Optional.empty(), + Optional.of(Set.of(keyColumnHandle, dataColumnHandle)), + Optional.empty(), + Optional.empty(), + Optional.empty(), + 0), + transaction); + + TupleDomain splitPruningPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + keyColumnHandle, + Domain.singleValue(INTEGER, 1L))); + try (ConnectorPageSource emptyPageSource = createTestingPageSource(transaction, deltaLakeConfig, split, tableHandle, ImmutableList.of(keyColumnHandle, dataColumnHandle), getDynamicFilter(splitPruningPredicate))) { + assertThat(emptyPageSource.getNextPage()).isNull(); + } + + TupleDomain nonSelectivePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + keyColumnHandle, + Domain.singleValue(INTEGER, (long) keyColumnValue))); + try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource(transaction, deltaLakeConfig, split, tableHandle, ImmutableList.of(keyColumnHandle, dataColumnHandle), getDynamicFilter(nonSelectivePredicate))) { + Page page = 
nonEmptyPageSource.getNextPage(); + assertThat(page).isNotNull(); + assertThat(page.getPositionCount()).isEqualTo(1); + assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(keyColumnValue); + assertThat(page.getBlock(1).getSlice(0, 0, page.getBlock(1).getSliceLength(0)).toStringUtf8()).isEqualTo(dataColumnValue); + } + } + } + + @Test + public void testDynamicSplitPruningWithExplicitPartitionFilter() + throws IOException + { + String dateColumnName = "date"; + DeltaLakeColumnHandle dateColumnHandle = new DeltaLakeColumnHandle(dateColumnName, DATE, OptionalInt.empty(), dateColumnName, DATE, PARTITION_KEY, Optional.empty()); + long dateColumnValue = LocalDate.of(2023, 1, 10).toEpochDay(); + String receiptColumnName = "receipt"; + DeltaLakeColumnHandle receiptColumnHandle = new DeltaLakeColumnHandle(receiptColumnName, VARCHAR, OptionalInt.empty(), receiptColumnName, VARCHAR, REGULAR, Optional.empty()); + String receiptColumnValue = "#12345"; + String amountColumnName = "amount"; + DecimalType amountColumnType = DecimalType.createDecimalType(10, 2); + DeltaLakeColumnHandle amountColumnHandle = new DeltaLakeColumnHandle(amountColumnName, amountColumnType, OptionalInt.empty(), amountColumnName, amountColumnType, REGULAR, Optional.empty()); + BigDecimal amountColumnValue = new BigDecimal("1234567.65"); + ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter( + ImmutableList.of(VARCHAR, amountColumnType), + ImmutableList.of(receiptColumnName, amountColumnName), + false, + false); + + DeltaLakeConfig deltaLakeConfig = new DeltaLakeConfig(); + HiveTransactionHandle transaction = new HiveTransactionHandle(false); + try (TempFile file = new TempFile()) { + Files.delete(file.path()); + + TrinoOutputFile outputFile = new LocalOutputFile(file.file()); + TrinoInputFile inputFile = new LocalInputFile(file.file()); + + try (ParquetWriter writer = createParquetWriter(outputFile, schemaConverter)) { + BlockBuilder receiptBuilder = VARCHAR.createBlockBuilder(null, 1); + VARCHAR.writeString(receiptBuilder, receiptColumnValue); + BlockBuilder amountBuilder = amountColumnType.createBlockBuilder(null, 1); + writeShortDecimal(amountBuilder, amountColumnValue.unscaledValue().longValueExact()); + writer.write(new Page(receiptBuilder.build(), amountBuilder.build())); + } + + DeltaLakeSplit split = new DeltaLakeSplit( + inputFile.location().toString(), + 0, + inputFile.length(), + inputFile.length(), + Optional.empty(), + 0, + Optional.empty(), + SplitWeight.standard(), + TupleDomain.all(), + ImmutableMap.of(dateColumnName, Optional.of("2023-01-10"))); + + MetadataEntry metadataEntry = new MetadataEntry( + "id", + "name", + "description", + new MetadataEntry.Format("provider", ImmutableMap.of()), + "{\"type\":\"struct\",\"fields\":[{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"receipt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"amount\",\"type\":\"decimal(10,2)\",\"nullable\":true,\"metadata\":{}}]}", + ImmutableList.of(dateColumnName), + ImmutableMap.of(), + 0); + + TableHandle tableHandle = new TableHandle( + TEST_CATALOG_HANDLE, + new DeltaLakeTableHandle( + "test_schema_name", + "unpartitioned_table", + true, + "test_location", + metadataEntry, + new ProtocolEntry(1, 2, Optional.empty(), Optional.empty()), + TupleDomain.all(), + TupleDomain.all(), + Optional.empty(), + Optional.of(Set.of(dateColumnHandle, receiptColumnHandle, amountColumnHandle)), + Optional.empty(), + Optional.empty(), + Optional.empty(), + 0), + transaction); + + //
Simulate situations where the dynamic filter (e.g., while performing a JOIN with another table) considerably reduces + // the amount of data read from the current table + + TupleDomain differentDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.singleValue(DATE, LocalDate.of(2023, 2, 2).toEpochDay()))); + TupleDomain nonOverlappingDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(DATE, LocalDate.of(2023, 2, 2).toEpochDay())), true))); + for (TupleDomain partitionPredicate : List.of(differentDatePredicate, nonOverlappingDatePredicate)) { + try (ConnectorPageSource emptyPageSource = createTestingPageSource( + transaction, + deltaLakeConfig, + split, + tableHandle, + ImmutableList.of(dateColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + assertThat(emptyPageSource.getNextPage()).isNull(); + } + } + + TupleDomain sameDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.singleValue(DATE, dateColumnValue))); + TupleDomain overlappingDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.create(ValueSet.ofRanges(Range.range(DATE, LocalDate.of(2023, 1, 1).toEpochDay(), true, LocalDate.of(2023, 2, 1).toEpochDay(), false)), true))); + for (TupleDomain partitionPredicate : List.of(sameDatePredicate, overlappingDatePredicate)) { + try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource( + transaction, + deltaLakeConfig, + split, + tableHandle, + ImmutableList.of(dateColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + Page page = nonEmptyPageSource.getNextPage(); + assertThat(page).isNotNull(); + assertThat(page.getPositionCount()).isEqualTo(1); + assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(dateColumnValue); + assertThat(page.getBlock(1).getSlice(0, 0, page.getBlock(1).getSliceLength(0)).toStringUtf8()).isEqualTo(receiptColumnValue); + assertThat(((SqlDecimal) amountColumnType.getObjectValue(null, page.getBlock(2), 0)).toBigDecimal()).isEqualTo(amountColumnValue); + } + } + } + } + + private static ParquetWriter createParquetWriter(TrinoOutputFile outputFile, ParquetSchemaConverter schemaConverter) + throws IOException + { + return new ParquetWriter( + outputFile.create(), + schemaConverter.getMessageType(), + schemaConverter.getPrimitiveTypes(), + ParquetWriterOptions.builder().build(), + CompressionCodec.SNAPPY, + "test", + Optional.of(DateTimeZone.UTC), + Optional.empty()); + } + + private static ConnectorPageSource createTestingPageSource( + HiveTransactionHandle transaction, + DeltaLakeConfig deltaLakeConfig, + DeltaLakeSplit split, + TableHandle tableHandle, + List columns, + DynamicFilter dynamicFilter) + { + FileFormatDataSourceStats stats = new FileFormatDataSourceStats(); + DeltaLakePageSourceProvider provider = new DeltaLakePageSourceProvider( + new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS), + stats, + PARQUET_READER_CONFIG, + deltaLakeConfig, + TESTING_TYPE_MANAGER); + + return provider.createPageSource( + transaction, + getSession(deltaLakeConfig), + split, + tableHandle.getConnectorHandle(), + columns, + dynamicFilter); + } + + private static TestingConnectorSession getSession(DeltaLakeConfig deltaLakeConfig) + { + return TestingConnectorSession.builder() + .setPropertyMetadata(new DeltaLakeSessionProperties(deltaLakeConfig, 
PARQUET_READER_CONFIG, PARQUET_WRITER_CONFIG).getSessionProperties()) + .build(); + } + + private static DynamicFilter getDynamicFilter(TupleDomain tupleDomain) + { + return new DynamicFilter() + { + @Override + public Set getColumnsCovered() + { + return tupleDomain.getDomains().map(Map::keySet) + .orElseGet(ImmutableSet::of); + } + + @Override + public CompletableFuture isBlocked() + { + return completedFuture(null); + } + + @Override + public boolean isComplete() + { + return true; + } + + @Override + public boolean isAwaitable() + { + return false; + } + + @Override + public TupleDomain getCurrentPredicate() + { + return tupleDomain; + } + }; + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePerTransactionMetastoreCache.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePerTransactionMetastoreCache.java index 9103e73a1f4c5..244e00944e939 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePerTransactionMetastoreCache.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePerTransactionMetastoreCache.java @@ -16,82 +16,36 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; -import com.google.common.reflect.ClassPath; -import com.google.inject.Binder; -import com.google.inject.Key; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.Session; import io.trino.plugin.base.util.Closables; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastoreUtil; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.RawHiveMetastoreFactory; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; +import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.testing.DistributedQueryRunner; -import io.trino.testing.QueryRunner; -import io.trino.tpch.TpchEntity; -import io.trino.tpch.TpchTable; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; -import java.nio.file.Path; -import java.util.List; -import java.util.Optional; +import java.util.HashMap; +import java.util.Map; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.lang.String.format; -import static java.nio.file.Files.createDirectories; -import static java.nio.file.Files.write; -import static java.util.Objects.requireNonNull; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.createDeltaLakeQueryRunner; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; public class TestDeltaLakePerTransactionMetastoreCache { - private CountingAccessHiveMetastore metastore; - - private DistributedQueryRunner createQueryRunner(boolean enablePerTransactionHiveMetastoreCaching) + private static DistributedQueryRunner createQueryRunner(boolean enablePerTransactionHiveMetastoreCaching) throws Exception { - Session session = 
testSessionBuilder() - .setCatalog(DELTA_CATALOG) - .setSchema("default") - .build(); + Map deltaLakeProperties = new HashMap<>(); + deltaLakeProperties.put("delta.register-table-procedure.enabled", "true"); + if (!enablePerTransactionHiveMetastoreCaching) { + // almost disable the cache; 0 is not allowed as config property value + deltaLakeProperties.put("delta.per-transaction-metastore-cache-maximum-size", "1"); + } - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); + DistributedQueryRunner queryRunner = createDeltaLakeQueryRunner(DELTA_CATALOG, ImmutableMap.of(), deltaLakeProperties); try { - FileHiveMetastore fileMetastore = createTestingFileHiveMetastore(queryRunner.getCoordinator().getBaseDataDir().resolve("file-metastore").toFile()); - metastore = new CountingAccessHiveMetastore(fileMetastore); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.empty(), Optional.empty(), new CountingAccessMetastoreModule(metastore))); - - ImmutableMap.Builder deltaLakeProperties = ImmutableMap.builder(); - deltaLakeProperties.put("hive.metastore", "test"); // use test value so we do not get clash with default bindings) - deltaLakeProperties.put("delta.register-table-procedure.enabled", "true"); - if (!enablePerTransactionHiveMetastoreCaching) { - // almost disable the cache; 0 is not allowed as config property value - deltaLakeProperties.put("delta.per-transaction-metastore-cache-maximum-size", "1"); - } - - queryRunner.createCatalog(DELTA_CATALOG, "delta_lake", deltaLakeProperties.buildOrThrow()); - queryRunner.execute("CREATE SCHEMA " + session.getSchema().orElseThrow()); - - for (TpchTable table : List.of(TpchTable.NATION, TpchTable.REGION)) { - String tableName = table.getTableName(); - String resourcePath = "io/trino/plugin/deltalake/testing/resources/databricks73/" + tableName + "/"; - Path tableDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("%s-%s".formatted(tableName, randomNameSuffix())); - - for (ClassPath.ResourceInfo resourceInfo : ClassPath.from(getClass().getClassLoader()).getResources()) { - if (resourceInfo.getResourceName().startsWith(resourcePath)) { - Path targetFile = tableDirectory.resolve(resourceInfo.getResourceName().substring(resourcePath.length())); - createDirectories(targetFile.getParent()); - write(targetFile, resourceInfo.asByteSource().read()); - } - } - - queryRunner.execute(format("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')", tableName, tableDirectory)); - } + queryRunner.execute("CREATE TABLE nation AS SELECT * FROM tpch.tiny.nation"); + queryRunner.execute("CREATE TABLE region AS SELECT * FROM tpch.tiny.region"); } catch (Throwable e) { Closables.closeAllSuppress(e, queryRunner); @@ -101,24 +55,6 @@ private DistributedQueryRunner createQueryRunner(boolean enablePerTransactionHiv return queryRunner; } - private static class CountingAccessMetastoreModule - extends AbstractConfigurationAwareModule - { - private final CountingAccessHiveMetastore metastore; - - public CountingAccessMetastoreModule(CountingAccessHiveMetastore metastore) - { - this.metastore = requireNonNull(metastore, "metastore is null"); - } - - @Override - protected void setup(Binder binder) - { - binder.bind(HiveMetastoreFactory.class).annotatedWith(RawHiveMetastoreFactory.class).toInstance(HiveMetastoreFactory.ofInstance(metastore)); - binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(false); - } - } - @Test public void testPerTransactionHiveMetastoreCachingEnabled() throws Exception 
@@ -126,7 +62,7 @@ public void testPerTransactionHiveMetastoreCachingEnabled() try (DistributedQueryRunner queryRunner = createQueryRunner(true)) { // Verify cache works; we expect only two calls to `getTable` because we have two tables in a query. assertMetastoreInvocations(queryRunner, "SELECT * FROM nation JOIN region ON nation.regionkey = region.regionkey", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -138,14 +74,14 @@ public void testPerTransactionHiveMetastoreCachingDisabled() { try (DistributedQueryRunner queryRunner = createQueryRunner(false)) { assertMetastoreInvocations(queryRunner, "SELECT * FROM nation JOIN region ON nation.regionkey = region.regionkey", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } } - private void assertMetastoreInvocations(QueryRunner queryRunner, @Language("SQL") String query, Multiset expectedInvocations) + private static void assertMetastoreInvocations(DistributedQueryRunner queryRunner, @Language("SQL") String query, Multiset expectedInvocations) { - CountingAccessHiveMetastoreUtil.assertMetastoreInvocations(metastore, queryRunner, queryRunner.getDefaultSession(), query, expectedInvocations); + assertMetastoreInvocationsForQuery(queryRunner, queryRunner.getDefaultSession(), query, expectedInvocations); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java deleted file mode 100644 index cf64442880cc0..0000000000000 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.deltalake; - -import com.google.common.collect.ImmutableMap; -import io.airlift.bootstrap.ApplicationConfigurationException; -import io.trino.plugin.hive.HiveConfig; -import io.trino.spi.Plugin; -import io.trino.spi.connector.Connector; -import io.trino.spi.connector.ConnectorFactory; -import io.trino.testing.TestingConnectorContext; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.nio.file.Files; - -import static com.google.common.collect.Iterables.getOnlyElement; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestDeltaLakePlugin -{ - @Test - public void testCreateConnector() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testCreateTestingConnector() - { - Plugin plugin = new TestingDeltaLakePlugin(); - ConnectorFactory factory = getOnlyElement(plugin.getConnectorFactories()); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testTestingFileMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "file", - "hive.metastore.catalog.dir", "/tmp", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testThriftMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "thrift", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - - assertThatThrownBy(() -> factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "thrift", - "hive.metastore.uri", "thrift://foo:1234", - "delta.hide-non-delta-lake-tables", "true", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .isInstanceOf(ApplicationConfigurationException.class) - // TODO support delta.hide-non-delta-lake-tables with thrift metastore - .hasMessageContaining("Error: Configuration property 'delta.hide-non-delta-lake-tables' was not used"); - } - - @Test - public void testGlueMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "glue", - "hive.metastore.glue.region", "us-east-2", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - - assertThatThrownBy(() -> factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "glue", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .isInstanceOf(ApplicationConfigurationException.class) - .hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used"); - } - - @Test - public void testNoCaching() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create("test", - ImmutableMap.of( - "hive.metastore.uri", "thrift://foo:1234", - "delta.metadata.cache-ttl", "0s", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testNoActiveDataFilesCaching() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create("test", - ImmutableMap.of( - "hive.metastore.uri", "thrift://foo:1234", - 
"delta.metadata.live-files.cache-ttl", "0s", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testHiveConfigIsNotBound() - { - ConnectorFactory factory = getConnectorFactory(); - assertThatThrownBy(() -> factory.create("test", - ImmutableMap.of( - "hive.metastore.uri", "thrift://foo:1234", - // Try setting any property provided by HiveConfig class - HiveConfig.CONFIGURATION_HIVE_PARTITION_PROJECTION_ENABLED, "true", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .hasMessageContaining("Error: Configuration property 'hive.partition-projection-enabled' was not used"); - } - - @Test - public void testReadOnlyAllAccessControl() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.builder() - .put("hive.metastore.uri", "thrift://foo:1234") - .put("delta.security", "read-only") - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testSystemAccessControl() - { - ConnectorFactory factory = getConnectorFactory(); - Connector connector = factory.create( - "test", - ImmutableMap.builder() - .put("hive.metastore.uri", "thrift://foo:1234") - .put("delta.security", "system") - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext()); - assertThatThrownBy(connector::getAccessControl).isInstanceOf(UnsupportedOperationException.class); - connector.shutdown(); - } - - @Test - public void testFileBasedAccessControl() - throws Exception - { - ConnectorFactory factory = getConnectorFactory(); - File tempFile = File.createTempFile("test-delta-lake-plugin-access-control", ".json"); - tempFile.deleteOnExit(); - Files.writeString(tempFile.toPath(), "{}"); - - factory.create( - "test", - ImmutableMap.builder() - .put("hive.metastore.uri", "thrift://foo:1234") - .put("delta.security", "file") - .put("security.config-file", tempFile.getAbsolutePath()) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext()) - .shutdown(); - } - - private static ConnectorFactory getConnectorFactory() - { - return getOnlyElement(new DeltaLakePlugin().getConnectorFactories()); - } -} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeProjectionPushdownPlans.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeProjectionPushdownPlans.java index 07448e7d968f4..ac8c42a1b250c 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeProjectionPushdownPlans.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeProjectionPushdownPlans.java @@ -19,9 +19,9 @@ import io.trino.Session; import io.trino.metadata.QualifiedObjectName; import io.trino.metadata.TableHandle; -import io.trino.plugin.deltalake.metastore.TestingDeltaLakeMetastoreModule; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; @@ -32,10 +32,10 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; +import java.nio.file.Path; import java.util.List; import java.util.Map; import java.util.Optional; @@ -44,8 +44,7 @@ import static 
com.google.common.base.Predicates.equalTo; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.sql.planner.assertions.PlanMatchPattern.any; import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree; @@ -65,25 +64,33 @@ public class TestDeltaLakeProjectionPushdownPlans extends BasePushdownPlanTest { - private static final String CATALOG = "delta"; private static final String SCHEMA = "test_schema"; - private File baseDir; + private Path baseDir; @Override protected LocalQueryRunner createLocalQueryRunner() { Session session = testSessionBuilder() - .setCatalog(CATALOG) + .setCatalog(DELTA_CATALOG) .setSchema(SCHEMA) .build(); try { - baseDir = Files.createTempDirectory("delta_lake_projection_pushdown").toFile(); + baseDir = Files.createTempDirectory("delta_lake_projection_pushdown"); } catch (IOException e) { throw new UncheckedIOException(e); } - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); + + LocalQueryRunner queryRunner = LocalQueryRunner.create(session); + queryRunner.installPlugin(new TestingDeltaLakePlugin(baseDir)); + queryRunner.createCatalog(DELTA_CATALOG, "delta_lake", ImmutableMap.builder() + .put("hive.metastore", "file") + .put("hive.metastore.catalog.dir", baseDir.toString()) + .buildOrThrow()); + + HiveMetastore metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); Database database = Database.builder() .setDatabaseName(SCHEMA) .setOwnerName(Optional.of("public")) @@ -92,10 +99,6 @@ protected LocalQueryRunner createLocalQueryRunner() metastore.createDatabase(database); - LocalQueryRunner queryRunner = LocalQueryRunner.create(session); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); - queryRunner.createCatalog(CATALOG, "delta_lake", ImmutableMap.of()); - return queryRunner; } @@ -104,7 +107,7 @@ public void cleanup() throws Exception { if (baseDir != null) { - deleteRecursively(baseDir.toPath(), ALLOW_INSECURE); + deleteRecursively(baseDir, ALLOW_INSECURE); } } @@ -114,7 +117,7 @@ public void testPushdownDisabled() String testTable = "test_pushdown_disabled_" + randomNameSuffix(); Session session = Session.builder(getQueryRunner().getDefaultSession()) - .setCatalogSessionProperty(CATALOG, "projection_pushdown_enabled", "false") + .setCatalogSessionProperty(DELTA_CATALOG, "projection_pushdown_enabled", "false") .build(); getQueryRunner().execute(format( @@ -134,7 +137,7 @@ public void testPushdownDisabled() public void testDereferencePushdown() { String testTable = "test_simple_projection_pushdown" + randomNameSuffix(); - QualifiedObjectName completeTableName = new QualifiedObjectName(CATALOG, SCHEMA, testTable); + QualifiedObjectName completeTableName = new QualifiedObjectName(DELTA_CATALOG, SCHEMA, testTable); getQueryRunner().execute(format( "CREATE TABLE %s (col0, col1) WITH (partitioned_by = ARRAY['col1']) AS" + diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedFileMetastoreWithTableRedirections.java 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedFileMetastoreWithTableRedirections.java index 442456778740e..4db9ee5d73058 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedFileMetastoreWithTableRedirections.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedFileMetastoreWithTableRedirections.java @@ -47,7 +47,7 @@ protected QueryRunner createQueryRunner() DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(deltaLakeSession).build(); dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("data"); - queryRunner.installPlugin(new TestingDeltaLakePlugin()); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory)); Map deltaLakeProperties = ImmutableMap.builder() .put("hive.metastore", "file") .put("hive.metastore.catalog.dir", dataDirectory.toString()) @@ -58,7 +58,7 @@ protected QueryRunner createQueryRunner() queryRunner.createCatalog("delta_with_redirections", CONNECTOR_NAME, deltaLakeProperties); queryRunner.execute("CREATE SCHEMA " + schema); - queryRunner.installPlugin(new TestingHivePlugin()); + queryRunner.installPlugin(new TestingHivePlugin(dataDirectory)); queryRunner.createCatalog( "hive_with_redirections", diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedGlueMetastoreWithTableRedirections.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedGlueMetastoreWithTableRedirections.java index 83f43ff8940a8..9e6527a08ace1 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedGlueMetastoreWithTableRedirections.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedGlueMetastoreWithTableRedirections.java @@ -70,7 +70,7 @@ protected QueryRunner createQueryRunner() .buildOrThrow()); this.glueMetastore = createTestingGlueHiveMetastore(dataDirectory); - queryRunner.installPlugin(new TestingHivePlugin(glueMetastore)); + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"), glueMetastore)); queryRunner.createCatalog( "hive_with_redirections", "hive", diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedHiveMetastoreWithViews.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedHiveMetastoreWithViews.java index c7b62bd865e30..e84783eb94a81 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedHiveMetastoreWithViews.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSharedHiveMetastoreWithViews.java @@ -56,7 +56,7 @@ protected QueryRunner createQueryRunner() hiveMinioDataLake.getHiveHadoop()); queryRunner.execute("CREATE SCHEMA " + schema + " WITH (location = 's3://" + bucketName + "/" + schema + "')"); - queryRunner.installPlugin(new TestingHivePlugin()); + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"))); Map s3Properties = ImmutableMap.builder() .put("hive.s3.aws-access-key", MINIO_ACCESS_KEY) .put("hive.s3.aws-secret-key", MINIO_SECRET_KEY) @@ -67,6 +67,7 @@ protected QueryRunner createQueryRunner() "hive", "hive", ImmutableMap.builder() + .put("hive.metastore", "thrift") .put("hive.metastore.uri", "thrift://" + hiveMinioDataLake.getHiveHadoop().getHiveMetastoreEndpoint()) 
.put("hive.allow-drop-table", "true") .putAll(s3Properties) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java index 20ba4c7e23436..1971f297ee58c 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java @@ -19,7 +19,9 @@ import io.airlift.json.JsonCodec; import io.airlift.json.JsonCodecFactory; import io.airlift.units.DataSize; +import io.trino.filesystem.Location; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.filesystem.memory.MemoryFileSystemFactory; import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess; import io.trino.plugin.deltalake.statistics.ExtendedStatistics; import io.trino.plugin.deltalake.statistics.MetaDirStatisticsAccess; @@ -38,7 +40,6 @@ import io.trino.plugin.hive.HiveTransactionHandle; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.UnimplementedHiveMetastore; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hive.parquet.ParquetWriterConfig; import io.trino.spi.SplitWeight; @@ -63,6 +64,7 @@ import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; +import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static org.assertj.core.api.Assertions.assertThat; public class TestDeltaLakeSplitManager @@ -208,7 +210,7 @@ public List getActiveFiles( JsonCodec.jsonCodec(LastCheckpoint.class)); DeltaLakeMetadataFactory metadataFactory = new DeltaLakeMetadataFactory( - HiveMetastoreFactory.ofInstance(new UnimplementedHiveMetastore()), + HiveMetastoreFactory.ofInstance(createTestingFileHiveMetastore(new MemoryFileSystemFactory(), Location.of("memory:///"))), hdfsFileSystemFactory, transactionLogAccess, typeManager, diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingGlueMetastore.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingGlueMetastore.java index 2f4f64b448e76..7d392a008ad44 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingGlueMetastore.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingGlueMetastore.java @@ -26,6 +26,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.deltalake.DeltaLakeConnectorFactory.CONNECTOR_NAME; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingGlueHiveMetastore; import static io.trino.testing.TestingSession.testSessionBuilder; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -41,7 +42,7 @@ protected QueryRunner createQueryRunner() throws Exception { Session deltaLakeSession = testSessionBuilder() - .setCatalog(CATALOG_NAME) + .setCatalog(DELTA_CATALOG) 
.setSchema(SCHEMA) .build(); @@ -52,7 +53,7 @@ protected QueryRunner createQueryRunner() queryRunner.installPlugin(new DeltaLakePlugin()); queryRunner.createCatalog( - CATALOG_NAME, + DELTA_CATALOG, CONNECTOR_NAME, ImmutableMap.builder() .put("hive.metastore", "glue") diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingHiveMetastore.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingHiveMetastore.java index eea54a4d91bcc..55d3549b25a9f 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingHiveMetastore.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeTableWithCustomLocationUsingHiveMetastore.java @@ -14,15 +14,17 @@ package io.trino.plugin.deltalake; import io.trino.Session; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import java.nio.file.Files; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import static io.trino.plugin.deltalake.DeltaLakeConnectorFactory.CONNECTOR_NAME; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.testing.TestingSession.testSessionBuilder; public class TestDeltaLakeTableWithCustomLocationUsingHiveMetastore @@ -33,7 +35,7 @@ protected QueryRunner createQueryRunner() throws Exception { Session session = testSessionBuilder() - .setCatalog(CATALOG_NAME) + .setCatalog(DELTA_CATALOG) .setSchema(SCHEMA) .build(); @@ -42,13 +44,15 @@ protected QueryRunner createQueryRunner() Map connectorProperties = new HashMap<>(); metastoreDir = Files.createTempDirectory("test_delta_lake").toFile(); - metastore = createTestingFileHiveMetastore(metastoreDir); connectorProperties.putIfAbsent("delta.unique-table-location", "true"); connectorProperties.putIfAbsent("hive.metastore", "file"); connectorProperties.putIfAbsent("hive.metastore.catalog.dir", metastoreDir.getPath()); - queryRunner.installPlugin(new TestingDeltaLakePlugin()); - queryRunner.createCatalog(CATALOG_NAME, CONNECTOR_NAME, connectorProperties); + queryRunner.installPlugin(new TestingDeltaLakePlugin(metastoreDir.toPath())); + queryRunner.createCatalog(DELTA_CATALOG, CONNECTOR_NAME, connectorProperties); + + metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); queryRunner.execute("CREATE SCHEMA " + SCHEMA); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestPredicatePushdown.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestPredicatePushdown.java index ec95becbea822..f09ef7ed3d2fc 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestPredicatePushdown.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestPredicatePushdown.java @@ -15,21 +15,27 @@ import com.google.common.collect.ContiguousSet; import com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.operator.OperatorStats; import io.trino.plugin.hive.containers.HiveMinioDataLake; import io.trino.spi.QueryId; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; 
import io.trino.testing.MaterializedResultWithQueryId; import io.trino.testing.MaterializedRow; import io.trino.testing.QueryRunner; +import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import java.nio.file.Path; import java.util.OptionalLong; import java.util.Set; +import static com.google.common.collect.MoreCollectors.onlyElement; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.createS3DeltaLakeQueryRunner; +import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; @@ -130,6 +136,42 @@ public void testUpdatePushdown() table)); } + @Test + public void testIgnoreParquetStatistics() + { + String table = testTable.register("ignore_parquet_statistics"); + @Language("SQL") String query = "SELECT * FROM " + table + " WHERE custkey = 1450"; + + DistributedQueryRunner queryRunner = getDistributedQueryRunner(); + MaterializedResultWithQueryId resultWithoutParquetStatistics = queryRunner.executeWithQueryId( + Session.builder(getSession()) + .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "parquet_ignore_statistics", "true") + .build(), + query); + OperatorStats queryStatsWithoutParquetStatistics = getOperatorStats(resultWithoutParquetStatistics.getQueryId()); + assertThat(queryStatsWithoutParquetStatistics.getPhysicalInputPositions()).isGreaterThan(0); + + MaterializedResultWithQueryId resultWithParquetStatistics = queryRunner.executeWithQueryId(getSession(), query); + OperatorStats queryStatsWithParquetStatistics = getOperatorStats(resultWithParquetStatistics.getQueryId()); + assertThat(queryStatsWithParquetStatistics.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(queryStatsWithParquetStatistics.getPhysicalInputPositions()) + .isLessThan(queryStatsWithoutParquetStatistics.getPhysicalInputPositions()); + + assertEqualsIgnoreOrder(resultWithParquetStatistics.getResult(), resultWithoutParquetStatistics.getResult()); + } + + private OperatorStats getOperatorStats(QueryId queryId) + { + return getDistributedQueryRunner().getCoordinator() + .getQueryManager() + .getFullQueryInfo(queryId) + .getQueryStats() + .getOperatorSummaries() + .stream() + .filter(summary -> summary.getOperatorType().startsWith("TableScan") || summary.getOperatorType().startsWith("Scan")) + .collect(onlyElement()); + } + /** * Assert on the number of rows read and updated by a read operation * diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java index 0a39614c6292d..08beafc3e94d4 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java @@ -33,7 +33,9 @@ import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics; import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.parquet.ParquetReaderConfig; +import io.trino.plugin.hive.parquet.ParquetWriterConfig; import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.predicate.TupleDomain; import 
io.trino.spi.type.DateTimeEncoding; @@ -41,6 +43,7 @@ import io.trino.spi.type.IntegerType; import io.trino.spi.type.TypeManager; import io.trino.testing.TestingConnectorContext; +import io.trino.testing.TestingConnectorSession; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; @@ -206,6 +209,16 @@ public void testGetActiveAddEntries() .collect(Collectors.toSet()); assertThat(paths).isEqualTo(EXPECTED_ADD_FILE_PATHS); + ConnectorSession checkpointFilteringSession = TestingConnectorSession.builder() + .setPropertyMetadata(new DeltaLakeSessionProperties( + new DeltaLakeConfig().setCheckpointFilteringEnabled(true), + new ParquetReaderConfig(), + new ParquetWriterConfig()) + .getSessionProperties()) + .build(); + List checkpointFilteredAddFileEntries = transactionLogAccess.getActiveFiles(tableSnapshot, metadataEntry, protocolEntry, checkpointFilteringSession); + assertThat(checkpointFilteredAddFileEntries).isEqualTo(addFileEntries); + AddFileEntry addFileEntry = addFileEntries .stream() .filter(entry -> entry.getPath().equals("age=42/part-00000-b26c891a-7288-4d96-9d3b-bef648f12a34.c000.snappy.parquet")) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakePlugin.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakePlugin.java index 3fc363f27b55f..f75748e2e42c1 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakePlugin.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakePlugin.java @@ -17,31 +17,38 @@ import com.google.inject.Module; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; +import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorContext; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.List; import java.util.Map; import java.util.Optional; +import static com.google.inject.multibindings.MapBinder.newMapBinder; import static com.google.inject.util.Modules.EMPTY_MODULE; +import static io.airlift.configuration.ConfigBinder.configBinder; import static java.util.Objects.requireNonNull; public class TestingDeltaLakePlugin extends DeltaLakePlugin { + private final Path localFileSystemRootPath; private final Optional metastoreModule; private final Optional fileSystemFactory; private final Module additionalModule; - public TestingDeltaLakePlugin() + public TestingDeltaLakePlugin(Path localFileSystemRootPath) { - this(Optional.empty(), Optional.empty(), EMPTY_MODULE); + this(localFileSystemRootPath, Optional.empty(), Optional.empty(), EMPTY_MODULE); } - public TestingDeltaLakePlugin(Optional metastoreModule, Optional fileSystemFactory, Module additionalModule) + public TestingDeltaLakePlugin(Path localFileSystemRootPath, Optional metastoreModule, Optional fileSystemFactory, Module additionalModule) { + this.localFileSystemRootPath = requireNonNull(localFileSystemRootPath, "localFileSystemRootPath is null"); this.metastoreModule = requireNonNull(metastoreModule, "metastoreModule is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.additionalModule = requireNonNull(additionalModule, "additionalModule is null"); @@ -61,6 +68,7 @@ public String getName() @Override public Connector create(String catalogName, 
Map config, ConnectorContext context) { + localFileSystemRootPath.toFile().mkdirs(); return InternalDeltaLakeConnectorFactory.createConnector( catalogName, config, @@ -74,6 +82,9 @@ protected void setup(Binder binder) { install(additionalModule); install(new TestingDeltaLakeExtensionsModule()); + newMapBinder(binder, String.class, TrinoFileSystemFactory.class) + .addBinding("local").toInstance(new LocalFileSystemFactory(localFileSystemRootPath)); + configBinder(binder).bindConfigDefaults(FileHiveMetastoreConfig.class, config -> config.setCatalogDirectory("local:///")); } }); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java index 251985eb1afe0..c9fe01e010d42 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java @@ -20,6 +20,7 @@ import io.trino.plugin.deltalake.transactionlog.TransactionLogAccess; import io.trino.spi.connector.SchemaTableName; import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.LocalQueryRunner; import java.io.IOException; import java.nio.file.Files; @@ -35,6 +36,11 @@ public final class TestingDeltaLakeUtils { private TestingDeltaLakeUtils() {} + public static T getConnectorService(LocalQueryRunner queryRunner, Class clazz) + { + return ((DeltaLakeConnector) queryRunner.getConnector(DELTA_CATALOG)).getInjector().getInstance(clazz); + } + public static T getConnectorService(DistributedQueryRunner queryRunner, Class clazz) { return ((DeltaLakeConnector) queryRunner.getCoordinator().getConnector(DELTA_CATALOG)).getInjector().getInstance(clazz); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java index dda0004e52361..8c8354c3603e2 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java @@ -16,88 +16,44 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; -import com.google.inject.Binder; -import com.google.inject.Key; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.Session; -import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; -import io.trino.plugin.deltalake.TestingDeltaLakePlugin; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastoreUtil; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.RawHiveMetastoreFactory; +import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import java.io.File; -import java.util.Optional; - -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.CREATE_TABLE; -import static 
io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.DROP_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_DATABASES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_TABLES_FROM_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.util.Objects.requireNonNull; +import java.util.Map; + +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.createDeltaLakeQueryRunner; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.DROP_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_DATABASES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_DATABASE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @Execution(SAME_THREAD) // metastore invocation counters shares mutable state so can't be run from many threads simultaneously public class TestDeltaLakeMetastoreAccessOperations extends AbstractTestQueryFramework { - private static final Session TEST_SESSION = testSessionBuilder() - .setCatalog("delta") - .setSchema("test_schema") - .build(); - - private CountingAccessHiveMetastore metastore; - @Override protected DistributedQueryRunner createQueryRunner() throws Exception { - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(TEST_SESSION).build(); - - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake").toFile(); - metastore = new CountingAccessHiveMetastore(createTestingFileHiveMetastore(baseDir)); - - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.empty(), Optional.empty(), new CountingAccessMetastoreModule(metastore))); - ImmutableMap.Builder deltaLakeProperties = ImmutableMap.builder(); - deltaLakeProperties.put("hive.metastore", "test"); // use test value so we do not get clash with default bindings) - queryRunner.createCatalog("delta", "delta_lake", deltaLakeProperties.buildOrThrow()); - + DistributedQueryRunner queryRunner = createDeltaLakeQueryRunner(DELTA_CATALOG, ImmutableMap.of(), Map.of()); queryRunner.execute("CREATE SCHEMA test_schema"); return queryRunner; } - private static class CountingAccessMetastoreModule - extends AbstractConfigurationAwareModule - { - private final CountingAccessHiveMetastore metastore; - - public CountingAccessMetastoreModule(CountingAccessHiveMetastore metastore) - { - this.metastore = requireNonNull(metastore, "metastore is null"); - } - - @Override - protected void setup(Binder binder) - { - binder.bind(HiveMetastoreFactory.class).annotatedWith(RawHiveMetastoreFactory.class).toInstance(HiveMetastoreFactory.ofInstance(metastore)); - binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(false); - } - } - @Test public void testCreateTable() { 
assertMetastoreInvocations("CREATE TABLE test_create (id VARCHAR, age INT)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(CREATE_TABLE) .add(GET_DATABASE) .add(GET_TABLE) @@ -108,7 +64,7 @@ public void testCreateTable() public void testCreateTableAsSelect() { assertMetastoreInvocations("CREATE TABLE test_ctas AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(CREATE_TABLE) .add(GET_TABLE) @@ -121,7 +77,7 @@ public void testSelect() assertUpdate("CREATE TABLE test_select_from (id VARCHAR, age INT)"); assertMetastoreInvocations("SELECT * FROM test_select_from", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -132,7 +88,7 @@ public void testSelectWithFilter() assertUpdate("CREATE TABLE test_select_from_where AS SELECT 2 as age", 1); assertMetastoreInvocations("SELECT * FROM test_select_from_where WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -144,7 +100,7 @@ public void testSelectFromView() assertUpdate("CREATE VIEW test_select_view_view AS SELECT id, age FROM test_select_view_table"); assertMetastoreInvocations("SELECT * FROM test_select_view_view", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -156,7 +112,7 @@ public void testSelectFromViewWithFilter() assertUpdate("CREATE VIEW test_select_view_where_view AS SELECT age FROM test_select_view_where_table"); assertMetastoreInvocations("SELECT * FROM test_select_view_where_view WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -195,7 +151,7 @@ public void testJoin() assertUpdate("CREATE TABLE test_join_t2 AS SELECT 'name1' as name, 'id1' AS id", 1); assertMetastoreInvocations("SELECT name, age FROM test_join_t1 JOIN test_join_t2 ON test_join_t2.id = test_join_t1.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -206,7 +162,7 @@ public void testSelfJoin() assertUpdate("CREATE TABLE test_self_join_table AS SELECT 2 as age, 0 parent, 3 AS id", 1); assertMetastoreInvocations("SELECT child.age, parent.age FROM test_self_join_table child JOIN test_self_join_table parent ON child.parent = parent.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -217,7 +173,7 @@ public void testExplainSelect() assertUpdate("CREATE TABLE test_explain AS SELECT 2 as age", 1); assertMetastoreInvocations("EXPLAIN SELECT * FROM test_explain", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -228,7 +184,7 @@ public void testShowStatsForTable() assertUpdate("CREATE TABLE test_show_stats AS SELECT 2 as age", 1); assertMetastoreInvocations("SHOW STATS FOR test_show_stats", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -239,7 +195,7 @@ public void testShowStatsForTableWithFilter() assertUpdate("CREATE TABLE test_show_stats_with_filter AS SELECT 2 as age", 1); assertMetastoreInvocations("SHOW STATS FOR (SELECT * FROM test_show_stats_with_filter where age >= 2)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -250,7 +206,7 @@ public void testDropTable() assertUpdate("CREATE TABLE test_drop_table AS SELECT 20050910 as a_number", 1); assertMetastoreInvocations("DROP TABLE test_drop_table", - ImmutableMultiset.builder() + ImmutableMultiset.builder() 
.add(GET_TABLE) .add(DROP_TABLE) .build()); @@ -260,16 +216,16 @@ public void testDropTable() public void testShowTables() { assertMetastoreInvocations("SHOW TABLES", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .add(GET_ALL_TABLES_FROM_DATABASE) + .add(GET_TABLES) .build()); } - private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) + private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) { assertUpdate("CALL system.flush_metadata_cache()"); - CountingAccessHiveMetastoreUtil.assertMetastoreInvocations(metastore, getQueryRunner(), getSession(), query, expectedInvocations); + assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), getSession(), query, expectedInvocations); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeConcurrentModificationGlueMetastore.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeConcurrentModificationGlueMetastore.java index 9fb30ddd5957d..04092fe237db9 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeConcurrentModificationGlueMetastore.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeConcurrentModificationGlueMetastore.java @@ -101,7 +101,7 @@ protected QueryRunner createQueryRunner() stats, table -> true); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); queryRunner.createCatalog(CATALOG_NAME, "delta_lake"); queryRunner.execute("CREATE SCHEMA " + SCHEMA); return queryRunner; diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeViewsGlueMetastore.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeViewsGlueMetastore.java index cce31746b617a..9fee324abe606 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeViewsGlueMetastore.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaLakeViewsGlueMetastore.java @@ -67,7 +67,7 @@ protected QueryRunner createQueryRunner() dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("data_delta_lake_views"); metastore = createTestMetastore(dataDirectory); - queryRunner.installPlugin(new TestingDeltaLakePlugin(Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(metastore)), Optional.empty(), EMPTY_MODULE)); queryRunner.createCatalog(CATALOG_NAME, "delta_lake"); queryRunner.execute("CREATE SCHEMA " + SCHEMA); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java index 2a8d84a65b732..dcf83f52d74a4 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java +++ 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java @@ -51,6 +51,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; import java.util.List; import java.util.Map; import java.util.Objects; @@ -740,6 +741,143 @@ public void testSkipAddEntriesThroughPartitionPruning() assertThat(addEntryIteratorWithRangePartitionFilter.getCompletedPositions().orElseThrow()).isEqualTo(3L); } + @Test + public void testSkipAddEntriesThroughComposedPartitionPruning() + throws IOException + { + MetadataEntry metadataEntry = new MetadataEntry( + "metadataId", + "metadataName", + "metadataDescription", + new MetadataEntry.Format( + "metadataFormatProvider", + ImmutableMap.of()), + "{\"type\":\"struct\",\"fields\":" + + "[{\"name\":\"ts\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}," + + "{\"name\":\"part_day\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}," + + "{\"name\":\"part_hour\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}", + ImmutableList.of( + "part_day", + "part_hour"), + ImmutableMap.of(), + 1000); + ProtocolEntry protocolEntry = new ProtocolEntry(1, 1, Optional.empty(), Optional.empty()); + LocalDateTime date = LocalDateTime.of(2023, 12, 1, 0, 0); + DateTimeFormatter dayFormatter = DateTimeFormatter.ofPattern("yyyyMMdd"); + DateTimeFormatter hourFormatter = DateTimeFormatter.ofPattern("HH"); + int numAddEntries = 100; + Set addFileEntries = IntStream.rangeClosed(1, numAddEntries).mapToObj(index -> + new AddFileEntry( + "addFilePath", + ImmutableMap.of( + "part_day", date.plusHours(index).format(dayFormatter), + "part_hour", date.plusHours(index).format(hourFormatter)), + 1000, + 1001, + true, + Optional.of("{" + + "\"numRecords\":20," + + "\"minValues\":{" + + "\"ts\":\"1990-10-31T01:00:00.000Z\"" + + "}," + + "\"maxValues\":{" + + "\"ts\":\"1990-10-31T02:00:00.000Z\"" + + "}," + + "\"nullCount\":{" + + "\"ts\":1" + + "}}"), + Optional.empty(), + ImmutableMap.of(), + Optional.empty())) + .collect(toImmutableSet()); + + CheckpointEntries entries = new CheckpointEntries( + metadataEntry, + protocolEntry, + ImmutableSet.of(), + addFileEntries, + ImmutableSet.of()); + + CheckpointWriter writer = new CheckpointWriter( + TESTING_TYPE_MANAGER, + checkpointSchemaManager, + "test", + ParquetWriterOptions.builder() // approximately 2 rows per row group + .setMaxBlockSize(DataSize.ofBytes(128L)) + .setMaxPageSize(DataSize.ofBytes(128L)) + .build()); + + File targetFile = File.createTempFile("testSkipAddEntries-", ".checkpoint.parquet"); + targetFile.deleteOnExit(); + + String targetPath = "file://" + targetFile.getAbsolutePath(); + targetFile.delete(); // file must not exist when writer is called + writer.write(entries, createOutputFile(targetPath)); + + CheckpointEntryIterator metadataAndProtocolEntryIterator = createCheckpointEntryIterator( + URI.create(targetPath), + ImmutableSet.of(METADATA, PROTOCOL), + Optional.empty(), + Optional.empty(), + TupleDomain.all(), + Optional.empty()); + + DeltaLakeColumnHandle partitionDayField = new DeltaLakeColumnHandle( + "part_day", + VARCHAR, + OptionalInt.empty(), + "part_day", + VARCHAR, + REGULAR, + Optional.empty()); + DeltaLakeColumnHandle partitionHourField = new DeltaLakeColumnHandle( + "part_hour", + VARCHAR, + OptionalInt.empty(), + "part_hour", + VARCHAR, + REGULAR, + Optional.empty()); + + CheckpointEntryIterator addEntryIterator = createCheckpointEntryIterator( + 
URI.create(targetPath), + ImmutableSet.of(ADD), + Optional.of(metadataEntry), + Optional.of(protocolEntry), + TupleDomain.all(), + Optional.of(alwaysTrue())); + + CheckpointEntryIterator addEntryIteratorEqualityDayPartitionFilter = createCheckpointEntryIterator( + URI.create(targetPath), + ImmutableSet.of(ADD), + Optional.of(metadataEntry), + Optional.of(protocolEntry), + TupleDomain.withColumnDomains(ImmutableMap.of(partitionDayField, singleValue(VARCHAR, utf8Slice("20231202")))), + Optional.of(alwaysTrue())); + + CheckpointEntryIterator addEntryIteratorWithDayAndHourEqualityPartitionFilter = createCheckpointEntryIterator( + URI.create(targetPath), + ImmutableSet.of(ADD), + Optional.of(metadataEntry), + Optional.of(protocolEntry), + TupleDomain.withColumnDomains(ImmutableMap.of( + partitionDayField, singleValue(VARCHAR, utf8Slice("20231202")), + partitionHourField, singleValue(VARCHAR, utf8Slice("10")))), + Optional.of(alwaysTrue())); + + assertThat(Iterators.size(metadataAndProtocolEntryIterator)).isEqualTo(2); + assertThat(metadataAndProtocolEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(3L); + + assertThat(Iterators.size(addEntryIterator)).isEqualTo(numAddEntries); + assertThat(addEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(101L); + + assertThat(Iterators.size(addEntryIteratorEqualityDayPartitionFilter)).isEqualTo(24); + assertThat(addEntryIteratorEqualityDayPartitionFilter.getCompletedPositions().orElseThrow()).isEqualTo(24L); + + assertThat(Iterators.size(addEntryIteratorWithDayAndHourEqualityPartitionFilter)).isEqualTo(1); + assertThat(addEntryIteratorWithDayAndHourEqualityPartitionFilter.getCompletedPositions().orElseThrow()).isEqualTo(1L); + } + @Test public void testSkipRemoveEntries() throws IOException diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java index b928ae46be54d..0ad6e8b06f355 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java @@ -242,11 +242,11 @@ public void testCheckpointWriteReadParquetStatisticsRoundtrip() ProtocolEntry protocolEntry = new ProtocolEntry(10, 20, Optional.empty(), Optional.empty()); TransactionEntry transactionEntry = new TransactionEntry("appId", 1, 1001); - Block[] minMaxRowFieldBlocks = new Block[]{ + Block[] minMaxRowFieldBlocks = new Block[] { nativeValueToBlock(IntegerType.INTEGER, 1L), nativeValueToBlock(createUnboundedVarcharType(), utf8Slice("a")) }; - Block[] nullCountRowFieldBlocks = new Block[]{ + Block[] nullCountRowFieldBlocks = new Block[] { nativeValueToBlock(BigintType.BIGINT, 0L), nativeValueToBlock(BigintType.BIGINT, 15L) }; @@ -359,11 +359,11 @@ public void testDisablingRowStatistics() ImmutableMap.of(), 1000); ProtocolEntry protocolEntry = new ProtocolEntry(10, 20, Optional.empty(), Optional.empty()); - Block[] minMaxRowFieldBlocks = new Block[]{ + Block[] minMaxRowFieldBlocks = new Block[] { nativeValueToBlock(IntegerType.INTEGER, 1L), nativeValueToBlock(createUnboundedVarcharType(), utf8Slice("a")) }; - Block[] nullCountRowFieldBlocks = new Block[]{ + Block[] nullCountRowFieldBlocks = new Block[] { nativeValueToBlock(BigintType.BIGINT, 0L), nativeValueToBlock(BigintType.BIGINT, 15L) }; diff --git 
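The new testSkipAddEntriesThroughComposedPartitionPruning fixture above writes 100 hourly add entries partitioned by part_day and part_hour, then expects a day-only filter to retain 24 entries and a day-plus-hour filter to retain exactly one. A standalone sketch of that arithmetic over plain partition-value maps (no Trino types; the printed counts match the test's expectations):

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;

public class PartitionPruningSketch
{
    public static void main(String[] args)
    {
        DateTimeFormatter day = DateTimeFormatter.ofPattern("yyyyMMdd");
        DateTimeFormatter hour = DateTimeFormatter.ofPattern("HH");
        LocalDateTime start = LocalDateTime.of(2023, 12, 1, 0, 0);

        // 100 "add entries", one per hour, mirroring the test fixture
        List<Map<String, String>> entries = IntStream.rangeClosed(1, 100)
                .mapToObj(i -> Map.of(
                        "part_day", start.plusHours(i).format(day),
                        "part_hour", start.plusHours(i).format(hour)))
                .collect(toList());

        long dayOnly = entries.stream()
                .filter(e -> e.get("part_day").equals("20231202"))
                .count();
        long dayAndHour = entries.stream()
                .filter(e -> e.get("part_day").equals("20231202") && e.get("part_hour").equals("10"))
                .count();

        System.out.println(dayOnly);     // 24 entries fall on 2023-12-02
        System.out.println(dayAndHour);  // exactly 1 entry matches that day and hour
    }
}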
a/plugin/trino-druid/pom.xml b/plugin/trino-druid/pom.xml index cc686502ecb43..c04d8bd4bffcb 100644 --- a/plugin/trino-druid/pom.xml +++ b/plugin/trino-druid/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-elasticsearch/pom.xml b/plugin/trino-elasticsearch/pom.xml index 326020108295b..e4601e8391cec 100644 --- a/plugin/trino-elasticsearch/pom.xml +++ b/plugin/trino-elasticsearch/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,21 +15,13 @@ ${project.parent.basedir} - 6.8.23 - instances + 7.17.16 com.amazonaws aws-java-sdk-core - - - - org.apache.httpcomponents - httpclient - - @@ -115,7 +107,7 @@ org.apache.httpcomponents httpasyncclient - 4.1.2 + 4.1.4 @@ -157,18 +149,13 @@ org.apache.logging.log4j - log4j-api + log4j-core - + org.apache.lucene lucene-analyzers-common - - - org.elasticsearch - jna - @@ -182,12 +169,6 @@ org.elasticsearch elasticsearch-x-content ${dep.elasticsearch.version} - - - org.yaml - snakeyaml - - @@ -406,20 +387,6 @@ - - org.apache.maven.plugins - maven-enforcer-plugin - - - - - - org.apache.lucene:lucene-core - - - - - org.apache.maven.plugins maven-dependency-plugin diff --git a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/BackpressureRestHighLevelClient.java b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/BackpressureRestHighLevelClient.java index 391283144cbec..9ea38bc290192 100644 --- a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/BackpressureRestHighLevelClient.java +++ b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/BackpressureRestHighLevelClient.java @@ -33,6 +33,7 @@ import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClientBuilder; import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.RestHighLevelClientBuilder; import org.elasticsearch.rest.RestStatus; import java.io.Closeable; @@ -58,7 +59,8 @@ public class BackpressureRestHighLevelClient public BackpressureRestHighLevelClient(RestClientBuilder restClientBuilder, ElasticsearchConfig config, TimeStat backpressureStats) { this.backpressureStats = requireNonNull(backpressureStats, "backpressureStats is null"); - delegate = new RestHighLevelClient(requireNonNull(restClientBuilder, "restClientBuilder is null")); + delegate = new RestHighLevelClientBuilder(requireNonNull(restClientBuilder, "restClientBuilder is null").build()) + .build(); backpressureRestClient = new BackpressureRestClient(delegate.getLowLevelClient(), config, backpressureStats); retryPolicy = RetryPolicy.builder() .withMaxAttempts(-1) diff --git a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/ElasticsearchClient.java b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/ElasticsearchClient.java index f23bb90862aef..f696192e0b2c3 100644 --- a/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/ElasticsearchClient.java +++ b/plugin/trino-elasticsearch/src/main/java/io/trino/plugin/elasticsearch/client/ElasticsearchClient.java @@ -62,7 +62,7 @@ import org.elasticsearch.client.ResponseException; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.query.QueryBuilder; import 
org.elasticsearch.search.builder.SearchSourceBuilder; import org.weakref.jmx.Managed; @@ -201,8 +201,7 @@ private static BackpressureRestHighLevelClient createClient( RestClientBuilder builder = RestClient.builder( config.getHosts().stream() .map(httpHost -> new HttpHost(httpHost, config.getPort(), config.isTlsEnabled() ? "https" : "http")) - .toArray(HttpHost[]::new)) - .setMaxRetryTimeoutMillis(toIntExact(config.getMaxRetryTime().toMillis())); + .toArray(HttpHost[]::new)); builder.setHttpClientConfigCallback(ignored -> { RequestConfig requestConfig = RequestConfig.custom() @@ -225,7 +224,7 @@ private static BackpressureRestHighLevelClient createClient( buildSslContext(config.getKeystorePath(), config.getKeystorePassword(), config.getTrustStorePath(), config.getTruststorePassword()) .ifPresent(clientBuilder::setSSLContext); - if (config.isVerifyHostnames()) { + if (!config.isVerifyHostnames()) { clientBuilder.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE); } } diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java index 60586c95bbf59..07abe0792fcd6 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/BaseElasticsearchConnectorTest.java @@ -16,7 +16,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.net.HostAndPort; import io.trino.Session; import io.trino.spi.type.VarcharType; import io.trino.sql.planner.plan.LimitNode; @@ -26,10 +25,7 @@ import io.trino.testing.QueryRunner; import io.trino.testing.TestingConnectorBehavior; import io.trino.tpch.TpchTable; -import org.apache.http.HttpHost; -import org.apache.http.entity.ContentType; -import org.apache.http.nio.entity.NStringEntity; -import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.Request; import org.elasticsearch.client.RestHighLevelClient; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.AfterAll; @@ -49,6 +45,7 @@ import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -57,28 +54,23 @@ public abstract class BaseElasticsearchConnectorTest extends BaseConnectorTest { - private final String image; private final String catalogName; - private ElasticsearchServer elasticsearch; - protected RestHighLevelClient client; + private ElasticsearchServer server; + private RestHighLevelClient client; - BaseElasticsearchConnectorTest(String image, String catalogName) + BaseElasticsearchConnectorTest(ElasticsearchServer server, String catalogName) { - this.image = image; + this.server = requireNonNull(server, "server is null"); this.catalogName = catalogName; + this.client = server.getClient(); } @Override protected QueryRunner createQueryRunner() throws Exception { - elasticsearch = new ElasticsearchServer(image, ImmutableMap.of()); - - HostAndPort address = elasticsearch.getAddress(); - client = new 
RestHighLevelClient(RestClient.builder(new HttpHost(address.getHost(), address.getPort()))); - return createElasticsearchQueryRunner( - elasticsearch.getAddress(), + server, TpchTable.getTables(), ImmutableMap.of(), ImmutableMap.of(), @@ -90,8 +82,8 @@ protected QueryRunner createQueryRunner() public final void destroy() throws IOException { - elasticsearch.stop(); - elasticsearch = null; + server.stop(); + server = null; client.close(); client = null; } @@ -123,7 +115,7 @@ protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior) /** * This method overrides the default values used for the data provider - * of the test {@link AbstractTestQueries#testLargeIn(int)} by taking + * of the test {@link AbstractTestQueries#testLargeIn()} by taking * into account that by default Elasticsearch supports only up to `1024` * clauses in query. *

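On the ElasticsearchClient change above that flips the hostname-verification condition: the no-op verifier is now installed only when elasticsearch.tls.verify-hostnames is false, so asking for verification no longer disables it. A hedged sketch of the intended shape, using the Apache HttpAsyncClient builder the client already relies on; the verifyHostnames flag stands in for the config getter:

import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.http.impl.nio.client.HttpAsyncClients;

public class HostnameVerificationSketch
{
    static HttpAsyncClientBuilder configureTls(HttpAsyncClientBuilder clientBuilder, boolean verifyHostnames)
    {
        // Relax verification only when it was explicitly turned off;
        // with verification enabled the default verifier stays in place.
        if (!verifyHostnames) {
            clientBuilder.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE);
        }
        return clientBuilder;
    }

    public static void main(String[] args)
    {
        configureTls(HttpAsyncClients.custom(), false);
    }
}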
@@ -1910,53 +1902,63 @@ protected void assertTableDoesNotExist(String name) assertQueryFails("SELECT * FROM " + name, ".*Table '" + catalogName + ".tpch." + name + "' does not exist"); } - protected abstract String indexEndpoint(String index, String docId); + protected String indexEndpoint(String index, String docId) + { + return format("/%s/_doc/%s", index, docId); + } private void index(String index, Map document) throws IOException { String json = new ObjectMapper().writeValueAsString(document); String endpoint = format("%s?refresh", indexEndpoint(index, String.valueOf(System.nanoTime()))); - client.getLowLevelClient() - .performRequest("PUT", endpoint, ImmutableMap.of(), new NStringEntity(json, ContentType.APPLICATION_JSON)); + + Request request = new Request("PUT", endpoint); + request.setJsonEntity(json); + + client.getLowLevelClient().performRequest(request); } private void addAlias(String index, String alias) throws IOException { client.getLowLevelClient() - .performRequest("PUT", format("/%s/_alias/%s", index, alias)); + .performRequest(new Request("PUT", format("/%s/_alias/%s", index, alias))); refreshIndex(alias); } - protected abstract String indexMapping(@Language("JSON") String properties); + protected String indexMapping(@Language("JSON") String properties) + { + return "{\"mappings\": " + properties + "}"; + } private void createIndex(String indexName) throws IOException { - client.getLowLevelClient().performRequest("PUT", "/" + indexName); + client.getLowLevelClient().performRequest(new Request("PUT", "/" + indexName)); } private void createIndex(String indexName, @Language("JSON") String properties) throws IOException { String mappings = indexMapping(properties); - client.getLowLevelClient() - .performRequest("PUT", "/" + indexName, ImmutableMap.of(), new NStringEntity(mappings, ContentType.APPLICATION_JSON)); + + Request request = new Request("PUT", "/" + indexName); + request.setJsonEntity(mappings); + + client.getLowLevelClient().performRequest(request); } private void refreshIndex(String index) throws IOException { - client.getLowLevelClient() - .performRequest("GET", format("/%s/_refresh", index)); + client.getLowLevelClient().performRequest(new Request("GET", format("/%s/_refresh", index))); } private void deleteIndex(String indexName) throws IOException { - client.getLowLevelClient() - .performRequest("DELETE", "/" + indexName); + client.getLowLevelClient().performRequest(new Request("DELETE", "/" + indexName)); } } diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchExternalQueryRunner.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchExternalQueryRunner.java deleted file mode 100644 index beef029ed9486..0000000000000 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchExternalQueryRunner.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
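The test helpers above move from the removed string-based performRequest overloads to the Request object API of the newer low-level Elasticsearch REST client. A minimal usage sketch with a placeholder host and index name (in the tests the address comes from ElasticsearchServer):

import org.apache.http.HttpHost;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

import java.io.IOException;

public class RequestApiSketch
{
    public static void main(String[] args)
            throws IOException
    {
        // Placeholder address; real tests point this at the containerized server
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200)).build()) {
            Request request = new Request("PUT", "/example_index/_doc/1?refresh");
            request.setJsonEntity("{\"value\": 42}");

            Response response = client.performRequest(request);
            System.out.println(response.getStatusLine());
        }
    }
}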
- */ -package io.trino.plugin.elasticsearch; - -import com.google.common.collect.ImmutableMap; -import com.google.common.net.HostAndPort; -import io.airlift.log.Logger; -import io.trino.testing.DistributedQueryRunner; -import io.trino.tpch.TpchTable; - -import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.createElasticsearchQueryRunner; -import static java.lang.Integer.parseInt; - -public class ElasticsearchExternalQueryRunner -{ - private static final String HOSTNAME = System.getProperty("elasticsearch.host", "localhost"); - private static final int PORT = parseInt(System.getProperty("elasticsearch.port", "9200")); - - private ElasticsearchExternalQueryRunner() {} - - public static void main(String[] args) - throws Exception - { - // Please set hostname and port via VM options. e.g. "-Delasticsearch.host=localhost -Delasticsearch.port=9200" - // To start Elasticsearch: - // docker run -p 9200:9200 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.6.2 - DistributedQueryRunner queryRunner = createElasticsearchQueryRunner( - HostAndPort.fromParts(HOSTNAME, PORT), - TpchTable.getTables(), - ImmutableMap.of("http-server.http.port", "8080"), - ImmutableMap.of(), - 3); - - Logger log = Logger.get(ElasticsearchExternalQueryRunner.class); - log.info("======== SERVER STARTED ========"); - log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); - } -} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchLoader.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchLoader.java index 35fdeb39f4831..af01e19f78fca 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchLoader.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchLoader.java @@ -25,8 +25,9 @@ import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.UncheckedIOException; @@ -42,7 +43,7 @@ import static io.trino.spi.type.DoubleType.DOUBLE; import static io.trino.spi.type.IntegerType.INTEGER; import static java.util.Objects.requireNonNull; -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; public class ElasticsearchLoader extends AbstractTestingTrinoClient @@ -109,7 +110,7 @@ public void addResults(QueryStatusInfo statusInfo, QueryData data) request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); try { - client.bulk(request); + client.bulk(request, RequestOptions.DEFAULT); } catch (IOException e) { throw new RuntimeException(e); diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchNginxProxy.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchNginxProxy.java deleted file mode 100644 index 298edeabaac77..0000000000000 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchNginxProxy.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.elasticsearch; - -import com.google.common.net.HostAndPort; -import org.testcontainers.containers.Network; -import org.testcontainers.containers.NginxContainer; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.testcontainers.utility.MountableFile.forHostPath; - -public class ElasticsearchNginxProxy -{ - private static final int PROXY_PORT = 9201; - private static final String NGINX_CONFIG_TEMPLATE = - "limit_req_zone $binary_remote_addr zone=mylimit:1m rate=REQUEST_PER_SECONDr/s;\n" + - "upstream elasticsearch {\n" + - " server elasticsearch-server:9200;\n" + - " keepalive 15;\n" + - "}\n" + - "server {\n" + - " access_log /var/log/nginx/access.log main;" + - " listen " + PROXY_PORT + ";\n" + - " location / {\n" + - " proxy_pass http://elasticsearch;\n" + - " proxy_redirect http://elasticsearch /;\n" + - " proxy_buffering off;\n" + - " proxy_http_version 1.1;\n" + - " proxy_set_header Connection \"Keep-Alive\";\n" + - " proxy_set_header Proxy-Connection \"Keep-Alive\";\n" + - " client_max_body_size 0;\n" + - " }\n" + - " location /_search/scroll {\n" + - " limit_req zone=mylimit;\n" + - " limit_req_status 429;\n" + - " proxy_pass http://elasticsearch;\n" + - " proxy_redirect http://elasticsearch /;\n" + - " proxy_buffering off;\n" + - " proxy_http_version 1.1;\n" + - " proxy_set_header Connection \"Keep-Alive\";\n" + - " proxy_set_header Proxy-Connection \"Keep-Alive\";\n" + - " client_max_body_size 0;\n" + - " }\n" + - " location ~ /.*/_search$ {\n" + - " limit_req zone=mylimit;\n" + - " limit_req_status 429;\n" + - " proxy_pass http://elasticsearch;\n" + - " proxy_redirect http://elasticsearch /;\n" + - " proxy_buffering off;\n" + - " proxy_http_version 1.1;\n" + - " proxy_set_header Connection \"Keep-Alive\";\n" + - " proxy_set_header Proxy-Connection \"Keep-Alive\";\n" + - " client_max_body_size 0;\n" + - " }\n" + - "}\n"; - - private final Path configurationPath; - private final NginxContainer container; - - public ElasticsearchNginxProxy(Network network, int requestsPerSecond) - throws IOException - { - container = new NginxContainer<>("nginx:1.19.8"); - container.withNetwork(network); - container.withNetworkAliases("elasticsearch-proxy"); - // Create the Nginx configuration file on host and copy it into a predefined path the container - configurationPath = Files.createTempDirectory("elasticsearchProxy"); - Path path = configurationPath.resolve("elasticsearch.conf"); - Files.writeString(path, NGINX_CONFIG_TEMPLATE.replace("REQUEST_PER_SECOND", String.valueOf(requestsPerSecond)), UTF_8); - container.withCopyFileToContainer(forHostPath(path), "/etc/nginx/conf.d/elasticsearch.conf"); - container.addExposedPort(PROXY_PORT); - container.start(); - } - - public void stop() - throws IOException - { - container.close(); - deleteRecursively(configurationPath, ALLOW_INSECURE); - } - - 
public HostAndPort getAddress() - { - return HostAndPort.fromString(container.getHost() + ":" + container.getMappedPort(PROXY_PORT)); - } -} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchQueryRunner.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchQueryRunner.java index 73be8fd317517..e560cff86c180 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchQueryRunner.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchQueryRunner.java @@ -14,7 +14,6 @@ package io.trino.plugin.elasticsearch; import com.google.common.collect.ImmutableMap; -import com.google.common.net.HostAndPort; import io.airlift.log.Level; import io.airlift.log.Logger; import io.airlift.log.Logging; @@ -25,14 +24,19 @@ import io.trino.testing.QueryRunner; import io.trino.testing.TestingTrinoClient; import io.trino.tpch.TpchTable; -import org.apache.http.HttpHost; -import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; +import javax.net.ssl.SSLContext; + +import java.io.File; +import java.net.URISyntaxException; import java.util.Map; +import java.util.Optional; +import static com.google.common.io.Resources.getResource; import static io.airlift.testing.Closeables.closeAllSuppress; import static io.airlift.units.Duration.nanosSince; +import static io.trino.plugin.base.ssl.SslUtils.createSSLContext; import static io.trino.plugin.elasticsearch.ElasticsearchServer.ELASTICSEARCH_7_IMAGE; import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; import static io.trino.testing.TestingSession.testSessionBuilder; @@ -42,6 +46,9 @@ public final class ElasticsearchQueryRunner { + public static final String USER = "elastic_user"; + public static final String PASSWORD = "123456"; + static { Logging logging = Logging.initialize(); logging.setLevel("org.elasticsearch.client.RestClient", Level.OFF); @@ -53,18 +60,18 @@ private ElasticsearchQueryRunner() {} private static final String TPCH_SCHEMA = "tpch"; public static DistributedQueryRunner createElasticsearchQueryRunner( - HostAndPort address, + ElasticsearchServer server, Iterable> tables, Map extraProperties, Map extraConnectorProperties, int nodeCount) throws Exception { - return createElasticsearchQueryRunner(address, tables, extraProperties, extraConnectorProperties, nodeCount, "elasticsearch"); + return createElasticsearchQueryRunner(server, tables, extraProperties, extraConnectorProperties, nodeCount, "elasticsearch"); } public static DistributedQueryRunner createElasticsearchQueryRunner( - HostAndPort address, + ElasticsearchServer server, Iterable> tables, Map extraProperties, Map extraConnectorProperties, @@ -91,13 +98,14 @@ public static DistributedQueryRunner createElasticsearchQueryRunner( ElasticsearchConnectorFactory testFactory = new ElasticsearchConnectorFactory(); - installElasticsearchPlugin(address, queryRunner, catalogName, testFactory, extraConnectorProperties); + installElasticsearchPlugin(server, queryRunner, catalogName, testFactory, extraConnectorProperties); TestingTrinoClient trinoClient = queryRunner.getClient(); LOG.info("Loading data..."); - client = new RestHighLevelClient(RestClient.builder(HttpHost.create(address.toString()))); + client = server.getClient(); + long startTime = System.nanoTime(); for (TpchTable table : tables) { loadTpchTopic(client, trinoClient, table); @@ -112,17 +120,32 @@ public static DistributedQueryRunner 
createElasticsearchQueryRunner( } } + public static SSLContext getSSLContext() + { + try { + return createSSLContext( + Optional.empty(), + Optional.empty(), + Optional.of(new File(getResource("truststore.jks").toURI())), + Optional.of("123456")); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + private static void installElasticsearchPlugin( - HostAndPort address, + ElasticsearchServer server, QueryRunner queryRunner, String catalogName, ElasticsearchConnectorFactory factory, Map extraConnectorProperties) + throws URISyntaxException { queryRunner.installPlugin(new ElasticsearchPlugin(factory)); - Map config = ImmutableMap.builder() - .put("elasticsearch.host", address.getHost()) - .put("elasticsearch.port", Integer.toString(address.getPort())) + ImmutableMap.Builder builder = ImmutableMap.builder() + .put("elasticsearch.host", server.getAddress().getHost()) + .put("elasticsearch.port", Integer.toString(server.getAddress().getPort())) // Node discovery relies on the publish_address exposed via the Elasticseach API // This doesn't work well within a docker environment that maps ES's port to a random public port .put("elasticsearch.ignore-publish-address", "true") @@ -130,7 +153,15 @@ private static void installElasticsearchPlugin( .put("elasticsearch.scroll-size", "1000") .put("elasticsearch.scroll-timeout", "1m") .put("elasticsearch.request-timeout", "2m") - .putAll(extraConnectorProperties) + .put("elasticsearch.tls.enabled", "true") + .put("elasticsearch.tls.truststore-path", new File(getResource("truststore.jks").toURI()).getPath()) + .put("elasticsearch.tls.truststore-password", "123456") + .put("elasticsearch.tls.verify-hostnames", "false") + .put("elasticsearch.security", "PASSWORD") + .put("elasticsearch.auth.user", USER) + .put("elasticsearch.auth.password", PASSWORD); + + Map config = builder.putAll(extraConnectorProperties) .buildOrThrow(); queryRunner.createCatalog(catalogName, "elasticsearch", config); @@ -149,7 +180,7 @@ public static void main(String[] args) throws Exception { DistributedQueryRunner queryRunner = createElasticsearchQueryRunner( - new ElasticsearchServer(ELASTICSEARCH_7_IMAGE, ImmutableMap.of()).getAddress(), + new ElasticsearchServer(ELASTICSEARCH_7_IMAGE), TpchTable.getTables(), ImmutableMap.of("http-server.http.port", "8080"), ImmutableMap.of(), diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchServer.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchServer.java index 8f3da20229d14..5a4423901a872 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchServer.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/ElasticsearchServer.java @@ -13,46 +13,72 @@ */ package io.trino.plugin.elasticsearch; +import com.amazonaws.util.Base64; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; import com.google.common.net.HostAndPort; import io.trino.testing.ResourcePresence; +import org.apache.http.HttpHost; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.http.message.BasicHeader; +import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.RestHighLevelClientBuilder; import org.testcontainers.containers.Network; import org.testcontainers.elasticsearch.ElasticsearchContainer; import 
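getSSLContext above delegates to Trino's SslUtils.createSSLContext with only a truststore and its password. For reference, a roughly equivalent hedged sketch using JDK classes alone; the truststore path and password mirror the test fixtures:

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManagerFactory;

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.KeyStore;

public class TruststoreSketch
{
    static SSLContext sslContextFromTruststore(Path truststore, char[] password)
            throws Exception
    {
        // Load the JKS truststore holding the test CA certificate
        KeyStore keyStore = KeyStore.getInstance("JKS");
        try (InputStream in = Files.newInputStream(truststore)) {
            keyStore.load(in, password);
        }

        // Build trust managers from it and initialize a TLS context with no client key material
        TrustManagerFactory trustManagers = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
        trustManagers.init(keyStore);

        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, trustManagers.getTrustManagers(), null);
        return context;
    }

    public static void main(String[] args)
            throws Exception
    {
        // Assumes the test resource has been copied next to the working directory
        System.out.println(sslContextFromTruststore(Path.of("truststore.jks"), "123456".toCharArray()).getProtocol());
    }
}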
org.testcontainers.utility.DockerImageName; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static com.google.common.io.Resources.getResource; +import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.PASSWORD; +import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.USER; +import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.getSSLContext; +import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; import static java.nio.file.Files.createTempDirectory; import static org.testcontainers.utility.MountableFile.forHostPath; public class ElasticsearchServer { - public static final String ELASTICSEARCH_7_IMAGE = "elasticsearch:7.0.0"; + public static final String ELASTICSEARCH_7_IMAGE = "elasticsearch:7.16.2"; + public static final String ELASTICSEARCH_8_IMAGE = "elasticsearch:8.11.3"; private final Path configurationPath; private final ElasticsearchContainer container; - public ElasticsearchServer(String image, Map configurationFiles) + public ElasticsearchServer(String image) throws IOException { - this(Network.SHARED, image, configurationFiles); + this(Network.SHARED, image); } - public ElasticsearchServer(Network network, String image, Map configurationFiles) + public ElasticsearchServer(Network network, String image) throws IOException { DockerImageName dockerImageName = DockerImageName.parse(image).asCompatibleSubstituteFor("docker.elastic.co/elasticsearch/elasticsearch"); container = new ElasticsearchContainer(dockerImageName); container.withNetwork(network); container.withNetworkAliases("elasticsearch-server"); - container.withEnv("DISABLE_SECURITY_PLUGIN", "true"); // Required for OpenSearch container configurationPath = createTempDirectory(null); + Map configurationFiles = ImmutableMap.builder() + .put("elasticsearch.yml", loadResource("elasticsearch.yml")) + .put("users", loadResource("users")) + .put("users_roles", loadResource("users_roles")) + .put("roles.yml", loadResource("roles.yml")) + .put("ca.crt", loadResource("ca.crt")) + .put("server.crt", loadResource("server.crt")) + .put("server.key", loadResource("server.key")) + .buildOrThrow(); + for (Map.Entry entry : configurationFiles.entrySet()) { String name = entry.getKey(); String contents = entry.getValue(); @@ -61,7 +87,6 @@ public ElasticsearchServer(Network network, String image, Map co Files.writeString(path, contents, UTF_8); container.withCopyFileToContainer(forHostPath(path), "/usr/share/elasticsearch/config/" + name); } - container.start(); } @@ -82,4 +107,26 @@ public HostAndPort getAddress() { return HostAndPort.fromString(container.getHttpHostAddress()); } + + public RestHighLevelClient getClient() + { + HostAndPort address = getAddress(); + return new RestHighLevelClientBuilder(RestClient.builder(new HttpHost(address.getHost(), address.getPort(), "https")) + .setStrictDeprecationMode(false) + .setHttpClientConfigCallback(ElasticsearchServer::enableSecureCommunication).build()) + .setApiCompatibilityMode(true) // Needed for 7.x client to work with 8.x server + .build(); + } + + private static HttpAsyncClientBuilder enableSecureCommunication(HttpAsyncClientBuilder clientBuilder) + { + return clientBuilder.setSSLContext(getSSLContext()) + .setDefaultHeaders(ImmutableList.of(new 
BasicHeader("Authorization", format("Basic %s", Base64.encodeAsString(format("%s:%s", USER, PASSWORD).getBytes(StandardCharsets.UTF_8)))))); + } + + private static String loadResource(String file) + throws IOException + { + return Resources.toString(getResource(file), UTF_8); + } } diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch6ConnectorTest.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch6ConnectorTest.java deleted file mode 100644 index 06b2c74992008..0000000000000 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch6ConnectorTest.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.elasticsearch; - -import com.google.common.collect.ImmutableMap; -import org.apache.http.entity.ContentType; -import org.apache.http.nio.entity.NStringEntity; -import org.intellij.lang.annotations.Language; -import org.junit.jupiter.api.Test; - -import java.io.IOException; - -import static java.lang.String.format; - -public class TestElasticsearch6ConnectorTest - extends BaseElasticsearchConnectorTest -{ - public TestElasticsearch6ConnectorTest() - { - super("docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.0", "elasticsearch6"); - } - - @Test - public void testIndexWithMappingsButNoProperties() - throws IOException - { - String indexName = "test_empty_index_with_mappings_no_properties"; - - @Language("JSON") - String mappings = "{\"mappings\": " + - " {\"foo\": { \"dynamic\" : \"strict\" } }" + - "}"; - client.getLowLevelClient() - .performRequest("PUT", "/" + indexName, ImmutableMap.of(), new NStringEntity(mappings, ContentType.APPLICATION_JSON)); - - assertTableDoesNotExist(indexName); - } - - @Override - protected String indexEndpoint(String index, String docId) - { - return format("/%s/doc/%s", index, docId); - } - - @Override - protected String indexMapping(String properties) - { - return "{\"mappings\": " + - " {\"doc\": " + properties + "}" + - "}"; - } -} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch7ConnectorTest.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch7ConnectorTest.java index b48d76c34cc0b..29b76f9b2d33c 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch7ConnectorTest.java +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch7ConnectorTest.java @@ -13,26 +13,16 @@ */ package io.trino.plugin.elasticsearch; +import java.io.IOException; + import static io.trino.plugin.elasticsearch.ElasticsearchServer.ELASTICSEARCH_7_IMAGE; -import static java.lang.String.format; public class TestElasticsearch7ConnectorTest extends BaseElasticsearchConnectorTest { public TestElasticsearch7ConnectorTest() + throws IOException { - super(ELASTICSEARCH_7_IMAGE, "elasticsearch7"); - } - - @Override - 
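enableSecureCommunication above attaches a static Authorization header built from the test credentials. A small sketch of the same Basic header construction with the JDK's Base64 codec, which produces the same standard encoding as the com.amazonaws.util.Base64 helper used in the diff:

import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class BasicAuthHeaderSketch
{
    public static void main(String[] args)
    {
        String user = "elastic_user";
        String password = "123456";

        // RFC 7617: base64("user:password") prefixed with "Basic "
        String token = Base64.getEncoder()
                .encodeToString((user + ":" + password).getBytes(StandardCharsets.UTF_8));
        String header = "Basic " + token;

        System.out.println(header); // Basic ZWxhc3RpY191c2VyOjEyMzQ1Ng==
    }
}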
protected String indexEndpoint(String index, String docId) - { - return format("/%s/_doc/%s", index, docId); - } - - @Override - protected String indexMapping(String properties) - { - return "{\"mappings\": " + properties + "}"; + super(new ElasticsearchServer(ELASTICSEARCH_7_IMAGE), "elasticsearch7"); } } diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch8ConnectorTest.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch8ConnectorTest.java new file mode 100644 index 0000000000000..c2c57a5bc6e1f --- /dev/null +++ b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearch8ConnectorTest.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.elasticsearch; + +import java.io.IOException; + +import static io.trino.plugin.elasticsearch.ElasticsearchServer.ELASTICSEARCH_8_IMAGE; + +public class TestElasticsearch8ConnectorTest + extends BaseElasticsearchConnectorTest +{ + public TestElasticsearch8ConnectorTest() + throws IOException + { + super(new ElasticsearchServer(ELASTICSEARCH_8_IMAGE), "elasticsearch8"); + } +} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchBackpressure.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchBackpressure.java deleted file mode 100644 index dbaf931760664..0000000000000 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchBackpressure.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.elasticsearch; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.QueryRunner; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.testcontainers.containers.Network; - -import java.io.IOException; - -import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.createElasticsearchQueryRunner; -import static io.trino.tpch.TpchTable.ORDERS; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestElasticsearchBackpressure - extends AbstractTestQueryFramework -{ - private static final String image = "elasticsearch:7.0.0"; - - private Network network; - private ElasticsearchServer elasticsearch; - private ElasticsearchNginxProxy elasticsearchNginxProxy; - - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - network = Network.newNetwork(); - elasticsearch = new ElasticsearchServer(network, image, ImmutableMap.of()); - elasticsearchNginxProxy = new ElasticsearchNginxProxy(network, 1); - - return createElasticsearchQueryRunner( - elasticsearchNginxProxy.getAddress(), - ImmutableList.of(ORDERS), - ImmutableMap.of(), - ImmutableMap.of(), - // This test can only run on a single node, otherwise each node exports its own stats beans and they override each other - // You can only bind one such bean per JVM, so this causes problems with statistics being 0 despite backpressure handling - 1, - // Use a unique catalog name to make sure JMX stats beans are unique and not affected by other tests - "elasticsearch-backpressure"); - } - - @AfterAll - public final void destroy() - throws IOException - { - elasticsearchNginxProxy.stop(); - elasticsearchNginxProxy = null; - elasticsearch.stop(); - elasticsearch = null; - network.close(); - network = null; - } - - @Test - public void testQueryWithBackpressure() - { - // Check that JMX stats show no sign of backpressure - assertQueryReturnsEmptyResult("SELECT 1 FROM jmx.current.\"trino.plugin.elasticsearch.client:*\" WHERE \"backpressurestats.alltime.count\" > 0"); - assertQueryReturnsEmptyResult("SELECT 1 FROM jmx.current.\"trino.plugin.elasticsearch.client:*\" WHERE \"backpressurestats.alltime.max\" > 0"); - - assertQuerySucceeds("SELECT * FROM orders"); - - // Check that JMX stats show requests have been retried due to backpressure - assertQuery("SELECT DISTINCT 1 FROM jmx.current.\"trino.plugin.elasticsearch.client:*\" WHERE \"backpressurestats.alltime.count\" > 0", "VALUES 1"); - assertQuery("SELECT DISTINCT 1 FROM jmx.current.\"trino.plugin.elasticsearch.client:*\" WHERE \"backpressurestats.alltime.max\" > 0", "VALUES 1"); - } -} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestPasswordAuthentication.java b/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestPasswordAuthentication.java deleted file mode 100644 index 14cb228e229a0..0000000000000 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestPasswordAuthentication.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.elasticsearch; - -import com.amazonaws.util.Base64; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.io.Resources; -import com.google.common.net.HostAndPort; -import io.trino.sql.query.QueryAssertions; -import io.trino.testing.DistributedQueryRunner; -import org.apache.http.HttpHost; -import org.apache.http.entity.ContentType; -import org.apache.http.message.BasicHeader; -import org.apache.http.nio.entity.NStringEntity; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestHighLevelClient; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; - -import static com.google.common.io.Resources.getResource; -import static io.airlift.testing.Closeables.closeAll; -import static io.trino.plugin.elasticsearch.ElasticsearchQueryRunner.createElasticsearchQueryRunner; -import static java.lang.String.format; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; - -@TestInstance(PER_CLASS) -@Execution(CONCURRENT) -public class TestPasswordAuthentication -{ - private static final String USER = "elastic_user"; - private static final String PASSWORD = "123456"; - - private ElasticsearchServer elasticsearch; - private RestHighLevelClient client; - private QueryAssertions assertions; - - @BeforeAll - public void setUp() - throws Exception - { - // We use 7.8.0 because security became a non-commercial feature in recent versions - elasticsearch = new ElasticsearchServer("elasticsearch:7.8.0", ImmutableMap.builder() - .put("elasticsearch.yml", loadResource("elasticsearch.yml")) - .put("users", loadResource("users")) - .put("users_roles", loadResource("users_roles")) - .put("roles.yml", loadResource("roles.yml")) - .buildOrThrow()); - - HostAndPort address = elasticsearch.getAddress(); - client = new RestHighLevelClient(RestClient.builder(new HttpHost(address.getHost(), address.getPort()))); - - DistributedQueryRunner runner = createElasticsearchQueryRunner( - elasticsearch.getAddress(), - ImmutableList.of(), - ImmutableMap.of(), - ImmutableMap.builder() - .put("elasticsearch.security", "PASSWORD") - .put("elasticsearch.auth.user", USER) - .put("elasticsearch.auth.password", PASSWORD) - .buildOrThrow(), - 3); - - assertions = new QueryAssertions(runner); - } - - @AfterAll - public final void destroy() - throws IOException - { - closeAll( - () -> assertions.close(), - () -> elasticsearch.stop(), - () -> client.close()); - - assertions = null; - elasticsearch = null; - client = null; - } - - @Test - public void test() - throws IOException - { - String json = new 
ObjectMapper().writeValueAsString(ImmutableMap.of("value", 42L)); - - client.getLowLevelClient() - .performRequest( - "POST", - "/test/_doc?refresh", - ImmutableMap.of(), - new NStringEntity(json, ContentType.APPLICATION_JSON), - new BasicHeader("Authorization", format("Basic %s", Base64.encodeAsString(format("%s:%s", USER, PASSWORD).getBytes(StandardCharsets.UTF_8))))); - - assertThat(assertions.query("SELECT * FROM test")) - .matches("VALUES BIGINT '42'"); - } - - private static String loadResource(String file) - throws IOException - { - return Resources.toString(getResource(file), UTF_8); - } -} diff --git a/plugin/trino-elasticsearch/src/test/resources/ca.crt b/plugin/trino-elasticsearch/src/test/resources/ca.crt new file mode 100644 index 0000000000000..4cf446a6cc277 --- /dev/null +++ b/plugin/trino-elasticsearch/src/test/resources/ca.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDSTCCAjGgAwIBAgIUByq1K6VbZFocXfpnEohIw8NgEW4wDQYJKoZIhvcNAQEL +BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l +cmF0ZWQgQ0EwHhcNMjMxMjMwMTczMjIxWhcNMjYxMjI5MTczMjIxWjA0MTIwMAYD +VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC +ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAImUzmkzHoVgREpIVHcjvtkV +oT4QjNvciH0bZOcnIUBpXq5ywBoSS3CjckF9mirkAJXZHS+M13155+JKO085Ewy7 +U684jsEVRS8GmNiKrKNYKFiwdKeQKIYtwmwgHMfJCEwcFaVOmJ2PhbiHiQj+8lMr +mRMwS+Wy7deci9uJuzykSHQ7LW7MNcZBCBkWBtctI2p4h21yYZzwn4UzsDGD7i02 +GNJ/CHG4M5QjLY9P/tCHoss9kHDUn7k/rnezk8nHZgf2XAwVYdJBbVeYvUe7HgtK ++35FeSACslOtgV2kQJpULwvh8wiqgP+/oIhNoNPW/NpyoOT4luQmJfxZV5SKJ08C +AwEAAaNTMFEwHQYDVR0OBBYEFOqG2pBw+qtXIiPZ69CVSzeI43UGMB8GA1UdIwQY +MBaAFOqG2pBw+qtXIiPZ69CVSzeI43UGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI +hvcNAQELBQADggEBABKr0j2hk5qzWs8uWR4yqxjL4+MQTSaBtvcMHCF6w1rCCRLt +4sIz7Gy0ecnqjfXscjrrrEA4ruYP4CpAvRXyYgmJ2F1jLYrMcmYnYupCwRl88ygO +dvjVTdAjOib0NBUsE1DA8l0idFYHipCV2izpKjF/AB1HnhWm1A++pC3nZ++x7o6u +lqOuTwEMDBMnwpc+CQpQPSa9QCIl60LNpbhH+rWAL2xm5fdLV/Hs3hFQvABGorgR +78F/kBfk30c/sM2tQog7By/ic6KIEFQhFGvDz2fAqt2BAAyO22nWEQZQI+JTBAH4 +tEBodOialM5LlPFAOU0gmr6q7g8lFIkVYwxadUw= +-----END CERTIFICATE----- diff --git a/plugin/trino-elasticsearch/src/test/resources/elasticsearch.yml b/plugin/trino-elasticsearch/src/test/resources/elasticsearch.yml index eb6bb9c410c30..81d4a23f173a9 100644 --- a/plugin/trino-elasticsearch/src/test/resources/elasticsearch.yml +++ b/plugin/trino-elasticsearch/src/test/resources/elasticsearch.yml @@ -1,4 +1,14 @@ cluster.name: "docker-cluster" +node.name: "elasticsearch-server" network.host: 0.0.0.0 xpack.security.enabled: true +xpack.security.http.ssl.enabled: true +xpack.security.http.ssl.certificate_authorities: /usr/share/elasticsearch/config/ca.crt +xpack.security.http.ssl.certificate: /usr/share/elasticsearch/config/server.crt +xpack.security.http.ssl.key: /usr/share/elasticsearch/config/server.key +xpack.security.transport.ssl.enabled: true +xpack.security.transport.ssl.verification_mode: none +xpack.security.transport.ssl.certificate_authorities: /usr/share/elasticsearch/config/ca.crt +xpack.security.transport.ssl.certificate: /usr/share/elasticsearch/config/server.crt +xpack.security.transport.ssl.key: /usr/share/elasticsearch/config/server.key diff --git a/plugin/trino-elasticsearch/src/test/resources/server.crt b/plugin/trino-elasticsearch/src/test/resources/server.crt new file mode 100644 index 0000000000000..6b033c7f8de27 --- /dev/null +++ b/plugin/trino-elasticsearch/src/test/resources/server.crt @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- 
+MIIDYTCCAkmgAwIBAgIVALANPI8Y3YDiH0EsyazPC0a305zjMA0GCSqGSIb3DQEB +CwUAMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlmaWNhdGUgVG9vbCBBdXRvZ2Vu +ZXJhdGVkIENBMB4XDTIzMTIzMDE3MzIyM1oXDTI2MTIyOTE3MzIyM1owHzEdMBsG +A1UEAxMUZWxhc3RpY3NlYXJjaC1zZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC3lnDqYCsnVWLxlokBBhM4kJsOc+Dwp7FQWvcEEDQYIPHBUDoE +1b1uPO10iwDy41+vCd+L3tkt5vgzhkU+W+AMYOLVJSDUTU1oaFT3gQdAPBLssMZJ ++mrVVysxK/BOkB1r4GRCj3+DaL0/YiIm+VT4fmt1bQ46C9AqPtGBNjjAheOfBVvG +sp8j9zs296OtUnlDLYhtEPF5bSpjyTn0rlKuW9Vc1zYNFW+oshLMJxmiLcFCFzLK +RKdNLGNXs0JGy7BfjOGJ7b8zgzBe8phua7VvDoYw4Hda8z8uNSBWd0bOOE0DT4mb +biC+M0G3c76TNceeZMGlkw6g0Vt4G5jFYTIXAgMBAAGjfzB9MB0GA1UdDgQWBBSA +l2wp8zAFBrBMH+Zf+tS0dznNnjAfBgNVHSMEGDAWgBTqhtqQcPqrVyIj2evQlUs3 +iON1BjAwBgNVHREEKTAngglsb2NhbGhvc3SHBH8AAAGCFGVsYXN0aWNzZWFyY2gt +c2VydmVyMAkGA1UdEwQCMAAwDQYJKoZIhvcNAQELBQADggEBACYQKkoSJrQsFWpR +zTD+5visP/W/wUeHdQxU5iEesLsd43r7jKkZLRJzZgDtPSfYw3LLL/dZHVCtzdUg +NtarP+EaMp5YlGyEvMRiaIy2pFajHN9+wElImQhi5iw5CY6R/YL6m4ap2mVmIeMi +nvWKYaibMzNx2wouahIrEvANb3N93lr8T5LDfLPPLXmHAa0ebk2GbWt90mVdk6UZ +QQqI0VK8YlzR9ybp8jJ5ZRgXIegtn91Tts164+61wQQ2M6fV1le41m/1NENAzkIW +Q7LJvvqe+Q3YO8yBvxYP7ru/kKgBWHPyof+6mocqirwvrVLb+MPL+sKolcuXP0sS +c52vk0Q= +-----END CERTIFICATE----- diff --git a/plugin/trino-elasticsearch/src/test/resources/server.key b/plugin/trino-elasticsearch/src/test/resources/server.key new file mode 100644 index 0000000000000..eaee8c711e3a5 --- /dev/null +++ b/plugin/trino-elasticsearch/src/test/resources/server.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAt5Zw6mArJ1Vi8ZaJAQYTOJCbDnPg8KexUFr3BBA0GCDxwVA6 +BNW9bjztdIsA8uNfrwnfi97ZLeb4M4ZFPlvgDGDi1SUg1E1NaGhU94EHQDwS7LDG +Sfpq1VcrMSvwTpAda+BkQo9/g2i9P2IiJvlU+H5rdW0OOgvQKj7RgTY4wIXjnwVb +xrKfI/c7NvejrVJ5Qy2IbRDxeW0qY8k59K5SrlvVXNc2DRVvqLISzCcZoi3BQhcy +ykSnTSxjV7NCRsuwX4zhie2/M4MwXvKYbmu1bw6GMOB3WvM/LjUgVndGzjhNA0+J +m24gvjNBt3O+kzXHnmTBpZMOoNFbeBuYxWEyFwIDAQABAoIBACRPxrys0cxBWMUk ++6suqu2UuH6Gz/Z5jJoyq8nrdIpTNc4BEsCzD/k2ZtcWyVTBQUMrRkJxjO5F0VDL +nXWEkY+RODxlUBV9xrJAdJHRvP/pAes72bR3KdtRlvLuWGUJnzAfnWUG4fc5Z7ka +LVwdE7/krVd3/wjfHkjEzQZsFZSKcDf/3gAt/xfVW2Oi9xIbUlsHKjjTyE7H3rb2 +DG91TEDYHa5tt8WBzdyyKiXgy8WDugHWrH8C6rNShAocuMtPmkk/W2LyAtioKY5u +qE+XCDpjFFJq99feJQwhkbVNyfpEDX6s6jUzJ67ppr3dkEsxEkVpQF8pXnbo+oB0 +flvsGvUCgYEAuy0YDx6GCc28rO9YvaML92RhvSgwQN8J27taApJRi9Vf8en7sPzk +wbe2oz/80SSnM3IOz6NGf/3y+MtDwy+Z6Xo/foQGngER4ojYu0nqOUG/w7Vo/k3M +JCFla90opMxv5ndvZHzCB4hKXNpB2KmDMO2lxwS1JMdQ50t5lQEF2XUCgYEA+xeP +++5LRLGrOxsWxcnEO2DTLOFIMXI0WnsxipkBCYSyVO/fVloNC2KR0da9Us2ea6er +6iKNCQ8I7LGbvx36+ITLbI5VN1y+0NTXvQuOQa5kif1ShE1E3peRRVPStd+nzJss +VIQMSiXgaJxzZLfLOgHlNUF+Kp8p3WZZqDN/H9sCgYAjhUI8lElRkPaORvmrO0bp +Euatc/P8S5deWKaTOFs7UynbCdFW3Bc5X/jL8De1JOSKEcoXw198qJ+0cTaR0vW9 +ZhKomrt6vq87M0cV+8f0R7Lxm/qpz+++p4vMtXKiG53cuautATP9I/MgvdajHEAm +R+Me6pjJuv1yd1NCYKhQhQKBgQC75/07xzAS33dKS1J2gDFXUnWW750MS/A8Clef +5jLtgZ3HjJ+xysBaVdDUnYSfc/tri1ZPaL3tGpBOOfhZRUFtPEvUGIxTWxf9xlsR +QZWbN7xLvAVFt2Gjxv/TRxC4RQEZUMMTGKLt1nyjLPn6VMVEyuhSO4oEsYouAwtz +od4v4wKBgQCcb3OVsDRMqILwie7ksnVU9j1HhpjA7rhH9wtRZZP0oc3rabr5irgv +A0LGxKKSBPdrQFsH94C1SHwJh+m0MscrjI41XZei9+7B1+I1M3X/IcOTheOS4Iin +04gVZdj7A9lE/USRIrJO2eSL71uOVkcczpOAMFtrkoCQ/DRkw5sC1A== +-----END RSA PRIVATE KEY----- diff --git a/plugin/trino-elasticsearch/src/test/resources/truststore.jks b/plugin/trino-elasticsearch/src/test/resources/truststore.jks new file mode 100644 index 0000000000000..12391c7e447bb Binary files /dev/null and b/plugin/trino-elasticsearch/src/test/resources/truststore.jks differ diff --git a/plugin/trino-example-http/pom.xml 
b/plugin/trino-example-http/pom.xml index fdb209737132a..3dd676a3cf55e 100644 --- a/plugin/trino-example-http/pom.xml +++ b/plugin/trino-example-http/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-example-jdbc/pom.xml b/plugin/trino-example-jdbc/pom.xml index 2e84219b1e0ee..837e1be5fbeb2 100644 --- a/plugin/trino-example-jdbc/pom.xml +++ b/plugin/trino-example-jdbc/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-exchange-filesystem/pom.xml b/plugin/trino-exchange-filesystem/pom.xml index 2ebb14c8aeb3d..c58b41f9bacac 100644 --- a/plugin/trino-exchange-filesystem/pom.xml +++ b/plugin/trino-exchange-filesystem/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-exchange-hdfs/pom.xml b/plugin/trino-exchange-hdfs/pom.xml index a9300c086d2c4..3fc32e4148af5 100644 --- a/plugin/trino-exchange-hdfs/pom.xml +++ b/plugin/trino-exchange-hdfs/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-geospatial/pom.xml b/plugin/trino-geospatial/pom.xml index b1a115e8085a9..b859355ed7a26 100644 --- a/plugin/trino-geospatial/pom.xml +++ b/plugin/trino-geospatial/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -83,6 +83,12 @@ runtime + + com.google.inject + guice + runtime + + it.unimi.dsi fastutil @@ -142,6 +148,12 @@ io.trino trino-main test + + + io.trino + re2j + + @@ -149,6 +161,12 @@ trino-main test-jar test + + + io.trino + re2j + + diff --git a/plugin/trino-geospatial/src/main/java/io/trino/plugin/geospatial/GeoFunctions.java b/plugin/trino-geospatial/src/main/java/io/trino/plugin/geospatial/GeoFunctions.java index cd36a36e00445..74a7fad720cfa 100644 --- a/plugin/trino-geospatial/src/main/java/io/trino/plugin/geospatial/GeoFunctions.java +++ b/plugin/trino-geospatial/src/main/java/io/trino/plugin/geospatial/GeoFunctions.java @@ -33,7 +33,6 @@ import com.esri.core.geometry.ogc.OGCGeometry; import com.esri.core.geometry.ogc.OGCGeometryCollection; import com.esri.core.geometry.ogc.OGCLineString; -import com.esri.core.geometry.ogc.OGCMultiPolygon; import com.esri.core.geometry.ogc.OGCPoint; import com.esri.core.geometry.ogc.OGCPolygon; import com.google.common.base.Joiner; @@ -433,25 +432,7 @@ public static Slice stCentroid(@SqlType(GEOMETRY_TYPE_NAME) Slice input) return serialize(createFromEsriGeometry(new Point(), geometry.getEsriSpatialReference())); } - Point centroid; - switch (geometryType) { - case MULTI_POINT: - centroid = computePointsCentroid((MultiVertexGeometry) geometry.getEsriGeometry()); - break; - case LINE_STRING: - case MULTI_LINE_STRING: - centroid = computeLineCentroid((Polyline) geometry.getEsriGeometry()); - break; - case POLYGON: - centroid = computePolygonCentroid((Polygon) geometry.getEsriGeometry()); - break; - case MULTI_POLYGON: - centroid = computeMultiPolygonCentroid((OGCMultiPolygon) geometry); - break; - default: - throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Unexpected geometry type: " + geometryType); - } - return serialize(createFromEsriGeometry(centroid, geometry.getEsriSpatialReference())); + return serialize(geometry.centroid()); } @Description("Returns the minimum convex geometry that encloses all input geometries") @@ -1609,119 +1590,6 @@ private static void verifySameSpatialReference(OGCGeometry leftGeometry, OGCGeom 
checkArgument(Objects.equals(leftGeometry.getEsriSpatialReference(), rightGeometry.getEsriSpatialReference()), "Input geometries must have the same spatial reference"); } - // Points centroid is arithmetic mean of the input points - private static Point computePointsCentroid(MultiVertexGeometry multiVertex) - { - double xSum = 0; - double ySum = 0; - for (int i = 0; i < multiVertex.getPointCount(); i++) { - Point point = multiVertex.getPoint(i); - xSum += point.getX(); - ySum += point.getY(); - } - return new Point(xSum / multiVertex.getPointCount(), ySum / multiVertex.getPointCount()); - } - - // Lines centroid is weighted mean of each line segment, weight in terms of line length - private static Point computeLineCentroid(Polyline polyline) - { - double xSum = 0; - double ySum = 0; - double weightSum = 0; - for (int i = 0; i < polyline.getPathCount(); i++) { - Point startPoint = polyline.getPoint(polyline.getPathStart(i)); - Point endPoint = polyline.getPoint(polyline.getPathEnd(i) - 1); - double dx = endPoint.getX() - startPoint.getX(); - double dy = endPoint.getY() - startPoint.getY(); - double length = sqrt(dx * dx + dy * dy); - weightSum += length; - xSum += (startPoint.getX() + endPoint.getX()) * length / 2; - ySum += (startPoint.getY() + endPoint.getY()) * length / 2; - } - return new Point(xSum / weightSum, ySum / weightSum); - } - - // Polygon centroid: area weighted average of centroids in case of holes - private static Point computePolygonCentroid(Polygon polygon) - { - int pathCount = polygon.getPathCount(); - - if (pathCount == 1) { - return getPolygonSansHolesCentroid(polygon); - } - - double xSum = 0; - double ySum = 0; - double areaSum = 0; - - for (int i = 0; i < pathCount; i++) { - int startIndex = polygon.getPathStart(i); - int endIndex = polygon.getPathEnd(i); - - Polygon sansHoles = getSubPolygon(polygon, startIndex, endIndex); - - Point centroid = getPolygonSansHolesCentroid(sansHoles); - double area = sansHoles.calculateArea2D(); - - xSum += centroid.getX() * area; - ySum += centroid.getY() * area; - areaSum += area; - } - - return new Point(xSum / areaSum, ySum / areaSum); - } - - private static Polygon getSubPolygon(Polygon polygon, int startIndex, int endIndex) - { - Polyline boundary = new Polyline(); - boundary.startPath(polygon.getPoint(startIndex)); - for (int i = startIndex + 1; i < endIndex; i++) { - Point current = polygon.getPoint(i); - boundary.lineTo(current); - } - - Polygon newPolygon = new Polygon(); - newPolygon.add(boundary, false); - return newPolygon; - } - - // Polygon sans holes centroid: - // c[x] = (Sigma(x[i] + x[i + 1]) * (x[i] * y[i + 1] - x[i + 1] * y[i]), for i = 0 to N - 1) / (6 * signedArea) - // c[y] = (Sigma(y[i] + y[i + 1]) * (x[i] * y[i + 1] - x[i + 1] * y[i]), for i = 0 to N - 1) / (6 * signedArea) - private static Point getPolygonSansHolesCentroid(Polygon polygon) - { - int pointCount = polygon.getPointCount(); - double xSum = 0; - double ySum = 0; - double signedArea = 0; - for (int i = 0; i < pointCount; i++) { - Point current = polygon.getPoint(i); - Point next = polygon.getPoint((i + 1) % polygon.getPointCount()); - double ladder = current.getX() * next.getY() - next.getX() * current.getY(); - xSum += (current.getX() + next.getX()) * ladder; - ySum += (current.getY() + next.getY()) * ladder; - signedArea += ladder / 2; - } - return new Point(xSum / (signedArea * 6), ySum / (signedArea * 6)); - } - - // MultiPolygon centroid is weighted mean of each polygon, weight in terms of polygon area - private static Point 
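For reference, the polygon-centroid formula implemented by the removed getPolygonSansHolesCentroid (and restated in its comment) is the standard shoelace-weighted form; the rewritten stCentroid instead delegates to the ESRI library's OGCGeometry.centroid(), which presumably accounts for the slightly different expected POINT value in TestBingTileFunctions below:

A = \frac{1}{2} \sum_{i=0}^{N-1} (x_i y_{i+1} - x_{i+1} y_i)

C_x = \frac{1}{6A} \sum_{i=0}^{N-1} (x_i + x_{i+1})(x_i y_{i+1} - x_{i+1} y_i)

C_y = \frac{1}{6A} \sum_{i=0}^{N-1} (y_i + y_{i+1})(x_i y_{i+1} - x_{i+1} y_i)

with vertex indices taken modulo N.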
computeMultiPolygonCentroid(OGCMultiPolygon multiPolygon) - { - double xSum = 0; - double ySum = 0; - double weightSum = 0; - for (int i = 0; i < multiPolygon.numGeometries(); i++) { - Point centroid = computePolygonCentroid((Polygon) multiPolygon.geometryN(i).getEsriGeometry()); - Polygon polygon = (Polygon) multiPolygon.geometryN(i).getEsriGeometry(); - double weight = polygon.calculateArea2D(); - weightSum += weight; - xSum += centroid.getX() * weight; - ySum += centroid.getY() * weight; - } - return new Point(xSum / weightSum, ySum / weightSum); - } - private static boolean envelopes(Slice left, Slice right, EnvelopesPredicate predicate) { Envelope leftEnvelope = deserializeEnvelope(left); diff --git a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestBingTileFunctions.java b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestBingTileFunctions.java index e38fe7fde0725..4044d3610577c 100644 --- a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestBingTileFunctions.java +++ b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestBingTileFunctions.java @@ -454,7 +454,7 @@ public void testBingTilePolygon() assertThat(assertions.function("ST_AsText", "ST_Centroid(bing_tile_polygon(bing_tile('123030123010121')))")) .hasType(VARCHAR) - .isEqualTo("POINT (60.0018310442288 30.121372968273892)"); + .isEqualTo("POINT (60.0018310546875 30.121372973521975)"); // Check bottom right corner of a stack of tiles at different zoom levels assertThat(assertions.function("ST_AsText", "apply(bing_tile_polygon(bing_tile(1, 1, 1)), g -> ST_Point(ST_XMax(g), ST_YMin(g)))")) diff --git a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestGeoFunctions.java b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestGeoFunctions.java index 96e7b57653161..bd86208503502 100644 --- a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestGeoFunctions.java +++ b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestGeoFunctions.java @@ -14,10 +14,12 @@ package io.trino.plugin.geospatial; import com.esri.core.geometry.Point; +import com.esri.core.geometry.ogc.OGCGeometry; import com.esri.core.geometry.ogc.OGCPoint; import com.google.common.collect.ImmutableList; import io.trino.geospatial.KdbTreeUtils; import io.trino.geospatial.Rectangle; +import io.trino.geospatial.serde.GeometrySerde; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.ArrayType; @@ -35,6 +37,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.geospatial.KdbTree.buildKdbTree; +import static io.trino.plugin.geospatial.GeoFunctions.stCentroid; import static io.trino.plugin.geospatial.GeometryType.GEOMETRY; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.DoubleType.DOUBLE; @@ -42,6 +45,7 @@ import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; @@ -259,6 +263,19 @@ public void testSTBuffer() assertTrinoExceptionThrownBy(assertions.function("ST_Buffer", "ST_Point(0, 0)", "nan()")::evaluate) .hasMessage("distance is NaN"); + + // For small polygons, there 
was a bug in ESRI that threw an NPE. This + // was fixed (https://github.com/Esri/geometry-api-java/pull/243) to + // return an empty geometry instead. Ideally, these would return + // something approximately like `ST_Buffer(ST_Centroid(geometry))`. + assertThat(assertions.function("ST_IsEmpty", "ST_Buffer(ST_Buffer(ST_Point(177.50102959662, 64.726807421691), 0.0000000001), 0.00005)")) + .hasType(BOOLEAN) + .isEqualTo(true); + + assertThat(assertions.function("ST_IsEmpty", "ST_Buffer(ST_GeometryFromText(" + + "'POLYGON ((177.0 64.0, 177.0000000001 64.0, 177.0000000001 64.0000000001, 177.0 64.0000000001, 177.0 64.0))'), 0.01)")) + .hasType(BOOLEAN) + .isEqualTo(true); } @Test @@ -299,6 +316,33 @@ public void testSTCentroid() assertThat(assertions.function("ST_AsText", "ST_Centroid(ST_GeometryFromText('POLYGON ((0 0, 0 5, 5 5, 5 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))'))")) .hasType(VARCHAR) .isEqualTo("POINT (2.5416666666666665 2.5416666666666665)"); + + assertApproximateCentroid("MULTIPOLYGON (((4.903234300000006 52.08474289999999, 4.903234265193165 52.084742934806826, 4.903234299999999 52.08474289999999, 4.903234300000006 52.08474289999999)))", new Point(4.9032343, 52.0847429), 1e-7); + + // Numerical stability tests + assertApproximateCentroid( + "MULTIPOLYGON (((153.492818 -28.13729, 153.492821 -28.137291, 153.492816 -28.137289, 153.492818 -28.13729)))", + new Point(153.49282, -28.13729), 1e-5); + assertApproximateCentroid( + "MULTIPOLYGON (((153.112475 -28.360526, 153.1124759 -28.360527, 153.1124759 -28.360526, 153.112475 -28.360526)))", + new Point(153.112475, -28.360526), 1e-5); + assertApproximateCentroid( + "POLYGON ((4.903234300000006 52.08474289999999, 4.903234265193165 52.084742934806826, 4.903234299999999 52.08474289999999, 4.903234300000006 52.08474289999999))", + new Point(4.9032343, 52.0847429), 1e-6); + assertApproximateCentroid( + "MULTIPOLYGON (((4.903234300000006 52.08474289999999, 4.903234265193165 52.084742934806826, 4.903234299999999 52.08474289999999, 4.903234300000006 52.08474289999999)))", + new Point(4.9032343, 52.0847429), 1e-6); + assertApproximateCentroid( + "POLYGON ((-81.0387349 29.20822, -81.039974 29.210597, -81.0410331 29.2101579, -81.0404758 29.2090879, -81.0404618 29.2090609, -81.040433 29.209005, -81.0404269 29.208993, -81.0404161 29.2089729, -81.0398001 29.20779, -81.0387349 29.20822), (-81.0404229 29.208986, -81.04042 29.2089809, -81.0404269 29.208993, -81.0404229 29.208986))", + new Point(-81.039885, 29.209191), 1e-6); + } + + private void assertApproximateCentroid(String wkt, Point expectedCentroid, double epsilon) + { + OGCPoint actualCentroid = (OGCPoint) GeometrySerde.deserialize( + stCentroid(GeometrySerde.serialize(OGCGeometry.fromText(wkt)))); + assertEquals(actualCentroid.X(), expectedCentroid.getX(), epsilon); + assertEquals(actualCentroid.Y(), expectedCentroid.getY(), epsilon); } @Test diff --git a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestSpatialJoins.java b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestSpatialJoins.java index 8693030d20abc..10ef1b5104c68 100644 --- a/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestSpatialJoins.java +++ b/plugin/trino-geospatial/src/test/java/io/trino/plugin/geospatial/TestSpatialJoins.java @@ -15,19 +15,12 @@ import io.trino.Session; import io.trino.plugin.hive.TestingHivePlugin; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.spi.security.PrincipalType; import
io.trino.sql.query.QueryAssertions; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.junit.jupiter.api.Test; -import java.io.File; -import java.util.Optional; - import static io.trino.SystemSessionProperties.SPATIAL_PARTITIONING_TABLE_NAME; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.testing.TestingSession.testSessionBuilder; import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; @@ -74,19 +67,10 @@ protected DistributedQueryRunner createQueryRunner() DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); queryRunner.installPlugin(new GeoPlugin()); - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toFile(); - - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - - metastore.createDatabase( - Database.builder() - .setDatabaseName("default") - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build()); - queryRunner.installPlugin(new TestingHivePlugin(metastore)); - + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"))); queryRunner.createCatalog("hive", "hive"); + queryRunner.execute("CREATE SCHEMA hive.default"); + return queryRunner; } diff --git a/plugin/trino-google-sheets/pom.xml b/plugin/trino-google-sheets/pom.xml index 4ff813f9718e3..e7396a711768a 100644 --- a/plugin/trino-google-sheets/pom.xml +++ b/plugin/trino-google-sheets/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-hive-hadoop2/bin/common.sh b/plugin/trino-hive-hadoop2/bin/common.sh deleted file mode 100644 index 534bc9020ec75..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/common.sh +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env bash - -function retry() { - local END - local EXIT_CODE - - END=$(($(date +%s) + 600)) - - while (( $(date +%s) < $END )); do - set +e - "$@" - EXIT_CODE=$? - set -e - - if [[ ${EXIT_CODE} == 0 ]]; then - break - fi - sleep 5 - done - - return ${EXIT_CODE} -} - -function hadoop_master_container() { - docker-compose -f "${DOCKER_COMPOSE_LOCATION}" ps -q hadoop-master | grep . 
-} - -function hadoop_master_ip() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $HADOOP_MASTER_CONTAINER -} - -function check_hadoop() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status hive-server2 | grep -i running &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status hive-metastore | grep -i running &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} netstat -lpn | grep -i 0.0.0.0:10000 &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} netstat -lpn | grep -i 0.0.0.0:9083 &> /dev/null -} - -function exec_in_hadoop_master_container() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} "$@" -} - -function stop_unnecessary_hadoop_services() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl stop yarn-resourcemanager - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl stop yarn-nodemanager -} - -# Expands docker compose file paths files into the format "-f $1 -f $2 ...." -# Arguments: -# $1, $2, ...: A list of docker-compose files used to start/stop containers -function expand_compose_args() { - local files=( "${@}" ) - local compose_args="" - for file in ${files[@]}; do - compose_args+=" -f ${file}" - done - echo "${compose_args}" -} - -function cleanup_docker_containers() { - local compose_args="$(expand_compose_args "$@")" - # stop containers started with "up" - docker-compose ${compose_args} down --remove-orphans - - # docker logs processes are being terminated as soon as docker container are stopped - # wait for docker logs termination - wait -} - -function cleanup_hadoop_docker_containers() { - cleanup_docker_containers "${DOCKER_COMPOSE_LOCATION}" -} - -function termination_handler() { - set +e - cleanup_docker_containers "$@" - exit 130 -} - -# Check that all arguments are the names of non-empty variables. -function check_vars() { - ( # Subshell to preserve xtrace - set +x # Disable xtrace to make the messages printed clear - local failing=0 - for arg; do - if [[ ! -v "${arg}" ]]; then - echo "error: Variable not set: ${arg}" >&2 - failing=1 - elif [[ -z "${!arg}" ]]; then - echo "error: Variable is empty: ${arg}" >&2 - failing=1 - fi - done - return "$failing" - ) -} - -SCRIPT_DIR="${BASH_SOURCE%/*}" -INTEGRATION_TESTS_ROOT="${SCRIPT_DIR}/.." -PROJECT_ROOT="${INTEGRATION_TESTS_ROOT}/../.." 
-DOCKER_COMPOSE_LOCATION="${INTEGRATION_TESTS_ROOT}/conf/docker-compose.yml" -source "${INTEGRATION_TESTS_ROOT}/conf/hive-tests-defaults.sh" - -# check docker and docker compose installation -docker-compose version -docker version - -# extract proxy IP -if [ -n "${DOCKER_MACHINE_NAME:-}" ] -then - PROXY=`docker-machine ip` -else - PROXY=127.0.0.1 -fi - -# Starts containers based on multiple docker compose locations -# Arguments: -# $1, $2, ...: A list of docker-compose files used to start containers -function start_docker_containers() { - local compose_args="$(expand_compose_args $@)" - # Purposefully don't surround ${compose_args} with quotes so that docker-compose infers multiple arguments - # stop already running containers - docker-compose ${compose_args} down || true - - # catch terminate signals - # trap arguments are not expanded until the trap is called, so they must be in a global variable - TRAP_ARGS="$@" - trap 'termination_handler $TRAP_ARGS' INT TERM - - # pull docker images - if [[ "${CONTINUOUS_INTEGRATION:-false}" == 'true' ]]; then - retry docker-compose ${compose_args} pull --quiet - fi - - # start containers - docker-compose ${compose_args} up -d -} - -function start_hadoop_docker_containers() { - start_docker_containers "${DOCKER_COMPOSE_LOCATION}" - - # start docker logs for hadoop container - docker-compose -f "${DOCKER_COMPOSE_LOCATION}" logs --no-color hadoop-master & - - # wait until hadoop processes is started - retry check_hadoop -} - -# $1 = base URI for table names -function create_test_tables() { - local table_name table_path - local base_path="${1:?create_test_tables requires an argument}" - base_path="${base_path%/}" # remove trailing slash - - table_name="trino_test_external_fs" - table_path="$base_path/$table_name/" - exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e "CREATE EXTERNAL TABLE $table_name(t_bigint bigint) LOCATION '${table_path}'" - - table_name="trino_test_external_fs_with_header" - table_path="$base_path/$table_name/" - exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table_with_header.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE $table_name(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='1')" - - table_name="trino_test_external_fs_with_header_and_footer" - table_path="$base_path/$table_name/" - exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table_with_header_and_footer.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE $table_name(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='2', 'skip.footer.line.count'='2')" -} - -# $1 = basename of core-site.xml template -# other arguments are names of variables to substitute in the file -function deploy_core_site_xml() { - local template="${1:?deploy_core_site_xml expects at least one argument}" - shift - local args=() - local name value - for name; do - shift - value="${!name//\\/\\\\}" # escape \ as \\ - value="${value//|/\\|}" # escape | as \| - args+=(-e 
"s|%$name%|$value|g") - done - exec_in_hadoop_master_container bash -c \ - 'sed "${@:2}" "/docker/files/$1" > /etc/hadoop/conf/core-site.xml' \ - bash "$template" "${args[@]}" -} - -# Checks if Gitflow Incremental Builder (GIB) is enabled and the trino-hive-hadoop2 module should be build and/or tested -function abort_if_not_gib_impacted() { - local module=plugin/trino-hive-hadoop2 - local impacted_log=gib-impacted.log - if [ -f "$impacted_log" ] && ! grep -q "^${module}$" "$impacted_log"; then - echo >&2 "Module $module not present in $impacted_log, exiting" - exit 0 - fi - return 0 -} diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh deleted file mode 100755 index 60c0fd1ec5dc0..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. "${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars ABFS_CONTAINER ABFS_ACCOUNT ABFS_ACCESS_KEY - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)" - -# insert Azure credentials -deploy_core_site_xml core-site.xml.abfs-access-key-template \ - ABFS_ACCESS_KEY ABFS_ACCOUNT - -# restart hive-server2 to apply changes in core-site.xml -exec_in_hadoop_master_container supervisorctl restart hive-server2 -retry check_hadoop - -create_test_tables "abfs://${ABFS_CONTAINER}@${ABFS_ACCOUNT}.dfs.core.windows.net/${test_directory}" - -stop_unnecessary_hadoop_services - -# run product tests -pushd $PROJECT_ROOT -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-abfs-access-key \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dhive.hadoop2.abfs.container=${ABFS_CONTAINER} \ - -Dhive.hadoop2.abfs.account=${ABFS_ACCOUNT} \ - -Dhive.hadoop2.abfs.accessKey=${ABFS_ACCESS_KEY} \ - -Dhive.hadoop2.abfs.testDirectory="${test_directory}" -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit ${EXIT_CODE} diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh deleted file mode 100755 index 87fe5e34291cb..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -set -euxo pipefail - -. 
"${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars ABFS_ACCOUNT ABFS_CONTAINER \ - ABFS_OAUTH_ENDPOINT ABFS_OAUTH_CLIENTID ABFS_OAUTH_SECRET - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)" - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -# insert Azure credentials -deploy_core_site_xml core-site.xml.abfs-oauth-template \ - ABFS_ACCOUNT ABFS_CONTAINER \ - ABFS_OAUTH_ENDPOINT ABFS_OAUTH_CLIENTID ABFS_OAUTH_SECRET - -# restart hive-server2 to apply changes in core-site.xml -exec_in_hadoop_master_container supervisorctl restart hive-server2 -retry check_hadoop - -create_test_tables \ - "abfs://$ABFS_CONTAINER@$ABFS_ACCOUNT.dfs.core.windows.net/$test_directory" - -stop_unnecessary_hadoop_services - -pushd $PROJECT_ROOT -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-abfs-oauth \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dtest.hive.azure.abfs.container="$ABFS_CONTAINER" \ - -Dtest.hive.azure.abfs.storage-account="$ABFS_ACCOUNT" \ - -Dtest.hive.azure.abfs.test-directory="$test_directory" \ - -Dtest.hive.azure.abfs.oauth.endpoint="$ABFS_OAUTH_ENDPOINT" \ - -Dtest.hive.azure.abfs.oauth.client-id="$ABFS_OAUTH_CLIENTID" \ - -Dtest.hive.azure.abfs.oauth.secret="$ABFS_OAUTH_SECRET" -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit ${EXIT_CODE} diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_adl_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_adl_tests.sh deleted file mode 100755 index c064eb923fa78..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_adl_tests.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. "${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars ADL_NAME ADL_CLIENT_ID ADL_CREDENTIAL ADL_REFRESH_URL - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)" - -# insert Azure credentials -deploy_core_site_xml core-site.xml.adl-template \ - ADL_CLIENT_ID ADL_CREDENTIAL ADL_REFRESH_URL - -# restart hive-server2 to apply changes in core-site.xml -exec_in_hadoop_master_container supervisorctl restart hive-server2 -retry check_hadoop - -create_test_tables "adl://${ADL_NAME}.azuredatalakestore.net/${test_directory}" - -stop_unnecessary_hadoop_services - -# run product tests -pushd $PROJECT_ROOT -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-adl \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dhive.hadoop2.adl.name=${ADL_NAME} \ - -Dhive.hadoop2.adl.clientId=${ADL_CLIENT_ID} \ - -Dhive.hadoop2.adl.credential=${ADL_CREDENTIAL} \ - -Dhive.hadoop2.adl.refreshUrl=${ADL_REFRESH_URL} \ - -Dhive.hadoop2.adl.testDirectory=${test_directory} -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit ${EXIT_CODE} diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh deleted file mode 100755 index 82fa2b7fd6e6c..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. 
"${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars S3_BUCKET S3_BUCKET_ENDPOINT AWS_REGION \ - AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)" - -# insert AWS credentials -deploy_core_site_xml core-site.xml.s3-template \ - AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET_ENDPOINT - -# create test tables -# can't use create_test_tables because the first table is created with different commands -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table.csv{,.gz,.bz2,.lz4} "${table_path}" -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e " - CREATE EXTERNAL TABLE trino_test_external_fs(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}'" - -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs_with_header/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_header.csv{,.gz,.bz2,.lz4} "${table_path}" -exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE trino_test_external_fs_with_header(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='1')" - -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs_with_header_and_footer/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_header_and_footer.csv{,.gz,.bz2,.lz4} "${table_path}" -exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE trino_test_external_fs_with_header_and_footer(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='2', 'skip.footer.line.count'='2')" - -stop_unnecessary_hadoop_services - -# restart hive-metastore to apply S3 changes in core-site.xml -docker exec "$(hadoop_master_container)" supervisorctl restart hive-metastore -retry check_hadoop - -# run product tests -pushd "${PROJECT_ROOT}" -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-s3 \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dhive.hadoop2.s3.endpoint="${S3_BUCKET_ENDPOINT}" \ - -Dhive.hadoop2.s3.region="${AWS_REGION}" \ - -Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \ - -Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \ - -Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \ - -Dhive.hadoop2.s3.testDirectory="${test_directory}" -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit "${EXIT_CODE}" diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh deleted file mode 100755 index e7a82e9cf5004..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. 
"${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -# generate test data -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -f /docker/sql/create-test.sql - -stop_unnecessary_hadoop_services - -HADOOP_MASTER_IP=$(hadoop_master_ip) - -# run product tests -pushd "${PROJECT_ROOT}" -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2 \ - -DHADOOP_USER_NAME=hive \ - -Dtest.metastore=localhost:9083 \ - -Dtest.database=default \ - -Dhive.metastore.thrift.client.socks-proxy="${PROXY}:1180" \ - -Dhive.hdfs.socks-proxy="${PROXY}:1180" \ - -Dhadoop-master-ip="${HADOOP_MASTER_IP}" -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit "${EXIT_CODE}" diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_wasb_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_wasb_tests.sh deleted file mode 100755 index 40c19ddf8c472..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_wasb_tests.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. "${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars WASB_CONTAINER WASB_ACCOUNT WASB_ACCESS_KEY - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)" - -# insert Azure credentials -deploy_core_site_xml core-site.xml.wasb-template \ - WASB_ACCESS_KEY WASB_ACCOUNT - -# restart hive-server2 to apply changes in core-site.xml -exec_in_hadoop_master_container supervisorctl restart hive-server2 -retry check_hadoop - -create_test_tables "wasb://${WASB_CONTAINER}@${WASB_ACCOUNT}.blob.core.windows.net/${test_directory}" - -stop_unnecessary_hadoop_services - -# run product tests -pushd $PROJECT_ROOT -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-wasb \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dhive.hadoop2.wasb.container=${WASB_CONTAINER} \ - -Dhive.hadoop2.wasb.account=${WASB_ACCOUNT} \ - -Dhive.hadoop2.wasb.accessKey=${WASB_ACCESS_KEY} \ - -Dhive.hadoop2.wasb.testDirectory=${test_directory} -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit ${EXIT_CODE} diff --git a/plugin/trino-hive-hadoop2/bin/start_hive.sh b/plugin/trino-hive-hadoop2/bin/start_hive.sh deleted file mode 100755 index 82179fd6eb697..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/start_hive.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -. 
"${BASH_SOURCE%/*}/common.sh" - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -HADOOP_MASTER_IP=$(hadoop_master_ip) - -# get short version of container ID (as shown by "docker ps") -CONTAINER=$(echo "${HADOOP_MASTER_CONTAINER}" | cut -b1-12) - -echo -echo "Proxy: ${PROXY}:1180" -echo "Hadoop: ${HADOOP_MASTER_IP}" -echo "Docker: ${CONTAINER}" -echo -echo "docker exec -it ${CONTAINER} bash" -echo diff --git a/plugin/trino-hive-hadoop2/conf/docker-compose.yml b/plugin/trino-hive-hadoop2/conf/docker-compose.yml deleted file mode 100644 index fe9414fe33ed4..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/docker-compose.yml +++ /dev/null @@ -1,22 +0,0 @@ -version: '2' -services: - hadoop-master: - hostname: hadoop-master - image: '${HADOOP_BASE_IMAGE}:${DOCKER_IMAGES_VERSION}' - ports: - - '1180:1180' - - '8020:8020' # Default hadoop namenode port - - '8042:8042' - - '8088:8088' - - '9000:9000' # Default hadoop namenode port - - '9083:9083' # Metastore Thrift - - '9864:9864' # DataNode Web UI since Hadoop 3 - - '9870:9870' # NameNode Web UI since Hadoop 3 - - '10000:10000' # HiveServer2 - - '19888:19888' - - '50070:50070' # NameNode Web UI prior to Hadoop 3 - - '50075:50075' # DataNode Web UI prior to Hadoop 3 - volumes: - - ../../trino-hive/src/test/sql:/docker/sql:ro - - ./files:/docker/files:ro - - ./files/tez-site.xml:/etc/tez/conf/tez-site.xml:ro diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-access-key-template b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-access-key-template deleted file mode 100644 index 7f807e26e0c1f..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-access-key-template +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.azure.account.key.%ABFS_ACCOUNT%.dfs.core.windows.net - %ABFS_ACCESS_KEY% - - diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-oauth-template b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-oauth-template deleted file mode 100644 index 48869661ea995..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.abfs-oauth-template +++ /dev/null @@ -1,60 +0,0 @@ - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - - fs.azure.account.auth.type - OAuth - - - - fs.azure.account.oauth.provider.type - org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider - - - - fs.azure.account.oauth2.client.id - %ABFS_OAUTH_CLIENTID% - - - - fs.azure.account.oauth2.client.secret - %ABFS_OAUTH_SECRET% - - - - fs.azure.account.oauth2.client.endpoint - %ABFS_OAUTH_ENDPOINT% - - diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.adl-template b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.adl-template deleted file mode 100644 index bb0fa9f47e3cb..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.adl-template +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.adl.oauth2.access.token.provider.type - ClientCredential - - - - fs.adl.oauth2.client.id - %ADL_CLIENT_ID% - - - - fs.adl.oauth2.credential - %ADL_CREDENTIAL% - - - - fs.adl.oauth2.refresh.url - %ADL_REFRESH_URL% - - diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template 
b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template deleted file mode 100644 index 984026e39e380..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.s3.awsAccessKeyId - %AWS_ACCESS_KEY_ID% - - - - fs.s3.awsSecretAccessKey - %AWS_SECRET_ACCESS_KEY% - - - - fs.s3a.access.key - %AWS_ACCESS_KEY_ID% - - - - fs.s3a.secret.key - %AWS_SECRET_ACCESS_KEY% - - - - fs.s3a.endpoint - %S3_BUCKET_ENDPOINT% - - - diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.wasb-template b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.wasb-template deleted file mode 100644 index 74f28465c0a62..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.wasb-template +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.azure.account.key.%WASB_ACCOUNT%.blob.core.windows.net - %WASB_ACCESS_KEY% - - diff --git a/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh b/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh deleted file mode 100755 index 33a7431af06e4..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -xeuo pipefail - -# Hadoop 3 without -d (don't create _COPYING_ temporary file) requires additional S3 permissions -# Hadoop 2 doesn't have '-d' switch -hadoop fs -put -f -d "$@" || -hadoop fs -put -f "$@" diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv b/plugin/trino-hive-hadoop2/conf/files/test_table.csv deleted file mode 100644 index 0628eaa1bdc28..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table.csv +++ /dev/null @@ -1,3 +0,0 @@ -3 -14 -15 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 deleted file mode 100644 index d9d6b339f9049..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table.csv.gz deleted file mode 100644 index de775ab750427..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.lz4 b/plugin/trino-hive-hadoop2/conf/files/test_table.csv.lz4 deleted file mode 100644 index 86cfed21b841c..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.lz4 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json b/plugin/trino-hive-hadoop2/conf/files/test_table.json deleted file mode 100644 index 6173ff8aae879..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table.json +++ /dev/null @@ -1,2 +0,0 @@ -{"col_1":2, "col_2":4} -{"col_1":5, "col_2":6} diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 deleted file mode 100644 index 6b90f2081e35b..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json.gz b/plugin/trino-hive-hadoop2/conf/files/test_table.json.gz deleted file mode 100644 index ae46357976038..0000000000000 Binary 
files a/plugin/trino-hive-hadoop2/conf/files/test_table.json.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_1.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_1.csv deleted file mode 100644 index c9b5ed332bf5f..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_1.csv +++ /dev/null @@ -1,100 +0,0 @@ -1|AAAAAAAABAAAAAAA|980124|7135|282946|2452238|2452208|Mr.|Javier|Lewis|Y|9|12|1936|CHILE||Javier.Lewis@VFAxlnZEvOx.org|2452508| -2|AAAAAAAACAAAAAAA|819667|1461|681655|2452318|2452288|Dr.|Amy|Moses|Y|9|4|1966|TOGO||Amy.Moses@Ovk9KjHH.com|2452318| -3|AAAAAAAADAAAAAAA|1473522|6247|748572|2449130|2449100|Miss|Latisha|Hamilton|N|18|9|1979|NIUE||Latisha.Hamilton@V.com|2452313| -4|AAAAAAAAEAAAAAAA|1703214|3986|689558|2450030|2450000|Dr.|Michael|White|N|7|6|1983|MEXICO||Michael.White@i.org|2452361| -5|AAAAAAAAFAAAAAAA|953372|4470|336368|2449438|2449408|Sir|Robert|Moran|N|8|5|1956|FIJI||Robert.Moran@Hh.edu|2452469| -6|AAAAAAAAGAAAAAAA|213219|6374|327082|2451883|2451853|Ms.|Brunilda|Sharp|N|4|12|1925|SURINAME||Brunilda.Sharp@T3pylZEUQjm.org|2452430| -7|AAAAAAAAHAAAAAAA|68377|3219|944814|2451438|2451408|Ms.|Fonda|Wiles|Y|24|4|1985|GAMBIA||Fonda.Wiles@S9KnyEtz9hv.org|2452360| -8|AAAAAAAAIAAAAAAA|1215897|2471|316598|2449406|2449376|Sir|Ollie|Shipman|N|26|12|1938|KOREA, REPUBLIC OF||Ollie.Shipman@be.org|2452334| -9|AAAAAAAAJAAAAAAA|1168667|1404|599388|2452275|2452245|Sir|Karl|Gilbert|N|26|10|1966|MONTSERRAT||Karl.Gilbert@Crg5KyP2IxX9C4d6.edu|2452454| -10|AAAAAAAAKAAAAAAA|1207553|5143|369580|2451353|2451323|Ms.|Albert|Brunson|N|15|10|1973|JORDAN||Albert.Brunson@62.com|2452641| -11|AAAAAAAALAAAAAAA|1114415|6807|947999|2452288|2452258|Ms.|Betty|Williams|N|18|12|1963|BURKINA FASO||Betty.Williams@xRtDqM1eLBVQNoYAJ.com|2452398| -12|AAAAAAAAMAAAAAAA|502141|6577|547366|2451039|2451009|Ms.|Margaret|Farias|N|2|6|1956|TURKMENISTAN||Margaret.Farias@cb.edu|2452634| -13|AAAAAAAANAAAAAAA|1128748|2777|664006|2449658|2449628|Mrs.|Rosalinda|Grimes|N|1|3|1970|UKRAINE||Rosalinda.Grimes@tC8pcU7Lt.edu|2452616| -14|AAAAAAAAOAAAAAAA|929344|892|656440|2450318|2450288|Mr.|Jack|Wilcox|N|30|3|1937|SLOVENIA||Jack.Wilcox@Y3Etqyv3.org|2452641| -15|AAAAAAAAPAAAAAAA||134|480469||2449010|Ms.|Tonya||||12|1969||||2452376| -16|AAAAAAAAABAAAAAA|1196373|3014|829302|2451346|2451316|Dr.|Margie|Browning|N|24|12|1933|PHILIPPINES||Margie.Browning@LM674NrE2.org|2452573| -17|AAAAAAAABBAAAAAA|707524|3876|552228|2451068|2451038|Dr.|Lee|Stovall|N|23|12|1972|PHILIPPINES||Lee.Stovall@fqKC83UU0f.org|2452454| -18|AAAAAAAACBAAAAAA|1361151|6580|168456|2450041|2450011|Sir|Brad|Lynch|Y|1|9|1950|URUGUAY||Brad.Lynch@nAbai.edu|2452549| -19|AAAAAAAADBAAAAAA|1161742|4238|945581|2449580|2449550|Dr.|Andre|Moore|N|20|10|1978|NICARAGUA||Andre.Moore@cTZLGYi1ZJi.org|2452576| -20|AAAAAAAAEBAAAAAA|1185612|89|688966|2450965|2450935|Mr.|Stanton|Dallas|Y|17|5|1976|SWITZERLAND||Stanton.Dallas@DBXgl18FGo.edu|2452334| -21|AAAAAAAAFBAAAAAA|442697|6538|575979|2451827|2451797|Miss|Naomi|Barnett|N|18|5|1956|BAHAMAS||Naomi.Barnett@2T3V3OZOy4KBNAHsT.edu|2452433| -22|AAAAAAAAGBAAAAAA|490494|4504|740467|2451465|2451435|Sir|Victor|Martinez|N|21|12|1991|CAYMAN ISLANDS||Victor.Martinez@fC.edu|2452529| -23|AAAAAAAAHBAAAAAA||2107|686|2451012|2450982|Sir||Garrison|N||5||||Earl.Garrison@G3sM4P.com|| -24|AAAAAAAAIBAAAAAA|17113|1102|564291|2450098|2450068|Dr.|Paul|Morris|Y|6|6|1951|GUINEA-BISSAU||Paul.Morris@FMGalegqc3.com|2452499| 
-25|AAAAAAAAJBAAAAAA|476176|3278|425933|2449246|2449216|Miss|Nancy|Mccormick|N|6|10|1973|BAHAMAS||Nancy.Mccormick@DA26I9ZArLF9rxJ6Z.edu|2452425| -26|AAAAAAAAKBAAAAAA|230278|1242|128206|2449816|2449786|Ms.|Monique|Baker|N|14|6|1991|MOROCCO||Monique.Baker@9uEucNczY.org|2452356| -27|AAAAAAAALBAAAAAA|111621|835|516096|2450611|2450581|Dr.|Shawn|Prather|N|7|3|1972|PUERTO RICO||Shawn.Prather@8BusRYegn6.org|2452353| -28|AAAAAAAAMBAAAAAA|889961|897|183480|2452615|2452585|Miss|Edith|Hernandez|Y|17|8|1959|CÔTE D'IVOIRE||Edith.Hernandez@BNHL0k.com|2452631| -29|AAAAAAAANBAAAAAA|1634269|1204|744139|2452216|2452186|Ms.|Margaret|Collins|Y|25|8|1973|NETHERLANDS ANTILLES||Margaret.Collins@9obPr3UV.org|2452496| -30|AAAAAAAAOBAAAAAA|84232|5869|245459|2449916|2449886|Dr.|Pamela|Luna|Y|4|7|1956|GUAM||Pamela.Luna@QBGuhL36lnA.edu|2452410| -31|AAAAAAAAPBAAAAAA|1275120|5724|457128|2452054|2452024|Dr.|William|Craig|N|13|6|1964|SPAIN||William.Craig@prVDE1E8AHc.org|2452336| -32|AAAAAAAAACAAAAAA|1369589|6163|98290|2449471|2449441|Dr.|Kenneth|Wood|N|2|8|1927|MOLDOVA, REPUBLIC OF||Kenneth.Wood@RIA.edu|2452510| -33|AAAAAAAABCAAAAAA|1680761|5823|82438|2451572|2451542|Mrs.|Donna|Betts|Y|25|3|1958|FRANCE||Donna.Betts@YJ14k.edu|2452455| -34|AAAAAAAACCAAAAAA|||287501|2452192|2452162|||Woods|||||||Brandon.Woods@hjKbf.edu|2452408| -35|AAAAAAAADCAAAAAA|339036|2057|482231|2451937|2451907|Dr.|Marie|Peterson|N|19|6|1953|KIRIBATI||Marie.Peterson@1zg9tydFHafA5.com|2452581| -36|AAAAAAAAECAAAAAA|655414|2223|376585|2449222|2449192|Dr.|Anthony|Fisher|N|29|11|1968|KYRGYZSTAN||Anthony.Fisher@jJrZfeDcz8P.com|2452470| -37|AAAAAAAAFCAAAAAA|827972|2281|412943|2449848|2449818|Dr.|Dwight|Schneider|Y|9|1|1949|RUSSIAN FEDERATION||Dwight.Schneider@koxO7zAysvOd.com|2452293| -38|AAAAAAAAGCAAAAAA|766645|3879|686360|2450323|2450293|Mrs.|Ha|Carpenter|Y|30|3|1939|PARAGUAY||Ha.Carpenter@XgcUt4svNz.com|2452351| -39|AAAAAAAAHCAAAAAA|1617182|4013|422844|2452131|2452101|Sir|Neil|Cox|N|14|12|1951|ECUADOR||Neil.Cox@FRuR2bFK.com|2452586| -40|AAAAAAAAICAAAAAA|1795301||71045|2451159|||Jacqueline|Hatfield||26||1942||||| -41|AAAAAAAAJCAAAAAA|941420|5068|118316|2449989|2449959|Miss|Maxine|Carlson|N|26|6|1939|ISLE OF MAN||Maxine.Carlson@StyP5lAokmQ29QHYMLa.edu|2452464| -42|AAAAAAAAKCAAAAAA|1867377|3327|111277|2449869|2449839|Dr.|Deborah|Burton|N|17|1|1950|COMOROS||Deborah.Burton@xt.edu|2452498| -43|AAAAAAAALCAAAAAA|385562|3676|643743|2451705|2451675|Mr.|William|Warner|Y|5|12|1965|GUAM||William.Warner@zegnrzurU.org|2452537| -44|AAAAAAAAMCAAAAAA|497758|5427|32296|2449750|2449720|Miss|Mabel|Richmond|Y|7|12|1932|CAMBODIA||Mabel.Richmond@Tkla.edu|2452333| -45|AAAAAAAANCAAAAAA|1036174|2399|469777|2451118|2451088|||Hammonds||5|||NIGER|||| -46|AAAAAAAAOCAAAAAA|843672|4729|971386|2451857|2451827|Mrs.|Jane|Stephenson|Y|13|4|1990|TURKEY||Jane.Stephenson@lq8ZQLAUMZhR.edu|2452301| -47|AAAAAAAAPCAAAAAA|1634314|5368|638469|2449165|2449135|Mr.|Luis|Young|N|3|12|1982|BRUNEI DARUSSALAM||Luis.Young@0DmV.edu|2452586| -48|AAAAAAAAADAAAAAA|8817|311|788252|2449531|2449501|Ms.|Jill|Jackson|N|15|9|1961|LUXEMBOURG||Jill.Jackson@n6I7SF.org|2452575| -49|AAAAAAAABDAAAAAA|936800|1514|970581|2449364|2449334|Mr.|Steven|Venable|N|13|6|1961|NETHERLANDS ANTILLES||Steven.Venable@0hA90vhfK7k9F4h.com|2452350| -50|AAAAAAAACDAAAAAA|759177|72|382567|2449384|2449354|Mr.|Christopher|Eller|N|24|11|1965|AUSTRALIA||Christopher.Eller@gV5Ua7HOmt.com|2452520| -51|AAAAAAAADDAAAAAA|532799|6920|260688|2452390|2452360|Dr.|David|Nieves|N|5|10|1926|CHRISTMAS ISLAND||David.Nieves@LcDkQ.edu|2452570| 
-52|AAAAAAAAEDAAAAAA|534808|4768|677128|2451421|2451391|Mrs.|Wendy|Colley|N|1|4|1982|FRENCH GUIANA||Wendy.Colley@qLBjqbAQQGj.edu|2452553| -53|AAAAAAAAFDAAAAAA|1597348|7017|443439|2451820|2451790|Mr.|Paul|Higgins|Y|23|9|1950|GABON||Paul.Higgins@qG9NrSTLz9HaNHX.edu|2452447| -54|AAAAAAAAGDAAAAAA|380102|2381|840660|2449980|2449950|Ms.|Donna|King|Y|3|2|1940|TUNISIA||Donna.King@TEftU.com|2452446| -55|AAAAAAAAHDAAAAAA|783093|1151|773256|2451539|2451509|Dr.|Beryl|Thomason|N|12|8|1933|UNITED KINGDOM||Beryl.Thomason@OeqefhtCmZTAj.com|2452380| -56|AAAAAAAAIDAAAAAA|344460|3092|202524|2451424|2451394|Miss|Pamela|Delgado|N|29|1|1960|GUERNSEY||Pamela.Delgado@8OpV0Ldj8vq2K9ZK.org|2452334| -57|AAAAAAAAJDAAAAAA|752932|128|100571|2449741|2449711|Mr.|Travis|Melendez|Y|10|3|1961|AFGHANISTAN||Travis.Melendez@344rCMk.edu|2452606| -58|AAAAAAAAKDAAAAAA|528756|6879|814155|2451386|2451356|Mr.|Cecil|Peterman|Y|29|2|1988|ICELAND||Cecil.Peterman@tbeqEuUvS4ZM4Px9N.com|2452447| -59|AAAAAAAALDAAAAAA|1895444|4787|445683|2451157|2451127|Miss|Melanie|Morrison|N|6|1|1947|HUNGARY||Melanie.Morrison@F2foqn.edu|2452530| -60|AAAAAAAAMDAAAAAA|1179671|3200|694282|2449714|2449684|Ms.|Dollie|Thao|N|15|7|1990|SWITZERLAND||Dollie.Thao@Xead5vagsekdHDLUkv.edu|2452407| -61|AAAAAAAANDAAAAAA|1620078|6683|11688|2451136|2451106|Sir|Joseph|White|Y|6|5|1978|SENEGAL||Joseph.White@c0EJ7pimuu.com|2452462| -62|AAAAAAAAODAAAAAA|821787|4700|228413|2451854|2451824|Mr.|Matthew|Brown|Y|1|6|1973|KOREA, REPUBLIC OF||Matthew.Brown@F.edu|2452319| -63|AAAAAAAAPDAAAAAA|1790374|2445|109716|2450179|2450149|Dr.|Frank|Morton|N|26|4|1943|EGYPT||Frank.Morton@Hd7jNaA3s.com|2452425| -64|AAAAAAAAAEAAAAAA|1260191|6284|675930|2452379|2452349|Dr.|Michael|Frye|N|27|6|1941|CAPE VERDE||Michael.Frye@aM1HsbOs0smgpLo.org|2452426| -65|AAAAAAAABEAAAAAA|1778884|2234|37584|2451898|2451868|Dr.|Rodney|Taft|N|18|9|1991|VIRGIN ISLANDS, U.S.||Rodney.Taft@qe.com|2452353| -66|AAAAAAAACEAAAAAA|747190|6036|92882|2449294|2449264|Ms.|Julie|Chester|N|8|9|1950|HONG KONG||Julie.Chester@Kv.com|2452375| -67|AAAAAAAADEAAAAAA|1118294|2032|724970|2452033|2452003|Dr.|Gerald|Thomas|N|27|10|1948|NORWAY||Gerald.Thomas@zSuIGSgb6iyu.org|2452414| -68|AAAAAAAAEEAAAAAA|779965|6069|266126|2451936|2451906|Dr.|Arthur|Troy|Y|20|8|1928|LUXEMBOURG||Arthur.Troy@3VY5bV30AifrO.com|2452542| -69|AAAAAAAAFEAAAAAA|971368|4167|159366|2450154|2450124|Sir|Steven|Mcclellan|Y|15|1|1950|NEPAL||Steven.Mcclellan@UviyOLnu2m1POo.edu|2452340| -70|AAAAAAAAGEAAAAAA|1460929|5492|648102|2451944|2451914|Sir|James|Smith|N|7|12|1963|KUWAIT||James.Smith@ifJngGlNG.edu|2452342| -71|AAAAAAAAHEAAAAAA|1292064|2257|392450|2450248|2450218|Mr.|Roderick|Rogers|N|4|11|1967|OMAN||Roderick.Rogers@pJdioQ.com|2452462| -72|AAAAAAAAIEAAAAAA|1608738|6364|326390|2451700|2451670|Dr.|Bradley|Barry|Y|21|3|1937|TAJIKISTAN||Bradley.Barry@Kq2ONpEXU9YSno31.edu|2452601| -73|AAAAAAAAJEAAAAAA|425740|431|948369|2449133|2449103|Mr.|David|Vasquez|N|17|4|1956|ISRAEL||David.Vasquez@j.org|2452489| -74|AAAAAAAAKEAAAAAA|1434225|347|515269|2452641|2452611|Sir|Eric|Woods|Y|19|2|1953|SAINT LUCIA||Eric.Woods@CfPzy1AUqxd2.com|2452584| -75|AAAAAAAALEAAAAAA|1888603|143|837730|2451755|2451725|Dr.|Annie|Grant|Y|25|5|1925|TUVALU||Annie.Grant@tccug5KC1oT2nL.com|2452401| -76|AAAAAAAAMEAAAAAA|526064|2054|209691|2451760|2451730|Mr.|Craig|Lowry|Y|7|3|1946|WALLIS AND FUTUNA||Craig.Lowry@92zokgx8duX.org|2452430| -77|AAAAAAAANEAAAAAA|915180|2167|437154|2451632|2451602|Dr.|Darrin|Smith|N|30|5|1931|UNITED STATES||Darrin.Smith@Mti.edu|2452355| 
-78|AAAAAAAAOEAAAAAA|1092537|3677|407264|2449388|2449358|Ms.|Wanda|Davis|N|11|5|1940|NETHERLANDS ANTILLES||Wanda.Davis@I6s7DD86i6.edu|2452339| -79|AAAAAAAAPEAAAAAA|389494|3493|759539|2450863|2450833|Miss|Sandi|Tran|Y|10|6|1937|FRENCH GUIANA||Sandi.Tran@myikqStif1Q.edu|2452644| -80|AAAAAAAAAFAAAAAA|1499808|3891|294727|2450558|2450528|Mrs.|Eleanor|Evans|N|21|12|1948|CÔTE D'IVOIRE||Eleanor.Evans@zxvr5rl.org|2452413| -81|AAAAAAAABFAAAAAA|728917|388|848306|2452562|2452532|Ms.|Jessica|Levesque|Y|17|7|1940|ALBANIA||Jessica.Levesque@06mGqI9mHG.org|2452414| -82|AAAAAAAACFAAAAAA|75627|5081|530088|2450357|2450327|Sir|Max|Mueller|N|26|2|1947|IRAQ||Max.Mueller@xqCZRBSrTGD6CBvXh.com|2452303| -83|AAAAAAAADFAAAAAA|976724|5574|40824|2451032|2451002|Miss|Daisy|Flynn|N|24|1|1943|NEW ZEALAND||Daisy.Flynn@288e6Z0csxJ.com|2452338| -84|AAAAAAAAEFAAAAAA|1250744|2821|190898|2449496|2449466|Mrs.|Ami|Montgomery|Y|2|5|1964|JAMAICA||Ami.Montgomery@VBSKqhL36j55.edu|2452526| -85|AAAAAAAAFFAAAAAA|1293499|37|410575|2449868|2449838|Miss|Michele|Baldwin|Y|30|10|1978|GEORGIA||Michele.Baldwin@sIVO1J4U.org|2452491| -86|AAAAAAAAGFAAAAAA|1428237|6963|188442|2451560|2451530|Dr.|Marvin|Matlock|Y|21|10|1983|BRUNEI DARUSSALAM||Marvin.Matlock@0FXEZp.org|2452633| -87|AAAAAAAAHFAAAAAA|1452824|1427|722030|2449404|2449374|Dr.|Kevin|White|N|2|7|1934|BENIN||Kevin.White@x9oTPjEI6AdDQ7n4l.edu|2452492| -88|AAAAAAAAIFAAAAAA|495575|131|492687|2450991|2450961|Dr.|Phyllis|Horner|N|20|8|1965|UZBEKISTAN||Phyllis.Horner@uQy.edu|2452403| -89|AAAAAAAAJFAAAAAA|694848|5383|726318|2451425|2451395|Mrs.|Helen|Macdonald|Y|15|3|1981|DOMINICA||Helen.Macdonald@3d4.com|2452626| -90|AAAAAAAAKFAAAAAA|417827|5083|745139|2451494|2451464|Mr.|Nathan|Pond|Y|7|5|1985|GUYANA||Nathan.Pond@nPh7drM687MhI.org|2452637| -91|AAAAAAAALFAAAAAA|827176|2441|464906|2450406|2450376|Miss|Heather|White|Y|28|3|1962|MARTINIQUE||Heather.White@3JitjmxYQnXAtCNAl.com|2452295| -92|AAAAAAAAMFAAAAAA|953084|5771|585211|2452582|2452552|Miss|Crystal|Ryan|Y|8|2|1973|ECUADOR||Crystal.Ryan@Ju2rO6u.com|2452294| -93|AAAAAAAANFAAAAAA|647375|6229|535836|2449341|2449311|Sir|Clyde|Williams|N|6|10|1927|FRENCH POLYNESIA||Clyde.Williams@en.com|2452510| -94|AAAAAAAAOFAAAAAA|451893|1990|583287|2449553|2449523|Mr.|Craig|Byrd|Y|14|5|1982|FRENCH POLYNESIA||Craig.Byrd@Dc0OEMXkvvuJ.com|2452445| -95|AAAAAAAAPFAAAAAA|796503|1663|566023|2452468|2452438|Ms.|Elizabeth|Hollingsworth|N|17|4|1943|GREECE||Elizabeth.Hollingsworth@lVpeDS5Rcs.com|2452584| -96|AAAAAAAAAGAAAAAA|1148074|6019|335611|2451505|2451475|Sir|Shaun|Lewis|N|22|10|1955|NIGERIA||Shaun.Lewis@MTRUPYFTXf9.com|2452395| -97|AAAAAAAABGAAAAAA|418763|102|599041|2452467|2452437|Mr.|Stewart|Ruffin|Y|19|5|1971|MYANMAR||Stewart.Ruffin@R7Mrx.edu|2452528| -98|AAAAAAAACGAAAAAA|574977|1615|493853|2450894|2450864|Dr.|David|Lewis|N|23|6|1965|KIRIBATI||David.Lewis@5mhvq.org|2452558| -99|AAAAAAAADGAAAAAA|622676|2152|867228|2451687|2451657|Sir|Austin|Tran|Y|9|12|1961|NAMIBIA||Austin.Tran@ect7cnjLsucbd.edu|2452437| -100|AAAAAAAAEGAAAAAA|1254468|6370|656672|2449148|2449118|Ms.|Jeannette|Johnson|Y|13|7|1958|BANGLADESH||Jeannette.Johnson@8BvSqgp.com|2452635| diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_2.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_2.csv deleted file mode 100644 index 133455e4f33c4..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_2.csv +++ /dev/null @@ -1,100 +0,0 @@ 
-101|AAAAAAAAFGAAAAAA|1730669|4190|100464|2450159|2450129|Dr.|Jeffrey|Bruce|N|7|5|1951|MAURITANIA||Jeffrey.Bruce@SPZG.com|2452590| -102|AAAAAAAAGGAAAAAA|1250712|1207|104149|2450198|2450168|Ms.|Jodi|Silva|N|9|8|1937|CANADA||Jodi.Silva@lntBSGFbpEOSVs.com|2452443| -103|AAAAAAAAHGAAAAAA|1659630|5909|483035|2451586|2451556|Dr.|James|Porter|N|3|5|1947|AFGHANISTAN||James.Porter@3C1oBhj.com|2452359| -104|AAAAAAAAIGAAAAAA|1090695|3116|325490|2450355|2450325|Dr.|Benjamin|Johnson|Y|29|11|1936|BAHRAIN||Benjamin.Johnson@HL2ugJBTO.com|2452499| -105|AAAAAAAAJGAAAAAA|949850|383|846916|2452463|2452433|Dr.|Frank|Strain|Y|14|1|1945|VIRGIN ISLANDS, U.S.||Frank.Strain@MbOHByB.edu|2452378| -106|AAAAAAAAKGAAAAAA|341252|5949|268246|2451785|2451755|Miss|Anne|Massey|Y|3|6|1974|PORTUGAL||Anne.Massey@7XXGTvh.edu|2452602| -107|AAAAAAAALGAAAAAA|546775|1414|766806|2451094|2451064|Dr.|Charles|Robinson|N|11|10|1970|VANUATU||Charles.Robinson@SKBvOYuE06xlJ6.org|2452312| -108|AAAAAAAAMGAAAAAA|847273|1898|754123|2449106|2449076|Mr.|Alfred|Bryant|Y|18|4|1964|BURUNDI||Alfred.Bryant@TRiZbgq.com|2452283| -109|AAAAAAAANGAAAAAA|1487931|5544|39604|2452506|2452476|Sir|Alfred|Watson|Y|13|1|1989|PORTUGAL||Alfred.Watson@Q.org|2452644| -110|AAAAAAAAOGAAAAAA|578986|5158|667330|2449769|2449739|Dr.|David|Thornton|N|30|7|1974|EGYPT||David.Thornton@eIz6xPr53uo8.com|2452426| -111|AAAAAAAAPGAAAAAA|291167|4554|127188|2450460|2450430|Dr.|Allen|Merritt|Y|19|4|1937|SOMALIA||Allen.Merritt@3fsiZ.edu|2452481| -112|AAAAAAAAAHAAAAAA|1032747|4822|214144|2451513|2451483|Sir|Floyd|Kelly|N|6|3|1984|PORTUGAL||Floyd.Kelly@VYyRIjGro.org|2452299| -113|AAAAAAAABHAAAAAA|204333|5202|454871|2451982|2451952|Mrs.|Karen|Roman|N|14|4|1985|BOTSWANA||Karen.Roman@IxcYF9rKJKomt.com|2452507| -114|AAAAAAAACHAAAAAA|1185641|5735|507076|2449631|2449601|Dr.|Richard|Chang|Y|2|5|1988|MEXICO||Richard.Chang@VKy9d4gdkatVugH.edu|2452359| -115|AAAAAAAADHAAAAAA|1498128|6766|270604|2449264|2449234|Sir|Freddie|Rojas|N|23|10|1971|MALDIVES||Freddie.Rojas@EfcO6EJZhAFac2.com|2452377| -116|AAAAAAAAEHAAAAAA|1824586|1944|18502|2449121|2449091|Miss|Carolyn|Callaway|Y|15|8|1932|ARUBA||Carolyn.Callaway@PjDvifaFqz0EZ.com|2452353| -117|AAAAAAAAFHAAAAAA|1759097|4215|232980|2449143|2449113|Miss|Renee|Jones|Y|29|8|1967|ARUBA||Renee.Jones@CKGxUpmCMLnxLg3.edu|2452624| -118|AAAAAAAAGHAAAAAA|648018|1399|693533|2451091|2451061|Dr.|Polly|Marlow|Y|7|10|1954|BAHRAIN||Polly.Marlow@OkB7beq1dpZn5InXZ8.org|2452476| -119|AAAAAAAAHHAAAAAA|1111698|4858|922818|2449829|2449799|Mr.|Jack|Spriggs|Y|4|8|1935|SAUDI ARABIA||Jack.Spriggs@jtQVaqKJXZnPPp3.org|2452475| -120|AAAAAAAAIHAAAAAA|609294|6365|492236|2450799|2450769|Dr.|Dan|Layne|N|25|11|1966|NORFOLK ISLAND||Dan.Layne@8btahREthm2.com|2452633| -121|AAAAAAAAJHAAAAAA||3852|344960||2449480|Mrs.|||N|4||||||2452542| -122|AAAAAAAAKHAAAAAA|802359|2399|376839|2450519|2450489|Sir|Christopher|Magee|N|1|10|1927|BERMUDA||Christopher.Magee@YJGYG5Y00r.com|2452431| -123|AAAAAAAALHAAAAAA|1715811|1013|167015|2450151|2450121|Mr.|Issac|Vega|Y|26|9|1953|SENEGAL||Issac.Vega@BB3eo23hUchr.edu|2452578| -124|AAAAAAAAMHAAAAAA|1813357|5275|877993|2449506|2449476|Dr.|Katharine|Hope|Y|3|11|1972|ARGENTINA||Katharine.Hope@UAtskHDZYaf2GLKqd.edu|2452312| -125|AAAAAAAANHAAAAAA|184237|3375|574513|2451019|2450989|Mr.|Stephen|Bruton|Y|15|9|1976|NIGER||Stephen.Bruton@KyRk407i9d.com|2452606| -126|AAAAAAAAOHAAAAAA|1297001|7129|122057|2449697|2449667|Sir|Daniel|Underwood|Y|20|4|1929|CAMEROON||Daniel.Underwood@UFBLooOoyKm.org|2452596| 
-127|AAAAAAAAPHAAAAAA|1172846|1728|751410|2450366|2450336|Miss|Tina|Clay|Y|23|5|1988|SAINT LUCIA||Tina.Clay@9XXnkEUtJlAsiKH.com|2452326| -128|AAAAAAAAAIAAAAAA|1035624|727|30983|2451336|2451306|Dr.|Michele|Benitez|Y|21|10|1961|PARAGUAY||Michele.Benitez@TSo2.org|2452308| -129|AAAAAAAABIAAAAAA|675894|2919|268791|2449881|2449851|Dr.|Brandi|Daniels|N|14|7|1954|SAUDI ARABIA||Brandi.Daniels@45lz1nXM8yJ.org|2452425| -130|AAAAAAAACIAAAAAA|784063|1299|573382|2450940|2450910|Mr.|Julius|Locke|N|24|3|1940|CAPE VERDE||Julius.Locke@bQ.org|2452507| -131|AAAAAAAADIAAAAAA|21268|6537|708606|2452497|2452467|Mrs.|Christine|Walton|Y|6|12|1959|ISRAEL||Christine.Walton@v.com|2452514| -132|AAAAAAAAEIAAAAAA|73004|6105|665959|2452287|2452257|Miss|Delia|Holland|N|21|7|1950|MYANMAR||Delia.Holland@4p5.org|2452607| -133|AAAAAAAAFIAAAAAA|1746749|571|42040|2449794|2449764|Dr.|David|Mcnabb|Y|10|7|1981|KENYA||David.Mcnabb@LT4hqKbQOBdZlv9T.org|2452472| -134|AAAAAAAAGIAAAAAA|1466626|3546|538066|2451995|2451965|Dr.|Timothy|Mendoza|N|1|10|1957|BOTSWANA||Timothy.Mendoza@sTDpAuVIO0.com|2452647| -135|AAAAAAAAHIAAAAAA|770531|2849|216767|2450899|2450869|Ms.|Vivian|Elliott|N|19|7|1938|ROMANIA||Vivian.Elliott@2InpKDq1ZB.org|2452631| -136|AAAAAAAAIIAAAAAA|1585327|6456|515232|2449583|2449553|Ms.|Elizabeth|Horn|Y|5|2|1954|BELIZE||Elizabeth.Horn@07DoPCi67U8.edu|2452507| -137|AAAAAAAAJIAAAAAA|743641|4304|916167|2451243|2451213|Sir|Roger|Burns|Y|3|1|1933|BAHRAIN||Roger.Burns@ZfvcJQpboR.com|2452283| -138|AAAAAAAAKIAAAAAA|1584692|5491|268337|2449468|2449438|Miss|Marjorie|Smith|N|3|4|1978|WALLIS AND FUTUNA||Marjorie.Smith@pT4LYd9jYKp9gZPSS.org|2452404| -139|AAAAAAAALIAAAAAA|1676720|5931|493895|2450260|2450230|Dr.|Catherine|Newman|N|29|3|1938|BERMUDA||Catherine.Newman@XYU8uAboQoTY35lq.org|2452497| -140|AAAAAAAAMIAAAAAA|1023202|3414|897226|2451434|2451404|Dr.|Debra|Ferguson|N|15|5|1936|UNITED ARAB EMIRATES||Debra.Ferguson@n.org|2452570| -141|AAAAAAAANIAAAAAA|1482167|4206|317798|2450487|2450457|Sir|Roberto|Johnson|N|24|6|1934|KIRIBATI||Roberto.Johnson@Ix1LO6c.org|2452323| -142|AAAAAAAAOIAAAAAA|1656828|6260|914892|2449129|2449099|Dr.|Marie|Clark|N|13|6|1959|SAINT HELENA||Marie.Clark@vB.edu|2452574| -143|AAAAAAAAPIAAAAAA|1659500|5465|277383|2450934|2450904|Mr.|Gilberto|Bennett|N|24|7|1950|NEW CALEDONIA||Gilberto.Bennett@M9cDJqGnuMVN.edu|2452292| -144|AAAAAAAAAJAAAAAA|1390646|5861|455476|2451219|2451189|Dr.|Cecil|Patterson|Y|12|1|1963|INDONESIA||Cecil.Patterson@PG48D3r9RkzS5cHF.org|2452619| -145|AAAAAAAABJAAAAAA|632398|2601|723204|2449213|2449183|Mr.|Lucas|Dewey|N|15|8|1938|JERSEY||Lucas.Dewey@ZHJkTXkTSxf7VtfaK.org|2452305| -146|AAAAAAAACJAAAAAA|99608|812|322816|2451504|2451474|Miss|Sue|Garner|N|25|8|1984|TOGO||Sue.Garner@nj7MaJfQVn2XnU.com|2452598| -147|AAAAAAAADJAAAAAA|584601|124|322494|2451306|2451276|Mr.|David|Bell|Y|1|4|1945|TIMOR-LESTE||David.Bell@mczTttGdMUhP.edu|2452613| -148|AAAAAAAAEJAAAAAA|13731|4895|45659|2451936|2451906|Dr.|Timothy|Randall|Y|22|9|1974|GUADELOUPE||Timothy.Randall@Iha.org|2452540| -149|AAAAAAAAFJAAAAAA|1092922|6291|654413|2450192|2450162|Miss|Anthony|Bell|N|30|11|1992|GRENADA||Anthony.Bell@EK1UOvs.com|2452607| -150|AAAAAAAAGJAAAAAA|170732|5380|287675|2452516|2452486|Sir|Carlos|Rivers|N|17|4|1951|GIBRALTAR||Carlos.Rivers@kXExbrcHQr7.com|2452463| -151|AAAAAAAAHJAAAAAA|1268169|1470|168724|2452131|2452101|Ms.|Inez|Britt|N|1|7|1981|INDONESIA||Inez.Britt@iuP18200P.com|2452441| -152|AAAAAAAAIJAAAAAA|11202|2418|683587|2451121|2451091|Dr.|Charles|Valdez|Y|31|5|1957|SOLOMON ISLANDS||Charles.Valdez@BZ5SNzn.edu|2452535| 
-153|AAAAAAAAJJAAAAAA|1553082|2967|589299|2451026|2450996|Sir|Oscar|Rodriguez|N|1|3|1956|AMERICAN SAMOA||Oscar.Rodriguez@cl3gRATjot.edu|2452357| -154|AAAAAAAAKJAAAAAA|615984|2520|155456|2451372|2451342|Mr.|Oscar|Jolley|Y|4|10|1947|SWAZILAND||Oscar.Jolley@fXYAyXeJCf.edu|2452283| -155|AAAAAAAALJAAAAAA|116723|2035|332147|2452341|2452311|Mr.|Robert|Bailey|Y|26|4|1952|LATVIA||Robert.Bailey@Kg2HymHYnhiFQ.com|2452308| -156|AAAAAAAAMJAAAAAA|1239996|3753|533463|2449910|2449880|Mrs.|Ebony|Herrera|N|24|7|1945|INDIA||Ebony.Herrera@FxPQDpja2A.edu|2452622| -157|AAAAAAAANJAAAAAA|140571|2515|626859|2452340|2452310|Dr.|Timothy|Dickey|N|23|11|1970|NIGERIA||Timothy.Dickey@4pz4mFsRXHDHn9Bsei.com|2452460| -158|AAAAAAAAOJAAAAAA|1766379|3371|65163|2451514|2451484|Mr.|James|Smith|N|8|5|1942|JAPAN||James.Smith@TdTbs.edu|2452422| -159|AAAAAAAAPJAAAAAA|1592543|2088|423280|2450169|2450139|Mrs.|Lydia|Parker|N|29|9|1941|TURKMENISTAN||Lydia.Parker@gaCKhtjHBv.com|2452539| -160|AAAAAAAAAKAAAAAA|100623|330|238084|2449929|2449899|Ms.|Jeanne|Fisher|Y|25|12|1966|QATAR||Jeanne.Fisher@o89qHfPiE4GRTPHU.org|2452439| -161|AAAAAAAABKAAAAAA|555176|3982|259188|2449566|2449536|Ms.|Wendy|Chiu|Y|25|11|1957|TIMOR-LESTE||Wendy.Chiu@ETOB393IPN.org|2452555| -162|AAAAAAAACKAAAAAA|265628|2401|926021|2452594|2452564|Dr.|Pamela|Perez|Y|24|3|1954|INDONESIA||Pamela.Perez@Xanx2Z6eDZY2K68.com|2452373| -163|AAAAAAAADKAAAAAA|1349987|3914|536241|2449332|2449302|Ms.|Carly|Chavez|Y|18|12|1941|TIMOR-LESTE||Carly.Chavez@oIeMa8xK7R.org|2452552| -164|AAAAAAAAEKAAAAAA|1561803|6070|789537|2449656|2449626|Miss|Kathleen|Keegan|N|24|10|1938|TOKELAU||Kathleen.Keegan@CVIZzgGq.org|2452627| -165|AAAAAAAAFKAAAAAA|171221|4745|178622|2449501|2449471|Miss|Eleanor|Muller|N|4|9|1966|VIRGIN ISLANDS, U.S.||Eleanor.Muller@pv3YT9DSjUHYPhA.edu|2452585| -166|AAAAAAAAGKAAAAAA|277430|4819|11855|2451862|2451832|Mr.|Richard|Taylor|Y|13|10|1992|NAURU||Richard.Taylor@IPGGsdy9uatPzD.edu|2452506| -167|AAAAAAAAHKAAAAAA|995414|653|462692|2452617|2452587|Dr.|Ricky|Lee|Y|8|1|1948|BAHRAIN||Ricky.Lee@6tjVJREkz3m.org|2452478| -168|AAAAAAAAIKAAAAAA|823657|609|530361|2449510|2449480|Dr.|Walter|Clarke|Y|15|1|1924|GUYANA||Walter.Clarke@RKoQ39fyLG.edu|2452478| -169|AAAAAAAAJKAAAAAA|360308|3101|343494|2450702|2450672|Ms.|Samantha|Long|Y|21|5|1971|EL SALVADOR||Samantha.Long@DRyuV0NvXuboR.org|2452370| -170|AAAAAAAAKKAAAAAA|1514859|5547|861835|2450613|2450583|Sir|Randy|Wilson|N|19|2|1934|ARUBA||Randy.Wilson@Hz.org|2452393| -171|AAAAAAAALKAAAAAA|1147675|6180|9485|2450715|2450685|Dr.|Rosa|Nixon|N|7|6|1926|SOMALIA||Rosa.Nixon@ghkTsItbO5o8hKtVkdI.com|2452380| -172|AAAAAAAAMKAAAAAA||5588|401514|2450803||Miss|||||3|1924||||| -173|AAAAAAAANKAAAAAA|1028237|2827|679017|2450947|2450917|Ms.|Heather|Joseph|Y|17|7|1964|MONTSERRAT||Heather.Joseph@xS5p.edu|2452530| -174|AAAAAAAAOKAAAAAA|1883378|5789|332200|2450141|2450111|Miss|Linda|Murdock|Y|9|7|1981|EQUATORIAL GUINEA||Linda.Murdock@pJR.edu|2452323| -175|AAAAAAAAPKAAAAAA|1593693|4956|355866|2449185|2449155|Miss|Kathryn|Boyd|Y|6|5|1937|SAUDI ARABIA||Kathryn.Boyd@1HbD5gQXQFg.org|2452297| -176|AAAAAAAAALAAAAAA|1267506|1797|425688|2451536|2451506|Mrs.|Bonnie|Cunningham|N|12|2|1978|BERMUDA||Bonnie.Cunningham@aeB7sFe1xodAK.com|2452556| -177|AAAAAAAABLAAAAAA|1109447|932|919170|2449752|2449722|Sir|Joshua|Parr|Y|1|8|1949|MYANMAR||Joshua.Parr@8Nr76TFzm.org|2452472| -178|AAAAAAAACLAAAAAA|297805|986|164520|2451731|2451701|Ms.|Gladys|Clancy|Y|15|1|1926|BANGLADESH||Gladys.Clancy@9raQR8eDl.edu|2452489| 
-179|AAAAAAAADLAAAAAA|1500071|6369|474267|2450741|2450711|Dr.|Robert|Rand|N|21|9|1946|JAMAICA||Robert.Rand@gIsgOsXLveqH.org|2452457| -180|AAAAAAAAELAAAAAA|916887|6100|87666|2449694|2449664|Mr.|Robert|Larsen|Y|24|2|1983|JAPAN||Robert.Larsen@rT.com|2452475| -181|AAAAAAAAFLAAAAAA|999550|653|256956|2450511|2450481|Sir|Joseph|Riley|N|19|10|1972|BELIZE||Joseph.Riley@znxF.com|2452416| -182|AAAAAAAAGLAAAAAA|234442|6642|659289|2452326|2452296|Sir|Michael|Brown|N|27|12|1976|NIUE||Michael.Brown@scVqD1Ayq3.edu|2452425| -183|AAAAAAAAHLAAAAAA|365062|4506|657550|2450561|2450531|Sir|Peter|Collins|Y|4|2|1965|PERU||Peter.Collins@HyeA4GFuSt.org|2452466| -184|AAAAAAAAILAAAAAA|1029065|2586|253354|2452572|2452542|Mr.|Clifford|Flynn|N|8|1|1964|AZERBAIJAN||Clifford.Flynn@xj7u.org|2452630| -185|AAAAAAAAJLAAAAAA|874016|6988|825913|2449177|2449147|Miss|Yvette|Eastman|Y|12|1|1925|TONGA||Yvette.Eastman@C5F68ATco7.org|2452616| -186|AAAAAAAAKLAAAAAA|1642233|6554|336122|2451474|2451444|Miss|Marilyn|Chapa|Y|13|4|1968|SRI LANKA||Marilyn.Chapa@nGPdfzUIEoCqX.org|2452601| -187|AAAAAAAALLAAAAAA|1447273|2767|735061|2452415|2452385|Miss|Florence|Foster|Y|27|4|1974|SPAIN||Florence.Foster@9UuxIfghgPUq.com|2452306| -188|AAAAAAAAMLAAAAAA|1194851|1207|324363|2450899|2450869|Mr.|Vincent|Martinez|Y|4|12|1981|DENMARK||Vincent.Martinez@v.edu|2452284| -189|AAAAAAAANLAAAAAA|1373685|3805|690725|2452633|2452603|Sir|Bruce|Betz|Y|18|10|1961|MALDIVES||Bruce.Betz@T8cs6MI3.edu|2452354| -190|AAAAAAAAOLAAAAAA|1317424|2089|611554|2450929|2450899|Miss|Romona|Woodruff|Y|1|9|1964|KUWAIT||Romona.Woodruff@tst458XZmsz3k.com|2452302| -191|AAAAAAAAPLAAAAAA|553851|5174|952944|2452079|2452049|Sir|Jack|Marino|N|14|8|1977|BOUVET ISLAND||Jack.Marino@s4VlvxMMCr.com|2452479| -192|AAAAAAAAAMAAAAAA|624770|4667|480211|2451481|2451451|Sir|Corey|Quinones|Y|23|12|1962|ROMANIA||Corey.Quinones@iR983lsrbEXs3FGJC.org|2452442| -193|AAAAAAAABMAAAAAA|220795|4096|907844|2449375|2449345|Dr.|David|Neeley|N|2|11|1931|MALAWI||David.Neeley@RvD3OryEP.com|2452441| -194|AAAAAAAACMAAAAAA|555321|4747|369020|2451226|2451196|Sir|William|Faison|Y|22|4|1957|WESTERN SAHARA||William.Faison@Q.org|2452433| -195|AAAAAAAADMAAAAAA|923632|805|864378|2449196|2449166|Mrs.|Deborah|Terrell|N|24|5|1924|CAMBODIA||Deborah.Terrell@Ufs60H6gcr2gjy.edu|2452384| -196|AAAAAAAAEMAAAAAA|126554|2276|601551|2452628|2452598|Mr.|Shawn|Cowan|Y|13|7|1933|BOUVET ISLAND||Shawn.Cowan@Ryflqzn5RFr.org|2452586| -197|AAAAAAAAFMAAAAAA|1752208|3408|232208|2451503|2451473|Miss|Debra|Carter|Y|4|3|1957|GERMANY||Debra.Carter@AT1tanFkGTJ.edu|2452509| -198|AAAAAAAAGMAAAAAA|1827315|4982|573496|2452672|2452642|Dr.|Dusty|Bryan|Y|20|2|1988|NEW CALEDONIA||Dusty.Bryan@4SUMJMY50Fc.org|2452419| -199|AAAAAAAAHMAAAAAA|1279496|5584|692025|2452285|2452255|Mr.|Edwardo|Porter|Y|14|7|1950|BURKINA FASO||Edwardo.Porter@Juh2suS.org|2452474| -200|AAAAAAAAIMAAAAAA|78046|5077|826155|2451444|2451414|Dr.|Ted|Ingalls|N|17|6|1990|CHRISTMAS ISLAND||Ted.Ingalls@VoeeTfC04pGNt.com|2452471| diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_3.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_3.csv deleted file mode 100644 index 4dd6aec92d2f9..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_3.csv +++ /dev/null @@ -1,100 +0,0 @@ -201|AAAAAAAAJMAAAAAA|32508|6523|342491|2450742|2450712|Dr.|James|Scott|Y|29|3|1988|CANADA||James.Scott@Cqo77afYrMqPEL.org|2452622| 
-202|AAAAAAAAKMAAAAAA|1372547|6820|583493|2450940|2450910|Dr.|Trena|Grant|N|21|9|1989|SAN MARINO||Trena.Grant@tvQJEJNtmx341m.org|2452383| -203|AAAAAAAALMAAAAAA|1358958|255|972752|2451001|2450971|Mr.|Clifford|Markham|N|16|4|1939|CHILE||Clifford.Markham@bUdzHp.edu|2452312| -204|AAAAAAAAMMAAAAAA|1454612|5591|305017|2449076|2449046|Mrs.|Christine|Hebert|Y|4|8|1960|HONDURAS||Christine.Hebert@LK7DsUezOy.com|2452299| -205|AAAAAAAANMAAAAAA|1657734|3679|44824|2452639|2452609|Dr.|Heriberto|Putnam|Y|8|10|1941|MALAYSIA||Heriberto.Putnam@8qZ5ecY.org|2452353| -206|AAAAAAAAOMAAAAAA|800878|6588|32745|2450083|2450053|Sir|Brian|Serna|Y|15|2|1989|SLOVENIA||Brian.Serna@jMrH3hytNT.com|2452431| -207|AAAAAAAAPMAAAAAA|37305|4470|133235|2451926|2451896|Sir|Floyd|Scroggins|Y|17|1|1974|FRENCH POLYNESIA||Floyd.Scroggins@zpx4V5VQ7jMTU.edu|2452444| -208|AAAAAAAAANAAAAAA|1181602|5473|389344|2449891|2449861|Miss|Karla|Rincon|Y|28|6|1961|MONTENEGRO||Karla.Rincon@R0.org|2452388| -209|AAAAAAAABNAAAAAA|251525|5744|63963|2449213|2449183|Mr.|Curtis|Williams|Y|28|7|1987|ANDORRA||Curtis.Williams@pNqB.org|2452433| -210|AAAAAAAACNAAAAAA|893704|6632|141960|2450462|2450432|Ms.|Diane|Blount|N|17|1|1972|MAYOTTE||Diane.Blount@x8.edu|2452502| -211|AAAAAAAADNAAAAAA|1075746|5285|590377|2451509|2451479|Miss|Barbara|Hernandez|Y|11|4|1961|NICARAGUA||Barbara.Hernandez@S.org|2452437| -212|AAAAAAAAENAAAAAA|938838|3570|378702|2450458||Dr.||Monk|||9|||||2452603| -213|AAAAAAAAFNAAAAAA|1596601|4645|599089|2451950|2451920|Miss|Christene|Mccullough|Y|8|10|1945|LUXEMBOURG||Christene.Mccullough@kalRTzJl8KjD5G7H.org|2452299| -214|AAAAAAAAGNAAAAAA|325205|3154|133103|2450585|2450555|Mr.|Archie|Mayes|Y|6|11|1941|NEW ZEALAND||Archie.Mayes@ktKG1s9VpMGY.edu|2452334| -215|AAAAAAAAHNAAAAAA|1040836|2700|389717|2450769|2450739|Miss|Lillian|Morgan|Y|22|10|1982|KUWAIT||Lillian.Morgan@53sRO4941QRyx.com|2452355| -216|AAAAAAAAINAAAAAA|359850|2530|952410|2451359|2451329|Mrs.|Jacqueline|Russo|Y|25|12|1943|KYRGYZSTAN||Jacqueline.Russo@VcjQALO78c.com|2452585| -217|AAAAAAAAJNAAAAAA|1397675|2358|984194|2449674|2449644|Dr.|James|Lackey|Y|24|6|1933|MALI||James.Lackey@tqHuV9.edu|2452362| -218|AAAAAAAAKNAAAAAA|247753|6176|202552|2449706|2449676|Sir|Cody|Fowler|Y|19|1|1961|GHANA||Cody.Fowler@C4PEjdXI1cvZVkupF.edu|2452546| -219|AAAAAAAALNAAAAAA|1095236|2503|459017|2449220|2449190|Sir|Roy|Aguilar|Y|23|10|1988|SWITZERLAND||Roy.Aguilar@haRtA0p.org|2452521| -220|AAAAAAAAMNAAAAAA|1845912|5357|336002|2452461|2452431|Sir|William|Chu|Y|18|5|1977|GUAM||William.Chu@jrDhML4b.org|2452522| -221|AAAAAAAANNAAAAAA|228357|6441|606925|2450142|2450112|Dr.|Kenny|Johnson|N|14|10|1963|BELIZE||Kenny.Johnson@pcbnkVHQtUA7.org|2452602| -222|AAAAAAAAONAAAAAA|1499262||526462||||Omar||Y|||1944|ITALY||Omar.Garcia@9lh519BY66jzAf.com|| -223|AAAAAAAAPNAAAAAA|497955|6872|18189|2452497|2452467|Dr.|Rudolph|Hutchins|Y|11|9|1958|LIBERIA||Rudolph.Hutchins@n2sVh5a3ykauteVNas.com|2452418| -224|AAAAAAAAAOAAAAAA|538767|4950|417980|2452470|2452440|Ms.|Adela|Rutherford|Y|14|12|1953|ALGERIA||Adela.Rutherford@X.com|2452333| -225|AAAAAAAABOAAAAAA|1057243|3356|43869|2450927|2450897|Dr.|Roscoe|Balderas|N|8|12|1935|CAMBODIA||Roscoe.Balderas@lcAXRX7pSO18v.edu|2452591| -226|AAAAAAAACOAAAAAA|994138||975117|2449452||||Lee||22|12|1925|NIUE||James.Lee@xFk9m.edu|| -227|AAAAAAAADOAAAAAA|1146454||562659||2451472|Miss|Angelia|Vaughn|N|15||1972|NIGERIA|||2452560| -228|AAAAAAAAEOAAAAAA|652264|6607|612150|2449832|2449802|Sir|Daniel|Croteau|N|18|2|1988|FRENCH POLYNESIA||Daniel.Croteau@KLNHYpbyM2i7xEQK.com|2452313| 
-229|AAAAAAAAFOAAAAAA|125162||977806|||||Skinner|N|7|||ETHIOPIA|||| -230|AAAAAAAAGOAAAAAA|779201|2365|935129|2449971|2449941|Miss|Shirlene|Guerra|N|8|3|1962|SOUTH AFRICA||Shirlene.Guerra@LvAxQ5TvOY85Vdc.edu|2452617| -231|AAAAAAAAHOAAAAAA|1200393|5286|845379|2451931|2451901|Ms.|Janet|Welch|Y|9|7|1991|PERU||Janet.Welch@K1dYk2cCcLC.org|2452375| -232|AAAAAAAAIOAAAAAA|1213087|4275|563463|2450582|2450552|Sir|David|Hudspeth|Y|30|9|1982|IRELAND||David.Hudspeth@vujJsnUszvZzR9zEG.org|2452532| -233|AAAAAAAAJOAAAAAA||2278|496475||2451382|Dr.|Harry||Y|29|6||||Harry.Dalton@DFOVaqZkzTp.edu|| -234|AAAAAAAAKOAAAAAA|1569035|959|960883|2451923|2451893|Mr.|Dustin|Perry||10|4|||||2452362| -235|AAAAAAAALOAAAAAA|1425007|42|135877|2452188|2452158|Mr.|Salvador|Ramirez|Y|20|1|1973|UZBEKISTAN||Salvador.Ramirez@cGDOfYJ.edu|2452298| -236|AAAAAAAAMOAAAAAA|272752|4670|777835|2449715|2449685|Dr.|Becky|Downey|N|30|7|1962|GIBRALTAR||Becky.Downey@LfVPF.org|2452510| -237|AAAAAAAANOAAAAAA|904909|7171|353433||||Sean||Y|1||1959||||2452493| -238|AAAAAAAAOOAAAAAA|1592906|1415|929477|2449229|2449199|Sir|Sean|Sanderson|N|26|2|1944|SWITZERLAND||Sean.Sanderson@0rAG1YgxruusVGnvNA.org|2452363| -239|AAAAAAAAPOAAAAAA|414449|2345|374241|2450501|2450471|Dr.|Elizabeth|Smith|N|10|3|1933|INDONESIA||Elizabeth.Smith@z.com|2452591| -240|AAAAAAAAAPAAAAAA|1548896|4636|834423|||Mrs.|Carrie||N|||1941|BERMUDA|||| -241|AAAAAAAABPAAAAAA|288616|5879|605542|2450397|2450367|Mr.|Scott|Ward|Y|25|2|1930|ANDORRA||Scott.Ward@CzpTDaCmmYVV.org|2452634| -242|AAAAAAAACPAAAAAA|611134|4572|207163|2452532|2452502|Dr.|Lance|Redman|N|13|2|1969|GUADELOUPE||Lance.Redman@EM69kK.edu|2452302| -243|AAAAAAAADPAAAAAA|756855|5748|544594|2450306|2450276|Sir|James|Catron|Y|4|12|1945|EQUATORIAL GUINEA||James.Catron@yYjRH2ryUMi70yXYk.org|2452616| -244|AAAAAAAAEPAAAAAA|845511|727|665896|2450938|2450908|Sir|Stephen|Morgan|Y|3|2|1942|BAHAMAS||Stephen.Morgan@cb0.org|2452475| -245|AAAAAAAAFPAAAAAA|196556|2258|533913|2452486|2452456|Dr.|Russell|Donnelly|Y|31|10|1982|KYRGYZSTAN||Russell.Donnelly@IjVh06eeAG8ixu9i.org|2452316| -246|AAAAAAAAGPAAAAAA|747052|286|546673|2449824|2449794|Mrs.|Rosalie|Low|Y|22|1|1930|ARGENTINA||Rosalie.Low@GKe6czSvZh.org|2452477| -247|AAAAAAAAHPAAAAAA|1350958|1489|479573|2450579|2450549|Dr.|Earl|Holden|N|16|9|1928|MAURITANIA||Earl.Holden@5USlfiaaA.com|2452357| -248|AAAAAAAAIPAAAAAA|1001304|301|862498|2449499|2449469|Mr.|Leonard|Munoz|Y|20|9|1952|LIBERIA||Leonard.Munoz@Dvd7KYB7s9.edu|2452582| -249|AAAAAAAAJPAAAAAA|87712|2767|959069|2452535|2452505|Ms.|Cindy|Jackson|N|8|8|1989|TUVALU||Cindy.Jackson@pKLhquF6mljh4uVx.com|2452549| -250|AAAAAAAAKPAAAAAA|493542|345|516301|2449052|2449022|Mr.|Derick|Stewart|N|3|2|1953|HONG KONG||Derick.Stewart@KOdZQdYU.org|2452355| -251|AAAAAAAALPAAAAAA|384131|1194|915410|2450010|2449980|Dr.|Richard|Craig|N|22|2|1979|SUDAN||Richard.Craig@Lm6xST9.com|2452476| -252|AAAAAAAAMPAAAAAA|281285|4556|600274|2450642|2450612|Dr.|Troy|Brewer|N|5|7|1939|VIRGIN ISLANDS, U.S.||Troy.Brewer@ZDt1Qk5q2.com|2452607| -253|AAAAAAAANPAAAAAA|1585267|4376|477085|2449284|2449254|Sir|Robert|Walker|N|9|10|1931|BOTSWANA||Robert.Walker@EazZfkopxvr9j.com|2452615| -254|AAAAAAAAOPAAAAAA|1665044|3106|141521|2449402|2449372|Ms.|Tiffanie|Holliday|Y|7|3|1991|KIRIBATI||Tiffanie.Holliday@hZ.com|2452536| -255|AAAAAAAAPPAAAAAA|1583680|3768|361456|2451471|2451441|Mr.|Michael|Thompson|N|6|2|1949|BOUVET ISLAND||Michael.Thompson@D7P7H7a9Tfy9hu1M.org|2452457| -256|AAAAAAAAAABAAAAA|321240|4092|28544|2451932|2451902|Sir|Kevin|Garrison|N|5|6|1941|KENYA||Kevin.Garrison@n8j.com|2452442| 
-257|AAAAAAAABABAAAAA|1251965|3504|79043|2451051|2451021|Ms.|Petra|Brown|N|8|6|1961|LUXEMBOURG||Petra.Brown@MSq6yPx51zt3IioI.com|2452508| -258|AAAAAAAACABAAAAA|||845278|2451082||Sir|Clyde|Bernard||14||1974|PHILIPPINES||Clyde.Bernard@IlM4cxVZghXay.edu|2452640| -259|AAAAAAAADABAAAAA|945819|4358|940497|2450055|2450025|Mr.|Carl|Motley|N|26|9|1948|MALAYSIA||Carl.Motley@7XEsdRS2Qg.com|2452419| -260|AAAAAAAAEABAAAAA|1354896|5413|520283|2451576|2451546|Dr.|Steven|Deleon|Y|13|11|1957|SAUDI ARABIA||Steven.Deleon@AcFdH75siBXhu2.com|2452519| -261|AAAAAAAAFABAAAAA|692189|2930|372424|2450805|2450775|Miss|Honey|Raymond|N|28|11|1991|GRENADA||Honey.Raymond@DvgDPolBRk0bSs.org|2452418| -262|AAAAAAAAGABAAAAA|601411|2451|829091|2452213|2452183|Ms.|Angela|Mauro|Y|3|3|1937|TUVALU||Angela.Mauro@yjzVkHtZvKOTCmxrx.edu|2452371| -263|AAAAAAAAHABAAAAA|178882|5021|326807|2452449|2452419|Mrs.|Rebecca|Hermann|N|3|11|1972|VENEZUELA||Rebecca.Hermann@sYcaFy.com|2452409| -264|AAAAAAAAIABAAAAA|||518063|2450258|2450228||Claude|Stewart|N|27|3||FRENCH GUIANA||Claude.Stewart@cutXggd.edu|| -265|AAAAAAAAJABAAAAA|973825|1062|614346|2449728|2449698|Miss|Laura|Pham|N|7|3|1979|UNITED STATES||Laura.Pham@mxCbEghi5Y.com|2452289| -266|AAAAAAAAKABAAAAA|1134414|2313|443812|2451156|2451126|Sir|Walter|Martinez|Y|5|7|1968|GUYANA||Walter.Martinez@vQnVJXQ4seh9pLBDJ.com|2452376| -267|AAAAAAAALABAAAAA|1250943|4768|871746|2451369|2451339|Ms.|Dessie|Simms|Y|12|9|1931|BERMUDA||Dessie.Simms@t8fo.edu|2452373| -268|AAAAAAAAMABAAAAA|1462242|1291|79987|2449422|2449392|Ms.|Ann|Siegel|Y|3|10|1943|ZIMBABWE||Ann.Siegel@f3V7nPs.com|2452485| -269|AAAAAAAANABAAAAA|1408713|888|712710|2449663|2449633|Mr.|William|Burns|N|23|5|1943|MAYOTTE||William.Burns@ojrPxK.com|2452575| -270|AAAAAAAAOABAAAAA|1747702|2270|862085|2449066|2449036|Dr.|Diane|Molina|Y|16|1|1981|TOKELAU||Diane.Molina@Lu64EONTqV.edu|2452303| -271|AAAAAAAAPABAAAAA|974572|1459|133946|2449485|2449455|Ms.|Ashley|Wallace|N|27|10|1989|PANAMA||Ashley.Wallace@jGtH0oX.com|2452643| -272|AAAAAAAAABBAAAAA|58528|468|154408|2451613|2451583|Miss|Claudia|Butler|Y|3|8|1929|NETHERLANDS ANTILLES||Claudia.Butler@8p2VT6M.org|2452454| -273|AAAAAAAABBBAAAAA|916621|1316|847611|2450274|2450244|Sir|Everett|Jenkins|N|18|11|1941|MOLDOVA, REPUBLIC OF||Everett.Jenkins@8IGK3viZTO.edu|2452415| -274|AAAAAAAACBBAAAAA|1207798|5098|481598|2449045|2449015|Ms.|Helen|Gray|N|8|4|1983|SAINT LUCIA||Helen.Gray@qRV1qZc.com|2452311| -275|AAAAAAAADBBAAAAA|792731|883|38635|2451901|2451871|Mrs.|Janice|Kirby|Y|16|2|1949|CUBA||Janice.Kirby@qx.edu|2452526| -276|AAAAAAAAEBBAAAAA|1351960|2030|661253|2450868|2450838|Dr.|David|Moreau|N|31|5|1930|TRINIDAD AND TOBAGO||David.Moreau@lVcIFYe6e9i.com|2452528| -277|AAAAAAAAFBBAAAAA|530527|2728|31729|2450807|2450777|Mr.|Bernard|Bryant|Y|18|6|1988|GUATEMALA||Bernard.Bryant@Iz8KAURMPPB.com|2452550| -278|AAAAAAAAGBBAAAAA|999018|6992|371413|2451200|2451170|Miss|Rachel|Hernandez|N|18|2|1934|SLOVAKIA||Rachel.Hernandez@3ndYYBHSLvT.org|2452342| -279|AAAAAAAAHBBAAAAA|408201|1504|602807|2449574|2449544|Dr.|William|Kaye|N|12|2|1961|AZERBAIJAN||William.Kaye@Mx1n74bCJjkc08.edu|2452510| -280|AAAAAAAAIBBAAAAA|472007|6069|706084|2449053|2449023|Dr.|Christopher|Randolph|Y|12|6|1967|NEPAL||Christopher.Randolph@HJhyNrdvkKzHVG.org|2452534| -281|AAAAAAAAJBBAAAAA|363108|7109|427110|2450273|2450243|Dr.|Tommy|Whitney|Y|30|4|1929|BOLIVIA||Tommy.Whitney@PoZcVZzeLj.org|2452359| -282|AAAAAAAAKBBAAAAA|817596||500169|||Sir||Neal|N||||MALDIVES|||| 
-283|AAAAAAAALBBAAAAA|1531961|4221|708128|2452331|2452301|Dr.|Joseph|Hilton|Y|2|12|1975|BERMUDA||Joseph.Hilton@io2VIr.org|2452283| -284|AAAAAAAAMBBAAAAA|553655|4881|92460|2451545|2451515|Mrs.|Adam|Chambers|Y|13|6|1961|MONTSERRAT||Adam.Chambers@1fmL5RoSch74.edu|2452298| -285|AAAAAAAANBBAAAAA|311786|6886|374786|2449844|2449814|Sir|Stephen|Burton|N|25|12|1954|BELIZE||Stephen.Burton@vUZLt1J.edu|2452424| -286|AAAAAAAAOBBAAAAA|428772|3923|178274|2450267|2450237|Dr.|Ima|Amos|N|16|9|1982|KUWAIT||Ima.Amos@hEZu94B6kdfOrsAPA.edu|2452481| -287|AAAAAAAAPBBAAAAA|463191|366|925094|2452654|2452624|Dr.|Carolyn|Wong|N|15|2|1940|MALDIVES||Carolyn.Wong@qDMA4gY.edu|2452290| -288|AAAAAAAAACBAAAAA|338763|6861|61359|2451174|2451144|Dr.|Ruth|Duarte|Y|27|1|1954|ANGOLA||Ruth.Duarte@aYJinh7bzQiMP.org|2452576| -289|AAAAAAAABCBAAAAA|401587|2758|925529|2450500|2450470|Dr.|Nancy|Jones|N|1|6|1935|LITHUANIA||Nancy.Jones@Q6kuq.org|2452568| -290|AAAAAAAACCBAAAAA|238104|2602|331100|2449668|2449638|Mrs.|Florence|Caro|N|23|2|1983|MOZAMBIQUE||Florence.Caro@mACMoIsUY.org|2452362| -291|AAAAAAAADCBAAAAA|244059|6378|363544|2449302|2449272|Mr.|Ralph|Johnson|Y|2|11|1968|NICARAGUA||Ralph.Johnson@uuCPEDT5B8oV0ll.edu|2452312| -292|AAAAAAAAECBAAAAA|126296|4363|323677|2450549|2450519|Mr.|Billy|Coffin|N|15|5|1949|SWITZERLAND||Billy.Coffin@DrhdyQf.com|2452331| -293|AAAAAAAAFCBAAAAA|1521828|4146|623264|2449716|2449686|Ms.|Heather|Brown|N|10|6|1955|WALLIS AND FUTUNA||Heather.Brown@g.org|2452488| -294|AAAAAAAAGCBAAAAA||1222|169272|2449373|2449343||||||||NETHERLANDS||Thomas.Holder@bQnouG.com|| -295|AAAAAAAAHCBAAAAA|903118|5620|246969|2451540|2451510|Dr.|Michael|Mcguire|Y|2|6|1928|IRELAND||Michael.Mcguire@iTFQNbciZ3CdSv8.org|2452599| -296|AAAAAAAAICBAAAAA|654664|3131|13645|2449671|2449641|Miss|Christy|Hudson|Y|1|10|1940|TIMOR-LESTE||Christy.Hudson@8HMH0C2ydsabR9.edu|2452487| -297|AAAAAAAAJCBAAAAA|394444|2855|790466|2449499|2449469|Mr.|Lee|Farias|Y|24|6|1954|SWEDEN||Lee.Farias@BJN5bHOoiKDGUgN.org|2452390| -298|AAAAAAAAKCBAAAAA|1336062|2437|220012|2450213|2450183|Ms.|Iris|Roberts|N|25|5|1937|BENIN||Iris.Roberts@golv5Jj9oga.com|2452559| -299|AAAAAAAALCBAAAAA|273612|539|60266|2449936|2449906|Dr.|Gerald|Stone|N|24|6|1990|YEMEN||Gerald.Stone@1qgrObT0Cm.com|2452495| -300|AAAAAAAAMCBAAAAA|948309|5015|796921|2452539|2452509|Dr.|Gina|Batson|N|14|8|1927|SURINAME||Gina.Batson@vTFlyNS.com|2452515| diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_1.json b/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_1.json deleted file mode 100644 index 9541257816dfb..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_1.json +++ /dev/null @@ -1,100 +0,0 @@ -{"col_1":1,"col_2":"AAAAAAAABAAAAAAA","col_3":"18","col_4":"Jackson ","col_5":"Parkway","col_6":"Suite 280","col_7":"Fairfield","col_8":"Maricopa County","col_9":"AZ","col_10":"86192","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":2,"col_2":"AAAAAAAACAAAAAAA","col_3":"362","col_4":"Washington 6th","col_5":"RD","col_6":"Suite 80","col_7":"Fairview","col_8":"Taos County","col_9":"NM","col_10":"85709","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":3,"col_2":"AAAAAAAADAAAAAAA","col_3":"585","col_4":"Dogwood Washington","col_5":"Circle","col_6":"Suite Q","col_7":"Pleasant Valley","col_8":"York County","col_9":"PA","col_10":"12477","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} 
-{"col_1":4,"col_2":"AAAAAAAAEAAAAAAA","col_3":"111","col_4":"Smith ","col_5":"Wy","col_6":"Suite A","col_7":"Oak Rcol_2ge","col_8":"Kit Carson County","col_9":"CO","col_10":"88371","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":5,"col_2":"AAAAAAAAFAAAAAAA","col_3":"31","col_4":"College ","col_5":"Blvd","col_6":"Suite 180","col_7":"Glendale","col_8":"Barry County","col_9":"MO","col_10":"63951","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":6,"col_2":"AAAAAAAAGAAAAAAA","col_3":"59","col_4":"Williams Sixth","col_5":"Parkway","col_6":"Suite 100","col_7":"Lakeview","col_8":"Chelan County","col_9":"WA","col_10":"98579","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":7,"col_2":"AAAAAAAAHAAAAAAA","col_3":"","col_4":"Hill 7th","col_5":"Road","col_6":"Suite U","col_7":"Farmington","col_8":"","col_9":"","col_10":"39145","col_11":"United States","col_12":"","col_13":"","col_14":""} -{"col_1":8,"col_2":"AAAAAAAAIAAAAAAA","col_3":"875","col_4":"Lincoln ","col_5":"Ct.","col_6":"Suite Y","col_7":"Union","col_8":"Bledsoe County","col_9":"TN","col_10":"38721","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":9,"col_2":"AAAAAAAAJAAAAAAA","col_3":"819","col_4":"1st Laurel","col_5":"Ave","col_6":"Suite 70","col_7":"New Hope","col_8":"Perry County","col_9":"AL","col_10":"39431","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":10,"col_2":"AAAAAAAAKAAAAAAA","col_3":"851","col_4":"Woodland Poplar","col_5":"ST","col_6":"Suite Y","col_7":"Martinsville","col_8":"Haines Borough","col_9":"AK","col_10":"90419","col_11":"United States","col_12":"-9","col_13":"condo","col_14":""} -{"col_1":11,"col_2":"AAAAAAAALAAAAAAA","col_3":"189","col_4":"13th 2nd","col_5":"Street","col_6":"Suite 470","col_7":"Maple Grove","col_8":"Madison County","col_9":"MT","col_10":"68252","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":12,"col_2":"AAAAAAAAMAAAAAAA","col_3":"76","col_4":"Ash 8th","col_5":"Ct.","col_6":"Suite O","col_7":"Edgewood","col_8":"Mifflin County","col_9":"PA","col_10":"10069","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":13,"col_2":"AAAAAAAANAAAAAAA","col_3":"424","col_4":"Main Second","col_5":"Ln","col_6":"Suite 130","col_7":"Greenville","col_8":"Noxubee County","col_9":"MS","col_10":"51387","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":14,"col_2":"AAAAAAAAOAAAAAAA","col_3":"923","col_4":"Pine Oak","col_5":"Dr.","col_6":"Suite 100","col_7":"","col_8":"Lipscomb County","col_9":"TX","col_10":"77752","col_11":"","col_12":"-6","col_13":"","col_14":""} -{"col_1":15,"col_2":"AAAAAAAAPAAAAAAA","col_3":"314","col_4":"Spring ","col_5":"Ct.","col_6":"Suite B","col_7":"Oakland","col_8":"Washington County","col_9":"OH","col_10":"49843","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":16,"col_2":"AAAAAAAAABAAAAAA","col_3":"576","col_4":"Adams Center","col_5":"Street","col_6":"Suite J","col_7":"Valley View","col_8":"Oldham County","col_9":"TX","col_10":"75124","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":17,"col_2":"AAAAAAAABBAAAAAA","col_3":"801","col_4":"Green ","col_5":"Dr.","col_6":"Suite 0","col_7":"Montpelier","col_8":"Richland County","col_9":"OH","col_10":"48930","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} 
-{"col_1":18,"col_2":"AAAAAAAACBAAAAAA","col_3":"460","col_4":"Maple Spruce","col_5":"Court","col_6":"Suite 480","col_7":"Somerville","col_8":"Potter County","col_9":"SD","col_10":"57783","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":19,"col_2":"AAAAAAAADBAAAAAA","col_3":"611","col_4":"Wilson ","col_5":"Way","col_6":"Suite O","col_7":"Oakdale","col_8":"Tangipahoa Parish","col_9":"LA","col_10":"79584","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":20,"col_2":"AAAAAAAAEBAAAAAA","col_3":"675","col_4":"Elm Wilson","col_5":"Street","col_6":"Suite I","col_7":"Hopewell","col_8":"Williams County","col_9":"OH","col_10":"40587","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":21,"col_2":"AAAAAAAAFBAAAAAA","col_3":"294","col_4":"Jefferson Smith","col_5":"Ave","col_6":"Suite 390","col_7":"Springfield","col_8":"Marshall County","col_9":"SD","col_10":"59303","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":22,"col_2":"AAAAAAAAGBAAAAAA","col_3":"874","col_4":"Park Second","col_5":"Avenue","col_6":"Suite N","col_7":"Green Acres","col_8":"Perkins County","col_9":"NE","col_10":"67683","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":23,"col_2":"AAAAAAAAHBAAAAAA","col_3":"230","col_4":"6th Spring","col_5":"Drive","col_6":"Suite U","col_7":"Newtown","col_8":"Jewell County","col_9":"KS","col_10":"61749","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":24,"col_2":"AAAAAAAAIBAAAAAA","col_3":"837","col_4":"4th ","col_5":"Street","col_6":"Suite 200","col_7":"Fairfield","col_8":"Marin County","col_9":"CA","col_10":"96192","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":25,"col_2":"AAAAAAAAJBAAAAAA","col_3":"986","col_4":"West East","col_5":"Blvd","col_6":"Suite F","col_7":"Pleasant Grove","col_8":"Coweta County","col_9":"GA","col_10":"34136","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":26,"col_2":"AAAAAAAAKBAAAAAA","col_3":"809","col_4":"Central ","col_5":"","col_6":"","col_7":"Springdale","col_8":"Forsyth County","col_9":"","col_10":"28883","col_11":"","col_12":"-5","col_13":"","col_14":""} -{"col_1":27,"col_2":"AAAAAAAALBAAAAAA","col_3":"649","col_4":"Williams ","col_5":"Ct.","col_6":"Suite 400","col_7":"Cedar","col_8":"Utah County","col_9":"UT","col_10":"81229","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":28,"col_2":"AAAAAAAAMBAAAAAA","col_3":"992","col_4":"Birch ","col_5":"Avenue","col_6":"Suite N","col_7":"Oak Hill","col_8":"Montrose County","col_9":"CO","col_10":"87838","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":29,"col_2":"AAAAAAAANBAAAAAA","col_3":"572","col_4":"First Elevnth","col_5":"Pkwy","col_6":"Suite 160","col_7":"Newport","col_8":"Richland County","col_9":"IL","col_10":"61521","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":30,"col_2":"AAAAAAAAOBAAAAAA","col_3":"748","col_4":"Oak ","col_5":"Avenue","col_6":"Suite M","col_7":"Wildwood","col_8":"Clark County","col_9":"MO","col_10":"66871","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":31,"col_2":"AAAAAAAAPBAAAAAA","col_3":"291","col_4":"East ","col_5":"Lane","col_6":"Suite A","col_7":"Forestville","col_8":"Walworth County","col_9":"SD","col_10":"53027","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} 
-{"col_1":32,"col_2":"AAAAAAAAACAAAAAA","col_3":"243","col_4":"8th Lake","col_5":"Wy","col_6":"Suite G","col_7":"Oakland","col_8":"Madison County","col_9":"NC","col_10":"29843","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":33,"col_2":"AAAAAAAABCAAAAAA","col_3":"711","col_4":"3rd 4th","col_5":"Ave","col_6":"Suite 10","col_7":"Pine Grove","col_8":"Sumter County","col_9":"GA","col_10":"34593","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":34,"col_2":"AAAAAAAACCAAAAAA","col_3":"785","col_4":"1st Cedar","col_5":"Ave","col_6":"Suite 40","col_7":"Greenville","col_8":"Russell County","col_9":"VA","col_10":"21387","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":35,"col_2":"AAAAAAAADCAAAAAA","col_3":"814","col_4":"Washington Main","col_5":"Drive","col_6":"Suite C","col_7":"Waterloo","col_8":"Henry County","col_9":"VA","col_10":"21675","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":36,"col_2":"AAAAAAAAECAAAAAA","col_3":"648","col_4":"South 15th","col_5":"Court","col_6":"Suite 410","col_7":"Lakeview","col_8":"Lewis County","col_9":"TN","col_10":"38579","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":37,"col_2":"AAAAAAAAFCAAAAAA","col_3":"999","col_4":"4th ","col_5":"Ct.","col_6":"Suite 250","col_7":"Sumner","col_8":"Greenville County","col_9":"SC","col_10":"20519","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":38,"col_2":"AAAAAAAAGCAAAAAA","col_3":"912","col_4":"Second Hickory","col_5":"Drive","col_6":"Suite 190","col_7":"Plainview","col_8":"Baca County","col_9":"CO","col_10":"83683","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":39,"col_2":"AAAAAAAAHCAAAAAA","col_3":"13","col_4":"10th ","col_5":"Ct.","col_6":"Suite 280","col_7":"Hamilton","col_8":"Lee County","col_9":"IA","col_10":"52808","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":40,"col_2":"AAAAAAAAICAAAAAA","col_3":"369","col_4":"2nd Maple","col_5":"Pkwy","col_6":"Suite H","col_7":"Brcol_2geport","col_8":"Ferry County","col_9":"WA","col_10":"95817","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":41,"col_2":"AAAAAAAAJCAAAAAA","col_3":"198","col_4":"Washington ","col_5":"RD","col_6":"Suite S","col_7":"Oakland","col_8":"Ouray County","col_9":"CO","col_10":"89843","col_11":"United States","col_12":"-7","col_13":"apartment","col_14":""} -{"col_1":42,"col_2":"AAAAAAAAKCAAAAAA","col_3":"884","col_4":"Oak ","col_5":"Court","col_6":"Suite F","col_7":"Riverscol_2e","col_8":"Scott County","col_9":"MS","col_10":"59231","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":43,"col_2":"AAAAAAAALCAAAAAA","col_3":"762","col_4":"Spring ","col_5":"Dr.","col_6":"Suite F","col_7":"Sulphur Springs","col_8":"Jefferson County","col_9":"IL","col_10":"68354","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":44,"col_2":"AAAAAAAAMCAAAAAA","col_3":"","col_4":"","col_5":"","col_6":"Suite 490","col_7":"","col_8":"O-Brien County","col_9":"","col_10":"58721","col_11":"United States","col_12":"-6","col_13":"","col_14":""} -{"col_1":45,"col_2":"AAAAAAAANCAAAAAA","col_3":"206","col_4":"4th ","col_5":"Street","col_6":"Suite 120","col_7":"Union","col_8":"Washington County","col_9":"ME","col_10":"09321","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} 
-{"col_1":46,"col_2":"AAAAAAAAOCAAAAAA","col_3":"995","col_4":"Oak ","col_5":"Boulevard","col_6":"Suite 120","col_7":"Union Hill","col_8":"Forrest County","col_9":"MS","col_10":"57746","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":47,"col_2":"AAAAAAAAPCAAAAAA","col_3":"776","col_4":"Washington View","col_5":"Wy","col_6":"Suite 60","col_7":"Pleasant Grove","col_8":"Mercer County","col_9":"KY","col_10":"44136","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":48,"col_2":"AAAAAAAAADAAAAAA","col_3":"895","col_4":"Valley ","col_5":"Lane","col_6":"Suite 480","col_7":"Salem","col_8":"James City County","col_9":"VA","col_10":"28048","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":49,"col_2":"AAAAAAAABDAAAAAA","col_3":"583","col_4":"Highland 2nd","col_5":"Street","col_6":"Suite 90","col_7":"Fairfield","col_8":"York County","col_9":"ME","col_10":"06792","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":50,"col_2":"AAAAAAAACDAAAAAA","col_3":"190","col_4":"7th ","col_5":"Wy","col_6":"Suite D","col_7":"Enterprise","col_8":"Gray County","col_9":"KS","col_10":"61757","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":51,"col_2":"AAAAAAAADDAAAAAA","col_3":"845","col_4":"5th ","col_5":"Pkwy","col_6":"Suite F","col_7":"Green Acres","col_8":"Hamblen County","col_9":"TN","col_10":"37683","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":52,"col_2":"AAAAAAAAEDAAAAAA","col_3":"472","col_4":"Rcol_2ge Park","col_5":"ST","col_6":"Suite 60","col_7":"Oakdale","col_8":"Muskingum County","col_9":"OH","col_10":"49584","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":53,"col_2":"AAAAAAAAFDAAAAAA","col_3":"884","col_4":"College Franklin","col_5":"Boulevard","col_6":"Suite 440","col_7":"Riverview","col_8":"McPherson County","col_9":"SD","col_10":"59003","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":54,"col_2":"AAAAAAAAGDAAAAAA","col_3":"857","col_4":"10th 10th","col_5":"Boulevard","col_6":"Suite V","col_7":"Oak Hill","col_8":"Martin County","col_9":"NC","col_10":"27838","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":55,"col_2":"AAAAAAAAHDAAAAAA","col_3":"177","col_4":"Lee Adams","col_5":"Way","col_6":"Suite S","col_7":"Ashland","col_8":"Macomb County","col_9":"MI","col_10":"44244","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":56,"col_2":"AAAAAAAAcol_2AAAAAA","col_3":"49","col_4":"Valley 14th","col_5":"Wy","col_6":"Suite V","col_7":"Woodland","col_8":"Goodhue County","col_9":"MN","col_10":"54854","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":57,"col_2":"AAAAAAAAJDAAAAAA","col_3":"625","col_4":"10th Oak","col_5":"RD","col_6":"Suite J","col_7":"Enterprise","col_8":"Bottineau County","col_9":"ND","col_10":"51757","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":58,"col_2":"AAAAAAAAKDAAAAAA","col_3":"917","col_4":"Miller 15th","col_5":"Parkway","col_6":"Suite 230","col_7":"Union Hill","col_8":"Canyon County","col_9":"col_2","col_10":"87746","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":59,"col_2":"AAAAAAAALDAAAAAA","col_3":"","col_4":"","col_5":"Court","col_6":"","col_7":"Newtown","col_8":"Valley County","col_9":"MT","col_10":"","col_11":"United 
States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":60,"col_2":"AAAAAAAAMDAAAAAA","col_3":"","col_4":"Lee ","col_5":"","col_6":"","col_7":"Forest Hills","col_8":"Green County","col_9":"KY","col_10":"49237","col_11":"","col_12":"","col_13":"apartment","col_14":""} -{"col_1":61,"col_2":"AAAAAAAANDAAAAAA","col_3":"159","col_4":"Rcol_2ge ","col_5":"Boulevard","col_6":"Suite 20","col_7":"Sulphur Springs","col_8":"Laurel County","col_9":"KY","col_10":"48354","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":62,"col_2":"AAAAAAAAODAAAAAA","col_3":"925","col_4":"Lincoln ","col_5":"Lane","col_6":"Suite 90","col_7":"Riverdale","col_8":"Lewis County","col_9":"WV","col_10":"29391","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":63,"col_2":"AAAAAAAAPDAAAAAA","col_3":"416","col_4":"Elevnth Green","col_5":"Drive","col_6":"Suite A","col_7":"Mcol_2way","col_8":"Jewell County","col_9":"KS","col_10":"61904","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":64,"col_2":"AAAAAAAAAEAAAAAA","col_3":"546","col_4":"Meadow ","col_5":"Circle","col_6":"Suite 90","col_7":"Macedonia","col_8":"Trousdale County","col_9":"TN","col_10":"31087","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":65,"col_2":"AAAAAAAABEAAAAAA","col_3":"147","col_4":"Fourth Lake","col_5":"RD","col_6":"Suite 460","col_7":"Bethel","col_8":"Iowa County","col_9":"IA","col_10":"55281","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":66,"col_2":"AAAAAAAACEAAAAAA","col_3":"238","col_4":"Pine ","col_5":"Road","col_6":"Suite 470","col_7":"Crossroads","col_8":"Sioux County","col_9":"NE","col_10":"60534","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":67,"col_2":"AAAAAAAADEAAAAAA","col_3":"896","col_4":"11th ","col_5":"Street","col_6":"Suite B","col_7":"Flint","col_8":"Van Buren County","col_9":"IA","col_10":"58909","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":68,"col_2":"AAAAAAAAEEAAAAAA","col_3":"240","col_4":"Laurel Forest","col_5":"Street","col_6":"Suite H","col_7":"Summit","col_8":"Bledsoe County","col_9":"TN","col_10":"30499","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":69,"col_2":"AAAAAAAAFEAAAAAA","col_3":"37","col_4":"Pine ","col_5":"Drive","col_6":"Suite H","col_7":"Glendale","col_8":"Scott County","col_9":"KY","col_10":"43951","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":70,"col_2":"AAAAAAAAGEAAAAAA","col_3":"511","col_4":"14th Sycamore","col_5":"Parkway","col_6":"Suite 210","col_7":"White Oak","col_8":"Union County","col_9":"OH","col_10":"46668","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":71,"col_2":"AAAAAAAAHEAAAAAA","col_3":"38","col_4":"Rcol_2ge ","col_5":"Cir.","col_6":"Suite 370","col_7":"Lakeview","col_8":"Nottoway County","col_9":"VA","col_10":"28579","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":72,"col_2":"AAAAAAAAIEAAAAAA","col_3":"982","col_4":"Willow ","col_5":"Blvd","col_6":"Suite 20","col_7":"Richville","col_8":"Cascade County","col_9":"MT","col_10":"65945","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":73,"col_2":"AAAAAAAAJEAAAAAA","col_3":"158","col_4":"Green Broadway","col_5":"Road","col_6":"Suite B","col_7":"Cedar Grove","col_8":"Kingman 
County","col_9":"KS","col_10":"60411","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":74,"col_2":"AAAAAAAAKEAAAAAA","col_3":"723","col_4":"Sycamore ","col_5":"Boulevard","col_6":"Suite C","col_7":"Spring Hill","col_8":"Wapello County","col_9":"IA","col_10":"56787","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":75,"col_2":"AAAAAAAALEAAAAAA","col_3":"32","col_4":"Pine ","col_5":"Wy","col_6":"Suite 480","col_7":"Valley View","col_8":"Cleburne County","col_9":"AL","col_10":"35124","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":76,"col_2":"AAAAAAAAMEAAAAAA","col_3":"257","col_4":"Walnut South","col_5":"Drive","col_6":"Suite 420","col_7":"Shiloh","col_8":"Menominee County","col_9":"MI","col_10":"49275","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":77,"col_2":"AAAAAAAANEAAAAAA","col_3":"151","col_4":"Hill ","col_5":"Wy","col_6":"Suite U","col_7":"Oak Grove","col_8":"Thomas County","col_9":"GA","col_10":"38370","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":78,"col_2":"AAAAAAAAOEAAAAAA","col_3":"857","col_4":"Railroad ","col_5":"Boulevard","col_6":"Suite 140","col_7":"Green Acres","col_8":"Potter County","col_9":"SD","col_10":"57683","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":79,"col_2":"AAAAAAAAPEAAAAAA","col_3":"409","col_4":"Park 7th","col_5":"Cir.","col_6":"Suite U","col_7":"Farmington","col_8":"Wayne County","col_9":"TN","col_10":"39145","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":80,"col_2":"AAAAAAAAAFAAAAAA","col_3":"298","col_4":"12th ","col_5":"Way","col_6":"Suite J","col_7":"Oakland","col_8":"Grant County","col_9":"KY","col_10":"49843","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":81,"col_2":"AAAAAAAABFAAAAAA","col_3":"953","col_4":"River Spruce","col_5":"Pkwy","col_6":"Suite 0","col_7":"Union Hill","col_8":"Brown County","col_9":"TX","col_10":"77746","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":82,"col_2":"AAAAAAAACFAAAAAA","col_3":"97","col_4":"River Main","col_5":"Ln","col_6":"Suite L","col_7":"Buena Vista","col_8":"Santa Clara County","col_9":"CA","col_10":"95752","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":83,"col_2":"AAAAAAAADFAAAAAA","col_3":"410","col_4":"Spring Poplar","col_5":"Drive","col_6":"Suite 20","col_7":"Bethel","col_8":"Nueces County","col_9":"TX","col_10":"75281","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":84,"col_2":"AAAAAAAAEFAAAAAA","col_3":"886","col_4":"Sunset ","col_5":"Circle","col_6":"Suite T","col_7":"Warwick","col_8":"Itawamba County","col_9":"MS","col_10":"51398","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":85,"col_2":"AAAAAAAAFFAAAAAA","col_3":"476","col_4":"Willow ","col_5":"Avenue","col_6":"Suite R","col_7":"Five Points","col_8":"Crawford County","col_9":"PA","col_10":"16098","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":86,"col_2":"AAAAAAAAGFAAAAAA","col_3":"699","col_4":"11th 14th","col_5":"ST","col_6":"Suite O","col_7":"Five Points","col_8":"Pike County","col_9":"KY","col_10":"46098","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":87,"col_2":"AAAAAAAAHFAAAAAA","col_3":"","col_4":"6th ","col_5":"Lane","col_6":"","col_7":"Maple 
Grove","col_8":"Claiborne County","col_9":"","col_10":"58252","col_11":"","col_12":"","col_13":"apartment","col_14":""} -{"col_1":88,"col_2":"AAAAAAAAIFAAAAAA","col_3":"104","col_4":"9th ","col_5":"RD","col_6":"Suite W","col_7":"Frogtown","col_8":"Bay County","col_9":"MI","col_10":"48784","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":89,"col_2":"AAAAAAAAJFAAAAAA","col_3":"440","col_4":"Church 7th","col_5":"Ave","col_6":"Suite 200","col_7":"Lakescol_2e","col_8":"Solano County","col_9":"CA","col_10":"99532","col_11":"United States","col_12":"-8","col_13":"apartment","col_14":""} -{"col_1":90,"col_2":"AAAAAAAAKFAAAAAA","col_3":"892","col_4":"North East","col_5":"Drive","col_6":"Suite U","col_7":"Centerville","col_8":"Hettinger County","col_9":"ND","col_10":"50059","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":91,"col_2":"AAAAAAAALFAAAAAA","col_3":"936","col_4":"","col_5":"Wy","col_6":"","col_7":"Riverscol_2e","col_8":"Murray County","col_9":"","col_10":"","col_11":"United States","col_12":"","col_13":"","col_14":""} -{"col_1":92,"col_2":"AAAAAAAAMFAAAAAA","col_3":"734","col_4":"Ash Wilson","col_5":"Boulevard","col_6":"Suite V","col_7":"Newtown","col_8":"Boise County","col_9":"col_2","col_10":"81749","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":93,"col_2":"AAAAAAAANFAAAAAA","col_3":"755","col_4":"Washington Sunset","col_5":"RD","col_6":"Suite Q","col_7":"Walnut Grove","col_8":"Greenup County","col_9":"KY","col_10":"47752","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":94,"col_2":"AAAAAAAAOFAAAAAA","col_3":"","col_4":"","col_5":"","col_6":"","col_7":"Lakescol_2e","col_8":"Washington County","col_9":"GA","col_10":"","col_11":"","col_12":"","col_13":"","col_14":""} -{"col_1":95,"col_2":"AAAAAAAAPFAAAAAA","col_3":"571","col_4":"Hickory River","col_5":"Ln","col_6":"Suite M","col_7":"Woodland","col_8":"Thayer County","col_9":"NE","col_10":"64854","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":96,"col_2":"AAAAAAAAAGAAAAAA","col_3":"246","col_4":"Cedar Franklin","col_5":"RD","col_6":"Suite A","col_7":"Oakdale","col_8":"Fulton County","col_9":"KY","col_10":"49584","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":97,"col_2":"AAAAAAAABGAAAAAA","col_3":"858","col_4":"Sunset Main","col_5":"Way","col_6":"Suite 320","col_7":"Enterprise","col_8":"Hardin County","col_9":"TX","col_10":"71757","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":98,"col_2":"AAAAAAAACGAAAAAA","col_3":"346","col_4":"Walnut ","col_5":"Road","col_6":"Suite 370","col_7":"Spring Hill","col_8":"Labette County","col_9":"KS","col_10":"66787","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":99,"col_2":"AAAAAAAADGAAAAAA","col_3":"222","col_4":"Park View","col_5":"Drive","col_6":"Suite U","col_7":"Wilson","col_8":"Dickinson County","col_9":"MI","col_10":"46971","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":100,"col_2":"AAAAAAAAEGAAAAAA","col_3":"127","col_4":"Railroad ","col_5":"Court","col_6":"Suite 170","col_7":"Oakland","col_8":"Perry County","col_9":"TN","col_10":"39843","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_2.json b/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_2.json deleted 
file mode 100644 index 62518f9e6ac0c..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_2.json +++ /dev/null @@ -1,100 +0,0 @@ -{"col_1":101,"col_2":"AAAAAAAAFGAAAAAA","col_3":"922","col_4":"Wilson 1st","col_5":"Ave","col_6":"Suite C","col_7":"Bunker Hill","col_8":"Vernon Parish","col_9":"LA","col_10":"70150","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":102,"col_2":"AAAAAAAAGGAAAAAA","col_3":"270","col_4":"North Fourth","col_5":"Ct.","col_6":"Suite 460","col_7":"Newtown","col_8":"Bay County","col_9":"FL","col_10":"31749","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":103,"col_2":"AAAAAAAAHGAAAAAA","col_3":"467","col_4":"Maple Chestnut","col_5":"Ln","col_6":"Suite 60","col_7":"Forest Hills","col_8":"Brown County","col_9":"WI","col_10":"59237","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":104,"col_2":"AAAAAAAAIGAAAAAA","col_3":"786","col_4":"Locust ","col_5":"Drive","col_6":"Suite V","col_7":"Jamestown","col_8":"Madison County","col_9":"IA","col_10":"56867","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":105,"col_2":"AAAAAAAAJGAAAAAA","col_3":"960","col_4":"Franklin ","col_5":"Lane","col_6":"Suite P","col_7":"Lebanon","col_8":"Dallas County","col_9":"AL","col_10":"32898","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":106,"col_2":"AAAAAAAAKGAAAAAA","col_3":"992","col_4":"View Ash","col_5":"Parkway","col_6":"Suite J","col_7":"Forest Hills","col_8":"Hall County","col_9":"TX","col_10":"79237","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":107,"col_2":"AAAAAAAALGAAAAAA","col_3":"128","col_4":"Laurel Church","col_5":"Boulevard","col_6":"Suite 210","col_7":"Bethel","col_8":"Grant County","col_9":"IN","col_10":"45281","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":108,"col_2":"AAAAAAAAMGAAAAAA","col_3":"156","col_4":"West Maple","col_5":"Street","col_6":"Suite R","col_7":"Belmont","col_8":"Jackson County","col_9":"OR","col_10":"90191","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":109,"col_2":"AAAAAAAANGAAAAAA","col_3":"453","col_4":"Madison 8th","col_5":"Avenue","col_6":"Suite 310","col_7":"Harmony","col_8":"Aurora County","col_9":"SD","col_10":"55804","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":110,"col_2":"AAAAAAAAOGAAAAAA","col_3":"834","col_4":"Hill Jackson","col_5":"ST","col_6":"Suite N","col_7":"Hillcrest","col_8":"Houston County","col_9":"AL","col_10":"33003","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":111,"col_2":"AAAAAAAAPGAAAAAA","col_3":"306","col_4":"Pine Lincoln","col_5":"Drive","col_6":"Suite 470","col_7":"Bethel","col_8":"Davcol_2son County","col_9":"NC","col_10":"25281","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":112,"col_2":"AAAAAAAAAHAAAAAA","col_3":"232","col_4":"Dogwood ","col_5":"Street","col_6":"Suite K","col_7":"Rankin","col_8":"Harrison County","col_9":"IA","col_10":"52621","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":113,"col_2":"AAAAAAAABHAAAAAA","col_3":"138","col_4":"Main ","col_5":"Ave","col_6":"Suite 390","col_7":"Highland","col_8":"Page County","col_9":"IA","col_10":"59454","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} 
-{"col_1":114,"col_2":"AAAAAAAACHAAAAAA","col_3":"576","col_4":"Willow ","col_5":"Road","col_6":"Suite E","col_7":"Pleasant Valley","col_8":"Mohave County","col_9":"AZ","col_10":"82477","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":115,"col_2":"AAAAAAAADHAAAAAA","col_3":"543","col_4":"12th 9th","col_5":"Ave","col_6":"","col_7":"Wildwood","col_8":"Rolette County","col_9":"","col_10":"","col_11":"","col_12":"-6","col_13":"","col_14":""} -{"col_1":116,"col_2":"AAAAAAAAEHAAAAAA","col_3":"581","col_4":"Oak ","col_5":"Road","col_6":"Suite 310","col_7":"Deerfield","col_8":"Harvey County","col_9":"KS","col_10":"69840","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":117,"col_2":"AAAAAAAAFHAAAAAA","col_3":"593","col_4":"2nd 5th","col_5":"Ln","col_6":"Suite 400","col_7":"Provcol_2ence","col_8":"Curry County","col_9":"OR","col_10":"96614","col_11":"United States","col_12":"-8","col_13":"condo","col_14":""} -{"col_1":118,"col_2":"AAAAAAAAGHAAAAAA","col_3":"185","col_4":"Wilson ","col_5":"Circle","col_6":"Suite 270","col_7":"Union Hill","col_8":"Washington County","col_9":"MS","col_10":"57746","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":119,"col_2":"AAAAAAAAHHAAAAAA","col_3":"525","col_4":"Cedar ","col_5":"RD","col_6":"Suite 210","col_7":"Lakeview","col_8":"Stark County","col_9":"OH","col_10":"48579","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":120,"col_2":"AAAAAAAAIHAAAAAA","col_3":"741","col_4":"First ","col_5":"Ave","col_6":"Suite O","col_7":"Valley View","col_8":"Benton County","col_9":"IN","col_10":"45124","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":121,"col_2":"AAAAAAAAJHAAAAAA","col_3":"644","col_4":"Sixth ","col_5":"Ln","col_6":"Suite 470","col_7":"Lakewood","col_8":"Putnam County","col_9":"WV","col_10":"28877","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":122,"col_2":"AAAAAAAAKHAAAAAA","col_3":"","col_4":"","col_5":"Ave","col_6":"","col_7":"Woodlawn","col_8":"","col_9":"MN","col_10":"","col_11":"","col_12":"-6","col_13":"","col_14":""} -{"col_1":123,"col_2":"AAAAAAAALHAAAAAA","col_3":"319","col_4":"West View","col_5":"Blvd","col_6":"Suite X","col_7":"Concord","col_8":"Lagrange County","col_9":"IN","col_10":"44107","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":124,"col_2":"AAAAAAAAMHAAAAAA","col_3":"688","col_4":"Woodland 2nd","col_5":"Cir.","col_6":"Suite J","col_7":"Hopewell","col_8":"Okanogan County","col_9":"WA","col_10":"90587","col_11":"United States","col_12":"-8","col_13":"condo","col_14":""} -{"col_1":125,"col_2":"AAAAAAAANHAAAAAA","col_3":"74","col_4":"7th ","col_5":"Lane","col_6":"Suite 180","col_7":"New Hope","col_8":"Santa Clara County","col_9":"CA","col_10":"99431","col_11":"United States","col_12":"-8","col_13":"apartment","col_14":""} -{"col_1":126,"col_2":"AAAAAAAAOHAAAAAA","col_3":"196","col_4":"Maple Railroad","col_5":"Road","col_6":"Suite 120","col_7":"Arlington","col_8":"Mahoning County","col_9":"OH","col_10":"46557","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":127,"col_2":"AAAAAAAAPHAAAAAA","col_3":"896","col_4":"Sunset ","col_5":"Ln","col_6":"Suite Y","col_7":"Kingston","col_8":"Stanton County","col_9":"KS","col_10":"64975","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":128,"col_2":"AAAAAAAAAIAAAAAA","col_3":"127","col_4":"Rcol_2ge 
9th","col_5":"Boulevard","col_6":"Suite B","col_7":"Sutton","col_8":"Lorain County","col_9":"OH","col_10":"45413","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":129,"col_2":"AAAAAAAABIAAAAAA","col_3":"","col_4":"Sunset Pine","col_5":"","col_6":"","col_7":"","col_8":"Dickson County","col_9":"TN","col_10":"","col_11":"","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":130,"col_2":"AAAAAAAACIAAAAAA","col_3":"194","col_4":"Pine ","col_5":"ST","col_6":"Suite J","col_7":"Salem","col_8":"Potter County","col_9":"PA","col_10":"18048","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":131,"col_2":"AAAAAAAADIAAAAAA","col_3":"884","col_4":"Woodland Seventh","col_5":"Road","col_6":"Suite 200","col_7":"Green Acres","col_8":"Cherokee County","col_9":"OK","col_10":"77683","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":132,"col_2":"AAAAAAAAEIAAAAAA","col_3":"797","col_4":"2nd ","col_5":"Ct.","col_6":"Suite M","col_7":"Ellisville","col_8":"Darlington County","col_9":"SC","col_10":"26820","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":133,"col_2":"AAAAAAAAFIAAAAAA","col_3":"953","col_4":"Oak 5th","col_5":"Boulevard","col_6":"Suite J","col_7":"Oakland","col_8":"Fannin County","col_9":"TX","col_10":"79843","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":134,"col_2":"AAAAAAAAGIAAAAAA","col_3":"504","col_4":"West Hickory","col_5":"Lane","col_6":"Suite E","col_7":"Greenwood","col_8":"Crawford County","col_9":"IN","col_10":"48828","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":135,"col_2":"AAAAAAAAHIAAAAAA","col_3":"279","col_4":"Oak ","col_5":"Ave","col_6":"Suite W","col_7":"Clearview","col_8":"Thayer County","col_9":"NE","col_10":"65495","col_11":"United States","col_12":"-7","col_13":"apartment","col_14":""} -{"col_1":136,"col_2":"AAAAAAAAIIAAAAAA","col_3":"610","col_4":"Elm ","col_5":"Wy","col_6":"Suite 30","col_7":"Fowler","col_8":"McLean County","col_9":"IL","col_10":"61083","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":137,"col_2":"AAAAAAAAJIAAAAAA","col_3":"467","col_4":"Second ","col_5":"RD","col_6":"Suite 220","col_7":"Greenwood","col_8":"Madison County","col_9":"NE","col_10":"68828","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":138,"col_2":"AAAAAAAAKIAAAAAA","col_3":"99","col_4":"Hillcrest 13th","col_5":"Lane","col_6":"Suite Q","col_7":"Belmont","col_8":"Greenwood County","col_9":"KS","col_10":"60191","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":139,"col_2":"AAAAAAAALIAAAAAA","col_3":"14","col_4":"Pine Fourth","col_5":"Ave","col_6":"Suite 290","col_7":"Pine Grove","col_8":"Letcher County","col_9":"KY","col_10":"44593","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":140,"col_2":"AAAAAAAAMIAAAAAA","col_3":"837","col_4":"Madison Washington","col_5":"Cir.","col_6":"Suite Q","col_7":"Union","col_8":"Ottawa County","col_9":"OK","col_10":"78721","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":141,"col_2":"AAAAAAAANIAAAAAA","col_3":"727","col_4":"Elm Center","col_5":"Cir.","col_6":"Suite D","col_7":"Hartland","col_8":"Edmunds County","col_9":"SD","col_10":"56594","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":142,"col_2":"AAAAAAAAOIAAAAAA","col_3":"268","col_4":"Maple 
","col_5":"Road","col_6":"Suite 210","col_7":"Belmont","col_8":"Bergen County","col_9":"NJ","col_10":"00791","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":143,"col_2":"AAAAAAAAPIAAAAAA","col_3":"141","col_4":"9th ","col_5":"Ct.","col_6":"Suite R","col_7":"Edgewood","col_8":"Burke County","col_9":"GA","col_10":"30069","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":144,"col_2":"AAAAAAAAAJAAAAAA","col_3":"686","col_4":"Cedar ","col_5":"Cir.","col_6":"Suite 90","col_7":"Green Acres","col_8":"Alpena County","col_9":"MI","col_10":"47683","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":145,"col_2":"AAAAAAAABJAAAAAA","col_3":"454","col_4":"Maple ","col_5":"Street","col_6":"Suite 190","col_7":"Wilson","col_8":"Highland County","col_9":"VA","col_10":"26971","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":146,"col_2":"AAAAAAAACJAAAAAA","col_3":"976","col_4":"Sixth Dogwood","col_5":"Ct.","col_6":"Suite L","col_7":"Highland","col_8":"Litchfield County","col_9":"CT","col_10":"09454","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":147,"col_2":"AAAAAAAADJAAAAAA","col_3":"596","col_4":"Lake Maple","col_5":"Blvd","col_6":"Suite 160","col_7":"Mount Olive","col_8":"Hawaii County","col_9":"HI","col_10":"98059","col_11":"United States","col_12":"-10","col_13":"apartment","col_14":""} -{"col_1":148,"col_2":"AAAAAAAAEJAAAAAA","col_3":"731","col_4":"14th ","col_5":"Boulevard","col_6":"Suite D","col_7":"Cedar Grove","col_8":"Morton County","col_9":"ND","col_10":"50411","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":149,"col_2":"AAAAAAAAFJAAAAAA","col_3":"8","col_4":"Woodland ","col_5":"Boulevard","col_6":"Suite 270","col_7":"Glenwood","col_8":"Haralson County","col_9":"GA","col_10":"33511","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":150,"col_2":"AAAAAAAAGJAAAAAA","col_3":"121","col_4":"Fifteenth ","col_5":"Blvd","col_6":"Suite C","col_7":"Sunnyscol_2e","col_8":"Lafayette County","col_9":"AR","col_10":"71952","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":151,"col_2":"AAAAAAAAHJAAAAAA","col_3":"125","col_4":"Fifth ","col_5":"Cir.","col_6":"Suite 140","col_7":"Shiloh","col_8":"Deuel County","col_9":"NE","col_10":"69275","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":152,"col_2":"AAAAAAAAIJAAAAAA","col_3":"448","col_4":"Sunset ","col_5":"RD","col_6":"Suite J","col_7":"Red Hill","col_8":"Sharkey County","col_9":"MS","col_10":"54338","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":153,"col_2":"AAAAAAAAJJAAAAAA","col_3":"171","col_4":"River Elm","col_5":"Parkway","col_6":"Suite 130","col_7":"Deerfield","col_8":"Fulton County","col_9":"OH","col_10":"49840","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":154,"col_2":"AAAAAAAAKJAAAAAA","col_3":"264","col_4":"Hillcrest Lincoln","col_5":"Boulevard","col_6":"Suite W","col_7":"Lakewood","col_8":"Eureka County","col_9":"NV","col_10":"88877","col_11":"United States","col_12":"-8","col_13":"condo","col_14":""} -{"col_1":155,"col_2":"AAAAAAAALJAAAAAA","col_3":"902","col_4":"10th ","col_5":"Avenue","col_6":"Suite 30","col_7":"Glendale","col_8":"Jackson County","col_9":"MI","col_10":"43951","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} 
-{"col_1":156,"col_2":"AAAAAAAAMJAAAAAA","col_3":"595","col_4":"","col_5":"","col_6":"","col_7":"","col_8":"","col_9":"TX","col_10":"","col_11":"","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":157,"col_2":"AAAAAAAANJAAAAAA","col_3":"904","col_4":"Locust Seventh","col_5":"Court","col_6":"Suite 340","col_7":"Shiloh","col_8":"Clermont County","col_9":"OH","col_10":"49275","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":158,"col_2":"AAAAAAAAOJAAAAAA","col_3":"293","col_4":"Lincoln Willow","col_5":"Court","col_6":"Suite 370","col_7":"Shady Grove","col_8":"Davcol_2son County","col_9":"NC","col_10":"22812","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":159,"col_2":"AAAAAAAAPJAAAAAA","col_3":"544","col_4":"Spruce ","col_5":"Cir.","col_6":"Suite J","col_7":"New Hope","col_8":"Bradford County","col_9":"FL","col_10":"39431","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":160,"col_2":"AAAAAAAAAKAAAAAA","col_3":"676","col_4":"Jefferson ","col_5":"Lane","col_6":"Suite 170","col_7":"Woodland","col_8":"Fresno County","col_9":"CA","col_10":"94854","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":161,"col_2":"AAAAAAAABKAAAAAA","col_3":"518","col_4":"Center Spruce","col_5":"Boulevard","col_6":"Suite 100","col_7":"Clifton","col_8":"McDonough County","col_9":"IL","col_10":"68014","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":162,"col_2":"AAAAAAAACKAAAAAA","col_3":"68","col_4":"Oak Sycamore","col_5":"Way","col_6":"Suite 480","col_7":"Waterloo","col_8":"Pottawatomie County","col_9":"KS","col_10":"61675","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":163,"col_2":"AAAAAAAADKAAAAAA","col_3":"484","col_4":"Spring ","col_5":"Dr.","col_6":"Suite 90","col_7":"Hamilton","col_8":"Vernon County","col_9":"WI","col_10":"52808","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":164,"col_2":"AAAAAAAAEKAAAAAA","col_3":"435","col_4":"2nd ","col_5":"Boulevard","col_6":"Suite Q","col_7":"Enterprise","col_8":"Sussex County","col_9":"DE","col_10":"11757","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":165,"col_2":"AAAAAAAAFKAAAAAA","col_3":"144","col_4":"Sunset 2nd","col_5":"Way","col_6":"Suite A","col_7":"Woodland","col_8":"Banner County","col_9":"NE","col_10":"64854","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":166,"col_2":"AAAAAAAAGKAAAAAA","col_3":"335","col_4":"7th ","col_5":"Blvd","col_6":"Suite 230","col_7":"Plainview","col_8":"Taylor County","col_9":"GA","col_10":"33683","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":167,"col_2":"AAAAAAAAHKAAAAAA","col_3":"288","col_4":"Oak 3rd","col_5":"Dr.","col_6":"Suite B","col_7":"Woodland","col_8":"Grant County","col_9":"NE","col_10":"64854","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":168,"col_2":"AAAAAAAAIKAAAAAA","col_3":"373","col_4":"Maple ","col_5":"Court","col_6":"Suite 450","col_7":"Provcol_2ence","col_8":"Hickory County","col_9":"MO","col_10":"66614","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":169,"col_2":"AAAAAAAAJKAAAAAA","col_3":"12","col_4":"","col_5":"","col_6":"Suite 50","col_7":"","col_8":"","col_9":"","col_10":"","col_11":"","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":170,"col_2":"AAAAAAAAKKAAAAAA","col_3":"237","col_4":"Walnut 
","col_5":"Way","col_6":"Suite W","col_7":"Colonial Heights","col_8":"Monroe County","col_9":"MO","col_10":"63425","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":171,"col_2":"AAAAAAAALKAAAAAA","col_3":"871","col_4":"Smith Seventh","col_5":"Way","col_6":"Suite A","col_7":"Calhoun","col_8":"Cook County","col_9":"MN","col_10":"56909","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":172,"col_2":"AAAAAAAAMKAAAAAA","col_3":"","col_4":"5th ","col_5":"","col_6":"Suite 280","col_7":"","col_8":"Madison County","col_9":"MO","col_10":"68054","col_11":"United States","col_12":"","col_13":"single family","col_14":""} -{"col_1":173,"col_2":"AAAAAAAANKAAAAAA","col_3":"715","col_4":"1st ","col_5":"Dr.","col_6":"Suite H","col_7":"Forest Hills","col_8":"Grayson County","col_9":"KY","col_10":"49237","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":174,"col_2":"AAAAAAAAOKAAAAAA","col_3":"986","col_4":"Adams ","col_5":"Pkwy","col_6":"Suite M","col_7":"Holcol_2ay Hills","col_8":"Garden County","col_9":"NE","col_10":"63109","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":175,"col_2":"AAAAAAAAPKAAAAAA","col_3":"557","col_4":"4th Forest","col_5":"Cir.","col_6":"Suite 10","col_7":"Antioch","col_8":"Leon County","col_9":"TX","col_10":"78605","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":176,"col_2":"AAAAAAAAALAAAAAA","col_3":"302","col_4":"Jefferson ","col_5":"RD","col_6":"Suite 140","col_7":"Mcol_2way","col_8":"Madison County","col_9":"GA","col_10":"31904","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":177,"col_2":"AAAAAAAABLAAAAAA","col_3":"853","col_4":"Dogwood View","col_5":"Lane","col_6":"Suite 440","col_7":"Woodland","col_8":"Grand County","col_9":"CO","col_10":"84854","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":178,"col_2":"AAAAAAAACLAAAAAA","col_3":"592","col_4":"Davis 14th","col_5":"Circle","col_6":"Suite L","col_7":"Friendship","col_8":"Winneshiek County","col_9":"IA","col_10":"54536","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":179,"col_2":"AAAAAAAADLAAAAAA","col_3":"795","col_4":"Davis Walnut","col_5":"Avenue","col_6":"Suite T","col_7":"Jackson","col_8":"Nye County","col_9":"NV","col_10":"89583","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":180,"col_2":"AAAAAAAAELAAAAAA","col_3":"264","col_4":"Railroad 11th","col_5":"Ct.","col_6":"Suite R","col_7":"Mcol_2way","col_8":"Carroll County","col_9":"IL","col_10":"61904","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":181,"col_2":"AAAAAAAAFLAAAAAA","col_3":"469","col_4":"8th ","col_5":"Drive","col_6":"Suite 250","col_7":"Greenwood","col_8":"Osborne County","col_9":"KS","col_10":"68828","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":182,"col_2":"AAAAAAAAGLAAAAAA","col_3":"772","col_4":"Elm ","col_5":"Court","col_6":"Suite 290","col_7":"Bayscol_2e","col_8":"Rice County","col_9":"KS","col_10":"69550","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":183,"col_2":"AAAAAAAAHLAAAAAA","col_3":"47","col_4":"Washington ","col_5":"Ave","col_6":"Suite 310","col_7":"Lakescol_2e","col_8":"Moniteau County","col_9":"MO","col_10":"69532","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} 
-{"col_1":184,"col_2":"AAAAAAAAILAAAAAA","col_3":"704","col_4":"Lincoln ","col_5":"Ln","col_6":"Suite 340","col_7":"Flatwoods","col_8":"Williamson County","col_9":"TX","col_10":"74212","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":185,"col_2":"AAAAAAAAJLAAAAAA","col_3":"441","col_4":"Willow Washington","col_5":"Cir.","col_6":"Suite 490","col_7":"Hillcrest","col_8":"Pueblo County","col_9":"CO","col_10":"83003","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":186,"col_2":"AAAAAAAAKLAAAAAA","col_3":"309","col_4":"8th ","col_5":"Lane","col_6":"Suite T","col_7":"Riverscol_2e","col_8":"Mesa County","col_9":"CO","col_10":"89231","col_11":"United States","col_12":"-7","col_13":"apartment","col_14":""} -{"col_1":187,"col_2":"AAAAAAAALLAAAAAA","col_3":"54","col_4":"Ash 3rd","col_5":"Boulevard","col_6":"Suite 470","col_7":"Centerville","col_8":"Mills County","col_9":"TX","col_10":"70059","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":188,"col_2":"AAAAAAAAMLAAAAAA","col_3":"798","col_4":"West Sunset","col_5":"Pkwy","col_6":"Suite 160","col_7":"Harmony","col_8":"New Haven County","col_9":"CT","col_10":"06404","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":189,"col_2":"AAAAAAAANLAAAAAA","col_3":"719","col_4":"Hillcrest ","col_5":"Parkway","col_6":"Suite 270","col_7":"Five Forks","col_8":"Sanders County","col_9":"MT","col_10":"62293","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":190,"col_2":"AAAAAAAAOLAAAAAA","col_3":"928","col_4":"Lake Lincoln","col_5":"Pkwy","col_6":"Suite T","col_7":"Fremont","col_8":"Greene County","col_9":"NC","col_10":"21851","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":191,"col_2":"AAAAAAAAPLAAAAAA","col_3":"115","col_4":"12th First","col_5":"Boulevard","col_6":"Suite J","col_7":"Oakwood","col_8":"Hamilton County","col_9":"NE","col_10":"60169","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":192,"col_2":"AAAAAAAAAMAAAAAA","col_3":"639","col_4":"Maple East","col_5":"Circle","col_6":"Suite 340","col_7":"Walnut Grove","col_8":"Stokes County","col_9":"NC","col_10":"27752","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":193,"col_2":"AAAAAAAABMAAAAAA","col_3":"239","col_4":"1st ","col_5":"Road","col_6":"Suite G","col_7":"Greenville","col_8":"Hickman County","col_9":"KY","col_10":"41387","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":194,"col_2":"AAAAAAAACMAAAAAA","col_3":"990","col_4":"9th First","col_5":"Ln","col_6":"Suite 230","col_7":"Oak Hill","col_8":"Union County","col_9":"AR","col_10":"77838","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":195,"col_2":"AAAAAAAADMAAAAAA","col_3":"939","col_4":"Cedar ","col_5":"Court","col_6":"Suite A","col_7":"Mount Pleasant","col_8":"Marion County","col_9":"GA","col_10":"31933","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":196,"col_2":"AAAAAAAAEMAAAAAA","col_3":"860","col_4":"","col_5":"","col_6":"Suite 270","col_7":"","col_8":"","col_9":"","col_10":"","col_11":"","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":197,"col_2":"AAAAAAAAFMAAAAAA","col_3":"200","col_4":"Park North","col_5":"Blvd","col_6":"Suite F","col_7":"Highland Park","col_8":"Creek County","col_9":"OK","col_10":"76534","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} 
-{"col_1":198,"col_2":"AAAAAAAAGMAAAAAA","col_3":"842","col_4":"3rd ","col_5":"RD","col_6":"Suite 280","col_7":"Pleasant Valley","col_8":"Pawnee County","col_9":"KS","col_10":"62477","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":199,"col_2":"AAAAAAAAHMAAAAAA","col_3":"790","col_4":"Pine ","col_5":"Street","col_6":"Suite 400","col_7":"Lakeview","col_8":"Somerset County","col_9":"ME","col_10":"09179","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":200,"col_2":"AAAAAAAAIMAAAAAA","col_3":"516","col_4":"6th Spring","col_5":"Boulevard","col_6":"Suite K","col_7":"Marion","col_8":"McIntosh County","col_9":"OK","col_10":"70399","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_3.json b/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_3.json deleted file mode 100644 index b3f69228e20c6..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_3.json +++ /dev/null @@ -1,100 +0,0 @@ -{"col_1":201,"col_2":"AAAAAAAAJMAAAAAA","col_3":"517","col_4":"8th ","col_5":"Parkway","col_6":"Suite J","col_7":"Salem","col_8":"McIntosh County","col_9":"OK","col_10":"78048","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":202,"col_2":"AAAAAAAAKMAAAAAA","col_3":"944","col_4":"Willow ","col_5":"Parkway","col_6":"Suite L","col_7":"Unionville","col_8":"Collin County","col_9":"TX","col_10":"71711","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":203,"col_2":"AAAAAAAALMAAAAAA","col_3":"997","col_4":"Cedar ","col_5":"Boulevard","col_6":"Suite T","col_7":"Mount Zion","col_8":"Wabash County","col_9":"IL","col_10":"68054","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":204,"col_2":"AAAAAAAAMMAAAAAA","col_3":"901","col_4":"Locust Main","col_5":"Blvd","col_6":"Suite 440","col_7":"Friendship","col_8":"Isabella County","col_9":"MI","col_10":"44536","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":205,"col_2":"AAAAAAAANMAAAAAA","col_3":"525","col_4":"North Franklin","col_5":"Pkwy","col_6":"Suite 490","col_7":"Shore Acres","col_8":"Benewah County","col_9":"col_2","col_10":"82724","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":206,"col_2":"AAAAAAAAOMAAAAAA","col_3":"381","col_4":"Third ","col_5":"Blvd","col_6":"Suite 10","col_7":"Waterloo","col_8":"Panola County","col_9":"MS","col_10":"51675","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":207,"col_2":"AAAAAAAAPMAAAAAA","col_3":"833","col_4":"Miller ","col_5":"Pkwy","col_6":"Suite 450","col_7":"Blue Springs","col_8":"Vernon County","col_9":"WI","col_10":"54686","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":208,"col_2":"AAAAAAAAANAAAAAA","col_3":"772","col_4":"Second ","col_5":"Court","col_6":"Suite S","col_7":"Lincoln","col_8":"Albany County","col_9":"WY","col_10":"81289","col_11":"United States","col_12":"-7","col_13":"apartment","col_14":""} -{"col_1":209,"col_2":"AAAAAAAABNAAAAAA","col_3":"335","col_4":"River ","col_5":"Drive","col_6":"Suite 210","col_7":"Sulphur Springs","col_8":"Mitchell County","col_9":"NC","col_10":"28354","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":210,"col_2":"AAAAAAAACNAAAAAA","col_3":"617","col_4":"Park 
Hill","col_5":"Road","col_6":"Suite W","col_7":"Shiloh","col_8":"Jefferson County","col_9":"KY","col_10":"49275","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":211,"col_2":"AAAAAAAADNAAAAAA","col_3":"747","col_4":"Willow Cedar","col_5":"Lane","col_6":"Suite 130","col_7":"Brcol_2geport","col_8":"Lamar County","col_9":"MS","col_10":"55817","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":212,"col_2":"AAAAAAAAENAAAAAA","col_3":"691","col_4":"9th Mill","col_5":"","col_6":"Suite 190","col_7":"","col_8":"Pontotoc County","col_9":"MS","col_10":"59431","col_11":"","col_12":"","col_13":"","col_14":""} -{"col_1":213,"col_2":"AAAAAAAAFNAAAAAA","col_3":"351","col_4":"1st ","col_5":"Blvd","col_6":"Suite I","col_7":"Superior","col_8":"Hcol_2algo County","col_9":"NM","col_10":"82562","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":214,"col_2":"AAAAAAAAGNAAAAAA","col_3":"375","col_4":"Walnut Second","col_5":"Lane","col_6":"Suite 480","col_7":"Oakland","col_8":"Halifax County","col_9":"NC","col_10":"29843","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":215,"col_2":"AAAAAAAAHNAAAAAA","col_3":"874","col_4":"3rd ","col_5":"Street","col_6":"Suite M","col_7":"Sunnyscol_2e","col_8":"Palo Alto County","col_9":"IA","col_10":"51952","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":216,"col_2":"AAAAAAAAINAAAAAA","col_3":"469","col_4":"Maple Laurel","col_5":"Pkwy","col_6":"Suite F","col_7":"Greenwood","col_8":"Hampton city","col_9":"VA","col_10":"28828","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":217,"col_2":"AAAAAAAAJNAAAAAA","col_3":"297","col_4":"7th ","col_5":"Way","col_6":"Suite E","col_7":"White Oak","col_8":"Tioga County","col_9":"NY","col_10":"16668","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":218,"col_2":"AAAAAAAAKNAAAAAA","col_3":"998","col_4":"Center Dogwood","col_5":"Lane","col_6":"Suite I","col_7":"Oakland","col_8":"Pine County","col_9":"MN","col_10":"59843","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":219,"col_2":"AAAAAAAALNAAAAAA","col_3":"933","col_4":"Franklin Highland","col_5":"ST","col_6":"Suite 380","col_7":"Woodbury","col_8":"Stevens County","col_9":"KS","col_10":"64489","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":220,"col_2":"AAAAAAAAMNAAAAAA","col_3":"695","col_4":"1st Oak","col_5":"Circle","col_6":"Suite 40","col_7":"Liberty","col_8":"Wabaunsee County","col_9":"KS","col_10":"63451","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":221,"col_2":"AAAAAAAANNAAAAAA","col_3":"940","col_4":"5th ","col_5":"Ln","col_6":"Suite 300","col_7":"Forest Hills","col_8":"Kit Carson County","col_9":"CO","col_10":"89237","col_11":"United States","col_12":"-7","col_13":"apartment","col_14":""} -{"col_1":222,"col_2":"AAAAAAAAONAAAAAA","col_3":"393","col_4":"South ","col_5":"Wy","col_6":"Suite X","col_7":"Ashley","col_8":"Falls County","col_9":"TX","col_10":"74324","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":223,"col_2":"AAAAAAAAPNAAAAAA","col_3":"455","col_4":"First ","col_5":"Ln","col_6":"Suite 210","col_7":"Friendship","col_8":"Cleburne County","col_9":"AL","col_10":"34536","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":224,"col_2":"AAAAAAAAAOAAAAAA","col_3":"94","col_4":"Lincoln 
Walnut","col_5":"Court","col_6":"Suite 130","col_7":"Edgewood","col_8":"Calhoun County","col_9":"MS","col_10":"50069","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":225,"col_2":"AAAAAAAABOAAAAAA","col_3":"673","col_4":"Lincoln Washington","col_5":"Dr.","col_6":"Suite K","col_7":"Altamont","col_8":"Lake County","col_9":"IN","col_10":"49387","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":226,"col_2":"AAAAAAAACOAAAAAA","col_3":"247","col_4":"Hickory Adams","col_5":"Way","col_6":"Suite R","col_7":"Union","col_8":"Oldham County","col_9":"TX","col_10":"78721","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":227,"col_2":"AAAAAAAADOAAAAAA","col_3":"82","col_4":"Seventh Fifth","col_5":"Ln","col_6":"Suite R","col_7":"Mcol_2way","col_8":"Fairbanks North Star Borough","col_9":"AK","col_10":"91904","col_11":"United States","col_12":"-9","col_13":"apartment","col_14":""} -{"col_1":228,"col_2":"AAAAAAAAEOAAAAAA","col_3":"776","col_4":"Madison ","col_5":"ST","col_6":"Suite A","col_7":"Woodland","col_8":"Franklin County","col_9":"IA","col_10":"54854","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":229,"col_2":"AAAAAAAAFOAAAAAA","col_3":"376","col_4":"Ash ","col_5":"Dr.","col_6":"Suite M","col_7":"Crossroads","col_8":"Franklin city","col_9":"VA","col_10":"20534","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":230,"col_2":"AAAAAAAAGOAAAAAA","col_3":"313","col_4":"2nd Eigth","col_5":"Avenue","col_6":"Suite 40","col_7":"Marion","col_8":"Glascock County","col_9":"GA","col_10":"30399","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":231,"col_2":"AAAAAAAAHOAAAAAA","col_3":"560","col_4":"Lakeview ","col_5":"Way","col_6":"Suite 130","col_7":"Deerfield","col_8":"Union County","col_9":"KY","col_10":"49840","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":232,"col_2":"AAAAAAAAIOAAAAAA","col_3":"239","col_4":"Elm ","col_5":"RD","col_6":"Suite D","col_7":"Shady Grove","col_8":"Harrison County","col_9":"IN","col_10":"42812","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":233,"col_2":"AAAAAAAAJOAAAAAA","col_3":"957","col_4":"Cherry Johnson","col_5":"Ct.","col_6":"Suite E","col_7":"Clinton","col_8":"Jackson County","col_9":"NC","col_10":"28222","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":234,"col_2":"AAAAAAAAKOAAAAAA","col_3":"986","col_4":"6th Main","col_5":"Ln","col_6":"Suite 20","col_7":"Crossroads","col_8":"Benton County","col_9":"IN","col_10":"40534","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":235,"col_2":"AAAAAAAALOAAAAAA","col_3":"213","col_4":"Madison ","col_5":"Pkwy","col_6":"Suite H","col_7":"Lincoln","col_8":"Orange County","col_9":"FL","col_10":"31289","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":236,"col_2":"AAAAAAAAMOAAAAAA","col_3":"944","col_4":"West Center","col_5":"Cir.","col_6":"Suite J","col_7":"Lewisburg","col_8":"Flagler County","col_9":"FL","col_10":"37538","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":237,"col_2":"AAAAAAAANOAAAAAA","col_3":"169","col_4":"Broadway Oak","col_5":"Boulevard","col_6":"Suite V","col_7":"Marion","col_8":"Fayette County","col_9":"AL","col_10":"30399","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} 
-{"col_1":238,"col_2":"AAAAAAAAOOAAAAAA","col_3":"566","col_4":"Williams Lee","col_5":"Way","col_6":"Suite 100","col_7":"Pleasant Hill","col_8":"Campbell County","col_9":"TN","col_10":"33604","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":239,"col_2":"AAAAAAAAPOAAAAAA","col_3":"633","col_4":"Elm 9th","col_5":"Dr.","col_6":"Suite 50","col_7":"Franklin","col_8":"Jefferson County","col_9":"IN","col_10":"49101","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":240,"col_2":"AAAAAAAAAPAAAAAA","col_3":"8","col_4":"Valley ","col_5":"Blvd","col_6":"Suite 470","col_7":"Concord","col_8":"Castro County","col_9":"TX","col_10":"74107","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":241,"col_2":"AAAAAAAABPAAAAAA","col_3":"323","col_4":"Laurel ","col_5":"Way","col_6":"Suite 50","col_7":"Wilson","col_8":"Sioux County","col_9":"IA","col_10":"56971","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":242,"col_2":"AAAAAAAACPAAAAAA","col_3":"706","col_4":"Oak Woodland","col_5":"Court","col_6":"Suite C","col_7":"Sunnyscol_2e","col_8":"Stephenson County","col_9":"IL","col_10":"61952","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":243,"col_2":"AAAAAAAADPAAAAAA","col_3":"600","col_4":"View ","col_5":"Ave","col_6":"Suite 150","col_7":"Crossroads","col_8":"Bennington County","col_9":"VT","col_10":"01134","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":244,"col_2":"AAAAAAAAEPAAAAAA","col_3":"501","col_4":"Dogwood Woodland","col_5":"Parkway","col_6":"Suite 180","col_7":"Ashland","col_8":"Iredell County","col_9":"NC","col_10":"24244","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":245,"col_2":"AAAAAAAAFPAAAAAA","col_3":"187","col_4":"Park North","col_5":"Street","col_6":"Suite 470","col_7":"Crossroads","col_8":"Bee County","col_9":"TX","col_10":"70534","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":246,"col_2":"AAAAAAAAGPAAAAAA","col_3":"154","col_4":"1st ","col_5":"Road","col_6":"Suite V","col_7":"Union Hill","col_8":"Camden County","col_9":"NJ","col_10":"08346","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":247,"col_2":"AAAAAAAAHPAAAAAA","col_3":"310","col_4":"Sunset ","col_5":"Circle","col_6":"Suite A","col_7":"Lakewood","col_8":"Newport News city","col_9":"VA","col_10":"28877","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":248,"col_2":"AAAAAAAAIPAAAAAA","col_3":"587","col_4":"Williams Dogwood","col_5":"Avenue","col_6":"Suite R","col_7":"Concord","col_8":"Paulding County","col_9":"OH","col_10":"44107","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":249,"col_2":"AAAAAAAAJPAAAAAA","col_3":"378","col_4":"First Oak","col_5":"ST","col_6":"Suite 390","col_7":"Five Points","col_8":"Kauai County","col_9":"HI","col_10":"96098","col_11":"United States","col_12":"-10","col_13":"condo","col_14":""} -{"col_1":250,"col_2":"AAAAAAAAKPAAAAAA","col_3":"882","col_4":"Rcol_2ge Meadow","col_5":"Parkway","col_6":"Suite Q","col_7":"Harmony","col_8":"Macon County","col_9":"TN","col_10":"35804","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":251,"col_2":"AAAAAAAALPAAAAAA","col_3":"913","col_4":"First Johnson","col_5":"Ct.","col_6":"Suite V","col_7":"Sugar Hill","col_8":"Crawford 
County","col_9":"IL","col_10":"65114","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":252,"col_2":"AAAAAAAAMPAAAAAA","col_3":"753","col_4":"Center ","col_5":"Cir.","col_6":"Suite 250","col_7":"Bunker Hill","col_8":"Halifax County","col_9":"VA","col_10":"20150","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":253,"col_2":"AAAAAAAANPAAAAAA","col_3":"446","col_4":"Jefferson Adams","col_5":"Avenue","col_6":"Suite O","col_7":"Liberty","col_8":"Dinwcol_2die County","col_9":"VA","col_10":"23451","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":254,"col_2":"AAAAAAAAOPAAAAAA","col_3":"271","col_4":"Highland ","col_5":"Lane","col_6":"Suite W","col_7":"Greenfield","col_8":"Red River Parish","col_9":"LA","col_10":"75038","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":255,"col_2":"AAAAAAAAPPAAAAAA","col_3":"681","col_4":"","col_5":"Cir.","col_6":"","col_7":"Concord","col_8":"","col_9":"MD","col_10":"","col_11":"","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":256,"col_2":"AAAAAAAAAABAAAAA","col_3":"973","col_4":"Forest ","col_5":"Dr.","col_6":"Suite V","col_7":"Cordova","col_8":"Union Parish","col_9":"LA","col_10":"76938","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":257,"col_2":"AAAAAAAABABAAAAA","col_3":"960","col_4":"4th ","col_5":"Wy","col_6":"Suite 200","col_7":"Ellsworth","col_8":"Bourbon County","col_9":"KS","col_10":"65079","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":258,"col_2":"AAAAAAAACABAAAAA","col_3":"241","col_4":"Birch ","col_5":"RD","col_6":"Suite X","col_7":"Walnut Grove","col_8":"Pasco County","col_9":"FL","col_10":"37752","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":259,"col_2":"AAAAAAAADABAAAAA","col_3":"888","col_4":"Sunset ","col_5":"Circle","col_6":"Suite I","col_7":"Elkton","col_8":"Sumter County","col_9":"FL","col_10":"33481","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":260,"col_2":"AAAAAAAAEABAAAAA","col_3":"629","col_4":"13th Cherry","col_5":"Court","col_6":"Suite 450","col_7":"Plainview","col_8":"Anson County","col_9":"NC","col_10":"23683","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":261,"col_2":"AAAAAAAAFABAAAAA","col_3":"911","col_4":"Second ","col_5":"RD","col_6":"Suite A","col_7":"Harmony","col_8":"Lee County","col_9":"GA","col_10":"35804","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":262,"col_2":"AAAAAAAAGABAAAAA","col_3":"718","col_4":"Willow ","col_5":"RD","col_6":"Suite Q","col_7":"Martinsville","col_8":"Amherst County","col_9":"VA","col_10":"20419","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":263,"col_2":"AAAAAAAAHABAAAAA","col_3":"75","col_4":"Park Oak","col_5":"ST","col_6":"Suite 80","col_7":"Marion","col_8":"Nantucket County","col_9":"MA","col_10":"00999","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":264,"col_2":"AAAAAAAAIABAAAAA","col_3":"17","col_4":"11th ","col_5":"RD","col_6":"Suite M","col_7":"Riverdale","col_8":"Pendleton County","col_9":"KY","col_10":"49391","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":265,"col_2":"AAAAAAAAJABAAAAA","col_3":"447","col_4":"Walnut Fourth","col_5":"Lane","col_6":"Suite R","col_7":"Union","col_8":"Lincoln County","col_9":"MO","col_10":"68721","col_11":"United 
States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":266,"col_2":"AAAAAAAAKABAAAAA","col_3":"940","col_4":"Second Rcol_2ge","col_5":"Parkway","col_6":"Suite C","col_7":"Red Hill","col_8":"Marshall County","col_9":"SD","col_10":"54338","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":267,"col_2":"AAAAAAAALABAAAAA","col_3":"671","col_4":"Broadway ","col_5":"Circle","col_6":"Suite 170","col_7":"Brcol_2geport","col_8":"Park County","col_9":"WY","col_10":"85817","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":268,"col_2":"AAAAAAAAMABAAAAA","col_3":"938","col_4":"Cherry ","col_5":"Street","col_6":"Suite 30","col_7":"Stringtown","col_8":"Emmet County","col_9":"IA","col_10":"50162","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":269,"col_2":"AAAAAAAANABAAAAA","col_3":"643","col_4":"Park Main","col_5":"Court","col_6":"Suite Y","col_7":"Oak Hill","col_8":"Nuckolls County","col_9":"NE","col_10":"67838","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":270,"col_2":"AAAAAAAAOABAAAAA","col_3":"935","col_4":"Rcol_2ge Hill","col_5":"Blvd","col_6":"Suite 180","col_7":"Lakescol_2e","col_8":"Carroll County","col_9":"AR","col_10":"79532","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":271,"col_2":"AAAAAAAAPABAAAAA","col_3":"5","col_4":"Chestnut Highland","col_5":"Ct.","col_6":"Suite 290","col_7":"Salem","col_8":"Bedford County","col_9":"VA","col_10":"28048","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":272,"col_2":"AAAAAAAAABBAAAAA","col_3":"","col_4":"Hickory ","col_5":"","col_6":"","col_7":"","col_8":"Sioux County","col_9":"","col_10":"68482","col_11":"United States","col_12":"-7","col_13":"","col_14":""} -{"col_1":273,"col_2":"AAAAAAAABBBAAAAA","col_3":"133","col_4":"Church ","col_5":"Avenue","col_6":"Suite F","col_7":"Provcol_2ence","col_8":"Kane County","col_9":"IL","col_10":"66614","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":274,"col_2":"AAAAAAAACBBAAAAA","col_3":"789","col_4":"Locust ","col_5":"RD","col_6":"Suite J","col_7":"Oak Rcol_2ge","col_8":"Sac County","col_9":"IA","col_10":"58371","col_11":"United States","col_12":"-6","col_13":"condo","col_14":""} -{"col_1":275,"col_2":"AAAAAAAADBBAAAAA","col_3":"454","col_4":"Church ","col_5":"Boulevard","col_6":"Suite 370","col_7":"Harmony","col_8":"Parmer County","col_9":"TX","col_10":"75804","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":276,"col_2":"AAAAAAAAEBBAAAAA","col_3":"767","col_4":"6th ","col_5":"Road","col_6":"Suite 430","col_7":"Franklin","col_8":"Rockdale County","col_9":"GA","col_10":"39101","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":277,"col_2":"AAAAAAAAFBBAAAAA","col_3":"129","col_4":"Valley ","col_5":"Circle","col_6":"Suite 410","col_7":"Ashland","col_8":"Decatur County","col_9":"GA","col_10":"34244","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":278,"col_2":"AAAAAAAAGBBAAAAA","col_3":"877","col_4":"14th ","col_5":"Wy","col_6":"Suite 490","col_7":"Ashland","col_8":"New Kent County","col_9":"VA","col_10":"24244","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":279,"col_2":"AAAAAAAAHBBAAAAA","col_3":"276","col_4":"Smith 5th","col_5":"Street","col_6":"Suite 270","col_7":"Jamestown","col_8":"Martin 
County","col_9":"KY","col_10":"46867","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":280,"col_2":"AAAAAAAAIBBAAAAA","col_3":"288","col_4":"Walnut 1st","col_5":"Ln","col_6":"Suite M","col_7":"Antioch","col_8":"Cleveland County","col_9":"NC","col_10":"28605","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":281,"col_2":"AAAAAAAAJBBAAAAA","col_3":"716","col_4":"Oak ","col_5":"Wy","col_6":"Suite 330","col_7":"Mount Olive","col_8":"Windham County","col_9":"CT","col_10":"08659","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":282,"col_2":"AAAAAAAAKBBAAAAA","col_3":"4","col_4":"Cherry ","col_5":"Ln","col_6":"Suite L","col_7":"Buena Vista","col_8":"Halifax County","col_9":"VA","col_10":"25752","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":283,"col_2":"AAAAAAAALBBAAAAA","col_3":"49","col_4":"View ","col_5":"Ave","col_6":"Suite W","col_7":"Florence","col_8":"Sevier County","col_9":"UT","col_10":"83394","col_11":"United States","col_12":"-7","col_13":"single family","col_14":""} -{"col_1":284,"col_2":"AAAAAAAAMBBAAAAA","col_3":"766","col_4":"13th ","col_5":"Ave","col_6":"Suite 350","col_7":"Franklin","col_8":"Ohio County","col_9":"KY","col_10":"49101","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":285,"col_2":"AAAAAAAANBBAAAAA","col_3":"764","col_4":"Adams Main","col_5":"Pkwy","col_6":"Suite X","col_7":"Lincoln","col_8":"Phelps County","col_9":"MO","col_10":"61289","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":286,"col_2":"AAAAAAAAOBBAAAAA","col_3":"103","col_4":"4th Davis","col_5":"Way","col_6":"Suite 390","col_7":"Jamestown","col_8":"Douglas County","col_9":"WI","col_10":"56867","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":287,"col_2":"AAAAAAAAPBBAAAAA","col_3":"117","col_4":"","col_5":"","col_6":"Suite 30","col_7":"Mountain View","col_8":"","col_9":"VA","col_10":"24466","col_11":"United States","col_12":"-5","col_13":"","col_14":""} -{"col_1":288,"col_2":"AAAAAAAAACBAAAAA","col_3":"898","col_4":"15th ","col_5":"RD","col_6":"Suite 460","col_7":"Mountain View","col_8":"Humboldt County","col_9":"IA","col_10":"54466","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":289,"col_2":"AAAAAAAABCBAAAAA","col_3":"612","col_4":"Davis Birch","col_5":"Way","col_6":"Suite W","col_7":"Webb","col_8":"Osceola County","col_9":"MI","col_10":"40899","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":290,"col_2":"AAAAAAAACCBAAAAA","col_3":"71","col_4":"Spring West","col_5":"Lane","col_6":"Suite 50","col_7":"Woodland","col_8":"Grant County","col_9":"NE","col_10":"64854","col_11":"United States","col_12":"-6","col_13":"apartment","col_14":""} -{"col_1":291,"col_2":"AAAAAAAADCBAAAAA","col_3":"638","col_4":"Main Lakeview","col_5":"Avenue","col_6":"Suite P","col_7":"Oakdale","col_8":"Storey County","col_9":"NV","col_10":"89584","col_11":"United States","col_12":"-8","col_13":"single family","col_14":""} -{"col_1":292,"col_2":"AAAAAAAAECBAAAAA","col_3":"151","col_4":"2nd ","col_5":"Drive","col_6":"Suite C","col_7":"Unionville","col_8":"Gray County","col_9":"KS","col_10":"61711","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":293,"col_2":"AAAAAAAAFCBAAAAA","col_3":"678","col_4":"Smith Franklin","col_5":"Lane","col_6":"Suite 80","col_7":"Edgewood","col_8":"Weber 
County","col_9":"UT","col_10":"80069","col_11":"United States","col_12":"-7","col_13":"condo","col_14":""} -{"col_1":294,"col_2":"AAAAAAAAGCBAAAAA","col_3":"640","col_4":"4th Laurel","col_5":"Boulevard","col_6":"Suite 340","col_7":"Enterprise","col_8":"Peach County","col_9":"GA","col_10":"31757","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} -{"col_1":295,"col_2":"AAAAAAAAHCBAAAAA","col_3":"195","col_4":"West Cherry","col_5":"Boulevard","col_6":"Suite S","col_7":"Enterprise","col_8":"Whitley County","col_9":"IN","col_10":"41757","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":296,"col_2":"AAAAAAAAICBAAAAA","col_3":"999","col_4":"2nd Main","col_5":"Drive","col_6":"Suite W","col_7":"Greenwood","col_8":"Ohio County","col_9":"KY","col_10":"48828","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":297,"col_2":"AAAAAAAAJCBAAAAA","col_3":"571","col_4":"Oak Miller","col_5":"Ln","col_6":"Suite 290","col_7":"Newport","col_8":"Rabun County","col_9":"GA","col_10":"31521","col_11":"United States","col_12":"-5","col_13":"single family","col_14":""} -{"col_1":298,"col_2":"AAAAAAAAKCBAAAAA","col_3":"177","col_4":"Church Church","col_5":"Pkwy","col_6":"Suite X","col_7":"Macon","col_8":"Waldo County","col_9":"ME","col_10":"00969","col_11":"United States","col_12":"-5","col_13":"condo","col_14":""} -{"col_1":299,"col_2":"AAAAAAAALCBAAAAA","col_3":"10","col_4":"Ash 4th","col_5":"Ave","col_6":"Suite X","col_7":"White Oak","col_8":"Finney County","col_9":"KS","col_10":"66668","col_11":"United States","col_12":"-6","col_13":"single family","col_14":""} -{"col_1":300,"col_2":"AAAAAAAAMCBAAAAA","col_3":"799","col_4":"10th ","col_5":"Road","col_6":"Suite 490","col_7":"Macedonia","col_8":"Switzerland County","col_9":"IN","col_10":"41087","col_11":"United States","col_12":"-5","col_13":"apartment","col_14":""} diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv deleted file mode 100644 index 424dc886fde87..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv +++ /dev/null @@ -1,3 +0,0 @@ -7,1 -19,10 -1,345 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.bz2 deleted file mode 100644 index 5d30848665d44..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.bz2 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz deleted file mode 100644 index 2d8c9cb91edd8..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv deleted file mode 100644 index 2f907294028b1..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv +++ /dev/null @@ -1,4 +0,0 @@ -1 -2 -71 -82 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.bz2 deleted file mode 100644 index 4a06ecc8843fe..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.bz2 and /dev/null 
differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz deleted file mode 100644 index 5466eaa771c0e..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 deleted file mode 100644 index 746baf57149b9..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv deleted file mode 100644 index c25ef79c3a630..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv +++ /dev/null @@ -1,7 +0,0 @@ -1 -2 -1 -41 -42 -4 -8 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 deleted file mode 100644 index a23e84d6c1882..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz deleted file mode 100644 index 7a439a4d1b0e8..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 deleted file mode 100644 index d337bd0bb8538..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv deleted file mode 100644 index 0cc012bf882a3..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv +++ /dev/null @@ -1,3 +0,0 @@ -1|2 -3|4 -55|66 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 deleted file mode 100644 index df138bc6d194a..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz deleted file mode 100644 index 6634c19f33456..0000000000000 Binary files a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz and /dev/null differ diff --git a/plugin/trino-hive-hadoop2/conf/files/tez-site.xml b/plugin/trino-hive-hadoop2/conf/files/tez-site.xml deleted file mode 100644 index 8f340b5611e9f..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/tez-site.xml +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - tez.lib.uris.ignore - false - - - tez.lib.uris - file:///usr/hdp/current/tez-client/lib/tez.tar.gz - - - tez.am.mode.session - false - - - tez.am.acl.enabled - false - - - tez.am.log.level - WARN - - - tez.task.log.level - 
WARN - - - tez.runtime.io.sort.mb - 8 - - - tez.am.max.app.attempts - 1 - - - tez.am.task.max.failed.attempts - 1 - - - tez.shuffle-vertex-manager.min-src-fraction - 0.10 - - - tez.shuffle-vertex-manager.max-src-fraction - 1.00 - - - tez.am.launch.cmd-opts - -server -Djava.net.preferIPv4Stack=true -XX:+UseParallelGC -Dhadoop.metrics.log.level=WARN - - - tez.am.resource.memory.mb - 512 - - - tez.task.launch.cmd-opts - -server -Djava.net.preferIPv4Stack=true -XX:+UseParallelGC -Dhadoop.metrics.log.level=WARN - - - tez.task.resource.memory.mb - 512 - - - tez.task.resource.cpu.vcores - 1 - - - tez.runtime.sort.threads - 1 - - - tez.runtime.io.sort.factor - 100 - - - tez.runtime.shuffle.memory-to-memory.enable - false - - - tez.runtime.optimize.local.fetch - true - - - hive.tez.container.size - 2048 - - diff --git a/plugin/trino-hive-hadoop2/conf/files/words b/plugin/trino-hive-hadoop2/conf/files/words deleted file mode 100644 index 6d01e61127052..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/words +++ /dev/null @@ -1,100 +0,0 @@ -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh deleted file mode 100644 index 8e05591f9118f..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh +++ /dev/null @@ -1 +0,0 @@ -export HADOOP_BASE_IMAGE="ghcr.io/trinodb/testing/hive3.1-hive" diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh deleted file mode 100644 index c736e171caba5..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh +++ /dev/null @@ -1 +0,0 @@ -export HADOOP_BASE_IMAGE="ghcr.io/trinodb/testing/hdp3.1-hive" diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh deleted file mode 100644 index cc5a3030aee03..0000000000000 --- a/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -DEFAULT_DOCKER_VERSION=$(./mvnw help:evaluate -Dexpression=dep.docker.images.version -q -DforceStdout) - -if [ -z "$DEFAULT_DOCKER_VERSION" ]; -then - >&2 echo "Could not read dep.docker.images.version from parent POM" - exit 1 -fi - -export DOCKER_IMAGES_VERSION=${DOCKER_IMAGES_VERSION:-$DEFAULT_DOCKER_VERSION} diff --git a/plugin/trino-hive-hadoop2/pom.xml b/plugin/trino-hive-hadoop2/pom.xml deleted file mode 100644 index 790c47ff3e7a0..0000000000000 --- a/plugin/trino-hive-hadoop2/pom.xml +++ /dev/null @@ -1,345 +0,0 @@ - - - 4.0.0 - - - io.trino - trino-root - 435-SNAPSHOT - ../../pom.xml - - - trino-hive-hadoop2 - trino-plugin - Trino - Hive Connector - Apache Hadoop 2.x - - - ${project.parent.basedir} - - - - - com.google.guava - guava - - - - io.trino - trino-hive - - - - com.fasterxml.jackson.core - jackson-annotations - provided - - - - io.airlift - slice - provided - - - - io.opentelemetry - opentelemetry-api - provided - - - - io.opentelemetry - opentelemetry-context - provided - - - - io.trino - trino-spi - provided - - - - org.openjdk.jol - jol-core - provided - - - - com.amazonaws - aws-java-sdk-core - runtime - - - - com.amazonaws - aws-java-sdk-s3 - runtime 
- - - - com.qubole.rubix - rubix-presto-shaded - runtime - - - - io.airlift - concurrent - runtime - - - - io.airlift - json - runtime - - - - io.airlift - stats - runtime - - - - io.airlift - units - runtime - - - - io.trino - trino-filesystem - runtime - - - - io.trino - trino-plugin-toolkit - runtime - - - - org.alluxio - alluxio-shaded-client - runtime - - - - io.airlift - junit-extensions - test - - - - io.airlift - testing - test - - - - io.trino - trino-hdfs - test - - - - io.trino - trino-hive - test-jar - test - - - - io.trino - trino-main - test - - - - io.trino - trino-spi - test-jar - test - - - - io.trino - trino-testing - test - - - - io.trino - trino-testing-containers - test - - - - io.trino - trino-testing-services - test - - - - io.trino.hadoop - hadoop-apache - test - - - - io.trino.hive - hive-apache - test - - - - org.assertj - assertj-core - test - - - - org.junit.jupiter - junit-jupiter-api - test - - - - org.junit.jupiter - junit-jupiter-engine - test - - - - org.testng - testng - test - - - - - - default - - true - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHive.java - **/TestHiveThriftMetastoreWithS3.java - **/TestHiveFileSystemS3.java - **/TestHiveFileSystemWasb.java - **/TestHiveFileSystemAbfsAccessKey.java - **/TestHiveFileSystemAbfsOAuth.java - **/TestHiveFileSystemAdl.java - **/TestHiveAzure.java - - - - - - - - test-hive-hadoop2 - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHive.java - - - - - - - - test-hive-hadoop2-s3 - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHiveThriftMetastoreWithS3.java - **/TestHiveFileSystemS3.java - - - - - - - - test-hive-hadoop2-wasb - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHiveFileSystemWasb.java - - - - - - - - test-hive-hadoop2-abfs-access-key - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHiveFileSystemAbfsAccessKey.java - - - - - - - - test-hive-hadoop2-abfs-oauth - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHiveFileSystemAbfsOAuth.java - - - - - - - - test-hive-hadoop2-adl - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHiveFileSystemAdl.java - - - - - - - - diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java deleted file mode 100644 index 8daba40b523bd..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.azure.HiveAzureConfig; -import io.trino.hdfs.azure.TrinoAzureConfigurationInitializer; -import io.trino.plugin.hive.AbstractTestHive.Transaction; -import io.trino.spi.connector.ColumnMetadata; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.SchemaTableName; -import org.apache.hadoop.fs.Path; - -import java.util.Map; -import java.util.Optional; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT; -import static io.trino.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT; -import static io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; -import static io.trino.spi.type.BigintType.BIGINT; -import static java.lang.String.format; -import static org.assertj.core.util.Strings.isNullOrEmpty; - -public abstract class AbstractTestHiveFileSystemAbfs - extends AbstractTestHiveFileSystem -{ - protected String account; - protected String container; - protected String testDirectory; - - protected static String checkParameter(String value, String name) - { - checkArgument(!isNullOrEmpty(value), "expected non-empty %s", name); - return value; - } - - protected void setup(String host, int port, String databaseName, String container, String account, String testDirectory) - { - this.container = checkParameter(container, "container"); - this.account = checkParameter(account, "account"); - this.testDirectory = checkParameter(testDirectory, "test directory"); - super.setup( - checkParameter(host, "host"), - port, - checkParameter(databaseName, "database name"), - createHdfsConfiguration()); - } - - @Override - protected void onSetupComplete() - { - ensureTableExists(table, "trino_test_external_fs", ImmutableMap.of()); - ensureTableExists(tableWithHeader, "trino_test_external_fs_with_header", ImmutableMap.of(SKIP_HEADER_LINE_COUNT, 1)); - ensureTableExists(tableWithHeaderAndFooter, "trino_test_external_fs_with_header_and_footer", ImmutableMap.of(SKIP_HEADER_LINE_COUNT, 2, SKIP_FOOTER_LINE_COUNT, 2)); - } - - private void ensureTableExists(SchemaTableName table, String tableDirectoryName, Map tableProperties) - { - try (Transaction transaction = newTransaction()) { - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata( - table, - ImmutableList.of(new ColumnMetadata("t_bigint", BIGINT)), - ImmutableMap.builder() - .putAll(tableProperties) - .put(STORAGE_FORMAT_PROPERTY, HiveStorageFormat.TEXTFILE) - .put(EXTERNAL_LOCATION_PROPERTY, getBasePath().toString() + "/" + tableDirectoryName) - .put(BUCKET_COUNT_PROPERTY, 0) - .put(BUCKETED_BY_PROPERTY, ImmutableList.of()) - .put(SORTED_BY_PROPERTY, ImmutableList.of()) - .buildOrThrow()); - if (!transaction.getMetadata().listTables(newSession(), 
Optional.of(table.getSchemaName())).contains(table)) { - transaction.getMetadata().createTable(newSession(), tableMetadata, false); - } - transaction.commit(); - } - } - - protected abstract HiveAzureConfig getConfig(); - - private HdfsConfiguration createHdfsConfiguration() - { - ConfigurationInitializer initializer = new TrinoAzureConfigurationInitializer(getConfig()); - return new DynamicHdfsConfiguration(new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(initializer)), ImmutableSet.of()); - } - - @Override - protected Path getBasePath() - { - return new Path(format("abfs://%s@%s.dfs.core.windows.net/%s/", container, account, testDirectory)); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java deleted file mode 100644 index 2ae7c145603d2..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.net.HostAndPort; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.SchemaTablePrefix; -import org.apache.hadoop.net.NetUtils; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assumptions.abort; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHive - extends AbstractTestHive -{ - @BeforeAll - public void initialize() - { - String metastore = System.getProperty("test.metastore"); - String database = System.getProperty("test.database"); - String hadoopMasterIp = System.getProperty("hadoop-master-ip"); - if (hadoopMasterIp != null) { - // Even though Hadoop is accessed by proxy, Hadoop still tries to resolve hadoop-master - // (e.g: in: NameNodeProxies.createProxy) - // This adds a static resolution for hadoop-master to docker container internal ip - NetUtils.addStaticResolution("hadoop-master", hadoopMasterIp); - } - - setup(HostAndPort.fromString(metastore), database); - } - - @Test - @Override - public void testHideDeltaLakeTables() - { - assertThatThrownBy(super::testHideDeltaLakeTables) - .hasMessageMatching("(?s)\n" + - "Expecting\n" + - " \\[.*\\b(\\w+.tmp_trino_test_trino_delta_lake_table_\\w+)\\b.*]\n" + - "not to contain\n" + - " \\[\\1]\n" + - "but found.*"); - - abort("not supported"); - } - - @Test - public void testHiveViewsHaveNoColumns() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(listTableColumns(metadata, newSession(), new 
SchemaTablePrefix(view.getSchemaName(), view.getTableName()))) - .isEmpty(); - } - } - - @Test - public void testHiveViewTranslationError() - { - try (Transaction transaction = newTransaction()) { - assertThatThrownBy(() -> transaction.getMetadata().getView(newSession(), view)) - .isInstanceOf(HiveViewNotSupportedException.class) - .hasMessageContaining("Hive views are not supported"); - - // TODO: combine this with tests for successful translation (currently in TestHiveViews product test) - } - } - - @Test - @Override - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testStorePartitionWithStatistics() - throws Exception - { - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. 
- testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, EMPTY_ROWCOUNT_STATISTICS); - } - - @Test - @Override - public void testDataColumnProperties() - { - // Column properties are currently not supported in ThriftHiveMetastore - assertThatThrownBy(super::testDataColumnProperties) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Persisting column properties is not supported: Column{name=id, type=bigint}"); - } - - @Test - @Override - public void testPartitionColumnProperties() - { - // Column properties are currently not supported in ThriftHiveMetastore - assertThatThrownBy(super::testPartitionColumnProperties) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Persisting column properties is not supported: Column{name=part_key, type=varchar(256)}"); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsAccessKey.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsAccessKey.java deleted file mode 100644 index 539fc8ffcc47b..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsAccessKey.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import io.trino.hdfs.azure.HiveAzureConfig; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.TestInstance; - -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveFileSystemAbfsAccessKey - extends AbstractTestHiveFileSystemAbfs -{ - private String accessKey; - - @BeforeAll - public void setup() - { - this.accessKey = checkParameter(System.getProperty("hive.hadoop2.abfs.accessKey"), "access key"); - super.setup( - System.getProperty("hive.hadoop2.metastoreHost"), - Integer.getInteger("hive.hadoop2.metastorePort"), - System.getProperty("hive.hadoop2.databaseName"), - System.getProperty("hive.hadoop2.abfs.container"), - System.getProperty("hive.hadoop2.abfs.account"), - System.getProperty("hive.hadoop2.abfs.testDirectory")); - } - - @Override - protected HiveAzureConfig getConfig() - { - return new HiveAzureConfig() - .setAbfsAccessKey(accessKey) - .setAbfsStorageAccount(account); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsOAuth.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsOAuth.java deleted file mode 100644 index 36adb3a9db31b..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAbfsOAuth.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import io.trino.hdfs.azure.HiveAzureConfig; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.TestInstance; - -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveFileSystemAbfsOAuth - extends AbstractTestHiveFileSystemAbfs -{ - private String endpoint; - private String clientId; - private String secret; - - @BeforeAll - public void setup() - { - this.endpoint = checkParameter(System.getProperty("test.hive.azure.abfs.oauth.endpoint"), "endpoint"); - this.clientId = checkParameter(System.getProperty("test.hive.azure.abfs.oauth.client-id"), "client ID"); - this.secret = checkParameter(System.getProperty("test.hive.azure.abfs.oauth.secret"), "secret"); - super.setup( - System.getProperty("hive.hadoop2.metastoreHost"), - Integer.getInteger("hive.hadoop2.metastorePort"), - System.getProperty("hive.hadoop2.databaseName"), - System.getProperty("test.hive.azure.abfs.container"), - System.getProperty("test.hive.azure.abfs.storage-account"), - System.getProperty("test.hive.azure.abfs.test-directory")); - } - - @Override - protected HiveAzureConfig getConfig() - { - return new HiveAzureConfig() - .setAbfsOAuthClientEndpoint(endpoint) - .setAbfsOAuthClientId(clientId) - .setAbfsOAuthClientSecret(secret); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java deleted file mode 100644 index 525bf50631065..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableSet; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.azure.HiveAzureConfig; -import io.trino.hdfs.azure.TrinoAzureConfigurationInitializer; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.io.FileNotFoundException; -import java.util.UUID; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.util.Strings.isNullOrEmpty; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveFileSystemAdl - extends AbstractTestHiveFileSystem -{ - private String dataLakeName; - private String clientId; - private String credential; - private String refreshUrl; - private String testDirectory; - - @BeforeAll - public void setup() - { - String host = System.getProperty("hive.hadoop2.metastoreHost"); - int port = Integer.getInteger("hive.hadoop2.metastorePort"); - String databaseName = System.getProperty("hive.hadoop2.databaseName"); - String dataLakeName = System.getProperty("hive.hadoop2.adl.name"); - String clientId = System.getProperty("hive.hadoop2.adl.clientId"); - String credential = System.getProperty("hive.hadoop2.adl.credential"); - String refreshUrl = System.getProperty("hive.hadoop2.adl.refreshUrl"); - String testDirectory = System.getProperty("hive.hadoop2.adl.testDirectory"); - - checkArgument(!isNullOrEmpty(host), "expected non empty host"); - checkArgument(!isNullOrEmpty(databaseName), "expected non empty databaseName"); - checkArgument(!isNullOrEmpty(dataLakeName), "expected non empty dataLakeName"); - checkArgument(!isNullOrEmpty(clientId), "expected non empty clientId"); - checkArgument(!isNullOrEmpty(credential), "expected non empty credential"); - checkArgument(!isNullOrEmpty(refreshUrl), "expected non empty refreshUrl"); - checkArgument(!isNullOrEmpty(testDirectory), "expected non empty testDirectory"); - - this.dataLakeName = dataLakeName; - this.clientId = clientId; - this.credential = credential; - this.refreshUrl = refreshUrl; - this.testDirectory = testDirectory; - - super.setup(host, port, databaseName, createHdfsConfiguration()); - } - - private HdfsConfiguration createHdfsConfiguration() - { - ConfigurationInitializer azureConfig = new TrinoAzureConfigurationInitializer(new HiveAzureConfig() - .setAdlClientId(clientId) - .setAdlCredential(credential) - .setAdlRefreshUrl(refreshUrl)); - return new DynamicHdfsConfiguration(new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(azureConfig)), ImmutableSet.of()); - } - - @Override - protected Path getBasePath() - { - return new Path(format("adl://%s.azuredatalakestore.net/%s/", dataLakeName, testDirectory)); - } - - @Override - @Test - public void testRename() - throws Exception - { - Path basePath = new Path(getBasePath(), UUID.randomUUID().toString()); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - assertThat(fs.exists(basePath)).isFalse(); - - // create file foo.txt - Path path = new 
Path(basePath, "foo.txt"); - assertThat(fs.createNewFile(path)).isTrue(); - assertThat(fs.exists(path)).isTrue(); - - // rename foo.txt to bar.txt when bar does not exist - Path newPath = new Path(basePath, "bar.txt"); - assertThat(fs.exists(newPath)).isFalse(); - assertThat(fs.rename(path, newPath)).isTrue(); - assertThat(fs.exists(path)).isFalse(); - assertThat(fs.exists(newPath)).isTrue(); - - // rename foo.txt to foo.txt when foo.txt does not exist - // This fails with error no such file in ADLFileSystem - assertThatThrownBy(() -> fs.rename(path, path)) - .isInstanceOf(FileNotFoundException.class); - - // create file foo.txt and rename to existing bar.txt - assertThat(fs.createNewFile(path)).isTrue(); - assertThat(fs.rename(path, newPath)).isFalse(); - - // rename foo.txt to foo.txt when foo.txt exists - // This returns true in ADLFileSystem - assertThat(fs.rename(path, path)).isTrue(); - - // delete foo.txt - assertThat(fs.delete(path, false)).isTrue(); - assertThat(fs.exists(path)).isFalse(); - - // create directory source with file - Path source = new Path(basePath, "source"); - assertThat(fs.createNewFile(new Path(source, "test.txt"))).isTrue(); - - // rename source to non-existing target - Path target = new Path(basePath, "target"); - assertThat(fs.exists(target)).isFalse(); - assertThat(fs.rename(source, target)).isTrue(); - assertThat(fs.exists(source)).isFalse(); - assertThat(fs.exists(target)).isTrue(); - - // create directory source with file - assertThat(fs.createNewFile(new Path(source, "test.txt"))).isTrue(); - - // rename source to existing target - assertThat(fs.rename(source, target)).isTrue(); - assertThat(fs.exists(source)).isFalse(); - target = new Path(target, "source"); - assertThat(fs.exists(target)).isTrue(); - assertThat(fs.exists(new Path(target, "test.txt"))).isTrue(); - - // delete target - target = new Path(basePath, "target"); - assertThat(fs.exists(target)).isTrue(); - assertThat(fs.delete(target, true)).isTrue(); - assertThat(fs.exists(target)).isFalse(); - - // cleanup - fs.delete(basePath, true); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java deleted file mode 100644 index 5ff770ac39ff5..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Streams; -import com.google.common.net.MediaType; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.TrinoHdfsFileSystemStats; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.plugin.hive.fs.FileSystemDirectoryLister; -import io.trino.plugin.hive.fs.HiveFileIterator; -import io.trino.plugin.hive.fs.TrinoFileStatus; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.plugin.hive.HiveTestUtils.SESSION; -import static io.trino.plugin.hive.HiveType.HIVE_LONG; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static java.io.InputStream.nullInputStream; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.util.Strings.isNullOrEmpty; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveFileSystemS3 - extends AbstractTestHiveFileSystem -{ - private static final MediaType DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); - private String awsAccessKey; - private String awsSecretKey; - private String writableBucket; - private String testDirectory; - private AmazonS3 s3Client; - - @BeforeAll - public void setup() - { - String host = System.getProperty("hive.hadoop2.metastoreHost"); - int port = Integer.getInteger("hive.hadoop2.metastorePort"); - String databaseName = System.getProperty("hive.hadoop2.databaseName"); - String s3endpoint = System.getProperty("hive.hadoop2.s3.endpoint"); - String awsAccessKey = System.getProperty("hive.hadoop2.s3.awsAccessKey"); - String awsSecretKey = System.getProperty("hive.hadoop2.s3.awsSecretKey"); - String writableBucket = System.getProperty("hive.hadoop2.s3.writableBucket"); - String testDirectory = System.getProperty("hive.hadoop2.s3.testDirectory"); - - checkArgument(!isNullOrEmpty(host), "Expected non empty host"); - checkArgument(!isNullOrEmpty(databaseName), "Expected non empty databaseName"); - checkArgument(!isNullOrEmpty(awsAccessKey), "Expected non empty awsAccessKey"); - checkArgument(!isNullOrEmpty(awsSecretKey), "Expected non empty awsSecretKey"); - 
checkArgument(!isNullOrEmpty(s3endpoint), "Expected non empty s3endpoint"); - checkArgument(!isNullOrEmpty(writableBucket), "Expected non empty writableBucket"); - checkArgument(!isNullOrEmpty(testDirectory), "Expected non empty testDirectory"); - this.awsAccessKey = awsAccessKey; - this.awsSecretKey = awsSecretKey; - this.writableBucket = writableBucket; - this.testDirectory = testDirectory; - - s3Client = AmazonS3Client.builder() - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null)) - .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey))) - .build(); - - setup(host, port, databaseName, createHdfsConfiguration()); - } - - private HdfsConfiguration createHdfsConfiguration() - { - ConfigurationInitializer s3Config = new TrinoS3ConfigurationInitializer(new HiveS3Config() - .setS3AwsAccessKey(awsAccessKey) - .setS3AwsSecretKey(awsSecretKey)); - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(s3Config)); - return new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); - } - - @Override - protected Path getBasePath() - { - // HDP 3.1 does not understand s3:// out of the box. - return new Path(format("s3a://%s/%s/", writableBucket, testDirectory)); - } - - @Test - public void testIgnoreHadoopFolderMarker() - throws Exception - { - Path basePath = getBasePath(); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - - String markerFileName = "test_table_$folder$"; - Path filePath = new Path(basePath, markerFileName); - fs.create(filePath).close(); - - assertThat(Arrays.stream(fs.listStatus(basePath)).anyMatch(file -> file.getPath().getName().equalsIgnoreCase(markerFileName))).isFalse(); - } - - /** - * Tests the same functionality like {@link #testFileIteratorPartitionedListing()} with the - * setup done by native {@link AmazonS3} - */ - @Test - public void testFileIteratorPartitionedListingNativeS3Client() - throws Exception - { - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(table.getSchemaName()) - .setTableName(table.getTableName()) - .setDataColumns(ImmutableList.of(new Column("data", HIVE_LONG, Optional.empty(), Map.of()))) - .setPartitionColumns(ImmutableList.of(new Column("part", HIVE_STRING, Optional.empty(), Map.of()))) - .setOwner(Optional.empty()) - .setTableType("fake"); - tableBuilder.getStorageBuilder() - .setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.CSV)); - Table fakeTable = tableBuilder.build(); - - Path basePath = new Path(getBasePath(), "test-file-iterator-partitioned-listing-native-setup"); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment, new TrinoHdfsFileSystemStats()).create(SESSION); - fs.mkdirs(basePath); - String basePrefix = basePath.toUri().getPath().substring(1); - - // Expected file system tree: - // test-file-iterator-partitioned-listing-native-setup/ - // .hidden/ - // nested-file-in-hidden.txt - // part=simple/ - // _hidden-file.txt - // plain-file.txt - // part=nested/ - // parent/ - // _nested-hidden-file.txt - // nested-file.txt - // part=plus+sign/ - // plus-file.txt - // part=percent%sign/ - // percent-file.txt - // part=url%20encoded/ - // url-encoded-file.txt - // part=level1|level2/ - // pipe-file.txt - // parent1/ - // parent2/ - // deeply-nested-file.txt - // part=level1 | level2/ - // pipe-blanks-file.txt - // empty-directory/ - // 
.hidden-in-base.txt - - createFile(writableBucket, format("%s/.hidden/nested-file-in-hidden.txt", basePrefix)); - createFile(writableBucket, format("%s/part=simple/_hidden-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=simple/plain-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=nested/parent/_nested-hidden-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=nested/parent/nested-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=plus+sign/plus-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=percent%%sign/percent-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=url%%20encoded/url-encoded-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1|level2/pipe-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1 | level2/pipe-blanks-file.txt", basePrefix)); - createDirectory(writableBucket, format("%s/empty-directory/", basePrefix)); - createFile(writableBucket, format("%s/.hidden-in-base.txt", basePrefix)); - - // List recursively through hive file iterator - HiveFileIterator recursiveIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.RECURSE); - - List recursiveListing = Streams.stream(recursiveIterator) - .map(TrinoFileStatus::getPath) - .toList(); - // Should not include directories, or files underneath hidden directories - assertThat(recursiveListing).containsExactlyInAnyOrder( - format("%s/part=simple/plain-file.txt", basePath), - format("%s/part=nested/parent/nested-file.txt", basePath), - format("%s/part=plus+sign/plus-file.txt", basePath), - format("%s/part=percent%%sign/percent-file.txt", basePath), - format("%s/part=url%%20encoded/url-encoded-file.txt", basePath), - format("%s/part=level1|level2/pipe-file.txt", basePath), - format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePath), - format("%s/part=level1 | level2/pipe-blanks-file.txt", basePath)); - - HiveFileIterator shallowIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.IGNORED); - List shallowListing = Streams.stream(shallowIterator) - .map(TrinoFileStatus::getPath) - .map(Path::new) - .toList(); - // Should not include any hidden files, folders, or nested files - assertThat(shallowListing).isEmpty(); - } - - protected void createDirectory(String bucketName, String key) - { - // create meta-data for your folder and set content-length to 0 - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(0); - metadata.setContentType(DIRECTORY_MEDIA_TYPE.toString()); - // create a PutObjectRequest passing the folder name suffixed by / - if (!key.endsWith("/")) { - key += "/"; - } - PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); - // send request to S3 to create folder - s3Client.putObject(putObjectRequest); - } - - protected void createFile(String bucketName, String key) - { - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(0); - PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); - s3Client.putObject(putObjectRequest); - } -} diff 
--git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java deleted file mode 100644 index 08d9eda5c3e46..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableSet; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.azure.HiveAzureConfig; -import io.trino.hdfs.azure.TrinoAzureConfigurationInitializer; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.TestInstance; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.lang.String.format; -import static org.assertj.core.util.Strings.isNullOrEmpty; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveFileSystemWasb - extends AbstractTestHiveFileSystem -{ - private String container; - private String account; - private String accessKey; - private String testDirectory; - - @BeforeAll - public void setup() - { - String host = System.getProperty("hive.hadoop2.metastoreHost"); - int port = Integer.getInteger("hive.hadoop2.metastorePort"); - String databaseName = System.getProperty("hive.hadoop2.databaseName"); - String container = System.getProperty("hive.hadoop2.wasb.container"); - String account = System.getProperty("hive.hadoop2.wasb.account"); - String accessKey = System.getProperty("hive.hadoop2.wasb.accessKey"); - String testDirectory = System.getProperty("hive.hadoop2.wasb.testDirectory"); - - checkArgument(!isNullOrEmpty(host), "expected non empty host"); - checkArgument(!isNullOrEmpty(databaseName), "expected non empty databaseName"); - checkArgument(!isNullOrEmpty(container), "expected non empty container"); - checkArgument(!isNullOrEmpty(account), "expected non empty account"); - checkArgument(!isNullOrEmpty(accessKey), "expected non empty accessKey"); - checkArgument(!isNullOrEmpty(testDirectory), "expected non empty testDirectory"); - - this.container = container; - this.account = account; - this.accessKey = accessKey; - this.testDirectory = testDirectory; - - super.setup(host, port, databaseName, createHdfsConfiguration()); - } - - private HdfsConfiguration createHdfsConfiguration() - { - ConfigurationInitializer wasbConfig = new TrinoAzureConfigurationInitializer(new HiveAzureConfig() - .setWasbAccessKey(accessKey) - .setWasbStorageAccount(account)); - return new DynamicHdfsConfiguration(new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(wasbConfig)), ImmutableSet.of()); - } - - @Override - protected Path getBasePath() - { - 
return new Path(format("wasb://%s@%s.blob.core.windows.net/%s/", container, account, testDirectory)); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java deleted file mode 100644 index d46a24a7e5157..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.google.common.collect.ImmutableMap; -import com.google.common.io.Resources; -import io.trino.plugin.hive.containers.HiveHadoop; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; -import io.trino.plugin.hive.s3.S3HiveQueryRunner; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.QueryRunner; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.PosixFilePermissions; -import java.util.List; - -import static io.trino.testing.TestingNames.randomNameSuffix; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Objects.requireNonNull; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHiveThriftMetastoreWithS3 - extends AbstractTestQueryFramework -{ - private final String s3endpoint; - private final String s3Region; - private final String awsAccessKey; - private final String awsSecretKey; - private final String writableBucket; - private final String schemaName; - private final Path hadoopCoreSiteXmlTempFile; - private final AmazonS3 s3Client; - - public TestHiveThriftMetastoreWithS3() - throws IOException - { - this.s3endpoint = requireNonNull(System.getProperty("hive.hadoop2.s3.endpoint"), "hive.hadoop2.s3.endpoint is null"); - this.s3Region = requireNonNull(System.getProperty("hive.hadoop2.s3.region"), "hive.hadoop2.s3.region is null"); - this.awsAccessKey = requireNonNull(System.getProperty("hive.hadoop2.s3.awsAccessKey"), "hive.hadoop2.s3.awsAccessKey is null"); - this.awsSecretKey = requireNonNull(System.getProperty("hive.hadoop2.s3.awsSecretKey"), "hive.hadoop2.s3.awsSecretKey is null"); - this.writableBucket = requireNonNull(System.getProperty("hive.hadoop2.s3.writableBucket"), "hive.hadoop2.s3.writableBucket is null"); - this.schemaName = "test_thrift_s3_" + 
randomNameSuffix(); - - String coreSiteXmlContent = Resources.toString(Resources.getResource("s3/hive-core-site.template.xml"), UTF_8) - .replace("%S3_BUCKET_ENDPOINT%", s3endpoint) - .replace("%AWS_ACCESS_KEY_ID%", awsAccessKey) - .replace("%AWS_SECRET_ACCESS_KEY%", awsSecretKey); - - hadoopCoreSiteXmlTempFile = Files.createTempFile("core-site", ".xml", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); - hadoopCoreSiteXmlTempFile.toFile().deleteOnExit(); - Files.writeString(hadoopCoreSiteXmlTempFile, coreSiteXmlContent); - - s3Client = AmazonS3Client.builder() - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null)) - .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey))) - .build(); - } - - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - HiveHadoop hiveHadoop = HiveHadoop.builder() - .withFilesToMount(ImmutableMap.of("/etc/hadoop/conf/core-site.xml", hadoopCoreSiteXmlTempFile.normalize().toAbsolutePath().toString())) - .build(); - hiveHadoop.start(); - - return S3HiveQueryRunner.builder() - .setHiveMetastoreEndpoint(hiveHadoop.getHiveMetastoreEndpoint()) - .setS3Endpoint(s3endpoint) - .setS3Region(s3Region) - .setS3AccessKey(awsAccessKey) - .setS3SecretKey(awsSecretKey) - .setBucketName(writableBucket) - .setCreateTpchSchemas(false) - .setThriftMetastoreConfig(new ThriftMetastoreConfig().setDeleteFilesOnDrop(true)) - .setHiveProperties(ImmutableMap.of("hive.allow-register-partition-procedure", "true")) - .build(); - } - - @BeforeAll - public void setUp() - { - String schemaLocation = "s3a://%s/%s".formatted(writableBucket, schemaName); - assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); - } - - @AfterAll - public void tearDown() - { - assertUpdate("DROP SCHEMA IF EXISTS " + schemaName); - } - - @Test - public void testRecreateTable() - { - String tableName = "test_recreate_table_" + randomNameSuffix(); - String schemaTableName = "%s.%s".formatted(schemaName, tableName); - String tableLocation = "%s/%s".formatted(schemaName, tableName); - - // Creating a new table generates special empty file on S3 (not MinIO) - assertUpdate("CREATE TABLE " + schemaTableName + "(col int)"); - try { - assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1)", 1); - assertThat(getS3ObjectSummaries(tableLocation)).hasSize(2); // directory + file - - // DROP TABLE with Thrift metastore on S3 (not MinIO) leaves some files - // when 'hive.metastore.thrift.delete-files-on-drop' config property is false. 
- // Then, the subsequent CREATE TABLE throws "Target directory for table 'xxx' already exists" - assertUpdate("DROP TABLE " + schemaTableName); - assertThat(getS3ObjectSummaries(tableLocation)).hasSize(0); - - assertUpdate("CREATE TABLE " + schemaTableName + "(col int)"); - } - finally { - assertUpdate("DROP TABLE IF EXISTS " + schemaTableName); - } - } - - @Test - public void testRecreatePartition() - { - String tableName = "test_recreate_partition_" + randomNameSuffix(); - String schemaTableName = "%s.%s".formatted(schemaName, tableName); - String partitionLocation = "%s/%s/part=1".formatted(schemaName, tableName); - - assertUpdate("CREATE TABLE " + schemaTableName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])"); - try { - // Creating an empty partition generates special empty file on S3 (not MinIO) - assertUpdate("CALL system.create_empty_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName)); - assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1, 1)", 1); - assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)"); - - assertThat(getS3ObjectSummaries(partitionLocation)).hasSize(2); // directory + file - - // DELETE with Thrift metastore on S3 (not MinIO) leaves some files - // when 'hive.metastore.thrift.delete-files-on-drop' config property is false. - // Then, the subsequent SELECT doesn't return an empty row - assertUpdate("DELETE FROM " + schemaTableName); - assertThat(getS3ObjectSummaries(partitionLocation)).hasSize(0); - - assertUpdate("CALL system.create_empty_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName)); - assertQueryReturnsEmptyResult("SELECT * FROM " + schemaTableName); - } - finally { - assertUpdate("DROP TABLE " + schemaTableName); - } - } - - @Test - public void testUnregisterPartitionNotRemoveData() - { - // Verify unregister_partition procedure doesn't remove physical data even when 'hive.metastore.thrift.delete-files-on-drop' config property is true - String tableName = "test_recreate_partition_" + randomNameSuffix(); - String schemaTableName = "%s.%s".formatted(schemaName, tableName); - - assertUpdate("CREATE TABLE " + schemaTableName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])"); - try { - assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1, 1)", 1); - assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)"); - - assertUpdate("CALL system.unregister_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName)); - assertQueryReturnsEmptyResult("SELECT * FROM " + schemaTableName); - - assertUpdate("CALL system.register_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName)); - assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)"); - } - finally { - assertUpdate("DROP TABLE " + schemaTableName); - } - } - - private List getS3ObjectSummaries(String prefix) - { - return s3Client.listObjectsV2(writableBucket, prefix).getObjectSummaries(); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml b/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml deleted file mode 100644 index a3dc6ad47d4b0..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.s3a.endpoint - %S3_BUCKET_ENDPOINT% - - - - fs.s3.awsAccessKeyId - %AWS_ACCESS_KEY_ID% - - - - fs.s3.awsSecretAccessKey - %AWS_SECRET_ACCESS_KEY% - - 
- - fs.s3a.access.key - %AWS_ACCESS_KEY_ID% - - - - fs.s3a.secret.key - %AWS_SECRET_ACCESS_KEY% - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml index 76f7d9f3fbdd3..62a3ebca9725c 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -5,11 +5,12 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml trino-hive + trino-plugin Trino - Hive Connector @@ -121,16 +122,6 @@ units - - io.opentelemetry - opentelemetry-api - - - - io.opentelemetry - opentelemetry-context - - io.opentelemetry.instrumentation opentelemetry-aws-sdk-1.11 @@ -259,14 +250,20 @@ - io.trino - trino-spi + io.opentelemetry + opentelemetry-api provided - org.jetbrains - annotations + io.opentelemetry + opentelemetry-context + provided + + + + io.trino + trino-spi provided @@ -318,6 +315,12 @@ runtime + + org.jetbrains + annotations + runtime + + io.airlift junit-extensions diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java index a79c7a6b2adce..da6d10b9abc06 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java @@ -419,7 +419,7 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) partitionKeys, effectivePredicate, partitionMatchSupplier, - partition.getTableToPartitionMapping(), + partition.getHiveColumnCoercions(), Optional.empty(), Optional.empty(), getMaxInitialSplitSize(session), @@ -470,7 +470,7 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) partitionKeys, effectivePredicate, partitionMatchSupplier, - partition.getTableToPartitionMapping(), + partition.getHiveColumnCoercions(), bucketConversionRequiresWorkerParticipation ? 
bucketConversion : Optional.empty(), bucketValidation, getMaxInitialSplitSize(session), diff --git a/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java similarity index 100% rename from plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java rename to plugin/trino-hive/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java index 0f54cc3af1ecb..6a7a1817f9a0f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java @@ -68,7 +68,6 @@ public class HiveConfig public static final String CONFIGURATION_HIVE_PARTITION_PROJECTION_ENABLED = "hive.partition-projection-enabled"; private static final Splitter SPLITTER = Splitter.on(',').trimResults().omitEmptyStrings(); - public static final String HIVE_VIEWS_ENABLED = "hive.hive-views.enabled"; private boolean singleStatementWritesOnly; @@ -168,6 +167,7 @@ public class HiveConfig private Optional hudiCatalogName = Optional.empty(); private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE); + private DataSize idleWriterMinFileSize = DataSize.of(16, MEGABYTE); private boolean sizeBasedSplitWeightsEnabled = true; private double minimumAssignedSplitWeight = 0.05; @@ -270,6 +270,19 @@ public HiveConfig setTargetMaxFileSize(DataSize targetMaxFileSize) return this; } + public DataSize getIdleWriterMinFileSize() + { + return idleWriterMinFileSize; + } + + @Config("hive.idle-writer-min-file-size") + @ConfigDescription("Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine") + public HiveConfig setIdleWriterMinFileSize(DataSize idleWriterMinFileSize) + { + this.idleWriterMinFileSize = idleWriterMinFileSize; + return this; + } + public boolean isForceLocalScheduling() { return forceLocalScheduling; @@ -785,7 +798,7 @@ public boolean isTranslateHiveViews() } @LegacyConfig({"hive.views-execution.enabled", "hive.translate-hive-views"}) - @Config(HIVE_VIEWS_ENABLED) + @Config("hive.hive-views.enabled") @ConfigDescription("Experimental: Allow translation of Hive views into Trino views") public HiveConfig setTranslateHiveViews(boolean translateHiveViews) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMaterializedViewMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMaterializedViewMetadata.java index f47ad0dba86cd..b2f405f242117 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMaterializedViewMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMaterializedViewMetadata.java @@ -25,7 +25,13 @@ public interface HiveMaterializedViewMetadata { - void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting); + void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting); void dropMaterializedView(ConnectorSession session, SchemaTableName viewName); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index a6c53d5543a56..c95a23fde674f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -3857,9 +3857,15 @@ public void cleanupQuery(ConnectorSession session) } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { - hiveMaterializedViewMetadata.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + hiveMaterializedViewMetadata.createMaterializedView(session, viewName, definition, properties, replace, ignoreExisting); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java index cdbeee24b11a9..42555b6233f69 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java @@ -140,24 +140,15 @@ public void updateTableStatistics(String databaseName, delegate.updateTableStatistics(databaseName, tableName, transaction, update); } - public void updatePartitionStatistics(String databaseName, - String tableName, - String partitionName, - Function update) - { - Table table = getExistingTable(databaseName, tableName); - delegate.updatePartitionStatistics(table, partitionName, update); - } - public void updatePartitionStatistics(String databaseName, String tableName, Map> updates) { Table table = getExistingTable(databaseName, tableName); delegate.updatePartitionStatistics(table, updates); } - public List getAllTables(String databaseName) + public List getTables(String databaseName) { - return delegate.getAllTables(databaseName); + return delegate.getTables(databaseName); } public Optional> getAllTables() @@ -170,14 +161,14 @@ public Map getRelationTypes(String databaseName) return delegate.getRelationTypes(databaseName); } - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { - return delegate.getRelationTypes(); + return delegate.getAllRelationTypes(); } - public List getAllViews(String databaseName) + public List getViews(String databaseName) { - return delegate.getAllViews(databaseName); + return delegate.getViews(databaseName); } public Optional> getAllViews() @@ -453,9 +444,9 @@ public boolean functionExists(SchemaFunctionName name, String signatureToken) return delegate.functionExists(name.getSchemaName(), name.getFunctionName(), signatureToken); } - public Collection getFunctions(String schemaName) + public Collection getAllFunctions(String schemaName) { - return delegate.getFunctions(schemaName); + return delegate.getAllFunctions(schemaName); } public Collection getFunctions(SchemaFunctionName name) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSink.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSink.java index c703ea951feb9..f1e688fb71430 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSink.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSink.java @@ -20,6 +20,7 @@ import 
com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import io.airlift.json.JsonCodec; +import io.airlift.log.Logger; import io.airlift.slice.Slice; import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion; import io.trino.spi.Page; @@ -64,6 +65,7 @@ public class HivePageSink implements ConnectorPageSink, ConnectorMergeSink { + private static final Logger LOG = Logger.get(HivePageSink.class); private static final int MAX_PAGE_POSITIONS = 4096; private final HiveWriterFactory writerFactory; @@ -85,9 +87,11 @@ public class HivePageSink private final List writers = new ArrayList<>(); private final long targetMaxFileSize; + private final long idleWriterMinFileSize; private final List closedWriterRollbackActions = new ArrayList<>(); private final List partitionUpdates = new ArrayList<>(); private final List> verificationTasks = new ArrayList<>(); + private final List activeWriters = new ArrayList<>(); private final boolean isMergeSink; private long writtenBytes; @@ -161,6 +165,7 @@ public HivePageSink( } this.targetMaxFileSize = HiveSessionProperties.getTargetMaxFileSize(session).toBytes(); + this.idleWriterMinFileSize = HiveSessionProperties.getIdleWriterMinFileSize(session).toBytes(); } @Override @@ -191,6 +196,9 @@ private ListenableFuture> doMergeSinkFinish() { ImmutableList.Builder resultSlices = ImmutableList.builder(); for (HiveWriter writer : writers) { + if (writer == null) { + continue; + } writer.commit(); MergeFileWriter mergeFileWriter = (MergeFileWriter) writer.getFileWriter(); PartitionUpdateAndMergeResults results = mergeFileWriter.getPartitionUpdateAndMergeResults(writer.getPartitionUpdate()); @@ -198,6 +206,7 @@ private ListenableFuture> doMergeSinkFinish() } List result = resultSlices.build(); writtenBytes = writers.stream() + .filter(Objects::nonNull) .mapToLong(HiveWriter::getWrittenBytes) .sum(); return Futures.immediateFuture(result); @@ -308,6 +317,7 @@ private void writePage(Page page) } HiveWriter writer = writers.get(index); + verify(writer != null, "Expected writer at index %s", index); long currentWritten = writer.getWrittenBytes(); long currentMemory = writer.getMemoryUsage(); @@ -316,12 +326,17 @@ private void writePage(Page page) writtenBytes += (writer.getWrittenBytes() - currentWritten); memoryUsage += (writer.getMemoryUsage() - currentMemory); + // Mark this writer as active (i.e. not idle) + activeWriters.set(index, true); } } private void closeWriter(int writerIndex) { HiveWriter writer = writers.get(writerIndex); + if (writer == null) { + return; + } long currentWritten = writer.getWrittenBytes(); long currentMemory = writer.getMemoryUsage(); @@ -338,6 +353,26 @@ private void closeWriter(int writerIndex) partitionUpdates.add(wrappedBuffer(partitionUpdateCodec.toJsonBytes(partitionUpdate))); } + @Override + public void closeIdleWriters() + { + // For transactional tables we don't want to split output files because there is an explicit or implicit bucketing + // and file names have no random component (e.g. 
bucket_00000) + if (bucketFunction != null || isTransactional) { + return; + } + + for (int writerIndex = 0; writerIndex < writers.size(); writerIndex++) { + HiveWriter writer = writers.get(writerIndex); + if (activeWriters.get(writerIndex) || writer == null || writer.getWrittenBytes() <= idleWriterMinFileSize) { + activeWriters.set(writerIndex, false); + continue; + } + LOG.debug("Closing writer %s with %s bytes written", writerIndex, writer.getWrittenBytes()); + closeWriter(writerIndex); + } + } + private int[] getWriterIndexes(Page page) { Page partitionColumns = extractColumns(page, partitionColumnsInputIndex); @@ -350,6 +385,7 @@ private int[] getWriterIndexes(Page page) // expand writers list to new size while (writers.size() <= pagePartitioner.getMaxIndex()) { writers.add(null); + activeWriters.add(false); } // create missing writers @@ -378,7 +414,6 @@ private int[] getWriterIndexes(Page page) memoryUsage += writer.getMemoryUsage(); } verify(writers.size() == pagePartitioner.getMaxIndex() + 1); - verify(!writers.contains(null)); return writerIndexes; } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index ec34a5814c5a6..4d03e4ea5596a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -121,7 +121,7 @@ public ConnectorPageSource createPageSource( hiveSplit.getPartitionKeys(), hiveColumns, hiveSplit.getBucketConversion().map(BucketConversion::bucketColumnHandles).orElse(ImmutableList.of()), - hiveSplit.getTableToPartitionMapping(), + hiveSplit.getHiveColumnCoercions(), hiveSplit.getPath(), hiveSplit.getTableBucketNumber(), hiveSplit.getEstimatedFileSize(), @@ -382,7 +382,7 @@ public static List buildColumnMappings( List partitionKeys, List columns, List requiredInterimColumns, - TableToPartitionMapping tableToPartitionMapping, + Map hiveColumnCoercions, String path, OptionalInt bucketNumber, long estimatedFileSize, @@ -398,7 +398,7 @@ public static List buildColumnMappings( int regularIndex = 0; for (HiveColumnHandle column : columns) { - Optional baseTypeCoercionFrom = tableToPartitionMapping.getCoercion(column.getBaseHiveColumnIndex()); + Optional baseTypeCoercionFrom = Optional.ofNullable(hiveColumnCoercions.get(column.getBaseHiveColumnIndex())).map(HiveTypeName::toHiveType); if (column.getColumnType() == REGULAR) { if (column.isBaseColumn()) { baseColumnHiveIndices.add(column.getBaseHiveColumnIndex()); @@ -449,7 +449,8 @@ else if (isRowIdColumnHandle(column)) { } if (projectionsForColumn.containsKey(column.getBaseHiveColumnIndex())) { - columnMappings.add(interim(column, regularIndex, tableToPartitionMapping.getCoercion(column.getBaseHiveColumnIndex()))); + Optional baseTypeCoercionFrom = Optional.ofNullable(hiveColumnCoercions.get(column.getBaseHiveColumnIndex())).map(HiveTypeName::toHiveType); + columnMappings.add(interim(column, regularIndex, baseTypeCoercionFrom)); } else { // If coercion does not affect bucket number calculation, coercion doesn't need to be applied here. 
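The hunk above replaces the TableToPartitionMapping wrapper with a plain Map<Integer, HiveTypeName> keyed by the base hive column index in the table schema; the value is the type the partition actually wrote, and therefore the type the reader must coerce from. A small sketch of the lookup buildColumnMappings() now performs, using only calls that appear in this patch; the example values are hypothetical:

    import io.trino.plugin.hive.HiveType;
    import io.trino.plugin.hive.HiveTypeName;
    import java.util.Map;
    import java.util.Optional;

    // Resolve the "coerce from" type for one base column; an absent entry means
    // the partition already matches the table schema and no coercion is needed.
    static Optional<HiveType> coercionFor(Map<Integer, HiveTypeName> hiveColumnCoercions, int baseHiveColumnIndex)
    {
        return Optional.ofNullable(hiveColumnCoercions.get(baseHiveColumnIndex))
                .map(HiveTypeName::toHiveType);
    }

    // Example (hypothetical): the table declares column 2 as bigint but this
    // partition wrote int, so only index 2 carries an entry:
    //   Map<Integer, HiveTypeName> coercions = Map.of(2, HiveType.HIVE_INT.getHiveTypeName());
    //   coercionFor(coercions, 2) -> Optional.of(HIVE_INT)
    //   coercionFor(coercions, 0) -> Optional.empty()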
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionMetadata.java index f484f42868c91..15efa2929ad96 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionMetadata.java @@ -13,8 +13,10 @@ */ package io.trino.plugin.hive; +import com.google.common.collect.ImmutableMap; import io.trino.plugin.hive.metastore.Partition; +import java.util.Map; import java.util.Optional; import static java.util.Objects.requireNonNull; @@ -23,16 +25,16 @@ public class HivePartitionMetadata { private final Optional partition; private final HivePartition hivePartition; - private final TableToPartitionMapping tableToPartitionMapping; + private final Map hiveColumnCoercions; HivePartitionMetadata( HivePartition hivePartition, Optional partition, - TableToPartitionMapping tableToPartitionMapping) + Map hiveColumnCoercions) { this.partition = requireNonNull(partition, "partition is null"); this.hivePartition = requireNonNull(hivePartition, "hivePartition is null"); - this.tableToPartitionMapping = requireNonNull(tableToPartitionMapping, "tableToPartitionMapping is null"); + this.hiveColumnCoercions = ImmutableMap.copyOf(requireNonNull(hiveColumnCoercions, "hiveColumnCoercions is null")); } public HivePartition getHivePartition() @@ -48,8 +50,8 @@ public Optional getPartition() return partition; } - public TableToPartitionMapping getTableToPartitionMapping() + public Map getHiveColumnCoercions() { - return tableToPartitionMapping; + return hiveColumnCoercions; } } diff --git a/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePlugin.java similarity index 100% rename from plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java rename to plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePlugin.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java index fbeb33a40a5c3..c0a71c1fe9957 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java @@ -63,6 +63,7 @@ public final class HiveSessionProperties private static final String BUCKET_EXECUTION_ENABLED = "bucket_execution_enabled"; private static final String VALIDATE_BUCKETING = "validate_bucketing"; private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size"; + private static final String IDLE_WRITER_MIN_FILE_SIZE = "idle_writer_min_file_size"; private static final String PARALLEL_PARTITIONED_BUCKETED_WRITES = "parallel_partitioned_bucketed_writes"; private static final String FORCE_LOCAL_SCHEDULING = "force_local_scheduling"; private static final String INSERT_EXISTING_PARTITIONS_BEHAVIOR = "insert_existing_partitions_behavior"; @@ -169,6 +170,11 @@ public HiveSessionProperties( "Target maximum size of written files; the actual size may be larger", hiveConfig.getTargetMaxFileSize(), false), + dataSizeProperty( + IDLE_WRITER_MIN_FILE_SIZE, + "Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine", + hiveConfig.getIdleWriterMinFileSize(), + false), booleanProperty( PARALLEL_PARTITIONED_BUCKETED_WRITES, "Improve parallelism of 
partitioned and bucketed table writes", @@ -555,6 +561,11 @@ public static DataSize getTargetMaxFileSize(ConnectorSession session) return session.getProperty(TARGET_MAX_FILE_SIZE, DataSize.class); } + public static DataSize getIdleWriterMinFileSize(ConnectorSession session) + { + return session.getProperty(IDLE_WRITER_MIN_FILE_SIZE, DataSize.class); + } + public static boolean isParallelPartitionedBucketedWrites(ConnectorSession session) { return session.getProperty(PARALLEL_PARTITIONED_BUCKETED_WRITES, Boolean.class); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java index f93e8a3d6a2a0..fc4c585ce957c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java @@ -40,6 +40,7 @@ public class HiveSplit implements ConnectorSplit { private static final int INSTANCE_SIZE = instanceSize(HiveSplit.class); + private static final int INTEGER_INSTANCE_SIZE = instanceSize(Integer.class); private final String path; private final long start; @@ -53,7 +54,7 @@ public class HiveSplit private final OptionalInt readBucketNumber; private final OptionalInt tableBucketNumber; private final boolean forceLocalScheduling; - private final TableToPartitionMapping tableToPartitionMapping; + private final Map hiveColumnCoercions; private final Optional bucketConversion; private final Optional bucketValidation; private final Optional acidInfo; @@ -72,7 +73,7 @@ public HiveSplit( @JsonProperty("readBucketNumber") OptionalInt readBucketNumber, @JsonProperty("tableBucketNumber") OptionalInt tableBucketNumber, @JsonProperty("forceLocalScheduling") boolean forceLocalScheduling, - @JsonProperty("tableToPartitionMapping") TableToPartitionMapping tableToPartitionMapping, + @JsonProperty("hiveColumnCoercions") Map hiveColumnCoercions, @JsonProperty("bucketConversion") Optional bucketConversion, @JsonProperty("bucketValidation") Optional bucketValidation, @JsonProperty("acidInfo") Optional acidInfo, @@ -91,7 +92,7 @@ public HiveSplit( readBucketNumber, tableBucketNumber, forceLocalScheduling, - tableToPartitionMapping, + hiveColumnCoercions, bucketConversion, bucketValidation, acidInfo, @@ -111,7 +112,7 @@ public HiveSplit( OptionalInt readBucketNumber, OptionalInt tableBucketNumber, boolean forceLocalScheduling, - TableToPartitionMapping tableToPartitionMapping, + Map hiveColumnCoercions, Optional bucketConversion, Optional bucketValidation, Optional acidInfo, @@ -127,7 +128,7 @@ public HiveSplit( requireNonNull(addresses, "addresses is null"); requireNonNull(readBucketNumber, "readBucketNumber is null"); requireNonNull(tableBucketNumber, "tableBucketNumber is null"); - requireNonNull(tableToPartitionMapping, "tableToPartitionMapping is null"); + requireNonNull(hiveColumnCoercions, "hiveColumnCoercions is null"); requireNonNull(bucketConversion, "bucketConversion is null"); requireNonNull(bucketValidation, "bucketValidation is null"); requireNonNull(acidInfo, "acidInfo is null"); @@ -144,7 +145,7 @@ public HiveSplit( this.readBucketNumber = readBucketNumber; this.tableBucketNumber = tableBucketNumber; this.forceLocalScheduling = forceLocalScheduling; - this.tableToPartitionMapping = tableToPartitionMapping; + this.hiveColumnCoercions = ImmutableMap.copyOf(hiveColumnCoercions); this.bucketConversion = bucketConversion; this.bucketValidation = bucketValidation; this.acidInfo = acidInfo; @@ -226,9 +227,9 @@ public boolean 
isForceLocalScheduling() } @JsonProperty - public TableToPartitionMapping getTableToPartitionMapping() + public Map getHiveColumnCoercions() { - return tableToPartitionMapping; + return hiveColumnCoercions; } @JsonProperty @@ -273,7 +274,7 @@ public long getRetainedSizeInBytes() + estimatedSizeOf(partitionName) + sizeOf(readBucketNumber) + sizeOf(tableBucketNumber) - + tableToPartitionMapping.getEstimatedSizeInBytes() + + estimatedSizeOf(hiveColumnCoercions, (Integer key) -> INTEGER_INSTANCE_SIZE, HiveTypeName::getEstimatedSizeInBytes) + sizeOf(bucketConversion, BucketConversion::getRetainedSizeInBytes) + sizeOf(bucketValidation, BucketValidation::getRetainedSizeInBytes) + sizeOf(acidInfo, AcidInfo::getRetainedSizeInBytes) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitManager.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitManager.java index e007baeaf52ee..630caa38534ba 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitManager.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitManager.java @@ -80,7 +80,6 @@ import static io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames; import static io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames; import static io.trino.plugin.hive.HiveStorageFormat.getHiveStorageFormat; -import static io.trino.plugin.hive.TableToPartitionMapping.mapColumnsByIndex; import static io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode; import static io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName; import static io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline; @@ -305,7 +304,7 @@ private Iterator getPartitionMetadata( if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) { hivePartitions.next(); checkArgument(!hivePartitions.hasNext(), "single partition is expected for unpartitioned table"); - return singletonIterator(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty())); + return singletonIterator(new HivePartitionMetadata(firstPartition, Optional.empty(), ImmutableMap.of())); } HiveTimestampPrecision hiveTimestampPrecision = getTimestampPrecision(session); @@ -385,7 +384,7 @@ private static HivePartitionMetadata toPartitionMetadata( throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partName)); } - TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(usePartitionColumnNames, typeManager, hiveTimestampPrecision, tableName, partName, tableColumns, partitionColumns, neededColumnNames); + Map hiveColumnCoercions = getHiveColumnCoercions(usePartitionColumnNames, typeManager, hiveTimestampPrecision, tableName, partName, tableColumns, partitionColumns, neededColumnNames); if (bucketProperty.isPresent()) { HiveBucketProperty partitionBucketProperty = partition.getStorage().getBucketProperty() @@ -420,10 +419,10 @@ private static HivePartitionMetadata toPartitionMetadata( } } } - return new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping); + return new HivePartitionMetadata(hivePartition, Optional.of(partition), hiveColumnCoercions); } - private static TableToPartitionMapping getTableToPartitionMapping( + private static Map getHiveColumnCoercions( boolean usePartitionColumnNames, TypeManager typeManager, HiveTimestampPrecision hiveTimestampPrecision, @@ -434,7 +433,7 @@ private static TableToPartitionMapping getTableToPartitionMapping( Set 
neededColumnNames) { if (usePartitionColumnNames) { - return getTableToPartitionMappingByColumnNames(typeManager, tableName, partName, tableColumns, partitionColumns, neededColumnNames, hiveTimestampPrecision); + return getHiveColumnCoercionsByColumnNames(typeManager, tableName, partName, tableColumns, partitionColumns, neededColumnNames, hiveTimestampPrecision); } ImmutableMap.Builder columnCoercions = ImmutableMap.builder(); for (int i = 0; i < min(partitionColumns.size(), tableColumns.size()); i++) { @@ -451,7 +450,7 @@ private static TableToPartitionMapping getTableToPartitionMapping( columnCoercions.put(i, partitionType.getHiveTypeName()); } } - return mapColumnsByIndex(columnCoercions.buildOrThrow()); + return columnCoercions.buildOrThrow(); } private static boolean isPartitionUsesColumnNames(ConnectorSession session, Optional storageFormat) @@ -467,7 +466,7 @@ private static boolean isPartitionUsesColumnNames(ConnectorSession session, Opti }; } - private static TableToPartitionMapping getTableToPartitionMappingByColumnNames( + private static Map getHiveColumnCoercionsByColumnNames( TypeManager typeManager, SchemaTableName tableName, String partName, @@ -488,7 +487,6 @@ private static TableToPartitionMapping getTableToPartitionMappingByColumnNames( Map partitionColumnsByIndex = partitionColumnIndexesBuilder.buildOrThrow(); ImmutableMap.Builder columnCoercions = ImmutableMap.builder(); - ImmutableMap.Builder tableToPartitionColumns = ImmutableMap.builder(); for (int tableColumnIndex = 0; tableColumnIndex < tableColumns.size(); tableColumnIndex++) { Column tableColumn = tableColumns.get(tableColumnIndex); HiveType tableType = tableColumn.getType(); @@ -496,18 +494,17 @@ private static TableToPartitionMapping getTableToPartitionMappingByColumnNames( if (partitionColumnIndex == null) { continue; } - tableToPartitionColumns.put(tableColumnIndex, partitionColumnIndex); Column partitionColumn = partitionColumns.get(partitionColumnIndex); HiveType partitionType = partitionColumn.getType(); if (!tableType.equals(partitionType)) { if (!canCoerce(typeManager, partitionType, tableType, hiveTimestampPrecision)) { throw tablePartitionColumnMismatchException(tableName, partName, tableColumn.getName(), tableType, partitionColumn.getName(), partitionType); } - columnCoercions.put(partitionColumnIndex, partitionType.getHiveTypeName()); + columnCoercions.put(tableColumnIndex, partitionType.getHiveTypeName()); } } - return new TableToPartitionMapping(Optional.of(tableToPartitionColumns.buildOrThrow()), columnCoercions.buildOrThrow()); + return columnCoercions.buildOrThrow(); } private static TrinoException tablePartitionColumnMismatchException(SchemaTableName tableName, String partName, String tableColumnName, HiveType tableType, String partitionColumnName, HiveType partitionType) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java index 63f36aec02698..2da1750229f50 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java @@ -309,7 +309,7 @@ else if (maxSplitBytes * 2 >= remainingBlockBytes) { internalSplit.getReadBucketNumber(), internalSplit.getTableBucketNumber(), internalSplit.isForceLocalScheduling(), - internalSplit.getTableToPartitionMapping(), + internalSplit.getHiveColumnCoercions(), internalSplit.getBucketConversion(), internalSplit.getBucketValidation(), 
internalSplit.getAcidInfo(), diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveConnectorFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveConnectorFactory.java index 6dda13819bece..6d10eb61cb10e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveConnectorFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveConnectorFactory.java @@ -80,7 +80,7 @@ private InternalHiveConnectorFactory() {} public static Connector createConnector(String catalogName, Map config, ConnectorContext context, Module module) { - return createConnector(catalogName, config, context, module, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); + return createConnector(catalogName, config, context, module, Optional.empty(), Optional.empty(), Optional.empty()); } public static Connector createConnector( @@ -90,7 +90,6 @@ public static Connector createConnector( Module module, Optional metastore, Optional fileSystemFactory, - Optional openTelemetry, Optional directoryLister) { requireNonNull(config, "config is null"); @@ -110,11 +109,11 @@ public static Connector createConnector( new HiveSecurityModule(), fileSystemFactory .map(factory -> (Module) binder -> binder.bind(TrinoFileSystemFactory.class).toInstance(factory)) - .orElseGet(() -> new FileSystemModule(catalogName, context.getNodeManager(), openTelemetry.orElse(context.getOpenTelemetry()))), + .orElseGet(() -> new FileSystemModule(catalogName, context.getNodeManager(), context.getOpenTelemetry())), new HiveProcedureModule(), new MBeanServerModule(), binder -> { - binder.bind(OpenTelemetry.class).toInstance(openTelemetry.orElse(context.getOpenTelemetry())); + binder.bind(OpenTelemetry.class).toInstance(context.getOpenTelemetry()); binder.bind(Tracer.class).toInstance(context.getTracer()); binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java index 28b11595a7bcf..6984b7e9d34ac 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import io.trino.annotation.NotThreadSafe; import io.trino.plugin.hive.HiveSplit.BucketConversion; import io.trino.plugin.hive.HiveSplit.BucketValidation; @@ -38,6 +39,7 @@ public class InternalHiveSplit { private static final int INSTANCE_SIZE = instanceSize(InternalHiveSplit.class) + instanceSize(OptionalInt.class); + private static final int INTEGER_INSTANCE_SIZE = instanceSize(Integer.class); private final String path; private final long end; @@ -51,7 +53,7 @@ public class InternalHiveSplit private final OptionalInt tableBucketNumber; private final boolean splittable; private final boolean forceLocalScheduling; - private final TableToPartitionMapping tableToPartitionMapping; + private final Map hiveColumnCoercions; private final Optional bucketConversion; private final Optional bucketValidation; private final Optional acidInfo; @@ -74,7 +76,7 @@ public InternalHiveSplit( OptionalInt tableBucketNumber, boolean splittable, boolean forceLocalScheduling, - 
TableToPartitionMapping tableToPartitionMapping, + Map hiveColumnCoercions, Optional bucketConversion, Optional bucketValidation, Optional acidInfo, @@ -90,7 +92,7 @@ public InternalHiveSplit( requireNonNull(blocks, "blocks is null"); requireNonNull(readBucketNumber, "readBucketNumber is null"); requireNonNull(tableBucketNumber, "tableBucketNumber is null"); - requireNonNull(tableToPartitionMapping, "tableToPartitionMapping is null"); + requireNonNull(hiveColumnCoercions, "hiveColumnCoercions is null"); requireNonNull(bucketConversion, "bucketConversion is null"); requireNonNull(bucketValidation, "bucketValidation is null"); requireNonNull(acidInfo, "acidInfo is null"); @@ -109,7 +111,7 @@ public InternalHiveSplit( this.tableBucketNumber = tableBucketNumber; this.splittable = splittable; this.forceLocalScheduling = forceLocalScheduling; - this.tableToPartitionMapping = tableToPartitionMapping; + this.hiveColumnCoercions = ImmutableMap.copyOf(hiveColumnCoercions); this.bucketConversion = bucketConversion; this.bucketValidation = bucketValidation; this.acidInfo = acidInfo; @@ -176,9 +178,9 @@ public boolean isForceLocalScheduling() return forceLocalScheduling; } - public TableToPartitionMapping getTableToPartitionMapping() + public Map getHiveColumnCoercions() { - return tableToPartitionMapping; + return hiveColumnCoercions; } public Optional getBucketConversion() @@ -221,7 +223,7 @@ public int getEstimatedSizeInBytes() estimatedSizeOf(partitionKeys, HivePartitionKey::getEstimatedSizeInBytes) + estimatedSizeOf(blocks, InternalHiveBlock::getEstimatedSizeInBytes) + estimatedSizeOf(partitionName) + - tableToPartitionMapping.getEstimatedSizeInBytes(); + estimatedSizeOf(hiveColumnCoercions, (Integer key) -> INTEGER_INSTANCE_SIZE, HiveTypeName::getEstimatedSizeInBytes); return toIntExact(result); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/NoneHiveMaterializedViewMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/NoneHiveMaterializedViewMetadata.java index a23d562b616de..0c99c96f39976 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/NoneHiveMaterializedViewMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/NoneHiveMaterializedViewMetadata.java @@ -33,7 +33,13 @@ public class NoneHiveMaterializedViewMetadata implements HiveMaterializedViewMetadata { @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { throw new TrinoException(NOT_SUPPORTED, "This connector does not support creating materialized views"); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java index f77000f0ea10a..151289496aaaa 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java @@ -24,6 +24,7 @@ import java.util.Objects; import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.util.Objects.requireNonNull; @Immutable @@ -99,6 +100,15 @@ public PartitionStatistics 
withBasicStatistics(HiveBasicStatistics basicStatisti return new PartitionStatistics(basicStatistics, columnStatistics); } + public PartitionStatistics withEmptyColumnStatisticsRemoved() + { + return new PartitionStatistics( + basicStatistics, + columnStatistics.entrySet().stream() + .filter(entry -> !entry.getValue().equals(HiveColumnStatistics.empty())) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); + } + public static class Builder { private HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/TableToPartitionMapping.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/TableToPartitionMapping.java deleted file mode 100644 index fcabc3ba27d73..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/TableToPartitionMapping.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; -import it.unimi.dsi.fastutil.ints.Int2IntArrayMap; -import it.unimi.dsi.fastutil.ints.Int2IntMaps; - -import java.util.Map; -import java.util.Objects; -import java.util.Optional; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static io.airlift.slice.SizeOf.estimatedSizeOf; -import static io.airlift.slice.SizeOf.instanceSize; -import static io.airlift.slice.SizeOf.sizeOfIntArray; -import static java.lang.Math.toIntExact; -import static java.util.Objects.requireNonNull; - -public class TableToPartitionMapping -{ - public static TableToPartitionMapping empty() - { - return new TableToPartitionMapping(Optional.empty(), ImmutableMap.of()); - } - - public static TableToPartitionMapping mapColumnsByIndex(Map columnCoercions) - { - return new TableToPartitionMapping(Optional.empty(), columnCoercions); - } - - // Overhead of ImmutableMap is not accounted because of its complexity. 
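// Editorial aside, not part of the patch: with this class removed, the coercion for a table column
// becomes a direct map lookup on the split instead of an indirection through getCoercion(). A minimal
// sketch of the before/after, assuming hypothetical locals "split" and "tableColumnIndex":
//
//     Optional<HiveType> before = tableToPartitionMapping.getCoercion(tableColumnIndex);
//     Optional<HiveType> after = Optional.ofNullable(split.getHiveColumnCoercions().get(tableColumnIndex))
//             .map(HiveTypeName::toHiveType);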
- private static final int INSTANCE_SIZE = instanceSize(TableToPartitionMapping.class); - private static final int INTEGER_INSTANCE_SIZE = instanceSize(Integer.class); - private static final int OPTIONAL_INSTANCE_SIZE = instanceSize(Optional.class); - private static final int INT_2_INT_ARRAY_MAP_INSTANCE_SIZE = instanceSize(Int2IntArrayMap.class); - - private final Optional> tableToPartitionColumns; - private final Map partitionColumnCoercions; - - @JsonCreator - public TableToPartitionMapping( - @JsonProperty("tableToPartitionColumns") Optional> tableToPartitionColumns, - @JsonProperty("partitionColumnCoercions") Map partitionColumnCoercions) - { - if (tableToPartitionColumns.map(TableToPartitionMapping::isIdentityMapping).orElse(true)) { - this.tableToPartitionColumns = Optional.empty(); - } - else { - // we use Int2IntArrayMap due to much lower memory footprint than ImmutableMap - this.tableToPartitionColumns = tableToPartitionColumns.map(mapping -> Int2IntMaps.unmodifiable(new Int2IntArrayMap(mapping))); - } - this.partitionColumnCoercions = ImmutableMap.copyOf(requireNonNull(partitionColumnCoercions, "partitionColumnCoercions is null")); - } - - @VisibleForTesting - static boolean isIdentityMapping(Map map) - { - for (int i = 0; i < map.size(); i++) { - if (!Objects.equals(map.get(i), i)) { - return false; - } - } - return true; - } - - @JsonProperty - public Map getPartitionColumnCoercions() - { - return partitionColumnCoercions; - } - - @JsonProperty - public Optional> getTableToPartitionColumns() - { - return tableToPartitionColumns; - } - - public Optional getCoercion(int tableColumnIndex) - { - return getPartitionColumnIndex(tableColumnIndex) - .flatMap(partitionColumnIndex -> Optional.ofNullable(partitionColumnCoercions.get(partitionColumnIndex))) - .map(HiveTypeName::toHiveType); - } - - private Optional getPartitionColumnIndex(int tableColumnIndex) - { - if (tableToPartitionColumns.isEmpty()) { - return Optional.of(tableColumnIndex); - } - return Optional.ofNullable(tableToPartitionColumns.get().get(tableColumnIndex)); - } - - public int getEstimatedSizeInBytes() - { - long result = INSTANCE_SIZE + - estimatedSizeOf(partitionColumnCoercions, (Integer key) -> INTEGER_INSTANCE_SIZE, HiveTypeName::getEstimatedSizeInBytes) + - OPTIONAL_INSTANCE_SIZE + - tableToPartitionColumns - .map(tableToPartitionColumns -> INT_2_INT_ARRAY_MAP_INSTANCE_SIZE + 2 * sizeOfIntArray(tableToPartitionColumns.size())) - .orElse(0L); - return toIntExact(result); - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("columnCoercions", partitionColumnCoercions) - .add("tableToPartitionColumns", tableToPartitionColumns) - .toString(); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java index a0aa9435c00ee..24c39905dcb25 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java @@ -17,6 +17,7 @@ import io.trino.plugin.hive.HiveTimestampPrecision; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer; +import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer; import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToDateCoercer; import 
io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; @@ -69,6 +70,7 @@ import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToRealCoercer; import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToVarcharCoercer; import static io.trino.plugin.hive.coercions.DecimalCoercers.createDoubleToDecimalCoercer; +import static io.trino.plugin.hive.coercions.DecimalCoercers.createIntegerNumberToDecimalCoercer; import static io.trino.plugin.hive.coercions.DecimalCoercers.createRealToDecimalCoercer; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.block.ColumnarArray.toColumnarArray; @@ -139,6 +141,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH if (toHiveType.equals(HIVE_DOUBLE)) { return Optional.of(new IntegerNumberToDoubleCoercer<>(fromType)); } + if (toType instanceof DecimalType toDecimalType) { + return Optional.of(createIntegerNumberToDecimalCoercer(fromType, toDecimalType)); + } } if (fromHiveType.equals(HIVE_SHORT)) { if (toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG)) { @@ -147,6 +152,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH if (toHiveType.equals(HIVE_DOUBLE)) { return Optional.of(new IntegerNumberToDoubleCoercer<>(fromType)); } + if (toType instanceof DecimalType toDecimalType) { + return Optional.of(createIntegerNumberToDecimalCoercer(fromType, toDecimalType)); + } } if (fromHiveType.equals(HIVE_INT)) { if (toHiveType.equals(HIVE_LONG)) { @@ -155,9 +163,17 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH if (toHiveType.equals(HIVE_DOUBLE)) { return Optional.of(new IntegerNumberToDoubleCoercer<>(fromType)); } + if (toType instanceof DecimalType toDecimalType) { + return Optional.of(createIntegerNumberToDecimalCoercer(fromType, toDecimalType)); + } } - if (fromHiveType.equals(HIVE_LONG) && toHiveType.equals(HIVE_DOUBLE)) { - return Optional.of(new IntegerNumberToDoubleCoercer<>(fromType)); + if (fromHiveType.equals(HIVE_LONG)) { + if (toHiveType.equals(HIVE_DOUBLE)) { + return Optional.of(new IntegerNumberToDoubleCoercer<>(fromType)); + } + if (toType instanceof DecimalType toDecimalType) { + return Optional.of(createIntegerNumberToDecimalCoercer(fromType, toDecimalType)); + } } if (fromHiveType.equals(HIVE_FLOAT) && toHiveType.equals(HIVE_DOUBLE)) { return Optional.of(new FloatToDoubleCoercer()); @@ -199,6 +215,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH } return Optional.empty(); } + if (fromType instanceof DateType && toType instanceof VarcharType toVarcharType) { + return Optional.of(new DateToVarcharCoercer(toVarcharType)); + } if (fromType == DOUBLE && toType instanceof VarcharType toVarcharType) { return Optional.of(new DoubleToVarcharCoercer(toVarcharType, coercionContext.treatNaNAsNull())); } @@ -421,7 +440,7 @@ public Block apply(Block block) RowBlock rowBlock = (RowBlock) runLengthEncodedBlock.getValue(); RowBlock newRowBlock = RowBlock.fromNotNullSuppressedFieldBlocks( 1, - rowBlock.isNull(0) ? Optional.of(new boolean[]{true}) : Optional.empty(), + rowBlock.isNull(0) ? 
Optional.of(new boolean[] {true}) : Optional.empty(), coerceFields(rowBlock.getFieldBlocks())); return RunLengthEncodedBlock.create(newRowBlock, runLengthEncodedBlock.getPositionCount()); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java index 3b9398a9e9253..752f72ac7ceb4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java @@ -13,16 +13,23 @@ */ package io.trino.plugin.hive.coercions; +import io.airlift.slice.Slice; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.DateType; import io.trino.spi.type.VarcharType; +import java.time.DateTimeException; import java.time.LocalDate; import java.time.format.DateTimeParseException; +import static io.airlift.slice.SliceUtf8.countCodePoints; +import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION; +import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; +import static io.trino.spi.type.DateType.DATE; +import static java.lang.String.format; import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE; public final class DateCoercer @@ -55,4 +62,32 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos } } } + + public static class DateToVarcharCoercer + extends TypeCoercer + { + public DateToVarcharCoercer(VarcharType toType) + { + super(DATE, toType); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + int value = fromType.getInt(block, position); + try { + if (value < START_OF_MODERN_ERA_DAYS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } + Slice converted = utf8Slice(ISO_LOCAL_DATE.format(LocalDate.ofEpochDay(value))); + if (!toType.isUnbounded() && countCodePoints(converted) > toType.getBoundedLength()) { + throw new TrinoException(INVALID_ARGUMENTS, format("Varchar representation of '%s' exceeds %s bounds", converted.toStringUtf8(), toType)); + } + toType.writeSlice(blockBuilder, converted); + } + catch (DateTimeException ignored) { + throw new IllegalArgumentException("Invalid date value: " + value + " is exceeding supported date range"); + } + } + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DecimalCoercers.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DecimalCoercers.java index 5dc835cb463b6..2c91d7173681f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DecimalCoercers.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DecimalCoercers.java @@ -25,6 +25,8 @@ import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; +import java.math.BigDecimal; + import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.type.BigintType.BIGINT; @@ -41,6 +43,9 @@ import static io.trino.spi.type.DecimalConversions.shortToLongCast; import static io.trino.spi.type.DecimalConversions.shortToShortCast; import static io.trino.spi.type.Decimals.longTenToNth; +import static io.trino.spi.type.Decimals.overflows; +import static io.trino.spi.type.Decimals.writeBigDecimal; +import static 
io.trino.spi.type.Decimals.writeShortDecimal; import static io.trino.spi.type.DoubleType.DOUBLE; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.RealType.REAL; @@ -465,4 +470,54 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos realToLongDecimal(fromType.getFloat(block, position), toType.getPrecision(), toType.getScale())); } } + + public static TypeCoercer createIntegerNumberToDecimalCoercer(F fromType, DecimalType toType) + { + if (toType.isShort()) { + return new IntegerNumberToShortDecimalCoercer<>(fromType, toType); + } + return new IntegerNumberToLongDecimalCoercer<>(fromType, toType); + } + + private static class IntegerNumberToShortDecimalCoercer + extends TypeCoercer + { + public IntegerNumberToShortDecimalCoercer(F fromType, DecimalType toType) + { + super(fromType, toType); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + BigDecimal bigDecimal = BigDecimal.valueOf(fromType.getLong(block, position)).setScale(toType.getScale()); + if (overflows(bigDecimal, toType.getPrecision())) { + blockBuilder.appendNull(); + } + else { + writeShortDecimal(blockBuilder, bigDecimal.unscaledValue().longValueExact()); + } + } + } + + private static class IntegerNumberToLongDecimalCoercer + extends TypeCoercer + { + public IntegerNumberToLongDecimalCoercer(F fromType, DecimalType toType) + { + super(fromType, toType); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + BigDecimal bigDecimal = BigDecimal.valueOf(fromType.getLong(block, position)).setScale(toType.getScale()); + if (overflows(bigDecimal, toType.getPrecision())) { + blockBuilder.appendNull(); + } + else { + writeBigDecimal(toType, blockBuilder, bigDecimal); + } + } + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java index 2a4c7a2701d60..702925691c174 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java @@ -94,15 +94,6 @@ public void updateTableStatistics( delegate.updateTableStatistics(databaseName, tableName, transaction, update); } - @Override - public void updatePartitionStatistics( - Table table, - String partitionName, - Function update) - { - delegate.updatePartitionStatistics(table, partitionName, update); - } - @Override public void updatePartitionStatistics( Table table, @@ -112,9 +103,9 @@ public void updatePartitionStatistics( } @Override - public List getAllTables(String databaseName) + public List getTables(String databaseName) { - return delegate.getAllTables(databaseName); + return delegate.getTables(databaseName); } @Override @@ -130,9 +121,9 @@ public Map getRelationTypes(String databaseName) } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { - return delegate.getRelationTypes(); + return delegate.getAllRelationTypes(); } @Override @@ -142,9 +133,9 @@ public List getTablesWithParameter(String databaseName, String parameter } @Override - public List getAllViews(String databaseName) + public List getViews(String databaseName) { - return delegate.getAllViews(databaseName); + return delegate.getViews(databaseName); } @Override @@ -472,9 +463,9 @@ public boolean functionExists(String 
databaseName, String functionName, String s } @Override - public Collection getFunctions(String databaseName) + public Collection getAllFunctions(String databaseName) { - return delegate.getFunctions(databaseName); + return delegate.getAllFunctions(databaseName); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveColumnStatistics.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveColumnStatistics.java index 1469540e3fa52..1400effa9fd92 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveColumnStatistics.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveColumnStatistics.java @@ -26,6 +26,7 @@ import java.util.OptionalDouble; import java.util.OptionalLong; +import static com.google.common.base.MoreObjects.ToStringHelper; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; import static java.util.Objects.requireNonNull; @@ -174,17 +175,17 @@ public int hashCode() @Override public String toString() { - return toStringHelper(this) - .add("integerStatistics", integerStatistics) - .add("doubleStatistics", doubleStatistics) - .add("decimalStatistics", decimalStatistics) - .add("dateStatistics", dateStatistics) - .add("booleanStatistics", booleanStatistics) - .add("maxValueSizeInBytes", maxValueSizeInBytes) - .add("totalSizeInBytes", totalSizeInBytes) - .add("nullsCount", nullsCount) - .add("distinctValuesCount", distinctValuesCount) - .toString(); + ToStringHelper toStringHelper = toStringHelper(this); + integerStatistics.ifPresent(stats -> toStringHelper.add("integerStatistics", stats)); + doubleStatistics.ifPresent(stats -> toStringHelper.add("doubleStatistics", stats)); + decimalStatistics.ifPresent(stats -> toStringHelper.add("decimalStatistics", stats)); + dateStatistics.ifPresent(stats -> toStringHelper.add("dateStatistics", stats)); + booleanStatistics.ifPresent(stats -> toStringHelper.add("booleanStatistics", stats)); + maxValueSizeInBytes.ifPresent(stats -> toStringHelper.add("maxValueSizeInBytes", stats)); + totalSizeInBytes.ifPresent(stats -> toStringHelper.add("totalSizeInBytes", stats)); + nullsCount.ifPresent(stats -> toStringHelper.add("nullsCount", stats)); + distinctValuesCount.ifPresent(stats -> toStringHelper.add("distinctValuesCount", stats)); + return toStringHelper.toString(); } public static HiveColumnStatistics createIntegerColumnStatistics(OptionalLong min, OptionalLong max, OptionalLong nullsCount, OptionalLong distinctValuesCount) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java index 2a821d23f29ee..5be464c024264 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java @@ -13,7 +13,6 @@ */ package io.trino.plugin.hive.metastore; -import com.google.common.collect.ImmutableMap; import io.trino.hive.thrift.metastore.DataOperationType; import io.trino.plugin.hive.HiveColumnStatisticType; import io.trino.plugin.hive.HivePartition; @@ -56,14 +55,9 @@ public interface HiveMetastore void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, Function update); - default void updatePartitionStatistics(Table table, String partitionName, Function update) - { - updatePartitionStatistics(table, 
ImmutableMap.of(partitionName, update)); - } - void updatePartitionStatistics(Table table, Map> updates); - List getAllTables(String databaseName); + List getTables(String databaseName); /** * @return List of tables, views and materialized views names from all schemas or Optional.empty if operation is not supported @@ -75,14 +69,14 @@ default void updatePartitionStatistics(Table table, String partitionName, Functi /** * @return empty if operation is not supported */ - Optional> getRelationTypes(); + Optional> getAllRelationTypes(); List getTablesWithParameter(String databaseName, String parameterKey, String parameterValue); /** * Lists views and materialized views from given database. */ - List getAllViews(String databaseName); + List getViews(String databaseName); /** * @return List of views including materialized views names from all schemas or Optional.empty if operation is not supported @@ -244,7 +238,7 @@ default void alterTransactionalTable(Table table, long transactionId, long write boolean functionExists(String databaseName, String functionName, String signatureToken); - Collection getFunctions(String databaseName); + Collection getAllFunctions(String databaseName); Collection getFunctions(String databaseName, String functionName); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index cabc1de51b45a..d7943790883da 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -240,7 +240,7 @@ public synchronized List getAllTables(String databaseName) if (!tableActions.isEmpty()) { throw new UnsupportedOperationException("Listing all tables after adding/dropping/altering tables/views in a transaction is not supported"); } - return delegate.getAllTables(databaseName); + return delegate.getTables(databaseName); } public synchronized Optional> getAllTables() @@ -267,7 +267,7 @@ public synchronized Optional> getRelationType if (!tableActions.isEmpty()) { throw new UnsupportedOperationException("Listing all relations after adding/dropping/altering tables/views in a transaction is not supported"); } - return delegate.getRelationTypes(); + return delegate.getAllRelationTypes(); } public synchronized Optional getTable(String databaseName, String tableName) @@ -441,7 +441,7 @@ public synchronized List getAllViews(String databaseName) if (!tableActions.isEmpty()) { throw new UnsupportedOperationException("Listing all tables after adding/dropping/altering tables/views in a transaction is not supported"); } - return delegate.getAllViews(databaseName); + return delegate.getViews(databaseName); } public synchronized Optional> getAllViews() @@ -1236,7 +1236,7 @@ public synchronized boolean functionExists(SchemaFunctionName name, String signa public synchronized Collection getFunctions(String schemaName) { checkReadable(); - return delegate.getFunctions(schemaName); + return delegate.getAllFunctions(schemaName); } public synchronized Collection getFunctions(SchemaFunctionName name) @@ -3365,7 +3365,7 @@ public UpdateStatisticsOperation(SchemaTableName tableName, Optional par public void run(HiveMetastoreClosure metastore, AcidTransaction transaction) { if (partitionName.isPresent()) { - metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), 
partitionName.get(), this::updateStatistics); + metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableMap.of(partitionName.get(), this::updateStatistics)); } else { metastore.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), transaction, this::updateStatistics); @@ -3379,7 +3379,7 @@ public void undo(HiveMetastoreClosure metastore, AcidTransaction transaction) return; } if (partitionName.isPresent()) { - metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName.get(), this::resetStatistics); + metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableMap.of(partitionName.get(), this::resetStatistics)); } else { metastore.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), transaction, this::resetStatistics); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index 595e4f684e850..ef964cb584d9f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -59,6 +59,7 @@ import org.weakref.jmx.Managed; import org.weakref.jmx.Nested; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -473,7 +474,9 @@ public PartitionStatistics getTableStatistics(Table table) Table tableWithOnlyMissingColumns = table.withSelectedDataColumnsOnly(missingColumns); return delegate.getTableStatistics(tableWithOnlyMissingColumns); }, - CachingHiveMetastore::mergePartitionColumnStatistics); + (currentStats, newStats) -> mergePartitionColumnStatistics(currentStats, newStats, dataColumns)) + // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing + .withEmptyColumnStatisticsRemoved(); } /** @@ -494,23 +497,31 @@ public Map getPartitionStatistics(Table table, List partitionsByName.keySet(), missingPartitions -> loadPartitionsColumnStatistics(table, partitionsByName, missingPartitions), currentStats -> currentStats.getColumnStatistics().keySet().containsAll(dataColumns), - CachingHiveMetastore::mergePartitionColumnStatistics); + (currentStats, newStats) -> mergePartitionColumnStatistics(currentStats, newStats, dataColumns)); return statistics.entrySet().stream() - .collect(toImmutableMap(entry -> entry.getKey().getPartitionName().orElseThrow(), Entry::getValue)); + .collect(toImmutableMap( + entry -> entry.getKey().getPartitionName().orElseThrow(), + // HiveColumnStatistics.empty() are removed to make output consistent with non-cached metastore which simplifies testing + entry -> entry.getValue().withEmptyColumnStatisticsRemoved())); } - private static PartitionStatistics mergePartitionColumnStatistics(PartitionStatistics currentStats, PartitionStatistics newStats) + private PartitionStatistics mergePartitionColumnStatistics(PartitionStatistics currentStats, PartitionStatistics newStats, Set dataColumns) { requireNonNull(newStats, "newStats is null"); - if (currentStats == null) { - return newStats; + ImmutableMap.Builder columnStatisticsBuilder = ImmutableMap.builder(); + // Populate empty statistics for all requested columns to cache absence of column statistics for future requests. 
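// Editorial note, not part of the patch: the builder below is filled in precedence order and
// buildKeepingLast() keeps the last value put for each key, so freshly loaded statistics win over
// previously cached ones, which in turn win over the empty placeholders (added only when
// cacheMissing is enabled). The placeholders never reach callers, because getTableStatistics and
// getPartitionStatistics strip them again with withEmptyColumnStatisticsRemoved(). Illustrative
// effect, with hypothetical column names:
//
//     {"order_key" -> loadedStats, "ship_date" -> HiveColumnStatistics.empty()}
//         --withEmptyColumnStatisticsRemoved()--> {"order_key" -> loadedStats}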
+ if (cacheMissing) { + columnStatisticsBuilder.putAll(Iterables.transform( + dataColumns, + column -> new AbstractMap.SimpleEntry<>(column, HiveColumnStatistics.empty()))); } + if (currentStats != null) { + columnStatisticsBuilder.putAll(currentStats.getColumnStatistics()); + } + columnStatisticsBuilder.putAll(newStats.getColumnStatistics()); return new PartitionStatistics( newStats.getBasicStatistics(), - ImmutableMap.builder() - .putAll(currentStats.getColumnStatistics()) - .putAll(newStats.getColumnStatistics()) - .buildKeepingLast()); + columnStatisticsBuilder.buildKeepingLast()); } private Map loadPartitionsColumnStatistics(Table table, Map partitionsByName, Collection partitionNamesToLoad) @@ -547,20 +558,6 @@ public void updateTableStatistics(String databaseName, } } - @Override - public void updatePartitionStatistics(Table table, String partitionName, Function update) - { - try { - delegate.updatePartitionStatistics(table, partitionName, update); - } - finally { - HivePartitionName hivePartitionName = hivePartitionName(hiveTableName(table.getDatabaseName(), table.getTableName()), partitionName); - partitionStatisticsCache.invalidate(hivePartitionName); - // basic stats are stored as partition properties - partitionCache.invalidate(hivePartitionName); - } - } - @Override public void updatePartitionStatistics(Table table, Map> updates) { @@ -568,7 +565,7 @@ public void updatePartitionStatistics(Table table, Map { + updates.keySet().forEach(partitionName -> { HivePartitionName hivePartitionName = hivePartitionName(hiveTableName(table.getDatabaseName(), table.getTableName()), partitionName); partitionStatisticsCache.invalidate(hivePartitionName); // basic stats are stored as partition properties @@ -578,7 +575,7 @@ public void updatePartitionStatistics(Table table, Map getAllTables(String databaseName) + public List getTables(String databaseName) { Map relationTypes = relationTypesCache.getIfPresent(databaseName); if (relationTypes != null) { @@ -589,7 +586,7 @@ public List getAllTables(String databaseName) private List loadAllTables(String databaseName) { - return delegate.getAllTables(databaseName); + return delegate.getTables(databaseName); } @Override @@ -619,14 +616,14 @@ private Map loadRelationTypes(String databaseName) } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { return getOptional(allRelationTypesCache, SingletonCacheKey.INSTANCE); } private Optional> loadRelationTypes() { - return delegate.getRelationTypes(); + return delegate.getAllRelationTypes(); } @Override @@ -642,14 +639,14 @@ private List loadTablesMatchingParameter(TablesWithParameterCacheKey key } @Override - public List getAllViews(String databaseName) + public List getViews(String databaseName) { return get(viewNamesCache, databaseName); } private List loadAllViews(String databaseName) { - return delegate.getAllViews(databaseName); + return delegate.getViews(databaseName); } @Override @@ -1193,9 +1190,9 @@ public boolean functionExists(String databaseName, String functionName, String s } @Override - public Collection getFunctions(String databaseName) + public Collection getAllFunctions(String databaseName) { - return delegate.getFunctions(databaseName); + return delegate.getAllFunctions(databaseName); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java index 95d195032f728..bdcd2f1b4ac65 100644 --- 
a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java @@ -89,6 +89,7 @@ import java.util.stream.Collectors; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; @@ -227,7 +228,7 @@ public synchronized void dropDatabase(String databaseName, boolean deleteData) databaseName = databaseName.toLowerCase(ENGLISH); getRequiredDatabase(databaseName); - if (!getAllTables(databaseName).isEmpty()) { + if (!getTables(databaseName).isEmpty()) { throw new TrinoException(HIVE_METASTORE_ERROR, "Database " + databaseName + " is not empty"); } @@ -324,6 +325,7 @@ public synchronized List getAllDatabases() String prefix = catalogDirectory.toString(); Set databases = new HashSet<>(); + // TODO this lists files recursively and may fail if e.g. table data being modified by other threads/processes FileIterator iterator = fileSystem.listFiles(catalogDirectory); while (iterator.hasNext()) { Location location = iterator.next().location(); @@ -532,7 +534,7 @@ public synchronized void updatePartitionStatistics(Table table, Map getAllTables(String databaseName) + public synchronized List getTables(String databaseName) { return listAllTables(databaseName).stream() .filter(hideDeltaLakeTables @@ -551,13 +553,13 @@ public Optional> getAllTables() public synchronized Map getRelationTypes(String databaseName) { ImmutableMap.Builder relationTypes = ImmutableMap.builder(); - getAllTables(databaseName).forEach(name -> relationTypes.put(name, RelationType.TABLE)); - getAllViews(databaseName).forEach(name -> relationTypes.put(name, RelationType.VIEW)); + getTables(databaseName).forEach(name -> relationTypes.put(name, RelationType.TABLE)); + getViews(databaseName).forEach(name -> relationTypes.put(name, RelationType.VIEW)); return relationTypes.buildKeepingLast(); } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { return Optional.empty(); } @@ -598,24 +600,16 @@ private List doListAllTables(String databaseName) Location metadataDirectory = getDatabaseMetadataDirectory(databaseName); try { String prefix = metadataDirectory.toString(); + if (!prefix.endsWith("/")) { + prefix += "/"; + } Set tables = new HashSet<>(); - FileIterator iterator = fileSystem.listFiles(metadataDirectory); - while (iterator.hasNext()) { - Location location = iterator.next().location(); - - String child = location.toString().substring(prefix.length()); - if (child.startsWith("/")) { - child = child.substring(1); - } - - if (child.startsWith(".") || (child.indexOf('/') != child.lastIndexOf('/'))) { - continue; - } - - int length = child.length() - TRINO_SCHEMA_FILE_NAME_SUFFIX.length() - 1; - if ((length >= 1) && child.endsWith("/" + TRINO_SCHEMA_FILE_NAME_SUFFIX)) { - tables.add(child.substring(0, length)); + for (Location subdirectory : fileSystem.listDirectories(metadataDirectory)) { + String locationString = subdirectory.toString(); + verify(locationString.startsWith(prefix) && locationString.endsWith("/"), "Unexpected subdirectory %s when listing %s", subdirectory, metadataDirectory); + if (fileSystem.newInputFile(subdirectory.appendPath(TRINO_SCHEMA_FILE_NAME_SUFFIX)).exists()) { + 
tables.add(locationString.substring(prefix.length(), locationString.length() - 1)); } } @@ -627,9 +621,9 @@ private List doListAllTables(String databaseName) } @Override - public synchronized List getAllViews(String databaseName) + public synchronized List getViews(String databaseName) { - return getAllTables(databaseName).stream() + return getTables(databaseName).stream() .map(tableName -> getTable(databaseName, tableName)) .filter(Optional::isPresent) .map(Optional::get) @@ -1271,7 +1265,7 @@ public synchronized boolean functionExists(String databaseName, String functionN } @Override - public synchronized Collection getFunctions(String databaseName) + public synchronized Collection getAllFunctions(String databaseName) { return getFunctions(databaseName, Optional.empty()); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastoreFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastoreFactory.java index 6c74be568e154..27a028706aa21 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastoreFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastoreFactory.java @@ -14,11 +14,13 @@ package io.trino.plugin.hive.metastore.file; import com.google.inject.Inject; +import io.opentelemetry.api.trace.Tracer; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.HideDeltaLakeTables; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.tracing.TracingHiveMetastore; import io.trino.spi.security.ConnectorIdentity; import java.util.Optional; @@ -26,13 +28,19 @@ public class FileHiveMetastoreFactory implements HiveMetastoreFactory { - private final FileHiveMetastore metastore; + private final HiveMetastore metastore; @Inject - public FileHiveMetastoreFactory(NodeVersion nodeVersion, TrinoFileSystemFactory fileSystemFactory, @HideDeltaLakeTables boolean hideDeltaLakeTables, FileHiveMetastoreConfig config) + public FileHiveMetastoreFactory( + NodeVersion nodeVersion, + TrinoFileSystemFactory fileSystemFactory, + @HideDeltaLakeTables boolean hideDeltaLakeTables, + FileHiveMetastoreConfig config, + Tracer tracer) { // file metastore does not support impersonation, so just create a single shared instance - metastore = new FileHiveMetastore(nodeVersion, fileSystemFactory, hideDeltaLakeTables, config); + metastore = new TracingHiveMetastore(tracer, + new FileHiveMetastore(nodeVersion, fileSystemFactory, hideDeltaLakeTables, config)); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index e1efe5654fcd5..e25d21898776e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -185,6 +185,9 @@ public class GlueHiveMetastore private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100; private static final int BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE = 100; private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000; + private static final int AWS_GLUE_GET_DATABASES_MAX_RESULTS = 100; + private static final int AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS = 100; + private static final 
int AWS_GLUE_GET_TABLES_MAX_RESULTS = 100; private static final Comparator> PARTITION_VALUE_COMPARATOR = lexicographical(String.CASE_INSENSITIVE_ORDER); private static final Predicate SOME_KIND_OF_VIEW_FILTER = table -> VIRTUAL_VIEW.name().equals(getTableTypeNullable(table)); private static final RetryPolicy CONCURRENT_MODIFICATION_EXCEPTION_RETRY_POLICY = RetryPolicy.builder() @@ -253,7 +256,8 @@ public List getAllDatabases() try { List databaseNames = getPaginatedResults( glueClient::getDatabases, - new GetDatabasesRequest(), + new GetDatabasesRequest() + .withMaxResults(AWS_GLUE_GET_DATABASES_MAX_RESULTS), GetDatabasesRequest::setNextToken, GetDatabasesResult::getNextToken, stats.getGetDatabases()) @@ -424,7 +428,7 @@ private void updatePartitionStatisticsBatch(Table table, Map getAllTables(String databaseName) + public List getTables(String databaseName) { return getTableNames(databaseName, tableFilter); } @@ -461,7 +465,7 @@ public Map getRelationTypes(String databaseName) } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { return Optional.empty(); } @@ -473,7 +477,7 @@ public List getTablesWithParameter(String databaseName, String parameter } @Override - public List getAllViews(String databaseName) + public List getViews(String databaseName) { return getTableNames(databaseName, SOME_KIND_OF_VIEW_FILTER); } @@ -1273,7 +1277,7 @@ public boolean functionExists(String databaseName, String functionName, String s } @Override - public Collection getFunctions(String databaseName) + public Collection getAllFunctions(String databaseName) { return getFunctionsByPattern(databaseName, "trino__.*"); } @@ -1291,7 +1295,8 @@ private Collection getFunctionsByPattern(String databaseName, glueClient::getUserDefinedFunctions, new GetUserDefinedFunctionsRequest() .withDatabaseName(databaseName) - .withPattern(functionNamePattern), + .withPattern(functionNamePattern) + .withMaxResults(AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS), GetUserDefinedFunctionsRequest::setNextToken, GetUserDefinedFunctionsResult::getNextToken, stats.getGetUserDefinedFunctions()) @@ -1370,7 +1375,8 @@ private Stream getGlueTables(String dat return getPaginatedResults( glueClient::getTables, new GetTablesRequest() - .withDatabaseName(databaseName), + .withDatabaseName(databaseName) + .withMaxResults(AWS_GLUE_GET_TABLES_MAX_RESULTS), GetTablesRequest::setNextToken, GetTablesResult::getNextToken, stats.getGetTables()) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastoreFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastoreFactory.java index 9fb979be961a7..6e37d604852dc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastoreFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastoreFactory.java @@ -14,33 +14,24 @@ package io.trino.plugin.hive.metastore.glue; import com.google.inject.Inject; +import io.opentelemetry.api.trace.Tracer; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.tracing.TracingHiveMetastore; import io.trino.spi.security.ConnectorIdentity; -import org.weakref.jmx.Flatten; -import org.weakref.jmx.Managed; import java.util.Optional; -import static java.util.Objects.requireNonNull; - public class GlueHiveMetastoreFactory implements HiveMetastoreFactory { - private final GlueHiveMetastore metastore; + private 
final HiveMetastore metastore; // Glue metastore does not support impersonation, so just use single shared instance @Inject - public GlueHiveMetastoreFactory(GlueHiveMetastore metastore) + public GlueHiveMetastoreFactory(GlueHiveMetastore metastore, Tracer tracer) { - this.metastore = requireNonNull(metastore, "metastore is null"); - } - - @Flatten - @Managed - public GlueHiveMetastore getMetastore() - { - return metastore; + this.metastore = new TracingHiveMetastore(tracer, metastore); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java index ea4e4cb1e6da2..cb40ea0b3be18 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java @@ -69,7 +69,7 @@ protected void setup(Binder binder) binder.bind(GlueHiveMetastoreFactory.class).in(Scopes.SINGLETON); binder.bind(Key.get(GlueMetastoreStats.class, ForGlueHiveMetastore.class)).toInstance(new GlueMetastoreStats()); binder.bind(AWSGlueAsync.class).toProvider(HiveGlueClientProvider.class).in(Scopes.SINGLETON); - newExporter(binder).export(GlueHiveMetastoreFactory.class).as(generator -> generator.generatedNameOf(GlueHiveMetastore.class)); + newExporter(binder).export(GlueHiveMetastore.class).withGeneratedName(); binder.bind(Key.get(boolean.class, AllowHiveTableRename.class)).toInstance(false); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java index 0e0f9b8707d1f..68d3a6a145902 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java @@ -162,9 +162,9 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table StorageDescriptor is null for table '%s' %s".formatted(table, glueTable)); } boolean isCsv = sd.getSerdeInfo() != null && HiveStorageFormat.CSV.getSerde().equals(sd.getSerdeInfo().getSerializationLibrary()); - tableBuilder.setDataColumns(convertColumns(table, sd.getColumns(), isCsv)); + tableBuilder.setDataColumns(convertColumns(table, sd.getColumns(), ColumnType.DATA, isCsv)); if (glueTable.getPartitionKeys() != null) { - tableBuilder.setPartitionColumns(convertColumns(table, glueTable.getPartitionKeys(), isCsv)); + tableBuilder.setPartitionColumns(convertColumns(table, glueTable.getPartitionKeys(), ColumnType.PARTITION, isCsv)); } else { tableBuilder.setPartitionColumns(ImmutableList.of()); @@ -176,11 +176,11 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab return tableBuilder.build(); } - private static Column convertColumn(SchemaTableName table, com.amazonaws.services.glue.model.Column glueColumn, boolean isCsv) + private static Column convertColumn(SchemaTableName table, com.amazonaws.services.glue.model.Column glueColumn, ColumnType columnType, boolean isCsv) { // OpenCSVSerde deserializes columns from csv file into strings, so we set the column type from the metastore // to string to avoid cast exceptions. 
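// Editorial note, not part of the patch: the new ColumnType parameter limits this string coercion
// to data columns. Partition keys are not stored inside the CSV files themselves, so a CSV-backed
// table keeps their declared Hive types. Illustrative effect, with hypothetical Glue columns:
//
//     convertColumn(table, dataColumn,      ColumnType.DATA,      /* isCsv */ true)  -> HIVE_STRING
//     convertColumn(table, partitionColumn, ColumnType.PARTITION, /* isCsv */ true)  -> declared type (e.g. HIVE_INT)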
- if (isCsv) { + if (columnType == ColumnType.DATA && isCsv) { //TODO(https://github.com/trinodb/trino/issues/7240) Add tests return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment()), getColumnParameters(glueColumn)); } @@ -197,9 +197,9 @@ private static HiveType convertType(SchemaTableName table, com.amazonaws.service } } - private static List convertColumns(SchemaTableName table, List glueColumns, boolean isCsv) + private static List convertColumns(SchemaTableName table, List glueColumns, ColumnType columnType, boolean isCsv) { - return mappedCopy(glueColumns, glueColumn -> convertColumn(table, glueColumn, isCsv)); + return mappedCopy(glueColumns, glueColumn -> convertColumn(table, glueColumn, columnType, isCsv)); } private static Function, Map> parametersConverter() @@ -215,7 +215,7 @@ private static boolean isNullOrEmpty(List list) public static final class GluePartitionConverter implements Function { - private final BiFunction, Boolean, List> columnsConverter; + private final BiFunction, Boolean, List> dataColumnsConverter; private final Function, Map> parametersConverter = parametersConverter(); private final StorageConverter storageConverter = new StorageConverter(); private final String databaseName; @@ -228,7 +228,7 @@ public GluePartitionConverter(Table table) this.databaseName = requireNonNull(table.getDatabaseName(), "databaseName is null"); this.tableName = requireNonNull(table.getTableName(), "tableName is null"); this.tableParameters = table.getParameters(); - this.columnsConverter = memoizeLast((glueColumns, isCsv) -> convertColumns(table.getSchemaTableName(), glueColumns, isCsv)); + this.dataColumnsConverter = memoizeLast((glueColumns, isCsv) -> convertColumns(table.getSchemaTableName(), glueColumns, ColumnType.DATA, isCsv)); } @Override @@ -248,7 +248,7 @@ public Partition apply(com.amazonaws.services.glue.model.Partition gluePartition .setDatabaseName(databaseName) .setTableName(tableName) .setValues(gluePartition.getValues()) // No memoization benefit - .setColumns(columnsConverter.apply(sd.getColumns(), isCsv)) + .setColumns(dataColumnsConverter.apply(sd.getColumns(), isCsv)) .setParameters(parametersConverter.apply(getPartitionParameters(gluePartition))); storageConverter.setStorageBuilder(sd, partitionBuilder.getStorageBuilder(), tableParameters); @@ -347,4 +347,10 @@ public static List mappedCopy(List list, Function mapper) } return builder.build(); } + + private enum ColumnType + { + DATA, + PARTITION, + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index 5cd62ce4babf1..443aa3c04ac9c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -145,7 +145,7 @@ public void updatePartitionStatistics(Table table, Map getAllTables(String databaseName) + public List getTables(String databaseName) { return delegate.getAllTables(databaseName); } @@ -154,8 +154,8 @@ public List getAllTables(String databaseName) public Map getRelationTypes(String databaseName) { ImmutableMap.Builder relationTypes = ImmutableMap.builder(); - getAllTables(databaseName).forEach(name -> relationTypes.put(name, RelationType.TABLE)); - getAllViews(databaseName).forEach(name -> relationTypes.put(name, RelationType.VIEW)); + 
getTables(databaseName).forEach(name -> relationTypes.put(name, RelationType.TABLE)); + getViews(databaseName).forEach(name -> relationTypes.put(name, RelationType.VIEW)); return relationTypes.buildKeepingLast(); } @@ -166,7 +166,7 @@ public List getTablesWithParameter(String databaseName, String parameter } @Override - public List getAllViews(String databaseName) + public List getViews(String databaseName) { return delegate.getAllViews(databaseName); } @@ -178,7 +178,7 @@ public Optional> getAllTables() } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { return getAllTables().flatMap(relations -> getAllViews().map(views -> { ImmutableMap.Builder relationTypes = ImmutableMap.builder(); @@ -611,7 +611,7 @@ public boolean functionExists(String databaseName, String functionName, String s } @Override - public Collection getFunctions(String databaseName) + public Collection getAllFunctions(String databaseName) { return getFunctionsByPattern(databaseName, "trino__*"); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastoreFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastoreFactory.java index 140abcd519e94..1dc2ff2c5a51a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastoreFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastoreFactory.java @@ -14,8 +14,10 @@ package io.trino.plugin.hive.metastore.thrift; import com.google.inject.Inject; +import io.opentelemetry.api.trace.Tracer; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.tracing.TracingHiveMetastore; import io.trino.spi.security.ConnectorIdentity; import java.util.Optional; @@ -26,11 +28,13 @@ public class BridgingHiveMetastoreFactory implements HiveMetastoreFactory { private final ThriftMetastoreFactory thriftMetastoreFactory; + private final Tracer tracer; @Inject - public BridgingHiveMetastoreFactory(ThriftMetastoreFactory thriftMetastoreFactory) + public BridgingHiveMetastoreFactory(ThriftMetastoreFactory thriftMetastoreFactory, Tracer tracer) { this.thriftMetastoreFactory = requireNonNull(thriftMetastoreFactory, "thriftMetastore is null"); + this.tracer = requireNonNull(tracer, "tracer is null"); } @Override @@ -42,6 +46,7 @@ public boolean isImpersonationEnabled() @Override public HiveMetastore createMetastore(Optional identity) { - return new BridgingHiveMetastore(thriftMetastoreFactory.createMetastore(identity)); + return new TracingHiveMetastore(tracer, + new BridgingHiveMetastore(thriftMetastoreFactory.createMetastore(identity))); } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 0ae82fd221828..8f08137cf14d9 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -103,6 +103,7 @@ import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.stream.Stream; @@ -144,7 +145,7 @@ import static java.util.Objects.requireNonNull; 
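// Relatedly, the GluePartitionConverter change further up keeps a dataColumnsConverter built with
// memoizeLast, so that consecutive Glue partitions carrying an identical column list are converted
// only once. A minimal sketch of such a "remember the last call" wrapper over a plain
// java.util.function.BiFunction follows; the memoizeLast name mirrors the call site above, but this
// body is illustrative only and not the Trino implementation.

import java.util.Objects;
import java.util.function.BiFunction;

final class MemoizeLast
{
    private MemoizeLast() {}

    static <T, U, R> BiFunction<T, U, R> memoizeLast(BiFunction<T, U, R> delegate)
    {
        return new BiFunction<>()
        {
            private T lastFirst;
            private U lastSecond;
            private R lastResult;
            private boolean hasResult;

            @Override
            public synchronized R apply(T first, U second)
            {
                // Recompute only when either argument differs from the previous invocation
                if (!hasResult || !Objects.equals(lastFirst, first) || !Objects.equals(lastSecond, second)) {
                    lastResult = delegate.apply(first, second);
                    lastFirst = first;
                    lastSecond = second;
                    hasResult = true;
                }
                return lastResult;
            }
        };
    }
}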
@ThreadSafe -public class ThriftHiveMetastore +public final class ThriftHiveMetastore implements ThriftMetastore { private static final Logger log = Logger.get(ThriftHiveMetastore.class); @@ -1062,13 +1063,27 @@ public void createTable(Table table) @Override public void dropTable(String databaseName, String tableName, boolean deleteData) { + AtomicInteger attemptCount = new AtomicInteger(); try { retry() .stopOn(NoSuchObjectException.class) .stopOnIllegalExceptions() .run("dropTable", stats.getDropTable().wrap(() -> { try (ThriftMetastoreClient client = createMetastoreClient()) { - Table table = client.getTable(databaseName, tableName); + attemptCount.incrementAndGet(); + Table table; + try { + table = client.getTable(databaseName, tableName); + } + catch (NoSuchObjectException e) { + if (attemptCount.get() == 1) { + // Throw exception only on first attempt. + throw e; + } + // If table is not found on consecutive attempts it was probably dropped on first attempt and timeout occurred. + // Exception in such case can be safely ignored and dropping table is finished. + return null; + } client.dropTable(databaseName, tableName, deleteData); String tableLocation = table.getSd().getLocation(); if (deleteFilesOnDrop && deleteData && isManagedTable(table) && !isNullOrEmpty(tableLocation)) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java index 03043b64effae..4d8456d1ef62f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java @@ -46,7 +46,8 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -public interface ThriftMetastore +public sealed interface ThriftMetastore + permits ThriftHiveMetastore { void createDatabase(Database database); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java index c5d9fb8b95b8f..0a6a57306f49d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/tracing/TracingHiveMetastore.java @@ -148,17 +148,6 @@ public void updateTableStatistics(String databaseName, String tableName, AcidTra withTracing(span, () -> delegate.updateTableStatistics(databaseName, tableName, transaction, update)); } - @Override - public void updatePartitionStatistics(Table table, String partitionName, Function update) - { - Span span = tracer.spanBuilder("HiveMetastore.updatePartitionStatistics") - .setAttribute(SCHEMA, table.getDatabaseName()) - .setAttribute(TABLE, table.getTableName()) - .setAttribute(PARTITION, partitionName) - .startSpan(); - withTracing(span, () -> delegate.updatePartitionStatistics(table, partitionName, update)); - } - @Override public void updatePartitionStatistics(Table table, Map> updates) { @@ -170,13 +159,13 @@ public void updatePartitionStatistics(Table table, Map getAllTables(String databaseName) + public List getTables(String databaseName) { - Span span = tracer.spanBuilder("HiveMetastore.getAllTables") + Span span = tracer.spanBuilder("HiveMetastore.getTables") .setAttribute(SCHEMA, databaseName) .startSpan(); return 
withTracing(span, () -> { - List tables = delegate.getAllTables(databaseName); + List tables = delegate.getTables(databaseName); span.setAttribute(TABLE_RESPONSE_COUNT, tables.size()); return tables; }); @@ -208,12 +197,12 @@ public Map getRelationTypes(String databaseName) } @Override - public Optional> getRelationTypes() + public Optional> getAllRelationTypes() { - Span span = tracer.spanBuilder("HiveMetastore.getRelations") + Span span = tracer.spanBuilder("HiveMetastore.getAllRelationTypes") .startSpan(); return withTracing(span, () -> { - Optional> relationTypes = delegate.getRelationTypes(); + Optional> relationTypes = delegate.getAllRelationTypes(); relationTypes.ifPresent(map -> span.setAttribute(TABLE_RESPONSE_COUNT, map.size())); return relationTypes; }); @@ -234,13 +223,13 @@ public List getTablesWithParameter(String databaseName, String parameter } @Override - public List getAllViews(String databaseName) + public List getViews(String databaseName) { - Span span = tracer.spanBuilder("HiveMetastore.getAllViews") + Span span = tracer.spanBuilder("HiveMetastore.getViews") .setAttribute(SCHEMA, databaseName) .startSpan(); return withTracing(span, () -> { - List views = delegate.getAllViews(databaseName); + List views = delegate.getViews(databaseName); span.setAttribute(TABLE_RESPONSE_COUNT, views.size()); return views; }); @@ -702,13 +691,13 @@ public boolean functionExists(String databaseName, String functionName, String s } @Override - public Collection getFunctions(String databaseName) + public Collection getAllFunctions(String databaseName) { - Span span = tracer.spanBuilder("HiveMetastore.getFunctions") + Span span = tracer.spanBuilder("HiveMetastore.getAllFunctions") .setAttribute(SCHEMA, databaseName) .startSpan(); return withTracing(span, () -> { - Collection functions = delegate.getFunctions(databaseName); + Collection functions = delegate.getAllFunctions(databaseName); span.setAttribute(FUNCTION_RESPONSE_COUNT, functions.size()); return functions; }); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java index 00f050fa97b46..97b117b7645e6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java @@ -15,6 +15,7 @@ import io.trino.orc.metadata.OrcType.OrcTypeKind; import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer; +import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer; import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer; import io.trino.plugin.hive.coercions.DoubleToVarcharCoercer; import io.trino.plugin.hive.coercions.IntegerNumberToDoubleCoercer; @@ -25,6 +26,7 @@ import io.trino.plugin.hive.coercions.TypeCoercer; import io.trino.plugin.hive.coercions.VarcharToDoubleCoercer; import io.trino.spi.type.DateType; +import io.trino.spi.type.DecimalType; import io.trino.spi.type.DoubleType; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; @@ -34,6 +36,7 @@ import static io.trino.orc.metadata.OrcType.OrcTypeKind.BOOLEAN; import static io.trino.orc.metadata.OrcType.OrcTypeKind.BYTE; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.DATE; import static io.trino.orc.metadata.OrcType.OrcTypeKind.DOUBLE; import static io.trino.orc.metadata.OrcType.OrcTypeKind.INT; import static io.trino.orc.metadata.OrcType.OrcTypeKind.LONG; @@ -41,6 +44,7 @@ import static 
io.trino.orc.metadata.OrcType.OrcTypeKind.STRING; import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP; import static io.trino.orc.metadata.OrcType.OrcTypeKind.VARCHAR; +import static io.trino.plugin.hive.coercions.DecimalCoercers.createIntegerNumberToDecimalCoercer; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.SmallintType.SMALLINT; @@ -63,6 +67,9 @@ private OrcTypeTranslator() {} } return Optional.empty(); } + if (fromOrcType == DATE && toTrinoType instanceof VarcharType varcharType) { + return Optional.of(new DateToVarcharCoercer(varcharType)); + } if (isVarcharType(fromOrcType)) { if (toTrinoType instanceof TimestampType timestampType) { if (timestampType.isShort()) { @@ -98,6 +105,20 @@ private OrcTypeTranslator() {} return Optional.of(new IntegerNumberToDoubleCoercer<>(BIGINT)); } } + if (toTrinoType instanceof DecimalType decimalType) { + if (fromOrcType == BYTE) { + return Optional.of(createIntegerNumberToDecimalCoercer(TINYINT, decimalType)); + } + if (fromOrcType == SHORT) { + return Optional.of(createIntegerNumberToDecimalCoercer(SMALLINT, decimalType)); + } + if (fromOrcType == INT) { + return Optional.of(createIntegerNumberToDecimalCoercer(INTEGER, decimalType)); + } + if (fromOrcType == LONG) { + return Optional.of(createIntegerNumberToDecimalCoercer(BIGINT, decimalType)); + } + } return Optional.empty(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java index b4b1841f6e8e8..47d06429226e2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java @@ -37,13 +37,11 @@ public class ParquetReaderConfig private ParquetReaderOptions options = new ParquetReaderOptions(); - @Deprecated public boolean isIgnoreStatistics() { return options.isIgnoreStatistics(); } - @Deprecated @Config("parquet.ignore-statistics") @ConfigDescription("Ignore statistics from Parquet to allow querying files with corrupted or incorrect statistics") public ParquetReaderConfig setIgnoreStatistics(boolean ignoreStatistics) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/procedure/DropStatsProcedure.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/procedure/DropStatsProcedure.java index be2e8e2954412..b430db94f02d2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/procedure/DropStatsProcedure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/procedure/DropStatsProcedure.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.procedure; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; import com.google.inject.Provider; import io.trino.plugin.base.util.UncheckedCloseable; @@ -128,8 +129,9 @@ private void doDropStats(ConnectorSession session, ConnectorAccessControl access partitionStringValues.forEach(values -> metastore.updatePartitionStatistics( schema, table, - makePartName(partitionColumns, values), - stats -> PartitionStatistics.empty())); + ImmutableMap.of( + makePartName(partitionColumns, values), + stats -> PartitionStatistics.empty()))); } else { // no partition specified, so drop stats for the entire table @@ -147,8 +149,9 @@ private void doDropStats(ConnectorSession session, ConnectorAccessControl 
access .ifPresent(partitions -> partitions.forEach(partitionName -> metastore.updatePartitionStatistics( schema, table, - partitionName, - stats -> PartitionStatistics.empty()))); + ImmutableMap.of( + partitionName, + stats -> PartitionStatistics.empty())))); } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java index d424fcda56eeb..0af514ae19a7f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java @@ -81,22 +81,23 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest fromHiveType.equals(HIVE_LONG) || fromHiveType.equals(HIVE_TIMESTAMP) || fromHiveType.equals(HIVE_DOUBLE) || + fromHiveType.equals(HIVE_DATE) || fromType instanceof DecimalType; } if (toHiveType.equals(HIVE_DATE)) { return fromHiveType.equals(HIVE_TIMESTAMP); } if (fromHiveType.equals(HIVE_BYTE)) { - return toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE); + return toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType; } if (fromHiveType.equals(HIVE_SHORT)) { - return toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE); + return toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType; } if (fromHiveType.equals(HIVE_INT)) { - return toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE); + return toHiveType.equals(HIVE_LONG) || toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType; } if (fromHiveType.equals(HIVE_LONG)) { - return toHiveType.equals(HIVE_DOUBLE); + return toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType; } if (fromHiveType.equals(HIVE_FLOAT)) { return toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java index 2e6588d3d918d..545023c3809ef 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.util; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import io.airlift.units.DataSize; import io.trino.plugin.hive.AcidInfo; import io.trino.plugin.hive.HiveColumnHandle; @@ -21,9 +22,9 @@ import io.trino.plugin.hive.HiveSplit; import io.trino.plugin.hive.HiveSplit.BucketConversion; import io.trino.plugin.hive.HiveStorageFormat; +import io.trino.plugin.hive.HiveTypeName; import io.trino.plugin.hive.InternalHiveSplit; import io.trino.plugin.hive.InternalHiveSplit.InternalHiveBlock; -import io.trino.plugin.hive.TableToPartitionMapping; import io.trino.plugin.hive.fs.BlockLocation; import io.trino.plugin.hive.fs.TrinoFileStatus; import io.trino.plugin.hive.orc.OrcPageSourceFactory; @@ -53,7 +54,7 @@ public class InternalHiveSplitFactory private final Map strippedSchema; private final List partitionKeys; private final Optional pathDomain; - private final TableToPartitionMapping 
tableToPartitionMapping; + private final Map hiveColumnCoercions; private final BooleanSupplier partitionMatchSupplier; private final Optional bucketConversion; private final Optional bucketValidation; @@ -68,7 +69,7 @@ public InternalHiveSplitFactory( List partitionKeys, TupleDomain effectivePredicate, BooleanSupplier partitionMatchSupplier, - TableToPartitionMapping tableToPartitionMapping, + Map hiveColumnCoercions, Optional bucketConversion, Optional bucketValidation, DataSize minimumTargetSplitSize, @@ -81,7 +82,7 @@ public InternalHiveSplitFactory( this.partitionKeys = requireNonNull(partitionKeys, "partitionKeys is null"); pathDomain = getPathDomain(requireNonNull(effectivePredicate, "effectivePredicate is null")); this.partitionMatchSupplier = requireNonNull(partitionMatchSupplier, "partitionMatchSupplier is null"); - this.tableToPartitionMapping = requireNonNull(tableToPartitionMapping, "tableToPartitionMapping is null"); + this.hiveColumnCoercions = ImmutableMap.copyOf(requireNonNull(hiveColumnCoercions, "hiveColumnCoercions is null")); this.bucketConversion = requireNonNull(bucketConversion, "bucketConversion is null"); this.bucketValidation = requireNonNull(bucketValidation, "bucketValidation is null"); this.forceLocalScheduling = forceLocalScheduling; @@ -191,7 +192,7 @@ private Optional createInternalHiveSplit( tableBucketNumber, splittable, forceLocalScheduling && allBlocksHaveAddress(blocks), - tableToPartitionMapping, + hiveColumnCoercions, bucketConversion, bucketValidation, acidInfo, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java deleted file mode 100644 index f5c2468d71370..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java +++ /dev/null @@ -1,6368 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMultimap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; -import com.google.common.net.HostAndPort; -import io.airlift.json.JsonCodec; -import io.airlift.log.Logger; -import io.airlift.slice.Slice; -import io.airlift.stats.CounterStat; -import io.airlift.units.DataSize; -import io.airlift.units.Duration; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.TrinoFileSystemFactory; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.operator.GroupByHashPageIndexerFactory; -import io.trino.plugin.base.CatalogName; -import io.trino.plugin.base.metrics.LongCount; -import io.trino.plugin.hive.LocationService.WriteInfo; -import io.trino.plugin.hive.fs.DirectoryLister; -import io.trino.plugin.hive.fs.RemoteIterator; -import io.trino.plugin.hive.fs.TransactionScopeCachingDirectoryListerFactory; -import io.trino.plugin.hive.fs.TrinoFileStatus; -import io.trino.plugin.hive.fs.TrinoFileStatusRemoteIterator; -import io.trino.plugin.hive.line.LinePageSource; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.HivePrincipal; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.plugin.hive.metastore.Partition; -import io.trino.plugin.hive.metastore.PartitionWithStatistics; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore; -import io.trino.plugin.hive.metastore.SortingColumn; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; -import io.trino.plugin.hive.metastore.cache.CachingHiveMetastoreConfig; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; -import io.trino.plugin.hive.orc.OrcPageSource; -import io.trino.plugin.hive.parquet.ParquetPageSource; -import io.trino.plugin.hive.rcfile.RcFilePageSource; -import io.trino.plugin.hive.security.SqlStandardAccessControlMetadata; -import io.trino.spi.Page; -import io.trino.spi.TrinoException; -import io.trino.spi.block.Block; -import io.trino.spi.connector.Assignment; -import io.trino.spi.connector.CatalogSchemaTableName; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ColumnMetadata; -import io.trino.spi.connector.ConnectorBucketNodeMap; -import io.trino.spi.connector.ConnectorInsertTableHandle; -import io.trino.spi.connector.ConnectorMaterializedViewDefinition; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorNodePartitioningProvider; -import io.trino.spi.connector.ConnectorOutputTableHandle; -import io.trino.spi.connector.ConnectorPageSink; -import io.trino.spi.connector.ConnectorPageSinkProvider; -import io.trino.spi.connector.ConnectorPageSource; -import io.trino.spi.connector.ConnectorPageSourceProvider; -import 
io.trino.spi.connector.ConnectorPartitioningHandle; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorSplit; -import io.trino.spi.connector.ConnectorSplitManager; -import io.trino.spi.connector.ConnectorSplitSource; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.ConnectorTableLayout; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.ConnectorTableProperties; -import io.trino.spi.connector.ConnectorTransactionHandle; -import io.trino.spi.connector.ConnectorViewDefinition; -import io.trino.spi.connector.ConnectorViewDefinition.ViewColumn; -import io.trino.spi.connector.Constraint; -import io.trino.spi.connector.ConstraintApplicationResult; -import io.trino.spi.connector.DiscretePredicates; -import io.trino.spi.connector.DynamicFilter; -import io.trino.spi.connector.ProjectionApplicationResult; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.SchemaTablePrefix; -import io.trino.spi.connector.SortingProperty; -import io.trino.spi.connector.TableColumnsMetadata; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.connector.TableScanRedirectApplicationResult; -import io.trino.spi.connector.ViewNotFoundException; -import io.trino.spi.expression.ConnectorExpression; -import io.trino.spi.expression.FieldDereference; -import io.trino.spi.expression.Variable; -import io.trino.spi.metrics.Metrics; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.NullableValue; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.predicate.ValueSet; -import io.trino.spi.statistics.ColumnStatistics; -import io.trino.spi.statistics.TableStatistics; -import io.trino.spi.type.ArrayType; -import io.trino.spi.type.CharType; -import io.trino.spi.type.MapType; -import io.trino.spi.type.NamedTypeSignature; -import io.trino.spi.type.RowFieldName; -import io.trino.spi.type.RowType; -import io.trino.spi.type.SqlDate; -import io.trino.spi.type.SqlTimestamp; -import io.trino.spi.type.SqlTimestampWithTimeZone; -import io.trino.spi.type.SqlVarbinary; -import io.trino.spi.type.Type; -import io.trino.spi.type.TypeId; -import io.trino.spi.type.TypeOperators; -import io.trino.spi.type.VarcharType; -import io.trino.sql.gen.JoinCompiler; -import io.trino.testing.MaterializedResult; -import io.trino.testing.MaterializedRow; -import io.trino.testing.TestingConnectorSession; -import io.trino.testing.TestingNodeManager; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.assertj.core.api.InstanceOfAssertFactories; -import org.joda.time.DateTime; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; - -import java.io.IOException; -import java.io.OutputStream; -import java.math.BigDecimal; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalDouble; -import java.util.OptionalInt; -import java.util.OptionalLong; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutorService; -import 
java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.IntStream; -import java.util.stream.LongStream; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Iterables.concat; -import static com.google.common.collect.Iterables.getOnlyElement; -import static com.google.common.collect.Lists.newArrayList; -import static com.google.common.collect.Lists.reverse; -import static com.google.common.collect.Maps.uniqueIndex; -import static com.google.common.collect.MoreCollectors.onlyElement; -import static com.google.common.collect.Sets.difference; -import static com.google.common.collect.Streams.stream; -import static com.google.common.hash.Hashing.sha256; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.airlift.concurrent.Threads.daemonThreadsNamed; -import static io.airlift.slice.Slices.utf8Slice; -import static io.airlift.testing.Assertions.assertGreaterThan; -import static io.airlift.testing.Assertions.assertGreaterThanOrEqual; -import static io.airlift.testing.Assertions.assertInstanceOf; -import static io.airlift.testing.Assertions.assertLessThanOrEqual; -import static io.airlift.units.DataSize.Unit.KILOBYTE; -import static io.trino.parquet.reader.ParquetReader.PARQUET_CODEC_METRIC_PREFIX; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics; -import static io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; -import static io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH; -import static 
io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; -import static io.trino.plugin.hive.HiveMetadata.TRINO_VERSION_NAME; -import static io.trino.plugin.hive.HiveStorageFormat.AVRO; -import static io.trino.plugin.hive.HiveStorageFormat.CSV; -import static io.trino.plugin.hive.HiveStorageFormat.JSON; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; -import static io.trino.plugin.hive.HiveStorageFormat.RCBINARY; -import static io.trino.plugin.hive.HiveStorageFormat.RCTEXT; -import static io.trino.plugin.hive.HiveStorageFormat.REGEX; -import static io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import static io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER; -import static io.trino.plugin.hive.HiveTestUtils.SESSION; -import static io.trino.plugin.hive.HiveTestUtils.arrayType; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties; -import static io.trino.plugin.hive.HiveTestUtils.getTypes; -import static io.trino.plugin.hive.HiveTestUtils.mapType; -import static io.trino.plugin.hive.HiveTestUtils.rowType; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.HiveType.HIVE_LONG; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.HiveType.toHiveType; -import static io.trino.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; -import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics; -import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; -import static 
io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING; -import static io.trino.plugin.hive.metastore.SortingColumn.Order.DESCENDING; -import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; -import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createCachingHiveMetastore; -import static io.trino.plugin.hive.orc.OrcPageSource.ORC_CODEC_METRIC_PREFIX; -import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo; -import static io.trino.plugin.hive.util.HiveUtil.toPartitionValues; -import static io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.StandardErrorCode.TRANSACTION_CONFLICT; -import static io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST; -import static io.trino.spi.connector.SortOrder.DESC_NULLS_LAST; -import static io.trino.spi.security.PrincipalType.USER; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.BooleanType.BOOLEAN; -import static io.trino.spi.type.CharType.createCharType; -import static io.trino.spi.type.DateType.DATE; -import static io.trino.spi.type.DecimalType.createDecimalType; -import static io.trino.spi.type.DoubleType.DOUBLE; -import static io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG; -import static io.trino.spi.type.IntegerType.INTEGER; -import static io.trino.spi.type.RealType.REAL; -import static io.trino.spi.type.SmallintType.SMALLINT; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; -import static io.trino.spi.type.TinyintType.TINYINT; -import static io.trino.spi.type.VarbinaryType.VARBINARY; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; -import static io.trino.spi.type.VarcharType.createVarcharType; -import static io.trino.testing.DateTimeTestingUtils.sqlTimestampOf; -import static io.trino.testing.MaterializedResult.materializeSourceDataStream; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingPageSinkId.TESTING_PAGE_SINK_ID; -import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.lang.Float.floatToRawIntBits; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.nio.file.Files.createTempDirectory; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static 
java.util.concurrent.TimeUnit.MINUTES; -import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.stream.Collectors.toList; -import static org.apache.hadoop.hive.common.FileUtils.makePartName; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Fail.fail; -import static org.assertj.core.data.Offset.offset; -import static org.joda.time.DateTimeZone.UTC; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; - -@TestInstance(PER_CLASS) -@Execution(SAME_THREAD) // staging directory is shared mutable state -public abstract class AbstractTestHive -{ - private static final Logger log = Logger.get(AbstractTestHive.class); - - protected static final String TEMPORARY_TABLE_PREFIX = "tmp_trino_test_"; - - protected static final String INVALID_DATABASE = "totally_invalid_database_name"; - protected static final String INVALID_TABLE = "totally_invalid_table_name"; - - protected static final String TEST_SERVER_VERSION = "test_version"; - - private static final Type ARRAY_TYPE = arrayType(createUnboundedVarcharType()); - private static final Type MAP_TYPE = mapType(createUnboundedVarcharType(), BIGINT); - private static final Type ROW_TYPE = rowType(ImmutableList.of( - new NamedTypeSignature(Optional.of(new RowFieldName("f_string")), createUnboundedVarcharType().getTypeSignature()), - new NamedTypeSignature(Optional.of(new RowFieldName("f_bigint")), BIGINT.getTypeSignature()), - new NamedTypeSignature(Optional.of(new RowFieldName("f_boolean")), BOOLEAN.getTypeSignature()))); - - private static final List CREATE_TABLE_COLUMNS = ImmutableList.builder() - .add(new ColumnMetadata("id", BIGINT)) - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata("t_tinyint", TINYINT)) - .add(new ColumnMetadata("t_smallint", SMALLINT)) - .add(new ColumnMetadata("t_integer", INTEGER)) - .add(new ColumnMetadata("t_bigint", BIGINT)) - .add(new ColumnMetadata("t_float", REAL)) - .add(new ColumnMetadata("t_double", DOUBLE)) - .add(new ColumnMetadata("t_boolean", BOOLEAN)) - .add(new ColumnMetadata("t_array", ARRAY_TYPE)) - .add(new ColumnMetadata("t_map", MAP_TYPE)) - .add(new ColumnMetadata("t_row", ROW_TYPE)) - .build(); - - private static final MaterializedResult CREATE_TABLE_DATA = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), TINYINT, SMALLINT, INTEGER, BIGINT, REAL, DOUBLE, BOOLEAN, ARRAY_TYPE, MAP_TYPE, ROW_TYPE) - .row(1L, "hello", (byte) 45, (short) 345, 234, 123L, -754.1985f, 43.5, true, ImmutableList.of("apple", "banana"), ImmutableMap.of("one", 1L, "two", 2L), ImmutableList.of("true", 1L, true)) - .row(2L, null, null, null, null, null, null, null, null, null, null, null) - .row(3L, "bye", (byte) 46, (short) 346, 345, 456L, 754.2008f, 98.1, false, ImmutableList.of("ape", "bear"), ImmutableMap.of("three", 3L, "four", 4L), ImmutableList.of("false", 0L, false)) - .build(); - - protected static final List CREATE_TABLE_COLUMNS_PARTITIONED = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - protected static final Set COLUMN_NAMES_PARTITIONED = CREATE_TABLE_COLUMNS_PARTITIONED.stream().map(ColumnMetadata::getName).collect(toImmutableSet()); - - protected static final Predicate PARTITION_COLUMN_FILTER = columnName -> columnName.equals("ds") || 
columnName.startsWith("part_"); - - private static final MaterializedResult CREATE_TABLE_PARTITIONED_DATA = new MaterializedResult( - CREATE_TABLE_DATA.getMaterializedRows().stream() - .map(row -> new MaterializedRow(row.getPrecision(), newArrayList(concat(row.getFields(), ImmutableList.of("2015-07-0" + row.getField(0)))))) - .collect(toList()), - ImmutableList.builder() - .addAll(CREATE_TABLE_DATA.getTypes()) - .add(createUnboundedVarcharType()) - .build()); - - private static final String CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE = "2015-07-04"; - - private static final MaterializedResult CREATE_TABLE_PARTITIONED_DATA_2ND = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), TINYINT, SMALLINT, INTEGER, BIGINT, REAL, DOUBLE, BOOLEAN, ARRAY_TYPE, MAP_TYPE, ROW_TYPE, createUnboundedVarcharType()) - .row(4L, "hello", (byte) 45, (short) 345, 234, 123L, 754.1985f, 43.5, true, ImmutableList.of("apple", "banana"), ImmutableMap.of("one", 1L, "two", 2L), ImmutableList.of("true", 1L, true), CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .row(5L, null, null, null, null, null, null, null, null, null, null, null, CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .row(6L, "bye", (byte) 46, (short) 346, 345, 456L, -754.2008f, 98.1, false, ImmutableList.of("ape", "bear"), ImmutableMap.of("three", 3L, "four", 4L), ImmutableList.of("false", 0L, false), CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .build(); - - private static final List MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE = ImmutableList.builder() - .add(new ColumnMetadata("tinyint_to_smallint", TINYINT)) - .add(new ColumnMetadata("tinyint_to_integer", TINYINT)) - .add(new ColumnMetadata("tinyint_to_bigint", TINYINT)) - .add(new ColumnMetadata("smallint_to_integer", SMALLINT)) - .add(new ColumnMetadata("smallint_to_bigint", SMALLINT)) - .add(new ColumnMetadata("integer_to_bigint", INTEGER)) - .add(new ColumnMetadata("integer_to_varchar", INTEGER)) - .add(new ColumnMetadata("varchar_to_integer", createUnboundedVarcharType())) - .add(new ColumnMetadata("float_to_double", REAL)) - .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType())) - .build(); - - private static final List MISMATCH_SCHEMA_TABLE_BEFORE = ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE) - .add(new ColumnMetadata("struct_to_struct", toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE))) - .add(new ColumnMetadata("list_to_list", arrayType(toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE)))) - .add(new ColumnMetadata("map_to_map", mapType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE.get(1).getType(), toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE)))) - .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - private static RowType toRowType(List columns) - { - return rowType(columns.stream() - .map(col -> new NamedTypeSignature(Optional.of(new RowFieldName(format("f_%s", col.getName()))), col.getType().getTypeSignature())) - .collect(toImmutableList())); - } - - private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_BEFORE = - MaterializedResult.resultBuilder(SESSION, TINYINT, TINYINT, TINYINT, SMALLINT, SMALLINT, INTEGER, INTEGER, createUnboundedVarcharType(), REAL, createUnboundedVarcharType()) - .row((byte) -11, (byte) 12, (byte) -13, (short) 14, (short) 15, -16, 17, "2147483647", 18.0f, "2016-08-01") - .row((byte) 21, (byte) -22, (byte) 23, (short) -24, (short) 25, 26, -27, "asdf", -28.0f, "2016-08-02") - .row((byte) -31, (byte) -32, (byte) 
33, (short) 34, (short) -35, 36, 37, "-923", 39.5f, "2016-08-03") - .row(null, (byte) 42, (byte) 43, (short) 44, (short) -45, 46, 47, "2147483648", 49.5f, "2016-08-03") - .build(); - - private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_BEFORE = - MaterializedResult.resultBuilder(SESSION, MISMATCH_SCHEMA_TABLE_BEFORE.stream().map(ColumnMetadata::getType).collect(toImmutableList())) - .rows(MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_BEFORE.getMaterializedRows() - .stream() - .map(materializedRow -> { - List result = materializedRow.getFields(); - List rowResult = materializedRow.getFields(); - result.add(rowResult); - result.add(Arrays.asList(rowResult, null, rowResult)); - result.add(ImmutableMap.of(rowResult.get(1), rowResult)); - result.add(rowResult.get(9)); - return new MaterializedRow(materializedRow.getPrecision(), result); - }).collect(toImmutableList())) - .build(); - - private static final List MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER = ImmutableList.builder() - .add(new ColumnMetadata("tinyint_to_smallint", SMALLINT)) - .add(new ColumnMetadata("tinyint_to_integer", INTEGER)) - .add(new ColumnMetadata("tinyint_to_bigint", BIGINT)) - .add(new ColumnMetadata("smallint_to_integer", INTEGER)) - .add(new ColumnMetadata("smallint_to_bigint", BIGINT)) - .add(new ColumnMetadata("integer_to_bigint", BIGINT)) - .add(new ColumnMetadata("integer_to_varchar", createUnboundedVarcharType())) - .add(new ColumnMetadata("varchar_to_integer", INTEGER)) - .add(new ColumnMetadata("float_to_double", DOUBLE)) - .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType())) - .build(); - - private static final Type MISMATCH_SCHEMA_ROW_TYPE_APPEND = toRowType(ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER) - .add(new ColumnMetadata(format("%s_append", MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(0).getName()), MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(0).getType())) - .build()); - private static final Type MISMATCH_SCHEMA_ROW_TYPE_DROP = toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.subList(0, MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.size() - 1)); - - private static final List MISMATCH_SCHEMA_TABLE_AFTER = ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER) - .add(new ColumnMetadata("struct_to_struct", MISMATCH_SCHEMA_ROW_TYPE_APPEND)) - .add(new ColumnMetadata("list_to_list", arrayType(MISMATCH_SCHEMA_ROW_TYPE_APPEND))) - .add(new ColumnMetadata("map_to_map", mapType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(1).getType(), MISMATCH_SCHEMA_ROW_TYPE_DROP))) - .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_AFTER = - MaterializedResult.resultBuilder(SESSION, SMALLINT, INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, createUnboundedVarcharType(), INTEGER, DOUBLE, createUnboundedVarcharType()) - .row((short) -11, 12, -13L, 14, 15L, -16L, "17", 2147483647, 18.0, "2016-08-01") - .row((short) 21, -22, 23L, -24, 25L, 26L, "-27", null, -28.0, "2016-08-02") - .row((short) -31, -32, 33L, 34, -35L, 36L, "37", -923, 39.5, "2016-08-03") - .row(null, 42, 43L, 44, -45L, 46L, "47", null, 49.5, "2016-08-03") - .build(); - - private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_AFTER = - MaterializedResult.resultBuilder(SESSION, MISMATCH_SCHEMA_TABLE_AFTER.stream().map(ColumnMetadata::getType).collect(toImmutableList())) - .rows(MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_AFTER.getMaterializedRows() - .stream() - .map(materializedRow -> { - List 
result = materializedRow.getFields(); - List appendFieldRowResult = materializedRow.getFields(); - appendFieldRowResult.add(null); - List dropFieldRowResult = materializedRow.getFields().subList(0, materializedRow.getFields().size() - 1); - result.add(appendFieldRowResult); - result.add(Arrays.asList(appendFieldRowResult, null, appendFieldRowResult)); - result.add(ImmutableMap.of(result.get(1), dropFieldRowResult)); - result.add(result.get(9)); - return new MaterializedRow(materializedRow.getPrecision(), result); - }).collect(toImmutableList())) - .build(); - - protected Set createTableFormats = difference( - ImmutableSet.copyOf(HiveStorageFormat.values()), - // exclude formats that change table schema with serde and read-only formats - ImmutableSet.of(AVRO, CSV, REGEX)); - - private static final JoinCompiler JOIN_COMPILER = new JoinCompiler(new TypeOperators()); - - protected static final List STATISTICS_TABLE_COLUMNS = ImmutableList.builder() - .add(new ColumnMetadata("t_boolean", BOOLEAN)) - .add(new ColumnMetadata("t_bigint", BIGINT)) - .add(new ColumnMetadata("t_integer", INTEGER)) - .add(new ColumnMetadata("t_smallint", SMALLINT)) - .add(new ColumnMetadata("t_tinyint", TINYINT)) - .add(new ColumnMetadata("t_double", DOUBLE)) - .add(new ColumnMetadata("t_float", REAL)) - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata("t_varchar", createVarcharType(100))) - .add(new ColumnMetadata("t_char", createCharType(5))) - .add(new ColumnMetadata("t_varbinary", VARBINARY)) - .add(new ColumnMetadata("t_date", DATE)) - .add(new ColumnMetadata("t_timestamp", TIMESTAMP_MILLIS)) - .add(new ColumnMetadata("t_short_decimal", createDecimalType(5, 2))) - .add(new ColumnMetadata("t_long_decimal", createDecimalType(20, 3))) - .build(); - - protected static final List STATISTICS_PARTITIONED_TABLE_COLUMNS = ImmutableList.builder() - .addAll(STATISTICS_TABLE_COLUMNS) - .add(new ColumnMetadata("ds", VARCHAR)) - .build(); - - protected static final PartitionStatistics ZERO_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); - protected static final PartitionStatistics EMPTY_ROWCOUNT_STATISTICS = ZERO_TABLE_STATISTICS.withBasicStatistics(ZERO_TABLE_STATISTICS.getBasicStatistics().withEmptyRowCount()); - protected static final PartitionStatistics BASIC_STATISTICS_1 = new PartitionStatistics(new HiveBasicStatistics(0, 20, 3, 0), ImmutableMap.of()); - protected static final PartitionStatistics BASIC_STATISTICS_2 = new PartitionStatistics(new HiveBasicStatistics(0, 30, 2, 0), ImmutableMap.of()); - - protected static final PartitionStatistics STATISTICS_1 = - new PartitionStatistics( - BASIC_STATISTICS_1.getBasicStatistics(), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(5), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.of(1234L), OptionalLong.of(5678L), OptionalLong.of(2), OptionalLong.of(5))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.of(123L), OptionalLong.of(567L), OptionalLong.of(3), OptionalLong.of(4))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.of(12L), OptionalLong.of(56L), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.of(1L), OptionalLong.of(2L), OptionalLong.of(1), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.of(1234.25), OptionalDouble.of(5678.58), OptionalLong.of(7), OptionalLong.of(8))) - 
.put("t_float", createDoubleColumnStatistics(OptionalDouble.of(123.25), OptionalDouble.of(567.58), OptionalLong.of(9), OptionalLong.of(10))) - .put("t_string", createStringColumnStatistics(OptionalLong.of(10), OptionalLong.of(50), OptionalLong.of(3), OptionalLong.of(7))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(100), OptionalLong.of(230), OptionalLong.of(5), OptionalLong.of(3))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(5), OptionalLong.of(50), OptionalLong.of(1), OptionalLong.of(4))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(4), OptionalLong.of(50), OptionalLong.of(1))) - .put("t_date", createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)), OptionalLong.of(7), OptionalLong.of(6))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.of(1234567L), OptionalLong.of(71234567L), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal(10)), Optional.of(new BigDecimal(12)), OptionalLong.of(3), OptionalLong.of(5))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal("12345678901234567.123")), Optional.of(new BigDecimal("81234567890123456.123")), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected static final PartitionStatistics STATISTICS_1_1 = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(15), OptionalLong.empty(), OptionalLong.of(0)), - STATISTICS_1.getColumnStatistics().entrySet() - .stream() - .filter(entry -> entry.getKey().hashCode() % 2 == 0) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - - protected static final PartitionStatistics STATISTICS_1_2 = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(15), OptionalLong.of(3), OptionalLong.of(0)), - STATISTICS_1.getColumnStatistics().entrySet() - .stream() - .filter(entry -> entry.getKey().hashCode() % 2 == 1) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - - protected static final PartitionStatistics STATISTICS_2 = - new PartitionStatistics( - BASIC_STATISTICS_2.getBasicStatistics(), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.of(2345L), OptionalLong.of(6789L), OptionalLong.of(4), OptionalLong.of(7))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.of(234L), OptionalLong.of(678L), OptionalLong.of(5), OptionalLong.of(6))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.of(23L), OptionalLong.of(65L), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.of(3L), OptionalLong.of(12L), OptionalLong.of(2), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.of(2345.25), OptionalDouble.of(6785.58), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_float", createDoubleColumnStatistics(OptionalDouble.of(235.25), OptionalDouble.of(676.58), OptionalLong.of(7), OptionalLong.of(11))) - .put("t_string", createStringColumnStatistics(OptionalLong.of(301), OptionalLong.of(600), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(99), OptionalLong.of(223), OptionalLong.of(7), OptionalLong.of(1))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(6), 
OptionalLong.of(60), OptionalLong.of(0), OptionalLong.of(3))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(2), OptionalLong.of(10), OptionalLong.of(2))) - .put("t_date", createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(2)), Optional.of(LocalDate.ofEpochDay(3)), OptionalLong.of(8), OptionalLong.of(7))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.of(2345671L), OptionalLong.of(12345677L), OptionalLong.of(9), OptionalLong.of(1))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal(11)), Optional.of(new BigDecimal(14)), OptionalLong.of(5), OptionalLong.of(7))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal("71234567890123456.123")), Optional.of(new BigDecimal("78123456789012345.123")), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected static final PartitionStatistics STATISTICS_EMPTY_OPTIONAL_FIELDS = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(20), OptionalLong.empty(), OptionalLong.of(0)), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(4), OptionalLong.of(7))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(6))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.empty(), OptionalDouble.empty(), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_float", createDoubleColumnStatistics(OptionalDouble.empty(), OptionalDouble.empty(), OptionalLong.of(7), OptionalLong.of(11))) - .put("t_string", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(7), OptionalLong.of(1))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(3))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(2))) - // https://issues.apache.org/jira/browse/HIVE-20098 - // .put("t_date", createDateColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(8), OptionalLong.of(7))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(9), OptionalLong.of(1))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(5), OptionalLong.of(7))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected String database; - protected SchemaTableName tablePartitionFormat; - protected SchemaTableName tableUnpartitioned; - protected SchemaTableName tablePartitionedWithNull; - protected SchemaTableName tableOffline; - protected SchemaTableName tableNotReadable; - protected SchemaTableName view; - protected SchemaTableName invalidTable; - protected SchemaTableName tableBucketedStringInt; - protected 
SchemaTableName tableBucketedBigintBoolean; - protected SchemaTableName tableBucketedDoubleFloat; - protected SchemaTableName tablePartitionSchemaChange; - protected SchemaTableName tablePartitionSchemaChangeNonCanonical; - - protected ConnectorTableHandle invalidTableHandle; - - protected ColumnHandle dsColumn; - protected ColumnHandle fileFormatColumn; - protected ColumnHandle dummyColumn; - protected ColumnHandle intColumn; - protected ColumnHandle pStringColumn; - protected ColumnHandle pIntegerColumn; - - protected ConnectorTableProperties tablePartitionFormatProperties; - protected List tablePartitionFormatPartitions; - protected List tableUnpartitionedPartitions; - - protected HdfsEnvironment hdfsEnvironment; - protected LocationService locationService; - - protected CountingDirectoryLister countingDirectoryLister; - protected HiveMetadataFactory metadataFactory; - protected HiveTransactionManager transactionManager; - protected HiveMetastore metastoreClient; - protected ConnectorSplitManager splitManager; - protected ConnectorPageSourceProvider pageSourceProvider; - protected ConnectorPageSinkProvider pageSinkProvider; - protected ConnectorNodePartitioningProvider nodePartitioningProvider; - protected ExecutorService executor; - - private ScheduledExecutorService heartbeatService; - private java.nio.file.Path temporaryStagingDirectory; - - protected final Set materializedViews = Sets.newConcurrentHashSet(); - - @BeforeAll - public void setupClass() - throws Exception - { - executor = newCachedThreadPool(daemonThreadsNamed("hive-%s")); - heartbeatService = newScheduledThreadPool(1); - // Use separate staging directory for each test class to prevent intermittent failures coming from test parallelism - temporaryStagingDirectory = createTempDirectory("trino-staging-"); - } - - @AfterAll - public void tearDown() - { - if (executor != null) { - executor.shutdownNow(); - executor = null; - } - if (heartbeatService != null) { - heartbeatService.shutdownNow(); - heartbeatService = null; - } - if (temporaryStagingDirectory != null) { - try { - deleteRecursively(temporaryStagingDirectory, ALLOW_INSECURE); - } - catch (Exception e) { - log.warn(e, "Error deleting %s", temporaryStagingDirectory); - } - } - } - - protected void setupHive(String databaseName) - { - database = databaseName; - tablePartitionFormat = new SchemaTableName(database, "trino_test_partition_format"); - tableUnpartitioned = new SchemaTableName(database, "trino_test_unpartitioned"); - tablePartitionedWithNull = new SchemaTableName(database, "trino_test_partitioned_with_null"); - tableOffline = new SchemaTableName(database, "trino_test_offline"); - tableNotReadable = new SchemaTableName(database, "trino_test_not_readable"); - view = new SchemaTableName(database, "trino_test_view"); - invalidTable = new SchemaTableName(database, INVALID_TABLE); - tableBucketedStringInt = new SchemaTableName(database, "trino_test_bucketed_by_string_int"); - tableBucketedBigintBoolean = new SchemaTableName(database, "trino_test_bucketed_by_bigint_boolean"); - tableBucketedDoubleFloat = new SchemaTableName(database, "trino_test_bucketed_by_double_float"); - tablePartitionSchemaChange = new SchemaTableName(database, "trino_test_partition_schema_change"); - tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "trino_test_partition_schema_change_non_canonical"); - - invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty()); - - dsColumn = 
createBaseColumn("ds", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - fileFormatColumn = createBaseColumn("file_format", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - dummyColumn = createBaseColumn("dummy", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - intColumn = createBaseColumn("t_int", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - pStringColumn = createBaseColumn("p_string", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - pIntegerColumn = createBaseColumn("p_integer", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - - List partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn); - tablePartitionFormatPartitions = ImmutableList.builder() - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=textfile/dummy=1", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile"))) - .put(dummyColumn, NullableValue.of(INTEGER, 1L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=sequencefile/dummy=2", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile"))) - .put(dummyColumn, NullableValue.of(INTEGER, 2L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=rctext/dummy=3", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext"))) - .put(dummyColumn, NullableValue.of(INTEGER, 3L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=rcbinary/dummy=4", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary"))) - .put(dummyColumn, NullableValue.of(INTEGER, 4L)) - .buildOrThrow())) - .build(); - tableUnpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned)); - tablePartitionFormatProperties = new ConnectorTableProperties( - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")), Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")), Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")), Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L), Range.equal(INTEGER, 2L), Range.equal(INTEGER, 3L), Range.equal(INTEGER, 4L)), false))), - Optional.empty(), - Optional.of(new DiscretePredicates(partitionColumns, ImmutableList.of( - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), 
false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))), - ImmutableList.of()); - } - - protected final void setup(HostAndPort metastoreAddress, String databaseName) - { - HiveConfig hiveConfig = getHiveConfig() - .setParquetTimeZone("UTC") - .setRcfileTimeZone("UTC"); - - hdfsEnvironment = HDFS_ENVIRONMENT; - - CachingHiveMetastoreConfig cachingHiveMetastoreConfig = new CachingHiveMetastoreConfig(); - HiveMetastore metastore = createCachingHiveMetastore( - new BridgingHiveMetastore(testingThriftHiveMetastoreBuilder() - .metastoreClient(metastoreAddress) - .hiveConfig(hiveConfig) - .thriftMetastoreConfig(new ThriftMetastoreConfig() - .setAssumeCanonicalPartitionKeys(true)) - .fileSystemFactory(new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS)) - .build()), - new Duration(1, MINUTES), - new Duration(1, MINUTES), - Optional.of(new Duration(15, SECONDS)), - executor, - 10000, - CachingHiveMetastore.StatsRecording.ENABLED, - cachingHiveMetastoreConfig.isCacheMissing(), - cachingHiveMetastoreConfig.isPartitionCacheEnabled()); - - setup(databaseName, hiveConfig, metastore, hdfsEnvironment); - } - - protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration) - { - setupHive(databaseName); - - metastoreClient = hiveMetastore; - hdfsEnvironment = hdfsConfiguration; - HivePartitionManager partitionManager = new HivePartitionManager(hiveConfig); - HdfsFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS); - locationService = new HiveLocationService(fileSystemFactory, hiveConfig); - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - countingDirectoryLister = new CountingDirectoryLister(); - metadataFactory = new HiveMetadataFactory( - new CatalogName("hive"), - HiveMetastoreFactory.ofInstance(metastoreClient), - getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), - fileSystemFactory, - partitionManager, - 10, - 10, - 10, - 100_000, - false, - false, - false, - true, - true, - false, - false, - 1000, - Optional.empty(), - true, - TESTING_TYPE_MANAGER, - NOOP_METADATA_PROVIDER, - locationService, - partitionUpdateCodec, - executor, - heartbeatService, - TEST_SERVER_VERSION, - (session, tableHandle) -> { - if (!tableHandle.getTableName().contains("apply_redirection_tester")) { - return Optional.empty(); - } - return Optional.of(new 
TableScanRedirectApplicationResult( - new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target"), - ImmutableMap.of(), - TupleDomain.all())); - }, - ImmutableSet.of( - new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER), - new PropertiesSystemTableProvider()), - metastore -> new NoneHiveMaterializedViewMetadata() - { - @Override - public List listMaterializedViews(ConnectorSession session, Optional schemaName) - { - return materializedViews.stream() - .filter(schemaName - .>map(name -> mvName -> mvName.getSchemaName().equals(name)) - .orElse(mvName -> true)) - .collect(toImmutableList()); - } - - @Override - public Optional getMaterializedView(ConnectorSession session, SchemaTableName viewName) - { - if (!viewName.getTableName().contains("materialized_view_tester")) { - return Optional.empty(); - } - return Optional.of(new ConnectorMaterializedViewDefinition( - "dummy_view_sql", - Optional.empty(), - Optional.empty(), - Optional.empty(), - ImmutableList.of(new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"), Optional.empty())), - Optional.of(java.time.Duration.ZERO), - Optional.empty(), - Optional.of("alice"), - ImmutableList.of(), - ImmutableMap.of())); - } - }, - SqlStandardAccessControlMetadata::new, - countingDirectoryLister, - new TransactionScopeCachingDirectoryListerFactory(hiveConfig), - false, - true, - HiveTimestampPrecision.DEFAULT_PRECISION); - transactionManager = new HiveTransactionManager(metadataFactory); - splitManager = new HiveSplitManager( - transactionManager, - partitionManager, - fileSystemFactory, - executor, - new CounterStat(), - 100, - hiveConfig.getMaxOutstandingSplitsSize(), - hiveConfig.getMinPartitionBatchSize(), - hiveConfig.getMaxPartitionBatchSize(), - hiveConfig.getMaxInitialSplits(), - hiveConfig.getSplitLoaderConcurrency(), - hiveConfig.getMaxSplitsPerSecond(), - false, - TESTING_TYPE_MANAGER, - hiveConfig.getMaxPartitionsPerScan()); - pageSinkProvider = new HivePageSinkProvider( - getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), - fileSystemFactory, - PAGE_SORTER, - HiveMetastoreFactory.ofInstance(metastoreClient), - new GroupByHashPageIndexerFactory(JOIN_COMPILER), - TESTING_TYPE_MANAGER, - getHiveConfig(), - getSortingFileWriterConfig(), - locationService, - partitionUpdateCodec, - new TestingNodeManager("fake-environment"), - new HiveEventClient(), - getHiveSessionProperties(hiveConfig), - new HiveWriterStats()); - pageSourceProvider = new HivePageSourceProvider( - TESTING_TYPE_MANAGER, - hiveConfig, - getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig)); - nodePartitioningProvider = new HiveNodePartitioningProvider( - new TestingNodeManager("fake-environment"), - TESTING_TYPE_MANAGER); - } - - /** - * Allow subclass to change default configuration. 
- */ - protected HiveConfig getHiveConfig() - { - return new HiveConfig() - .setTemporaryStagingDirectoryPath(temporaryStagingDirectory.resolve("temp_path_").toAbsolutePath().toString()); - } - - protected SortingFileWriterConfig getSortingFileWriterConfig() - { - return new SortingFileWriterConfig() - .setMaxOpenSortFiles(10) - .setWriterSortBufferSize(DataSize.of(100, KILOBYTE)); - } - - protected ConnectorSession newSession() - { - return newSession(ImmutableMap.of()); - } - - protected ConnectorSession newSession(Map propertyValues) - { - return TestingConnectorSession.builder() - .setPropertyMetadata(getHiveSessionProperties(getHiveConfig()).getSessionProperties()) - .setPropertyValues(propertyValues) - .build(); - } - - protected Transaction newTransaction() - { - return new HiveTransaction(transactionManager); - } - - protected interface Transaction - extends AutoCloseable - { - ConnectorMetadata getMetadata(); - - SemiTransactionalHiveMetastore getMetastore(); - - ConnectorTransactionHandle getTransactionHandle(); - - void commit(); - - void rollback(); - - @Override - void close(); - } - - static class HiveTransaction - implements Transaction - { - private final HiveTransactionManager transactionManager; - private final ConnectorTransactionHandle transactionHandle; - private boolean closed; - - public HiveTransaction(HiveTransactionManager transactionManager) - { - this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); - this.transactionHandle = new HiveTransactionHandle(false); - transactionManager.begin(transactionHandle); - getMetastore().testOnlyThrowOnCleanupFailures(); - } - - @Override - public ConnectorMetadata getMetadata() - { - return transactionManager.get(transactionHandle, SESSION.getIdentity()); - } - - @Override - public SemiTransactionalHiveMetastore getMetastore() - { - return transactionManager.get(transactionHandle, SESSION.getIdentity()).getMetastore(); - } - - @Override - public ConnectorTransactionHandle getTransactionHandle() - { - return transactionHandle; - } - - @Override - public void commit() - { - checkState(!closed); - closed = true; - transactionManager.commit(transactionHandle); - } - - @Override - public void rollback() - { - checkState(!closed); - closed = true; - transactionManager.rollback(transactionHandle); - } - - @Override - public void close() - { - if (!closed) { - try { - getMetastore().testOnlyCheckIsReadOnly(); // transactions in this test with writes in it must explicitly commit or rollback - } - finally { - rollback(); - } - } - } - } - - @Test - public void testGetDatabaseNames() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - List databases = metadata.listSchemaNames(newSession()); - assertThat(databases).contains(database); - } - } - - @Test - public void testGetTableNames() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - List tables = metadata.listTables(newSession(), Optional.of(database)); - assertThat(tables).contains(tablePartitionFormat); - assertThat(tables).contains(tableUnpartitioned); - } - } - - @Test - public void testGetAllTableNames() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - List tables = metadata.listTables(newSession(), Optional.empty()); - assertThat(tables).contains(tablePartitionFormat); - assertThat(tables).contains(tableUnpartitioned); - } - } - - @Test - public 
void testGetAllTableColumns() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix()); - assertThat(allColumns).containsKey(tablePartitionFormat); - assertThat(allColumns).containsKey(tableUnpartitioned); - } - } - - @Test - public void testGetAllTableColumnsInSchema() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix(database)); - assertThat(allColumns).containsKey(tablePartitionFormat); - assertThat(allColumns).containsKey(tableUnpartitioned); - } - } - - @Test - public void testListUnknownSchema() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - assertThat(metadata.getTableHandle(session, new SchemaTableName(INVALID_DATABASE, INVALID_TABLE))).isNull(); - assertThat(metadata.listTables(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableList.of()); - assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(INVALID_DATABASE, INVALID_TABLE))).isEqualTo(ImmutableMap.of()); - assertThat(metadata.listViews(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableList.of()); - assertThat(metadata.getViews(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableMap.of()); - assertThat(metadata.getView(session, new SchemaTableName(INVALID_DATABASE, INVALID_TABLE))).isEqualTo(Optional.empty()); - } - } - - @Test - public void testGetPartitions() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - tableHandle = applyFilter(metadata, tableHandle, Constraint.alwaysTrue()); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, tablePartitionFormatProperties); - assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions); - } - } - - @Test - public void testGetPartitionsWithBindings() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - Constraint constraint = new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(intColumn, Domain.singleValue(BIGINT, 5L)))); - tableHandle = applyFilter(metadata, tableHandle, constraint); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, tablePartitionFormatProperties); - assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions); - } - } - - @Test - public void testGetPartitionsWithFilter() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionedWithNull); - - Domain varcharSomeValue = Domain.singleValue(VARCHAR, utf8Slice("abc")); - Domain varcharOnlyNull = Domain.onlyNull(VARCHAR); - Domain varcharNotNull = Domain.notNull(VARCHAR); - - Domain integerSomeValue = Domain.singleValue(INTEGER, 123L); - Domain integerOnlyNull = Domain.onlyNull(INTEGER); - Domain integerNotNull = Domain.notNull(INTEGER); 
- - // all - assertThat(getPartitionNamesByFilter(metadata, tableHandle, new Constraint(TupleDomain.all()))) - .containsOnly( - "p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", - "p_string=abc/p_integer=123", - "p_string=def/p_integer=456"); - - // is some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue)) - .containsOnly("p_string=abc/p_integer=123"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue)) - .containsOnly("p_string=abc/p_integer=123"); - - // IS NULL - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharOnlyNull)) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerOnlyNull)) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__"); - - // IS NOT NULL - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharNotNull)) - .containsOnly("p_string=abc/p_integer=123", "p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerNotNull)) - .containsOnly("p_string=abc/p_integer=123", "p_string=def/p_integer=456"); - - // IS NULL OR is some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharOnlyNull.union(varcharSomeValue))) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=abc/p_integer=123"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerOnlyNull.union(integerSomeValue))) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=abc/p_integer=123"); - - // IS NOT NULL AND is NOT some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue.complement().intersect(varcharNotNull))) - .containsOnly("p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue.complement().intersect(integerNotNull))) - .containsOnly("p_string=def/p_integer=456"); - - // IS NULL OR is NOT some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue.complement())) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue.complement())) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=def/p_integer=456"); - } - } - - private Set getPartitionNamesByFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, ColumnHandle columnHandle, Domain domain) - { - return getPartitionNamesByFilter(metadata, tableHandle, new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(columnHandle, domain)))); - } - - private Set getPartitionNamesByFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, Constraint constraint) - { - return applyFilter(metadata, tableHandle, constraint) - .getPartitions().orElseThrow(() -> new IllegalStateException("No partitions")) - .stream() - .map(HivePartition::getPartitionId) - .collect(toImmutableSet()); - } - - @Test - public void testMismatchSchemaTable() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - // 
TODO: fix coercion for JSON - if (storageFormat == JSON) { - continue; - } - SchemaTableName temporaryMismatchSchemaTable = temporaryTable("mismatch_schema"); - try { - doTestMismatchSchemaTable( - temporaryMismatchSchemaTable, - storageFormat, - MISMATCH_SCHEMA_TABLE_BEFORE, - MISMATCH_SCHEMA_TABLE_DATA_BEFORE, - MISMATCH_SCHEMA_TABLE_AFTER, - MISMATCH_SCHEMA_TABLE_DATA_AFTER); - } - finally { - dropTable(temporaryMismatchSchemaTable); - } - } - } - - protected void doTestMismatchSchemaTable( - SchemaTableName schemaTableName, - HiveStorageFormat storageFormat, - List tableBefore, - MaterializedResult dataBefore, - List tableAfter, - MaterializedResult dataAfter) - throws Exception - { - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - doCreateEmptyTable(schemaTableName, storageFormat, tableBefore); - - // insert the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(dataBefore.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - // load the table and verify the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows()); - transaction.commit(); - } - - // alter the table schema - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session); - Table oldTable = transaction.getMetastore().getTable(schemaName, tableName).get(); - List dataColumns = tableAfter.stream() - .filter(columnMetadata -> !columnMetadata.getName().equals("ds")) - .map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(columnMetadata.getType()), Optional.empty(), Map.of())) - .collect(toList()); - Table.Builder newTable = Table.builder(oldTable) - .setDataColumns(dataColumns); - - transaction.getMetastore().replaceTable(schemaName, tableName, newTable.build(), principalPrivileges); - - transaction.commit(); - } - - // load the altered table and verify the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() 
- .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows()); - - transaction.commit(); - } - - // insertions to the partitions with type mismatches should fail - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(dataAfter.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - transaction.commit(); - - fail("expected exception"); - } - catch (TrinoException e) { - // expected - assertThat(e.getErrorCode()).isEqualTo(HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode()); - } - } - - protected void assertExpectedTableProperties(ConnectorTableProperties actualProperties, ConnectorTableProperties expectedProperties) - { - assertThat(actualProperties.getPredicate()).isEqualTo(expectedProperties.getPredicate()); - assertThat(actualProperties.getDiscretePredicates().isPresent()).isEqualTo(expectedProperties.getDiscretePredicates().isPresent()); - actualProperties.getDiscretePredicates().ifPresent(actual -> { - DiscretePredicates expected = expectedProperties.getDiscretePredicates().get(); - assertThat(actual.getColumns()).isEqualTo(expected.getColumns()); - assertEqualsIgnoreOrder(actual.getPredicates(), expected.getPredicates()); - }); - assertThat(actualProperties.getLocalProperties()).isEqualTo(expectedProperties.getLocalProperties()); - } - - protected void assertExpectedPartitions(ConnectorTableHandle table, Iterable expectedPartitions) - { - Iterable actualPartitions = ((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new); - Map actualById = uniqueIndex(actualPartitions, HivePartition::getPartitionId); - Map expectedById = uniqueIndex(expectedPartitions, HivePartition::getPartitionId); - - assertThat(actualById).isEqualTo(expectedById); - - // HivePartition.equals doesn't compare all the fields, so let's check them - for (Map.Entry expected : expectedById.entrySet()) { - HivePartition actualPartition = actualById.get(expected.getKey()); - HivePartition expectedPartition = expected.getValue(); - assertThat(actualPartition.getPartitionId()).isEqualTo(expectedPartition.getPartitionId()); - assertThat(actualPartition.getKeys()).isEqualTo(expectedPartition.getKeys()); - assertThat(actualPartition.getTableName()).isEqualTo(expectedPartition.getTableName()); - } - } - - @Test - public void testGetPartitionNamesUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - tableHandle = applyFilter(metadata, tableHandle, Constraint.alwaysTrue()); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, new 
ConnectorTableProperties()); - assertExpectedPartitions(tableHandle, tableUnpartitionedPartitions); - } - } - - @Test - public void testGetTableSchemaPartitionFormat() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), getTableHandle(metadata, tablePartitionFormat)); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - assertPrimitiveField(map, "t_tinyint", TINYINT, false); - assertPrimitiveField(map, "t_smallint", SMALLINT, false); - assertPrimitiveField(map, "t_int", INTEGER, false); - assertPrimitiveField(map, "t_bigint", BIGINT, false); - assertPrimitiveField(map, "t_float", REAL, false); - assertPrimitiveField(map, "t_double", DOUBLE, false); - assertPrimitiveField(map, "t_boolean", BOOLEAN, false); - assertPrimitiveField(map, "ds", createUnboundedVarcharType(), true); - assertPrimitiveField(map, "file_format", createUnboundedVarcharType(), true); - assertPrimitiveField(map, "dummy", INTEGER, true); - } - } - - @Test - public void testGetTableSchemaUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), tableHandle); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - assertPrimitiveField(map, "t_tinyint", TINYINT, false); - } - } - - @Test - public void testGetTableSchemaOffline() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> columns = listTableColumns(metadata, newSession(), tableOffline.toSchemaTablePrefix()); - assertThat(columns.size()).isEqualTo(1); - Map map = uniqueIndex(getOnlyElement(columns.values()), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - } - } - - @Test - public void testGetTableSchemaNotReadablePartition() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableNotReadable); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), tableHandle); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - } - } - - @Test - public void testGetTableSchemaException() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getTableHandle(newSession(), invalidTable)).isNull(); - } - } - - @Test - public void testGetTableStatsBucketedStringInt() - { - assertTableStatsComputed( - tableBucketedStringInt, - ImmutableSet.of( - "t_bigint", - "t_boolean", - "t_double", - "t_float", - "t_int", - "t_smallint", - "t_string", - "t_tinyint", - "ds")); - } - - @Test - public void testGetTableStatsUnpartitioned() - { - assertTableStatsComputed( - tableUnpartitioned, - ImmutableSet.of("t_string", "t_tinyint")); - } - - private void assertTableStatsComputed( - SchemaTableName tableName, - Set expectedColumnStatsColumns) - { - try 
(Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // first check if table handle with only one projected column will return this column stats - String firstColumnName = expectedColumnStatsColumns.iterator().next(); - verifyTableStatisticsWithColumns(metadata, session, applyProjection(metadata, session, tableHandle, firstColumnName), ImmutableSet.of(firstColumnName)); - - verifyTableStatisticsWithColumns(metadata, session, tableHandle, expectedColumnStatsColumns); - } - } - - private static ConnectorTableHandle applyProjection(ConnectorMetadata metadata, ConnectorSession session, ConnectorTableHandle tableHandle, String columnName) - { - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - HiveColumnHandle firstColumn = (HiveColumnHandle) columnHandles.get(columnName); - return metadata.applyProjection( - session, - tableHandle, - ImmutableList.of(new Variable("c1", firstColumn.getBaseType())), - ImmutableMap.of("c1", firstColumn)) - .orElseThrow() - .getHandle(); - } - - private static void verifyTableStatisticsWithColumns( - ConnectorMetadata metadata, - ConnectorSession session, - ConnectorTableHandle tableHandle, - Set expectedColumnStatsColumns) - { - TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle); - - assertThat(tableStatistics.getRowCount().isUnknown()) - .describedAs("row count is unknown") - .isFalse(); - - Map columnsStatistics = tableStatistics - .getColumnStatistics() - .entrySet() - .stream() - .collect( - toImmutableMap( - entry -> ((HiveColumnHandle) entry.getKey()).getName(), - Map.Entry::getValue)); - - assertThat(columnsStatistics.keySet()) - .describedAs("columns with statistics") - .isEqualTo(expectedColumnStatsColumns); - - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - columnsStatistics.forEach((columnName, columnStatistics) -> { - ColumnHandle columnHandle = columnHandles.get(columnName); - Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType(); - - assertThat(columnStatistics.getNullsFraction().isUnknown()) - .describedAs("unknown nulls fraction for " + columnName) - .isFalse(); - - assertThat(columnStatistics.getDistinctValuesCount().isUnknown()) - .describedAs("unknown distinct values count for " + columnName) - .isFalse(); - - if (columnType instanceof VarcharType) { - assertThat(columnStatistics.getDataSize().isUnknown()) - .describedAs("unknown data size for " + columnName) - .isFalse(); - } - else { - assertThat(columnStatistics.getDataSize().isUnknown()) - .describedAs("unknown data size for" + columnName) - .isTrue(); - } - }); - } - - @Test - public void testGetPartitionSplitsBatch() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - - assertThat(getSplitCount(splitSource)).isEqualTo(tablePartitionFormatPartitions.size()); - } - } - - @Test - public void testGetPartitionSplitsBatchUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - 
metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - - assertThat(getSplitCount(splitSource)).isEqualTo(1); - } - } - - @Test - public void testPerTransactionDirectoryListerCache() - throws Exception - { - long initListCount = countingDirectoryLister.getListCount(); - SchemaTableName tableName = temporaryTable("per_transaction_listing_cache_test"); - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - createEmptyTable(tableName, ORC, columns, ImmutableList.of()); - try { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - - // directory should be listed initially - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 1); - - // directory content should be cached - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 1); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - - // directory should be listed again in new transaction - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 2); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testGetPartitionSplitsBatchInvalidTable() - { - assertThatThrownBy(() -> { - try (Transaction transaction = newTransaction()) { - getSplits(splitManager, transaction, newSession(), invalidTableHandle); - } - }).isInstanceOf(TableNotFoundException.class); - } - - @Test - public void testGetPartitionTableOffline() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - try { - getTableHandle(metadata, tableOffline); - fail("expected TableOfflineException"); - } - catch (TableOfflineException e) { - assertThat(e.getTableName()).isEqualTo(tableOffline); - } - } - } - - @Test - public void testGetPartitionSplitsTableNotReadablePartition() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableNotReadable); - assertThat(tableHandle).isNotNull(); - - try { - getSplitCount(getSplits(splitManager, transaction, session, tableHandle)); - fail("Expected HiveNotReadableException"); - } - catch (HiveNotReadableException e) { - assertThat(e).hasMessageMatching("Table '.*\\.trino_test_not_readable' is not readable: reason for not readable"); - assertThat(e.getTableName()).isEqualTo(tableNotReadable); - assertThat(e.getPartition()).isEqualTo(Optional.empty()); - } - } - } - - @Test - public void testBucketedTableStringInt() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - 
metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedStringInt); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - - String testString = "test"; - Integer testInt = 13; - Short testSmallint = 12; - - // Reverse the order of bindings as compared to bucketing order - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_int")), NullableValue.of(INTEGER, (long) testInt)) - .put(columnHandles.get(columnIndex.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString))) - .put(columnHandles.get(columnIndex.get("t_smallint")), NullableValue.of(SMALLINT, (long) testSmallint)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - - boolean rowFound = false; - for (MaterializedRow row : result) { - if (testString.equals(row.getField(columnIndex.get("t_string"))) && - testInt.equals(row.getField(columnIndex.get("t_int"))) && - testSmallint.equals(row.getField(columnIndex.get("t_smallint")))) { - rowFound = true; - } - } - assertThat(rowFound).isTrue(); - } - } - - @SuppressWarnings("ConstantConditions") - @Test - public void testBucketedTableBigintBoolean() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedBigintBoolean); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - ConnectorTableProperties properties = metadata.getTableProperties( - newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true)), - tableHandle); - // trino_test_bucketed_by_bigint_boolean does not define sorting, therefore local properties is empty - assertThat(properties.getLocalProperties().isEmpty()).isTrue(); - assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties().isEmpty()).isTrue(); - - String testString = "test"; - Long testBigint = 89L; - Boolean testBoolean = true; - - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString))) - .put(columnHandles.get(columnIndex.get("t_bigint")), NullableValue.of(BIGINT, testBigint)) - .put(columnHandles.get(columnIndex.get("t_boolean")), NullableValue.of(BOOLEAN, testBoolean)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - - boolean rowFound = false; - for (MaterializedRow row : result) { - if (testString.equals(row.getField(columnIndex.get("t_string"))) && - testBigint.equals(row.getField(columnIndex.get("t_bigint"))) && - testBoolean.equals(row.getField(columnIndex.get("t_boolean")))) { - rowFound = true; - break; - } - } - assertThat(rowFound).isTrue(); - } - } - - @Test - public void testBucketedTableDoubleFloat() - throws Exception - { - try (Transaction transaction = 
newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedDoubleFloat); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - - float testFloatValue = 87.1f; - double testDoubleValue = 88.2; - - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_float")), NullableValue.of(REAL, (long) floatToRawIntBits(testFloatValue))) - .put(columnHandles.get(columnIndex.get("t_double")), NullableValue.of(DOUBLE, testDoubleValue)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - assertThat(result).anyMatch(row -> testFloatValue == (float) row.getField(columnIndex.get("t_float")) - && testDoubleValue == (double) row.getField(columnIndex.get("t_double"))); - } - } - - @Test - public void testBucketedTableEvolutionWithDifferentReadBucketCount() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryBucketEvolutionTable = temporaryTable("bucket_evolution"); - try { - doTestBucketedTableEvolutionWithDifferentReadCount(storageFormat, temporaryBucketEvolutionTable); - } - finally { - dropTable(temporaryBucketEvolutionTable); - } - } - } - - private void doTestBucketedTableEvolutionWithDifferentReadCount(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - int rowCount = 100; - int bucketCount = 16; - - // Produce a table with a partition with bucket count different but compatible with the table bucket count - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 4, ImmutableList.of()))); - // write a 4-bucket partition - MaterializedResult.Builder bucket8Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket8Builder.row((long) i, String.valueOf(i), "four")); - insertData(tableName, bucket8Builder.build()); - - // Alter the bucket count to 16 - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, bucketCount, ImmutableList.of()))); - - MaterializedResult result; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - assertThat(splits.size()).isEqualTo(16); - - ImmutableList.Builder allRows = ImmutableList.builder(); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, 
tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult intermediateResult = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - allRows.addAll(intermediateResult.getMaterializedRows()); - } - } - result = new MaterializedResult(allRows.build(), getTypes(columnHandles)); - - assertThat(result.getRowCount()).isEqualTo(rowCount); - - Map columnIndex = indexColumns(columnHandles); - int nameColumnIndex = columnIndex.get("name"); - int bucketColumnIndex = columnIndex.get(BUCKET_COLUMN_NAME); - for (MaterializedRow row : result.getMaterializedRows()) { - String name = (String) row.getField(nameColumnIndex); - int bucket = (int) row.getField(bucketColumnIndex); - - assertThat(bucket).isEqualTo(Integer.parseInt(name) % bucketCount); - } - } - } - - @Test - public void testBucketedTableEvolution() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryBucketEvolutionTable = temporaryTable("bucket_evolution"); - try { - doTestBucketedTableEvolution(storageFormat, temporaryBucketEvolutionTable); - } - finally { - dropTable(temporaryBucketEvolutionTable); - } - } - } - - private void doTestBucketedTableEvolution(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - int rowCount = 100; - - // - // Produce a table with 8 buckets. - // The table has 3 partitions of 3 different bucket count (4, 8, 16). - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 4, ImmutableList.of()))); - // write a 4-bucket partition - MaterializedResult.Builder bucket4Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket4Builder.row((long) i, String.valueOf(i), "four")); - insertData(tableName, bucket4Builder.build()); - // write a 16-bucket partition - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 16, ImmutableList.of()))); - MaterializedResult.Builder bucket16Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket16Builder.row((long) i, String.valueOf(i), "sixteen")); - insertData(tableName, bucket16Builder.build()); - // write an 8-bucket partition - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 8, ImmutableList.of()))); - MaterializedResult.Builder bucket8Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket8Builder.row((long) i, String.valueOf(i), "eight")); - insertData(tableName, bucket8Builder.build()); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable( - transaction, - tableHandle, - columnHandles, - session, - TupleDomain.all(), - OptionalInt.empty(), - Optional.empty()); - 
-            assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(0, 1, 2, 3, 4, 5, 6, 7), rowCount);
-
-            // read single bucket (table/logical bucket)
-            result = readTable(
-                    transaction,
-                    tableHandle,
-                    columnHandles,
-                    session,
-                    TupleDomain.fromFixedValues(ImmutableMap.of(bucketColumnHandle(), NullableValue.of(INTEGER, 6L))),
-                    OptionalInt.empty(),
-                    Optional.empty());
-            assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(6), rowCount);
-
-            // read single bucket, without selecting the bucketing column (i.e. id column)
-            columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream()
-                    .filter(columnHandle -> !"id".equals(((HiveColumnHandle) columnHandle).getName()))
-                    .collect(toImmutableList());
-            result = readTable(
-                    transaction,
-                    tableHandle,
-                    columnHandles,
-                    session,
-                    TupleDomain.fromFixedValues(ImmutableMap.of(bucketColumnHandle(), NullableValue.of(INTEGER, 6L))),
-                    OptionalInt.empty(),
-                    Optional.empty());
-            assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(6), rowCount);
-        }
-    }
-
-    private static void assertBucketTableEvolutionResult(MaterializedResult result, List<ColumnHandle> columnHandles, Set<Integer> bucketIds, int rowCount)
-    {
-        // Assert that only elements in the specified bucket shows up, and each element shows up 3 times.
-        int bucketCount = 8;
-        Set<Long> expectedIds = LongStream.range(0, rowCount)
-                .filter(x -> bucketIds.contains(toIntExact(x % bucketCount)))
-                .boxed()
-                .collect(toImmutableSet());
-
-        // assert that content from all three buckets are the same
-        Map<String, Integer> columnIndex = indexColumns(columnHandles);
-        OptionalInt idColumnIndex = columnIndex.containsKey("id") ? OptionalInt.of(columnIndex.get("id")) : OptionalInt.empty();
-        int nameColumnIndex = columnIndex.get("name");
-        int bucketColumnIndex = columnIndex.get(BUCKET_COLUMN_NAME);
-        Map<Long, Integer> idCount = new HashMap<>();
-        for (MaterializedRow row : result.getMaterializedRows()) {
-            String name = (String) row.getField(nameColumnIndex);
-            int bucket = (int) row.getField(bucketColumnIndex);
-            idCount.compute(Long.parseLong(name), (key, oldValue) -> oldValue == null ? 1 : oldValue + 1);
-            assertThat(bucket).isEqualTo(Integer.parseInt(name) % bucketCount);
-            if (idColumnIndex.isPresent()) {
-                long id = (long) row.getField(idColumnIndex.getAsInt());
-                assertThat(Integer.parseInt(name)).isEqualTo(id);
-            }
-        }
-        assertThat((int) idCount.values().stream()
-                .distinct()
-                .collect(onlyElement())).isEqualTo(3);
-        assertThat(idCount.keySet()).isEqualTo(expectedIds);
-    }
-
-    @Test
-    public void testBucketedSortedTableEvolution()
-            throws Exception
-    {
-        SchemaTableName temporaryTable = temporaryTable("test_bucket_sorting_evolution");
-        try {
-            doTestBucketedSortedTableEvolution(temporaryTable);
-        }
-        finally {
-            dropTable(temporaryTable);
-        }
-    }
-
-    private void doTestBucketedSortedTableEvolution(SchemaTableName tableName)
-            throws Exception
-    {
-        int rowCount = 100;
-        // Create table and populate it with 3 partitions with different sort orders but same bucketing
-        createEmptyTable(
-                tableName,
-                ORC,
-                ImmutableList.of(
-                        new Column("id", HIVE_LONG, Optional.empty(), Map.of()),
-                        new Column("name", HIVE_STRING, Optional.empty(), Map.of())),
-                ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())),
-                Optional.of(new HiveBucketProperty(
-                        ImmutableList.of("id"),
-                        BUCKETING_V1,
-                        4,
-                        ImmutableList.of(new SortingColumn("id", ASCENDING), new SortingColumn("name", ASCENDING)))));
-        // write a 4-bucket partition sorted by id, name
-        MaterializedResult.Builder sortedByIdNameBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR);
-        IntStream.range(0, rowCount).forEach(i -> sortedByIdNameBuilder.row((long) i, String.valueOf(i), "sorted_by_id_name"));
-        insertData(tableName, sortedByIdNameBuilder.build());
-
-        // write a 4-bucket partition sorted by name
-        alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(
-                ImmutableList.of("id"),
-                BUCKETING_V1,
-                4,
-                ImmutableList.of(new SortingColumn("name", ASCENDING)))));
-        MaterializedResult.Builder sortedByNameBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR);
-        IntStream.range(0, rowCount).forEach(i -> sortedByNameBuilder.row((long) i, String.valueOf(i), "sorted_by_name"));
-        insertData(tableName, sortedByNameBuilder.build());
-
-        // write a 4-bucket partition sorted by id
-        alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(
-                ImmutableList.of("id"),
-                BUCKETING_V1,
-                4,
-                ImmutableList.of(new SortingColumn("id", ASCENDING)))));
-        MaterializedResult.Builder sortedByIdBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR);
-        IntStream.range(0, rowCount).forEach(i -> sortedByIdBuilder.row((long) i, String.valueOf(i), "sorted_by_id"));
-        insertData(tableName, sortedByIdBuilder.build());
-
-        ConnectorTableHandle tableHandle;
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            ConnectorSession session = newSession();
-            metadata.beginQuery(session);
-            tableHandle = getTableHandle(metadata, tableName);
-
-            // read entire table
-            List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
-            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
-            assertThat(result.getRowCount()).isEqualTo(300);
-        }
-
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            ConnectorSession session = newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true));
metadata.beginQuery(session); - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - // verify local sorting property - ConnectorTableProperties properties = metadata.getTableProperties(session, tableHandle); - assertThat(properties.getLocalProperties()).isEqualTo(ImmutableList.of( - new SortingProperty<>(columnHandles.get("id"), ASC_NULLS_FIRST))); - - // read on a entire table should fail with exception - assertThatThrownBy(() -> readTable(transaction, tableHandle, ImmutableList.copyOf(columnHandles.values()), session, TupleDomain.all(), OptionalInt.empty(), Optional.empty())) - .isInstanceOf(TrinoException.class) - .hasMessage("Hive table (%s) sorting by [id] is not compatible with partition (pk=sorted_by_name) sorting by [name]." + - " This restriction can be avoided by disabling propagate_table_scan_sorting_properties.", tableName); - - // read only the partitions with sorting that is compatible to table sorting - MaterializedResult result = readTable( - transaction, - tableHandle, - ImmutableList.copyOf(columnHandles.values()), - session, - TupleDomain.withColumnDomains(ImmutableMap.of( - columnHandles.get("pk"), - Domain.create(ValueSet.of(VARCHAR, utf8Slice("sorted_by_id_name"), utf8Slice("sorted_by_id")), false))), - OptionalInt.empty(), - Optional.empty()); - assertThat(result.getRowCount()).isEqualTo(200); - } - } - - @Test - public void testBucketedTableValidation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName table = temporaryTable("bucket_validation"); - try { - doTestBucketedTableValidation(storageFormat, table); - } - finally { - dropTable(table); - } - } - } - - private void doTestBucketedTableValidation(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - prepareInvalidBuckets(storageFormat, tableName); - - // read succeeds when validation is disabled - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(ImmutableMap.of("validate_bucketing", false)); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(87); // fewer rows due to deleted file - } - - // read fails due to validation failure - assertReadFailsWithMessageMatching(storageFormat, tableName, "Hive table is corrupt\\. 
File '.*/000002_0_.*' is for bucket 2, but contains a row for bucket 5."); - } - - private void prepareInvalidBuckets(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 8, ImmutableList.of()))); - - MaterializedResult.Builder dataBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR); - for (long id = 0; id < 100; id++) { - dataBuilder.row(id, String.valueOf(id)); - } - insertData(tableName, dataBuilder.build()); - - try (Transaction transaction = newTransaction()) { - Set files = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - - Path bucket2 = files.stream() - .map(Path::new) - .filter(path -> path.getName().startsWith("000002_0_")) - .collect(onlyElement()); - - Path bucket5 = files.stream() - .map(Path::new) - .filter(path -> path.getName().startsWith("000005_0_")) - .collect(onlyElement()); - - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, bucket2); - fileSystem.delete(bucket2, false); - fileSystem.rename(bucket5, bucket2); - } - } - - protected void assertReadFailsWithMessageMatching(HiveStorageFormat storageFormat, SchemaTableName tableName, String regex) - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - assertTrinoExceptionThrownBy( - () -> readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat))) - .hasErrorCode(HIVE_INVALID_BUCKET_FILES) - .hasMessageMatching(regex); - } - } - - private void assertTableIsBucketed(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - // the bucketed test tables should have ~32 splits - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).as("splits.size()") - .isBetween(31, 32); - - // verify all paths are unique - Set paths = new HashSet<>(); - for (ConnectorSplit split : splits) { - assertThat(paths.add(((HiveSplit) split).getPath())).isTrue(); - } - } - - @Test - public void testGetRecords() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(tablePartitionFormatPartitions.size()); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - List partitionKeys = hiveSplit.getPartitionKeys(); - String ds = partitionKeys.get(0).getValue(); 
- String fileFormat = partitionKeys.get(1).getValue(); - HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH)); - int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue()); - - long rowNumber = 0; - long completedBytes = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - - assertPageSourceType(pageSource, fileType); - - for (MaterializedRow row : result) { - try { - assertValueTypes(row, tableMetadata.getColumns()); - } - catch (RuntimeException e) { - throw new RuntimeException("row " + rowNumber, e); - } - - rowNumber++; - Object value; - - value = row.getField(columnIndex.get("t_string")); - if (rowNumber % 19 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(value).isEqualTo(""); - } - else { - assertThat(value).isEqualTo("test"); - } - - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_smallint"))).isEqualTo((short) (2 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_int"))).isEqualTo(3 + (int) rowNumber); - - if (rowNumber % 13 == 0) { - assertThat(row.getField(columnIndex.get("t_bigint"))).isNull(); - } - else { - assertThat(row.getField(columnIndex.get("t_bigint"))).isEqualTo(4 + rowNumber); - } - - assertThat((Float) row.getField(columnIndex.get("t_float"))).isCloseTo(5.1f + rowNumber, offset(0.001f)); - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - - if (rowNumber % 3 == 2) { - assertThat(row.getField(columnIndex.get("t_boolean"))).isNull(); - } - else { - assertThat(row.getField(columnIndex.get("t_boolean"))).isEqualTo(rowNumber % 3 != 0); - } - - assertThat(row.getField(columnIndex.get("ds"))).isEqualTo(ds); - assertThat(row.getField(columnIndex.get("file_format"))).isEqualTo(fileFormat); - assertThat(row.getField(columnIndex.get("dummy"))).isEqualTo(dummyPartition); - - long newCompletedBytes = pageSource.getCompletedBytes(); - assertThat(newCompletedBytes >= completedBytes).isTrue(); - assertThat(newCompletedBytes <= hiveSplit.getLength()).isTrue(); - completedBytes = newCompletedBytes; - } - - assertThat(completedBytes <= hiveSplit.getLength()).isTrue(); - assertThat(rowNumber).isEqualTo(100); - } - } - } - } - - @Test - public void testGetPartialRecords() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(tablePartitionFormatPartitions.size()); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - List partitionKeys = hiveSplit.getPartitionKeys(); - String ds = partitionKeys.get(0).getValue(); - String fileFormat = partitionKeys.get(1).getValue(); - HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH)); - int dummyPartition = 
Integer.parseInt(partitionKeys.get(2).getValue()); - - long rowNumber = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - assertPageSourceType(pageSource, fileType); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - for (MaterializedRow row : result) { - rowNumber++; - - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - assertThat(row.getField(columnIndex.get("ds"))).isEqualTo(ds); - assertThat(row.getField(columnIndex.get("file_format"))).isEqualTo(fileFormat); - assertThat(row.getField(columnIndex.get("dummy"))).isEqualTo(dummyPartition); - } - } - assertThat(rowNumber).isEqualTo(100); - } - } - } - - @Test - public void testGetRecordsUnpartitioned() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits).hasSameSizeAs(tableUnpartitionedPartitions); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - assertThat(hiveSplit.getPartitionKeys()).isEqualTo(ImmutableList.of()); - - long rowNumber = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - assertPageSourceType(pageSource, TEXTFILE); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - - for (MaterializedRow row : result) { - rowNumber++; - - if (rowNumber % 19 == 0) { - assertThat(row.getField(columnIndex.get("t_string"))).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(row.getField(columnIndex.get("t_string"))).isEqualTo(""); - } - else { - assertThat(row.getField(columnIndex.get("t_string"))).isEqualTo("unpartitioned"); - } - - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - } - } - assertThat(rowNumber).isEqualTo(100); - } - } - } - - @Test - public void testPartitionSchemaMismatch() - { - assertThatThrownBy(() -> { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChange); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - readTable(transaction, table, ImmutableList.of(dsColumn), session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - } - }) - .isInstanceOf(TrinoException.class) - .hasMessageMatching(".*The column 't_data' in table '.*\\.trino_test_partition_schema_change' is declared as type 'float', but partition 'ds=2012-12-29' declared column 't_data' as type 'string'."); - } - - // TODO coercion of non-canonical values should be supported - @Test - @Disabled - public void testPartitionSchemaNonCanonical() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - 
ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChangeNonCanonical); - ColumnHandle column = metadata.getColumnHandles(session, table).get("t_boolean"); - - Constraint constraint = new Constraint(TupleDomain.fromFixedValues(ImmutableMap.of(column, NullableValue.of(BOOLEAN, false)))); - table = applyFilter(metadata, table, constraint); - HivePartition partition = getOnlyElement(((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new)); - assertThat(getPartitionId(partition)).isEqualTo("t_boolean=0"); - - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, table); - ConnectorSplit split = getOnlyElement(getAllSplits(splitSource)); - - ImmutableList columnHandles = ImmutableList.of(column); - try (ConnectorPageSource ignored = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, table, columnHandles, DynamicFilter.EMPTY)) { - fail("expected exception"); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(HIVE_INVALID_PARTITION_VALUE.toErrorCode()); - } - } - } - - @Test - public void testTypesTextFile() - throws Exception - { - assertGetRecords("trino_test_types_textfile", TEXTFILE); - } - - @Test - public void testTypesSequenceFile() - throws Exception - { - assertGetRecords("trino_test_types_sequencefile", SEQUENCEFILE); - } - - @Test - public void testTypesRcText() - throws Exception - { - assertGetRecords("trino_test_types_rctext", RCTEXT); - } - - @Test - public void testTypesRcBinary() - throws Exception - { - assertGetRecords("trino_test_types_rcbinary", RCBINARY); - } - - @Test - public void testTypesOrc() - throws Exception - { - assertGetRecords("trino_test_types_orc", ORC); - } - - @Test - public void testTypesParquet() - throws Exception - { - assertGetRecords("trino_test_types_parquet", PARQUET); - } - - @Test - public void testEmptyTextFile() - throws Exception - { - assertEmptyFile(TEXTFILE); - } - - @Test - public void testEmptySequenceFile() - throws Exception - { - assertEmptyFile(SEQUENCEFILE); - } - - @Test - public void testEmptyRcTextFile() - throws Exception - { - assertEmptyFile(RCTEXT); - } - - @Test - public void testEmptyRcBinaryFile() - throws Exception - { - assertEmptyFile(RCBINARY); - } - - @Test - public void testEmptyOrcFile() - throws Exception - { - assertEmptyFile(ORC); - } - - private void assertEmptyFile(HiveStorageFormat format) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_file"); - try { - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - createEmptyTable(tableName, format, columns, ImmutableList.of()); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - Table table = transaction.getMetastore() - .getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(AssertionError::new); - - // verify directory is empty - HdfsContext context = new HdfsContext(session); - Path location = new Path(table.getStorage().getLocation()); - assertThat(listDirectory(context, location).isEmpty()).isTrue(); - - // read table with empty directory - readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), 
OptionalInt.of(0), Optional.of(ORC)); - - // create empty file - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, location); - assertThat(fileSystem.createNewFile(new Path(location, "empty-file"))).isTrue(); - assertThat(listDirectory(context, location)).isEqualTo(ImmutableList.of("empty-file")); - - // read table with empty file - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(0), Optional.empty()); - assertThat(result.getRowCount()).isEqualTo(0); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testRenameTable() - { - SchemaTableName temporaryRenameTableOld = temporaryTable("rename_old"); - SchemaTableName temporaryRenameTableNew = temporaryTable("rename_new"); - try { - createDummyTable(temporaryRenameTableOld); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - metadata.renameTable(session, getTableHandle(metadata, temporaryRenameTableOld), temporaryRenameTableNew); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - assertThat(metadata.getTableHandle(session, temporaryRenameTableOld)).isNull(); - assertThat(metadata.getTableHandle(session, temporaryRenameTableNew)).isNotNull(); - } - } - finally { - dropTable(temporaryRenameTableOld); - dropTable(temporaryRenameTableNew); - } - } - - @Test - public void testTableCreation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateTable = temporaryTable("create"); - try { - doCreateTable(temporaryCreateTable, storageFormat); - } - finally { - dropTable(temporaryCreateTable); - } - } - } - - @Test - public void testTableCreationWithTrailingSpaceInLocation() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_table_creation_with_trailing_space_in_location_" + randomNameSuffix()); - String tableDefaultLocationWithTrailingSpace = null; - try { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - SemiTransactionalHiveMetastore metastore = transaction.getMetastore(); - TrinoFileSystem fileSystem = HDFS_FILE_SYSTEM_FACTORY.create(session); - - // Write data - tableDefaultLocationWithTrailingSpace = getTableDefaultLocation(metastore, fileSystem, tableName.getSchemaName(), tableName.getTableName()) + " "; - Path dataFilePath = new Path(tableDefaultLocationWithTrailingSpace, "foo.txt"); - FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableDefaultLocationWithTrailingSpace)); - try (OutputStream outputStream = fs.create(dataFilePath)) { - outputStream.write("hello\u0001world\nbye\u0001world".getBytes(UTF_8)); - } - - // create table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata( - tableName, - ImmutableList.builder() - .add(new ColumnMetadata("t_string1", VARCHAR)) - .add(new ColumnMetadata("t_string2", VARCHAR)) - .build(), - ImmutableMap.builder() - .putAll(createTableProperties(TEXTFILE, ImmutableList.of())) - .put(EXTERNAL_LOCATION_PROPERTY, tableDefaultLocationWithTrailingSpace) - .buildOrThrow()); - - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.createTable(session, tableMetadata, false); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - 
ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // verify the data - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(TEXTFILE)); - assertEqualsIgnoreOrder( - result.getMaterializedRows(), - MaterializedResult.resultBuilder(SESSION, VARCHAR, VARCHAR) - .row("hello", "world") - .row("bye", "world") - .build()); - } - } - finally { - dropTable(tableName); - if (tableDefaultLocationWithTrailingSpace != null) { - FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsContext(SESSION), new Path(tableDefaultLocationWithTrailingSpace)); - fs.delete(new Path(tableDefaultLocationWithTrailingSpace), true); - } - } - } - - @Test - public void testTableCreationRollback() - throws Exception - { - SchemaTableName temporaryCreateRollbackTable = temporaryTable("create_rollback"); - try { - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(temporaryCreateRollbackTable, CREATE_TABLE_COLUMNS, createTableProperties(RCBINARY)); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - getFutureValue(sink.finish()); - - // verify we have data files - stagingPathRoot = getStagingPathRoot(outputHandle); - HdfsContext context = new HdfsContext(session); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isFalse(); - - // rollback the table - transaction.rollback(); - } - - // verify all files have been deleted - HdfsContext context = new HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify table is not in the metastore - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getTableHandle(session, temporaryCreateRollbackTable)).isNull(); - } - } - finally { - dropTable(temporaryCreateRollbackTable); - } - } - - @Test - public void testTableCreationIgnoreExisting() - { - List columns = ImmutableList.of(new Column("dummy", HiveType.valueOf("uniontype"), Optional.empty(), Map.of())); - SchemaTableName schemaTableName = temporaryTable("create"); - ConnectorSession session = newSession(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - PrincipalPrivileges privileges = testingPrincipalPrivilege(session); - Location targetPath; - try { - try (Transaction transaction = newTransaction()) { - LocationService locationService = getLocationService(); - targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - Table table = createSimpleTable(schemaTableName, columns, session, targetPath, "q1"); - transaction.getMetastore() - .createTable(session, table, privileges, 
Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); - Optional<Table>
tableHandle = transaction.getMetastore().getTable(schemaName, tableName); - assertThat(tableHandle.isPresent()).isTrue(); - transaction.commit(); - } - - // try creating it again from another transaction with ignoreExisting=false - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_2"), "q2"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - fail("Expected exception"); - } - catch (TrinoException e) { - assertInstanceOf(e, TableAlreadyExistsException.class); - } - - // try creating it again from another transaction with ignoreExisting=true - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_3"), "q3"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - } - - // at this point the table should exist, now try creating the table again with a different table definition - columns = ImmutableList.of(new Column("new_column", HiveType.valueOf("string"), Optional.empty(), Map.of())); - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_4"), "q4"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - fail("Expected exception"); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(TRANSACTION_CONFLICT.toErrorCode()); - assertThat(e.getMessage()).isEqualTo(format("Table already exists with a different schema: '%s'", schemaTableName.getTableName())); - } - } - finally { - dropTable(schemaTableName); - } - } - - private static Table createSimpleTable(SchemaTableName schemaTableName, List columns, ConnectorSession session, Location targetPath, String queryId) - { - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - return Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, queryId)) - .setDataColumns(columns) - .withStorage(storage -> storage - .setLocation(targetPath.toString()) - .setStorageFormat(fromHiveStorageFormat(ORC)) - .setSerdeParameters(ImmutableMap.of())) - .build(); - } - - @Test - public void testBucketSortedTables() - throws Exception - { - SchemaTableName table = temporaryTable("create_sorted"); - try { - doTestBucketSortedTables(table); - } - finally { - dropTable(table); - } - } - - private void doTestBucketSortedTables(SchemaTableName table) - throws IOException - { - int bucketCount = 3; - int expectedRowCount = 0; - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata( - table, - ImmutableList.builder() - .add(new ColumnMetadata("id", VARCHAR)) - .add(new ColumnMetadata("value_asc", VARCHAR)) - .add(new ColumnMetadata("value_desc", BIGINT)) - 
.add(new ColumnMetadata("ds", VARCHAR)) - .build(), - ImmutableMap.builder() - .put(STORAGE_FORMAT_PROPERTY, RCBINARY) - .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds")) - .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id")) - .put(BUCKET_COUNT_PROPERTY, bucketCount) - .put(SORTED_BY_PROPERTY, ImmutableList.builder() - .add(new SortingColumn("value_asc", ASCENDING)) - .add(new SortingColumn("value_desc", DESCENDING)) - .build()) - .buildOrThrow()); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - List types = tableMetadata.getColumns().stream() - .map(ColumnMetadata::getType) - .collect(toList()); - ThreadLocalRandom random = ThreadLocalRandom.current(); - for (int i = 0; i < 50; i++) { - MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types); - for (int j = 0; j < 1000; j++) { - builder.row( - sha256().hashLong(random.nextLong()).toString(), - "test" + random.nextInt(100), - random.nextLong(100_000), - "2018-04-01"); - expectedRowCount++; - } - sink.appendPage(builder.build().toPage()); - } - - HdfsContext context = new HdfsContext(session); - HiveConfig config = getHiveConfig(); - // verify we have enough temporary files per bucket to require multiple passes - Location stagingPathRoot; - if (config.isTemporaryStagingDirectoryEnabled()) { - stagingPathRoot = Location.of(config.getTemporaryStagingDirectoryPath() - .replace("${USER}", context.getIdentity().getUser())); - } - else { - stagingPathRoot = getStagingPathRoot(outputHandle); - } - assertThat(listAllDataFiles(context, stagingPathRoot)) - .filteredOn(file -> file.contains(".tmp-sort.")) - .size().isGreaterThan(bucketCount * getSortingFileWriterConfig().getMaxOpenSortFiles() * 2); - - // finish the write - Collection fragments = getFutureValue(sink.finish()); - - // verify there are no temporary files - for (String file : listAllDataFiles(context, stagingPathRoot)) { - assertThat(file).doesNotContain(".tmp-sort."); - } - - // finish creating table - metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - // verify that bucket files are sorted - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, table); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - // verify local sorting property - ConnectorTableProperties properties = metadata.getTableProperties( - newSession(ImmutableMap.of( - "propagate_table_scan_sorting_properties", true, - "bucket_execution_enabled", false)), - tableHandle); - Map columnIndex = indexColumns(columnHandles); - assertThat(properties.getLocalProperties()).isEqualTo(ImmutableList.of( - new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST), - new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST))); - assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty(); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits).hasSize(bucketCount); - - int actualRowCount = 0; - for (ConnectorSplit split : 
splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - String lastValueAsc = null; - long lastValueDesc = -1; - - while (!pageSource.isFinished()) { - Page page = pageSource.getNextPage(); - if (page == null) { - continue; - } - for (int i = 0; i < page.getPositionCount(); i++) { - Block blockAsc = page.getBlock(1); - Block blockDesc = page.getBlock(2); - assertThat(blockAsc.isNull(i)).isFalse(); - assertThat(blockDesc.isNull(i)).isFalse(); - - String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8(); - if (lastValueAsc != null) { - assertGreaterThanOrEqual(valueAsc, lastValueAsc); - if (valueAsc.equals(lastValueAsc)) { - long valueDesc = BIGINT.getLong(blockDesc, i); - if (lastValueDesc != -1) { - assertLessThanOrEqual(valueDesc, lastValueDesc); - } - lastValueDesc = valueDesc; - } - else { - lastValueDesc = -1; - } - } - lastValueAsc = valueAsc; - actualRowCount++; - } - } - } - } - assertThat(actualRowCount).isEqualTo(expectedRowCount); - } - } - - @Test - public void testInsert() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertTable = temporaryTable("insert"); - try { - doInsert(storageFormat, temporaryInsertTable); - } - finally { - dropTable(temporaryInsertTable); - } - } - } - - @Test - public void testInsertOverwriteUnpartitioned() - throws Exception - { - SchemaTableName table = temporaryTable("insert_overwrite"); - try { - doInsertOverwriteUnpartitioned(table); - } - finally { - dropTable(table); - } - } - - @Test - public void testInsertIntoNewPartition() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoNewPartitionTable = temporaryTable("insert_new_partitioned"); - try { - doInsertIntoNewPartition(storageFormat, temporaryInsertIntoNewPartitionTable); - } - finally { - dropTable(temporaryInsertIntoNewPartitionTable); - } - } - } - - @Test - public void testInsertIntoExistingPartition() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoExistingPartitionTable = temporaryTable("insert_existing_partitioned"); - try { - doInsertIntoExistingPartition(storageFormat, temporaryInsertIntoExistingPartitionTable); - } - finally { - dropTable(temporaryInsertIntoExistingPartitionTable); - } - } - } - - @Test - public void testInsertIntoExistingPartitionEmptyStatistics() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoExistingPartitionTable = temporaryTable("insert_existing_partitioned_empty_statistics"); - try { - doInsertIntoExistingPartitionEmptyStatistics(storageFormat, temporaryInsertIntoExistingPartitionTable); - } - finally { - dropTable(temporaryInsertIntoExistingPartitionTable); - } - } - } - - @Test - public void testInsertUnsupportedWriteType() - throws Exception - { - SchemaTableName temporaryInsertUnsupportedWriteType = temporaryTable("insert_unsupported_type"); - try { - doInsertUnsupportedWriteType(ORC, temporaryInsertUnsupportedWriteType); - } - finally { - dropTable(temporaryInsertUnsupportedWriteType); - } - } - - @Test - public void testMetadataDelete() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryMetadataDeleteTable = temporaryTable("metadata_delete"); - try { - 
doTestMetadataDelete(storageFormat, temporaryMetadataDeleteTable); - } - finally { - dropTable(temporaryMetadataDeleteTable); - } - } - } - - @Test - public void testEmptyTableCreation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateEmptyTable = temporaryTable("create_empty"); - try { - doCreateEmptyTable(temporaryCreateEmptyTable, storageFormat, CREATE_TABLE_COLUMNS); - } - finally { - dropTable(temporaryCreateEmptyTable); - } - } - } - - @Test - public void testCreateEmptyTableShouldNotCreateStagingDirectory() - throws IOException - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateEmptyTable = temporaryTable("create_empty"); - try { - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - try (Transaction transaction = newTransaction()) { - String temporaryStagingPrefix = "hive-temporary-staging-prefix-" + UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""); - ConnectorSession session = newSession(); - String tableOwner = session.getUser(); - String schemaName = temporaryCreateEmptyTable.getSchemaName(); - String tableName = temporaryCreateEmptyTable.getTableName(); - HiveConfig hiveConfig = getHiveConfig() - .setTemporaryStagingDirectoryPath(temporaryStagingPrefix) - .setTemporaryStagingDirectoryEnabled(true); - TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS); - LocationService locationService = new HiveLocationService(fileSystemFactory, hiveConfig); - Location targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns); - tableBuilder.getStorageBuilder() - .setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.create(storageFormat.getSerde(), storageFormat.getInputFormat(), storageFormat.getOutputFormat())); - transaction.getMetastore().createTable( - session, - tableBuilder.build(), - testingPrincipalPrivilege(tableOwner, session.getUser()), - Optional.empty(), - Optional.empty(), - true, - ZERO_TABLE_STATISTICS, - false); - transaction.commit(); - - HdfsContext context = new HdfsContext(session); - Path temporaryRoot = new Path(targetPath.toString(), temporaryStagingPrefix); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, temporaryRoot); - assertThat(fileSystem.exists(temporaryRoot)) - .describedAs(format("Temporary staging directory %s is created.", temporaryRoot)) - .isFalse(); - } - } - finally { - dropTable(temporaryCreateEmptyTable); - } - } - } - - @Test - public void testViewCreation() - { - SchemaTableName temporaryCreateView = temporaryTable("create_view"); - try { - verifyViewCreation(temporaryCreateView); - } - finally { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.dropView(newSession(), temporaryCreateView); - transaction.commit(); - } - catch (RuntimeException e) { - // this usually occurs because the view was not created - } - } - } - - @Test - public void testCreateTableUnsupportedType() - { - for (HiveStorageFormat storageFormat : createTableFormats) { - try 
(Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - List columns = ImmutableList.of(new ColumnMetadata("dummy", HYPER_LOG_LOG)); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(invalidTable, columns, createTableProperties(storageFormat)); - metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - fail("create table with unsupported type should fail for storage format " + storageFormat); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(NOT_SUPPORTED.toErrorCode()); - } - } - } - - @Test - public void testHideDeltaLakeTables() - { - ConnectorSession session = newSession(); - SchemaTableName tableName = temporaryTable("trino_delta_lake_table"); - - Table.Builder table = Table.builder() - .setDatabaseName(tableName.getSchemaName()) - .setTableName(tableName.getTableName()) - .setOwner(Optional.of(session.getUser())) - .setTableType(MANAGED_TABLE.name()) - .setPartitionColumns(List.of(new Column("a_partition_column", HIVE_INT, Optional.empty(), Map.of()))) - .setDataColumns(List.of(new Column("a_column", HIVE_STRING, Optional.empty(), Map.of()))) - .setParameter(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER); - table.getStorageBuilder() - .setStorageFormat(fromHiveStorageFormat(PARQUET)) - .setLocation(getTableDefaultLocation( - metastoreClient.getDatabase(tableName.getSchemaName()).orElseThrow(), - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS).create(session), - tableName.getSchemaName(), - tableName.getTableName()).toString()); - metastoreClient.createTable(table.build(), NO_PRIVILEGES); - - try { - // Verify the table was created as a Delta Lake table - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - assertThatThrownBy(() -> getTableHandle(metadata, tableName)) - .hasMessage(format("Cannot query Delta Lake table '%s'", tableName)); - } - - // Verify the hidden `$properties` and `$partitions` Delta Lake table handle can't be obtained within the hive connector - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - SchemaTableName propertiesTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$properties", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), propertiesTableName)).isEmpty(); - SchemaTableName partitionsTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$partitions", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), partitionsTableName)).isEmpty(); - } - - // Assert that table is hidden - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - - // TODO (https://github.com/trinodb/trino/issues/5426) these assertions should use information_schema instead of metadata directly, - // as information_schema or MetadataManager may apply additional logic - - // list all tables - assertThat(metadata.listTables(session, Optional.empty())) - .doesNotContain(tableName); - - // list all tables in a schema - assertThat(metadata.listTables(session, Optional.of(tableName.getSchemaName()))) - .doesNotContain(tableName); - - // list all columns in a schema - assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(tableName.getSchemaName())).keySet()) - 
.doesNotContain(tableName); - - // list all columns in a table - assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(tableName.getSchemaName(), tableName.getTableName())).keySet()) - .doesNotContain(tableName); - } - } - finally { - // Clean up - metastoreClient.dropTable(tableName.getSchemaName(), tableName.getTableName(), true); - } - } - - @Test - public void testDisallowQueryingOfIcebergTables() - { - ConnectorSession session = newSession(); - SchemaTableName tableName = temporaryTable("trino_iceberg_table"); - - Table.Builder table = Table.builder() - .setDatabaseName(tableName.getSchemaName()) - .setTableName(tableName.getTableName()) - .setOwner(Optional.of(session.getUser())) - .setTableType(MANAGED_TABLE.name()) - .setPartitionColumns(List.of(new Column("a_partition_column", HIVE_INT, Optional.empty(), Map.of()))) - .setDataColumns(List.of(new Column("a_column", HIVE_STRING, Optional.empty(), Map.of()))) - .setParameter(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE); - table.getStorageBuilder() - .setStorageFormat(fromHiveStorageFormat(PARQUET)) - .setLocation(getTableDefaultLocation( - metastoreClient.getDatabase(tableName.getSchemaName()).orElseThrow(), - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS).create(session), - tableName.getSchemaName(), - tableName.getTableName()).toString()); - metastoreClient.createTable(table.build(), NO_PRIVILEGES); - - try { - // Verify that the table was created as a Iceberg table can't be queried in hive - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - assertThatThrownBy(() -> getTableHandle(metadata, tableName)) - .hasMessage(format("Cannot query Iceberg table '%s'", tableName)); - } - - // Verify the hidden `$properties` and `$partitions` hive system tables table handle can't be obtained for the Iceberg tables - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - SchemaTableName propertiesTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$properties", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), propertiesTableName)).isEmpty(); - SchemaTableName partitionsTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$partitions", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), partitionsTableName)).isEmpty(); - } - } - finally { - // Clean up - metastoreClient.dropTable(tableName.getSchemaName(), tableName.getTableName(), true); - } - } - - @Test - public void testUpdateBasicTableStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_table_statistics"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, BASIC_STATISTICS_1, BASIC_STATISTICS_2); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdateTableColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_table_column_statistics"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdateTableColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = 
temporaryTable("update_table_column_statistics_empty_optional_fields"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_EMPTY_OPTIONAL_FIELDS); - } - finally { - dropTable(tableName); - } - } - - protected void testUpdateTableStatistics(SchemaTableName tableName, PartitionStatistics initialStatistics, PartitionStatistics... statistics) - { - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(initialStatistics); - - AtomicReference expectedStatistics = new AtomicReference<>(initialStatistics); - for (PartitionStatistics partitionStatistics : statistics) { - metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatistics.get()); - return partitionStatistics; - }); - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - expectedStatistics.set(partitionStatistics); - } - - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(expectedStatistics.get()); - - metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatistics.get()); - return initialStatistics; - }); - - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(initialStatistics); - } - - @Test - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testDataColumnProperties() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_column_properties"); - HiveMetastoreClosure 
metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - try { - doCreateEmptyTable(tableName, ORC, List.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("part_key", createVarcharType(256)))); - - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - - String columnPropertyValue = "data column value ,;.!??? \" ' {} [] non-printable \000 \001 spaces \n\r\t\f hiragana だ emoji 🤷‍♂️ x"; - metastoreClient.replaceTable( - tableName.getSchemaName(), - tableName.getTableName(), - Table.builder(table) - .setDataColumns(List.of(new Column("id", HIVE_LONG, Optional.empty(), Map.of("data prop", columnPropertyValue)))) - .build(), - NO_PRIVILEGES); - - table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEqualTo(Map.of("data prop", columnPropertyValue)); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testPartitionColumnProperties() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_column_properties"); - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - try { - doCreateEmptyTable(tableName, ORC, List.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("part_key", createVarcharType(256)))); - - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - - String columnPropertyValue = "partition column value ,;.!??? 
\" ' {} [] non-printable \000 \001 spaces \n\r\t\f hiragana だ emoji 🤷‍♂️ x"; - metastoreClient.replaceTable( - tableName.getSchemaName(), - tableName.getTableName(), - Table.builder(table) - .setPartitionColumns(List.of(new Column("part_key", HiveType.valueOf("varchar(256)"), Optional.empty(), Map.of("partition prop", columnPropertyValue)))) - .build(), - NO_PRIVILEGES); - - table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEqualTo(Map.of("partition prop", columnPropertyValue)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWhenTableIsPartitioned() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_input_info_with_partitioned_table"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), true, Optional.of("ORC"))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWhenTableIsNotPartitioned() - { - SchemaTableName tableName = temporaryTable("test_input_info_without_partitioned_table"); - try { - createDummyTable(tableName); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), false, Optional.of("TEXTFILE"))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWithParquetTableFormat() - { - SchemaTableName tableName = temporaryTable("test_input_info_with_parquet_table_format"); - try { - createDummyTable(tableName, PARQUET); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), false, Optional.of("PARQUET"))); - } - finally { - dropTable(tableName); - } - } - - private void assertInputInfo(SchemaTableName tableName, HiveInputInfo expectedInputInfo) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - HiveTableHandle tableHandle = (HiveTableHandle) metadata.getTableHandle(session, tableName); - assertThat(metadata.getInfo(tableHandle)).isEqualTo(Optional.of(expectedInputInfo)); - } - } - - /** - * During table scan, the illegal storage format for some specific table should not fail the whole table scan - */ - @Test - public void testIllegalStorageFormatDuringTableScan() - { - SchemaTableName schemaTableName = temporaryTable("test_illegal_storage_format"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - List columns = ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())); - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - Location targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - //create table whose storage format is null - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns) - .withStorage(storage -> storage - 
.setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.createNullable(null, null, null)) - .setSerdeParameters(ImmutableMap.of())); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - } - - // We retrieve the table whose storageFormat has null serde/inputFormat/outputFormat - // to make sure it can still be retrieved instead of throwing exception. - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix(schemaTableName.getSchemaName())); - assertThat(allColumns).containsKey(schemaTableName); - } - finally { - dropTable(schemaTableName); - } - } - - protected static Map> listTableColumns(ConnectorMetadata metadata, ConnectorSession session, SchemaTablePrefix prefix) - { - return stream(metadata.streamTableColumns(session, prefix)) - .collect(toImmutableMap( - TableColumnsMetadata::getTable, - tableColumns -> tableColumns.getColumns().orElseThrow(() -> new IllegalStateException("Table " + tableColumns.getTable() + " reported as redirected")))); - } - - private void createDummyTable(SchemaTableName tableName) - { - createDummyTable(tableName, TEXTFILE); - } - - private void createDummyTable(SchemaTableName tableName, HiveStorageFormat storageFormat) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = ImmutableList.of(new ColumnMetadata("dummy", createUnboundedVarcharType())); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat)); - ConnectorOutputTableHandle handle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - metadata.finishCreateTable(session, handle, ImmutableList.of(), ImmutableList.of()); - - transaction.commit(); - } - } - - protected void createDummyPartitionedTable(SchemaTableName tableName, List columns) - throws Exception - { - doCreateEmptyTable(tableName, ORC, columns); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - List firstPartitionValues = ImmutableList.of("2016-01-01"); - List secondPartitionValues = ImmutableList.of("2016-01-02"); - - String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues); - String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues); - - List partitions = ImmutableList.of(firstPartitionName, secondPartitionName) - .stream() - .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) - .collect(toImmutableList()); - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), 
secondPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); - } - - protected void testUpdatePartitionStatistics( - SchemaTableName tableName, - PartitionStatistics initialStatistics, - List firstPartitionStatistics, - List secondPartitionStatistics) - { - verify(firstPartitionStatistics.size() == secondPartitionStatistics.size()); - - String firstPartitionName = "ds=2016-01-01"; - String secondPartitionName = "ds=2016-01-02"; - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics)); - - AtomicReference expectedStatisticsPartition1 = new AtomicReference<>(initialStatistics); - AtomicReference expectedStatisticsPartition2 = new AtomicReference<>(initialStatistics); - - for (int i = 0; i < firstPartitionStatistics.size(); i++) { - PartitionStatistics statisticsPartition1 = firstPartitionStatistics.get(i); - PartitionStatistics statisticsPartition2 = secondPartitionStatistics.get(i); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatisticsPartition1.get()); - return statisticsPartition1; - }); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatisticsPartition2.get()); - return statisticsPartition2; - }); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, statisticsPartition1, secondPartitionName, statisticsPartition2)); - expectedStatisticsPartition1.set(statisticsPartition1); - expectedStatisticsPartition2.set(statisticsPartition2); - } - - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, expectedStatisticsPartition1.get(), secondPartitionName, expectedStatisticsPartition2.get())); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { - assertThat(currentStatistics).isEqualTo(expectedStatisticsPartition1.get()); - return initialStatistics; - }); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { - assertThat(currentStatistics).isEqualTo(expectedStatisticsPartition2.get()); - return initialStatistics; - }); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics)); - } - - @Test - public void testStorePartitionWithStatistics() - throws Exception - { - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, ZERO_TABLE_STATISTICS); - } - - protected void testStorePartitionWithStatistics( - List columns, - 
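As an aside, a minimal sketch of the two metastore idioms the partition statistics tests above lean on; metastoreClient and tableName are assumed to come from the surrounding test harness, everything else appears in the removed code:

    // Partition names are the Hive-style "col=value" rendering of the partition values,
    // and the two helpers round-trip between the two forms.
    String partitionName = makePartName(ImmutableList.of("ds"), ImmutableList.of("2016-01-01")); // "ds=2016-01-01"
    List<String> partitionValues = toPartitionValues(partitionName);                             // ["2016-01-01"]

    // updatePartitionStatistics is a read-modify-write operation: the callback receives the
    // statistics currently stored for the partition, and whatever it returns replaces them.
    metastoreClient.updatePartitionStatistics(
            tableName.getSchemaName(),
            tableName.getTableName(),
            partitionName,
            currentStatistics -> PartitionStatistics.empty());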
PartitionStatistics statsForAllColumns1, - PartitionStatistics statsForAllColumns2, - PartitionStatistics statsForSubsetOfColumns, - PartitionStatistics emptyStatistics) - throws Exception - { - SchemaTableName tableName = temporaryTable("store_partition_with_statistics"); - try { - doCreateEmptyTable(tableName, ORC, columns); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - List partitionValues = ImmutableList.of("2016-01-01"); - String partitionName = makePartName(ImmutableList.of("ds"), partitionValues); - - Partition partition = createDummyPartition(table, partitionName); - - // create partition with stats for all columns - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1))); - assertThat(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat()).isEqualTo(fromHiveStorageFormat(ORC)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1)); - - // alter the partition into one with other stats - Partition modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(RCBINARY)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2)); - assertThat(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat()).isEqualTo(fromHiveStorageFormat(RCBINARY)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2)); - - // alter the partition into one with stats for only subset of columns - modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns)); - - // alter the partition into one without stats - modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - 
.isEqualTo(ImmutableMap.of(partitionName, emptyStatistics)); - } - finally { - dropTable(tableName); - } - } - - protected Partition createDummyPartition(Table table, String partitionName) - { - return Partition.builder() - .setDatabaseName(table.getDatabaseName()) - .setTableName(table.getTableName()) - .setColumns(table.getDataColumns()) - .setValues(toPartitionValues(partitionName)) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(ORC)) - .setLocation(partitionTargetPath(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionName))) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, "testversion", - TRINO_QUERY_ID_NAME, "20180101_123456_00001_x1y2z")) - .build(); - } - - protected String partitionTargetPath(SchemaTableName schemaTableName, String partitionName) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - SemiTransactionalHiveMetastore metastore = transaction.getMetastore(); - LocationService locationService = getLocationService(); - Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()).get(); - LocationHandle handle = locationService.forExistingTable(metastore, session, table); - return locationService.getPartitionWriteInfo(handle, Optional.empty(), partitionName).targetPath().toString(); - } - } - - /** - * This test creates 2 identical partitions and verifies that the statistics projected based on - * a single partition sample are equal to the statistics computed in a fair way - */ - @Test - public void testPartitionStatisticsSampling() - throws Exception - { - testPartitionStatisticsSampling(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1); - } - - protected void testPartitionStatisticsSampling(List columns, PartitionStatistics statistics) - throws Exception - { - SchemaTableName tableName = temporaryTable("test_partition_statistics_sampling"); - - try { - createDummyPartitionedTable(tableName, columns); - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-01", actualStatistics -> statistics); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-02", actualStatistics -> statistics); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = metadata.getTableHandle(session, tableName); - TableStatistics unsampledStatistics = metadata.getTableStatistics(sampleSize(2), tableHandle); - TableStatistics sampledStatistics = metadata.getTableStatistics(sampleSize(1), tableHandle); - assertThat(sampledStatistics).isEqualTo(unsampledStatistics); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testApplyProjection() - throws Exception - { - ColumnMetadata bigIntColumn0 = new ColumnMetadata("int0", BIGINT); - ColumnMetadata bigIntColumn1 = new ColumnMetadata("int1", BIGINT); - - RowType oneLevelRowType = toRowType(ImmutableList.of(bigIntColumn0, bigIntColumn1)); - ColumnMetadata oneLevelRow0 = new ColumnMetadata("onelevelrow0", oneLevelRowType); - - RowType twoLevelRowType = toRowType(ImmutableList.of(oneLevelRow0, bigIntColumn0, bigIntColumn1)); - ColumnMetadata twoLevelRow0 = new ColumnMetadata("twolevelrow0", twoLevelRowType); - - List 
columnsForApplyProjectionTest = ImmutableList.of(bigIntColumn0, bigIntColumn1, oneLevelRow0, twoLevelRow0); - - SchemaTableName tableName = temporaryTable("apply_projection_tester"); - doCreateEmptyTable(tableName, ORC, columnsForApplyProjectionTest); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - assertThat(columnHandles.size()).isEqualTo(columnsForApplyProjectionTest.size()); - - Map columnHandleMap = columnHandles.stream() - .collect(toImmutableMap(handle -> ((HiveColumnHandle) handle).getBaseColumnName(), Function.identity())); - - // Emulate symbols coming from the query plan and map them to column handles - Map columnHandlesWithSymbols = ImmutableMap.of( - "symbol_0", columnHandleMap.get("int0"), - "symbol_1", columnHandleMap.get("int1"), - "symbol_2", columnHandleMap.get("onelevelrow0"), - "symbol_3", columnHandleMap.get("twolevelrow0")); - - // Create variables for the emulated symbols - Map symbolVariableMapping = columnHandlesWithSymbols.entrySet().stream() - .collect(toImmutableMap( - Map.Entry::getKey, - e -> new Variable( - e.getKey(), - ((HiveColumnHandle) e.getValue()).getBaseType()))); - - // Create dereference expressions for testing - FieldDereference symbol2Field0 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_2"), 0); - FieldDereference symbol3Field0 = new FieldDereference(oneLevelRowType, symbolVariableMapping.get("symbol_3"), 0); - FieldDereference symbol3Field0Field0 = new FieldDereference(BIGINT, symbol3Field0, 0); - FieldDereference symbol3Field1 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_3"), 1); - - Map inputAssignments; - List inputProjections; - Optional> projectionResult; - List expectedProjections; - Map expectedAssignments; - - // Test projected columns pushdown to HiveTableHandle in case of all variable references - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_0", "symbol_1")); - inputProjections = ImmutableList.of(symbolVariableMapping.get("symbol_0"), symbolVariableMapping.get("symbol_1")); - expectedAssignments = ImmutableMap.of( - "symbol_0", BIGINT, - "symbol_1", BIGINT); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments); - - // Empty result when projected column handles are same as those present in table handle - projectionResult = metadata.applyProjection(session, projectionResult.get().getHandle(), inputProjections, inputAssignments); - assertProjectionResult(projectionResult, true, ImmutableList.of(), ImmutableMap.of()); - - // Extra columns handles in HiveTableHandle should get pruned - projectionResult = metadata.applyProjection( - session, - ((HiveTableHandle) tableHandle).withProjectedColumns(ImmutableSet.copyOf(columnHandles)), - inputProjections, - inputAssignments); - assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments); - - // Test projection pushdown for dereferences - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2", "symbol_3")); - inputProjections = 
ImmutableList.of(symbol2Field0, symbol3Field0Field0, symbol3Field1); - expectedAssignments = ImmutableMap.of( - "onelevelrow0#f_int0", BIGINT, - "twolevelrow0#f_onelevelrow0#f_int0", BIGINT, - "twolevelrow0#f_int0", BIGINT); - expectedProjections = ImmutableList.of( - new Variable("onelevelrow0#f_int0", BIGINT), - new Variable("twolevelrow0#f_onelevelrow0#f_int0", BIGINT), - new Variable("twolevelrow0#f_int0", BIGINT)); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - - // Test reuse of virtual column handles - // Round-1: input projections [symbol_2, symbol_2.int0]. virtual handle is created for symbol_2.int0. - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2")); - inputProjections = ImmutableList.of(symbol2Field0, symbolVariableMapping.get("symbol_2")); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), symbolVariableMapping.get("symbol_2")); - expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "symbol_2", oneLevelRowType); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - - // Round-2: input projections [symbol_2.int0 and onelevelrow0#f_int0]. Virtual handle is reused. - Assignment newlyCreatedColumn = getOnlyElement(projectionResult.get().getAssignments().stream() - .filter(handle -> handle.getVariable().equals("onelevelrow0#f_int0")) - .collect(toList())); - inputAssignments = ImmutableMap.builder() - .putAll(getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"))) - .put(newlyCreatedColumn.getVariable(), newlyCreatedColumn.getColumn()) - .buildOrThrow(); - inputProjections = ImmutableList.of(symbol2Field0, new Variable("onelevelrow0#f_int0", BIGINT)); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("onelevelrow0#f_int0", BIGINT)); - expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - } - finally { - dropTable(tableName); - } - } - - private static Map getColumnHandlesFor(Map columnHandles, List symbols) - { - return columnHandles.entrySet().stream() - .filter(e -> symbols.contains(e.getKey())) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - private static void assertProjectionResult(Optional> projectionResult, boolean shouldBeEmpty, List expectedProjections, Map expectedAssignments) - { - if (shouldBeEmpty) { - assertThat(projectionResult.isEmpty()) - .describedAs("expected projectionResult to be empty") - .isTrue(); - return; - } - - assertThat(projectionResult.isPresent()) - .describedAs("expected non-empty projection result") - .isTrue(); - - ProjectionApplicationResult result = projectionResult.get(); - - // Verify projections - assertThat(expectedProjections).isEqualTo(result.getProjections()); - - // Verify assignments - List assignments = result.getAssignments(); - Map actualAssignments = uniqueIndex(assignments, Assignment::getVariable); - - for (String variable : expectedAssignments.keySet()) { - Type expectedType = expectedAssignments.get(variable); - 
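As an aside, the synthetic column names asserted above follow a simple pattern: the base column name, then "#f_" plus the field name for each dereference level. A hypothetical helper (not part of the connector) that reproduces the expected values:

    // syntheticProjectionName("twolevelrow0", List.of("onelevelrow0", "int0"))
    //   returns "twolevelrow0#f_onelevelrow0#f_int0"
    static String syntheticProjectionName(String baseColumn, List<String> fieldPath)
    {
        StringBuilder name = new StringBuilder(baseColumn);
        for (String field : fieldPath) {
            name.append("#f_").append(field);
        }
        return name.toString();
    }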
assertThat(actualAssignments).containsKey(variable); - assertThat(actualAssignments.get(variable).getType()).isEqualTo(expectedType); - assertThat(((HiveColumnHandle) actualAssignments.get(variable).getColumn()).getType()).isEqualTo(expectedType); - } - - assertThat(actualAssignments.size()).isEqualTo(expectedAssignments.size()); - assertThat(actualAssignments.values().stream().map(Assignment::getColumn).collect(toImmutableSet())).isEqualTo(((HiveTableHandle) result.getHandle()).getProjectedColumns()); - } - - @Test - public void testApplyRedirection() - throws Exception - { - SchemaTableName sourceTableName = temporaryTable("apply_redirection_tester"); - doCreateEmptyTable(sourceTableName, ORC, CREATE_TABLE_COLUMNS); - SchemaTableName tableName = temporaryTable("apply_no_redirection_tester"); - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.applyTableScanRedirect(session, getTableHandle(metadata, tableName))).isEmpty(); - Optional result = metadata.applyTableScanRedirect(session, getTableHandle(metadata, sourceTableName)); - assertThat(result).isPresent(); - assertThat(result.get().getDestinationTable()) - .isEqualTo(new CatalogSchemaTableName("hive", database, "mock_redirection_target")); - } - finally { - dropTable(sourceTableName); - dropTable(tableName); - } - } - - @Test - public void testMaterializedViewMetadata() - throws Exception - { - SchemaTableName sourceTableName = temporaryTable("materialized_view_tester"); - doCreateEmptyTable(sourceTableName, ORC, CREATE_TABLE_COLUMNS); - SchemaTableName tableName = temporaryTable("mock_table"); - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getMaterializedView(session, tableName)).isEmpty(); - Optional result = metadata.getMaterializedView(session, sourceTableName); - assertThat(result).isPresent(); - assertThat(result.get().getOriginalSql()).isEqualTo("dummy_view_sql"); - } - finally { - dropTable(sourceTableName); - dropTable(tableName); - } - } - - @Test - public void testOrcPageSourceMetrics() - throws Exception - { - SchemaTableName tableName = temporaryTable("orc_page_source_metrics"); - try { - assertPageSourceMetrics(tableName, ORC, new Metrics(ImmutableMap.of(ORC_CODEC_METRIC_PREFIX + "SNAPPY", new LongCount(209)))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testParquetPageSourceMetrics() - throws Exception - { - SchemaTableName tableName = temporaryTable("parquet_page_source_metrics"); - try { - assertPageSourceMetrics(tableName, PARQUET, new Metrics(ImmutableMap.of(PARQUET_CODEC_METRIC_PREFIX + "SNAPPY", new LongCount(1157)))); - } - finally { - dropTable(tableName); - } - } - - private void assertPageSourceMetrics(SchemaTableName tableName, HiveStorageFormat storageFormat, Metrics expectedMetrics) - throws Exception - { - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of()); - MaterializedResult.Builder inputDataBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR); - IntStream.range(0, 100).forEach(i -> inputDataBuilder.row((long) i, String.valueOf(i))); - 
insertData(tableName, inputDataBuilder.build(), ImmutableMap.of("compression_codec", "SNAPPY")); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - assertThat(pageSource.getMetrics()).isEqualTo(expectedMetrics); - } - } - } - } - - private ConnectorSession sampleSize(int sampleSize) - { - return getHiveSession(getHiveConfig() - .setPartitionStatisticsSampleSize(sampleSize)); - } - - private void verifyViewCreation(SchemaTableName temporaryCreateView) - { - // replace works for new view - doCreateView(temporaryCreateView, true); - - // replace works for existing view - doCreateView(temporaryCreateView, true); - - // create fails for existing view - try { - doCreateView(temporaryCreateView, false); - fail("create existing should fail"); - } - catch (ViewAlreadyExistsException e) { - assertThat(e.getViewName()).isEqualTo(temporaryCreateView); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - // drop works when view exists - metadata.dropView(newSession(), temporaryCreateView); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getView(newSession(), temporaryCreateView)) - .isEmpty(); - assertThat(metadata.getViews(newSession(), Optional.of(temporaryCreateView.getSchemaName()))) - .doesNotContainKey(temporaryCreateView); - assertThat(metadata.listViews(newSession(), Optional.of(temporaryCreateView.getSchemaName()))) - .doesNotContain(temporaryCreateView); - } - - // drop fails when view does not exist - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.dropView(newSession(), temporaryCreateView); - fail("drop non-existing should fail"); - } - catch (ViewNotFoundException e) { - assertThat(e.getViewName()).isEqualTo(temporaryCreateView); - } - - // create works for new view - doCreateView(temporaryCreateView, false); - } - - private void doCreateView(SchemaTableName viewName, boolean replace) - { - String viewData = "test data"; - ConnectorViewDefinition definition = new ConnectorViewDefinition( - viewData, - Optional.empty(), - Optional.empty(), - ImmutableList.of(new ViewColumn("test", BIGINT.getTypeId(), Optional.empty())), - Optional.empty(), - Optional.empty(), - true, - ImmutableList.of()); - - try (Transaction transaction = newTransaction()) { - transaction.getMetadata().createView(newSession(), viewName, definition, replace); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - - assertThat(metadata.getView(newSession(), viewName)) - .map(ConnectorViewDefinition::getOriginalSql) - .contains(viewData); - - Map views = 
metadata.getViews(newSession(), Optional.of(viewName.getSchemaName())); - assertThat(views.size()).isEqualTo(1); - assertThat(views.get(viewName).getOriginalSql()).isEqualTo(definition.getOriginalSql()); - - assertThat(metadata.listViews(newSession(), Optional.of(viewName.getSchemaName()))).contains(viewName); - } - } - - protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat) - throws Exception - { - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - queryId = session.getQueryId(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, CREATE_TABLE_COLUMNS, createTableProperties(storageFormat)); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // verify all new files start with the unique prefix - HdfsContext context = new HdfsContext(session); - for (String filePath : listAllDataFiles(context, getStagingPathRoot(outputHandle))) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // commit the table - metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_DATA.getMaterializedRows()); - - // verify the node version and query ID in table - Table table = getMetastoreClient().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - assertThat(table.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(table.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - - // verify basic statistics - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount()); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat storageFormat, List createTableColumns) - throws Exception - { - List partitionedBy = createTableColumns.stream() - .map(ColumnMetadata::getName) - 
.filter(PARTITION_COLUMN_FILTER) - .collect(toList()); - - doCreateEmptyTable(tableName, storageFormat, createTableColumns, partitionedBy); - } - - protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat storageFormat, List createTableColumns, List partitionedBy) - throws Exception - { - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - queryId = session.getQueryId(); - - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, createTableColumns, createTableProperties(storageFormat, partitionedBy)); - metadata.createTable(session, tableMetadata, false); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - - List expectedColumns = createTableColumns.stream() - .map(column -> ColumnMetadata.builder() - .setName(column.getName()) - .setType(column.getType()) - .setComment(Optional.ofNullable(column.getComment())) - .setExtraInfo(Optional.ofNullable(columnExtraInfo(partitionedBy.contains(column.getName())))) - .build()) - .collect(toList()); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(expectedColumns); - - // verify table format - Table table = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - assertThat(table.getStorage().getStorageFormat().getInputFormat()).isEqualTo(storageFormat.getInputFormat()); - - // verify the node version and query ID - assertThat(table.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(table.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - - // verify the table is empty - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(0); - - // verify basic statistics - if (partitionedBy.isEmpty()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(0L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(0L); - assertThat(statistics.getInMemoryDataSizeInBytes().getAsLong()).isEqualTo(0L); - assertThat(statistics.getOnDiskDataSizeInBytes().getAsLong()).isEqualTo(0L); - } - } - } - - private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS); - - MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes()); - for (int i = 0; i < 3; i++) { - insertData(tableName, CREATE_TABLE_DATA); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - 
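As an aside, the arithmetic behind the per-iteration assertions in this loop: each pass inserts the same CREATE_TABLE_DATA as one additional file, so the basic statistics are expected to grow linearly with the iteration index i:

    long expectedRowCount = CREATE_TABLE_DATA.getRowCount() * (i + 1L); // rows accumulate across inserts
    long expectedFileCount = i + 1L;                                    // one new data file per insert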
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // statistics - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().orElse(0)).isEqualTo(CREATE_TABLE_DATA.getRowCount() * (i + 1L)); - assertThat(tableStatistics.getFileCount().getAsLong()).isEqualTo(i + 1L); - assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - // test rollback - Set existingFiles; - try (Transaction transaction = newTransaction()) { - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // statistics, visible from within transaction - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 5L); - - try (Transaction otherTransaction = newTransaction()) { - // statistics, not visible from outside transaction - HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName); - assertThat(otherTableStatistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 3L); - } - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify all temp files start with the unique prefix - stagingPathRoot = getStagingPathRoot(insertTableHandle); - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, stagingPathRoot); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify temp directory is empty - HdfsContext context = new 
HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - } - - // verify statistics unchanged - try (Transaction transaction = newTransaction()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 3L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(3L); - } - } - - private void doInsertOverwriteUnpartitioned(SchemaTableName tableName) - throws Exception - { - // create table with data - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - insertData(tableName, CREATE_TABLE_DATA); - - // overwrite table with new data - MaterializedResult.Builder overwriteDataBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes()); - MaterializedResult overwriteData = null; - - Map overwriteProperties = ImmutableMap.of("insert_existing_partitions_behavior", "OVERWRITE"); - - for (int i = 0; i < 3; i++) { - overwriteDataBuilder.rows(reverse(CREATE_TABLE_DATA.getMaterializedRows())); - overwriteData = overwriteDataBuilder.build(); - - insertData(tableName, overwriteData, overwriteProperties); - - // verify overwrite - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows()); - - // statistics - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - assertThat(tableStatistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - // test rollback - Set existingFiles; - try (Transaction transaction = newTransaction()) { - existingFiles = 
listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(overwriteProperties); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - for (int i = 0; i < 4; i++) { - sink.appendPage(overwriteData.toPage()); - } - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // statistics, visible from within transaction - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount() * 4L); - - try (Transaction otherTransaction = newTransaction()) { - // statistics, not visible from outside transaction - HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName); - assertThat(otherTableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - } - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify all temp files start with the unique prefix - stagingPathRoot = getStagingPathRoot(insertTableHandle); - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, stagingPathRoot); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify temp directory is empty - HdfsContext context = new HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - } - - // verify statistics unchanged - try (Transaction transaction = newTransaction()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(1L); - } - } - - private Location getStagingPathRoot(ConnectorInsertTableHandle insertTableHandle) - { - 
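As an aside, a minimal sketch of the write-path relationship these path helpers expose and that the rollback checks above depend on; handle is assumed to be a HiveInsertTableHandle obtained from beginInsert:

    // With the STAGE_AND_MOVE_TO_TARGET_DIRECTORY write mode, data is first written under
    // writePath() (a query-scoped staging location) and only moved to targetPath() when the
    // transaction commits, which is why a rollback leaves the target directory untouched.
    WriteInfo writeInfo = getLocationService().getQueryWriteInfo(handle.getLocationHandle());
    Location stagingPath = writeInfo.writePath();
    Location targetPath = writeInfo.targetPath();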
-        HiveInsertTableHandle handle = (HiveInsertTableHandle) insertTableHandle;
-        WriteInfo writeInfo = getLocationService().getQueryWriteInfo(handle.getLocationHandle());
-        if (writeInfo.writeMode() != STAGE_AND_MOVE_TO_TARGET_DIRECTORY) {
-            throw new AssertionError("writeMode is not STAGE_AND_MOVE_TO_TARGET_DIRECTORY");
-        }
-        return writeInfo.writePath();
-    }
-
-    private Location getStagingPathRoot(ConnectorOutputTableHandle outputTableHandle)
-    {
-        HiveOutputTableHandle handle = (HiveOutputTableHandle) outputTableHandle;
-        return getLocationService()
-                .getQueryWriteInfo(handle.getLocationHandle())
-                .writePath();
-    }
-
-    private Location getTargetPathRoot(ConnectorInsertTableHandle insertTableHandle)
-    {
-        HiveInsertTableHandle hiveInsertTableHandle = (HiveInsertTableHandle) insertTableHandle;
-
-        return getLocationService()
-                .getQueryWriteInfo(hiveInsertTableHandle.getLocationHandle())
-                .targetPath();
-    }
-
-    protected Set<String> listAllDataFiles(Transaction transaction, String schemaName, String tableName)
-            throws IOException
-    {
-        HdfsContext hdfsContext = new HdfsContext(newSession());
-        Set<String> existingFiles = new HashSet<>();
-        for (String location : listAllDataPaths(transaction.getMetastore(), schemaName, tableName)) {
-            existingFiles.addAll(listAllDataFiles(hdfsContext, Location.of(location)));
-        }
-        return existingFiles;
-    }
-
-    public static List<String> listAllDataPaths(SemiTransactionalHiveMetastore metastore, String schemaName, String tableName)
-    {
-        ImmutableList.Builder<String> locations = ImmutableList.builder();
-        Table table = metastore.getTable(schemaName, tableName).get();
-        if (table.getStorage().getLocation() != null) {
-            // For partitioned table, there should be nothing directly under this directory.
-            // But including this location in the set makes the directory content assert more
-            // extensive, which is desirable.
- locations.add(table.getStorage().getLocation()); - } - - Optional> partitionNames = metastore.getPartitionNames(schemaName, tableName); - if (partitionNames.isPresent()) { - metastore.getPartitionsByNames(schemaName, tableName, partitionNames.get()).values().stream() - .map(Optional::get) - .map(partition -> partition.getStorage().getLocation()) - .filter(location -> !location.startsWith(table.getStorage().getLocation())) - .forEach(locations::add); - } - - return locations.build(); - } - - protected Set listAllDataFiles(HdfsContext context, Location location) - throws IOException - { - Path path = new Path(location.toString()); - Set result = new HashSet<>(); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path); - if (fileSystem.exists(path)) { - for (FileStatus fileStatus : fileSystem.listStatus(path)) { - if (fileStatus.getPath().getName().startsWith(".trino")) { - // skip hidden files - } - else if (fileStatus.isFile()) { - result.add(fileStatus.getPath().toString()); - } - else if (fileStatus.isDirectory()) { - result.addAll(listAllDataFiles(context, Location.of(fileStatus.getPath().toString()))); - } - } - } - return result; - } - - private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - // insert the data - String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - Set existingFiles; - try (Transaction transaction = newTransaction()) { - // verify partitions were created - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // verify the node versions in partitions - Map> partitions = getMetastoreClient().getPartitionsByNames(table, partitionNames); - assertThat(partitions.size()).isEqualTo(partitionNames.size()); - for (String partitionName : partitionNames) { - Partition partition = partitions.get(partitionName).get(); - assertThat(partition.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(partition.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - } - - // load the new table - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - // test rollback - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - - // test statistics - for (String partitionName : 
partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(1L); - assertThat(partitionStatistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - stagingPathRoot = getStagingPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // verify all temp files start with the unique prefix - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle)); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify temp directory is empty - HdfsContext context = new HdfsContext(session); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - } - } - - private void doInsertUnsupportedWriteType(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - List columns = ImmutableList.of(new Column("dummy", HiveType.valueOf("uniontype"), Optional.empty(), Map.of())); - List partitionColumns = ImmutableList.of(new Column("name", HIVE_STRING, Optional.empty(), Map.of())); - - createEmptyTable(tableName, storageFormat, columns, partitionColumns); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - fail("expected failure"); - } - catch (TrinoException e) { - 
assertThat(e).hasMessageMatching("Inserting into Hive table .* with column type uniontype not supported"); - } - } - - private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes()); - for (int i = 0; i < 3; i++) { - // insert the data - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // verify partitions were created - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // load the new table - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // test statistics - for (String partitionName : partitionNames) { - HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(i + 1L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(i + 1L); - assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - } - - // test rollback - Set existingFiles; - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - stagingPathRoot = getStagingPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // verify all temp files start with the unique prefix - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, 
getStagingPathRoot(insertTableHandle)); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // verify statistics are visible from within of the current transaction - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - for (String partitionName : partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(5L); - } - - // rollback insert - transaction.rollback(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data is unchanged - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify temp directory is empty - HdfsContext hdfsContext = new HdfsContext(session); - assertThat(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty()).isTrue(); - - // verify statistics have been rolled back - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - for (String partitionName : partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(3L); - } - } - } - - private void doInsertIntoExistingPartitionEmptyStatistics(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - eraseStatistics(tableName); - - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - try (Transaction transaction = newTransaction()) { - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - - for (String partitionName : partitionNames) { - HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(statistics.getRowCount()).isNotPresent(); - assertThat(statistics.getInMemoryDataSizeInBytes()).isNotPresent(); - // fileCount and rawSize statistics are computed on the fly by the metastore, thus cannot be erased - } - } - } - - private static HiveBasicStatistics getBasicStatisticsForTable(Transaction transaction, SchemaTableName table) - { - return transaction - .getMetastore() - 
.getTableStatistics(table.getSchemaName(), table.getTableName(), Optional.empty()) - .getBasicStatistics(); - } - - private static HiveBasicStatistics getBasicStatisticsForPartition(Transaction transaction, SchemaTableName table, Set columns, String partitionName) - { - return transaction - .getMetastore() - .getPartitionStatistics(table.getSchemaName(), table.getTableName(), columns, ImmutableSet.of(partitionName)) - .get(partitionName) - .getBasicStatistics(); - } - - private void eraseStatistics(SchemaTableName schemaTableName) - { - HiveMetastore metastoreClient = getMetastoreClient(); - metastoreClient.updateTableStatistics(schemaTableName.getSchemaName(), schemaTableName.getTableName(), NO_ACID_TRANSACTION, statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); - Table table = metastoreClient.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(schemaTableName)); - List partitionColumns = table.getPartitionColumns().stream() - .map(Column::getName) - .collect(toImmutableList()); - if (!table.getPartitionColumns().isEmpty()) { - List partitionNames = metastoreClient.getPartitionNamesByFilter(schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionColumns, TupleDomain.all()) - .orElse(ImmutableList.of()); - List partitions = metastoreClient - .getPartitionsByNames(table, partitionNames) - .values() - .stream() - .filter(Optional::isPresent) - .map(Optional::get) - .collect(toImmutableList()); - for (Partition partition : partitions) { - metastoreClient.updatePartitionStatistics( - table, - makePartName(partitionColumns, partition.getValues()), - statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); - } - } - } - - /** - * @return query id - */ - private String insertData(SchemaTableName tableName, MaterializedResult data) - throws Exception - { - return insertData(tableName, data, ImmutableMap.of()); - } - - private String insertData(SchemaTableName tableName, MaterializedResult data, Map sessionProperties) - throws Exception - { - Location writePath; - Location targetPath; - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(sessionProperties); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - queryId = session.getQueryId(); - writePath = getStagingPathRoot(insertTableHandle); - targetPath = getTargetPathRoot(insertTableHandle); - - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - - // write data - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // commit the insert - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - transaction.commit(); - } - - // check that temporary files are removed - if (!writePath.equals(targetPath)) { - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, new Path(writePath.toString())); - assertThat(fileSystem.exists(new Path(writePath.toString()))).isFalse(); - } - - return queryId; - } - - private void doTestMetadataDelete(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // 
creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - MaterializedResult.Builder expectedResultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes()); - expectedResultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // verify partitions were created - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // verify table directory is not empty - Set filesAfterInsert = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(filesAfterInsert.isEmpty()).isFalse(); - - // verify the data - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedResultBuilder.build().getMaterializedRows()); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // get ds column handle - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - - // delete ds=2015-07-03 - session = newSession(); - TupleDomain tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumnHandle, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2015-07-03")))); - Constraint constraint = new Constraint(tupleDomain, tupleDomain.asPredicate(), tupleDomain.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - int dsColumnOrdinalPosition = columnHandles.indexOf(dsColumnHandle); - - // verify the data - ImmutableList expectedRows = expectedResultBuilder.build().getMaterializedRows().stream() - .filter(row -> !"2015-07-03".equals(row.getField(dsColumnOrdinalPosition))) - .collect(toImmutableList()); - MaterializedResult actualAfterDelete = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), 
OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(actualAfterDelete.getMaterializedRows(), expectedRows); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - - // delete ds=2015-07-01 and 2015-07-02 - session = newSession(); - TupleDomain tupleDomain2 = TupleDomain.withColumnDomains( - ImmutableMap.of(dsColumnHandle, Domain.create(ValueSet.ofRanges(Range.range(createUnboundedVarcharType(), utf8Slice("2015-07-01"), true, utf8Slice("2015-07-02"), true)), false))); - Constraint constraint2 = new Constraint(tupleDomain2, tupleDomain2.asPredicate(), tupleDomain2.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint2); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - session = newSession(); - MaterializedResult actualAfterDelete2 = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(actualAfterDelete2.getMaterializedRows(), ImmutableList.of()); - - // verify table directory is empty - Set filesAfterDelete = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(filesAfterDelete.isEmpty()).isTrue(); - } - } - - protected void assertGetRecords(String tableName, HiveStorageFormat hiveStorageFormat) - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, new SchemaTableName(database, tableName)); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle); - HiveSplit hiveSplit = getHiveSplit(tableHandle, transaction, session); - - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY); - assertGetRecords(hiveStorageFormat, tableMetadata, hiveSplit, pageSource, columnHandles); - } - } - - protected HiveSplit getHiveSplit(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(1); - return (HiveSplit) getOnlyElement(splits); - } - - protected void assertGetRecords( - HiveStorageFormat hiveStorageFormat, - ConnectorTableMetadata tableMetadata, - HiveSplit hiveSplit, - ConnectorPageSource pageSource, - List columnHandles) - throws IOException - { - try { - MaterializedResult result = materializeSourceDataStream(newSession(), pageSource, 
getTypes(columnHandles)); - - assertPageSourceType(pageSource, hiveStorageFormat); - - ImmutableMap columnIndex = indexColumns(tableMetadata); - - long rowNumber = 0; - long completedBytes = 0; - for (MaterializedRow row : result) { - try { - assertValueTypes(row, tableMetadata.getColumns()); - } - catch (RuntimeException e) { - throw new RuntimeException("row " + rowNumber, e); - } - - rowNumber++; - Integer index; - Object value; - - // STRING - index = columnIndex.get("t_string"); - value = row.getField(index); - if (rowNumber % 19 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(value).isEqualTo(""); - } - else { - assertThat(value).isEqualTo("test"); - } - - // NUMBERS - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_smallint"))).isEqualTo((short) (2 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_int"))).isEqualTo((int) (3 + rowNumber)); - - index = columnIndex.get("t_bigint"); - if ((rowNumber % 13) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(4 + rowNumber); - } - - assertThat((Float) row.getField(columnIndex.get("t_float"))).isCloseTo(5.1f + rowNumber, offset(0.001f)); - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - - // BOOLEAN - index = columnIndex.get("t_boolean"); - if ((rowNumber % 3) == 2) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo((rowNumber % 3) != 0); - } - - // TIMESTAMP - index = columnIndex.get("t_timestamp"); - if (index != null) { - if ((rowNumber % 17) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlTimestamp expected = sqlTimestampOf(3, 2011, 5, 6, 7, 8, 9, 123); - assertThat(row.getField(index)).isEqualTo(expected); - } - } - - // BINARY - index = columnIndex.get("t_binary"); - if (index != null) { - if ((rowNumber % 23) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(new SqlVarbinary("test binary".getBytes(UTF_8))); - } - } - - // DATE - index = columnIndex.get("t_date"); - if (index != null) { - if ((rowNumber % 37) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlDate expected = new SqlDate(toIntExact(MILLISECONDS.toDays(new DateTime(2013, 8, 9, 0, 0, 0, UTC).getMillis()))); - assertThat(row.getField(index)).isEqualTo(expected); - } - } - - // VARCHAR(50) - index = columnIndex.get("t_varchar"); - if (index != null) { - value = row.getField(index); - if (rowNumber % 39 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 39 == 1) { - // https://issues.apache.org/jira/browse/HIVE-13289 - // RCBINARY reads empty VARCHAR as null - if (hiveStorageFormat == RCBINARY) { - assertThat(value).isNull(); - } - else { - assertThat(value).isEqualTo(""); - } - } - else { - assertThat(value).isEqualTo("test varchar"); - } - } - - //CHAR(25) - index = columnIndex.get("t_char"); - if (index != null) { - value = row.getField(index); - if ((rowNumber % 41) == 0) { - assertThat(value).isNull(); - } - else { - assertThat(value).isEqualTo((rowNumber % 41) == 1 ? 
" " : "test char "); - } - } - - // MAP - index = columnIndex.get("t_map"); - if (index != null) { - if ((rowNumber % 27) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(ImmutableMap.of("test key", "test value")); - } - } - - // ARRAY - index = columnIndex.get("t_array_string"); - if (index != null) { - if ((rowNumber % 29) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(ImmutableList.of("abc", "xyz", "data")); - } - } - - // ARRAY - index = columnIndex.get("t_array_timestamp"); - if (index != null) { - if ((rowNumber % 43) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlTimestamp expected = sqlTimestampOf(3, LocalDateTime.of(2011, 5, 6, 7, 8, 9, 123_000_000)); - assertThat(row.getField(index)).isEqualTo(ImmutableList.of(expected)); - } - } - - // ARRAY> - index = columnIndex.get("t_array_struct"); - if (index != null) { - if ((rowNumber % 31) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - List expected1 = ImmutableList.of("test abc", 0.1); - List expected2 = ImmutableList.of("test xyz", 0.2); - assertThat(row.getField(index)).isEqualTo(ImmutableList.of(expected1, expected2)); - } - } - - // STRUCT - index = columnIndex.get("t_struct"); - if (index != null) { - if ((rowNumber % 31) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index) instanceof List).isTrue(); - List values = (List) row.getField(index); - assertThat(values.size()).isEqualTo(2); - assertThat(values.get(0)).isEqualTo("test abc"); - assertThat(values.get(1)).isEqualTo(0.1); - } - } - - // MAP>> - index = columnIndex.get("t_complex"); - if (index != null) { - if ((rowNumber % 33) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - List expected1 = ImmutableList.of("test abc", 0.1); - List expected2 = ImmutableList.of("test xyz", 0.2); - assertThat(row.getField(index)).isEqualTo(ImmutableMap.of(1, ImmutableList.of(expected1, expected2))); - } - } - - // NEW COLUMN - assertThat(row.getField(columnIndex.get("new_column"))).isNull(); - - long newCompletedBytes = pageSource.getCompletedBytes(); - assertThat(newCompletedBytes >= completedBytes).isTrue(); - // some formats (e.g., parquet) over read the data by a bit - assertLessThanOrEqual(newCompletedBytes, hiveSplit.getLength() + (100 * 1024)); - completedBytes = newCompletedBytes; - } - - assertLessThanOrEqual(completedBytes, hiveSplit.getLength() + (100 * 1024)); - assertThat(rowNumber).isEqualTo(100); - } - finally { - pageSource.close(); - } - } - - protected void dropTable(SchemaTableName table) - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - ConnectorTableHandle handle = metadata.getTableHandle(session, table); - if (handle == null) { - return; - } - - metadata.dropTable(session, handle); - try { - // todo I have no idea why this is needed... maybe there is a propagation delay in the metastore? 
- metadata.dropTable(session, handle); - fail("expected NotFoundException"); - } - catch (TableNotFoundException expected) { - } - - transaction.commit(); - } - } - - protected ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName) - { - ConnectorTableHandle handle = metadata.getTableHandle(newSession(), tableName); - checkArgument(handle != null, "table not found: %s", tableName); - return handle; - } - - private HiveTableHandle applyFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, Constraint constraint) - { - return metadata.applyFilter(newSession(), tableHandle, constraint) - .map(ConstraintApplicationResult::getHandle) - .map(HiveTableHandle.class::cast) - .orElseThrow(AssertionError::new); - } - - protected MaterializedResult readTable( - Transaction transaction, - ConnectorTableHandle tableHandle, - List columnHandles, - ConnectorSession session, - TupleDomain tupleDomain, - OptionalInt expectedSplitCount, - Optional expectedStorageFormat) - throws Exception - { - tableHandle = applyFilter(transaction.getMetadata(), tableHandle, new Constraint(tupleDomain)); - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - if (expectedSplitCount.isPresent()) { - assertThat(splits.size()).isEqualTo(expectedSplitCount.getAsInt()); - } - - ImmutableList.Builder allRows = ImmutableList.builder(); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - expectedStorageFormat.ifPresent(format -> assertPageSourceType(pageSource, format)); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - allRows.addAll(result.getMaterializedRows()); - } - } - return new MaterializedResult(allRows.build(), getTypes(columnHandles)); - } - - protected HiveMetastore getMetastoreClient() - { - return metastoreClient; - } - - protected LocationService getLocationService() - { - return locationService; - } - - protected static int getSplitCount(ConnectorSplitSource splitSource) - { - int splitCount = 0; - while (!splitSource.isFinished()) { - splitCount += getFutureValue(splitSource.getNextBatch(1000)).getSplits().size(); - } - return splitCount; - } - - private List getAllSplits(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - return getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - } - - protected static List getAllSplits(ConnectorSplitSource splitSource) - { - ImmutableList.Builder splits = ImmutableList.builder(); - while (!splitSource.isFinished()) { - splits.addAll(getFutureValue(splitSource.getNextBatch(1000)).getSplits()); - } - return splits.build(); - } - - protected static ConnectorSplitSource getSplits(ConnectorSplitManager splitManager, Transaction transaction, ConnectorSession session, ConnectorTableHandle tableHandle) - { - return splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, DynamicFilter.EMPTY, Constraint.alwaysTrue()); - } - - protected String getPartitionId(Object partition) - { - return ((HivePartition) partition).getPartitionId(); - } - - protected static void assertPageSourceType(ConnectorPageSource pageSource, HiveStorageFormat hiveStorageFormat) - { - assertInstanceOf(((HivePageSource) pageSource).getPageSource(), pageSourceType(hiveStorageFormat), hiveStorageFormat.name()); - } - - private 
static Class pageSourceType(HiveStorageFormat hiveStorageFormat) - { - switch (hiveStorageFormat) { - case RCTEXT: - case RCBINARY: - return RcFilePageSource.class; - case ORC: - return OrcPageSource.class; - case PARQUET: - return ParquetPageSource.class; - case CSV: - case JSON: - case OPENX_JSON: - case TEXTFILE: - case SEQUENCEFILE: - return LinePageSource.class; - default: - throw new AssertionError("File type does not use a PageSource: " + hiveStorageFormat); - } - } - - private static void assertValueTypes(MaterializedRow row, List schema) - { - for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) { - ColumnMetadata column = schema.get(columnIndex); - Object value = row.getField(columnIndex); - if (value != null) { - if (BOOLEAN.equals(column.getType())) { - assertInstanceOf(value, Boolean.class); - } - else if (TINYINT.equals(column.getType())) { - assertInstanceOf(value, Byte.class); - } - else if (SMALLINT.equals(column.getType())) { - assertInstanceOf(value, Short.class); - } - else if (INTEGER.equals(column.getType())) { - assertInstanceOf(value, Integer.class); - } - else if (BIGINT.equals(column.getType())) { - assertInstanceOf(value, Long.class); - } - else if (DOUBLE.equals(column.getType())) { - assertInstanceOf(value, Double.class); - } - else if (REAL.equals(column.getType())) { - assertInstanceOf(value, Float.class); - } - else if (column.getType() instanceof VarcharType) { - assertInstanceOf(value, String.class); - } - else if (column.getType() instanceof CharType) { - assertInstanceOf(value, String.class); - } - else if (VARBINARY.equals(column.getType())) { - assertInstanceOf(value, SqlVarbinary.class); - } - else if (TIMESTAMP_MILLIS.equals(column.getType())) { - assertInstanceOf(value, SqlTimestamp.class); - } - else if (TIMESTAMP_TZ_MILLIS.equals(column.getType())) { - assertInstanceOf(value, SqlTimestampWithTimeZone.class); - } - else if (DATE.equals(column.getType())) { - assertInstanceOf(value, SqlDate.class); - } - else if (column.getType() instanceof ArrayType || column.getType() instanceof RowType) { - assertInstanceOf(value, List.class); - } - else if (column.getType() instanceof MapType) { - assertInstanceOf(value, Map.class); - } - else { - fail("Unknown primitive type " + columnIndex); - } - } - } - } - - private static void assertPrimitiveField(Map map, String name, Type type, boolean partitionKey) - { - assertThat(map).containsKey(name); - ColumnMetadata column = map.get(name); - assertThat(column.getType()) - .describedAs(name) - .isEqualTo(type); - assertThat(column.getExtraInfo()).isEqualTo(columnExtraInfo(partitionKey)); - } - - protected static ImmutableMap indexColumns(List columnHandles) - { - ImmutableMap.Builder index = ImmutableMap.builder(); - int i = 0; - for (ColumnHandle columnHandle : columnHandles) { - HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle; - index.put(hiveColumnHandle.getName(), i); - i++; - } - return index.buildOrThrow(); - } - - protected static ImmutableMap indexColumns(ConnectorTableMetadata tableMetadata) - { - ImmutableMap.Builder index = ImmutableMap.builder(); - int i = 0; - for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) { - index.put(columnMetadata.getName(), i); - i++; - } - return index.buildOrThrow(); - } - - protected SchemaTableName temporaryTable(String tableName) - { - return temporaryTable(database, tableName); - } - - protected static SchemaTableName temporaryTable(String database, String tableName) - { - String randomName = 
UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""); - return new SchemaTableName(database, TEMPORARY_TABLE_PREFIX + tableName + "_" + randomName); - } - - protected static Map createTableProperties(HiveStorageFormat storageFormat) - { - return createTableProperties(storageFormat, ImmutableList.of()); - } - - protected static Map createTableProperties(HiveStorageFormat storageFormat, Iterable partitionedBy) - { - return ImmutableMap.builder() - .put(STORAGE_FORMAT_PROPERTY, storageFormat) - .put(PARTITIONED_BY_PROPERTY, ImmutableList.copyOf(partitionedBy)) - .put(BUCKETED_BY_PROPERTY, ImmutableList.of()) - .put(BUCKET_COUNT_PROPERTY, 0) - .put(SORTED_BY_PROPERTY, ImmutableList.of()) - .buildOrThrow(); - } - - protected static List filterNonHiddenColumnHandles(Collection columnHandles) - { - return columnHandles.stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - } - - protected static List filterNonHiddenColumnMetadata(Collection columnMetadatas) - { - return columnMetadatas.stream() - .filter(columnMetadata -> !columnMetadata.isHidden()) - .collect(toList()); - } - - private void createEmptyTable(SchemaTableName schemaTableName, HiveStorageFormat hiveStorageFormat, List columns, List partitionColumns) - throws Exception - { - createEmptyTable(schemaTableName, hiveStorageFormat, columns, partitionColumns, Optional.empty(), false); - } - - private void createEmptyTable( - SchemaTableName schemaTableName, - HiveStorageFormat hiveStorageFormat, - List columns, - List partitionColumns, - Optional bucketProperty) - throws Exception - { - createEmptyTable(schemaTableName, hiveStorageFormat, columns, partitionColumns, bucketProperty, false); - } - - protected void createEmptyTable( - SchemaTableName schemaTableName, - HiveStorageFormat hiveStorageFormat, - List columns, - List partitionColumns, - Optional bucketProperty, - boolean isTransactional) - throws Exception - { - Path targetPath; - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - LocationService locationService = getLocationService(); - targetPath = new Path(locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName).toString()); - - ImmutableMap.Builder tableParamBuilder = ImmutableMap.builder() - .put(TRINO_VERSION_NAME, TEST_SERVER_VERSION) - .put(TRINO_QUERY_ID_NAME, session.getQueryId()); - if (isTransactional) { - tableParamBuilder.put(TRANSACTIONAL, "true"); - } - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(tableParamBuilder.buildOrThrow()) - .setDataColumns(columns) - .setPartitionColumns(partitionColumns); - - tableBuilder.getStorageBuilder() - .setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) - .setBucketProperty(bucketProperty) - .setSerdeParameters(ImmutableMap.of()); - - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - - 
transaction.commit(); - } - - HdfsContext context = new HdfsContext(newSession()); - List targetDirectoryList = listDirectory(context, targetPath); - assertThat(targetDirectoryList).isEqualTo(ImmutableList.of()); - } - - private void alterBucketProperty(SchemaTableName schemaTableName, Optional bucketProperty) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - Optional
table = transaction.getMetastore().getTable(schemaName, tableName); - Table.Builder tableBuilder = Table.builder(table.get()); - tableBuilder.getStorageBuilder().setBucketProperty(bucketProperty); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().replaceTable(schemaName, tableName, tableBuilder.build(), principalPrivileges); - - transaction.commit(); - } - } - - protected PrincipalPrivileges testingPrincipalPrivilege(ConnectorSession session) - { - return testingPrincipalPrivilege(session.getUser(), session.getUser()); - } - - protected PrincipalPrivileges testingPrincipalPrivilege(String tableOwner, String grantor) - { - return new PrincipalPrivileges( - ImmutableMultimap.builder() - .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.SELECT, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor))) - .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.INSERT, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor))) - .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.UPDATE, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor))) - .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.DELETE, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor))) - .build(), - ImmutableMultimap.of()); - } - - private List listDirectory(HdfsContext context, Path path) - throws IOException - { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path); - return Arrays.stream(fileSystem.listStatus(path)) - .map(FileStatus::getPath) - .map(Path::getName) - .filter(name -> !name.startsWith(".trino")) - .collect(toList()); - } - - @Test - public void testTransactionDeleteInsert() - throws Exception - { - doTestTransactionDeleteInsert( - RCBINARY, - true, - ImmutableList.builder() - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_RIGHT_AWAY, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_DELETE, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_BEGIN_INSERT, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_APPEND_PAGE, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_SINK_FINISH, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_FINISH_INSERT, Optional.empty())) - .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new AddPartitionFailure()))) - .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new DirectoryRenameFailure()))) - .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new FileRenameFailure()))) - .add(new TransactionDeleteInsertTestCase(true, false, COMMIT, Optional.of(new DropPartitionFailure()))) - .add(new TransactionDeleteInsertTestCase(true, true, COMMIT, Optional.empty())) - .build()); - } - - @Test - public void testPreferredInsertLayout() - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_partitioned_table"); - - try { - Column partitioningColumn = new Column("column2", HIVE_STRING, Optional.empty(), Map.of()); - List columns = ImmutableList.of( - new Column("column1", HIVE_STRING, Optional.empty(), Map.of()), - partitioningColumn); - createEmptyTable(tableName, ORC, columns, ImmutableList.of(partitioningColumn)); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = 
transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - assertThat(insertLayout.get().getPartitioning().isPresent()).isFalse(); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of(partitioningColumn.getName())); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInsertBucketedTableLayout() - throws Exception - { - insertBucketedTableLayout(false); - } - - @Test - public void testInsertBucketedTransactionalTableLayout() - throws Exception - { - insertBucketedTableLayout(true); - } - - protected void insertBucketedTableLayout(boolean transactional) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_bucketed_table"); - try { - List columns = ImmutableList.of( - new Column("column1", HIVE_STRING, Optional.empty(), Map.of()), - new Column("column2", HIVE_LONG, Optional.empty(), Map.of())); - HiveBucketProperty bucketProperty = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of()); - createEmptyTable(tableName, ORC, columns, ImmutableList.of(), Optional.of(bucketProperty), transactional); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - bucketProperty.getBucketingVersion(), - bucketProperty.getBucketCount(), - ImmutableList.of(HIVE_STRING), - OptionalInt.empty(), - false); - assertThat(insertLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(4); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInsertPartitionedBucketedTableLayout() - throws Exception - { - insertPartitionedBucketedTableLayout(false); - } - - @Test - public void testInsertPartitionedBucketedTransactionalTableLayout() - throws Exception - { - insertPartitionedBucketedTableLayout(true); - } - - protected void insertPartitionedBucketedTableLayout(boolean transactional) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_partitioned_table"); - try { - Column partitioningColumn = new Column("column2", HIVE_LONG, Optional.empty(), Map.of()); - List columns = ImmutableList.of( - new Column("column1", HIVE_STRING, Optional.empty(), Map.of()), - partitioningColumn); - HiveBucketProperty bucketProperty = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of()); - createEmptyTable(tableName, ORC, columns, ImmutableList.of(partitioningColumn), Optional.of(bucketProperty), transactional); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = 
newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - bucketProperty.getBucketingVersion(), - bucketProperty.getBucketCount(), - ImmutableList.of(HIVE_STRING), - OptionalInt.empty(), - true); - assertThat(insertLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1", "column2")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(32); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testPreferredCreateTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"), - BUCKETED_BY_PROPERTY, ImmutableList.of(), - BUCKET_COUNT_PROPERTY, 0, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - assertThat(newTableLayout.get().getPartitioning().isPresent()).isFalse(); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column2")); - } - } - - @Test - public void testCreateBucketedTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of(), - BUCKETED_BY_PROPERTY, ImmutableList.of("column1"), - BUCKET_COUNT_PROPERTY, 10, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - BUCKETING_V1, - 10, - ImmutableList.of(HIVE_LONG), - OptionalInt.empty(), - false); - assertThat(newTableLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(10); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - - @Test - public void testCreatePartitionedBucketedTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new 
ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"), - BUCKETED_BY_PROPERTY, ImmutableList.of("column1"), - BUCKET_COUNT_PROPERTY, 10, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - BUCKETING_V1, - 10, - ImmutableList.of(HIVE_LONG), - OptionalInt.empty(), - true); - assertThat(newTableLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1", "column2")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(32); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - - protected void doTestTransactionDeleteInsert(HiveStorageFormat storageFormat, boolean allowInsertExisting, List testCases) - throws Exception - { - // There are 4 types of operations on a partition: add, drop, alter (drop then add), insert existing. - // There are 12 partitions in this test, 3 for each type. - // 3 is chosen to verify that cleanups, commit aborts, rollbacks are always as complete as possible regardless of failure. - MaterializedResult beforeData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(110L, "a", "alter1") - .row(120L, "a", "insert1") - .row(140L, "a", "drop1") - .row(210L, "b", "drop2") - .row(310L, "c", "alter2") - .row(320L, "c", "alter3") - .row(510L, "e", "drop3") - .row(610L, "f", "insert2") - .row(620L, "f", "insert3") - .build(); - Domain domainToDrop = Domain.create(ValueSet.of( - createUnboundedVarcharType(), - utf8Slice("alter1"), utf8Slice("alter2"), utf8Slice("alter3"), utf8Slice("drop1"), utf8Slice("drop2"), utf8Slice("drop3")), - false); - List extraRowsForInsertExisting = ImmutableList.of(); - if (allowInsertExisting) { - extraRowsForInsertExisting = MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(121L, "a", "insert1") - .row(611L, "f", "insert2") - .row(621L, "f", "insert3") - .build() - .getMaterializedRows(); - } - MaterializedResult insertData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(111L, "a", "alter1") - .row(131L, "a", "add1") - .row(221L, "b", "add2") - .row(311L, "c", "alter2") - .row(321L, "c", "alter3") - .row(411L, "d", "add3") - .rows(extraRowsForInsertExisting) - .build(); - MaterializedResult afterData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(120L, "a", "insert1") - .row(610L, "f", "insert2") - .row(620L, "f", "insert3") - .rows(insertData.getMaterializedRows()) - .build(); - - for (TransactionDeleteInsertTestCase testCase : testCases) { - SchemaTableName temporaryDeleteInsert = temporaryTable("delete_insert"); - try { - createEmptyTable( - temporaryDeleteInsert, - storageFormat, - ImmutableList.of(new Column("col1", HIVE_LONG, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk1", HIVE_STRING, Optional.empty(), Map.of()), new 
Column("pk2", HIVE_STRING, Optional.empty(), Map.of()))); - insertData(temporaryDeleteInsert, beforeData); - try { - doTestTransactionDeleteInsert( - storageFormat, - temporaryDeleteInsert, - domainToDrop, - insertData, - testCase.isExpectCommittedData() ? afterData : beforeData, - testCase.getTag(), - testCase.isExpectQuerySucceed(), - testCase.getConflictTrigger()); - } - catch (AssertionError e) { - throw new AssertionError(format("Test case: %s", testCase), e); - } - } - finally { - dropTable(temporaryDeleteInsert); - } - } - } - - private void doTestTransactionDeleteInsert( - HiveStorageFormat storageFormat, - SchemaTableName tableName, - Domain domainToDrop, - MaterializedResult insertData, - MaterializedResult expectedData, - TransactionDeleteInsertTestTag tag, - boolean expectQuerySucceed, - Optional conflictTrigger) - throws Exception - { - Location writePath = null; - Location targetPath = null; - - try (Transaction transaction = newTransaction()) { - try { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - ConnectorSession session; - rollbackIfEquals(tag, ROLLBACK_RIGHT_AWAY); - - // Query 1: delete - session = newSession(); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("pk2"); - TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumnHandle, domainToDrop)); - Constraint constraint = new Constraint(tupleDomain, tupleDomain.asPredicate(), tupleDomain.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - rollbackIfEquals(tag, ROLLBACK_AFTER_DELETE); - - // Query 2: insert - session = newSession(); - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - rollbackIfEquals(tag, ROLLBACK_AFTER_BEGIN_INSERT); - writePath = getStagingPathRoot(insertTableHandle); - targetPath = getTargetPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(insertData.toPage()); - rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE); - Collection fragments = getFutureValue(sink.finish()); - rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT); - - assertThat(tag).isEqualTo(COMMIT); - - if (conflictTrigger.isPresent()) { - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - List partitionUpdates = fragments.stream() - .map(Slice::getBytes) - .map(partitionUpdateCodec::fromJson) - .collect(toList()); - conflictTrigger.get().triggerConflict(session, tableName, insertTableHandle, partitionUpdates); - } - transaction.commit(); - if (conflictTrigger.isPresent()) { - assertThat(expectQuerySucceed).isTrue(); - conflictTrigger.get().verifyAndCleanup(session, tableName); - } - } - catch (TestingRollbackException e) { - transaction.rollback(); - } - catch (TrinoException e) { - assertThat(expectQuerySucceed).isFalse(); - if (conflictTrigger.isPresent()) { - conflictTrigger.get().verifyAndCleanup(newSession(), tableName); - } - } - } - - // check that temporary files are removed - if (writePath != null && 
!writePath.equals(targetPath)) { - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, new Path(writePath.toString())); - assertThat(fileSystem.exists(new Path(writePath.toString()))).isFalse(); - } - - try (Transaction transaction = newTransaction()) { - // verify partitions - List partitionNames = transaction.getMetastore() - .getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder( - partitionNames, - expectedData.getMaterializedRows().stream() - .map(row -> format("pk1=%s/pk2=%s", row.getField(1), row.getField(2))) - .distinct() - .collect(toImmutableList())); - - // load the new table - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedData.getMaterializedRows()); - } - } - - private static void rollbackIfEquals(TransactionDeleteInsertTestTag tag, TransactionDeleteInsertTestTag expectedTag) - { - if (expectedTag == tag) { - throw new TestingRollbackException(); - } - } - - private static class TestingRollbackException - extends RuntimeException - { - } - - protected static class TransactionDeleteInsertTestCase - { - private final boolean expectCommittedData; - private final boolean expectQuerySucceed; - private final TransactionDeleteInsertTestTag tag; - private final Optional conflictTrigger; - - public TransactionDeleteInsertTestCase(boolean expectCommittedData, boolean expectQuerySucceed, TransactionDeleteInsertTestTag tag, Optional conflictTrigger) - { - this.expectCommittedData = expectCommittedData; - this.expectQuerySucceed = expectQuerySucceed; - this.tag = tag; - this.conflictTrigger = conflictTrigger; - } - - public boolean isExpectCommittedData() - { - return expectCommittedData; - } - - public boolean isExpectQuerySucceed() - { - return expectQuerySucceed; - } - - public TransactionDeleteInsertTestTag getTag() - { - return tag; - } - - public Optional getConflictTrigger() - { - return conflictTrigger; - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("tag", tag) - .add("conflictTrigger", conflictTrigger.map(conflictTrigger -> conflictTrigger.getClass().getName())) - .add("expectCommittedData", expectCommittedData) - .add("expectQuerySucceed", expectQuerySucceed) - .toString(); - } - } - - protected enum TransactionDeleteInsertTestTag - { - ROLLBACK_RIGHT_AWAY, - ROLLBACK_AFTER_DELETE, - ROLLBACK_AFTER_BEGIN_INSERT, - ROLLBACK_AFTER_APPEND_PAGE, - ROLLBACK_AFTER_SINK_FINISH, - ROLLBACK_AFTER_FINISH_INSERT, - COMMIT, - } - - protected interface ConflictTrigger - { - void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException; - - void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException; - } - - protected class AddPartitionFailure - implements ConflictTrigger - { - private final ImmutableList copyPartitionFrom = 
ImmutableList.of("a", "insert1"); - private final String partitionNameToConflict = "pk1=b/pk2=add2"; - private Partition conflictPartition; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - { - // This method bypasses the transaction interface because it is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. - HiveMetastore metastoreClient = getMetastoreClient(); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - Optional partition = metastoreClient.getPartition(table, copyPartitionFrom); - conflictPartition = Partition.builder(partition.get()) - .setValues(toPartitionValues(partitionNameToConflict)) - .build(); - metastoreClient.addPartitions( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(new PartitionWithStatistics(conflictPartition, partitionNameToConflict, PartitionStatistics.empty()))); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - { - // This method bypasses the transaction interface because it is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. - HiveMetastore metastoreClient = getMetastoreClient(); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - Optional actualPartition = metastoreClient.getPartition(table, toPartitionValues(partitionNameToConflict)); - // Make sure the partition inserted to trigger the conflict was not overwritten - // Checking the storage location is sufficient because the implementation never uses .../pk1=a/pk2=a2 as the directory for partition [b, b2]. - assertThat(actualPartition.get().getStorage().getLocation()).isEqualTo(conflictPartition.getStorage().getLocation()); - metastoreClient.dropPartition(tableName.getSchemaName(), tableName.getTableName(), conflictPartition.getValues(), false); - } - } - - protected class DropPartitionFailure - implements ConflictTrigger - { - private final ImmutableList partitionValueToConflict = ImmutableList.of("b", "drop2"); - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - { - // This method bypasses the transaction interface because it is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. 
- HiveMetastore metastoreClient = getMetastoreClient(); - metastoreClient.dropPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValueToConflict, false); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - { - // Do not add back the deleted partition because the implementation is expected to move forward instead of backward when delete fails - } - } - - protected class DirectoryRenameFailure - implements ConflictTrigger - { - private HdfsContext context; - private Path path; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException - { - Location writePath = getStagingPathRoot(insertTableHandle); - Location targetPath = getTargetPathRoot(insertTableHandle); - if (writePath.equals(targetPath)) { - // This conflict does not apply. Trigger a rollback right away so that this test case passes. - throw new TestingRollbackException(); - } - path = new Path(targetPath.appendPath("pk1=b").appendPath("pk2=add2").toString()); - context = new HdfsContext(session); - if (!hdfsEnvironment.getFileSystem(context, path).mkdirs(path, hdfsEnvironment.getNewDirectoryPermissions().orElse(null))) { - throw new IOException("mkdirs returned false"); - } - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException - { - assertThat(listDirectory(context, path)).isEqualTo(ImmutableList.of()); - hdfsEnvironment.getFileSystem(context, path).delete(path, false); - } - } - - protected class FileRenameFailure - implements ConflictTrigger - { - private HdfsContext context; - private Path path; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException - { - for (PartitionUpdate partitionUpdate : partitionUpdates) { - if ("pk2=insert2".equals(partitionUpdate.getTargetPath().fileName())) { - path = new Path(partitionUpdate.getTargetPath().toString(), partitionUpdate.getFileNames().get(0)); - break; - } - } - assertThat(path).isNotNull(); - - context = new HdfsContext(session); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path); - fileSystem.createNewFile(path); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException - { - // The file we added to trigger a conflict was cleaned up because it matches the query prefix. - // Consider this the same as a network failure where the file was created successfully, but the success was never reported to the caller. 
- assertThat(hdfsEnvironment.getFileSystem(context, path).exists(path)).isFalse(); - } - } - - private static class CountingDirectoryLister - implements DirectoryLister - { - private final AtomicInteger listCount = new AtomicInteger(); - - @Override - public RemoteIterator listFilesRecursively(TrinoFileSystem fs, Table table, Location location) - throws IOException - { - listCount.incrementAndGet(); - return new TrinoFileStatusRemoteIterator(fs.listFiles(location)); - } - - public int getListCount() - { - return listCount.get(); - } - - @Override - public void invalidate(Partition partition) - { - } - - @Override - public void invalidate(Table table) - { - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java deleted file mode 100644 index 6010dfd0b3e58..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Streams; -import com.google.common.net.HostAndPort; -import io.airlift.concurrent.BoundedExecutor; -import io.airlift.json.JsonCodec; -import io.airlift.slice.Slice; -import io.airlift.stats.CounterStat; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.TrinoHdfsFileSystemStats; -import io.trino.hdfs.authentication.NoHdfsAuthentication; -import io.trino.operator.GroupByHashPageIndexerFactory; -import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.AbstractTestHive.Transaction; -import io.trino.plugin.hive.fs.FileSystemDirectoryLister; -import io.trino.plugin.hive.fs.HiveFileIterator; -import io.trino.plugin.hive.fs.TransactionScopeCachingDirectoryListerFactory; -import io.trino.plugin.hive.fs.TrinoFileStatus; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.ForwardingHiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; -import io.trino.plugin.hive.security.SqlStandardAccessControlMetadata; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ColumnMetadata; -import 
io.trino.spi.connector.ConnectorInsertTableHandle; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorOutputTableHandle; -import io.trino.spi.connector.ConnectorPageSink; -import io.trino.spi.connector.ConnectorPageSinkProvider; -import io.trino.spi.connector.ConnectorPageSource; -import io.trino.spi.connector.ConnectorPageSourceProvider; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorSplit; -import io.trino.spi.connector.ConnectorSplitManager; -import io.trino.spi.connector.ConnectorSplitSource; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.DynamicFilter; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.ConnectorIdentity; -import io.trino.spi.type.TypeOperators; -import io.trino.sql.gen.JoinCompiler; -import io.trino.testing.MaterializedResult; -import io.trino.testing.TestingNodeManager; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.UncheckedIOException; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledExecutorService; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.Iterables.getOnlyElement; -import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; -import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.airlift.concurrent.Threads.daemonThreadsNamed; -import static io.trino.hdfs.FileSystemUtils.getRawFileSystem; -import static io.trino.plugin.hive.AbstractTestHive.createTableProperties; -import static io.trino.plugin.hive.AbstractTestHive.filterNonHiddenColumnHandles; -import static io.trino.plugin.hive.AbstractTestHive.filterNonHiddenColumnMetadata; -import static io.trino.plugin.hive.AbstractTestHive.getAllSplits; -import static io.trino.plugin.hive.AbstractTestHive.getSplits; -import static io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER; -import static io.trino.plugin.hive.HiveTestUtils.SESSION; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties; -import static io.trino.plugin.hive.HiveTestUtils.getTypes; -import static io.trino.plugin.hive.HiveType.HIVE_LONG; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; -import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; -import static 
io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.testing.MaterializedResult.materializeSourceDataStream; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingPageSinkId.TESTING_PAGE_SINK_ID; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Locale.ENGLISH; -import static java.util.UUID.randomUUID; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; - -@TestInstance(PER_CLASS) -@Execution(CONCURRENT) -public abstract class AbstractTestHiveFileSystem -{ - protected static final HdfsContext TESTING_CONTEXT = new HdfsContext(ConnectorIdentity.ofUser("test")); - - protected String database; - protected SchemaTableName table; - protected SchemaTableName tableWithHeader; - protected SchemaTableName tableWithHeaderAndFooter; - protected SchemaTableName temporaryCreateTable; - protected SchemaTableName temporaryCreateTableWithExternalLocation; - - protected HdfsEnvironment hdfsEnvironment; - protected LocationService locationService; - protected TestingHiveMetastore metastoreClient; - protected HiveMetadataFactory metadataFactory; - protected HiveTransactionManager transactionManager; - protected ConnectorSplitManager splitManager; - protected ConnectorPageSinkProvider pageSinkProvider; - protected ConnectorPageSourceProvider pageSourceProvider; - - private ExecutorService executor; - private HiveConfig config; - private ScheduledExecutorService heartbeatService; - - @BeforeAll - public void setUp() - { - executor = newCachedThreadPool(daemonThreadsNamed("hive-%s")); - heartbeatService = newScheduledThreadPool(1); - } - - @AfterAll - public void tearDown() - { - if (executor != null) { - executor.shutdownNow(); - executor = null; - } - if (heartbeatService != null) { - heartbeatService.shutdownNow(); - heartbeatService = null; - } - } - - protected abstract Path getBasePath(); - - protected void onSetupComplete() {} - - protected void setup(String host, int port, String databaseName, HdfsConfiguration hdfsConfiguration) - { - database = databaseName; - table = new SchemaTableName(database, "trino_test_external_fs"); - tableWithHeader = new SchemaTableName(database, "trino_test_external_fs_with_header"); - tableWithHeaderAndFooter = new SchemaTableName(database, "trino_test_external_fs_with_header_and_footer"); - - String random = randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""); - temporaryCreateTable = new SchemaTableName(database, "tmp_trino_test_create_" + random); - temporaryCreateTableWithExternalLocation = new SchemaTableName(database, "tmp_trino_test_create_external" + random); - - config = new HiveConfig() - .setWritesToNonManagedTablesEnabled(true); - - HivePartitionManager hivePartitionManager = new HivePartitionManager(config); - - hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, new HdfsConfig(), new NoHdfsAuthentication()); - metastoreClient = new TestingHiveMetastore( - new BridgingHiveMetastore( - 
testingThriftHiveMetastoreBuilder() - .metastoreClient(HostAndPort.fromParts(host, port)) - .hiveConfig(config) - .fileSystemFactory(new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS)) - .build()), - getBasePath(), - hdfsEnvironment); - HdfsFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS); - locationService = new HiveLocationService(fileSystemFactory, config); - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - metadataFactory = new HiveMetadataFactory( - new CatalogName("hive"), - config, - new HiveMetastoreConfig(), - HiveMetastoreFactory.ofInstance(metastoreClient), - getDefaultHiveFileWriterFactories(config, hdfsEnvironment), - fileSystemFactory, - hivePartitionManager, - newDirectExecutorService(), - heartbeatService, - TESTING_TYPE_MANAGER, - NOOP_METADATA_PROVIDER, - locationService, - partitionUpdateCodec, - new NodeVersion("test_version"), - new NoneHiveRedirectionsProvider(), - ImmutableSet.of( - new PartitionsSystemTableProvider(hivePartitionManager, TESTING_TYPE_MANAGER), - new PropertiesSystemTableProvider()), - new DefaultHiveMaterializedViewMetadataFactory(), - SqlStandardAccessControlMetadata::new, - new FileSystemDirectoryLister(), - new TransactionScopeCachingDirectoryListerFactory(config), - true); - transactionManager = new HiveTransactionManager(metadataFactory); - splitManager = new HiveSplitManager( - transactionManager, - hivePartitionManager, - fileSystemFactory, - new BoundedExecutor(executor, config.getMaxSplitIteratorThreads()), - new CounterStat(), - config.getMaxOutstandingSplits(), - config.getMaxOutstandingSplitsSize(), - config.getMinPartitionBatchSize(), - config.getMaxPartitionBatchSize(), - config.getMaxInitialSplits(), - config.getSplitLoaderConcurrency(), - config.getMaxSplitsPerSecond(), - config.getRecursiveDirWalkerEnabled(), - TESTING_TYPE_MANAGER, - config.getMaxPartitionsPerScan()); - pageSinkProvider = new HivePageSinkProvider( - getDefaultHiveFileWriterFactories(config, hdfsEnvironment), - fileSystemFactory, - PAGE_SORTER, - HiveMetastoreFactory.ofInstance(metastoreClient), - new GroupByHashPageIndexerFactory(new JoinCompiler(new TypeOperators())), - TESTING_TYPE_MANAGER, - config, - new SortingFileWriterConfig(), - locationService, - partitionUpdateCodec, - new TestingNodeManager("fake-environment"), - new HiveEventClient(), - getHiveSessionProperties(config), - new HiveWriterStats()); - pageSourceProvider = new HivePageSourceProvider( - TESTING_TYPE_MANAGER, - config, - getDefaultHivePageSourceFactories(hdfsEnvironment, config)); - - onSetupComplete(); - } - - protected ConnectorSession newSession() - { - return HiveFileSystemTestUtils.newSession(config); - } - - protected Transaction newTransaction() - { - return HiveFileSystemTestUtils.newTransaction(transactionManager); - } - - protected MaterializedResult readTable(SchemaTableName tableName) - throws IOException - { - return HiveFileSystemTestUtils.readTable(tableName, transactionManager, config, pageSourceProvider, splitManager); - } - - @Test - public void testGetRecords() - throws Exception - { - assertEqualsIgnoreOrder( - readTable(table), - MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(3L).row(14L).row(15L) // test_table.csv - .row(92L).row(65L).row(35L) // test_table.csv.gz - .row(89L).row(79L).row(32L) // test_table.csv.bz2 - .row(38L).row(46L).row(26L) // test_table.csv.lz4 - .build()); - } - - @Test - public void testGetRecordsWithHeader() - throws IOException - { 
- assertEqualsIgnoreOrder( - readTable(tableWithHeader), - MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(2L).row(71L).row(82L) // test_table_with_header.csv - .row(81L).row(82L).row(84L) // test_table_with_header.csv.gz - .row(59L).row(4L).row(52L) // test_table_with_header.csv.bz2 - .row(35L).row(36L).row(2L) // test_table_with_header.csv.lz4 - .build()); - } - - @Test - public void testGetRecordsWithHeaderAndFooter() - throws IOException - { - assertEqualsIgnoreOrder( - readTable(tableWithHeaderAndFooter), - MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(1L).row(41L).row(42L) // test_table_with_header_and_footer.csv - .row(13L).row(56L).row(23L) // test_table_with_header_and_footer.csv.gz - .row(73L).row(9L).row(50L) // test_table_with_header_and_footer.csv.bz2 - .row(48L).row(80L).row(16L) // test_table_with_header_and_footer.csv.lz4 - .build()); - } - - @Test - public void testGetFileStatus() - throws Exception - { - Path basePath = getBasePath(); - Path tablePath = new Path(basePath, "trino_test_external_fs"); - Path filePath = new Path(tablePath, "test_table.csv"); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - - assertThat(fs.getFileStatus(basePath).isDirectory()) - .describedAs("basePath should be considered a directory") - .isTrue(); - assertThat(fs.getFileStatus(tablePath).isDirectory()) - .describedAs("tablePath should be considered a directory") - .isTrue(); - assertThat(fs.getFileStatus(filePath).isFile()) - .describedAs("filePath should be considered a file") - .isTrue(); - assertThat(fs.getFileStatus(filePath).isDirectory()) - .describedAs("filePath should not be considered a directory") - .isFalse(); - assertThat(fs.exists(new Path(basePath, "foo-" + randomUUID()))) - .describedAs("foo-random path should be found not to exist") - .isFalse(); - assertThat(fs.exists(new Path(basePath, "foo"))) - .describedAs("foo path should be found not to exist") - .isFalse(); - } - - @Test - public void testRename() - throws Exception - { - Path basePath = new Path(getBasePath(), randomUUID().toString()); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - assertThat(fs.exists(basePath)).isFalse(); - - // create file foo.txt - Path path = new Path(basePath, "foo.txt"); - assertThat(fs.createNewFile(path)).isTrue(); - assertThat(fs.exists(path)).isTrue(); - - // rename foo.txt to bar.txt when bar does not exist - Path newPath = new Path(basePath, "bar.txt"); - assertThat(fs.exists(newPath)).isFalse(); - assertThat(fs.rename(path, newPath)).isTrue(); - assertThat(fs.exists(path)).isFalse(); - assertThat(fs.exists(newPath)).isTrue(); - - // rename foo.txt to foo.txt when foo.txt does not exist - assertThat(fs.rename(path, path)).isFalse(); - - // create file foo.txt and rename to existing bar.txt - assertThat(fs.createNewFile(path)).isTrue(); - assertThat(fs.rename(path, newPath)).isFalse(); - - // rename foo.txt to foo.txt when foo.txt exists - assertThat(fs.rename(path, path)).isEqualTo(getRawFileSystem(fs) instanceof AzureBlobFileSystem); - - // delete foo.txt - assertThat(fs.delete(path, false)).isTrue(); - assertThat(fs.exists(path)).isFalse(); - - // create directory source with file - Path source = new Path(basePath, "source"); - assertThat(fs.createNewFile(new Path(source, "test.txt"))).isTrue(); - - // rename source to non-existing target - Path target = new Path(basePath, "target"); - assertThat(fs.exists(target)).isFalse(); - assertThat(fs.rename(source, target)).isTrue(); - 
assertThat(fs.exists(source)).isFalse(); - assertThat(fs.exists(target)).isTrue(); - - // create directory source with file - assertThat(fs.createNewFile(new Path(source, "test.txt"))).isTrue(); - - // rename source to existing target - assertThat(fs.rename(source, target)).isTrue(); - assertThat(fs.exists(source)).isFalse(); - target = new Path(target, "source"); - assertThat(fs.exists(target)).isTrue(); - assertThat(fs.exists(new Path(target, "test.txt"))).isTrue(); - - // delete target - target = new Path(basePath, "target"); - assertThat(fs.exists(target)).isTrue(); - assertThat(fs.delete(target, true)).isTrue(); - assertThat(fs.exists(target)).isFalse(); - - // cleanup - fs.delete(basePath, true); - } - - @Test - public void testFileIteratorListing() - throws Exception - { - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(table.getSchemaName()) - .setTableName(table.getTableName()) - .setDataColumns(ImmutableList.of(new Column("one", HIVE_LONG, Optional.empty(), Map.of()))) - .setPartitionColumns(ImmutableList.of()) - .setOwner(Optional.empty()) - .setTableType("fake"); - tableBuilder.getStorageBuilder() - .setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.CSV)); - Table fakeTable = tableBuilder.build(); - - // Expected file system tree: - // test-file-iterator-listing/ - // .hidden/ - // nested-file-in-hidden.txt - // parent/ - // _nested-hidden-file.txt - // nested-file.txt - // empty-directory/ - // .hidden-in-base.txt - // base-path-file.txt - Path basePath = new Path(getBasePath(), "test-file-iterator-listing"); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment, new TrinoHdfsFileSystemStats()).create(SESSION); - fs.mkdirs(basePath); - - // create file in hidden folder - Path fileInHiddenParent = new Path(new Path(basePath, ".hidden"), "nested-file-in-hidden.txt"); - fs.createNewFile(fileInHiddenParent); - // create hidden file in non-hidden folder - Path nestedHiddenFile = new Path(new Path(basePath, "parent"), "_nested-hidden-file.txt"); - fs.createNewFile(nestedHiddenFile); - // create file in non-hidden folder - Path nestedFile = new Path(new Path(basePath, "parent"), "nested-file.txt"); - fs.createNewFile(nestedFile); - // create file in base path - Path baseFile = new Path(basePath, "base-path-file.txt"); - fs.createNewFile(baseFile); - // create hidden file in base path - Path hiddenBase = new Path(basePath, ".hidden-in-base.txt"); - fs.createNewFile(hiddenBase); - // create empty subdirectory - Path emptyDirectory = new Path(basePath, "empty-directory"); - fs.mkdirs(emptyDirectory); - - // List recursively through hive file iterator - HiveFileIterator recursiveIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.RECURSE); - - List recursiveListing = Streams.stream(recursiveIterator) - .map(TrinoFileStatus::getPath) - .map(Path::new) - .toList(); - // Should not include directories, or files underneath hidden directories - assertEqualsIgnoreOrder(recursiveListing, ImmutableList.of(nestedFile, baseFile)); - - HiveFileIterator shallowIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.IGNORED); - List shallowListing = Streams.stream(shallowIterator) - .map(TrinoFileStatus::getPath) - 
.map(Path::new) - .toList(); - // Should not include any hidden files, folders, or nested files - assertEqualsIgnoreOrder(shallowListing, ImmutableList.of(baseFile)); - } - - @Test - public void testFileIteratorPartitionedListing() - throws Exception - { - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(table.getSchemaName()) - .setTableName(table.getTableName()) - .setDataColumns(ImmutableList.of(new Column("data", HIVE_LONG, Optional.empty(), Map.of()))) - .setPartitionColumns(ImmutableList.of(new Column("part", HIVE_STRING, Optional.empty(), Map.of()))) - .setOwner(Optional.empty()) - .setTableType("fake"); - tableBuilder.getStorageBuilder() - .setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.CSV)); - Table fakeTable = tableBuilder.build(); - - // Expected file system tree: - // test-file-iterator-partitioned-listing/ - // .hidden/ - // nested-file-in-hidden.txt - // part=simple/ - // _hidden-file.txt - // plain-file.txt - // part=nested/ - // parent/ - // _nested-hidden-file.txt - // nested-file.txt - // part=plus+sign/ - // plus-file.txt - // part=percent%sign/ - // percent-file.txt - // part=url%20encoded/ - // url-encoded-file.txt - // part=level1|level2/ - // pipe-file.txt - // parent1/ - // parent2/ - // deeply-nested-file.txt - // part=level1 | level2/ - // pipe-blanks-file.txt - // empty-directory/ - // .hidden-in-base.txt - Path basePath = new Path(getBasePath(), "test-file-iterator-partitioned-listing"); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment, new TrinoHdfsFileSystemStats()).create(SESSION); - fs.mkdirs(basePath); - - // create file in hidden folder - Path fileInHiddenParent = new Path(new Path(basePath, ".hidden"), "nested-file-in-hidden.txt"); - fs.createNewFile(fileInHiddenParent); - // create hidden file in non-hidden folder - Path hiddenFileUnderPartitionSimple = new Path(new Path(basePath, "part=simple"), "_hidden-file.txt"); - fs.createNewFile(hiddenFileUnderPartitionSimple); - // create file in `part=simple` non-hidden folder - Path plainFilePartitionSimple = new Path(new Path(basePath, "part=simple"), "plain-file.txt"); - fs.createNewFile(plainFilePartitionSimple); - Path nestedFilePartitionNested = new Path(new Path(new Path(basePath, "part=nested"), "parent"), "nested-file.txt"); - fs.createNewFile(nestedFilePartitionNested); - // create hidden file in non-hidden folder - Path nestedHiddenFilePartitionNested = new Path(new Path(new Path(basePath, "part=nested"), "parent"), "_nested-hidden-file.txt"); - fs.createNewFile(nestedHiddenFilePartitionNested); - // create file in `part=plus+sign` non-hidden folder (which contains `+` special character) - Path plainFilePartitionPlusSign = new Path(new Path(basePath, "part=plus+sign"), "plus-file.txt"); - fs.createNewFile(plainFilePartitionPlusSign); - // create file in `part=percent%sign` non-hidden folder (which contains `%` special character) - Path plainFilePartitionPercentSign = new Path(new Path(basePath, "part=percent%sign"), "percent-file.txt"); - fs.createNewFile(plainFilePartitionPercentSign); - // create file in `part=url%20encoded` non-hidden folder (which contains `%` special character) - Path plainFilePartitionUrlEncoded = new Path(new Path(basePath, "part=url%20encoded"), "url-encoded-file.txt"); - fs.createNewFile(plainFilePartitionUrlEncoded); - // create file in `part=level1|level2` non-hidden folder (which contains `|` special character) - Path 
plainFilePartitionPipeSign = new Path(new Path(basePath, "part=level1|level2"), "pipe-file.txt"); - fs.createNewFile(plainFilePartitionPipeSign); - Path deeplyNestedFilePartitionPipeSign = new Path(new Path(new Path(new Path(basePath, "part=level1|level2"), "parent1"), "parent2"), "deeply-nested-file.txt"); - fs.createNewFile(deeplyNestedFilePartitionPipeSign); - // create file in `part=level1 | level2` non-hidden folder (which contains `|` and blank space special characters) - Path plainFilePartitionPipeSignBlanks = new Path(new Path(basePath, "part=level1 | level2"), "pipe-blanks-file.txt"); - fs.createNewFile(plainFilePartitionPipeSignBlanks); - - // create empty subdirectory - Path emptyDirectory = new Path(basePath, "empty-directory"); - fs.mkdirs(emptyDirectory); - // create hidden file in base path - Path hiddenBase = new Path(basePath, ".hidden-in-base.txt"); - fs.createNewFile(hiddenBase); - - // List recursively through hive file iterator - HiveFileIterator recursiveIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.RECURSE); - - List recursiveListing = Streams.stream(recursiveIterator) - .map(TrinoFileStatus::getPath) - .map(Path::new) - .toList(); - // Should not include directories, or files underneath hidden directories - assertThat(recursiveListing).containsExactlyInAnyOrder( - plainFilePartitionSimple, - nestedFilePartitionNested, - plainFilePartitionPlusSign, - plainFilePartitionPercentSign, - plainFilePartitionUrlEncoded, - plainFilePartitionPipeSign, - deeplyNestedFilePartitionPipeSign, - plainFilePartitionPipeSignBlanks); - - HiveFileIterator shallowIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - HiveFileIterator.NestedDirectoryPolicy.IGNORED); - List shallowListing = Streams.stream(shallowIterator) - .map(TrinoFileStatus::getPath) - .map(Path::new) - .toList(); - // Should not include any hidden files, folders, or nested files - assertThat(shallowListing).isEmpty(); - } - - @Test - public void testDirectoryWithTrailingSpace() - throws Exception - { - Path basePath = new Path(getBasePath(), randomUUID().toString()); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - assertThat(fs.exists(basePath)).isFalse(); - - Path path = new Path(new Path(basePath, "dir_with_space "), "foo.txt"); - try (OutputStream outputStream = fs.create(path)) { - outputStream.write("test".getBytes(UTF_8)); - } - assertThat(fs.exists(path)).isTrue(); - - try (InputStream inputStream = fs.open(path)) { - String content = new BufferedReader(new InputStreamReader(inputStream, UTF_8)).readLine(); - assertThat(content).isEqualTo("test"); - } - - fs.delete(basePath, true); - } - - @Test - public void testTableCreation() - throws Exception - { - for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) { - if (storageFormat == HiveStorageFormat.CSV) { - // CSV supports only unbounded VARCHAR type - continue; - } - if (storageFormat == HiveStorageFormat.REGEX) { - // REGEX format is read-only - continue; - } - createTable(temporaryCreateTable, storageFormat); - dropTable(temporaryCreateTable); - } - } - - @Test - public void testTableCreationExternalLocation() - throws Exception - { - for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) { - if (storageFormat == HiveStorageFormat.CSV) { - // CSV supports only unbounded VARCHAR type - continue; 
- } - if (storageFormat == HiveStorageFormat.REGEX) { - // REGEX format is read-only - continue; - } - createExternalTableOnNonExistingPath(temporaryCreateTableWithExternalLocation, storageFormat); - dropTable(temporaryCreateTableWithExternalLocation); - } - } - - private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) - throws Exception - { - List columns = ImmutableList.of(new ColumnMetadata("id", BIGINT)); - - MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(1L) - .row(3L) - .row(2L) - .build(); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat)); - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the records - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // commit the table - metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); - - transaction.commit(); - - // Hack to work around the metastore not being configured for S3 or other FS. - // The metastore tries to validate the location when creating the - // table, which fails without explicit configuration for file system. - // We work around that by using a dummy location when creating the - // table and update it here to the correct location. - Location location = locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).targetPath(); - metastoreClient.updateTableLocation(database, tableName.getTableName(), location.toString()); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(columns); - - // verify the data - metadata.beginQuery(session); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - ConnectorSplit split = getOnlyElement(getAllSplits(splitSource)); - - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows()); - } - - metadata.cleanupQuery(session); - } - } - - private void createExternalTableOnNonExistingPath(SchemaTableName tableName, HiveStorageFormat storageFormat) - throws Exception - { - List columns = ImmutableList.of(new ColumnMetadata("id", BIGINT)); - String externalLocation = getBasePath() + "/external_" + randomNameSuffix(); - - MaterializedResult data = 
MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(1L) - .row(3L) - .row(2L) - .build(); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - Map tableProperties = ImmutableMap.builder() - .putAll(createTableProperties(storageFormat)) - .put(EXTERNAL_LOCATION_PROPERTY, externalLocation) - .buildOrThrow(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, tableProperties); - metadata.createTable(session, tableMetadata, true); - - transaction.commit(); - - // Hack to work around the metastore not being configured for S3 or other FS. - // The metastore tries to validate the location when creating the - // table, which fails without explicit configuration for file system. - // We work around that by using a dummy location when creating the - // table and update it here to the correct location. - Location location = locationService.getTableWriteInfo(new LocationHandle(externalLocation, externalLocation, LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY), false).targetPath(); - metastoreClient.updateTableLocation(database, tableName.getTableName(), location.toString()); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - ConnectorTableHandle connectorTableHandle = getTableHandle(metadata, tableName); - ConnectorInsertTableHandle outputHandle = metadata.beginInsert(session, connectorTableHandle, ImmutableList.of(), NO_RETRIES); - - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - metadata.finishInsert(session, outputHandle, fragments, ImmutableList.of()); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(columns); - assertThat(tableMetadata.getProperties()).containsEntry("external_location", externalLocation); - - // verify the data - metadata.beginQuery(session); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - ConnectorSplit split = getOnlyElement(getAllSplits(splitSource)); - - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows()); - } - - metadata.cleanupQuery(session); - } - } - - private void dropTable(SchemaTableName table) - { - try (Transaction transaction = newTransaction()) { - transaction.getMetastore().dropTable(newSession(), table.getSchemaName(), table.getTableName()); - 
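// Both table-creation paths above repeat the same fix-up after committing: resolve the real write
// path through the LocationService and point the metastore record at it. As a sketch, the shared
// step amounts to a helper like this (the method name is illustrative; locationHandle is whichever
// handle the respective path obtained); TestingHiveMetastore.updateTableLocation below performs the
// underlying replaceTable call:
private void pointMetastoreAtWriteLocation(SchemaTableName tableName, LocationHandle locationHandle)
{
    Location targetPath = locationService.getTableWriteInfo(locationHandle, false).targetPath();
    metastoreClient.updateTableLocation(database, tableName.getTableName(), targetPath.toString());
}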
transaction.commit(); - } - } - - private ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName) - { - return HiveFileSystemTestUtils.getTableHandle(metadata, tableName, newSession()); - } - - public static class TestingHiveMetastore - extends ForwardingHiveMetastore - { - private final Path basePath; - private final HdfsEnvironment hdfsEnvironment; - - public TestingHiveMetastore(HiveMetastore delegate, Path basePath, HdfsEnvironment hdfsEnvironment) - { - super(delegate); - this.basePath = basePath; - this.hdfsEnvironment = hdfsEnvironment; - } - - @Override - public Optional getDatabase(String databaseName) - { - return super.getDatabase(databaseName) - .map(database -> Database.builder(database) - .setLocation(Optional.of(basePath.toString())) - .build()); - } - - @Override - public void createTable(Table table, PrincipalPrivileges privileges) - { - // hack to work around the metastore not being configured for S3 or other FS - Table.Builder tableBuilder = Table.builder(table); - tableBuilder.getStorageBuilder().setLocation("/"); - super.createTable(tableBuilder.build(), privileges); - } - - @Override - public void dropTable(String databaseName, String tableName, boolean deleteData) - { - try { - Table table = getTable(databaseName, tableName) - .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - - // hack to work around the metastore not being configured for S3 or other FS - List locations = listAllDataPaths(databaseName, tableName); - - Table.Builder tableBuilder = Table.builder(table); - tableBuilder.getStorageBuilder().setLocation("/"); - - // drop table - replaceTable(databaseName, tableName, tableBuilder.build(), NO_PRIVILEGES); - super.dropTable(databaseName, tableName, false); - - // drop data - if (deleteData) { - for (String location : locations) { - Path path = new Path(location); - hdfsEnvironment.getFileSystem(TESTING_CONTEXT, path).delete(path, true); - } - } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - public void updateTableLocation(String databaseName, String tableName, String location) - { - Table table = getTable(databaseName, tableName) - .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - Table.Builder tableBuilder = Table.builder(table); - tableBuilder.getStorageBuilder().setLocation(location); - - // NOTE: this clears the permissions - replaceTable(databaseName, tableName, tableBuilder.build(), NO_PRIVILEGES); - } - - private List listAllDataPaths(String schemaName, String tableName) - { - ImmutableList.Builder locations = ImmutableList.builder(); - Table table = getTable(schemaName, tableName).get(); - List partitionColumnNames = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList()); - if (table.getStorage().getLocation() != null) { - // For partitioned table, there should be nothing directly under this directory. - // But including this location in the set makes the directory content assert more - // extensive, which is desirable. 
- locations.add(table.getStorage().getLocation()); - } - - Optional> partitionNames = getPartitionNamesByFilter(schemaName, tableName, partitionColumnNames, TupleDomain.all()); - if (partitionNames.isPresent()) { - getPartitionsByNames(table, partitionNames.get()).values().stream() - .map(Optional::get) - .map(partition -> partition.getStorage().getLocation()) - .filter(location -> !location.startsWith(table.getStorage().getLocation())) - .forEach(locations::add); - } - - return locations.build(); - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java deleted file mode 100644 index a2cd60052b204..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.io.RecursiveDeleteOption; -import com.google.common.reflect.ClassPath; -import io.airlift.log.Logger; -import io.trino.filesystem.Location; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.SortingColumn; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.PrincipalType; -import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; -import static io.trino.plugin.hive.HiveMetadata.TRINO_VERSION_NAME; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static 
io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; -import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; -import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static java.nio.file.Files.copy; -import static java.util.Objects.requireNonNull; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.abort; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public abstract class AbstractTestHiveLocal - extends AbstractTestHive -{ - private static final Logger log = Logger.get(AbstractTestHiveLocal.class); - private static final String DEFAULT_TEST_DB_NAME = "test"; - - private File tempDir; - private final String testDbName; - - protected AbstractTestHiveLocal() - { - this(DEFAULT_TEST_DB_NAME); - } - - protected AbstractTestHiveLocal(String testDbName) - { - this.testDbName = requireNonNull(testDbName, "testDbName is null"); - } - - protected abstract HiveMetastore createMetastore(File tempDir); - - @BeforeAll - public void initialize() - throws Exception - { - tempDir = Files.createTempDirectory(null).toFile(); - - HiveMetastore metastore = createMetastore(tempDir); - - metastore.createDatabase( - Database.builder() - .setDatabaseName(testDbName) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build()); - - HiveConfig hiveConfig = new HiveConfig() - .setParquetTimeZone("America/Los_Angeles") - .setRcfileTimeZone("America/Los_Angeles"); - - setup(testDbName, hiveConfig, metastore, HDFS_ENVIRONMENT); - - createTestTables(); - } - - protected void createTestTables() - throws Exception - { - Location location = Location.of(metastoreClient.getDatabase(database).orElseThrow() - .getLocation().orElseThrow()); - - createTestTable( - // Matches create-test.sql » trino_test_partition_format - Table.builder() - .setDatabaseName(database) - .setTableName(tablePartitionFormat.getTableName()) - .setTableType(MANAGED_TABLE.name()) - .setOwner(Optional.empty()) - .setDataColumns(List.of( - new Column("t_string", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("t_tinyint", HiveType.HIVE_BYTE, Optional.empty(), Map.of()), - new Column("t_smallint", HiveType.HIVE_SHORT, Optional.empty(), Map.of()), - new Column("t_int", HiveType.HIVE_INT, Optional.empty(), Map.of()), - new Column("t_bigint", HiveType.HIVE_LONG, Optional.empty(), Map.of()), - new Column("t_float", HiveType.HIVE_FLOAT, Optional.empty(), Map.of()), - new Column("t_boolean", HiveType.HIVE_BOOLEAN, Optional.empty(), Map.of()))) - .setPartitionColumns(List.of( - new Column("ds", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("file_format", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("dummy", HiveType.HIVE_INT, Optional.empty(), Map.of()))) - .setParameter(TABLE_COMMENT, "Presto test data") - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(new HiveConfig().getHiveStorageFormat())) - .setLocation(Optional.of(location.appendPath(tablePartitionFormat.getTableName()).toString()))) - .build()); - - createTestTable( - // Matches create-test.sql » trino_test_partition_format - Table.builder() - 
.setDatabaseName(database) - .setTableName(tableUnpartitioned.getTableName()) - .setTableType(MANAGED_TABLE.name()) - .setOwner(Optional.empty()) - .setDataColumns(List.of( - new Column("t_string", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("t_tinyint", HiveType.HIVE_BYTE, Optional.empty(), Map.of()))) - .setParameter(TABLE_COMMENT, "Presto test data") - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(Optional.of(location.appendPath(tableUnpartitioned.getTableName()).toString()))) - .build()); - } - - protected void createTestTable(Table table) - throws Exception - { - metastoreClient.createTable(table, NO_PRIVILEGES); - } - - @AfterAll - public void cleanup() - throws IOException - { - try { - for (String tableName : metastoreClient.getAllTables(database)) { - metastoreClient.dropTable(database, tableName, true); - } - metastoreClient.dropDatabase(testDbName, true); - } - finally { - deleteRecursively(tempDir.toPath(), ALLOW_INSECURE); - } - } - - @Override - protected ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName) - { - if (tableName.getTableName().startsWith(TEMPORARY_TABLE_PREFIX)) { - return super.getTableHandle(metadata, tableName); - } - return abort("tests using existing tables are not supported"); - } - - @Test - @Override - public void testGetAllTableColumns() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetAllTableColumnsInSchema() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetTableNames() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetTableSchemaOffline() - { - abort("Test disabled for this subclass"); - } - - @Test - public void testSparkBucketedTableValidation() - throws Exception - { - SchemaTableName table = temporaryTable("spark_bucket_validation"); - try { - doTestSparkBucketedTableValidation(table); - } - finally { - dropTable(table); - } - } - - private void doTestSparkBucketedTableValidation(SchemaTableName tableName) - throws Exception - { - Path externalLocation = copyResourceDirToTemporaryDirectory("spark_bucketed_nation"); - try { - createExternalTable( - tableName, - ORC, - ImmutableList.of( - new Column("nationkey", HIVE_INT, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of()), - new Column("regionkey", HIVE_INT, Optional.empty(), Map.of()), - new Column("comment", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(), - Optional.of(new HiveBucketProperty( - ImmutableList.of("nationkey"), - BUCKETING_V1, - 3, - ImmutableList.of(new SortingColumn("name", SortingColumn.Order.ASCENDING)))), - Location.of(externalLocation.toUri().toString())); - - assertReadFailsWithMessageMatching(ORC, tableName, "Hive table is corrupt\\. 
File '.*/.*' is for bucket [0-2], but contains a row for bucket [0-2]."); - markTableAsCreatedBySpark(tableName, "orc"); - assertReadReturnsRowCount(ORC, tableName, 25); - } - finally { - deleteRecursively(externalLocation, RecursiveDeleteOption.ALLOW_INSECURE); - } - } - - private void assertReadReturnsRowCount(HiveStorageFormat storageFormat, SchemaTableName tableName, int rowCount) - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(rowCount); - } - } - - private void markTableAsCreatedBySpark(SchemaTableName tableName, String provider) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session); - Table oldTable = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - Table.Builder newTable = Table.builder(oldTable).setParameter(SPARK_TABLE_PROVIDER_KEY, provider); - transaction.getMetastore().replaceTable(tableName.getSchemaName(), tableName.getTableName(), newTable.build(), principalPrivileges); - transaction.commit(); - } - } - - private void createExternalTable(SchemaTableName schemaTableName, HiveStorageFormat hiveStorageFormat, List columns, List partitionColumns, Optional bucketProperty, Location externalLocation) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(EXTERNAL_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns) - .setPartitionColumns(partitionColumns); - - tableBuilder.getStorageBuilder() - .setLocation(externalLocation.toString()) - .setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) - .setBucketProperty(bucketProperty) - .setSerdeParameters(ImmutableMap.of()); - - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.of(externalLocation), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - - transaction.commit(); - } - } - - private Path copyResourceDirToTemporaryDirectory(String resourceName) - throws IOException - { - Path tempDir = java.nio.file.Files.createTempDirectory(getClass().getSimpleName()).normalize(); - log.info("Copying resource dir '%s' to %s", resourceName, tempDir); - ClassPath.from(getClass().getClassLoader()) - .getResources().stream() - .filter(resourceInfo -> resourceInfo.getResourceName().startsWith(resourceName)) - .forEach(resourceInfo -> { - 
try { - Path target = tempDir.resolve(resourceInfo.getResourceName()); - java.nio.file.Files.createDirectories(target.getParent()); - try (InputStream inputStream = resourceInfo.asByteSource().openStream()) { - copy(inputStream, target); - } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - }); - return tempDir.resolve(resourceName).normalize(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index 39a6d4e505720..18bc7a699f55b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.io.Resources; import io.airlift.json.JsonCodec; import io.airlift.json.JsonCodecFactory; import io.airlift.json.ObjectMapperProvider; @@ -24,6 +25,8 @@ import io.trino.cost.StatsAndCosts; import io.trino.execution.QueryInfo; import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.metadata.FunctionManager; import io.trino.metadata.Metadata; import io.trino.metadata.QualifiedObjectName; @@ -39,6 +42,7 @@ import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ColumnMetadata; import io.trino.spi.connector.Constraint; +import io.trino.spi.security.ConnectorIdentity; import io.trino.spi.security.Identity; import io.trino.spi.security.SelectedRole; import io.trino.spi.type.DateType; @@ -71,7 +75,9 @@ import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.math.BigDecimal; +import java.net.URL; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -88,6 +94,7 @@ import java.util.OptionalLong; import java.util.Set; import java.util.StringJoiner; +import java.util.UUID; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; @@ -143,6 +150,7 @@ import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY; import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; import static io.trino.plugin.hive.HiveType.toHiveType; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo; import static io.trino.spi.security.Identity.ofUser; import static io.trino.spi.security.SelectedRole.Type.ROLE; @@ -171,8 +179,7 @@ import static io.trino.testing.TestingAccessControlManager.privilege; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.testing.containers.TestContainers.getPathFromClassPathResource; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.lang.String.format; import static java.lang.String.join; @@ -187,7 +194,6 @@ import static java.util.stream.Collectors.toSet; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Fail.fail; import 
static org.assertj.core.data.Offset.offset; import static org.junit.jupiter.api.Assumptions.abort; @@ -4131,11 +4137,11 @@ public void testMultipleWriters() public void testMultipleWritersWithSkewedData() { try { - // We need to use large table (sf2) to see the effect. Otherwise, a single writer will write the entire + // We need to use large table (sf1) to see the effect. Otherwise, a single writer will write the entire // data before ScaledWriterScheduler is able to scale it to multiple machines. // Skewed table that will scale writers to multiple machines. - String selectSql = "SELECT t1.* FROM (SELECT *, case when orderkey >= 0 then 1 else orderkey end as join_key FROM tpch.sf2.orders) t1 " + - "INNER JOIN (SELECT orderkey FROM tpch.sf2.orders) t2 " + + String selectSql = "SELECT t1.* FROM (SELECT *, case when orderkey >= 0 then 1 else orderkey end as join_key FROM tpch.sf1.orders) t1 " + + "INNER JOIN (SELECT orderkey FROM tpch.sf1.orders) t2 " + "ON t1.join_key = t2.orderkey"; @Language("SQL") String createTableSql = "CREATE TABLE scale_writers_skewed WITH (format = 'PARQUET') AS " + selectSql; assertUpdate( @@ -4143,7 +4149,7 @@ public void testMultipleWritersWithSkewedData() .setSystemProperty("task_min_writer_count", "1") .setSystemProperty("scale_writers", "true") .setSystemProperty("task_scale_writers_enabled", "false") - .setSystemProperty("writer_scaling_min_data_processed", "1MB") + .setSystemProperty("writer_scaling_min_data_processed", "0.5MB") .setSystemProperty("join_distribution_type", "PARTITIONED") .build(), createTableSql, @@ -4411,13 +4417,17 @@ private void testCreateExternalTable( List tableProperties) throws Exception { - java.nio.file.Path tempDir = createTempDirectory(null); - File dataFile = tempDir.resolve("test.txt").toFile(); - writeString(dataFile.toPath(), fileContents); + TrinoFileSystem fileSystem = getTrinoFileSystem(); + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); + fileSystem.createDirectory(tempDir); + Location dataFile = tempDir.appendPath("text.text"); + try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) { + out.write(fileContents.getBytes(UTF_8)); + } // Table properties StringJoiner propertiesSql = new StringJoiner(",\n "); - propertiesSql.add(format("external_location = '%s'", tempDir.toUri().toASCIIString())); + propertiesSql.add(format("external_location = '%s'", tempDir)); propertiesSql.add("format = 'TEXTFILE'"); tableProperties.forEach(propertiesSql::add); @@ -4440,8 +4450,8 @@ private void testCreateExternalTable( assertQuery(format("SELECT col1, col2 from %s", tableName), expectedResults); assertUpdate(format("DROP TABLE %s", tableName)); - assertThat(dataFile).exists(); // file should still exist after drop - deleteRecursively(tempDir, ALLOW_INSECURE); + assertThat(fileSystem.newInputFile(dataFile).exists()).isTrue(); // file should still exist after drop + fileSystem.deleteDirectory(tempDir); } @Test @@ -8664,8 +8674,7 @@ public void testTimestampWithTimeZone() String tableLocation = getTableLocation("test_timestamptz_base"); // TIMESTAMP WITH LOCAL TIME ZONE is not mapped to any Trino type, so we need to create the metastore entry manually - HiveMetastore metastore = ((HiveConnector) getDistributedQueryRunner().getCoordinator().getConnector(catalog)) - .getInjector().getInstance(HiveMetastoreFactory.class) + HiveMetastore metastore = getConnectorService(getDistributedQueryRunner(), HiveMetastoreFactory.class) 
.createMetastore(Optional.of(getSession().getIdentity().toConnectorIdentity(catalog))); metastore.createTable( new Table( @@ -9003,16 +9012,26 @@ public void testCollidingMixedCaseProperty() @Test public void testSelectWithShortZoneId() + throws IOException { - String resourceLocation = getPathFromClassPathResource("with_short_zone_id/data"); + URL resourceLocation = Resources.getResource("with_short_zone_id/data/data.orc"); + + TrinoFileSystem fileSystem = getTrinoFileSystem(); + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); + fileSystem.createDirectory(tempDir); + Location dataFile = tempDir.appendPath("data.orc"); + try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) { + Resources.copy(resourceLocation, out); + } try (TestTable testTable = new TestTable( getQueryRunner()::execute, "test_select_with_short_zone_id_", - "(id INT, firstName VARCHAR, lastName VARCHAR) WITH (external_location = '%s')".formatted(resourceLocation))) { + "(id INT, firstName VARCHAR, lastName VARCHAR) WITH (external_location = '%s')".formatted(tempDir))) { assertThatThrownBy(() -> query("SELECT * FROM %s".formatted(testTable.getName()))) .hasMessageMatching(".*Failed to read ORC file: .*") .hasStackTraceContaining("Unknown time-zone ID: EST"); + } } @@ -9103,7 +9122,7 @@ private static void testWithStorageFormat(TestingHiveStorageFormat storageFormat test.accept(session, storageFormat.getFormat()); } catch (Exception | AssertionError e) { - fail(format("Failure for format %s with properties %s", storageFormat.getFormat(), session.getCatalogProperties()), e); + throw new AssertionError(format("Failure for format %s with properties %s", storageFormat.getFormat(), session.getCatalogProperties()), e); } } @@ -9228,6 +9247,11 @@ private String getTableLocation(String tableName) return (String) computeScalar("SELECT DISTINCT regexp_replace(\"$path\", '/[^/]*$', '') FROM " + tableName); } + private TrinoFileSystem getTrinoFileSystem() + { + return getConnectorService(getQueryRunner(), TrinoFileSystemFactory.class).create(ConnectorIdentity.ofUser("test")); + } + @Override protected boolean supportsPhysicalPushdown() { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java index fcf9f741a9a79..e45f3d6d9a4ba 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java @@ -23,29 +23,30 @@ import io.trino.spi.connector.SchemaNotFoundException; import io.trino.testing.AbstractTestQueryFramework; import org.intellij.lang.annotations.Language; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -import java.util.stream.Stream; import static com.google.common.base.Verify.verify; import static com.google.common.collect.Sets.union; import static io.trino.plugin.hive.S3Assert.s3Path; -import static io.trino.testing.DataProviders.cartesianProduct; -import static 
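// The rewritten tests above (testCreateExternalTable and testSelectWithShortZoneId) share one
// TrinoFileSystem round trip in place of java.nio temp files. A condensed sketch of that pattern,
// built only from the calls introduced in this change (the method name and file name are
// illustrative; the "local:///temp_" prefix follows the tests above):
private void writeAndVerifyThroughTrinoFileSystem()
        throws IOException
{
    TrinoFileSystem fileSystem = getTrinoFileSystem();
    Location tempDir = Location.of("local:///temp_" + UUID.randomUUID());
    fileSystem.createDirectory(tempDir);
    Location dataFile = tempDir.appendPath("example.txt");
    try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) {
        out.write("example".getBytes(UTF_8));
    }
    // the written file is visible through the same abstraction, and the directory is dropped as a unit
    assertThat(fileSystem.newInputFile(dataFile).exists()).isTrue();
    fileSystem.deleteDirectory(tempDir);
}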
io.trino.testing.DataProviders.toDataProvider; -import static io.trino.testing.DataProviders.trueFalse; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public abstract class BaseS3AndGlueMetastoreTest extends AbstractTestQueryFramework { @@ -65,13 +66,13 @@ protected BaseS3AndGlueMetastoreTest(String partitionByKeyword, String locationK this.bucketName = requireNonNull(bucketName, "bucketName is null"); } - @BeforeClass + @BeforeAll public void setUp() { s3 = AmazonS3ClientBuilder.standard().build(); } - @AfterClass(alwaysRun = true) + @AfterAll public void tearDown() { if (metastore != null) { @@ -84,14 +85,16 @@ public void tearDown() } } - @DataProvider - public Object[][] locationPatternsDataProvider() + @Test + public void testBasicOperationsWithProvidedTableLocation() { - return cartesianProduct(trueFalse(), Stream.of(LocationPattern.values()).collect(toDataProvider())); + for (LocationPattern locationPattern : LocationPattern.values()) { + testBasicOperationsWithProvidedTableLocation(false, locationPattern); + testBasicOperationsWithProvidedTableLocation(true, locationPattern); + } } - @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + protected void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_basic_operations_" + randomNameSuffix(); String location = locationPattern.locationForTable(bucketName, schemaName, tableName); @@ -121,8 +124,16 @@ public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, Lo validateFilesAfterDrop(actualTableLocation); } - @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) + @Test + public void testBasicOperationsWithProvidedSchemaLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testBasicOperationsWithProvidedSchemaLocation(false, locationPattern); + testBasicOperationsWithProvidedSchemaLocation(true, locationPattern); + } + } + + protected void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) { String schemaName = "test_basic_operations_schema_" + randomNameSuffix(); String schemaLocation = locationPattern.locationForSchema(bucketName, schemaName); @@ -160,8 +171,16 @@ public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, L assertThat(getTableFiles(actualTableLocation)).isEmpty(); } - @Test(dataProvider = "locationPatternsDataProvider") - public void testMergeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + @Test + public void testMergeWithProvidedTableLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testMergeWithProvidedTableLocation(false, locationPattern); + testMergeWithProvidedTableLocation(true, locationPattern); + } + } + + protected void testMergeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_merge_" + randomNameSuffix(); String location 
= locationPattern.locationForTable(bucketName, schemaName, tableName); @@ -194,8 +213,16 @@ public void testMergeWithProvidedTableLocation(boolean partitioned, LocationPatt validateFilesAfterDrop(actualTableLocation); } - @Test(dataProvider = "locationPatternsDataProvider") - public void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + @Test + public void testOptimizeWithProvidedTableLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testOptimizeWithProvidedTableLocation(false, locationPattern); + testOptimizeWithProvidedTableLocation(true, locationPattern); + } + } + + protected void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_optimize_" + randomNameSuffix(); String location = locationPattern.locationForTable(bucketName, schemaName, tableName); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveFileSystemTestUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveFileSystemTestUtils.java deleted file mode 100644 index a43d782d67f97..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveFileSystemTestUtils.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import io.trino.plugin.hive.AbstractTestHive.HiveTransaction; -import io.trino.plugin.hive.AbstractTestHive.Transaction; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorPageSource; -import io.trino.spi.connector.ConnectorPageSourceProvider; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorSplit; -import io.trino.spi.connector.ConnectorSplitManager; -import io.trino.spi.connector.ConnectorSplitSource; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.DynamicFilter; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.type.Type; -import io.trino.testing.MaterializedResult; -import io.trino.testing.MaterializedRow; - -import java.io.Closeable; -import java.io.IOException; -import java.util.List; -import java.util.stream.IntStream; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.hive.AbstractTestHive.getAllSplits; -import static io.trino.plugin.hive.AbstractTestHive.getSplits; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; -import static io.trino.plugin.hive.HiveTestUtils.getTypes; -import static io.trino.testing.MaterializedResult.materializeSourceDataStream; - -public class HiveFileSystemTestUtils -{ - private HiveFileSystemTestUtils() {} - - public static MaterializedResult readTable(SchemaTableName tableName, HiveTransactionManager transactionManager, - HiveConfig config, ConnectorPageSourceProvider pageSourceProvider, - ConnectorSplitManager splitManager) - throws IOException - { - ConnectorMetadata metadata = null; - ConnectorSession session = null; - ConnectorSplitSource splitSource = null; - - try (Transaction transaction = newTransaction(transactionManager)) { - metadata = transaction.getMetadata(); - session = newSession(config); - - ConnectorTableHandle table = getTableHandle(metadata, tableName, session); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values()); - - metadata.beginQuery(session); - splitSource = getSplits(splitManager, transaction, session, table); - - List allTypes = getTypes(columnHandles); - List dataTypes = getTypes(columnHandles.stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toImmutableList())); - MaterializedResult.Builder result = MaterializedResult.resultBuilder(session, dataTypes); - - List splits = getAllSplits(splitSource); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource( - transaction.getTransactionHandle(), - session, - split, - table, - columnHandles, - DynamicFilter.EMPTY)) { - MaterializedResult pageSourceResult = materializeSourceDataStream(session, pageSource, allTypes); - for (MaterializedRow row : pageSourceResult.getMaterializedRows()) { - Object[] dataValues = IntStream.range(0, row.getFieldCount()) - .filter(channel -> !((HiveColumnHandle) columnHandles.get(channel)).isHidden()) - .mapToObj(row::getField) - .toArray(); - result.row(dataValues); - } - } - } - return result.build(); - } - finally { - cleanUpQuery(metadata, session); - closeQuietly(splitSource); - } - } - - public static ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName, ConnectorSession 
session) - { - ConnectorTableHandle handle = metadata.getTableHandle(session, tableName); - checkArgument(handle != null, "table not found: %s", tableName); - return handle; - } - - public static ConnectorSession newSession(HiveConfig config) - { - return getHiveSession(config); - } - - public static Transaction newTransaction(HiveTransactionManager transactionManager) - { - return new HiveTransaction(transactionManager); - } - - private static void closeQuietly(Closeable closeable) - { - try { - if (closeable != null) { - closeable.close(); - } - } - catch (IOException ignored) { - } - } - - private static void cleanUpQuery(ConnectorMetadata metadata, ConnectorSession session) - { - if (metadata != null && session != null) { - metadata.cleanupQuery(session); - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java index 3910565ffa73e..5993731129f01 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java @@ -19,12 +19,12 @@ import com.google.inject.Module; import io.airlift.log.Logger; import io.airlift.log.Logging; -import io.opentelemetry.api.OpenTelemetry; import io.trino.Session; import io.trino.metadata.QualifiedObjectName; import io.trino.plugin.hive.fs.DirectoryLister; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.tpcds.TpcdsPlugin; import io.trino.plugin.tpch.ColumnNaming; import io.trino.plugin.tpch.DecimalTypeMapping; @@ -38,7 +38,6 @@ import org.intellij.lang.annotations.Language; import org.joda.time.DateTimeZone; -import java.io.File; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; @@ -50,7 +49,7 @@ import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.airlift.log.Level.WARN; import static io.airlift.units.Duration.nanosSince; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.plugin.hive.security.HiveSecurityModule.ALLOW_ALL; import static io.trino.plugin.hive.security.HiveSecurityModule.SQL_STANDARD; import static io.trino.plugin.tpch.ColumnNaming.SIMPLIFIED; @@ -105,12 +104,7 @@ public static class Builder> private ImmutableMap.Builder hiveProperties = ImmutableMap.builder(); private List> initialTables = ImmutableList.of(); private Optional initialSchemasLocationBase = Optional.empty(); - private Function initialTablesSessionMutator = Function.identity(); - private Function metastore = queryRunner -> { - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toFile(); - return createTestingFileHiveMetastore(baseDir); - }; - private Optional openTelemetry = Optional.empty(); + private Optional> metastore = Optional.empty(); private Module module = EMPTY_MODULE; private Optional directoryLister = Optional.empty(); private boolean tpcdsCatalogEnabled; @@ -165,24 +159,10 @@ public SELF setInitialSchemasLocationBase(String initialSchemasLocationBase) return self(); } - @CanIgnoreReturnValue - public SELF setInitialTablesSessionMutator(Function initialTablesSessionMutator) - { - this.initialTablesSessionMutator = requireNonNull(initialTablesSessionMutator, "initialTablesSessionMutator is 
null"); - return self(); - } - @CanIgnoreReturnValue public SELF setMetastore(Function metastore) { - this.metastore = requireNonNull(metastore, "metastore is null"); - return self(); - } - - @CanIgnoreReturnValue - public SELF setOpenTelemetry(OpenTelemetry openTelemetry) - { - this.openTelemetry = Optional.of(openTelemetry); + this.metastore = Optional.of(metastore); return self(); } @@ -221,6 +201,7 @@ public SELF setCreateTpchSchemas(boolean createTpchSchemas) return self(); } + @SuppressWarnings("unused") @CanIgnoreReturnValue public SELF setTpchColumnNaming(ColumnNaming tpchColumnNaming) { @@ -228,6 +209,7 @@ public SELF setTpchColumnNaming(ColumnNaming tpchColumnNaming) return self(); } + @SuppressWarnings("unused") @CanIgnoreReturnValue public SELF setTpchDecimalTypeMapping(DecimalTypeMapping tpchDecimalTypeMapping) { @@ -254,8 +236,15 @@ public DistributedQueryRunner build() queryRunner.createCatalog("tpcds", "tpcds"); } - HiveMetastore metastore = this.metastore.apply(queryRunner); - queryRunner.installPlugin(new TestingHivePlugin(Optional.of(metastore), openTelemetry, module, directoryLister)); + Optional metastore = this.metastore.map(factory -> factory.apply(queryRunner)); + Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"); + + if (metastore.isEmpty() && !hiveProperties.buildOrThrow().containsKey("hive.metastore")) { + hiveProperties.put("hive.metastore", "file"); + hiveProperties.put("hive.metastore.catalog.dir", queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toString()); + } + + queryRunner.installPlugin(new TestingHivePlugin(dataDir, metastore, module, directoryLister)); Map hiveProperties = new HashMap<>(); if (!skipTimezoneSetup) { @@ -285,7 +274,7 @@ public DistributedQueryRunner build() queryRunner.createCatalog(HIVE_CATALOG, "hive", hiveProperties); if (createTpchSchemas) { - populateData(queryRunner, metastore); + populateData(queryRunner); } return queryRunner; @@ -296,17 +285,19 @@ public DistributedQueryRunner build() } } - private void populateData(DistributedQueryRunner queryRunner, HiveMetastore metastore) + private void populateData(QueryRunner queryRunner) { + HiveMetastore metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); if (metastore.getDatabase(TPCH_SCHEMA).isEmpty()) { metastore.createDatabase(createDatabaseMetastoreObject(TPCH_SCHEMA, initialSchemasLocationBase)); - Session session = initialTablesSessionMutator.apply(queryRunner.getDefaultSession()); + Session session = queryRunner.getDefaultSession(); copyTpchTables(queryRunner, "tpch", TINY_SCHEMA_NAME, session, initialTables); } if (tpchBucketedCatalogEnabled && metastore.getDatabase(TPCH_BUCKETED_SCHEMA).isEmpty()) { metastore.createDatabase(createDatabaseMetastoreObject(TPCH_BUCKETED_SCHEMA, initialSchemasLocationBase)); - Session session = initialTablesSessionMutator.apply(createBucketedSession(Optional.empty())); + Session session = createBucketedSession(Optional.empty()); copyTpchTablesBucketed(queryRunner, "tpch", TINY_SCHEMA_NAME, session, initialTables, tpchColumnNaming); } } @@ -410,7 +401,7 @@ public static void main(String[] args) baseDataDir = Optional.of(path); } - DistributedQueryRunner queryRunner = HiveQueryRunner.builder() + DistributedQueryRunner queryRunner = builder() .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) .setHiveProperties(ImmutableMap.of("hive.security", ALLOW_ALL)) .setSkipTimezoneSetup(true) diff --git 
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java index 555ce79943bfb..57d8c8221cf93 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java @@ -793,7 +793,7 @@ public void testBuildManifestFileIterator() List.of(), TupleDomain.all(), () -> true, - TableToPartitionMapping.empty(), + ImmutableMap.of(), Optional.empty(), Optional.empty(), DataSize.of(512, MEGABYTE), @@ -834,7 +834,7 @@ public void testBuildManifestFileIteratorNestedDirectory() List.of(), TupleDomain.all(), () -> true, - TableToPartitionMapping.empty(), + ImmutableMap.of(), Optional.empty(), Optional.empty(), DataSize.of(512, MEGABYTE), @@ -940,7 +940,7 @@ private static HivePartitionMetadata createPartitionMetadata() return new HivePartitionMetadata( new HivePartition(SIMPLE_TABLE.getSchemaTableName()), Optional.empty(), - TableToPartitionMapping.empty()); + ImmutableMap.of()); } private static void createOrcAcidFile(TrinoFileSystem fileSystem, Location location) @@ -1102,7 +1102,7 @@ private BackgroundHiveSplitLoader backgroundHiveSplitLoader( new HivePartitionMetadata( new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.empty(), - TableToPartitionMapping.empty())); + ImmutableMap.of())); return new BackgroundHiveSplitLoader( table, @@ -1133,7 +1133,7 @@ private BackgroundHiveSplitLoader backgroundHiveSplitLoader( new HivePartitionMetadata( new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.empty(), - TableToPartitionMapping.empty())); + ImmutableMap.of())); return backgroundHiveSplitLoader(partitions, locations, directoryLister, 100); } @@ -1209,7 +1209,7 @@ protected HivePartitionMetadata computeNext() { position++; return switch (position) { - case 0 -> new HivePartitionMetadata(new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.empty(), TableToPartitionMapping.empty()); + case 0 -> new HivePartitionMetadata(new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.empty(), ImmutableMap.of()); case 1 -> throw new RuntimeException("OFFLINE"); default -> endOfData(); }; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestCloseIdleWriters.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestCloseIdleWriters.java new file mode 100644 index 0000000000000..60c65e468e87c --- /dev/null +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestCloseIdleWriters.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package io.trino.plugin.hive;
+
+import com.google.common.collect.ImmutableMap;
+import io.trino.Session;
+import io.trino.testing.AbstractTestQueryFramework;
+import io.trino.testing.QueryRunner;
+import org.intellij.lang.annotations.Language;
+import org.junit.jupiter.api.Test;
+
+import static io.trino.SystemSessionProperties.IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD;
+import static io.trino.SystemSessionProperties.SCALE_WRITERS;
+import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT;
+import static io.trino.SystemSessionProperties.TASK_MIN_WRITER_COUNT;
+import static io.trino.SystemSessionProperties.TASK_SCALE_WRITERS_ENABLED;
+import static io.trino.testing.TestingNames.randomNameSuffix;
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TestCloseIdleWriters
+        extends AbstractTestQueryFramework
+{
+    @Override
+    protected QueryRunner createQueryRunner()
+            throws Exception
+    {
+        return HiveQueryRunner.builder()
+                .setNodeCount(1)
+                // Set the target max file size to 100GB so that we don't close writers due to file size in
+                // appendPage.
+                .setHiveProperties(ImmutableMap.of(
+                        "hive.target-max-file-size", "100GB",
+                        "hive.idle-writer-min-file-size", "0.1MB"))
+                .build();
+    }
+
+    @Test
+    public void testCloseIdleWriters()
+    {
+        String tableName = "task_close_idle_writers_" + randomNameSuffix();
+        try {
+            // Create a table with two partitions (0 and 1). Using the order by trick we will write the partitions in
+            // this order 0, 1, and then again 0. This way we are sure that during the partition 1 write there will
+            // be an idle writer for partition 0. Additionally, during the second partition 0 write, there will be an
+            // idle writer for partition 1.
+            @Language("SQL") String createTableSql = """
+                    CREATE TABLE %s WITH (format = 'ORC', partitioned_by = ARRAY['shipmodeVal'])
+                    AS SELECT orderkey, partkey, suppkey, linenumber, quantity, extendedprice,
+                    discount, tax, returnflag, linestatus, commitdate, receiptdate, shipinstruct,
+                    comment, shipdate,
+                    CASE
+                        WHEN shipmode IN ('AIR', 'FOB', 'SHIP', 'TRUCK') THEN 0
+                        WHEN shipmode IN ('MAIL', 'RAIL', 'REG AIR') THEN 1
+                        ELSE 2
+                    END AS shipmodeVal
+                    FROM tpch.tiny.lineitem
+                    ORDER BY shipmode
+                    LIMIT 60174
+                    """.formatted(tableName);
+
+            // Disable all kinds of scaling and set the idle writer threshold to 0.1MB
+            assertUpdate(
+                    Session.builder(getSession())
+                            .setSystemProperty(SCALE_WRITERS, "false")
+                            .setSystemProperty(TASK_SCALE_WRITERS_ENABLED, "false")
+                            .setSystemProperty(TASK_MAX_WRITER_COUNT, "1")
+                            .setSystemProperty(TASK_MIN_WRITER_COUNT, "1")
+                            .setSystemProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, "0.1MB")
+                            .build(),
+                    createTableSql,
+                    60174);
+            long files = (long) computeScalar("SELECT count(DISTINCT \"$path\") FROM " + tableName);
+            // There should be more than 2 files since we triggered close idle writers.
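Why more than two files: with a single writer per task and only two partition values actually produced, the sink would otherwise keep exactly one open file per partition. Because the ORDER BY writes partition 0, then 1, then 0 again, the partition 0 writer sits idle during the partition 1 stretch, is closed once the idle threshold is met, and a fresh file is opened when partition 0 data returns, so at least three files are expected. A hedged way to inspect this per partition inside the same test method (illustrative only, not part of the change; it reuses the hidden $path column from the query above and assumes io.trino.testing.MaterializedResult is imported):

// Illustrative only: break the distinct-file count down by partition value
MaterializedResult filesPerPartition = computeActual(
        "SELECT shipmodeVal, count(DISTINCT \"$path\") FROM " + tableName + " GROUP BY shipmodeVal");
// All TPC-H ship modes map to partition value 0 or 1, so two partitions are written
assertThat(filesPerPartition.getRowCount()).isEqualTo(2);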
+ assertThat(files).isGreaterThan(2); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + } + } +} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java index 24d40204c58f1..91c5a06cffbd1 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive3OnDataLake.java @@ -1548,7 +1548,7 @@ public void testPartitionProjectionIgnore() hiveMinioDataLake.getHiveHadoop().runOnHive( "ALTER TABLE " + hiveTestTableName + " SET TBLPROPERTIES ( 'trino.partition_projection.ignore'='TRUE' )"); // Flush cache to get new definition - computeActual("CALL system.flush_metadata_cache()"); + computeActual("CALL system.flush_metadata_cache(schema_name => '" + HIVE_TEST_SCHEMA + "', table_name => '" + tableName + "')"); // Verify query execution works computeActual(createInsertStatement( diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConcurrentModificationGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConcurrentModificationGlueMetastore.java index c082dfd97b7f1..fb08016d7200a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConcurrentModificationGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConcurrentModificationGlueMetastore.java @@ -31,7 +31,6 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.nio.file.Path; -import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -39,7 +38,6 @@ import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static com.google.common.reflect.Reflection.newProxy; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; -import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingAsyncGlueClient; @@ -102,7 +100,7 @@ protected QueryRunner createQueryRunner() stats, table -> true); - queryRunner.installPlugin(new TestingHivePlugin(Optional.of(metastore), Optional.empty(), EMPTY_MODULE, Optional.empty())); + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"), metastore)); queryRunner.createCatalog(CATALOG_NAME, "hive"); queryRunner.execute("CREATE SCHEMA " + SCHEMA); return queryRunner; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java index 7abc47c55b61d..63cb594b7bae1 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java @@ -55,6 +55,7 @@ public void testDefaults() .setMaxSplitsPerSecond(null) .setDomainCompactionThreshold(1000) .setTargetMaxFileSize(DataSize.of(1, GIGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(16, MEGABYTE)) .setForceLocalScheduling(false) .setMaxConcurrentFileSystemOperations(20) .setMaxConcurrentMetastoreDrops(20) @@ -138,6 +139,7 @@ public void testExplicitPropertyMappings() .put("hive.max-splits-per-second", "1") 
.put("hive.domain-compaction-threshold", "42") .put("hive.target-max-file-size", "72MB") + .put("hive.idle-writer-min-file-size", "1MB") .put("hive.recursive-directories", "true") .put("hive.ignore-absent-partitions", "true") .put("hive.storage-format", "SEQUENCEFILE") @@ -218,6 +220,7 @@ public void testExplicitPropertyMappings() .setMaxSplitsPerSecond(1) .setDomainCompactionThreshold(42) .setTargetMaxFileSize(DataSize.of(72, Unit.MEGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(1, MEGABYTE)) .setForceLocalScheduling(true) .setMaxConcurrentFileSystemOperations(100) .setMaxConcurrentMetastoreDrops(100) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveCreateExternalTable.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveCreateExternalTable.java index 6d0831f018b05..f689230fdf48a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveCreateExternalTable.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveCreateExternalTable.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import io.trino.filesystem.Location; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.MaterializedResult; import io.trino.testing.QueryRunner; @@ -24,9 +25,8 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.util.UUID; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.tpch.TpchTable.CUSTOMER; @@ -50,16 +50,13 @@ protected QueryRunner createQueryRunner() @Test public void testCreateExternalTableWithData() - throws IOException { - Path tempDir = createTempDirectory(null); - String tableLocation = tempDir.resolve("data").toUri().toString(); - + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); @Language("SQL") String createTableSql = format("" + "CREATE TABLE test_create_external " + "WITH (external_location = '%s') AS " + "SELECT * FROM tpch.tiny.nation", - tableLocation); + tempDir); assertUpdate(createTableSql, 25); @@ -69,10 +66,9 @@ public void testCreateExternalTableWithData() MaterializedResult result = computeActual("SELECT DISTINCT regexp_replace(\"$path\", '/[^/]*$', '/') FROM test_create_external"); String tablePath = (String) result.getOnlyValue(); - assertThat(tablePath).startsWith(tableLocation); + assertThat(tablePath).startsWith(tempDir.toString()); assertUpdate("DROP TABLE test_create_external"); - deleteRecursively(tempDir, ALLOW_INSECURE); } @Test @@ -92,11 +88,8 @@ public void testCreateExternalTableAsWithExistingDirectory() @Test public void testCreateExternalTableOnNonExistingPath() - throws Exception { - java.nio.file.Path tempDir = createTempDirectory(null); - // delete dir, trino should recreate it - deleteRecursively(tempDir, ALLOW_INSECURE); + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); String tableName = "test_create_external_non_exists_" + randomNameSuffix(); @Language("SQL") String createTableSql = format("" + @@ -111,13 +104,12 @@ public void testCreateExternalTableOnNonExistingPath() getSession().getCatalog().get(), getSession().getSchema().get(), tableName, - tempDir.toUri().toASCIIString()); + tempDir); assertUpdate(createTableSql); String actual = (String) computeScalar("SHOW CREATE 
TABLE " + tableName); assertThat(actual).isEqualTo(createTableSql); assertUpdate("DROP TABLE " + tableName); - deleteRecursively(tempDir, ALLOW_INSECURE); } @Test diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveDistributedAggregations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveDistributedAggregations.java index 13a5fe418f09e..7c4c17dc85d60 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveDistributedAggregations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveDistributedAggregations.java @@ -16,8 +16,6 @@ import io.trino.testing.AbstractTestAggregations; import io.trino.testing.QueryRunner; -import static io.trino.tpch.TpchTable.getTables; - public class TestHiveDistributedAggregations extends AbstractTestAggregations { @@ -26,7 +24,7 @@ protected QueryRunner createQueryRunner() throws Exception { return HiveQueryRunner.builder() - .setInitialTables(getTables()) + .setInitialTables(REQUIRED_TPCH_TABLES) .build(); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index ac702ba3b9f18..35d25d11ac72b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -212,8 +212,8 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector; import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.data.Offset.offset; import static org.joda.time.DateTimeZone.UTC; -import static org.testng.Assert.assertEquals; // Failing on multiple threads because of org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper // uses a single record writer across all threads. @@ -222,7 +222,7 @@ public final class TestHiveFileFormats { private static final DateTimeZone HIVE_STORAGE_TIME_ZONE = DateTimeZone.forID("America/Bahia_Banderas"); - private static final double EPSILON = 0.001; + private static final float EPSILON = 0.001f; private static final FileFormatDataSourceStats STATS = new FileFormatDataSourceStats(); private static final ConnectorSession PARQUET_SESSION = getHiveSession(createParquetHiveConfig(false)); @@ -974,7 +974,7 @@ private static void testPageSourceFactory( partitionKeys, getColumnHandles(testReadColumns), ImmutableList.of(), - TableToPartitionMapping.empty(), + ImmutableMap.of(), location.toString(), OptionalInt.empty(), paddedFileSize, @@ -1045,10 +1045,12 @@ private static void checkPageSource(ConnectorPageSource pageSource, List columnHandles = getPartitionedColumnHandles(SHIP_MODE.getColumnName()); + Location location = makeFileName(config); + ConnectorPageSink pageSink = createPageSink(fileSystemFactory, transaction, config, sortingFileWriterConfig, metastore, location, stats, columnHandles); + Page truckPage = createPage(lineItem -> lineItem.getShipMode().equals("TRUCK")); + Page shipPage = createPage(lineItem -> lineItem.getShipMode().equals("SHIP")); + + pageSink.appendPage(truckPage); + pageSink.appendPage(shipPage); + // This call will mark the truck and ship partition as idle. + pageSink.closeIdleWriters(); + + // This call will mark the ship partition as non-idle. 
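The appendPage / closeIdleWriters interleaving annotated in this hunk boils down to an idle flag per partition writer. As a minimal sketch of that bookkeeping, under the assumption that the sink flags writers between calls (IdleWriterTracker is a hypothetical helper, not HivePageSink's actual implementation; the real sink also requires the idle-writer-min-file-size threshold configured earlier in this diff before closing anything):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical helper, not HivePageSink's actual code
final class IdleWriterTracker
{
    private final Map<String, Boolean> idleByPartition = new HashMap<>();

    // appendPage path: a partition that just received data is not idle
    void recordWrite(String partition)
    {
        idleByPartition.put(partition, false);
    }

    // closeIdleWriters path: close writers still flagged idle from the previous call,
    // then flag the remaining writers so the next call can catch them
    List<String> closeIdleWriters()
    {
        List<String> closed = new ArrayList<>();
        idleByPartition.forEach((partition, idle) -> {
            if (idle) {
                closed.add(partition);
            }
        });
        closed.forEach(idleByPartition::remove);
        idleByPartition.replaceAll((partition, idle) -> true);
        return closed;
    }
}

Walking the test through this sketch: the first closeIdleWriters() closes nothing and flags TRUCK and SHIP as idle; the appendPage(shipPage) call that follows clears SHIP; the second closeIdleWriters() therefore closes only TRUCK, which is why the assertions below expect extra TRUCK files.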
+ pageSink.appendPage(shipPage); + // This call will close the truck partition if idleWritersMinFileSize limit is reached since + // it is still idle. + pageSink.closeIdleWriters(); + + pageSink.appendPage(truckPage); + pageSink.appendPage(shipPage); + + getFutureValue(pageSink.finish()); + FileIterator fileIterator = fileSystemFactory.create(ConnectorIdentity.ofUser("test")).listFiles(location); + + int truckFileCount = 0; + int shipFileCount = 0; + while (fileIterator.hasNext()) { + FileEntry file = fileIterator.next(); + if (file.location().toString().contains("TRUCK")) { + truckFileCount++; + } + else if (file.location().toString().contains("SHIP")) { + shipFileCount++; + } + } + assertThat(truckFileCount).isEqualTo(expectedTruckFiles); + assertThat(shipFileCount).isEqualTo(expectedShipFiles); + } + private static boolean isSupportedCodec(HiveStorageFormat storageFormat, HiveCompressionOption compressionOption) { if (storageFormat == HiveStorageFormat.AVRO && compressionOption == LZ4) { @@ -163,17 +236,52 @@ private static long writeTestFile(TrinoFileSystemFactory fileSystemFactory, Hive { HiveTransactionHandle transaction = new HiveTransactionHandle(false); HiveWriterStats stats = new HiveWriterStats(); - ConnectorPageSink pageSink = createPageSink(fileSystemFactory, transaction, config, sortingFileWriterConfig, metastore, location, stats); + ConnectorPageSink pageSink = createPageSink(fileSystemFactory, transaction, config, sortingFileWriterConfig, metastore, location, stats, getColumnHandles()); List columns = getTestColumns(); List columnTypes = columns.stream() .map(LineItemColumn::getType) .map(TestHivePageSink::getType) .map(hiveType -> TESTING_TYPE_MANAGER.getType(hiveType.getTypeSignature())) .collect(toList()); + Page page = createPage(lineItem -> true); + pageSink.appendPage(page); + getFutureValue(pageSink.finish()); + + FileIterator fileIterator = fileSystemFactory.create(ConnectorIdentity.ofUser("test")).listFiles(location); + FileEntry fileEntry = fileIterator.next(); + assertThat(fileIterator.hasNext()).isFalse(); + + List pages = new ArrayList<>(); + try (ConnectorPageSource pageSource = createPageSource(fileSystemFactory, transaction, config, fileEntry.location())) { + while (!pageSource.isFinished()) { + Page nextPage = pageSource.getNextPage(); + if (nextPage != null) { + pages.add(nextPage.getLoadedPage()); + } + } + } + + MaterializedResult expectedResults = toMaterializedResult(getHiveSession(config), columnTypes, ImmutableList.of(page)); + MaterializedResult results = toMaterializedResult(getHiveSession(config), columnTypes, pages); + assertThat(results).containsExactlyElementsOf(expectedResults); + assertThat(round(stats.getInputPageSizeInBytes().getAllTime().getMax())).isEqualTo(page.getRetainedSizeInBytes()); + return fileEntry.length(); + } + private static Page createPage(Function filter) + { + List columns = getTestColumns(); + List columnTypes = columns.stream() + .map(LineItemColumn::getType) + .map(TestHivePageSink::getType) + .map(hiveType -> TESTING_TYPE_MANAGER.getType(hiveType.getTypeSignature())) + .collect(toList()); PageBuilder pageBuilder = new PageBuilder(columnTypes); int rows = 0; for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) { + if (!filter.apply(lineItem)) { + continue; + } rows++; if (rows >= NUM_ROWS) { break; @@ -203,29 +311,7 @@ private static long writeTestFile(TrinoFileSystemFactory fileSystemFactory, Hive } } } - Page page = pageBuilder.build(); - pageSink.appendPage(page); - getFutureValue(pageSink.finish()); - - 
FileIterator fileIterator = fileSystemFactory.create(ConnectorIdentity.ofUser("test")).listFiles(location); - FileEntry fileEntry = fileIterator.next(); - assertThat(fileIterator.hasNext()).isFalse(); - - List pages = new ArrayList<>(); - try (ConnectorPageSource pageSource = createPageSource(fileSystemFactory, transaction, config, fileEntry.location())) { - while (!pageSource.isFinished()) { - Page nextPage = pageSource.getNextPage(); - if (nextPage != null) { - pages.add(nextPage.getLoadedPage()); - } - } - } - - MaterializedResult expectedResults = toMaterializedResult(getHiveSession(config), columnTypes, ImmutableList.of(page)); - MaterializedResult results = toMaterializedResult(getHiveSession(config), columnTypes, pages); - assertThat(results).containsExactlyElementsOf(expectedResults); - assertThat(round(stats.getInputPageSizeInBytes().getAllTime().getMax())).isEqualTo(page.getRetainedSizeInBytes()); - return fileEntry.length(); + return pageBuilder.build(); } static MaterializedResult toMaterializedResult(ConnectorSession session, List types, List pages) @@ -261,7 +347,7 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS OptionalInt.empty(), OptionalInt.empty(), false, - TableToPartitionMapping.empty(), + ImmutableMap.of(), Optional.empty(), Optional.empty(), Optional.empty(), @@ -274,13 +360,21 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS return provider.createPageSource(transaction, getHiveSession(config), split, table, ImmutableList.copyOf(getColumnHandles()), DynamicFilter.EMPTY); } - private static ConnectorPageSink createPageSink(TrinoFileSystemFactory fileSystemFactory, HiveTransactionHandle transaction, HiveConfig config, SortingFileWriterConfig sortingFileWriterConfig, HiveMetastore metastore, Location location, HiveWriterStats stats) + private static ConnectorPageSink createPageSink( + TrinoFileSystemFactory fileSystemFactory, + HiveTransactionHandle transaction, + HiveConfig config, + SortingFileWriterConfig sortingFileWriterConfig, + HiveMetastore metastore, + Location location, + HiveWriterStats stats, + List columnHandles) { LocationHandle locationHandle = new LocationHandle(location, location, DIRECT_TO_TARGET_NEW_DIRECTORY); HiveOutputTableHandle handle = new HiveOutputTableHandle( SCHEMA_NAME, TABLE_NAME, - getColumnHandles(), + columnHandles, new HivePageSinkMetadata(new SchemaTableName(SCHEMA_NAME, TABLE_NAME), metastore.getTable(SCHEMA_NAME, TABLE_NAME), ImmutableMap.of()), locationHandle, config.getHiveStorageFormat(), @@ -323,6 +417,23 @@ private static List getColumnHandles() return handles.build(); } + private static List getPartitionedColumnHandles(String partitionColumn) + { + ImmutableList.Builder handles = ImmutableList.builder(); + List columns = getTestColumns(); + for (int i = 0; i < columns.size(); i++) { + LineItemColumn column = columns.get(i); + Type type = getType(column.getType()); + if (column.getColumnName().equals(partitionColumn)) { + handles.add(createBaseColumn(column.getColumnName(), i, HiveType.toHiveType(type), type, PARTITION_KEY, Optional.empty())); + } + else { + handles.add(createBaseColumn(column.getColumnName(), i, HiveType.toHiveType(type), type, REGULAR, Optional.empty())); + } + } + return handles.build(); + } + private static List getTestColumns() { return Stream.of(LineItemColumn.values()) diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHivePlugin.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java 
similarity index 58% rename from plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHivePlugin.java rename to plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java index b3bb9c96a669a..4aa14051dd5d1 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHivePlugin.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePlugin.java @@ -14,66 +14,26 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableMap; -import com.qubole.rubix.core.CachingFileSystem; import io.trino.spi.Plugin; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorFactory; import io.trino.testing.TestingConnectorContext; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; import java.io.File; -import java.io.IOException; import java.nio.file.Files; -import java.nio.file.Path; import static com.google.common.collect.MoreCollectors.onlyElement; import static com.google.common.collect.MoreCollectors.toOptional; import static com.google.common.collect.Streams.stream; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior.APPEND; import static io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior.ERROR; import static java.nio.charset.StandardCharsets.UTF_8; -import static java.nio.file.Files.createTempDirectory; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@TestInstance(PER_CLASS) -@Execution(SAME_THREAD) // see @BeforeEach public class TestHivePlugin { - private Path tempDirectory; - - @BeforeAll - public void setup() - throws IOException - { - tempDirectory = createTempDirectory(getClass().getSimpleName()); - } - - @AfterAll - public void tearDown() - throws IOException - { - deleteRecursively(tempDirectory, ALLOW_INSECURE); - } - - @AfterEach - @BeforeEach - public void deinitializeRubix() - { - // revert static rubix initialization done by other tests - CachingFileSystem.deinitialize(); - } - @Test public void testCreateConnector() { @@ -141,44 +101,6 @@ public void testGlueMetastore() .hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used"); } - @Test - public void testS3SecurityMappingAndHiveCachingMutuallyExclusive() - throws IOException - { - Path mappingConfig = Files.createTempFile(null, null); - ConnectorFactory connectorFactory = getHiveConnectorFactory(); - - assertThatThrownBy(() -> connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.s3.security-mapping.config-file", mappingConfig.toString()) - .put("hive.cache.enabled", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("hive.cache.location", tempDirectory.toString()) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext())) - .hasMessageContaining("S3 security mapping is not compatible with Hive caching"); - } - - @Test - public void testGcsAccessTokenAndHiveCachingMutuallyExclusive() - { - 
ConnectorFactory connectorFactory = getHiveConnectorFactory(); - - assertThatThrownBy(() -> connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.gcs.use-access-token", "true") - .put("hive.cache.enabled", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("hive.cache.location", tempDirectory.toString()) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext())) - .hasMessageContaining("Use of GCS access token is not compatible with Hive caching"); - } - @Test public void testImmutablePartitionsAndInsertOverwriteMutuallyExclusive() { @@ -236,81 +158,6 @@ private Object getDefaultValueInsertExistingPartitionsBehavior(Connector connect .getDefaultValue(); } - @Test - public void testHdfsImpersonationAndHiveCachingMutuallyExclusive() - { - ConnectorFactory connectorFactory = getHiveConnectorFactory(); - - assertThatThrownBy(() -> connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.hdfs.impersonation.enabled", "true") - .put("hive.cache.enabled", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("hive.cache.location", tempDirectory.toString()) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext())) - .hasMessageContaining("HDFS impersonation is not compatible with Hive caching"); - } - - @Test - public void testRubixCache() - { - ConnectorFactory connectorFactory = getHiveConnectorFactory(); - - connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.cache.enabled", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("hive.cache.location", tempDirectory.toString()) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testRubixCacheWithNonExistingCacheDirectory() - { - ConnectorFactory connectorFactory = getHiveConnectorFactory(); - - assertThatThrownBy(() -> connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.cache.enabled", "true") - .put("hive.cache.start-server-on-coordinator", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("hive.cache.location", "/tmp/non/existing/directory") - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext())) - .hasMessageContaining("None of the cache parent directories exists"); - - assertThatThrownBy(() -> connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.cache.enabled", "true") - .put("hive.cache.start-server-on-coordinator", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext())) - .hasMessageContaining("caching directories were not provided"); - - // cache directories should not be required when cache is not explicitly started on coordinator - connectorFactory.create( - "test", - ImmutableMap.builder() - .put("hive.cache.enabled", "true") - .put("hive.metastore.uri", "thrift://foo:1234") - .put("bootstrap.quiet", "true") - .buildOrThrow(), - new TestingConnectorContext()) - .shutdown(); - } - @Test public void testAllowAllAccessControl() { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveQlTranslation.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveQlTranslation.java index 79a129cdcf4f3..51c8421522df5 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveQlTranslation.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveQlTranslation.java @@ -13,19 +13,15 @@ */ package 
io.trino.plugin.hive; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.collect.Streams; import io.trino.sql.parser.SqlParser; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; -import java.util.Iterator; +import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.stream.Stream; import static io.trino.plugin.hive.HiveErrorCode.HIVE_VIEW_TRANSLATION_ERROR; import static io.trino.plugin.hive.HiveToTrinoTranslator.translateHiveViewToTrino; @@ -104,33 +100,11 @@ public class TestHiveQlTranslation /** * Prepare all combinations of {@code n} of the given columns. */ - private static Iterator getNColumns(int n, Map columns) + private static List getNColumns(int n, Collection columns) { - Stream hiveNames = - Sets.cartesianProduct(nCopies(n, columns.keySet())).stream() - .map(names -> join(", ", names)); - - Stream trinoNames = - Lists.cartesianProduct(nCopies(n, List.copyOf(columns.values()))).stream() - .map(names -> join(", ", names)); - - return Streams.zip(hiveNames, trinoNames, (h, p) -> new Object[] {h, p}).iterator(); - } - - @DataProvider(name = "simple_hive_translation_columns") - public Iterator getSimpleColumns() - { - return Iterators.concat( - getNColumns(1, simpleColumnNames), - getNColumns(3, simpleColumnNames)); - } - - @DataProvider(name = "extended_hive_translation_columns") - public Iterator getExtendedColumns() - { - return Iterators.concat( - getNColumns(1, extendedColumnNames), - getNColumns(2, extendedColumnNames)); + return Lists.cartesianProduct(nCopies(n, List.copyOf(columns))).stream() + .map(names -> join(", ", names)) + .toList(); } @Test @@ -212,20 +186,44 @@ public void testPredicates() "SELECT '''' = '''' OR false"); } - @Test(dataProvider = "simple_hive_translation_columns") - public void testSimpleColumns(String hiveColumn, String trinoColumn) + @Test + public void testSimpleColumns() { - assertTranslation( - format("SELECT %s FROM sometable", hiveColumn), - format("SELECT %s FROM sometable", trinoColumn)); + List hiveColumns = ImmutableList.builder() + .addAll(getNColumns(1, simpleColumnNames.keySet())) + .addAll(getNColumns(3, simpleColumnNames.keySet())) + .build(); + + List trinoColumns = ImmutableList.builder() + .addAll(getNColumns(1, simpleColumnNames.values())) + .addAll(getNColumns(3, simpleColumnNames.values())) + .build(); + + for (int i = 0; i < hiveColumns.size(); i++) { + assertTranslation( + format("SELECT %s FROM sometable", hiveColumns.get(i)), + format("SELECT %s FROM sometable", trinoColumns.get(i))); + } } - @Test(dataProvider = "extended_hive_translation_columns") - public void testExtendedColumns(String hiveColumn, String trinoColumn) + @Test + public void testExtendedColumns() { - assertTranslation( - format("SELECT %s FROM sometable", hiveColumn), - format("SELECT %s FROM sometable", trinoColumn)); + List hiveColumns = ImmutableList.builder() + .addAll(getNColumns(1, extendedColumnNames.keySet())) + .addAll(getNColumns(3, extendedColumnNames.keySet())) + .build(); + + List trinoColumns = ImmutableList.builder() + .addAll(getNColumns(1, extendedColumnNames.values())) + .addAll(getNColumns(3, extendedColumnNames.values())) + .build(); + + for (int i = 0; i < hiveColumns.size(); i++) { + assertTranslation( + format("SELECT %s FROM sometable", 
hiveColumns.get(i)), + format("SELECT %s FROM sometable", trinoColumns.get(i))); + } } @Test diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java index d45371b16b0b4..0487184e504fa 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java @@ -20,7 +20,7 @@ import io.trino.spi.security.SelectedRole; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.nio.file.Path; import java.util.HashSet; @@ -121,8 +121,7 @@ protected void validateFilesAfterOptimize(String location, Set initialFi } @Override // Row-level modifications are not supported for Hive tables - @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + protected void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_basic_operations_" + randomNameSuffix(); String location = locationPattern.locationForTable(bucketName, schemaName, tableName); @@ -150,8 +149,16 @@ public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, Lo } } - @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedTableLocationNonCTAS(boolean partitioned, LocationPattern locationPattern) + @Test + public void testBasicOperationsWithProvidedTableLocationNonCTAS() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testBasicOperationsWithProvidedTableLocationNonCTAS(false, locationPattern); + testBasicOperationsWithProvidedTableLocationNonCTAS(true, locationPattern); + } + } + + private void testBasicOperationsWithProvidedTableLocationNonCTAS(boolean partitioned, LocationPattern locationPattern) { // this test needed, because execution path for CTAS and simple create is different String tableName = "test_basic_operations_" + randomNameSuffix(); @@ -177,8 +184,7 @@ public void testBasicOperationsWithProvidedTableLocationNonCTAS(boolean partitio } @Override // Row-level modifications are not supported for Hive tables - @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) + protected void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) { String schemaName = "test_basic_operations_schema_" + randomNameSuffix(); String schemaLocation = locationPattern.locationForSchema(bucketName, schemaName); @@ -210,14 +216,13 @@ public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, L } @Override - @Test(dataProvider = "locationPatternsDataProvider") public void testMergeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { // Row-level modifications are not supported for Hive tables } @Override - public void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + protected void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { if (locationPattern == DOUBLE_SLASH || locationPattern == TRIPLE_SLASH || locationPattern == 
TWO_TRAILING_SLASHES) { assertThatThrownBy(() -> super.testOptimizeWithProvidedTableLocation(partitioned, locationPattern)) @@ -228,8 +233,16 @@ public void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationP super.testOptimizeWithProvidedTableLocation(partitioned, locationPattern); } - @Test(dataProvider = "locationPatternsDataProvider") - public void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) + @Test + public void testAnalyzeWithProvidedTableLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testAnalyzeWithProvidedTableLocation(false, locationPattern); + testAnalyzeWithProvidedTableLocation(true, locationPattern); + } + } + + private void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_analyze_" + randomNameSuffix(); String location = locationPattern.locationForTable(bucketName, schemaName, tableName); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java index 2195a07d35cfe..947d52b31e53e 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java @@ -73,7 +73,7 @@ public void testJsonRoundTrip() OptionalInt.empty(), OptionalInt.empty(), true, - TableToPartitionMapping.mapColumnsByIndex(ImmutableMap.of(1, new HiveTypeName("string"))), + ImmutableMap.of(1, new HiveTypeName("string")), Optional.of(new HiveSplit.BucketConversion( BUCKETING_V1, 32, @@ -93,8 +93,7 @@ public void testJsonRoundTrip() assertThat(actual.getEstimatedFileSize()).isEqualTo(expected.getEstimatedFileSize()); assertThat(actual.getSchema()).isEqualTo(expected.getSchema()); assertThat(actual.getPartitionKeys()).isEqualTo(expected.getPartitionKeys()); - assertThat(actual.getTableToPartitionMapping().getPartitionColumnCoercions()).isEqualTo(expected.getTableToPartitionMapping().getPartitionColumnCoercions()); - assertThat(actual.getTableToPartitionMapping().getTableToPartitionColumns()).isEqualTo(expected.getTableToPartitionMapping().getTableToPartitionColumns()); + assertThat(actual.getHiveColumnCoercions()).isEqualTo(expected.getHiveColumnCoercions()); assertThat(actual.getBucketConversion()).isEqualTo(expected.getBucketConversion()); assertThat(actual.isForceLocalScheduling()).isEqualTo(expected.isForceLocalScheduling()); assertThat(actual.getAcidInfo().get()).isEqualTo(expected.getAcidInfo().get()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java index 159b977920161..acd4791ee7139 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java @@ -331,7 +331,7 @@ private TestSplit(int id, OptionalInt bucketNumber, DataSize fileSize, BooleanSu bucketNumber, true, false, - TableToPartitionMapping.empty(), + ImmutableMap.of(), Optional.empty(), Optional.empty(), Optional.empty(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java index d67a1fa1b0373..52c13e20554a7 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java +++ 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java @@ -134,7 +134,7 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle OptionalInt.of(1), OptionalInt.of(1), false, - TableToPartitionMapping.empty(), + ImmutableMap.of(), Optional.empty(), Optional.empty(), Optional.empty(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java index 50cb6a652300f..5d386113f92d0 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -563,7 +563,7 @@ public ConnectorPageSource newPageSource(FileFormatDataSourceStats stats, Connec partitionKeys, columns, ImmutableList.of(), - TableToPartitionMapping.empty(), + ImmutableMap.of(), fileSplit.getPath().toString(), OptionalInt.empty(), fileSplit.getLength(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java index c3e41ac1881d3..97a89cd27a785 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java @@ -17,10 +17,14 @@ import com.google.common.io.Resources; import io.trino.Session; import io.trino.execution.QueryStats; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.operator.OperatorStats; import io.trino.spi.QueryId; import io.trino.spi.metrics.Count; import io.trino.spi.metrics.Metric; +import io.trino.spi.security.ConnectorIdentity; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; @@ -29,12 +33,15 @@ import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; -import java.io.File; -import java.net.URISyntaxException; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URL; import java.util.Map; +import java.util.UUID; import static com.google.common.collect.MoreCollectors.onlyElement; import static io.trino.parquet.reader.ParquetReader.COLUMN_INDEX_ROWS_FILTERED; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; @@ -43,24 +50,32 @@ public class TestParquetPageSkipping extends AbstractTestQueryFramework { + private TrinoFileSystem fileSystem; + @Override protected QueryRunner createQueryRunner() throws Exception { - return HiveQueryRunner.builder() + DistributedQueryRunner queryRunner = HiveQueryRunner.builder() .setHiveProperties( ImmutableMap.of( "parquet.use-column-index", "true", "parquet.max-buffer-size", "1MB")) .build(); + + fileSystem = getConnectorService(queryRunner, TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + + return queryRunner; } @Test public void testRowGroupPruningFromPageIndexes() throws Exception { + Location dataFile = copyInDataFile("parquet_page_skipping/orders_sorted_by_totalprice/data.parquet"); + String tableName = "test_row_group_pruning_" + randomNameSuffix(); - File 
parquetFile = new File(Resources.getResource("parquet_page_skipping/orders_sorted_by_totalprice").toURI()); assertUpdate( """ CREATE TABLE %s ( @@ -77,7 +92,7 @@ comment varchar(79), WITH ( format = 'PARQUET', external_location = '%s') - """.formatted(tableName, parquetFile.getAbsolutePath())); + """.formatted(tableName, dataFile.parentDirectory())); int rowCount = assertColumnIndexResults("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 100000 AND 131280 AND clerk = 'Clerk#000000624'"); assertThat(rowCount).isGreaterThan(0); @@ -90,14 +105,14 @@ comment varchar(79), @Test public void testPageSkippingWithNonSequentialOffsets() - throws URISyntaxException + throws IOException { + Location dataFile = copyInDataFile("parquet_page_skipping/random/data.parquet"); String tableName = "test_random_" + randomNameSuffix(); - File parquetFile = new File(Resources.getResource("parquet_page_skipping/random").toURI()); assertUpdate(format( "CREATE TABLE %s (col double) WITH (format = 'PARQUET', external_location = '%s')", tableName, - parquetFile.getAbsolutePath())); + dataFile.parentDirectory())); // These queries select a subset of pages which are stored at non-sequential offsets // This reproduces the issue identified in https://github.com/trinodb/trino/issues/9097 for (double i = 0; i < 1; i += 0.1) { @@ -108,17 +123,18 @@ public void testPageSkippingWithNonSequentialOffsets() @Test public void testFilteringOnColumnNameWithDot() - throws URISyntaxException + throws IOException { + Location dataFile = copyInDataFile("parquet_page_skipping/column_name_with_dot/data.parquet"); + String nameInSql = "\"a.dot\""; String tableName = "test_column_name_with_dot_" + randomNameSuffix(); - File parquetFile = new File(Resources.getResource("parquet_page_skipping/column_name_with_dot").toURI()); assertUpdate(format( "CREATE TABLE %s (key varchar(50), %s varchar(50)) WITH (format = 'PARQUET', external_location = '%s')", tableName, nameInSql, - parquetFile.getAbsolutePath())); + dataFile.parentDirectory())); assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')"); assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')"); @@ -128,16 +144,16 @@ public void testFilteringOnColumnNameWithDot() @Test public void testUnsupportedColumnIndex() - throws URISyntaxException + throws IOException { String tableName = "test_unsupported_column_index_" + randomNameSuffix(); // Test for https://github.com/trinodb/trino/issues/16801 - File parquetFile = new File(Resources.getResource("parquet_page_skipping/unsupported_column_index").toURI()); + Location dataFile = copyInDataFile("parquet_page_skipping/unsupported_column_index/data.parquet"); assertUpdate(format( "CREATE TABLE %s (stime timestamp(3), btime timestamp(3), detail varchar) WITH (format = 'PARQUET', external_location = '%s')", tableName, - parquetFile.getAbsolutePath())); + dataFile.parentDirectory())); assertQuery( "SELECT * FROM " + tableName + " WHERE btime >= timestamp '2023-03-27 13:30:00'", @@ -201,15 +217,15 @@ private void testPageSkipping(String sortByColumn, String sortByColumnType, Obje @Test public void testFilteringWithColumnIndex() - throws URISyntaxException + throws IOException { + Location dataFile = copyInDataFile("parquet_page_skipping/lineitem_sorted_by_suppkey/data.parquet"); String tableName = "test_page_filtering_" + randomNameSuffix(); - File parquetFile = new 
File(Resources.getResource("parquet_page_skipping/lineitem_sorted_by_suppkey").toURI()); assertUpdate(format( "CREATE TABLE %s (suppkey bigint, extendedprice decimal(12, 2), shipmode varchar(10), comment varchar(44)) " + "WITH (format = 'PARQUET', external_location = '%s')", tableName, - parquetFile.getAbsolutePath())); + dataFile.parentDirectory())); verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey = 10"); verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey BETWEEN 25 AND 35"); @@ -337,4 +353,18 @@ private void buildSortedTables(String tableName, String sortByColumnName, String format("INSERT INTO %s SELECT *, ARRAY[rand(), rand(), rand()] FROM tpch.tiny.orders", tableName), 15000); } + + private Location copyInDataFile(String resourceFileName) + throws IOException + { + URL resourceLocation = Resources.getResource(resourceFileName); + + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); + fileSystem.createDirectory(tempDir); + Location dataFile = tempDir.appendPath("data.parquet"); + try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) { + Resources.copy(resourceLocation, out); + } + return dataFile; + } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestRegexTable.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestRegexTable.java index 8895127f71a1c..6945670d586f5 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestRegexTable.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestRegexTable.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableMap; +import io.trino.filesystem.Location; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.MaterializedResult; import io.trino.testing.QueryRunner; @@ -21,12 +22,9 @@ import org.junit.jupiter.api.Test; import java.io.IOException; -import java.nio.file.Path; +import java.util.UUID; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static java.nio.file.Files.createTempDirectory; public class TestRegexTable extends AbstractTestQueryFramework @@ -44,8 +42,7 @@ protected QueryRunner createQueryRunner() public void testCreateExternalTableWithData() throws IOException { - Path tempDir = createTempDirectory(null); - Path tableLocation = tempDir.resolve("data"); + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); // REGEX format is read-only, so create data files using the text file format @Language("SQL") String createTableSql = """ @@ -55,13 +52,15 @@ public void testCreateExternalTableWithData() textfile_field_separator = 'x', external_location = '%s') AS SELECT nationkey, name FROM tpch.tiny.nation - """.formatted(tableLocation.toUri().toASCIIString()); + """.formatted(tempDir); assertUpdate(createTableSql, 25); MaterializedResult expected = computeActual("SELECT nationkey, name FROM tpch.tiny.nation"); MaterializedResult actual = computeActual("SELECT nationkey, name FROM test_regex_data"); assertEqualsIgnoreOrder(actual.getMaterializedRows(), expected.getMaterializedRows()); + MaterializedResult x = computeActual("SELECT \"$path\" FROM test_regex_data"); + // REGEX table over the text file created data createTableSql = """ CREATE TABLE test_regex ( @@ -71,7 +70,7 @@ CREATE TABLE test_regex ( format = 'regex', regex = '(\\d+)x(.+)', 
external_location = '%s') - """.formatted(tableLocation.toUri().toASCIIString()); + """.formatted(tempDir); assertUpdate(createTableSql); actual = computeActual("SELECT nationkey, name FROM test_regex"); @@ -91,7 +90,7 @@ CREATE TABLE test_regex ( regex = '(\\d+)X(.+)', regex_case_insensitive = true, external_location = '%s') - """.formatted(tableLocation.toUri().toASCIIString()); + """.formatted(tempDir); assertUpdate(createTableSql); actual = computeActual("SELECT nationkey, name FROM test_regex"); assertEqualsIgnoreOrder(actual.getMaterializedRows(), expected.getMaterializedRows()); @@ -106,14 +105,13 @@ CREATE TABLE test_regex ( format = 'regex', regex = '(\\d+)X(.+)', external_location = '%s') - """.formatted(tableLocation.toUri().toASCIIString()); + """.formatted(tempDir); assertUpdate(createTableSql); // when the pattern does not match all columns are null assertQueryReturnsEmptyResult("SELECT nationkey, name FROM test_regex WHERE nationkey IS NOT NULL AND name IS NOT NULL"); assertUpdate("DROP TABLE test_regex"); assertUpdate("DROP TABLE test_regex_data"); - deleteRecursively(tempDir, ALLOW_INSECURE); } @Test diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestTableToPartitionMapping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestTableToPartitionMapping.java deleted file mode 100644 index 5a6f169e73323..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestTableToPartitionMapping.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
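The hunks above illustrate a pattern that recurs throughout this change set: TestNG methods driven by @Test(dataProvider = "locationPatternsDataProvider") become plain JUnit 5 tests that loop over the parameter combinations and delegate to a private method. A minimal sketch of the pattern, with illustrative names rather than the actual classes touched here:

    import org.junit.jupiter.api.Test;

    class ExampleLocationPatternTest
    {
        // Illustrative stand-in for the LocationPattern enum used by the real tests.
        enum Pattern { PLAIN, TRAILING_SLASH }

        @Test
        void testBasicOperations()
        {
            // The loop replaces what the TestNG data provider used to supply.
            for (Pattern pattern : Pattern.values()) {
                testBasicOperations(false, pattern);
                testBasicOperations(true, pattern);
            }
        }

        private void testBasicOperations(boolean partitioned, Pattern pattern)
        {
            // Body of the formerly parameterized test goes here.
        }
    }

The trade-off is that a single failing combination now fails the whole method, but it removes the TestNG data-provider machinery from these classes.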
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.Test; - -import static io.trino.plugin.hive.TableToPartitionMapping.isIdentityMapping; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestTableToPartitionMapping -{ - @Test - public void testIsOneToOneMapping() - { - assertThat(isIdentityMapping(ImmutableMap.builder() - .put(0, 0) - .put(1, 1) - .put(2, 2) - .put(3, 3) - .buildOrThrow())).isTrue(); - assertThat(isIdentityMapping(ImmutableMap.builder() - .put(0, 0) - .put(1, 1) - .put(2, 2) - .put(3, 3) - .put(5, 5) - .buildOrThrow())).isFalse(); - assertThat(isIdentityMapping(ImmutableMap.builder() - .put(0, 0) - .put(1, 1) - .put(2, 2) - .put(4, 5) - .buildOrThrow())).isFalse(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveConnectorFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveConnectorFactory.java index ae78ec5fc4a54..b6b5761570d8e 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveConnectorFactory.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveConnectorFactory.java @@ -15,17 +15,22 @@ import com.google.common.collect.ImmutableMap; import com.google.inject.Module; -import io.opentelemetry.api.OpenTelemetry; +import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; import io.trino.plugin.hive.fs.DirectoryLister; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorContext; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.Map; import java.util.Optional; +import static com.google.inject.multibindings.MapBinder.newMapBinder; import static com.google.inject.util.Modules.EMPTY_MODULE; +import static io.airlift.configuration.ConfigBinder.configBinder; import static io.trino.plugin.hive.InternalHiveConnectorFactory.createConnector; import static java.util.Objects.requireNonNull; @@ -33,24 +38,31 @@ public class TestingHiveConnectorFactory implements ConnectorFactory { private final Optional metastore; - private final Optional openTelemetry; private final Module module; private final Optional directoryLister; - public TestingHiveConnectorFactory(HiveMetastore metastore) + public TestingHiveConnectorFactory(Path localFileSystemRootPath) { - this(Optional.of(metastore), Optional.empty(), EMPTY_MODULE, Optional.empty()); + this(localFileSystemRootPath, Optional.empty(), EMPTY_MODULE, Optional.empty()); } + @Deprecated public TestingHiveConnectorFactory( + Path localFileSystemRootPath, Optional metastore, - Optional openTelemetry, Module module, Optional directoryLister) { this.metastore = requireNonNull(metastore, "metastore is null"); - this.openTelemetry = requireNonNull(openTelemetry, "openTelemetry is null"); - this.module = requireNonNull(module, "module is null"); + + localFileSystemRootPath.toFile().mkdirs(); + this.module = binder -> { + binder.install(module); + newMapBinder(binder, String.class, TrinoFileSystemFactory.class) + .addBinding("local").toInstance(new LocalFileSystemFactory(localFileSystemRootPath)); + configBinder(binder).bindConfigDefaults(FileHiveMetastoreConfig.class, config -> config.setCatalogDirectory("local:///")); + }; + this.directoryLister = requireNonNull(directoryLister, "directoryLister is null"); } @@ -63,17 
+75,14 @@ public String getName() @Override public Connector create(String catalogName, Map config, ConnectorContext context) { - return createConnector( - catalogName, - ImmutableMap.builder() - .putAll(config) - .put("bootstrap.quiet", "true") - .buildOrThrow(), - context, - module, - metastore, + ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .putAll(config) + .put("bootstrap.quiet", "true"); + if (metastore.isEmpty() && !config.containsKey("hive.metastore")) { + configBuilder.put("hive.metastore", "file"); + } + return createConnector(catalogName, configBuilder.buildOrThrow(), context, module, metastore, Optional.empty(), - openTelemetry, directoryLister); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHivePlugin.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHivePlugin.java index 13975b1995b33..64733255a667d 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHivePlugin.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHivePlugin.java @@ -15,12 +15,12 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Module; -import io.opentelemetry.api.OpenTelemetry; import io.trino.plugin.hive.fs.DirectoryLister; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.spi.Plugin; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.Optional; import static com.google.inject.util.Modules.EMPTY_MODULE; @@ -29,25 +29,27 @@ public class TestingHivePlugin implements Plugin { + private final Path localFileSystemRootPath; private final Optional metastore; - private final Optional openTelemetry; private final Module module; private final Optional directoryLister; - public TestingHivePlugin() + public TestingHivePlugin(Path localFileSystemRootPath) { - this(Optional.empty(), Optional.empty(), EMPTY_MODULE, Optional.empty()); + this(localFileSystemRootPath, Optional.empty(), EMPTY_MODULE, Optional.empty()); } - public TestingHivePlugin(HiveMetastore metastore) + @Deprecated + public TestingHivePlugin(Path localFileSystemRootPath, HiveMetastore metastore) { - this(Optional.of(metastore), Optional.empty(), EMPTY_MODULE, Optional.empty()); + this(localFileSystemRootPath, Optional.of(metastore), EMPTY_MODULE, Optional.empty()); } - public TestingHivePlugin(Optional metastore, Optional openTelemetry, Module module, Optional directoryLister) + @Deprecated + public TestingHivePlugin(Path localFileSystemRootPath, Optional metastore, Module module, Optional directoryLister) { + this.localFileSystemRootPath = requireNonNull(localFileSystemRootPath, "localFileSystemRootPath is null"); this.metastore = requireNonNull(metastore, "metastore is null"); - this.openTelemetry = requireNonNull(openTelemetry, "openTelemetry is null"); this.module = requireNonNull(module, "module is null"); this.directoryLister = requireNonNull(directoryLister, "directoryLister is null"); } @@ -55,6 +57,6 @@ public TestingHivePlugin(Optional metastore, Optional getConnectorFactories() { - return ImmutableList.of(new TestingHiveConnectorFactory(metastore, openTelemetry, module, directoryLister)); + return ImmutableList.of(new TestingHiveConnectorFactory(localFileSystemRootPath, metastore, module, directoryLister)); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveUtils.java new file mode 100644 index 0000000000000..768395d856c83 --- /dev/null +++ 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingHiveUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.hive; + +import com.google.inject.Injector; +import com.google.inject.Key; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.LocalQueryRunner; +import io.trino.testing.QueryRunner; + +import static io.trino.plugin.hive.HiveQueryRunner.HIVE_CATALOG; + +public final class TestingHiveUtils +{ + private TestingHiveUtils() {} + + public static T getConnectorService(QueryRunner queryRunner, Class clazz) + { + return getConnectorInjector(queryRunner).getInstance(clazz); + } + + public static T getConnectorService(QueryRunner queryRunner, Key key) + { + return getConnectorInjector(queryRunner).getInstance(key); + } + + private static Injector getConnectorInjector(QueryRunner queryRunner) + { + if (queryRunner instanceof DistributedQueryRunner) { + return ((HiveConnector) ((DistributedQueryRunner) queryRunner).getCoordinator().getConnector(HIVE_CATALOG)).getInjector(); + } + return ((HiveConnector) ((LocalQueryRunner) queryRunner).getConnector(HIVE_CATALOG)).getInjector(); + } +} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingThriftHiveMetastoreBuilder.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingThriftHiveMetastoreBuilder.java index dccc93a9768ce..e89932a6ac465 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingThriftHiveMetastoreBuilder.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestingThriftHiveMetastoreBuilder.java @@ -18,6 +18,7 @@ import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.plugin.hive.metastore.HiveMetastoreConfig; +import io.trino.plugin.hive.metastore.thrift.MetastoreClientAdapterProvider; import io.trino.plugin.hive.metastore.thrift.TestingTokenAwareMetastoreClientFactory; import io.trino.plugin.hive.metastore.thrift.ThriftHiveMetastoreFactory; import io.trino.plugin.hive.metastore.thrift.ThriftMetastore; @@ -32,6 +33,7 @@ import static io.trino.plugin.base.security.UserNameProvider.SIMPLE_USER_NAME_PROVIDER; import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; +import static io.trino.plugin.hive.metastore.thrift.TestingTokenAwareMetastoreClientFactory.TIMEOUT; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newFixedThreadPool; @@ -58,6 +60,14 @@ public TestingThriftHiveMetastoreBuilder metastoreClient(HostAndPort address, Du return this; } + public TestingThriftHiveMetastoreBuilder metastoreClient(HostAndPort address, MetastoreClientAdapterProvider metastoreClientAdapterProvider) + { + requireNonNull(address, "address is null"); + checkState(tokenAwareMetastoreClientFactory == null, "Metastore client already set"); + tokenAwareMetastoreClientFactory = new 
TestingTokenAwareMetastoreClientFactory(HiveTestUtils.SOCKS_PROXY, address, TIMEOUT, metastoreClientAdapterProvider); + return this; + } + public TestingThriftHiveMetastoreBuilder metastoreClient(HostAndPort address) { requireNonNull(address, "address is null"); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java index 581fb2dbb2596..37a86dad7803f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java @@ -14,8 +14,8 @@ package io.trino.plugin.hive.coercions; import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext; +import io.trino.spi.TrinoException; import io.trino.spi.block.Block; -import io.trino.spi.type.DateType; import io.trino.spi.type.Type; import org.junit.jupiter.api.Test; @@ -27,7 +27,10 @@ import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; import static io.trino.spi.predicate.Utils.blockToNativeValue; import static io.trino.spi.predicate.Utils.nativeValueToBlock; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; +import static io.trino.spi.type.VarcharType.createVarcharType; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -66,6 +69,28 @@ public void testThrowsExceptionWhenDateIsTooOld() .hasMessageMatching(".*Coercion on historical dates is not supported.*"); } + @Test + public void testDateToVarchar() + { + assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("2023-01-10"), "2023-01-10"); + assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("+10000-04-25"), "+10000-04-25"); + } + + @Test + public void testDateToLowerBoundedVarchar() + { + assertThatThrownBy(() -> assertDateToVarcharCoercion(createVarcharType(8), LocalDate.parse("2023-10-23"), "2023-10-23")) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Varchar representation of '2023-10-23' exceeds varchar(8) bounds"); + } + + @Test + public void testHistoricalDateToVarchar() + { + assertThatThrownBy(() -> assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("1899-12-31"), null)) + .hasMessageMatching(".*Coercion on historical dates is not supported.*"); + } + private void assertVarcharToDateCoercion(Type fromType, String date) { assertVarcharToDateCoercion(fromType, date, fromDateToEpochDate(date)); @@ -73,12 +98,20 @@ private void assertVarcharToDateCoercion(Type fromType, String date) private void assertVarcharToDateCoercion(Type fromType, String date, Long expected) { - Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DateType.DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow() + Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow() .apply(nativeValueToBlock(fromType, utf8Slice(date))); - assertThat(blockToNativeValue(DateType.DATE, coercedValue)) + assertThat(blockToNativeValue(DATE, coercedValue)) .isEqualTo(expected); } + private void assertDateToVarcharCoercion(Type toType, LocalDate date, String expected) + { + Block coercedValue = 
createCoercer(TESTING_TYPE_MANAGER, toHiveType(DATE), toHiveType(toType), new CoercionContext(NANOSECONDS, false)).orElseThrow() + .apply(nativeValueToBlock(DATE, date.toEpochDay())); + assertThat(blockToNativeValue(VARCHAR, coercedValue)) + .isEqualTo(utf8Slice(expected)); + } + private long fromDateToEpochDate(String dateString) { LocalDate date = LocalDate.parse(dateString); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDecimalCoercers.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDecimalCoercers.java index dadfea248de9d..dc43dcd85c58f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDecimalCoercers.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDecimalCoercers.java @@ -16,15 +16,17 @@ import io.trino.spi.block.Block; import io.trino.spi.type.DecimalParseResult; import io.trino.spi.type.Decimals; +import io.trino.spi.type.Int128; import io.trino.spi.type.Type; import org.junit.jupiter.api.Test; -import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS; +import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION; import static io.trino.plugin.hive.HiveType.toHiveType; import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; import static io.trino.spi.predicate.Utils.blockToNativeValue; import static io.trino.spi.predicate.Utils.nativeValueToBlock; import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DecimalType.createDecimalType; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TinyintType.TINYINT; @@ -73,12 +75,89 @@ private void testDecimalToIntCoercion(String decimalString, Type coercedType, Ob else { assertThat(parseResult.getType().isShort()).isTrue(); } - assertDecimalToIntCoercion(parseResult.getType(), parseResult.getObject(), coercedType, expectedValue); + assertCoercion(parseResult.getType(), parseResult.getObject(), coercedType, expectedValue); } - private void assertDecimalToIntCoercion(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) + @Test + public void testTinyintToDecimalCoercion() + { + // Short decimal coercion + assertCoercion(TINYINT, 12L, createDecimalType(10), 12L); + assertCoercion(TINYINT, 12L, createDecimalType(10, 2), 1_200L); + assertCoercion(TINYINT, 12L, createDecimalType(10, 5), 1_200_000L); + // Long decimal coercion + assertCoercion(TINYINT, 0L, createDecimalType(), Int128.ZERO); + assertCoercion(TINYINT, 0L, createDecimalType(), Int128.ZERO); + assertCoercion(TINYINT, 12L, createDecimalType(), Int128.valueOf(12)); + assertCoercion(TINYINT, -12L, createDecimalType(), Int128.valueOf(-12)); + assertCoercion(TINYINT, (long) Byte.MAX_VALUE, createDecimalType(), Int128.valueOf(Byte.MAX_VALUE)); + assertCoercion(TINYINT, (long) Byte.MIN_VALUE, createDecimalType(), Int128.valueOf(Byte.MIN_VALUE)); + assertCoercion(TINYINT, 12L, createDecimalType(20, 10), Int128.valueOf("120000000000")); + // Coercion overflow + assertCoercion(TINYINT, 42L, createDecimalType(6, 5), null); + } + + @Test + public void testSmallintToDecimalCoercion() + { + // Short decimal coercion + assertCoercion(SMALLINT, 12L, createDecimalType(10), 12L); + assertCoercion(SMALLINT, 12L, createDecimalType(10, 2), 1_200L); + assertCoercion(SMALLINT, 12L, createDecimalType(10, 5), 1_200_000L); + // Long decimal coercion + assertCoercion(SMALLINT, 12L, createDecimalType(20, 10), 
Int128.valueOf("120000000000")); + assertCoercion(SMALLINT, 0L, createDecimalType(), Int128.ZERO); + assertCoercion(SMALLINT, 128L, createDecimalType(), Int128.valueOf(128)); + assertCoercion(SMALLINT, -128L, createDecimalType(), Int128.valueOf(-128)); + assertCoercion(SMALLINT, (long) Short.MAX_VALUE, createDecimalType(), Int128.valueOf(Short.MAX_VALUE)); + assertCoercion(SMALLINT, (long) Short.MIN_VALUE, createDecimalType(), Int128.valueOf(Short.MIN_VALUE)); + // Coercion overflow + assertCoercion(SMALLINT, 128L, createDecimalType(7, 5), null); + assertCoercion(SMALLINT, 128L, createDecimalType(20, 18), null); + } + + @Test + public void testIntToDecimalCoercion() + { + // Short decimal coercion + assertCoercion(INTEGER, 123_456L, createDecimalType(10), 123_456L); + assertCoercion(INTEGER, 123_456L, createDecimalType(10, 3), 123_456_000L); + // Long decimal coercion + assertCoercion(INTEGER, 0L, createDecimalType(), Int128.ZERO); + assertCoercion(INTEGER, 128L, createDecimalType(), Int128.valueOf(128)); + assertCoercion(INTEGER, -128L, createDecimalType(), Int128.valueOf(-128)); + assertCoercion(INTEGER, (long) Integer.MAX_VALUE, createDecimalType(), Int128.valueOf(Integer.MAX_VALUE)); + assertCoercion(INTEGER, (long) Integer.MIN_VALUE, createDecimalType(), Int128.valueOf(Integer.MIN_VALUE)); + assertCoercion(INTEGER, 123_456L, createDecimalType(20, 10), Int128.valueOf("1234560000000000")); + // Coercion overflow + assertCoercion(INTEGER, 123_456_789L, createDecimalType(10, 5), null); + assertCoercion(INTEGER, 123_456_789L, createDecimalType(20, 13), null); + } + + @Test + public void testBigintToDecimalCoercion() + { + // Short decimal coercion + assertCoercion(BIGINT, 0L, createDecimalType(10), 0L); + assertCoercion(BIGINT, 123_456_789L, createDecimalType(12), 123_456_789L); + assertCoercion(BIGINT, 123_456_789L, createDecimalType(12, 3), 123_456_789_000L); + // Long decimal coercion + assertCoercion(BIGINT, 0L, createDecimalType(), Int128.ZERO); + assertCoercion(BIGINT, 128L, createDecimalType(), Int128.valueOf(128)); + assertCoercion(BIGINT, -128L, createDecimalType(), Int128.valueOf(-128)); + assertCoercion(BIGINT, Long.MAX_VALUE, createDecimalType(), Int128.valueOf(Long.MAX_VALUE)); + assertCoercion(BIGINT, Long.MIN_VALUE, createDecimalType(), Int128.valueOf(Long.MIN_VALUE)); + assertCoercion(BIGINT, 123_456_789L, createDecimalType(20, 5), Int128.valueOf("12345678900000")); + assertCoercion(BIGINT, 123_456_789L, createDecimalType(20, 10), Int128.valueOf("1234567890000000000")); + assertCoercion(BIGINT, Long.MAX_VALUE, createDecimalType(38, 2), Int128.valueOf("922337203685477580700")); + // Coercion overflow + assertCoercion(BIGINT, 123_456_789L, createDecimalType(10, 5), null); + assertCoercion(BIGINT, Long.MAX_VALUE, createDecimalType(25, 8), null); + } + + private static void assertCoercion(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) { - Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionUtils.CoercionContext(NANOSECONDS, false)).orElseThrow() + Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionUtils.CoercionContext(DEFAULT_PRECISION, true)).orElseThrow() .apply(nativeValueToBlock(fromType, valueToBeCoerced)); assertThat(blockToNativeValue(toType, coercedValue)) .isEqualTo(expectedValue); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java index 526c085e463e2..61d94034222dd 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java @@ -18,10 +18,7 @@ import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.type.Type; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.stream.Stream; +import org.junit.jupiter.api.Test; import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION; import static io.trino.plugin.hive.HiveType.toHiveType; @@ -31,23 +28,50 @@ import static io.trino.spi.type.DoubleType.DOUBLE; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.spi.type.VarcharType.createVarcharType; -import static io.trino.testing.DataProviders.cartesianProduct; -import static io.trino.testing.DataProviders.toDataProvider; -import static io.trino.testing.DataProviders.trueFalse; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestDoubleToVarcharCoercions { - @Test(dataProvider = "doubleValues") - public void testDoubleToVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) + @Test + public void testDoubleToVarcharCoercions() + { + testDoubleToVarcharCoercions(Double.NEGATIVE_INFINITY, true); + testDoubleToVarcharCoercions(Double.MIN_VALUE, true); + testDoubleToVarcharCoercions(Double.MAX_VALUE, true); + testDoubleToVarcharCoercions(Double.POSITIVE_INFINITY, true); + testDoubleToVarcharCoercions(Double.parseDouble("123456789.12345678"), true); + + testDoubleToVarcharCoercions(Double.NEGATIVE_INFINITY, false); + testDoubleToVarcharCoercions(Double.MIN_VALUE, false); + testDoubleToVarcharCoercions(Double.MAX_VALUE, false); + testDoubleToVarcharCoercions(Double.POSITIVE_INFINITY, false); + testDoubleToVarcharCoercions(Double.parseDouble("123456789.12345678"), false); + } + + private void testDoubleToVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) { assertCoercions(DOUBLE, doubleValue, createUnboundedVarcharType(), Slices.utf8Slice(doubleValue.toString()), treatNaNAsNull); } - @Test(dataProvider = "doubleValues") - public void testDoubleSmallerVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) + @Test + public void testDoubleSmallerVarcharCoercions() + { + testDoubleSmallerVarcharCoercions(Double.NEGATIVE_INFINITY, true); + testDoubleSmallerVarcharCoercions(Double.MIN_VALUE, true); + testDoubleSmallerVarcharCoercions(Double.MAX_VALUE, true); + testDoubleSmallerVarcharCoercions(Double.POSITIVE_INFINITY, true); + testDoubleSmallerVarcharCoercions(Double.parseDouble("123456789.12345678"), true); + + testDoubleSmallerVarcharCoercions(Double.NEGATIVE_INFINITY, false); + testDoubleSmallerVarcharCoercions(Double.MIN_VALUE, false); + testDoubleSmallerVarcharCoercions(Double.MAX_VALUE, false); + testDoubleSmallerVarcharCoercions(Double.POSITIVE_INFINITY, false); + testDoubleSmallerVarcharCoercions(Double.parseDouble("123456789.12345678"), false); + } + + private void testDoubleSmallerVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) { assertThatThrownBy(() -> assertCoercions(DOUBLE, doubleValue, createVarcharType(1), doubleValue.toString(), treatNaNAsNull)) 
.isInstanceOf(TrinoException.class) @@ -65,20 +89,6 @@ public void testNaNToVarcharCoercions() .hasMessageContaining("Varchar representation of NaN exceeds varchar(1) bounds"); } - @DataProvider - public Object[][] doubleValues() - { - return cartesianProduct( - Stream.of( - Double.NEGATIVE_INFINITY, - Double.MIN_VALUE, - Double.MAX_VALUE, - Double.POSITIVE_INFINITY, - Double.parseDouble("123456789.12345678")) - .collect(toDataProvider()), - trueFalse()); - } - public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, boolean treatNaNAsNull) { Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionContext(DEFAULT_PRECISION, treatNaNAsNull)).orElseThrow() diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/BaseCachingDirectoryListerTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/BaseCachingDirectoryListerTest.java index f305d7a5e5146..7090236230076 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/BaseCachingDirectoryListerTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/BaseCachingDirectoryListerTest.java @@ -17,33 +17,31 @@ import com.google.common.collect.ImmutableMap; import io.trino.filesystem.Location; import io.trino.plugin.hive.HiveQueryRunner; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.PrincipalPrivileges; import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedRow; import io.trino.testing.QueryRunner; import org.junit.jupiter.api.Test; -import java.nio.file.Path; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Optional; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.HiveQueryRunner.TPCH_SCHEMA; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static java.lang.String.format; -import static java.nio.file.Files.createTempDirectory; import static org.assertj.core.api.Assertions.assertThat; public abstract class BaseCachingDirectoryListerTest extends AbstractTestQueryFramework { private C directoryLister; - private FileHiveMetastore fileHiveMetastore; + private HiveMetastore metastore; @Override protected QueryRunner createQueryRunner() @@ -55,14 +53,16 @@ protected QueryRunner createQueryRunner() protected QueryRunner createQueryRunner(Map properties) throws Exception { - Path temporaryMetastoreDirectory = createTempDirectory(null); - closeAfterClass(() -> deleteRecursively(temporaryMetastoreDirectory, ALLOW_INSECURE)); directoryLister = createDirectoryLister(); - return HiveQueryRunner.builder() + DistributedQueryRunner queryRunner = HiveQueryRunner.builder() .setHiveProperties(properties) - .setMetastore(distributedQueryRunner -> fileHiveMetastore = createTestingFileHiveMetastore(temporaryMetastoreDirectory.toFile())) .setDirectoryLister(directoryLister) .build(); + + metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + return 
queryRunner; } protected abstract C createDirectoryLister(); @@ -335,17 +335,17 @@ public void testDropPartitionedTable() protected Optional<Table>
getTable(String schemaName, String tableName) { - return fileHiveMetastore.getTable(schemaName, tableName); + return metastore.getTable(schemaName, tableName); } protected void createTable(Table table, PrincipalPrivileges principalPrivileges) { - fileHiveMetastore.createTable(table, principalPrivileges); + metastore.createTable(table, principalPrivileges); } protected void dropTable(String schemaName, String tableName, boolean deleteData) { - fileHiveMetastore.dropTable(schemaName, tableName, deleteData); + metastore.dropTable(schemaName, tableName, deleteData); } protected String getTableLocation(String schemaName, String tableName) @@ -360,7 +360,7 @@ protected String getPartitionLocation(String schemaName, String tableName, List< Table table = getTable(schemaName, tableName) .orElseThrow(() -> new NoSuchElementException(format("The table %s.%s could not be found", schemaName, tableName))); - return fileHiveMetastore.getPartition(table, partitionValues) + return metastore.getPartition(table, partitionValues) .map(partition -> partition.getStorage().getLocation()) .orElseThrow(() -> new NoSuchElementException(format("The partition %s from the table %s.%s could not be found", partitionValues, schemaName, tableName))); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/AbstractTestHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/AbstractTestHiveMetastore.java index 82460db7532c9..f54d8da54c1f9 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/AbstractTestHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/AbstractTestHiveMetastore.java @@ -15,6 +15,7 @@ import io.trino.plugin.hive.SchemaAlreadyExistsException; import io.trino.plugin.hive.TableAlreadyExistsException; +import io.trino.spi.connector.TableNotFoundException; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.parallel.Execution; @@ -30,6 +31,7 @@ import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; +import static io.trino.testing.TestingNames.randomNameSuffix; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @@ -45,6 +47,11 @@ public void setMetastore(HiveMetastore metastore) this.metastore = metastore; } + protected HiveMetastore getMetastore() + { + return metastore; + } + @Test void testCreateDatabase() { @@ -97,4 +104,20 @@ void testCreateTable() metastore.dropTable(databaseName, tableName, false); metastore.dropDatabase(databaseName, false); } + + @Test + public void testDropNotExistingTable() + { + String databaseName = "test_database_" + randomNameSuffix(); + Database.Builder database = Database.builder() + .setDatabaseName(databaseName) + .setOwnerName(Optional.empty()) + .setOwnerType(Optional.empty()); + getMetastore().createDatabase(database.build()); + + assertThatThrownBy(() -> getMetastore().dropTable(databaseName, "not_existing", false)) + .isInstanceOf(TableNotFoundException.class); + + getMetastore().dropDatabase(databaseName, false); + } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java deleted file mode 100644 index a1737c3ad01f1..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore; - -import com.google.common.collect.ConcurrentHashMultiset; -import com.google.common.collect.ImmutableMultiset; -import com.google.common.collect.Multiset; -import com.google.errorprone.annotations.ThreadSafe; -import io.trino.plugin.hive.HiveColumnStatisticType; -import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.acid.AcidTransaction; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.spi.connector.RelationType; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.function.LanguageFunction; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.RoleGrant; -import io.trino.spi.type.Type; - -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; - -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_RELATION_TYPES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_TABLES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_VIEWS; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_RELATION_TYPES_FROM_DATABASE; - -@ThreadSafe -public class CountingAccessHiveMetastore - implements HiveMetastore -{ - public enum Method - { - CREATE_DATABASE, - DROP_DATABASE, - CREATE_TABLE, - GET_ALL_DATABASES, - GET_DATABASE, - GET_TABLE, - GET_ALL_TABLES, - GET_ALL_TABLES_FROM_DATABASE, - GET_RELATION_TYPES_FROM_DATABASE, - GET_ALL_RELATION_TYPES, - GET_TABLES_WITH_PARAMETER, - GET_TABLE_STATISTICS, - GET_ALL_VIEWS, - GET_ALL_VIEWS_FROM_DATABASE, - UPDATE_TABLE_STATISTICS, - ADD_PARTITIONS, - GET_PARTITION_NAMES_BY_FILTER, - GET_PARTITIONS_BY_NAMES, - GET_PARTITION, - GET_PARTITION_STATISTICS, - UPDATE_PARTITION_STATISTICS, - REPLACE_TABLE, - DROP_TABLE, - } - - private final HiveMetastore delegate; - private final ConcurrentHashMultiset methodInvocations = ConcurrentHashMultiset.create(); - - public CountingAccessHiveMetastore(HiveMetastore delegate) - { - this.delegate = delegate; - } - - public Multiset getMethodInvocations() - { - return ImmutableMultiset.copyOf(methodInvocations); - } - - public void resetCounters() - { - methodInvocations.clear(); - } - - @Override - public Optional
getTable(String databaseName, String tableName) - { - methodInvocations.add(Method.GET_TABLE); - return delegate.getTable(databaseName, tableName); - } - - @Override - public Set getSupportedColumnStatistics(Type type) - { - // No need to count that, since it's a pure local operation. - return delegate.getSupportedColumnStatistics(type); - } - - @Override - public List getAllDatabases() - { - methodInvocations.add(Method.GET_ALL_DATABASES); - return delegate.getAllDatabases(); - } - - @Override - public Optional getDatabase(String databaseName) - { - methodInvocations.add(Method.GET_DATABASE); - return delegate.getDatabase(databaseName); - } - - @Override - public List getTablesWithParameter(String databaseName, String parameterKey, String parameterValue) - { - methodInvocations.add(Method.GET_TABLES_WITH_PARAMETER); - return delegate.getTablesWithParameter(databaseName, parameterKey, parameterValue); - } - - @Override - public List getAllViews(String databaseName) - { - methodInvocations.add(Method.GET_ALL_VIEWS_FROM_DATABASE); - return delegate.getAllViews(databaseName); - } - - @Override - public Optional> getAllViews() - { - Optional> allViews = delegate.getAllViews(); - if (allViews.isPresent()) { - methodInvocations.add(GET_ALL_VIEWS); - } - return allViews; - } - - @Override - public void createDatabase(Database database) - { - methodInvocations.add(Method.CREATE_DATABASE); - delegate.createDatabase(database); - } - - @Override - public void dropDatabase(String databaseName, boolean deleteData) - { - methodInvocations.add(Method.DROP_DATABASE); - delegate.dropDatabase(databaseName, deleteData); - } - - @Override - public void renameDatabase(String databaseName, String newDatabaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void setDatabaseOwner(String databaseName, HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createTable(Table table, PrincipalPrivileges principalPrivileges) - { - methodInvocations.add(Method.CREATE_TABLE); - delegate.createTable(table, principalPrivileges); - } - - @Override - public void dropTable(String databaseName, String tableName, boolean deleteData) - { - methodInvocations.add(Method.DROP_TABLE); - delegate.dropTable(databaseName, tableName, deleteData); - } - - @Override - public void replaceTable(String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges) - { - methodInvocations.add(Method.REPLACE_TABLE); - delegate.replaceTable(databaseName, tableName, newTable, principalPrivileges); - } - - @Override - public void renameTable(String databaseName, String tableName, String newDatabaseName, String newTableName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void commentTable(String databaseName, String tableName, Optional comment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void setTableOwner(String databaseName, String tableName, HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void commentColumn(String databaseName, String tableName, String columnName, Optional comment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void addColumn(String databaseName, String tableName, String columnName, HiveType columnType, String columnComment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void renameColumn(String databaseName, String tableName, String oldColumnName, 
String newColumnName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropColumn(String databaseName, String tableName, String columnName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional getPartition(Table table, List partitionValues) - { - methodInvocations.add(Method.GET_PARTITION); - return delegate.getPartition(table, partitionValues); - } - - @Override - public Optional> getPartitionNamesByFilter(String databaseName, - String tableName, - List columnNames, - TupleDomain partitionKeysFilter) - { - methodInvocations.add(Method.GET_PARTITION_NAMES_BY_FILTER); - return delegate.getPartitionNamesByFilter(databaseName, tableName, columnNames, partitionKeysFilter); - } - - @Override - public Map> getPartitionsByNames(Table table, List partitionNames) - { - methodInvocations.add(Method.GET_PARTITIONS_BY_NAMES); - return delegate.getPartitionsByNames(table, partitionNames); - } - - @Override - public void addPartitions(String databaseName, String tableName, List partitions) - { - methodInvocations.add(Method.ADD_PARTITIONS); - delegate.addPartitions(databaseName, tableName, partitions); - } - - @Override - public void dropPartition(String databaseName, String tableName, List parts, boolean deleteData) - { - throw new UnsupportedOperationException(); - } - - @Override - public void alterPartition(String databaseName, String tableName, PartitionWithStatistics partition) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createRole(String role, String grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropRole(String role) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoles() - { - throw new UnsupportedOperationException(); - } - - @Override - public void grantRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoleGrants(HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void grantTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listTablePrivileges(String databaseName, String tableName, Optional tableOwner, Optional principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public PartitionStatistics getTableStatistics(Table table) - { - methodInvocations.add(Method.GET_TABLE_STATISTICS); - return delegate.getTableStatistics(table); - } - - @Override - public Map getPartitionStatistics(Table table, List partitions) - { - methodInvocations.add(Method.GET_PARTITION_STATISTICS); - return delegate.getPartitionStatistics(table, partitions); - } - - @Override - public void updateTableStatistics(String databaseName, - String tableName, - AcidTransaction transaction, - Function update) - { - methodInvocations.add(Method.UPDATE_TABLE_STATISTICS); - 
delegate.updateTableStatistics(databaseName, tableName, transaction, update); - } - - @Override - public void updatePartitionStatistics(Table table, Map> updates) - { - methodInvocations.add(Method.UPDATE_PARTITION_STATISTICS); - delegate.updatePartitionStatistics(table, updates); - } - - @Override - public List getAllTables(String databaseName) - { - methodInvocations.add(Method.GET_ALL_TABLES_FROM_DATABASE); - return delegate.getAllTables(databaseName); - } - - @Override - public Optional> getAllTables() - { - Optional> allTables = delegate.getAllTables(); - if (allTables.isPresent()) { - methodInvocations.add(GET_ALL_TABLES); - } - return allTables; - } - - @Override - public Map getRelationTypes(String databaseName) - { - methodInvocations.add(GET_RELATION_TYPES_FROM_DATABASE); - return delegate.getRelationTypes(databaseName); - } - - @Override - public Optional> getRelationTypes() - { - Optional> relationTypes = delegate.getRelationTypes(); - if (relationTypes.isPresent()) { - methodInvocations.add(GET_ALL_RELATION_TYPES); - } - return relationTypes; - } - - @Override - public boolean functionExists(String databaseName, String functionName, String signatureToken) - { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getFunctions(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getFunctions(String databaseName, String functionName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createFunction(String databaseName, String functionName, LanguageFunction function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void replaceFunction(String databaseName, String functionName, LanguageFunction function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropFunction(String databaseName, String functionName, String signatureToken) - { - throw new UnsupportedOperationException(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastoreUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastoreUtil.java deleted file mode 100644 index 46a8a8a969f66..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastoreUtil.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
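The hand-written CountingAccessHiveMetastore wrapper and its assertion helper deleted above are superseded by the tracing-based MetastoreInvocations utility introduced just below, which derives the same per-method counts from exported span names instead of intercepting every HiveMetastore call. A condensed sketch of that counting step, assuming the span names have already been collected as plain strings (class and method names here are illustrative, not the PR's API):

    import com.google.common.collect.Multiset;

    import java.util.List;

    import static com.google.common.collect.ImmutableMultiset.toImmutableMultiset;

    final class MetastoreSpanCounts
    {
        private static final String TRACE_PREFIX = "HiveMetastore.";

        private MetastoreSpanCounts() {}

        // Keep only metastore spans, strip the shared prefix, and count occurrences per method name.
        static Multiset<String> fromSpanNames(List<String> spanNames)
        {
            return spanNames.stream()
                    .filter(name -> name.startsWith(TRACE_PREFIX))
                    .map(name -> name.substring(TRACE_PREFIX.length()))
                    .collect(toImmutableMultiset());
        }
    }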
- */
-package io.trino.plugin.hive.metastore;
-
-import com.google.common.collect.Multiset;
-import io.trino.Session;
-import io.trino.testing.QueryRunner;
-import org.intellij.lang.annotations.Language;
-
-import static io.trino.testing.MultisetAssertions.assertMultisetsEqual;
-
-public final class CountingAccessHiveMetastoreUtil
-{
-    private CountingAccessHiveMetastoreUtil() {}
-
-    public static void assertMetastoreInvocations(
-            CountingAccessHiveMetastore metastore,
-            QueryRunner queryRunner,
-            Session session,
-            @Language("SQL") String query,
-            Multiset<CountingAccessHiveMetastore.Method> expectedInvocations)
-    {
-        metastore.resetCounters();
-        queryRunner.execute(session, query);
-        assertMultisetsEqual(metastore.getMethodInvocations(), expectedInvocations);
-    }
-}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java
new file mode 100644
index 0000000000000..551bfb9542e8c
--- /dev/null
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.hive.metastore;
+
+import com.google.common.collect.Multiset;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import io.trino.Session;
+import io.trino.testing.DistributedQueryRunner;
+import org.intellij.lang.annotations.Language;
+
+import static com.google.common.collect.ImmutableMultiset.toImmutableMultiset;
+import static io.trino.testing.MultisetAssertions.assertMultisetsEqual;
+
+public final class MetastoreInvocations
+{
+    private static final String TRACE_PREFIX = "HiveMetastore.";
+
+    private MetastoreInvocations() {}
+
+    public static void assertMetastoreInvocationsForQuery(
+            DistributedQueryRunner queryRunner,
+            Session session,
+            @Language("SQL") String query,
+            Multiset<MetastoreMethod> expectedInvocations)
+    {
+        queryRunner.execute(session, query);
+
+        Multiset<MetastoreMethod> invocations = queryRunner.getSpans().stream()
+                .map(SpanData::getName)
+                .filter(name -> name.startsWith(TRACE_PREFIX))
+                .map(name -> name.substring(TRACE_PREFIX.length()))
+                .filter(name -> !name.equals("listRoleGrants"))
+                .filter(name -> !name.equals("listTablePrivileges"))
+                .map(MetastoreMethod::fromMethodName)
+                .collect(toImmutableMultiset());
+
+        assertMultisetsEqual(invocations, expectedInvocations);
+    }
+}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java
new file mode 100644
index 0000000000000..30c6461a532b8
--- /dev/null
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreMethod.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.hive.metastore; + +import static com.google.common.base.CaseFormat.LOWER_CAMEL; +import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE; + +public enum MetastoreMethod +{ + CREATE_DATABASE, + DROP_DATABASE, + CREATE_TABLE, + GET_ALL_DATABASES, + GET_DATABASE, + GET_TABLE, + GET_ALL_TABLES, + GET_TABLES, + GET_RELATION_TYPES, + GET_ALL_RELATION_TYPES, + GET_TABLES_WITH_PARAMETER, + GET_TABLE_STATISTICS, + GET_ALL_VIEWS, + GET_VIEWS, + UPDATE_TABLE_STATISTICS, + ADD_PARTITIONS, + GET_PARTITION_NAMES_BY_FILTER, + GET_PARTITIONS_BY_NAMES, + GET_PARTITION, + GET_PARTITION_STATISTICS, + UPDATE_PARTITION_STATISTICS, + REPLACE_TABLE, + DROP_TABLE, + /**/; + + public static MetastoreMethod fromMethodName(String name) + { + return valueOf(LOWER_CAMEL.to(UPPER_UNDERSCORE, name)); + } +} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestBridgingHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestBridgingHiveMetastore.java index f8242a403cfbd..59fa33ca076ee 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestBridgingHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestBridgingHiveMetastore.java @@ -13,14 +13,32 @@ */ package io.trino.plugin.hive.metastore; +import io.trino.plugin.hive.SchemaAlreadyExistsException; +import io.trino.plugin.hive.TableAlreadyExistsException; import io.trino.plugin.hive.containers.HiveHadoop; import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; +import io.trino.plugin.hive.metastore.thrift.MetastoreClientAdapterProvider; +import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreClient; import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.parallel.Execution; +import java.lang.reflect.InvocationTargetException; +import java.util.Map; +import java.util.Optional; + +import static com.google.common.reflect.Reflection.newProxy; +import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; +import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; +import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; +import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; +import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @@ -36,8 +54,22 @@ final class TestBridgingHiveMetastore hiveHadoop = HiveHadoop.builder().build(); hiveHadoop.start(); + MetastoreClientAdapterProvider metastoreClientAdapterProvider = delegate -> 
newProxy(ThriftMetastoreClient.class, (proxy, method, methodArgs) -> { + Object result; + try { + result = method.invoke(delegate, methodArgs); + } + catch (InvocationTargetException e) { + throw e.getCause(); + } + if (method.getName().equals("createDatabase") || method.getName().equals("createTable") || method.getName().equals("dropTable")) { + throw new RuntimeException("Test-simulated Hive Metastore timeout exception"); + } + return result; + }); + setMetastore(new BridgingHiveMetastore(testingThriftHiveMetastoreBuilder() - .metastoreClient(hiveHadoop.getHiveMetastoreEndpoint()) + .metastoreClient(hiveHadoop.getHiveMetastoreEndpoint(), metastoreClientAdapterProvider) .thriftMetastoreConfig(new ThriftMetastoreConfig().setDeleteFilesOnDrop(true)) .build())); } @@ -47,4 +79,80 @@ void afterAll() { hiveHadoop.stop(); } + + @Test + public void testCreateDatabaseWithRetries() + { + // This test is similar to AbstractTestHiveMetastore#testCreateDatabase but with simulating timeout in ThriftMetastoreClient + String databaseName = "test_database_" + randomNameSuffix(); + Database.Builder database = Database.builder() + .setDatabaseName(databaseName) + .setParameters(Map.of(TRINO_QUERY_ID_NAME, "query_id")) + .setOwnerName(Optional.empty()) + .setOwnerType(Optional.empty()); + getMetastore().createDatabase(database.build()); + + database.setParameters(Map.of(TRINO_QUERY_ID_NAME, "another_query_id")); + assertThatThrownBy(() -> getMetastore().createDatabase(database.build())) + .isInstanceOf(SchemaAlreadyExistsException.class); + + getMetastore().dropDatabase(databaseName, false); + } + + @Test + public void testCreateTableWithRetries() + { + // This test is similar to AbstractTestHiveMetastore#testCreateTable but with simulating timeout in ThriftMetastoreClient + String databaseName = "test_database_" + randomNameSuffix(); + Database.Builder database = Database.builder() + .setDatabaseName(databaseName) + .setOwnerName(Optional.empty()) + .setOwnerType(Optional.empty()); + getMetastore().createDatabase(database.build()); + + String tableName = "test_table" + randomNameSuffix(); + Table.Builder table = Table.builder() + .setDatabaseName(databaseName) + .setTableName(tableName) + .setParameters(Map.of(TRINO_QUERY_ID_NAME, "query_id")) + .setTableType(EXTERNAL_TABLE.name()) + .setOwner(Optional.empty()); + table.getStorageBuilder() + .setStorageFormat(fromHiveStorageFormat(PARQUET)); + getMetastore().createTable(table.build(), NO_PRIVILEGES); + + table.setParameters(Map.of(TRINO_QUERY_ID_NAME, "another_query_id")); + assertThatThrownBy(() -> getMetastore().createTable(table.build(), NO_PRIVILEGES)) + .isInstanceOf(TableAlreadyExistsException.class); + + getMetastore().dropTable(databaseName, tableName, false); + getMetastore().dropDatabase(databaseName, false); + } + + @Test + public void testDropTableWithRetries() + { + String databaseName = "test_database_" + randomNameSuffix(); + Database.Builder database = Database.builder() + .setDatabaseName(databaseName) + .setOwnerName(Optional.empty()) + .setOwnerType(Optional.empty()); + getMetastore().createDatabase(database.build()); + + String tableName = "test_table" + randomNameSuffix(); + Table.Builder table = Table.builder() + .setDatabaseName(databaseName) + .setTableName(tableName) + .setTableType(EXTERNAL_TABLE.name()) + .setOwner(Optional.empty()); + table.getStorageBuilder() + .setStorageFormat(fromHiveStorageFormat(PARQUET)); + getMetastore().createTable(table.build(), NO_PRIVILEGES); + + assertThat(getMetastore().getTable(databaseName, 
tableName)).isPresent();
+        getMetastore().dropTable(databaseName, tableName, false);
+        assertThat(getMetastore().getTable(databaseName, tableName)).isEmpty();
+
+        getMetastore().dropDatabase(databaseName, false);
+    }
 }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreMethod.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreMethod.java
new file mode 100644
index 0000000000000..88c02c4e6a207
--- /dev/null
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreMethod.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.hive.metastore;
+
+import org.junit.jupiter.api.Test;
+
+import java.lang.reflect.Method;
+import java.util.Set;
+import java.util.stream.Stream;
+
+import static com.google.common.base.CaseFormat.LOWER_CAMEL;
+import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE;
+import static com.google.common.collect.ImmutableSetMultimap.toImmutableSetMultimap;
+import static io.trino.plugin.hive.metastore.MetastoreMethod.DROP_TABLE;
+import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_VIEWS;
+import static io.trino.plugin.hive.metastore.MetastoreMethod.fromMethodName;
+import static java.util.function.Function.identity;
+import static java.util.stream.Collectors.toSet;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class TestMetastoreMethod
+{
+    @Test
+    void testFromMethodName()
+    {
+        assertThat(fromMethodName("getAllViews")).isEqualTo(GET_ALL_VIEWS);
+        assertThat(fromMethodName("dropTable")).isEqualTo(DROP_TABLE);
+    }
+
+    @Test
+    void testFromMethodNameInvalid()
+    {
+        assertThatThrownBy(() -> fromMethodName("doesNotExist"))
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessage("No enum constant io.trino.plugin.hive.metastore.MetastoreMethod.DOES_NOT_EXIST");
+    }
+
+    @Test
+    void testEnumNamesMapToMethods()
+    {
+        Set<String> methodNames = Stream.of(HiveMetastore.class.getMethods())
+                .map(Method::getName)
+                .collect(toSet());
+
+        for (MetastoreMethod method : MetastoreMethod.values()) {
+            assertThat(methodNames).contains(UPPER_UNDERSCORE.to(LOWER_CAMEL, method.name()));
+        }
+    }
+
+    @Test
+    void testMethodNamesUnique()
+    {
+        Stream.of(HiveMetastore.class.getMethods())
+                .collect(toImmutableSetMultimap(Method::getName, identity()))
+                .asMap().values().forEach(methods ->
+                        assertThat(methods).hasSize(1));
+    }
+}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java
deleted file mode 100644
index b8eccab2fad74..0000000000000
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.filesystem.Location; -import io.trino.plugin.hive.HiveBucketProperty; -import io.trino.plugin.hive.HiveMetastoreClosure; -import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.acid.AcidTransaction; -import io.trino.plugin.hive.fs.FileSystemDirectoryLister; -import org.junit.jupiter.api.Test; - -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executor; -import java.util.concurrent.TimeUnit; -import java.util.function.Function; -import java.util.stream.IntStream; - -import static com.google.common.util.concurrent.MoreExecutors.directExecutor; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.acid.AcidOperation.INSERT; -import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.testing.TestingConnectorSession.SESSION; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.util.concurrent.Executors.newFixedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static org.assertj.core.api.Assertions.assertThat; - -// countDownLatch field is shared between tests -public class TestSemiTransactionalHiveMetastore -{ - private static final Column TABLE_COLUMN = new Column( - "column", - HiveType.HIVE_INT, - Optional.of("comment"), - Map.of()); - private static final Storage TABLE_STORAGE = new Storage( - StorageFormat.create("serde", "input", "output"), - Optional.of("/test"), - Optional.of(new HiveBucketProperty(ImmutableList.of("column"), BUCKETING_V1, 10, ImmutableList.of(new SortingColumn("column", SortingColumn.Order.ASCENDING)))), - true, - ImmutableMap.of("param", "value2")); - - private CountDownLatch countDownLatch; - - @Test - public void testParallelPartitionDrops() - { - int partitionsToDrop = 5; - IntStream dropThreadsConfig = IntStream.of(1, 2); - dropThreadsConfig.forEach(dropThreads -> { - countDownLatch = new CountDownLatch(dropThreads); - SemiTransactionalHiveMetastore semiTransactionalHiveMetastore = getSemiTransactionalHiveMetastoreWithDropExecutor(newFixedThreadPool(dropThreads)); - IntStream.range(0, partitionsToDrop).forEach(i -> semiTransactionalHiveMetastore.dropPartition(SESSION, - "test", - "test", - ImmutableList.of(String.valueOf(i)), - true)); - semiTransactionalHiveMetastore.commit(); - }); - } - - private SemiTransactionalHiveMetastore getSemiTransactionalHiveMetastoreWithDropExecutor(Executor dropExecutor) - { - return new SemiTransactionalHiveMetastore( - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreClosure(new TestingHiveMetastore(), TESTING_TYPE_MANAGER, false), - directExecutor(), - dropExecutor, - 
directExecutor(), - false, - false, - true, - Optional.empty(), - newScheduledThreadPool(1), - new FileSystemDirectoryLister()); - } - - @Test - public void testParallelUpdateStatisticsOperations() - { - int tablesToUpdate = 5; - IntStream updateThreadsConfig = IntStream.of(1, 2); - updateThreadsConfig.forEach(updateThreads -> { - countDownLatch = new CountDownLatch(updateThreads); - SemiTransactionalHiveMetastore semiTransactionalHiveMetastore; - if (updateThreads == 1) { - semiTransactionalHiveMetastore = getSemiTransactionalHiveMetastoreWithUpdateExecutor(directExecutor()); - } - else { - semiTransactionalHiveMetastore = getSemiTransactionalHiveMetastoreWithUpdateExecutor(newFixedThreadPool(updateThreads)); - } - IntStream.range(0, tablesToUpdate).forEach(i -> semiTransactionalHiveMetastore.finishChangingExistingTable(INSERT, SESSION, - "database", - "table_" + i, - Location.of(TABLE_STORAGE.getLocation()), - ImmutableList.of(), - PartitionStatistics.empty(), - false)); - semiTransactionalHiveMetastore.commit(); - }); - } - - private SemiTransactionalHiveMetastore getSemiTransactionalHiveMetastoreWithUpdateExecutor(Executor updateExecutor) - { - return new SemiTransactionalHiveMetastore( - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreClosure(new TestingHiveMetastore(), TESTING_TYPE_MANAGER, false), - directExecutor(), - directExecutor(), - updateExecutor, - false, - false, - true, - Optional.empty(), - newScheduledThreadPool(1), - new FileSystemDirectoryLister()); - } - - private class TestingHiveMetastore - extends UnimplementedHiveMetastore - { - @Override - public Optional
getTable(String databaseName, String tableName) - { - if (databaseName.equals("database")) { - return Optional.of(new Table( - "database", - tableName, - Optional.of("owner"), - "table_type", - TABLE_STORAGE, - ImmutableList.of(TABLE_COLUMN), - ImmutableList.of(TABLE_COLUMN), - ImmutableMap.of("param", "value3"), - Optional.of("original_text"), - Optional.of("expanded_text"), - OptionalLong.empty())); - } - return Optional.empty(); - } - - @Override - public PartitionStatistics getTableStatistics(Table table) - { - return new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); - } - - @Override - public void dropPartition(String databaseName, String tableName, List parts, boolean deleteData) - { - assertCountDownLatch(); - } - - @Override - public void updateTableStatistics(String databaseName, - String tableName, - AcidTransaction transaction, - Function update) - { - assertCountDownLatch(); - } - - private void assertCountDownLatch() - { - try { - countDownLatch.countDown(); - assertThat(countDownLatch.await(10, TimeUnit.SECONDS)).isTrue(); //all other threads launched should count down within 10 seconds - } - catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java deleted file mode 100644 index ca2fd5dba8c89..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java +++ /dev/null @@ -1,343 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore; - -import io.trino.plugin.hive.HiveColumnStatisticType; -import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.acid.AcidTransaction; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.spi.connector.RelationType; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.function.LanguageFunction; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.RoleGrant; -import io.trino.spi.type.Type; - -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; - -public class UnimplementedHiveMetastore - implements HiveMetastore -{ - @Override - public Optional getDatabase(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDatabases() - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional
getTable(String databaseName, String tableName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set getSupportedColumnStatistics(Type type) - { - throw new UnsupportedOperationException(); - } - - @Override - public PartitionStatistics getTableStatistics(Table table) - { - throw new UnsupportedOperationException(); - } - - @Override - public Map getPartitionStatistics(Table table, List partitions) - { - throw new UnsupportedOperationException(); - } - - @Override - public void updateTableStatistics(String databaseName, - String tableName, - AcidTransaction transaction, - Function update) - { - throw new UnsupportedOperationException(); - } - - @Override - public void updatePartitionStatistics(Table table, Map> updates) - { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllTables(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional> getAllTables() - { - throw new UnsupportedOperationException(); - } - - @Override - public Map getRelationTypes(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional> getRelationTypes() - { - throw new UnsupportedOperationException(); - } - - @Override - public List getTablesWithParameter(String databaseName, String parameterKey, String parameterValue) - { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllViews(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional> getAllViews() - { - throw new UnsupportedOperationException(); - } - - @Override - public void createDatabase(Database database) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropDatabase(String databaseName, boolean deleteData) - { - throw new UnsupportedOperationException(); - } - - @Override - public void renameDatabase(String databaseName, String newDatabaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void setDatabaseOwner(String databaseName, HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void setTableOwner(String databaseName, String tableName, HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createTable(Table table, PrincipalPrivileges principalPrivileges) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropTable(String databaseName, String tableName, boolean deleteData) - { - throw new UnsupportedOperationException(); - } - - @Override - public void replaceTable(String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges) - { - throw new UnsupportedOperationException(); - } - - @Override - public void renameTable(String databaseName, String tableName, String newDatabaseName, String newTableName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void commentTable(String databaseName, String tableName, Optional comment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void commentColumn(String databaseName, String tableName, String columnName, Optional comment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void addColumn(String databaseName, String tableName, String columnName, HiveType columnType, String columnComment) - { - throw new UnsupportedOperationException(); - } - - @Override - public void renameColumn(String 
databaseName, String tableName, String oldColumnName, String newColumnName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropColumn(String databaseName, String tableName, String columnName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional getPartition(Table table, List partitionValues) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional> getPartitionNamesByFilter(String databaseName, - String tableName, - List columnNames, - TupleDomain partitionKeysFilter) - { - throw new UnsupportedOperationException(); - } - - @Override - public Map> getPartitionsByNames(Table table, List partitionNames) - { - throw new UnsupportedOperationException(); - } - - @Override - public void addPartitions(String databaseName, String tableName, List partitions) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropPartition(String databaseName, String tableName, List parts, boolean deleteData) - { - throw new UnsupportedOperationException(); - } - - @Override - public void alterPartition(String databaseName, String tableName, PartitionWithStatistics partition) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listTablePrivileges(String databaseName, String tableName, Optional tableOwner, Optional prestoPrincipal) - { - throw new UnsupportedOperationException(); - } - - @Override - public void grantTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createRole(String role, String grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropRole(String role) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoles() - { - throw new UnsupportedOperationException(); - } - - @Override - public void grantRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoleGrants(HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public boolean functionExists(String databaseName, String functionName, String signatureToken) - { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getFunctions(String databaseName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getFunctions(String databaseName, String functionName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createFunction(String databaseName, String functionName, LanguageFunction function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void replaceFunction(String databaseName, String functionName, LanguageFunction function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropFunction(String databaseName, String functionName, String signatureToken) - { - throw new 
UnsupportedOperationException(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java index dc176ec0a00ac..89b78358317d3 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java @@ -19,7 +19,6 @@ import com.google.common.collect.Iterables; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import io.airlift.log.Logger; import io.airlift.units.Duration; import io.trino.hive.thrift.metastore.ColumnStatisticsData; import io.trino.hive.thrift.metastore.ColumnStatisticsObj; @@ -36,7 +35,6 @@ import io.trino.plugin.hive.metastore.Partition; import io.trino.plugin.hive.metastore.PrincipalPrivileges; import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.UnimplementedHiveMetastore; import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; import io.trino.plugin.hive.metastore.thrift.MockThriftMetastoreClient; import io.trino.plugin.hive.metastore.thrift.ThriftHiveMetastore; @@ -57,15 +55,10 @@ import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.parallel.Execution; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalLong; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.Executor; @@ -75,24 +68,19 @@ import java.util.concurrent.TimeUnit; import java.util.function.Consumer; -import static com.google.common.base.Preconditions.checkState; import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; import static io.airlift.concurrent.Threads.daemonThreadsNamed; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; import static io.trino.plugin.hive.HiveType.HIVE_LONG; import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.HiveType.toHiveType; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; import static io.trino.plugin.hive.metastore.MetastoreUtil.computePartitionKeyFilter; import static io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName; import static io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT; -import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; import static io.trino.plugin.hive.metastore.thrift.MockThriftMetastoreClient.BAD_DATABASE; import static io.trino.plugin.hive.metastore.thrift.MockThriftMetastoreClient.BAD_PARTITION; @@ -124,8 +112,6 @@ 
@Execution(SAME_THREAD) public class TestCachingHiveMetastore { - private static final Logger log = Logger.get(TestCachingHiveMetastore.class); - private static final PartitionStatistics TEST_STATS = PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) .setColumnStatistics(ImmutableMap.of(TEST_COLUMN, createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()))) @@ -181,7 +167,7 @@ public void testCachingWithOnlyPartitionsCacheEnabled() .usesCache(); assertThatCachingWithDisabledPartitionCache() - .whenExecuting(testedMetastore -> testedMetastore.getAllTables(TEST_DATABASE)) + .whenExecuting(testedMetastore -> testedMetastore.getTables(TEST_DATABASE)) .usesCache(); assertThatCachingWithDisabledPartitionCache() @@ -230,16 +216,16 @@ public void testGetAllDatabases() public void testGetAllTable() { assertThat(mockClient.getAccessCount()).isEqualTo(0); - assertThat(metastore.getAllTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); + assertThat(metastore.getTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); assertThat(mockClient.getAccessCount()).isEqualTo(1); - assertThat(metastore.getAllTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); + assertThat(metastore.getTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); assertThat(mockClient.getAccessCount()).isEqualTo(1); assertThat(metastore.getTableNamesStats().getRequestCount()).isEqualTo(2); assertThat(metastore.getTableNamesStats().getHitRate()).isEqualTo(0.5); metastore.flushCache(); - assertThat(metastore.getAllTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); + assertThat(metastore.getTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); assertThat(mockClient.getAccessCount()).isEqualTo(2); assertThat(metastore.getTableNamesStats().getRequestCount()).isEqualTo(3); assertThat(metastore.getTableNamesStats().getHitRate()).isEqualTo(1.0 / 3); @@ -253,7 +239,7 @@ public void testBatchGetAllTable() assertThat(mockClient.getAccessCount()).isEqualTo(1); assertThat(metastore.getAllTables()).isEqualTo(Optional.of(ImmutableList.of(TEST_SCHEMA_TABLE))); assertThat(mockClient.getAccessCount()).isEqualTo(1); - assertThat(metastore.getAllTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); + assertThat(metastore.getTables(TEST_DATABASE)).isEqualTo(ImmutableList.of(TEST_TABLE)); assertThat(mockClient.getAccessCount()).isEqualTo(2); assertThat(metastore.getAllTableNamesStats().getRequestCount()).isEqualTo(2); assertThat(metastore.getAllTableNamesStats().getHitRate()).isEqualTo(.5); @@ -269,7 +255,7 @@ public void testBatchGetAllTable() @Test public void testInvalidDbGetAllTAbles() { - assertThat(metastore.getAllTables(BAD_DATABASE).isEmpty()).isTrue(); + assertThat(metastore.getTables(BAD_DATABASE).isEmpty()).isTrue(); } @Test @@ -582,6 +568,52 @@ public void testGetTableStatistics() assertThat(mockClient.getAccessCount()).isEqualTo(6); } + @Test + public void testGetTableStatisticsWithEmptyColumnStats() + { + assertThat(mockClient.getAccessCount()).isEqualTo(0); + + Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(1); + + // Force TEST_TABLE to not have column statistics available + mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of()); + PartitionStatistics expectedStats = 
PartitionStatistics.builder() + .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setColumnStatistics(ImmutableMap.of()) + .build(); + assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats); + assertThat(mockClient.getAccessCount()).isEqualTo(2); + + // Absence of column statistics should get cached and metastore client access count should stay the same + assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats); + assertThat(mockClient.getAccessCount()).isEqualTo(2); + } + + @Test + public void testTableStatisticsWithEmptyColumnStatsWithNoCacheMissing() + { + CachingHiveMetastore metastore = createCachingHiveMetastore(new BridgingHiveMetastore(thriftHiveMetastore), CACHE_TTL, false, true, executor); + + assertThat(mockClient.getAccessCount()).isEqualTo(0); + + Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(1); + + // Force TEST_TABLE to not have column statistics available + mockClient.mockColumnStats(TEST_DATABASE, TEST_TABLE, ImmutableMap.of()); + PartitionStatistics expectedStats = PartitionStatistics.builder() + .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setColumnStatistics(ImmutableMap.of()) + .build(); + assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats); + assertThat(mockClient.getAccessCount()).isEqualTo(2); + + // Absence of column statistics does not get cached and metastore client access count increases + assertThat(metastore.getTableStatistics(table)).isEqualTo(expectedStats); + assertThat(mockClient.getAccessCount()).isEqualTo(3); + } + @Test public void testGetTableStatisticsWithoutMetadataCache() { @@ -750,6 +782,56 @@ public void testGetPartitionStatistics() .containsEntry("col3", intColumnStats(33)); } + @Test + public void testGetPartitionStatisticsWithEmptyColumnStats() + { + assertThat(mockClient.getAccessCount()).isEqualTo(0); + + Table table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(1); + + Partition partition = metastore.getPartition(table, TEST_PARTITION_VALUES2).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(2); + + // TEST_PARTITION2 does not have column statistics available + PartitionStatistics expectedStats = PartitionStatistics.builder() + .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setColumnStatistics(ImmutableMap.of()) + .build(); + assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(mockClient.getAccessCount()).isEqualTo(3); + + // Absence of column statistics should get cached and metastore client access count should stay the same + assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(mockClient.getAccessCount()).isEqualTo(3); + } + + @Test + public void testGetPartitionStatisticsWithEmptyColumnStatsWithNoCacheMissing() + { + CachingHiveMetastore metastore = createCachingHiveMetastore(new BridgingHiveMetastore(thriftHiveMetastore), CACHE_TTL, false, true, executor); + + assertThat(mockClient.getAccessCount()).isEqualTo(0); + + Table 
table = metastore.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(1); + + Partition partition = metastore.getPartition(table, TEST_PARTITION_VALUES2).orElseThrow(); + assertThat(mockClient.getAccessCount()).isEqualTo(2); + + // TEST_PARTITION2 does not have column statistics available + PartitionStatistics expectedStats = PartitionStatistics.builder() + .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(2398040535435L), OptionalLong.empty(), OptionalLong.empty())) + .setColumnStatistics(ImmutableMap.of()) + .build(); + assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(mockClient.getAccessCount()).isEqualTo(3); + + // Absence of column statistics does not get cached and metastore client access count increases + assertThat(metastore.getPartitionStatistics(table, ImmutableList.of(partition))).isEqualTo(ImmutableMap.of(TEST_PARTITION2, expectedStats)); + assertThat(mockClient.getAccessCount()).isEqualTo(4); + } + @Test public void testGetPartitionStatisticsWithoutMetadataCache() { @@ -847,7 +929,7 @@ public void testUpdatePartitionStatistics() Table table = hiveMetastoreClosure.getTable(TEST_DATABASE, TEST_TABLE).orElseThrow(); assertThat(mockClient.getAccessCount()).isEqualTo(1); - hiveMetastoreClosure.updatePartitionStatistics(table.getDatabaseName(), table.getTableName(), TEST_PARTITION1, identity()); + hiveMetastoreClosure.updatePartitionStatistics(table.getDatabaseName(), table.getTableName(), Map.of(TEST_PARTITION1, identity())); assertThat(mockClient.getAccessCount()).isEqualTo(5); } @@ -932,167 +1014,6 @@ public void testCachingHiveMetastoreCreationViaMemoize() assertThat(metastore.getDatabaseNamesStats().getRequestCount()).isEqualTo(0); } - @Test - @Timeout(60) - public void testLoadAfterInvalidate() - throws Exception - { - testLoadAfterInvalidate(true); - testLoadAfterInvalidate(false); - } - - private void testLoadAfterInvalidate(boolean invalidateAll) - throws Exception - { - // State - CopyOnWriteArrayList tableColumns = new CopyOnWriteArrayList<>(); - ConcurrentMap tablePartitionsByName = new ConcurrentHashMap<>(); - Map tableParameters = new ConcurrentHashMap<>(); - tableParameters.put("frequent-changing-table-parameter", "parameter initial value"); - - // Initialize data - String databaseName = "my_database"; - String tableName = "my_table_name"; - - tableColumns.add(new Column("value", toHiveType(VARCHAR), Optional.empty() /* comment */, Map.of())); - tableColumns.add(new Column("pk", toHiveType(VARCHAR), Optional.empty() /* comment */, Map.of())); - - List partitionNames = new ArrayList<>(); - for (int i = 0; i < 10; i++) { - String partitionName = "pk=" + i; - tablePartitionsByName.put( - partitionName, - Partition.builder() - .setDatabaseName(databaseName) - .setTableName(tableName) - .setColumns(ImmutableList.copyOf(tableColumns)) - .setValues(List.of(Integer.toString(i))) - .withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE))) - .setParameters(Map.of("frequent-changing-partition-parameter", "parameter initial value")) - .build()); - partitionNames.add(partitionName); - } - - // Mock metastore - CountDownLatch getTableEnteredLatch = new CountDownLatch(1); - CountDownLatch getTableReturnLatch = new CountDownLatch(1); - CountDownLatch getTableFinishedLatch = new CountDownLatch(1); - CountDownLatch getPartitionsByNamesEnteredLatch = new CountDownLatch(1); - 
CountDownLatch getPartitionsByNamesReturnLatch = new CountDownLatch(1); - CountDownLatch getPartitionsByNamesFinishedLatch = new CountDownLatch(1); - - HiveMetastore mockMetastore = new UnimplementedHiveMetastore() - { - @Override - public Optional
getTable(String databaseName, String tableName) - { - Optional
table = Optional.of(Table.builder() - .setDatabaseName(databaseName) - .setTableName(tableName) - .setTableType(EXTERNAL_TABLE.name()) - .setDataColumns(tableColumns) - .setParameters(ImmutableMap.copyOf(tableParameters)) - // Required by 'Table', but not used by view translation. - .withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE))) - .setOwner(Optional.empty()) - .build()); - - getTableEnteredLatch.countDown(); // 1 - await(getTableReturnLatch, 10, SECONDS); // 2 - - return table; - } - - @Override - public Map> getPartitionsByNames(Table table, List partitionNames) - { - Map> result = new HashMap<>(); - for (String partitionName : partitionNames) { - result.put(partitionName, Optional.ofNullable(tablePartitionsByName.get(partitionName))); - } - - getPartitionsByNamesEnteredLatch.countDown(); // loader#1 - await(getPartitionsByNamesReturnLatch, 10, SECONDS); // loader#2 - - return result; - } - }; - - // Caching metastore - metastore = createCachingHiveMetastore(mockMetastore, CACHE_TTL, true, true, executor); - - // The test. Main thread does modifications and verifies subsequent load sees them. Background thread loads the state into the cache. - ExecutorService executor = Executors.newFixedThreadPool(1); - try { - Future future = executor.submit(() -> { - try { - Table table; - - table = metastore.getTable(databaseName, tableName).orElseThrow(); - getTableFinishedLatch.countDown(); // 3 - - metastore.getPartitionsByNames(table, partitionNames); - getPartitionsByNamesFinishedLatch.countDown(); // 6 - - return null; - } - catch (Throwable e) { - log.error(e); - throw e; - } - }); - - await(getTableEnteredLatch, 10, SECONDS); // 21 - tableParameters.put("frequent-changing-table-parameter", "main-thread-put-xyz"); - if (invalidateAll) { - metastore.flushCache(); - } - else { - metastore.invalidateTable(databaseName, tableName); - } - getTableReturnLatch.countDown(); // 2 - await(getTableFinishedLatch, 10, SECONDS); // 3 - Table table = metastore.getTable(databaseName, tableName).orElseThrow(); - assertThat(table.getParameters()) - .isEqualTo(Map.of("frequent-changing-table-parameter", "main-thread-put-xyz")); - - await(getPartitionsByNamesEnteredLatch, 10, SECONDS); // 4 - String partitionName = partitionNames.get(2); - Map newPartitionParameters = Map.of("frequent-changing-partition-parameter", "main-thread-put-alice"); - tablePartitionsByName.put(partitionName, - Partition.builder(tablePartitionsByName.get(partitionName)) - .setParameters(newPartitionParameters) - .build()); - if (invalidateAll) { - metastore.flushCache(); - } - else { - metastore.invalidateTable(databaseName, tableName); - } - getPartitionsByNamesReturnLatch.countDown(); // 5 - await(getPartitionsByNamesFinishedLatch, 10, SECONDS); // 6 - Map> loadedPartitions = metastore.getPartitionsByNames(table, partitionNames); - assertThat(loadedPartitions.get(partitionName)) - .isNotNull() - .isPresent() - .hasValueSatisfying(partition -> assertThat(partition.getParameters()).isEqualTo(newPartitionParameters)); - - // verify no failure in the background thread - future.get(10, SECONDS); - } - finally { - getTableEnteredLatch.countDown(); - getTableReturnLatch.countDown(); - getTableFinishedLatch.countDown(); - getPartitionsByNamesEnteredLatch.countDown(); - getPartitionsByNamesReturnLatch.countDown(); - getPartitionsByNamesFinishedLatch.countDown(); - - executor.shutdownNow(); - assertThat(executor.awaitTermination(10, SECONDS)).isTrue(); - } - } - @Test public void testDropTable() { @@ -1188,18 
+1109,6 @@ private static HiveColumnStatistics intColumnStats(int nullsCount) return createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(nullsCount), OptionalLong.empty()); } - private static void await(CountDownLatch latch, long timeout, TimeUnit unit) - { - try { - boolean awaited = latch.await(timeout, unit); - checkState(awaited, "wait timed out"); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(); - } - } - private PartitionCachingAssertions assertThatCachingWithDisabledPartitionCache() { return new PartitionCachingAssertions(executor); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreWithQueryRunner.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreWithQueryRunner.java index a5d520820803c..93b184573fd70 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreWithQueryRunner.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreWithQueryRunner.java @@ -16,33 +16,35 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.inject.Key; import io.trino.Session; import io.trino.plugin.hive.HiveQueryRunner; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.RawHiveMetastoreFactory; import io.trino.spi.security.Identity; import io.trino.spi.security.SelectedRole; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; -import java.nio.file.Path; import java.util.List; import java.util.Optional; import static com.google.common.base.Verify.verify; -import static com.google.common.collect.Lists.cartesianProduct; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.spi.security.SelectedRole.Type.ROLE; import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.nio.file.Files.createTempDirectory; import static java.util.Collections.nCopies; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@TestInstance(PER_CLASS) +@Execution(SAME_THREAD) public class TestCachingHiveMetastoreWithQueryRunner extends AbstractTestQueryFramework { @@ -54,28 +56,27 @@ public class TestCachingHiveMetastoreWithQueryRunner private static final String ALICE_NAME = "alice"; private static final Session ALICE = getTestSession(new Identity.Builder(ALICE_NAME).build()); - private FileHiveMetastore fileHiveMetastore; + private HiveMetastore rawMetastore; @Override protected QueryRunner 
createQueryRunner()
            throws Exception
    {
-        Path temporaryMetastoreDirectory = createTempDirectory(null);
-        closeAfterClass(() -> deleteRecursively(temporaryMetastoreDirectory, ALLOW_INSECURE));
-
         DistributedQueryRunner queryRunner = HiveQueryRunner.builder(ADMIN)
                 .setNodeCount(3)
                 // Required by testPartitionAppend test.
                 // Coordinator needs to be excluded from workers to deterministically reproduce the original problem
                 // https://github.com/trinodb/trino/pull/6853
                 .setCoordinatorProperties(ImmutableMap.of("node-scheduler.include-coordinator", "false"))
-                .setMetastore(distributedQueryRunner -> fileHiveMetastore = createTestingFileHiveMetastore(temporaryMetastoreDirectory.toFile()))
                 .setHiveProperties(ImmutableMap.of(
                         "hive.security", "sql-standard",
                         "hive.metastore-cache-ttl", "60m",
                         "hive.metastore-refresh-interval", "10m"))
                 .build();
 
+        rawMetastore = getConnectorService(queryRunner, Key.get(HiveMetastoreFactory.class, RawHiveMetastoreFactory.class))
+                .createMetastore(Optional.empty());
+
         queryRunner.execute(ADMIN, "CREATE SCHEMA " + SCHEMA);
         queryRunner.execute("CREATE TABLE test (test INT)");
 
@@ -103,8 +104,27 @@ public void testCacheRefreshOnGrantAndRevoke()
                 .hasMessageContaining("Access Denied");
     }
 
-    @Test(dataProvider = "testCacheRefreshOnRoleGrantAndRevokeParams")
-    public void testCacheRefreshOnRoleGrantAndRevoke(List<String> grantRoleStatements, String revokeRoleStatement)
+    @Test
+    public void testCacheRefreshOnRoleGrantAndRevoke()
+    {
+        String grantSelectStatement = "GRANT SELECT ON test TO ROLE test_role";
+        String grantRoleStatement = "GRANT test_role TO " + ALICE_NAME + " IN " + CATALOG;
+        List<List<String>> grantRoleStatements = ImmutableList.of(
+                ImmutableList.of(grantSelectStatement, grantRoleStatement),
+                ImmutableList.of(grantRoleStatement, grantSelectStatement));
+        List<String> revokeRoleStatements = ImmutableList.of(
+                "DROP ROLE test_role IN " + CATALOG,
+                "REVOKE SELECT ON test FROM ROLE test_role",
+                "REVOKE test_role FROM " + ALICE_NAME + " IN " + CATALOG);
+
+        for (String roleRevoke : revokeRoleStatements) {
+            for (List<String> roleGrant : grantRoleStatements) {
+                testCacheRefreshOnRoleGrantAndRevoke(roleGrant, roleRevoke);
+            }
+        }
+    }
+
+    private void testCacheRefreshOnRoleGrantAndRevoke(List<String> grantRoleStatements, String revokeRoleStatement)
     {
         assertThatThrownBy(() -> getQueryRunner().execute(ALICE, "SELECT * FROM test"))
                 .hasMessageContaining("Access Denied");
@@ -128,7 +148,7 @@ public void testFlushHiveMetastoreCacheProcedureCallable()
         getQueryRunner().execute("SELECT initial FROM cached");
 
         // Rename column name in Metastore outside Trino
-        fileHiveMetastore.renameColumn("test", "cached", "initial", "renamed");
+        rawMetastore.renameColumn("test", "cached", "initial", "renamed");
 
         String renamedColumnQuery = "SELECT renamed FROM cached";
         // Should fail as Trino has old metadata cached
@@ -190,20 +210,4 @@ public void testPartitionAppend()
         String expected = Joiner.on(",").join(nCopies(nodeCount + 1, row));
         assertQuery("SELECT * FROM test_part_append", "VALUES " + expected);
     }
-
-    @DataProvider
-    public Object[][] testCacheRefreshOnRoleGrantAndRevokeParams()
-    {
-        String grantSelectStatement = "GRANT SELECT ON test TO ROLE test_role";
-        String grantRoleStatement = "GRANT test_role TO " + ALICE_NAME + " IN " + CATALOG;
-        List<List<String>> grantRoleStatements = ImmutableList.of(
-                ImmutableList.of(grantSelectStatement, grantRoleStatement),
-                ImmutableList.of(grantRoleStatement, grantSelectStatement));
-        List<String> revokeRoleStatements = ImmutableList.of(
-                "DROP ROLE test_role IN " + CATALOG,
-                "REVOKE SELECT ON test
FROM ROLE test_role", - "REVOKE test_role FROM " + ALICE_NAME + " IN " + CATALOG); - return cartesianProduct(grantRoleStatements, revokeRoleStatements).stream() - .map(a -> a.toArray(Object[]::new)).toArray(Object[][]::new); - } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/file/TestingFileHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/file/TestingFileHiveMetastore.java index e97982ff74840..9efac5c589845 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/file/TestingFileHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/file/TestingFileHiveMetastore.java @@ -15,20 +15,22 @@ import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastoreConfig; import java.io.File; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; - public final class TestingFileHiveMetastore { private TestingFileHiveMetastore() {} public static FileHiveMetastore createTestingFileHiveMetastore(File catalogDirectory) { - return createTestingFileHiveMetastore(HDFS_FILE_SYSTEM_FACTORY, Location.of(catalogDirectory.toURI().toString())); + catalogDirectory.mkdirs(); + return createTestingFileHiveMetastore( + new LocalFileSystemFactory(catalogDirectory.toPath()), + Location.of("local:///")); } public static FileHiveMetastore createTestingFileHiveMetastore(TrinoFileSystemFactory fileSystemFactory, Location catalogDirectory) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java deleted file mode 100644 index 5a1586a6eb6a1..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java +++ /dev/null @@ -1,1605 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.glue; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.airlift.concurrent.BoundedExecutor; -import io.airlift.log.Logger; -import io.airlift.slice.Slice; -import io.trino.plugin.hive.AbstractTestHiveLocal; -import io.trino.plugin.hive.HiveBasicStatistics; -import io.trino.plugin.hive.HiveMetastoreClosure; -import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.PartitionWithStatistics; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter; -import io.trino.spi.TrinoException; -import io.trino.spi.block.Block; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.connector.ColumnMetadata; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorOutputTableHandle; -import io.trino.spi.connector.ConnectorPageSink; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.predicate.ValueSet; -import io.trino.spi.statistics.ComputedStatistics; -import io.trino.spi.statistics.TableStatisticType; -import io.trino.spi.type.BigintType; -import io.trino.spi.type.DateType; -import io.trino.spi.type.IntegerType; -import io.trino.spi.type.SmallintType; -import io.trino.spi.type.TimestampType; -import io.trino.spi.type.TinyintType; -import io.trino.spi.type.VarcharType; -import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.Set; -import java.util.concurrent.Executor; -import java.util.function.Supplier; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.airlift.slice.Slices.utf8Slice; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.HiveColumnStatisticType.MAX_VALUE; -import static io.trino.plugin.hive.HiveColumnStatisticType.MIN_VALUE; -import static io.trino.plugin.hive.HiveColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; -import static 
io.trino.plugin.hive.HiveColumnStatisticType.NUMBER_OF_NON_NULL_VALUES; -import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; -import static io.trino.plugin.hive.ViewReaderUtil.ICEBERG_MATERIALIZED_VIEW_COMMENT; -import static io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG; -import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; -import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; -import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE; -import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf; -import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingAsyncGlueClient; -import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; -import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; -import static io.trino.testing.TestingPageSinkId.TESTING_PAGE_SINK_ID; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.lang.String.format; -import static java.lang.System.currentTimeMillis; -import static java.util.Collections.unmodifiableList; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.UUID.randomUUID; -import static java.util.concurrent.TimeUnit.DAYS; -import static org.apache.hadoop.hive.common.FileUtils.makePartName; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assumptions.abort; - -/* - * GlueHiveMetastore currently uses AWS Default Credential Provider Chain, - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default - * on ways to set your AWS credentials which will be needed to run this test. 
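 * In practice, one option among several that the default provider chain supports is exporting the
 * standard AWS environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_REGION) in the
 * shell that launches the test JVM; a profile in ~/.aws/credentials works as well.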
- */ -public class TestGlueHiveMetastore - extends AbstractTestHiveLocal -{ - private static final Logger log = Logger.get(TestGlueHiveMetastore.class); - - private static final String PARTITION_KEY = "part_key_1"; - private static final String PARTITION_KEY2 = "part_key_2"; - private static final String TEST_DATABASE_NAME_PREFIX = "test_glue"; - - private static final List CREATE_TABLE_COLUMNS = ImmutableList.of(new ColumnMetadata("id", BIGINT)); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, VarcharType.VARCHAR)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, VarcharType.VARCHAR)) - .add(new ColumnMetadata(PARTITION_KEY2, BigintType.BIGINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, TinyintType.TINYINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, SmallintType.SMALLINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, IntegerType.INTEGER)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, BigintType.BIGINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, DECIMAL_TYPE)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_DATE = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, DateType.DATE)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, TimestampType.TIMESTAMP_MILLIS)) - .build(); - private static final List VARCHAR_PARTITION_VALUES = ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"); - - protected static final HiveBasicStatistics HIVE_BASIC_STATISTICS = new HiveBasicStatistics(1000, 5000, 3000, 4000); - protected static final HiveColumnStatistics INTEGER_COLUMN_STATISTICS = createIntegerColumnStatistics( - OptionalLong.of(-1000), - OptionalLong.of(1000), - OptionalLong.of(1), - OptionalLong.of(2)); - - private HiveMetastoreClosure metastore; - private AWSGlueAsync glueClient; - - public TestGlueHiveMetastore() - { - super(TEST_DATABASE_NAME_PREFIX + randomUUID().toString().toLowerCase(ENGLISH).replace("-", "")); - } - - protected AWSGlueAsync getGlueClient() - { - return glueClient; - } - - @BeforeAll - @Override - public void initialize() - throws Exception - { - super.initialize(); - // uncomment to get extra AWS debug information -// Logging logging = Logging.initialize(); -// logging.setLevel("com.amazonaws.request", Level.DEBUG); - - metastore = new HiveMetastoreClosure(metastoreClient, TESTING_TYPE_MANAGER, false); - glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - } - - @Override - protected HiveMetastore createMetastore(File tempDir) - { - 
GlueHiveMetastoreConfig glueConfig = new GlueHiveMetastoreConfig(); - glueConfig.setDefaultWarehouseDir(tempDir.toURI().toString()); - glueConfig.setAssumeCanonicalPartitionKeys(true); - - Executor executor = new BoundedExecutor(this.executor, 10); - GlueMetastoreStats stats = new GlueMetastoreStats(); - return new GlueHiveMetastore( - HDFS_FILE_SYSTEM_FACTORY, - glueConfig, - executor, - new DefaultGlueColumnStatisticsProviderFactory(executor, executor), - createTestingAsyncGlueClient(glueConfig, stats), - stats, - new DefaultGlueMetastoreTableFilterProvider(true).get()); - } - - @Test - public void cleanupOrphanedDatabases() - { - long creationTimeMillisThreshold = currentTimeMillis() - DAYS.toMillis(1); - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - List orphanedDatabases = getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest(), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - stats.getGetDatabases()) - .map(GetDatabasesResult::getDatabaseList) - .flatMap(List::stream) - .filter(database -> database.getName().startsWith(TEST_DATABASE_NAME_PREFIX) && - database.getCreateTime().getTime() <= creationTimeMillisThreshold) - .map(Database::getName) - .collect(toImmutableList()); - - log.info("Found %s %s* databases that look orphaned, removing", orphanedDatabases.size(), TEST_DATABASE_NAME_PREFIX); - orphanedDatabases.forEach(database -> { - try { - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(database)); - } - catch (EntityNotFoundException e) { - log.info("Database [%s] not found, could be removed by other cleanup process", database); - } - catch (RuntimeException e) { - log.warn(e, "Failed to remove database [%s]", database); - } - }); - } - - @Test - @Override - public void testRenameTable() - { - // rename table is not yet supported by Glue - } - - @Test - @Override - public void testUpdateTableColumnStatisticsEmptyOptionalFields() - { - // this test expects consistency between written and read stats but this is not provided by glue at the moment - // when writing empty min/max statistics glue will return 0 to the readers - // in order to avoid incorrect data we skip writes for statistics with min/max = null - } - - @Test - @Override - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - { - // this test expects consistency between written and read stats but this is not provided by glue at the moment - // when writing empty min/max statistics glue will return 0 to the readers - // in order to avoid incorrect data we skip writes for statistics with min/max = null - } - - @Test - @Override - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this 
case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testStorePartitionWithStatistics() - throws Exception - { - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, EMPTY_ROWCOUNT_STATISTICS); - } - - @Test - @Override - public void testGetPartitions() - throws Exception - { - try { - SchemaTableName tableName = temporaryTable("get_partitions"); - createDummyPartitionedTable(tableName, CREATE_TABLE_COLUMNS_PARTITIONED); - HiveMetastore metastoreClient = getMetastoreClient(); - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of("ds"), TupleDomain.all()); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("ds=2016-01-01", "ds=2016-01-02")); - } - finally { - dropTable(tablePartitionFormat); - } - } - - @Test - public void testGetPartitionsWithFilterUsingReservedKeywordsAsColumnName() - throws Exception - { - SchemaTableName tableName = temporaryTable("get_partitions_with_filter_using_reserved_keyword_column_name"); - try { - String reservedKeywordPartitionColumnName = "key"; - String regularColumnPartitionName = "int_partition"; - List columns = ImmutableList.builder() - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata(reservedKeywordPartitionColumnName, createUnboundedVarcharType())) - .add(new ColumnMetadata(regularColumnPartitionName, BIGINT)) - .build(); - List partitionedBy = ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName); - - doCreateEmptyTable(tableName, ORC, columns, partitionedBy); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - String partitionName1 = makePartName(ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), ImmutableList.of("value1", "1")); - String partitionName2 = makePartName(ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), ImmutableList.of("value2", "2")); - - List partitions = ImmutableList.of(partitionName1, partitionName2) - .stream() - .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) - .collect(toImmutableList()); - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName1, currentStatistics -> ZERO_TABLE_STATISTICS); - 
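The comments in the partition-statistics tests above describe treating a reported row count of zero on a partitioned table as unknown statistics, so the CBO does not underestimate tables whose partitions were written by other engines. A minimal, self-contained sketch of that rule; the class and method names below are illustrative only, not the connector's actual API:

import java.util.OptionalLong;

final class PartitionRowCountSketch
{
    private PartitionRowCountSketch() {}

    // A zero row count reported for a table that has partitions is treated as "unknown" rather than "empty"
    static OptionalLong effectiveRowCount(boolean hasPartitions, OptionalLong reportedRowCount)
    {
        if (hasPartitions && reportedRowCount.isPresent() && reportedRowCount.getAsLong() == 0) {
            return OptionalLong.empty();
        }
        return reportedRowCount;
    }
}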
metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName2, currentStatistics -> ZERO_TABLE_STATISTICS); - - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), - TupleDomain.withColumnDomains(ImmutableMap.of(regularColumnPartitionName, Domain.singleValue(BIGINT, 2L)))); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("key=value2/int_partition=2")); - - // KEY is a reserved keyword in the grammar of the SQL parser used internally by Glue API - // and therefore should not be used in the partition filter - partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), - TupleDomain.withColumnDomains(ImmutableMap.of(reservedKeywordPartitionColumnName, Domain.singleValue(VARCHAR, utf8Slice("value1"))))); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("key=value1/int_partition=1", "key=value2/int_partition=2")); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testGetDatabasesLogsStats() - { - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - double initialCallCount = stats.getGetDatabases().getTime().getAllTime().getCount(); - long initialFailureCount = stats.getGetDatabases().getTotalFailures().getTotalCount(); - getMetastoreClient().getAllDatabases(); - assertThat(stats.getGetDatabases().getTime().getAllTime().getCount()).isGreaterThan(initialCallCount); - assertThat(stats.getGetDatabases().getTime().getAllTime().getAvg()).isGreaterThan(0.0); - assertThat(stats.getGetDatabases().getTotalFailures().getTotalCount()).isEqualTo(initialFailureCount); - } - - @Test - public void testGetDatabaseFailureLogsStats() - { - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - long initialFailureCount = stats.getGetDatabase().getTotalFailures().getTotalCount(); - assertThatThrownBy(() -> getMetastoreClient().getDatabase(null)) - .isInstanceOf(TrinoException.class) - .hasMessageStartingWith("Database name cannot be equal to null or empty"); - assertThat(stats.getGetDatabase().getTotalFailures().getTotalCount()).isEqualTo(initialFailureCount + 1); - } - - @Test - public void testGetPartitionsFilterVarChar() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-01-01") - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(VarcharType.VARCHAR, utf8Slice("2020-02-01"))) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(VarcharType.VARCHAR, utf8Slice("2020-02-01"), true, utf8Slice("2020-03-01"), true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(VarcharType.VARCHAR, utf8Slice("2020-03-01"))) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-01-01", "2020-02-01") - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - 
.addRanges(PARTITION_KEY, Range.lessThan(VarcharType.VARCHAR, utf8Slice("2020-03-01"))) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("2020-01-01"), - ImmutableList.of("2020-03-01", "2020-04-01"), - ImmutableList.of("2020-02-01", "2020-03-01"), - ImmutableList.of("2020-03-01", "2020-04-01"), - ImmutableList.of("2020-01-01", "2020-02-01"), - ImmutableList.of("2020-01-01", "2020-02-01"), - ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterBigInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addBigintValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(BigintType.BIGINT, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(BigintType.BIGINT, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(BigintType.BIGINT, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addBigintValues(PARTITION_KEY, 1L, 1000000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(BigintType.BIGINT, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "1000000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "1000000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "1000000"), - ImmutableList.of("1", "1000000"), - ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "1000000"))); - } - - @Test - public void testGetPartitionsFilterInteger() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addIntegerValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(IntegerType.INTEGER, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(IntegerType.INTEGER, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(IntegerType.INTEGER, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addIntegerValues(PARTITION_KEY, 1L, 1000000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(IntegerType.INTEGER, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "1000000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "1000000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "1000000"), - ImmutableList.of("1", "1000000"), - 
ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "1000000"))); - } - - @Test - public void testGetPartitionsFilterSmallInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addSmallintValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(SmallintType.SMALLINT, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(SmallintType.SMALLINT, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(SmallintType.SMALLINT, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addSmallintValues(PARTITION_KEY, 1L, 10000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(SmallintType.SMALLINT, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "10000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "10000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "10000"), - ImmutableList.of("1", "10000"), - ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "10000"))); - } - - @Test - public void testGetPartitionsFilterTinyInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 127L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(TinyintType.TINYINT, 10L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(TinyintType.TINYINT, 10L, true, 100L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(TinyintType.TINYINT, 10L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 1L, 127L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(TinyintType.TINYINT, 100L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, - PARTITION_KEY, - ImmutableList.of("1", "10", "100", "127"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("127"), - ImmutableList.of("100", "127"), - ImmutableList.of("10", "100"), - ImmutableList.of("10", "100", "127"), - ImmutableList.of("1", "127"), - ImmutableList.of("1", "10"), - ImmutableList.of("1", "10", "100", "127"))); - } - - @Test - public void testGetPartitionsFilterTinyIntNegatives() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, -128L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(TinyintType.TINYINT, 0L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(TinyintType.TINYINT, 0L, true, 50L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - 
.addRanges(PARTITION_KEY, Range.greaterThanOrEqual(TinyintType.TINYINT, 0L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 0L, -128L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(TinyintType.TINYINT, 0L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, - PARTITION_KEY, - ImmutableList.of("-128", "0", "50", "100"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("-128"), - ImmutableList.of("100", "50"), - ImmutableList.of("0", "50"), - ImmutableList.of("0", "100", "50"), - ImmutableList.of("-128", "0"), - ImmutableList.of("-128"), - ImmutableList.of("-128", "0", "100", "50"))); - } - - @Test - public void testGetPartitionsFilterDecimal() - throws Exception - { - String value1 = "1.000"; - String value2 = "10.134"; - String value3 = "25.111"; - String value4 = "30.333"; - - TupleDomain singleEquals = new PartitionFilterBuilder() - .addDecimalValues(PARTITION_KEY, value1) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(DECIMAL_TYPE, decimalOf(value2))) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(DECIMAL_TYPE, decimalOf(value2), true, decimalOf(value3), true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(DECIMAL_TYPE, decimalOf(value3))) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addDecimalValues(PARTITION_KEY, value1, value4) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(DECIMAL_TYPE, decimalOf("25.5"))) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, - PARTITION_KEY, - ImmutableList.of(value1, value2, value3, value4), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of(value1), - ImmutableList.of(value3, value4), - ImmutableList.of(value2, value3), - ImmutableList.of(value3, value4), - ImmutableList.of(value1, value4), - ImmutableList.of(value1, value2, value3), - ImmutableList.of(value1, value2, value3, value4))); - } - - // we don't presently know how to properly convert a Date type into a string that is compatible with Glue. 
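The comment above notes that Date partition values cannot be rendered as Glue-compatible filter strings, so such predicates fall back to the wildcard: Glue returns every partition and the remaining filtering happens on the Trino side. A rough sketch of that fallback pattern, using purely illustrative names rather than the actual GlueExpressionUtil API:

import java.util.Optional;

final class GlueFilterFallbackSketch
{
    private GlueFilterFallbackSketch() {}

    // Render a partition-key literal for a Glue filter expression, if the type has a known string form
    static Optional<String> toGlueLiteral(String hiveTypeName, Object value)
    {
        return switch (hiveTypeName) {
            case "string", "varchar" -> Optional.of("'" + value + "'");
            case "tinyint", "smallint", "int", "bigint", "decimal" -> Optional.of(String.valueOf(value));
            default -> Optional.empty(); // date, timestamp, ...: no known Glue-compatible form
        };
    }

    // An empty expression is the wildcard: every partition is listed and filtered afterwards
    static String expressionFor(String column, String hiveTypeName, Object value)
    {
        return toGlueLiteral(hiveTypeName, value)
                .map(literal -> column + " = " + literal)
                .orElse("");
    }
}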
- @Test - public void testGetPartitionsFilterDate() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addDateValues(PARTITION_KEY, 18000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(DateType.DATE, 19000L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(DateType.DATE, 19000L, true, 20000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(DateType.DATE, 19000L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addDateValues(PARTITION_KEY, 18000L, 21000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(DateType.DATE, 20000L)) - .build(); - // we are unable to convert Date to a string format that Glue will accept, so it should translate to the wildcard in all cases. Commented out results are - // what we expect if we are able to do a proper conversion - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_DATE, - PARTITION_KEY, - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of( - singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( -// ImmutableList.of("18000"), -// ImmutableList.of("20000", "21000"), -// ImmutableList.of("19000", "20000"), -// ImmutableList.of("19000", "20000", "21000"), -// ImmutableList.of("18000", "21000"), -// ImmutableList.of("18000", "19000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"))); - } - - @Test - public void testGetPartitionsFilterTwoPartitionKeys() - throws Exception - { - TupleDomain equalsFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-03-01") - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - TupleDomain rangeFilter = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(VarcharType.VARCHAR, utf8Slice("2020-02-01"))) - .addRanges(PARTITION_KEY2, Range.greaterThan(BigintType.BIGINT, 200L)) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of(equalsFilter, rangeFilter, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of(PartitionValues.make("2020-03-01", "300")), - ImmutableList.of( - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")))); - } - - @Test - public void testGetPartitionsFilterMaxLengthWildcard() - throws Exception - { - // this filter string will exceed the 2048 char limit set by glue, and we expect the filter to revert to the wildcard - TupleDomain filter = new 
PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "x".repeat(2048)) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(filter), - ImmutableList.of( - ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterTwoPartitionKeysPartialQuery() - throws Exception - { - // we expect the second constraint to still be present and provide filtering - TupleDomain equalsFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "x".repeat(2048)) - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of(equalsFilter), - ImmutableList.of(ImmutableList.of(PartitionValues.make("2020-03-01", "300")))); - } - - @Test - public void testGetPartitionsFilterNone() - throws Exception - { - // test both a global none and that with a single column none, and a valid domain with none() - TupleDomain noneFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.none(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(TupleDomain.none(), noneFilter), - ImmutableList.of(ImmutableList.of(), ImmutableList.of())); - } - - @Test - public void testGetPartitionsFilterNotNull() - throws Exception - { - TupleDomain notNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(notNullFilter), - ImmutableList.of(ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterIsNull() - throws Exception - { - TupleDomain isNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(isNullFilter), - ImmutableList.of(ImmutableList.of())); - } - - @Test - public void testGetPartitionsFilterIsNullWithValue() - throws Exception - { - List partitionList = new ArrayList<>(); - partitionList.add("100"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(new PartitionFilterBuilder() - // IS NULL - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build()), - ImmutableList.of(ImmutableList.of(GlueExpressionUtil.NULL_STRING))); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(new PartitionFilterBuilder() - // IS NULL or is a specific value - .addDomain(PARTITION_KEY, Domain.create(ValueSet.of(VARCHAR, utf8Slice("100")), true)) - .build()), - ImmutableList.of(ImmutableList.of("100", GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterEqualsOrIsNullWithValue() - throws Exception - { - TupleDomain 
equalsOrIsNullFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-03-01") - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add("2020-01-01"); - partitionList.add("2020-02-01"); - partitionList.add("2020-03-01"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(equalsOrIsNullFilter), - ImmutableList.of(ImmutableList.of("2020-03-01", GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterIsNotNull() - throws Exception - { - TupleDomain isNotNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add("100"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(isNotNullFilter), - ImmutableList.of(ImmutableList.of("100"))); - } - - @Test - public void testGetPartitionsFilterUnsupported() - throws Exception - { - // Numeric types are unsupported for IS (NOT) NULL predicate pushdown - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, Domain.onlyNull(TinyintType.TINYINT), "127"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, Domain.onlyNull(SmallintType.SMALLINT), "32767"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, Domain.onlyNull(IntegerType.INTEGER), "2147483647"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, Domain.onlyNull(BigintType.BIGINT), "9223372036854775807"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, Domain.onlyNull(DECIMAL_TYPE), "12345.12345"); - - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, Domain.notNull(TinyintType.TINYINT), "127"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, Domain.notNull(SmallintType.SMALLINT), "32767"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, Domain.notNull(IntegerType.INTEGER), "2147483647"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, Domain.notNull(BigintType.BIGINT), "9223372036854775807"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, Domain.notNull(DECIMAL_TYPE), "12345.12345"); - - // Date and timestamp aren't numeric types, but the pushdown is unsupported because of GlueExpressionUtil.canConvertSqlTypeToStringForGlue - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DATE, Domain.onlyNull(DateType.DATE), "2022-07-11"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, Domain.onlyNull(TimestampType.TIMESTAMP_MILLIS), "2022-07-11 01:02:03.123"); - - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DATE, Domain.notNull(DateType.DATE), "2022-07-11"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, Domain.notNull(TimestampType.TIMESTAMP_MILLIS), "2022-07-11 01:02:03.123"); - } - - @Test - @Override - public void testPartitionSchemaMismatch() - { - abort("tests using existing tables are not supported"); - } - - private void testGetPartitionsFilterUnsupported(List columnMetadata, Domain domain, String partitionValue) - throws Exception - { - TupleDomain 
isNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, domain) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add(partitionValue); - partitionList.add(null); - - doGetPartitionsFilterTest( - columnMetadata, - PARTITION_KEY, - partitionList, - ImmutableList.of(isNullFilter), - // Currently, we get NULL partition from Glue and filter it in our side because - // (column '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer' - ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterEqualsAndIsNotNull() - throws Exception - { - TupleDomain equalsAndIsNotNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make(null, "300")), - ImmutableList.of(equalsAndIsNotNullFilter), - ImmutableList.of(ImmutableList.of(PartitionValues.make("2020-03-01", "300")))); - } - - @Test - public void testUpdateStatisticsOnCreate() - { - SchemaTableName tableName = temporaryTable("update_statistics_create"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT)); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE)); - ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle, TESTING_PAGE_SINK_ID); - MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT) - .row(1L) - .row(2L) - .row(3L) - .row(4L) - .row(5L) - .build(); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // prepare statistics - ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of()) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .build(); - - // finish CTAS - metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics)); - transaction.commit(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionedStatisticsOnCreate() - { - SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = 
ImmutableList.of( - new ColumnMetadata("a_column", BigintType.BIGINT), - new ColumnMetadata("part_column", BigintType.BIGINT)); - - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column"))); - ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle, TESTING_PAGE_SINK_ID); - MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT) - .row(1L, 1L) - .row(2L, 1L) - .row(3L, 1L) - .row(4L, 2L) - .row(5L, 2L) - .build(); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // prepare statistics - ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1))) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .build(); - ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2))) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .build(); - - // finish CTAS - metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2)); - transaction.commit(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsLargeNumberOfColumns() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_large_number_of_columns"); - try { - ImmutableList.Builder columns = ImmutableList.builder(); - ImmutableMap.Builder columnStatistics = ImmutableMap.builder(); - for (int i = 1; i < 1500; ++i) { - String columnName = "t_bigint " + i + "_" + String.join("", Collections.nCopies(240, "x")); - columns.add(new ColumnMetadata(columnName, BIGINT)); - columnStatistics.put( - columnName, - createIntegerColumnStatistics( - OptionalLong.of(-1000 - i), - OptionalLong.of(1000 + i), - OptionalLong.of(i), - OptionalLong.of(2L * i))); - } - - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics.buildOrThrow()).build(); - - doCreateEmptyTable(tableName, ORC, columns.build()); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsLongColumnNames() - throws Exception - { - SchemaTableName tableName = 
temporaryTable("test_statistics_long_column_name"); - try { - String columnName1 = String.join("", Collections.nCopies(255, "x")); - String columnName2 = String.join("", Collections.nCopies(255, "ӆ")); - String columnName3 = String.join("", Collections.nCopies(255, "ö")); - - List columns = List.of( - new ColumnMetadata(columnName1, BIGINT), - new ColumnMetadata(columnName2, BIGINT), - new ColumnMetadata(columnName3, BIGINT)); - - Map columnStatistics = Map.of( - columnName1, INTEGER_COLUMN_STATISTICS, - columnName2, INTEGER_COLUMN_STATISTICS, - columnName3, INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - doCreateEmptyTable(tableName, ORC, columns); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(ZERO_TABLE_STATISTICS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsColumnModification() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_column_modification"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT), - new ColumnMetadata("column3", BIGINT)); - - doCreateEmptyTable(tableName, ORC, columns); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS, - "column2", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - // set table statistics for column1 - metastore.updateTableStatistics( - tableName.getSchemaName(), - tableName.getTableName(), - NO_ACID_TRANSACTION, - actualStatistics -> { - assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); - return partitionStatistics; - }); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - - metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column1", "column4"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics( - HIVE_BASIC_STATISTICS, - Map.of("column2", INTEGER_COLUMN_STATISTICS))); - - metastore.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "column2"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics(HIVE_BASIC_STATISTICS, Map.of())); - - metastore.addColumn(tableName.getSchemaName(), tableName.getTableName(), "column5", HiveType.HIVE_INT, "comment"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics(HIVE_BASIC_STATISTICS, Map.of())); - - // TODO: column1 stats should be removed on column delete. However this is tricky since stats can be stored in multiple partitions. 
- metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column4", "column1"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics( - HIVE_BASIC_STATISTICS, - Map.of("column1", INTEGER_COLUMN_STATISTICS))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsPartitionedTableColumnModification() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_partitioned_table_statistics_column_modification"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT), - new ColumnMetadata("ds", VARCHAR)); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS, - "column2", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - createDummyPartitionedTable(tableName, columns); - GlueHiveMetastore metastoreClient = (GlueHiveMetastore) getMetastoreClient(); - double countBefore = metastoreClient.getStats().getBatchUpdatePartition().getTime().getAllTime().getCount(); - - metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-01", actualStatistics -> partitionStatistics); - - assertThat(metastoreClient.getStats().getBatchUpdatePartition().getTime().getAllTime().getCount()).isEqualTo(countBefore + 1); - PartitionStatistics tableStatistics = new PartitionStatistics(createEmptyStatistics(), Map.of()); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - - // renaming table column does not rename partition columns - metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column1", "column4"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - - // dropping table column does not drop partition columns - metastore.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "column2"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInvalidColumnStatisticsMetadata() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_invalid_column_metadata"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT)); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - doCreateEmptyTable(tableName, ORC, columns); - - // set table 
statistics for column1 - metastore.updateTableStatistics( - tableName.getSchemaName(), - tableName.getTableName(), - NO_ACID_TRANSACTION, - actualStatistics -> { - assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); - return partitionStatistics; - }); - - Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - TableInput tableInput = GlueInputConverter.convertTable(table); - tableInput.setParameters(ImmutableMap.builder() - .putAll(tableInput.getParameters()) - .put("column_stats_bad_data", "bad data") - .buildOrThrow()); - getGlueClient().updateTable(new UpdateTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withTableInput(tableInput)); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testPartitionColumnProperties() - { - // Glue currently does not support parameters on the partitioning columns - assertThatThrownBy(super::testPartitionColumnProperties) - .isInstanceOf(TrinoException.class) - .hasMessageStartingWith("Parameters not supported for partition columns (Service: AWSGlue; Status Code: 400; Error Code: InvalidInputException;"); - } - - @Test - public void testGlueObjectsWithoutStorageDescriptor() - { - // StorageDescriptor is an Optional field for Glue tables. - SchemaTableName table = temporaryTable("test_missing_storage_descriptor"); - DeleteTableRequest deleteTableRequest = new DeleteTableRequest() - .withDatabaseName(table.getSchemaName()) - .withName(table.getTableName()); - - try { - Supplier resetTableInput = () -> new TableInput() - .withStorageDescriptor(null) - .withName(table.getTableName()) - .withTableType(EXTERNAL_TABLE.name()); - - TableInput tableInput = resetTableInput.get(); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - - assertThatThrownBy(() -> metastore.getTable(table.getSchemaName(), table.getTableName())) - .hasMessageStartingWith("Table StorageDescriptor is null for table"); - glueClient.deleteTable(deleteTableRequest); - - // Iceberg table - tableInput = resetTableInput.get().withParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - assertThat(isIcebergTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - glueClient.deleteTable(deleteTableRequest); - - // Delta Lake table - tableInput = resetTableInput.get().withParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - assertThat(isDeltaLakeTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - glueClient.deleteTable(deleteTableRequest); - - // Iceberg materialized view - tableInput = resetTableInput.get().withTableType(VIRTUAL_VIEW.name()) - .withViewOriginalText("/* Presto Materialized View: eyJvcmlnaW5hbFNxbCI6IlNFTEVDVCAxIiwiY29sdW1ucyI6W3sibmFtZSI6ImEiLCJ0eXBlIjoiaW50ZWdlciJ9XX0= */") - .withViewExpandedText(ICEBERG_MATERIALIZED_VIEW_COMMENT) - .withParameters(ImmutableMap.of( - PRESTO_VIEW_FLAG, "true", - TABLE_COMMENT, ICEBERG_MATERIALIZED_VIEW_COMMENT)); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) 
- .withTableInput(tableInput)); - assertThat(isTrinoMaterializedView(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - materializedViews.add(table); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - // Not a view - assertThat(metadata.listViews(session, Optional.empty())) - .doesNotContain(table); - assertThat(metadata.listViews(session, Optional.of(table.getSchemaName()))) - .doesNotContain(table); - assertThat(metadata.getView(session, table)).isEmpty(); - } - finally { - materializedViews.remove(table); - } - } - finally { - // Table cannot be dropped through HiveMetastore since a TableHandle cannot be created - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(table.getSchemaName()) - .withName(table.getTableName())); - } - } - - @Test - public void testAlterTableComment() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_alter_table_comment"); - doCreateEmptyTable(tableName, ORC, ImmutableList.of(new ColumnMetadata("name", BIGINT)), ImmutableList.of()); - try { - assertThat(metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters()).doesNotContainKey(TABLE_COMMENT); - metastore.commentTable(tableName.getSchemaName(), tableName.getTableName(), Optional.of("a table comment")); - Map tableParameters = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters(); - assertThat(tableParameters).containsEntry(TABLE_COMMENT, "a table comment"); - - metastore.commentTable(tableName.getSchemaName(), tableName.getTableName(), Optional.empty()); - tableParameters = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters(); - assertThat(tableParameters.get(TABLE_COMMENT)).isNull(); - } - finally { - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withName(tableName.getTableName())); - } - } - - @Test - public void testAlterColumnComment() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_alter_column_comment"); - List columns = ImmutableList.of( - new ColumnMetadata("first_column", BIGINT), - new ColumnMetadata("second_column", VARCHAR), - new ColumnMetadata("partition_column", BIGINT)); - createDummyPartitionedTable(tableName, columns, ImmutableList.of("partition_column"), ImmutableList.of()); - try { - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "second_column", Optional.of("second column comment")); - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "partition_column", Optional.of("partition column comment")); - - Table withComment = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(withComment.getColumn("first_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("second_column").orElseThrow().getComment()).isEqualTo(Optional.of("second column comment")); - assertThat(withComment.getColumn("partition_column").orElseThrow().getComment()).isEqualTo(Optional.of("partition column comment")); - - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "second_column", Optional.empty()); - withComment = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - 
assertThat(withComment.getColumn("first_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("second_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("partition_column").orElseThrow().getComment()).isEqualTo(Optional.of("partition column comment")); - } - finally { - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withName(tableName.getTableName())); - } - } - - private Block singleValueBlock(long value) - { - BlockBuilder blockBuilder = BIGINT.createBlockBuilder(null, 1); - BIGINT.writeLong(blockBuilder, value); - return blockBuilder.build(); - } - - private void doGetPartitionsFilterTest( - List columnMetadata, - String partitionColumnName, - List partitionStringValues, - List> filterList, - List> expectedSingleValueList) - throws Exception - { - List partitionValuesList = partitionStringValues.stream() - .map(PartitionValues::make) - .collect(toImmutableList()); - List> expectedPartitionValuesList = expectedSingleValueList.stream() - .map(expectedValue -> expectedValue.stream() - .map(PartitionValues::make) - .collect(toImmutableList())) - .collect(toImmutableList()); - doGetPartitionsFilterTest(columnMetadata, ImmutableList.of(partitionColumnName), partitionValuesList, filterList, expectedPartitionValuesList); - } - - /** - * @param filterList should be same sized list as expectedValuesList - */ - private void doGetPartitionsFilterTest( - List columnMetadata, - List partitionColumnNames, - List partitionValues, - List> filterList, - List> expectedValuesList) - throws Exception - { - try (CloseableSchamaTableName closeableTableName = new CloseableSchamaTableName(temporaryTable("get_partitions"))) { - SchemaTableName tableName = closeableTableName.getSchemaTableName(); - createDummyPartitionedTable(tableName, columnMetadata, partitionColumnNames, partitionValues); - HiveMetastore metastoreClient = getMetastoreClient(); - - for (int i = 0; i < filterList.size(); i++) { - TupleDomain filter = filterList.get(i); - List expectedValues = expectedValuesList.get(i); - List expectedResults = expectedValues.stream() - .map(expectedPartitionValues -> makePartName(partitionColumnNames, expectedPartitionValues.getValues())) - .collect(toImmutableList()); - - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - partitionColumnNames, - filter); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()) - .describedAs(format("lists \nactual: %s\nexpected: %s\nmismatch for filter %s (input index %d)\n", partitionNames.get(), expectedResults, filter, i)) - .isEqualTo(expectedResults); - } - } - } - - private void createDummyPartitionedTable(SchemaTableName tableName, List columns, List partitionColumnNames, List partitionValues) - throws Exception - { - doCreateEmptyTable(tableName, ORC, columns, partitionColumnNames); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - List partitions = new ArrayList<>(); - List partitionNames = new ArrayList<>(); - partitionValues.stream() - .map(partitionValue -> makePartName(partitionColumnNames, partitionValue.values)) - .forEach( - partitionName -> { - partitions.add(new PartitionWithStatistics(createDummyPartition(table, 
partitionName), partitionName, PartitionStatistics.empty())); - partitionNames.add(partitionName); - }); - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - partitionNames.forEach( - partitionName -> metastoreClient.updatePartitionStatistics( - tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> ZERO_TABLE_STATISTICS)); - } - - private class CloseableSchamaTableName - implements AutoCloseable - { - private final SchemaTableName schemaTableName; - - private CloseableSchamaTableName(SchemaTableName schemaTableName) - { - this.schemaTableName = schemaTableName; - } - - public SchemaTableName getSchemaTableName() - { - return schemaTableName; - } - - @Override - public void close() - { - dropTable(schemaTableName); - } - } - - // container class for readability. Each value is one for a partitionKey, in order they appear in the schema - private static class PartitionValues - { - private final List values; - - private static PartitionValues make(String... values) - { - return new PartitionValues(Arrays.asList(values)); - } - - private PartitionValues(List values) - { - // Elements are nullable - //noinspection Java9CollectionFactory - this.values = unmodifiableList(new ArrayList<>(requireNonNull(values, "values is null"))); - } - - public List getValues() - { - return values; - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java deleted file mode 100644 index e4a2b5ee09f10..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.metastore.thrift; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.errorprone.annotations.concurrent.GuardedBy; -import io.trino.hive.thrift.metastore.Database; -import io.trino.hive.thrift.metastore.FieldSchema; -import io.trino.hive.thrift.metastore.Partition; -import io.trino.hive.thrift.metastore.PrincipalPrivilegeSet; -import io.trino.hive.thrift.metastore.PrincipalType; -import io.trino.hive.thrift.metastore.Table; -import io.trino.plugin.hive.HiveColumnStatisticType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.SchemaAlreadyExistsException; -import io.trino.plugin.hive.TableAlreadyExistsException; -import io.trino.plugin.hive.TableType; -import io.trino.plugin.hive.acid.AcidTransaction; -import io.trino.plugin.hive.metastore.HivePrincipal; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.plugin.hive.metastore.PartitionWithStatistics; -import io.trino.spi.TrinoException; -import io.trino.spi.connector.SchemaNotFoundException; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.RoleGrant; -import io.trino.spi.type.Type; -import org.apache.hadoop.fs.Path; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.net.URI; -import java.util.Collection; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; -import static io.trino.plugin.hive.metastore.MetastoreUtil.partitionKeyFilterToStringList; -import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition; -import static io.trino.plugin.hive.util.HiveUtil.toPartitionValues; -import static io.trino.spi.StandardErrorCode.SCHEMA_NOT_EMPTY; -import static java.util.Locale.US; -import static java.util.Objects.requireNonNull; -import static org.apache.hadoop.hive.common.FileUtils.makePartName; - -public class InMemoryThriftMetastore - implements ThriftMetastore -{ - @GuardedBy("this") - private final Map databases = new HashMap<>(); - @GuardedBy("this") - private final Map relations = new HashMap<>(); - @GuardedBy("this") - private final Map views = new HashMap<>(); - @GuardedBy("this") - private final Map partitions = new HashMap<>(); - @GuardedBy("this") - private final Map columnStatistics = new HashMap<>(); - @GuardedBy("this") - private final Map partitionColumnStatistics = new HashMap<>(); - @GuardedBy("this") - private final Map> 
tablePrivileges = new HashMap<>(); - - private final File baseDirectory; - private final boolean assumeCanonicalPartitionKeys; - - public InMemoryThriftMetastore(File baseDirectory, ThriftMetastoreConfig metastoreConfig) - { - this.baseDirectory = requireNonNull(baseDirectory, "baseDirectory is null"); - this.assumeCanonicalPartitionKeys = requireNonNull(metastoreConfig).isAssumeCanonicalPartitionKeys(); - checkArgument(!baseDirectory.exists(), "Base directory already exists"); - checkArgument(baseDirectory.mkdirs(), "Could not create base directory"); - } - - @Override - public synchronized void createDatabase(Database database) - { - requireNonNull(database, "database is null"); - - File directory; - if (database.getLocationUri() != null) { - directory = new File(URI.create(database.getLocationUri())); - } - else { - // use Hive default naming convention - directory = new File(baseDirectory, database.getName() + ".db"); - database = database.deepCopy(); - database.setLocationUri(directory.toURI().toString()); - } - - checkArgument(!directory.exists(), "Database directory already exists"); - checkArgument(isParentDir(directory, baseDirectory), "Database directory must be inside of the metastore base directory"); - checkArgument(directory.mkdirs(), "Could not create database directory"); - - if (databases.putIfAbsent(database.getName(), database) != null) { - throw new SchemaAlreadyExistsException(database.getName()); - } - } - - // TODO: respect deleteData - @Override - public synchronized void dropDatabase(String databaseName, boolean deleteData) - { - if (!databases.containsKey(databaseName)) { - throw new SchemaNotFoundException(databaseName); - } - if (!getAllTables(databaseName).isEmpty()) { - throw new TrinoException(SCHEMA_NOT_EMPTY, "Schema not empty: " + databaseName); - } - databases.remove(databaseName); - } - - @Override - public synchronized void alterDatabase(String databaseName, Database newDatabase) - { - String newDatabaseName = newDatabase.getName(); - - if (databaseName.equals(newDatabaseName)) { - if (databases.replace(databaseName, newDatabase) == null) { - throw new SchemaNotFoundException(databaseName); - } - return; - } - - Database database = databases.get(databaseName); - if (database == null) { - throw new SchemaNotFoundException(databaseName); - } - if (databases.putIfAbsent(newDatabaseName, database) != null) { - throw new SchemaAlreadyExistsException(newDatabaseName); - } - databases.remove(databaseName); - - rewriteKeys(relations, name -> new SchemaTableName(newDatabaseName, name.getTableName())); - rewriteKeys(views, name -> new SchemaTableName(newDatabaseName, name.getTableName())); - rewriteKeys(partitions, name -> name.withSchemaName(newDatabaseName)); - rewriteKeys(tablePrivileges, name -> name.withDatabase(newDatabaseName)); - } - - @Override - public synchronized List getAllDatabases() - { - return ImmutableList.copyOf(databases.keySet()); - } - - @Override - public synchronized void createTable(Table table) - { - TableType tableType = TableType.valueOf(table.getTableType()); - checkArgument(EnumSet.of(MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW).contains(tableType), "Invalid table type: %s", tableType); - - if (tableType == VIRTUAL_VIEW) { - checkArgument(table.getSd().getLocation() == null, "Storage location for view must be null"); - } - else { - File directory = new File(new Path(table.getSd().getLocation()).toUri()); - checkArgument(directory.exists(), "Table directory [%s] does not exist", directory); - if (tableType == MANAGED_TABLE) { - 
checkArgument(isParentDir(directory, baseDirectory), "Table directory must be inside of the metastore base directory"); - } - } - - SchemaTableName schemaTableName = new SchemaTableName(table.getDbName(), table.getTableName()); - Table tableCopy = table.deepCopy(); - - if (relations.putIfAbsent(schemaTableName, tableCopy) != null) { - throw new TableAlreadyExistsException(schemaTableName); - } - - if (tableType == VIRTUAL_VIEW) { - views.put(schemaTableName, tableCopy); - } - - PrincipalPrivilegeSet privileges = table.getPrivileges(); - if (privileges != null && (!privileges.getUserPrivileges().isEmpty() || !privileges.getGroupPrivileges().isEmpty() || !privileges.getRolePrivileges().isEmpty())) { - throw new UnsupportedOperationException(); - } - } - - @Override - public synchronized void dropTable(String databaseName, String tableName, boolean deleteData) - { - List locations = listAllDataPaths(this, databaseName, tableName); - - SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - Table table = relations.remove(schemaTableName); - if (table == null) { - throw new TableNotFoundException(schemaTableName); - } - views.remove(schemaTableName); - partitions.keySet().removeIf(partitionName -> partitionName.matches(databaseName, tableName)); - - // remove data - if (deleteData && table.getTableType().equals(MANAGED_TABLE.name())) { - for (String location : locations) { - if (location != null) { - File directory = new File(new Path(location).toUri()); - checkArgument(isParentDir(directory, baseDirectory), "Table directory must be inside of the metastore base directory"); - deleteDirectory(directory); - } - } - } - } - - private static List listAllDataPaths(ThriftMetastore metastore, String schemaName, String tableName) - { - ImmutableList.Builder locations = ImmutableList.builder(); - Table table = metastore.getTable(schemaName, tableName).get(); - if (table.getSd().getLocation() != null) { - // For unpartitioned table, there should be nothing directly under this directory. - // But including this location in the set makes the directory content assert more - // extensive, which is desirable. 
- locations.add(table.getSd().getLocation()); - } - List partitionColumnNames = table.getPartitionKeys().stream() - .map(FieldSchema::getName) - .collect(toImmutableList()); - Optional> partitionNames = metastore.getPartitionNamesByFilter(schemaName, tableName, partitionColumnNames, TupleDomain.all()); - if (partitionNames.isPresent()) { - metastore.getPartitionsByNames(schemaName, tableName, partitionNames.get()).stream() - .map(partition -> partition.getSd().getLocation()) - .filter(location -> !location.startsWith(table.getSd().getLocation())) - .forEach(locations::add); - } - - return locations.build(); - } - - @Override - public synchronized void alterTable(String databaseName, String tableName, Table newTable) - { - SchemaTableName oldName = new SchemaTableName(databaseName, tableName); - SchemaTableName newName = new SchemaTableName(newTable.getDbName(), newTable.getTableName()); - - // if the name did not change, this is a simple schema change - if (oldName.equals(newName)) { - if (relations.replace(oldName, newTable) == null) { - throw new TableNotFoundException(oldName); - } - return; - } - - // remove old table definition and add the new one - Table table = relations.get(oldName); - if (table == null) { - throw new TableNotFoundException(oldName); - } - - if (relations.putIfAbsent(newName, newTable) != null) { - throw new TableAlreadyExistsException(newName); - } - relations.remove(oldName); - } - - @Override - public void alterTransactionalTable(Table table, long transactionId, long writeId) - { - alterTable(table.getDbName(), table.getTableName(), table); - } - - @Override - public synchronized List getAllTables(String databaseName) - { - ImmutableList.Builder tables = ImmutableList.builder(); - for (SchemaTableName schemaTableName : this.relations.keySet()) { - if (schemaTableName.getSchemaName().equals(databaseName)) { - tables.add(schemaTableName.getTableName()); - } - } - return tables.build(); - } - - @Override - public synchronized List getTablesWithParameter(String databaseName, String parameterKey, String parameterValue) - { - requireNonNull(parameterKey, "parameterKey is null"); - requireNonNull(parameterValue, "parameterValue is null"); - - return relations.entrySet().stream() - .filter(entry -> entry.getKey().getSchemaName().equals(databaseName) - && parameterValue.equals(entry.getValue().getParameters().get(parameterKey))) - .map(entry -> entry.getKey().getTableName()) - .collect(toImmutableList()); - } - - @Override - public synchronized List getAllViews(String databaseName) - { - ImmutableList.Builder tables = ImmutableList.builder(); - for (SchemaTableName schemaTableName : this.views.keySet()) { - if (schemaTableName.getSchemaName().equals(databaseName)) { - tables.add(schemaTableName.getTableName()); - } - } - return tables.build(); - } - - @Override - public synchronized Optional> getAllTables() - { - return Optional.of(ImmutableList.copyOf(relations.keySet())); - } - - @Override - public synchronized Optional> getAllViews() - { - return Optional.of(ImmutableList.copyOf(views.keySet())); - } - - @Override - public synchronized Optional getDatabase(String databaseName) - { - return Optional.ofNullable(databases.get(databaseName)); - } - - @Override - public synchronized void addPartitions(String databaseName, String tableName, List partitionsWithStatistics) - { - for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) { - Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); - if 
(partition.getParameters() == null) { - partition.setParameters(ImmutableMap.of()); - } - PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName()); - partitions.put(partitionKey, partition); - partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics()); - } - } - - @Override - public synchronized void dropPartition(String databaseName, String tableName, List parts, boolean deleteData) - { - partitions.entrySet().removeIf(entry -> - entry.getKey().matches(databaseName, tableName) && entry.getValue().getValues().equals(parts)); - } - - @Override - public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) - { - Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); - if (partition.getParameters() == null) { - partition.setParameters(ImmutableMap.of()); - } - PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName()); - partitions.put(partitionKey, partition); - partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics()); - } - - @Override - public synchronized Optional getPartition(String databaseName, String tableName, List partitionValues) - { - PartitionName name = PartitionName.partition(databaseName, tableName, partitionValues); - Partition partition = partitions.get(name); - if (partition == null) { - return Optional.empty(); - } - return Optional.of(partition.deepCopy()); - } - - @Override - public synchronized Optional> getPartitionNamesByFilter(String databaseName, String tableName, List columnNames, TupleDomain partitionKeysFilter) - { - Optional> parts = partitionKeyFilterToStringList(columnNames, partitionKeysFilter, assumeCanonicalPartitionKeys); - - if (parts.isEmpty()) { - return Optional.of(ImmutableList.of()); - } - return Optional.of(partitions.entrySet().stream() - .filter(entry -> partitionMatches(entry.getValue(), databaseName, tableName, parts.get())) - .map(entry -> entry.getKey().getPartitionName()) - .collect(toImmutableList())); - } - - private static boolean partitionMatches(Partition partition, String databaseName, String tableName, List parts) - { - if (!partition.getDbName().equals(databaseName) || - !partition.getTableName().equals(tableName)) { - return false; - } - List values = partition.getValues(); - if (values.size() != parts.size()) { - return false; - } - for (int i = 0; i < values.size(); i++) { - String part = parts.get(i); - if (!part.isEmpty() && !values.get(i).equals(part)) { - return false; - } - } - return true; - } - - @Override - public synchronized List getPartitionsByNames(String databaseName, String tableName, List partitionNames) - { - ImmutableList.Builder builder = ImmutableList.builder(); - for (String name : partitionNames) { - PartitionName partitionName = PartitionName.partition(databaseName, tableName, name); - Partition partition = partitions.get(partitionName); - if (partition == null) { - return ImmutableList.of(); - } - builder.add(partition.deepCopy()); - } - return builder.build(); - } - - @Override - public synchronized Optional
getTable(String databaseName, String tableName) - { - SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - return Optional.ofNullable(relations.get(schemaTableName)); - } - - @Override - public Set getSupportedColumnStatistics(Type type) - { - return ThriftMetastoreUtil.getSupportedColumnStatistics(type); - } - - @Override - public synchronized PartitionStatistics getTableStatistics(Table table) - { - return getTableStatistics(table.getDbName(), table.getTableName()); - } - - private synchronized PartitionStatistics getTableStatistics(String databaseName, String tableName) - { - SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - PartitionStatistics statistics = columnStatistics.get(schemaTableName); - if (statistics == null) { - statistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); - } - return statistics; - } - - @Override - public synchronized Map getPartitionStatistics(Table table, List partitions) - { - List partitionColumns = table.getPartitionKeys().stream() - .map(FieldSchema::getName) - .collect(toImmutableList()); - Set partitionNames = partitions.stream() - .map(partition -> makePartName(partitionColumns, partition.getValues())) - .collect(toImmutableSet()); - return getPartitionStatistics(table.getDbName(), table.getTableName(), partitionNames); - } - - private synchronized Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) - { - ImmutableMap.Builder result = ImmutableMap.builder(); - for (String partitionName : partitionNames) { - PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionName); - PartitionStatistics statistics = partitionColumnStatistics.get(partitionKey); - if (statistics == null) { - statistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); - } - result.put(partitionName, statistics); - } - return result.buildOrThrow(); - } - - @Override - public synchronized void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, Function update) - { - columnStatistics.put(new SchemaTableName(databaseName, tableName), update.apply(getTableStatistics(databaseName, tableName))); - } - - @Override - public synchronized void updatePartitionStatistics(Table table, String partitionName, Function update) - { - PartitionName partitionKey = PartitionName.partition(table.getDbName(), table.getTableName(), partitionName); - partitionColumnStatistics.put(partitionKey, update.apply(getPartitionStatistics(table.getDbName(), table.getTableName(), ImmutableSet.of(partitionName)).get(partitionName))); - } - - @Override - public void createRole(String role, String grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropRole(String role) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoles() - { - throw new UnsupportedOperationException(); - } - - @Override - public void grantRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeRoles(Set roles, Set grantees, boolean adminOption, HivePrincipal grantor) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listRoleGrants(HivePrincipal principal) - { - throw new UnsupportedOperationException(); - } - - @Override - public Set listTablePrivileges(String databaseName, String tableName, Optional tableOwner, Optional principal) - { - 
return ImmutableSet.of(); - } - - @Override - public void grantTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public void revokeTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, HivePrincipal grantor, Set privileges, boolean grantOption) - { - throw new UnsupportedOperationException(); - } - - @Override - public Optional getFunction(String databaseName, String functionName) - { - throw new UnsupportedOperationException(); - } - - @Override - public Collection getFunctions(String databaseName, String functionName) - { - throw new UnsupportedOperationException(); - } - - @Override - public void createFunction(io.trino.hive.thrift.metastore.Function function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void alterFunction(io.trino.hive.thrift.metastore.Function function) - { - throw new UnsupportedOperationException(); - } - - @Override - public void dropFunction(String databaseName, String functionName) - { - throw new UnsupportedOperationException(); - } - - private static boolean isParentDir(File directory, File baseDirectory) - { - for (File parent = directory.getParentFile(); parent != null; parent = parent.getParentFile()) { - if (parent.equals(baseDirectory)) { - return true; - } - } - return false; - } - - private static void deleteDirectory(File dir) - { - try { - deleteRecursively(dir.toPath(), ALLOW_INSECURE); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private static class PartitionName - { - private final String schemaName; - private final String tableName; - private final List partitionValues; - private final String partitionName; // does not participate in equals and hashValue - - private PartitionName(String schemaName, String tableName, List partitionValues, String partitionName) - { - this.schemaName = schemaName.toLowerCase(US); - this.tableName = tableName.toLowerCase(US); - this.partitionValues = requireNonNull(partitionValues, "partitionValues is null"); - this.partitionName = partitionName; - } - - public static PartitionName partition(String schemaName, String tableName, String partitionName) - { - return new PartitionName(schemaName.toLowerCase(US), tableName.toLowerCase(US), toPartitionValues(partitionName), partitionName); - } - - public static PartitionName partition(String schemaName, String tableName, List partitionValues) - { - return new PartitionName(schemaName.toLowerCase(US), tableName.toLowerCase(US), partitionValues, null); - } - - public String getPartitionName() - { - return requireNonNull(partitionName, "partitionName is null"); - } - - public boolean matches(String schemaName, String tableName) - { - return this.schemaName.equals(schemaName) && - this.tableName.equals(tableName); - } - - public PartitionName withSchemaName(String schemaName) - { - return new PartitionName(schemaName, tableName, partitionValues, partitionName); - } - - @Override - public int hashCode() - { - return Objects.hash(schemaName, tableName, partitionValues); - } - - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (obj == null || getClass() != obj.getClass()) { - return false; - } - PartitionName other = (PartitionName) obj; - return Objects.equals(this.schemaName, other.schemaName) - && Objects.equals(this.tableName, other.tableName) - && 
Objects.equals(this.partitionValues, other.partitionValues); - } - - @Override - public String toString() - { - return schemaName + "/" + tableName + "/" + partitionName; - } - } - - private static class PrincipalTableKey - { - private final String principalName; - private final PrincipalType principalType; - private final String database; - private final String table; - - public PrincipalTableKey(String principalName, PrincipalType principalType, String table, String database) - { - this.principalName = requireNonNull(principalName, "principalName is null"); - this.principalType = requireNonNull(principalType, "principalType is null"); - this.table = requireNonNull(table, "table is null"); - this.database = requireNonNull(database, "database is null"); - } - - public PrincipalTableKey withDatabase(String database) - { - return new PrincipalTableKey(principalName, principalType, table, database); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - PrincipalTableKey that = (PrincipalTableKey) o; - return Objects.equals(principalName, that.principalName) && - principalType == that.principalType && - Objects.equals(table, that.table) && - Objects.equals(database, that.database); - } - - @Override - public int hashCode() - { - return Objects.hash(principalName, principalType, table, database); - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("principalName", principalName) - .add("principalType", principalType) - .add("table", table) - .add("database", database) - .toString(); - } - } - - private static void rewriteKeys(Map map, Function keyRewriter) - { - for (K key : ImmutableSet.copyOf(map.keySet())) { - K newKey = keyRewriter.apply(key); - if (!newKey.equals(key)) { - map.put(newKey, map.remove(key)); - } - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MetastoreClientAdapterProvider.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MetastoreClientAdapterProvider.java new file mode 100644 index 0000000000000..479448cc9a259 --- /dev/null +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MetastoreClientAdapterProvider.java @@ -0,0 +1,19 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package io.trino.plugin.hive.metastore.thrift;
+
+public interface MetastoreClientAdapterProvider
+{
+    ThriftMetastoreClient createThriftMetastoreClientAdapter(ThriftMetastoreClient delegate);
+}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java
index 7cce4d2495a88..6856a02e978e2 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java
@@ -96,35 +96,27 @@ public MockThriftMetastoreClient()
     public void mockColumnStats(String database, String table, Map columnStatistics)
     {
-        this.columnStatistics.compute(new SchemaTableName(database, table), (ignored, oldColumnStats) -> {
-            if (oldColumnStats == null) {
-                oldColumnStats = new HashMap<>();
-            }
-            oldColumnStats.putAll(Maps.transformEntries(columnStatistics, (columnName, stats) -> {
-                ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
-                statsObj.setColName(columnName);
-                statsObj.setStatsData(stats);
-                return statsObj;
-            }));
-            return oldColumnStats;
-        });
+        this.columnStatistics.put(
+                new SchemaTableName(database, table),
+                Maps.transformEntries(columnStatistics, (columnName, stats) -> {
+                    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+                    statsObj.setColName(columnName);
+                    statsObj.setStatsData(stats);
+                    return statsObj;
+                }));
     }
 
     public void mockPartitionColumnStats(String database, String table, String partitionName, Map columnStatistics)
     {
         Map> tablePartitionColumnStatistics = databaseTablePartitionColumnStatistics.computeIfAbsent(new SchemaTableName(database, table), key -> new HashMap<>());
-        tablePartitionColumnStatistics.compute(partitionName, (ignored, oldColumnStats) -> {
-            if (oldColumnStats == null) {
-                oldColumnStats = new HashMap<>();
-            }
-            oldColumnStats.putAll(Maps.transformEntries(columnStatistics, (columnName, stats) -> {
-                ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
-                statsObj.setColName(columnName);
-                statsObj.setStatsData(stats);
-                return statsObj;
-            }));
-            return oldColumnStats;
-        });
+        tablePartitionColumnStatistics.put(
+                partitionName,
+                Maps.transformEntries(columnStatistics, (columnName, stats) -> {
+                    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+                    statsObj.setColName(columnName);
+                    statsObj.setStatsData(stats);
+                    return statsObj;
+                }));
     }
 
     private static ColumnStatisticsData createLongColumnStats()
@@ -259,11 +251,14 @@ public List getTableColumnStatistics(String databaseName, S
         Map columnStatistics = this.columnStatistics.get(new SchemaTableName(databaseName, tableName));
-        if (columnStatistics == null || !columnStatistics.keySet().containsAll(columnNames)) {
-            throw new NoSuchObjectException();
+        if (columnStatistics == null) {
+            return ImmutableList.of();
         }
-        return columnNames.stream().map(columnStatistics::get).collect(toImmutableList());
+        return columnNames.stream()
+                .filter(columnStatistics::containsKey)
+                .map(columnStatistics::get)
+                .collect(toImmutableList());
     }
 
     @Override
@@ -294,10 +289,13 @@ public Map> getPartitionColumnStatistics(Strin
         for (String partition : partitionNames) {
             Map columnStatistics = tablePartitionColumnStatistics.get(partition);
-            if (columnStatistics == null || !columnStatistics.keySet().containsAll(columnNames)) {
-                throw new NoSuchObjectException();
+            if (columnStatistics == null) {
+                continue;
             }
-
result.put(partition, ImmutableList.copyOf(columnStatistics.values())); + result.put(partition, columnNames.stream() + .filter(columnStatistics::containsKey) + .map(columnStatistics::get) + .collect(toImmutableList())); } return result.buildOrThrow(); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java index 35dce20c9f3e9..a850c387e0b63 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreAccessOperations.java @@ -13,67 +13,44 @@ */ package io.trino.plugin.hive.metastore.thrift; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; -import io.trino.Session; -import io.trino.plugin.hive.TestingHivePlugin; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastoreUtil; +import io.trino.plugin.hive.HiveQueryRunner; +import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import java.io.File; - -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.CREATE_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_PARTITIONS_BY_NAMES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_PARTITION_NAMES_BY_FILTER; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_PARTITION_STATISTICS; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE_STATISTICS; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.UPDATE_PARTITION_STATISTICS; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.UPDATE_TABLE_STATISTICS; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; -import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_DATABASE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITIONS_BY_NAMES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITION_NAMES_BY_FILTER; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_PARTITION_STATISTICS; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE_STATISTICS; +import static io.trino.plugin.hive.metastore.MetastoreMethod.UPDATE_PARTITION_STATISTICS; +import static io.trino.plugin.hive.metastore.MetastoreMethod.UPDATE_TABLE_STATISTICS; import static 
org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @Execution(SAME_THREAD)// metastore invocation counters shares mutable state so can't be run from many threads simultaneously public class TestHiveMetastoreAccessOperations extends AbstractTestQueryFramework { - private static final Session TEST_SESSION = testSessionBuilder() - .setCatalog("hive") - .setSchema("test_schema") - .build(); - - private CountingAccessHiveMetastore metastore; - @Override protected QueryRunner createQueryRunner() throws Exception { - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(TEST_SESSION).build(); - - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive").toFile(); - metastore = new CountingAccessHiveMetastore(createTestingFileHiveMetastore(baseDir)); - - queryRunner.installPlugin(new TestingHivePlugin(metastore)); - queryRunner.createCatalog("hive", "hive", ImmutableMap.of()); - - queryRunner.execute("CREATE SCHEMA test_schema"); - return queryRunner; + return HiveQueryRunner.create(); } @Test public void testUse() { assertMetastoreInvocations("USE " + getSession().getSchema().orElseThrow(), - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .build()); } @@ -82,7 +59,7 @@ public void testUse() public void testCreateTable() { assertMetastoreInvocations("CREATE TABLE test_create(id VARCHAR, age INT)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(CREATE_TABLE) .add(GET_DATABASE) .add(GET_TABLE) @@ -94,7 +71,7 @@ public void testCreateTable() public void testCreateTableAsSelect() { assertMetastoreInvocations("CREATE TABLE test_ctas AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(CREATE_TABLE) .add(GET_TABLE) @@ -108,7 +85,7 @@ public void testSelect() assertUpdate("CREATE TABLE test_select_from(id VARCHAR, age INT)"); assertMetastoreInvocations("SELECT * FROM test_select_from", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -119,7 +96,7 @@ public void testSelectPartitionedTable() assertUpdate("CREATE TABLE test_select_partition WITH (partitioned_by = ARRAY['part']) AS SELECT 1 AS data, 10 AS part", 1); assertMetastoreInvocations("SELECT * FROM test_select_partition", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) .add(GET_PARTITIONS_BY_NAMES) @@ -127,7 +104,7 @@ public void testSelectPartitionedTable() assertUpdate("INSERT INTO test_select_partition SELECT 2 AS data, 20 AS part", 1); assertMetastoreInvocations("SELECT * FROM test_select_partition", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) .add(GET_PARTITIONS_BY_NAMES) @@ -135,7 +112,7 @@ public void testSelectPartitionedTable() // Specify a specific partition assertMetastoreInvocations("SELECT * FROM test_select_partition WHERE part = 10", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) .add(GET_PARTITIONS_BY_NAMES) @@ -148,7 +125,7 @@ public void testSelectWithFilter() assertUpdate("CREATE TABLE test_select_from_where AS SELECT 2 AS age", 1); assertMetastoreInvocations("SELECT * FROM test_select_from_where WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -160,7 +137,7 @@ public void testSelectFromView() assertUpdate("CREATE VIEW test_select_view_view AS SELECT id, age FROM test_select_view_table"); 
assertMetastoreInvocations("SELECT * FROM test_select_view_view", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -172,7 +149,7 @@ public void testSelectFromViewWithFilter() assertUpdate("CREATE VIEW test_select_view_where_view AS SELECT age FROM test_select_view_where_table"); assertMetastoreInvocations("SELECT * FROM test_select_view_where_view WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -184,7 +161,7 @@ public void testJoin() assertUpdate("CREATE TABLE test_join_t2 AS SELECT 'name1' AS name, 'id1' AS id", 1); assertMetastoreInvocations("SELECT name, age FROM test_join_t1 JOIN test_join_t2 ON test_join_t2.id = test_join_t1.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .addCopies(GET_TABLE_STATISTICS, 2) .build()); @@ -196,7 +173,7 @@ public void testSelfJoin() assertUpdate("CREATE TABLE test_self_join_table AS SELECT 2 AS age, 0 parent, 3 AS id", 1); assertMetastoreInvocations("SELECT child.age, parent.age FROM test_self_join_table child JOIN test_self_join_table parent ON child.parent = parent.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_TABLE_STATISTICS) .build()); @@ -208,7 +185,7 @@ public void testExplainSelect() assertUpdate("CREATE TABLE test_explain AS SELECT 2 AS age", 1); assertMetastoreInvocations("EXPLAIN SELECT * FROM test_explain", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_TABLE_STATISTICS) .build()); @@ -220,7 +197,7 @@ public void testDescribe() assertUpdate("CREATE TABLE test_describe(id VARCHAR, age INT)"); assertMetastoreInvocations("DESCRIBE test_describe", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(GET_TABLE) .build()); @@ -232,7 +209,7 @@ public void testShowStatsForTable() assertUpdate("CREATE TABLE test_show_stats AS SELECT 2 AS age", 1); assertMetastoreInvocations("SHOW STATS FOR test_show_stats", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_TABLE_STATISTICS) .build()); @@ -244,7 +221,7 @@ public void testShowStatsForTableWithFilter() assertUpdate("CREATE TABLE test_show_stats_with_filter AS SELECT 2 AS age", 1); assertMetastoreInvocations("SHOW STATS FOR (SELECT * FROM test_show_stats_with_filter where age >= 2)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_TABLE_STATISTICS) .build()); @@ -256,7 +233,7 @@ public void testAnalyze() assertUpdate("CREATE TABLE test_analyze AS SELECT 2 AS age", 1); assertMetastoreInvocations("ANALYZE test_analyze", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(UPDATE_TABLE_STATISTICS) .build()); @@ -268,7 +245,7 @@ public void testAnalyzePartitionedTable() assertUpdate("CREATE TABLE test_analyze_partition WITH (partitioned_by = ARRAY['part']) AS SELECT 1 AS data, 10 AS part", 1); assertMetastoreInvocations("ANALYZE test_analyze_partition", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .add(GET_PARTITION_NAMES_BY_FILTER) .add(GET_PARTITIONS_BY_NAMES) @@ -279,7 +256,7 @@ public void testAnalyzePartitionedTable() assertUpdate("INSERT INTO test_analyze_partition SELECT 2 AS data, 20 AS part", 1); assertMetastoreInvocations("ANALYZE test_analyze_partition", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) 
.add(GET_PARTITIONS_BY_NAMES) @@ -293,8 +270,8 @@ public void testDropStats() { assertUpdate("CREATE TABLE drop_stats AS SELECT 2 AS age", 1); - assertMetastoreInvocations("CALL system.drop_stats('test_schema', 'drop_stats')", - ImmutableMultiset.builder() + assertMetastoreInvocations("CALL system.drop_stats(CURRENT_SCHEMA, 'drop_stats')", + ImmutableMultiset.builder() .add(GET_TABLE) .add(UPDATE_TABLE_STATISTICS) .build()); @@ -305,8 +282,8 @@ public void testDropStatsPartitionedTable() { assertUpdate("CREATE TABLE drop_stats_partition WITH (partitioned_by = ARRAY['part']) AS SELECT 1 AS data, 10 AS part", 1); - assertMetastoreInvocations("CALL system.drop_stats('test_schema', 'drop_stats_partition')", - ImmutableMultiset.builder() + assertMetastoreInvocations("CALL system.drop_stats(CURRENT_SCHEMA, 'drop_stats_partition')", + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) .add(UPDATE_PARTITION_STATISTICS) @@ -314,16 +291,16 @@ public void testDropStatsPartitionedTable() assertUpdate("INSERT INTO drop_stats_partition SELECT 2 AS data, 20 AS part", 1); - assertMetastoreInvocations("CALL system.drop_stats('test_schema', 'drop_stats_partition')", - ImmutableMultiset.builder() + assertMetastoreInvocations("CALL system.drop_stats(CURRENT_SCHEMA, 'drop_stats_partition')", + ImmutableMultiset.builder() .add(GET_TABLE) .add(GET_PARTITION_NAMES_BY_FILTER) .addCopies(UPDATE_PARTITION_STATISTICS, 2) .build()); } - private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) + private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) { - CountingAccessHiveMetastoreUtil.assertMetastoreInvocations(metastore, getQueryRunner(), getQueryRunner().getDefaultSession(), query, expectedInvocations); + assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), getSession(), query, expectedInvocations); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreMetadataQueriesAccessOperations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreMetadataQueriesAccessOperations.java index 348f971f32296..a47c41c580fe2 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreMetadataQueriesAccessOperations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestHiveMetastoreMetadataQueriesAccessOperations.java @@ -14,44 +14,44 @@ package io.trino.plugin.hive.metastore.thrift; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; +import io.airlift.log.Logger; +import io.trino.Session; +import io.trino.plugin.hive.HiveQueryRunner; import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.TestingHivePlugin; +import io.trino.plugin.hive.containers.HiveHadoop; import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastoreUtil; +import io.trino.plugin.hive.metastore.Database; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.plugin.hive.metastore.Table; -import 
io.trino.plugin.hive.metastore.UnimplementedHiveMetastore; -import io.trino.spi.connector.RelationType; -import io.trino.spi.connector.SchemaTableName; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.function.Function; -import java.util.stream.IntStream; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; +import static io.airlift.units.Duration.nanosSince; +import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_DATABASES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_RELATION_TYPES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_TABLES; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_TABLES_FROM_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_VIEWS; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_VIEWS_FROM_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_RELATION_TYPES_FROM_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_DATABASES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_RELATION_TYPES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_TABLES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_VIEWS; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_RELATION_TYPES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_VIEWS; +import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; import static io.trino.testing.TestingSession.testSessionBuilder; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @@ -60,45 +60,89 @@ public class TestHiveMetastoreMetadataQueriesAccessOperations extends AbstractTestQueryFramework { + private static final Logger log = Logger.get(TestHiveMetastoreMetadataQueriesAccessOperations.class); + private static final int MAX_PREFIXES_COUNT = 20; private static final int TEST_SCHEMAS_COUNT = MAX_PREFIXES_COUNT + 1; private static final int TEST_TABLES_IN_SCHEMA_COUNT = MAX_PREFIXES_COUNT + 3; private static final int TEST_ALL_TABLES_COUNT = TEST_SCHEMAS_COUNT * TEST_TABLES_IN_SCHEMA_COUNT; - private MockHiveMetastore mockMetastore; - private CountingAccessHiveMetastore metastore; + private static final 
Session SESSION = testSessionBuilder() + .setCatalog("hive") + .setSchema(Optional.empty()) + .build(); + + private HiveHadoop hiveHadoop; @Override protected QueryRunner createQueryRunner() throws Exception { - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder( - testSessionBuilder() - .setCatalog("hive") - .setSchema(Optional.empty()) - .build()) + hiveHadoop = HiveHadoop.builder().build(); + hiveHadoop.start(); + + DistributedQueryRunner queryRunner = HiveQueryRunner.builder(SESSION) // metadata queries do not use workers .setNodeCount(1) .addCoordinatorProperty("optimizer.experimental-max-prefetched-information-schema-prefixes", Integer.toString(MAX_PREFIXES_COUNT)) + .addHiveProperty("hive.metastore", "thrift") + .addHiveProperty("hive.metastore.uri", "thrift://" + hiveHadoop.getHiveMetastoreEndpoint()) + .addHiveProperty("hive.metastore.thrift.batch-fetch.enabled", "true") + .addHiveProperty("hive.hive-views.enabled", "true") + .setCreateTpchSchemas(false) .build(); - mockMetastore = new MockHiveMetastore(); - metastore = new CountingAccessHiveMetastore(mockMetastore); - queryRunner.installPlugin(new TestingHivePlugin(metastore)); - queryRunner.createCatalog("hive", "hive", ImmutableMap.of()); + try { + long start = System.nanoTime(); + createTestingTables(queryRunner); + log.info("Created testing tables in %s", nanosSince(start)); + } + catch (RuntimeException e) { + queryRunner.close(); + throw e; + } + return queryRunner; } - private void resetMetastoreSetup() + private static void createTestingTables(QueryRunner queryRunner) { - mockMetastore.setAllTablesViewsImplemented(false); + HiveMetastore metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + for (int databaseId = 0; databaseId < TEST_SCHEMAS_COUNT; databaseId++) { + String databaseName = "test_schema_" + databaseId; + metastore.createDatabase(Database.builder() + .setDatabaseName(databaseName) + .setOwnerName(Optional.empty()) + .setOwnerType(Optional.empty()) + .build()); + + for (int tableId = 0; tableId < TEST_TABLES_IN_SCHEMA_COUNT; tableId++) { + Table.Builder table = Table.builder() + .setDatabaseName(databaseName) + .setTableName("test_table_" + tableId) + .setTableType(MANAGED_TABLE.name()) + .setDataColumns(ImmutableList.of( + new Column("id", HiveType.HIVE_INT, Optional.empty(), Map.of()), + new Column("name", HiveType.HIVE_STRING, Optional.empty(), Map.of()))) + .setOwner(Optional.empty()); + table.getStorageBuilder() + .setStorageFormat(fromHiveStorageFormat(PARQUET)); + metastore.createTable(table.build(), NO_PRIVILEGES); + } + } + } + + @AfterAll + void afterAll() + { + hiveHadoop.stop(); } @Test public void testSelectSchemasWithoutPredicate() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.schemata", ImmutableMultiset.of(GET_ALL_DATABASES)); assertMetastoreInvocations("SELECT * FROM system.jdbc.schemas", ImmutableMultiset.of(GET_ALL_DATABASES)); } @@ -106,8 +150,6 @@ public void testSelectSchemasWithoutPredicate() @Test public void testSelectSchemasWithFilterByInformationSchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.schemata WHERE schema_name = 'information_schema'", ImmutableMultiset.of(GET_ALL_DATABASES)); assertMetastoreInvocations("SELECT * FROM system.jdbc.schemas WHERE table_schem = 'information_schema'", ImmutableMultiset.of(GET_ALL_DATABASES)); } @@ -115,8 +157,6 @@ public void testSelectSchemasWithFilterByInformationSchema() 
@Test public void testSelectSchemasWithLikeOverSchemaName() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.schemata WHERE schema_name LIKE 'test%'", ImmutableMultiset.of(GET_ALL_DATABASES)); assertMetastoreInvocations("SELECT * FROM system.jdbc.schemas WHERE table_schem LIKE 'test%'", ImmutableMultiset.of(GET_ALL_DATABASES)); } @@ -124,29 +164,16 @@ public void testSelectSchemasWithLikeOverSchemaName() @Test public void testSelectTablesWithoutPredicate() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); - Multiset tables = ImmutableMultiset.builder() + Multiset tables = ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build(); assertMetastoreInvocations("SELECT * FROM information_schema.tables", tables); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables", tables); - - mockMetastore.setAllTablesViewsImplemented(false); - tables = ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build(); - assertMetastoreInvocations("SELECT * FROM information_schema.tables", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables", tables); } @Test public void testSelectTablesWithFilterByInformationSchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.tables WHERE table_schema = 'information_schema'", ImmutableMultiset.of()); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_schem = 'information_schema'", ImmutableMultiset.of()); } @@ -154,138 +181,82 @@ public void testSelectTablesWithFilterByInformationSchema() @Test public void testSelectTablesWithFilterBySchema() { - resetMetastoreSetup(); - assertMetastoreInvocations( "SELECT * FROM information_schema.tables WHERE table_schema = 'test_schema_0'", - ImmutableMultiset.builder() - .add(GET_RELATION_TYPES_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_RELATION_TYPES) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_schem = 'test_schema_0'", - ImmutableMultiset.builder() - .add(GET_RELATION_TYPES_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_RELATION_TYPES) .build()); } @Test public void testSelectTablesWithLikeOverSchema() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.tables WHERE table_schema LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_RELATION_TYPES) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_schem LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - Multiset tables = ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build(); - assertMetastoreInvocations("SELECT * FROM information_schema.tables WHERE table_schema LIKE 'test%'", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_schem LIKE 'test%'", tables); } @Test public void testSelectTablesWithFilterByTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.tables WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() 
.add(GET_ALL_DATABASES) .add(GET_ALL_RELATION_TYPES) .build()); - Multiset tables = ImmutableMultiset.builder() + Multiset tables = ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build(); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name = 'test_table_0'", tables); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test\\_table\\_0' ESCAPE '\\'", tables); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test_table_0' ESCAPE '\\'", tables); - - mockMetastore.setAllTablesViewsImplemented(false); - tables = ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build(); - assertMetastoreInvocations("SELECT * FROM information_schema.tables WHERE table_name = 'test_table_0'", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name = 'test_table_0'", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test\\_table\\_0' ESCAPE '\\'", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test_table_0' ESCAPE '\\'", tables); } @Test public void testSelectTablesWithLikeOverTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.tables WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_RELATION_TYPES) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - Multiset tables = ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build(); - assertMetastoreInvocations("SELECT * FROM information_schema.tables WHERE table_name LIKE 'test%'", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_name LIKE 'test%'", tables); } @Test public void testSelectViewsWithoutPredicate() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations("SELECT * FROM information_schema.views", ImmutableMultiset.of(GET_ALL_VIEWS)); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.views", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); } @Test public void testSelectViewsWithFilterByInformationSchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.views WHERE table_schema = 'information_schema'", ImmutableMultiset.of()); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_schem = 'information_schema'", ImmutableMultiset.of()); } @@ -293,144 +264,76 @@ public void 
testSelectViewsWithFilterByInformationSchema() @Test public void testSelectViewsWithFilterBySchema() { - resetMetastoreSetup(); - - assertMetastoreInvocations("SELECT * FROM information_schema.views WHERE table_schema = 'test_schema_0'", ImmutableMultiset.of(GET_ALL_VIEWS_FROM_DATABASE)); + assertMetastoreInvocations("SELECT * FROM information_schema.views WHERE table_schema = 'test_schema_0'", ImmutableMultiset.of(GET_VIEWS)); assertMetastoreInvocations("SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_schem = 'test_schema_0'", - ImmutableMultiset.builder() - .add(GET_RELATION_TYPES_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_RELATION_TYPES) .build()); } @Test public void testSelectViewsWithLikeOverSchema() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.views WHERE table_schema LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_VIEWS) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_schem LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.views WHERE table_schema LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_schem LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); } @Test public void testSelectViewsWithFilterByTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.views WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_VIEWS) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_name = 'test_table_0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.views WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_name = 'test_table_0'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); } @Test public void testSelectViewsWithLikeOverTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.views WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_VIEWS) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_RELATION_TYPES) .build()); - - 
mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.views WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.tables WHERE table_type = 'VIEW' AND table_name LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_RELATION_TYPES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .build()); } @Test public void testSelectColumnsWithoutPredicate() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); - ImmutableMultiset tables = ImmutableMultiset.builder() + ImmutableMultiset tables = ImmutableMultiset.builder() .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build(); assertMetastoreInvocations("SELECT * FROM information_schema.columns", tables); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns", tables); - - mockMetastore.setAllTablesViewsImplemented(false); - tables = ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build(); - assertMetastoreInvocations("SELECT * FROM information_schema.columns", tables); - assertMetastoreInvocations("SELECT * FROM system.jdbc.columns", tables); } @Test public void testSelectColumnsFilterByInformationSchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.columns WHERE table_schema = 'information_schema'", ImmutableMultiset.of()); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem = 'information_schema'", ImmutableMultiset.of()); } @@ -438,31 +341,29 @@ public void testSelectColumnsFilterByInformationSchema() @Test public void testSelectColumnsFilterBySchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.columns WHERE table_schema = 'test_schema_0'", - ImmutableMultiset.builder() - .add(GET_ALL_TABLES_FROM_DATABASE) - .add(GET_ALL_VIEWS_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_TABLES) + .add(GET_VIEWS) .addCopies(GET_TABLE, TEST_TABLES_IN_SCHEMA_COUNT) .build()); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem = 'test_schema_0'", - ImmutableMultiset.builder() - .add(GET_ALL_TABLES_FROM_DATABASE) - .add(GET_ALL_VIEWS_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_TABLES) + .add(GET_VIEWS) .addCopies(GET_TABLE, TEST_TABLES_IN_SCHEMA_COUNT) .build()); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test\\_schema\\_0' ESCAPE '\\'", - ImmutableMultiset.builder() - .add(GET_ALL_TABLES_FROM_DATABASE) - .add(GET_ALL_VIEWS_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_TABLES) + .add(GET_VIEWS) .addCopies(GET_TABLE, TEST_TABLES_IN_SCHEMA_COUNT) .build()); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test_schema_0' ESCAPE '\\'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .add(GET_ALL_TABLES_FROM_DATABASE) - .add(GET_ALL_VIEWS_FROM_DATABASE) + .add(GET_TABLES) + .add(GET_VIEWS) .addCopies(GET_TABLE, TEST_TABLES_IN_SCHEMA_COUNT) .build()); } @@ -470,12 +371,9 @@ public void testSelectColumnsFilterBySchema() @Test public void testSelectColumnsWithLikeOverSchema() { - 
resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.columns WHERE table_schema LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) @@ -483,26 +381,9 @@ public void testSelectColumnsWithLikeOverSchema() .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.columns WHERE table_schema LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) + .addCopies(GET_TABLES, TEST_SCHEMAS_COUNT) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); } @@ -510,12 +391,9 @@ public void testSelectColumnsWithLikeOverSchema() @Test public void testSelectColumnsFilterByTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.columns WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) @@ -525,52 +403,24 @@ public void testSelectColumnsFilterByTableName() .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test\\_table\\_0' ESCAPE '\\'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test_table_0' ESCAPE '\\'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) - .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.columns WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - // TODO When there are many schemas, there are no "prefixes" and we end up calling ConnectorMetadata without any filter whatsoever. - // If such queries are common enough, we could iterate over schemas and for each schema try getting a table by given name. 
- .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE table_name = 'test_table_0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) + .add(GET_TABLES) + .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT + 1) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test\\_table\\_0' ESCAPE '\\'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) + .add(GET_TABLES) + .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT + 1) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test_table_0' ESCAPE '\\'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) + .add(GET_TABLES) + .addCopies(GET_TABLES, TEST_SCHEMAS_COUNT) .addCopies(GET_TABLE, TEST_SCHEMAS_COUNT) .build()); } @@ -578,37 +428,17 @@ public void testSelectColumnsFilterByTableName() @Test public void testSelectColumnsWithLikeOverTableName() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations("SELECT * FROM information_schema.columns WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.columns WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE table_name LIKE 'test%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) + .addCopies(GET_TABLES, TEST_SCHEMAS_COUNT + 1) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); } @@ -616,52 +446,28 @@ public void testSelectColumnsWithLikeOverTableName() @Test public void testSelectColumnsFilterByColumn() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.columns WHERE column_name = 'name'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE column_name = 'name'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.columns WHERE column_name = 'name'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - 
.addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE column_name = 'name'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); } @Test public void testSelectColumnsWithLikeOverColumn() { - resetMetastoreSetup(); - - mockMetastore.setAllTablesViewsImplemented(true); assertMetastoreInvocations( "SELECT * FROM information_schema.columns WHERE column_name LIKE 'n%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) @@ -669,141 +475,29 @@ public void testSelectColumnsWithLikeOverColumn() .build()); assertMetastoreInvocations( "SELECT * FROM system.jdbc.columns WHERE column_name LIKE 'n%'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_TABLES) .add(GET_ALL_VIEWS) .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) .build()); - - mockMetastore.setAllTablesViewsImplemented(false); - assertMetastoreInvocations( - "SELECT * FROM information_schema.columns WHERE column_name LIKE 'n%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); - assertMetastoreInvocations( - "SELECT * FROM system.jdbc.columns WHERE column_name LIKE 'n%'", - ImmutableMultiset.builder() - .add(GET_ALL_DATABASES) - .addCopies(GET_ALL_TABLES_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_ALL_VIEWS_FROM_DATABASE, TEST_SCHEMAS_COUNT) - .addCopies(GET_TABLE, TEST_ALL_TABLES_COUNT) - .build()); } @Test public void testSelectColumnsFilterByTableAndSchema() { - resetMetastoreSetup(); - assertMetastoreInvocations("SELECT * FROM information_schema.columns WHERE table_schema = 'test_schema_0' AND table_name = 'test_table_0'", ImmutableMultiset.of(GET_TABLE)); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem = 'test_schema_0' AND table_name = 'test_table_0'", ImmutableMultiset.of(GET_TABLE)); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test\\_schema\\_0' ESCAPE '\\' AND table_name LIKE 'test\\_table\\_0' ESCAPE '\\'", ImmutableMultiset.of(GET_TABLE)); assertMetastoreInvocations("SELECT * FROM system.jdbc.columns WHERE table_schem LIKE 'test_schema_0' ESCAPE '\\' AND table_name LIKE 'test_table_0' ESCAPE '\\'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_ALL_DATABASES) - .add(GET_ALL_TABLES_FROM_DATABASE) + .add(GET_TABLES) .add(GET_TABLE) .build()); } - private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) - { - CountingAccessHiveMetastoreUtil.assertMetastoreInvocations(metastore, getQueryRunner(), getQueryRunner().getDefaultSession(), query, expectedInvocations); - } - - private static class MockHiveMetastore - extends UnimplementedHiveMetastore + private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) { - private static final List SCHEMAS = IntStream.range(0, TEST_SCHEMAS_COUNT) - .mapToObj("test_schema_%d"::formatted) - .collect(toImmutableList()); - private static final List 
<String> TABLES_PER_SCHEMA = IntStream.range(0, TEST_TABLES_IN_SCHEMA_COUNT)
-                .mapToObj("test_table_%d"::formatted)
-                .collect(toImmutableList());
-        private static final ImmutableList<SchemaTableName> ALL_TABLES = SCHEMAS.stream()
-                .flatMap(schema -> TABLES_PER_SCHEMA.stream()
-                        .map(table -> new SchemaTableName(schema, table)))
-                .collect(toImmutableList());
-
-        private boolean allTablesViewsImplemented;
-
-        @Override
-        public List<String> getAllDatabases()
-        {
-            return SCHEMAS;
-        }
-
-        @Override
-        public List<String> getAllTables(String databaseName)
-        {
-            return TABLES_PER_SCHEMA;
-        }
-
-        @Override
-        public Optional<List<SchemaTableName>> getAllTables()
-        {
-            if (allTablesViewsImplemented) {
-                return Optional.of(ALL_TABLES);
-            }
-            return Optional.empty();
-        }
-
-        @Override
-        public Map<String, RelationType> getRelationTypes(String databaseName)
-        {
-            return TABLES_PER_SCHEMA.stream()
-                    .collect(toImmutableMap(Function.identity(), ignore -> RelationType.TABLE));
-        }
-
-        @Override
-        public Optional<Map<SchemaTableName, RelationType>> getRelationTypes()
-        {
-            if (allTablesViewsImplemented) {
-                return Optional.of(ALL_TABLES.stream()
-                        .collect(toImmutableMap(Function.identity(), ignore -> RelationType.TABLE)));
-            }
-            return Optional.empty();
-        }
-
-        @Override
-        public List<String> getAllViews(String databaseName)
-        {
-            return ImmutableList.of();
-        }
-
-        @Override
-        public Optional<List<SchemaTableName>> getAllViews()
-        {
-            if (allTablesViewsImplemented) {
-                return Optional.of(ImmutableList.of());
-            }
-            return Optional.empty();
-        }
-
-        @Override
-        public Optional<Table>
getTable(String databaseName, String tableName) - { - return Optional.of(Table.builder() - .setDatabaseName(databaseName) - .setTableName(tableName) - .setDataColumns(ImmutableList.of( - new Column("id", HiveType.HIVE_INT, Optional.empty(), Map.of()), - new Column("name", HiveType.HIVE_STRING, Optional.empty(), Map.of()))) - .setOwner(Optional.empty()) - .setTableType(MANAGED_TABLE.name()) - .withStorage(storage -> - storage.setStorageFormat(fromHiveStorageFormat(ORC)) - .setLocation(Optional.empty())) - .build()); - } - - public void setAllTablesViewsImplemented(boolean allTablesViewsImplemented) - { - this.allTablesViewsImplemented = allTablesViewsImplemented; - } + assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), getQueryRunner().getDefaultSession(), query, expectedInvocations); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestingTokenAwareMetastoreClientFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestingTokenAwareMetastoreClientFactory.java index 0fa8a586d8dcb..1f2508261d160 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestingTokenAwareMetastoreClientFactory.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestingTokenAwareMetastoreClientFactory.java @@ -31,21 +31,29 @@ public class TestingTokenAwareMetastoreClientFactory private final DefaultThriftMetastoreClientFactory factory; private final HostAndPort address; + private final MetastoreClientAdapterProvider metastoreClientAdapterProvider; + public TestingTokenAwareMetastoreClientFactory(Optional socksProxy, HostAndPort address) { - this(socksProxy, address, TIMEOUT); + this(socksProxy, address, TIMEOUT, delegate -> delegate); } public TestingTokenAwareMetastoreClientFactory(Optional socksProxy, HostAndPort address, Duration timeout) + { + this(socksProxy, address, timeout, delegate -> delegate); + } + + public TestingTokenAwareMetastoreClientFactory(Optional socksProxy, HostAndPort address, Duration timeout, MetastoreClientAdapterProvider metastoreClientAdapterProvider) { this.factory = new DefaultThriftMetastoreClientFactory(Optional.empty(), socksProxy, timeout, timeout, AUTHENTICATION, "localhost"); this.address = requireNonNull(address, "address is null"); + this.metastoreClientAdapterProvider = requireNonNull(metastoreClientAdapterProvider, "metastoreClientAdapterProvider is null"); } @Override public ThriftMetastoreClient createMetastoreClient(Optional delegationToken) throws TException { - return factory.create(address, delegationToken); + return metastoreClientAdapterProvider.createThriftMetastoreClientAdapter(factory.create(address, delegationToken)); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestConnectorPushdownRulesWithHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestConnectorPushdownRulesWithHive.java index f58af76061358..444348b5753ac 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestConnectorPushdownRulesWithHive.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestConnectorPushdownRulesWithHive.java @@ -26,6 +26,7 @@ import io.trino.plugin.hive.TestingHiveConnectorFactory; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.connector.CatalogHandle; import io.trino.spi.predicate.Domain; import 
io.trino.spi.predicate.TupleDomain; @@ -59,9 +60,10 @@ import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; +import static io.trino.plugin.hive.HiveQueryRunner.HIVE_CATALOG; import static io.trino.plugin.hive.HiveType.HIVE_INT; import static io.trino.plugin.hive.HiveType.toHiveType; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.RowType.field; @@ -72,7 +74,6 @@ import static io.trino.sql.planner.assertions.PlanMatchPattern.tableScan; import static io.trino.sql.tree.ArithmeticBinaryExpression.Operator.ADD; import static io.trino.sql.tree.ArithmeticUnaryExpression.Sign.MINUS; -import static io.trino.testing.TestingHandles.TEST_CATALOG_NAME; import static io.trino.testing.TestingSession.testSessionBuilder; import static java.lang.String.format; import static java.util.Arrays.asList; @@ -89,7 +90,7 @@ public class TestConnectorPushdownRulesWithHive private CatalogHandle catalogHandle; private static final Session HIVE_SESSION = testSessionBuilder() - .setCatalog(TEST_CATALOG_NAME) + .setCatalog(HIVE_CATALOG) .setSchema(SCHEMA_NAME) .build(); @@ -103,18 +104,18 @@ protected Optional createLocalQueryRunner() throw new UncheckedIOException(e); } - metastore = createTestingFileHiveMetastore(baseDir); - Database database = Database.builder() + LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION); + queryRunner.createCatalog(HIVE_CATALOG, new TestingHiveConnectorFactory(baseDir.toPath()), ImmutableMap.of()); + catalogHandle = queryRunner.getCatalogHandle(HIVE_CATALOG); + + metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + metastore.createDatabase(Database.builder() .setDatabaseName(SCHEMA_NAME) .setOwnerName(Optional.of("public")) .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - - metastore.createDatabase(database); - - LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION); - queryRunner.createCatalog(TEST_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), ImmutableMap.of()); - catalogHandle = queryRunner.getCatalogHandle(TEST_CATALOG_NAME); + .build()); return Optional.of(queryRunner); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHivePlans.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHivePlans.java index 54c72c6597fbb..be611290ca3b2 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHivePlans.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHivePlans.java @@ -17,6 +17,7 @@ import io.trino.plugin.hive.TestingHiveConnectorFactory; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.security.PrincipalType; import io.trino.sql.planner.OptimizerConfig.JoinDistributionType; import io.trino.sql.planner.OptimizerConfig.JoinReorderingStrategy; @@ -38,7 +39,7 @@ import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static 
io.trino.SystemSessionProperties.JOIN_REORDERING_STRATEGY; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.sql.planner.assertions.PlanMatchPattern.exchange; import static io.trino.sql.planner.assertions.PlanMatchPattern.filter; import static io.trino.sql.planner.assertions.PlanMatchPattern.join; @@ -73,22 +74,19 @@ protected LocalQueryRunner createLocalQueryRunner() catch (IOException e) { throw new UncheckedIOException(e); } - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - Database database = Database.builder() + + LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION); + queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(baseDir.toPath()), Map.of("hive.max-partitions-for-eager-load", "5")); + + HiveMetastore metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + metastore.createDatabase(Database.builder() .setDatabaseName(SCHEMA_NAME) .setOwnerName(Optional.of("public")) .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); + .build()); - metastore.createDatabase(database); - - return createQueryRunner(HIVE_SESSION, metastore); - } - - protected LocalQueryRunner createQueryRunner(Session session, HiveMetastore metastore) - { - LocalQueryRunner queryRunner = LocalQueryRunner.create(session); - queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), Map.of("hive.max-partitions-for-eager-load", "5")); return queryRunner; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHiveProjectionPushdownIntoTableScan.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHiveProjectionPushdownIntoTableScan.java index 1de3d1f5d714c..2acac74db4843 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHiveProjectionPushdownIntoTableScan.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/optimizer/TestHiveProjectionPushdownIntoTableScan.java @@ -24,6 +24,7 @@ import io.trino.plugin.hive.TestingHiveConnectorFactory; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; @@ -43,7 +44,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.TestHiveReaderProjectionsUtil.createProjectedColumnHandle; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.sql.planner.assertions.PlanMatchPattern.any; import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree; @@ -79,17 +80,18 @@ protected LocalQueryRunner createLocalQueryRunner() catch (IOException e) { throw new UncheckedIOException(e); } - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - Database database = Database.builder() + + LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION); + queryRunner.createCatalog(HIVE_CATALOG_NAME, new 
TestingHiveConnectorFactory(baseDir.toPath()), ImmutableMap.of()); + + HiveMetastore metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + metastore.createDatabase(Database.builder() .setDatabaseName(SCHEMA_NAME) .setOwnerName(Optional.of("public")) .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - - metastore.createDatabase(database); - - LocalQueryRunner queryRunner = LocalQueryRunner.create(HIVE_SESSION); - queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), ImmutableMap.of()); + .build()); return queryRunner; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestHiveOrcWithShortZoneId.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestHiveOrcWithShortZoneId.java index 0e07eb2ecc2d9..c231a370e9937 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestHiveOrcWithShortZoneId.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestHiveOrcWithShortZoneId.java @@ -14,28 +14,50 @@ package io.trino.plugin.hive.orc; import com.google.common.collect.ImmutableList; +import com.google.common.io.Resources; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.HiveQueryRunner; +import io.trino.spi.security.ConnectorIdentity; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import io.trino.testing.sql.TestTable; import org.junit.jupiter.api.Test; -import static io.trino.testing.containers.TestContainers.getPathFromClassPathResource; +import java.io.OutputStream; +import java.net.URL; +import java.util.UUID; + +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; public class TestHiveOrcWithShortZoneId extends AbstractTestQueryFramework { - private String resourceLocation; + private Location dataFile; @Override protected QueryRunner createQueryRunner() throws Exception { - // See README.md to know how resource is generated - resourceLocation = getPathFromClassPathResource("with_short_zone_id/data"); - return HiveQueryRunner.builder() + DistributedQueryRunner queryRunner = HiveQueryRunner.builder() .addHiveProperty("hive.orc.read-legacy-short-zone-id", "true") .build(); + + URL resourceLocation = Resources.getResource("with_short_zone_id/data/data.orc"); + + TrinoFileSystem fileSystem = getConnectorService(queryRunner, TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); + fileSystem.createDirectory(tempDir); + dataFile = tempDir.appendPath("data.orc"); + try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) { + Resources.copy(resourceLocation, out); + } + + return queryRunner; } @Test @@ -45,7 +67,7 @@ public void testSelectWithShortZoneId() try (TestTable testTable = new TestTable( getQueryRunner()::execute, "test_select_with_short_zone_id_", - "(id INT, firstName VARCHAR, lastName VARCHAR) WITH (external_location = '%s')".formatted(resourceLocation))) { + "(id INT, firstName VARCHAR, lastName VARCHAR) WITH (external_location = '%s')".formatted(dataFile.parentDirectory()))) { assertQuery("SELECT * FROM " + testTable.getName(), "VALUES (1, 'John', 'Doe')"); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java index e8a657ecb4e2f..f5cf9b88a1931 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java @@ -30,7 +30,6 @@ import io.trino.plugin.hive.HivePageSourceProvider; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.TableToPartitionMapping; import io.trino.plugin.hive.WriterKind; import io.trino.plugin.hive.metastore.StorageFormat; import io.trino.spi.Page; @@ -167,7 +166,7 @@ private static ConnectorPageSource createPageSource( ImmutableList.of(), columns, ImmutableList.of(), - TableToPartitionMapping.empty(), + ImmutableMap.of(), location.toString(), OptionalInt.empty(), length, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/AbstractTestParquetReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/AbstractTestParquetReader.java index 21d6cb0479187..2606050da85b2 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/AbstractTestParquetReader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/AbstractTestParquetReader.java @@ -41,9 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.parquet.schema.MessageType; import org.joda.time.DateTimeZone; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.math.BigInteger; import java.util.ArrayList; @@ -96,7 +94,6 @@ import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.spi.type.VarcharType.createVarcharType; -import static io.trino.testing.DataProviders.toDataProvider; import static io.trino.testing.DateTimeTestingUtils.sqlTimestampOf; import static io.trino.testing.StructuralTestUtil.mapType; import static java.lang.Math.floorDiv; @@ -131,24 +128,15 @@ public abstract class AbstractTestParquetReader private static final int MAX_PRECISION_INT32 = toIntExact(maxPrecision(4)); private static final int MAX_PRECISION_INT64 = toIntExact(maxPrecision(8)); - @SuppressWarnings("FieldCanBeLocal") - private Logger parquetLogger; - private final ParquetTester tester; protected AbstractTestParquetReader(ParquetTester tester) { this.tester = tester; - } - - @BeforeClass - public void setUp() - { assertThat(DateTimeZone.getDefault()).isEqualTo(DateTimeZone.forID("America/Bahia_Banderas")); // Parquet has excessive logging at INFO level - parquetLogger = Logger.getLogger("org.apache.parquet.hadoop"); - parquetLogger.setLevel(Level.WARNING); + Logger.getLogger("org.apache.parquet.hadoop").setLevel(Level.WARNING); } @Test @@ -885,52 +873,47 @@ public void testParquetShortDecimalWriteToTrinoDecimalWithNonMatchingScale() tester.testRoundTrip(javaLongObjectInspector, ImmutableList.of(10L), ImmutableList.of(SqlDecimal.of(100L, 10, 2)), createDecimalType(10, 2), Optional.of(parquetSchema)); } - @Test(dataProvider = "testDecimalInputProvider") - public void testDecimals(DecimalInput decimalInput) - throws Exception - { - for (int precision = 1; precision <= decimalInput.getMaxSupportedPrecision(); precision++) { - int scale = ThreadLocalRandom.current().nextInt(precision); - MessageType parquetSchema = parseMessageType(format( - "message hive_decimal { optional %s test (DECIMAL(%d, 
%d)); }", - decimalInput.getPrimitiveTypeName(precision), - precision, - scale)); - ImmutableList.Builder expectedValues = ImmutableList.builder(); - ImmutableList.Builder expectedValuesMaxPrecision = ImmutableList.builder(); - ImmutableList.Builder writeValuesBuilder = ImmutableList.builder(); - - BigInteger start = BigInteger.valueOf(10).pow(precision).subtract(ONE).negate(); - BigInteger end = BigInteger.valueOf(10).pow(precision); - BigInteger step = BigInteger.valueOf(1).max(end.subtract(start).divide(BigInteger.valueOf(1_500))); - for (BigInteger value = start; value.compareTo(end) < 0; value = value.add(step)) { - writeValuesBuilder.add(decimalInput.convertToWriteValue(value, scale)); - expectedValues.add(new SqlDecimal(value, precision, scale)); - expectedValuesMaxPrecision.add(new SqlDecimal(value, MAX_PRECISION, scale)); + @Test + public void testDecimals() + throws Exception + { + for (DecimalInput decimalInput : DecimalInput.values()) { + for (int precision = 1; precision <= decimalInput.getMaxSupportedPrecision(); precision++) { + int scale = ThreadLocalRandom.current().nextInt(precision); + MessageType parquetSchema = parseMessageType(format( + "message hive_decimal { optional %s test (DECIMAL(%d, %d)); }", + decimalInput.getPrimitiveTypeName(precision), + precision, + scale)); + ImmutableList.Builder expectedValues = ImmutableList.builder(); + ImmutableList.Builder expectedValuesMaxPrecision = ImmutableList.builder(); + ImmutableList.Builder writeValuesBuilder = ImmutableList.builder(); + + BigInteger start = BigInteger.valueOf(10).pow(precision).subtract(ONE).negate(); + BigInteger end = BigInteger.valueOf(10).pow(precision); + BigInteger step = BigInteger.valueOf(1).max(end.subtract(start).divide(BigInteger.valueOf(1_500))); + for (BigInteger value = start; value.compareTo(end) < 0; value = value.add(step)) { + writeValuesBuilder.add(decimalInput.convertToWriteValue(value, scale)); + expectedValues.add(new SqlDecimal(value, precision, scale)); + expectedValuesMaxPrecision.add(new SqlDecimal(value, MAX_PRECISION, scale)); + } + List writeValues = writeValuesBuilder.build(); + tester.testRoundTrip( + decimalInput.getParquetObjectInspector(precision, scale), + writeValues, + expectedValues.build(), + createDecimalType(precision, scale), + Optional.of(parquetSchema)); + tester.testRoundTrip( + decimalInput.getParquetObjectInspector(precision, scale), + writeValues, + expectedValuesMaxPrecision.build(), + createDecimalType(MAX_PRECISION, scale), + Optional.of(parquetSchema)); } - List writeValues = writeValuesBuilder.build(); - tester.testRoundTrip( - decimalInput.getParquetObjectInspector(precision, scale), - writeValues, - expectedValues.build(), - createDecimalType(precision, scale), - Optional.of(parquetSchema)); - tester.testRoundTrip( - decimalInput.getParquetObjectInspector(precision, scale), - writeValues, - expectedValuesMaxPrecision.build(), - createDecimalType(MAX_PRECISION, scale), - Optional.of(parquetSchema)); } } - @DataProvider - public Object[][] testDecimalInputProvider() - { - return Arrays.stream(DecimalInput.values()) - .collect(toDataProvider()); - } - private enum DecimalInput { INT32 { @@ -1235,41 +1218,36 @@ public void testReadParquetInt32AsTrinoShortDecimal() .isInstanceOf(TrinoException.class); } - @Test(dataProvider = "timestampPrecision") - public void testTimestamp(HiveTimestampPrecision precision) + @Test + public void testTimestamp() throws Exception { - List epochMillisValues = ContiguousSet.create(Range.closedOpen((long) -1_000, (long) 1_000), 
DiscreteDomain.longs()).stream() - .map(millis -> System.currentTimeMillis() + millis) - .collect(toImmutableList()); - List writeValues = epochMillisValues.stream() - .map(AbstractTestParquetReader::longToTimestamp) - .collect(toImmutableList()); - List readValues = epochMillisValues.stream() - .map(epochMillis -> SqlTimestamp.newInstance(precision.getPrecision(), epochMillis * 1_000, 0)) - .collect(toImmutableList()); - // INT96 backed timestamps are written by the default ParquetSchemaOptions - tester.testRoundTrip( - javaTimestampObjectInspector, - writeValues, - readValues, - createTimestampType(precision.getPrecision()), - Optional.empty()); - tester.testRoundTrip( - javaTimestampObjectInspector, - writeValues, - readValues, - getOnlyElement(TEST_COLUMN), - createTimestampType(precision.getPrecision()), - Optional.empty(), - ParquetSchemaOptions.withInt64BackedTimestamps()); - } - - @DataProvider - public Object[][] timestampPrecision() - { - return Stream.of(HiveTimestampPrecision.values()) - .collect(toDataProvider()); + for (HiveTimestampPrecision precision : HiveTimestampPrecision.values()) { + List epochMillisValues = ContiguousSet.create(Range.closedOpen((long) -1_000, (long) 1_000), DiscreteDomain.longs()).stream() + .map(millis -> System.currentTimeMillis() + millis) + .collect(toImmutableList()); + List writeValues = epochMillisValues.stream() + .map(AbstractTestParquetReader::longToTimestamp) + .collect(toImmutableList()); + List readValues = epochMillisValues.stream() + .map(epochMillis -> SqlTimestamp.newInstance(precision.getPrecision(), epochMillis * 1_000, 0)) + .collect(toImmutableList()); + // INT96 backed timestamps are written by the default ParquetSchemaOptions + tester.testRoundTrip( + javaTimestampObjectInspector, + writeValues, + readValues, + createTimestampType(precision.getPrecision()), + Optional.empty()); + tester.testRoundTrip( + javaTimestampObjectInspector, + writeValues, + readValues, + getOnlyElement(TEST_COLUMN), + createTimestampType(precision.getPrecision()), + Optional.empty(), + ParquetSchemaOptions.withInt64BackedTimestamps()); + } } @Test diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestFullParquetReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestFullParquetReader.java index 13ba2b6d57dd3..6bf6e6243fcc6 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestFullParquetReader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestFullParquetReader.java @@ -13,12 +13,14 @@ */ package io.trino.plugin.hive.parquet; -import org.testng.annotations.Test; +import org.junit.jupiter.api.parallel.Execution; + +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; // Failing on multiple threads because of org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper // uses a single record writer across all threads. // For example org.apache.parquet.column.values.factory.DefaultValuesWriterFactory#DEFAULT_V1_WRITER_FACTORY is shared mutable state. -@Test(singleThreaded = true) +@Execution(SAME_THREAD) public class TestFullParquetReader extends AbstractTestParquetReader { @@ -26,12 +28,4 @@ public TestFullParquetReader() { super(ParquetTester.fullParquetTester()); } - - @Test - public void forceTestNgToRespectSingleThreaded() - { - // TODO: Remove after updating TestNG to 7.4.0+ (https://github.com/trinodb/trino/issues/8571) - // TestNG doesn't enforce @Test(singleThreaded = true) when tests are defined in base class. 
According to - // https://github.com/cbeust/testng/issues/2361#issuecomment-688393166 a workaround it to add a dummy test to the leaf test class. - } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java index f9316bac26461..89b1a2445076b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java @@ -36,8 +36,7 @@ import org.apache.parquet.schema.MessageType; import org.intellij.lang.annotations.Language; import org.joda.time.DateTimeZone; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.io.UncheckedIOException; @@ -51,7 +50,6 @@ import java.util.Properties; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; -import java.util.stream.Stream; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.transform; @@ -60,8 +58,6 @@ import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.spi.type.Decimals.overflows; -import static io.trino.testing.DataProviders.cartesianProduct; -import static io.trino.testing.DataProviders.toDataProvider; import static io.trino.tpch.TpchTable.NATION; import static java.lang.Integer.MAX_VALUE; import static java.lang.String.format; @@ -100,8 +96,77 @@ protected QueryRunner createQueryRunner() * Tests if Parquet decimal with given precision and scale can be read into Trino decimal with different precision and scale * if Parquet decimal value could be rescaled into Trino decimal without losing most and least significant digits. 
*/ - @Test(dataProvider = "testReadingMatchingPrecisionDataProvider") - public void testReadingMatchingPrecision( + @Test + public void testReadingMatchingPrecision() + { + for (WriterVersion writerVersion : WriterVersion.values()) { + testReadingMatchingPrecision( + 10, + 2, + false, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingMatchingPrecision( + 10, + 2, + true, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingMatchingPrecision( + 4, + 2, + false, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingMatchingPrecision( + 4, + 2, + true, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingMatchingPrecision( + 14, + 2, + false, + ImmutableList.of("10.01", "10", "1.23", maximumValue(14, 2), minimumValue(14, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(14, 2), minimumValue(14, 2)), + writerVersion); + + testReadingMatchingPrecision( + 6, + 3, + false, + ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), + ImmutableList.of("10.010", "10.000", "1.230", maximumValue(6, 3), minimumValue(6, 3)), + writerVersion); + + testReadingMatchingPrecision( + 6, + 3, + true, + ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), + ImmutableList.of("10.010", "10.000", "1.230", maximumValue(6, 3), minimumValue(6, 3)), + writerVersion); + + testReadingMatchingPrecision( + 38, + 4, + false, + ImmutableList.of("10.01", "10", "1.23", maximumValue(38, 4), minimumValue(38, 4)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(38, 4), minimumValue(38, 4)), + writerVersion); + } + } + + private void testReadingMatchingPrecision( int precision, int scale, boolean forceFixedLengthArray, @@ -123,42 +188,145 @@ public void testReadingMatchingPrecision( dropTable(tableName); } - @DataProvider - public Object[][] testReadingMatchingPrecisionDataProvider() - { - return withWriterVersion(new Object[][] { - {10, 2, false, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(10, 2), minimumValue(10, 2))}, - {10, 2, true, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(10, 2), minimumValue(10, 2))}, - {4, 2, false, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2))}, - {4, 2, true, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2))}, - {14, 2, false, - ImmutableList.of("10.01", "10", "1.23", maximumValue(14, 2), minimumValue(14, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(14, 2), minimumValue(14, 2))}, - {6, 3, false, - ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), - 
ImmutableList.of("10.010", "10.000", "1.230", maximumValue(6, 3), minimumValue(6, 3))}, - {6, 3, true, - ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), - ImmutableList.of("10.010", "10.000", "1.230", maximumValue(6, 3), minimumValue(6, 3))}, - {38, 4, false, - ImmutableList.of("10.01", "10", "1.23", maximumValue(38, 4), minimumValue(38, 4)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(38, 4), minimumValue(38, 4))} - }); - } - /** * Tests if Parquet decimal with given precision and scale can be read into Trino decimal with different precision and scale * if Parquet decimal value could be rescaled into Trino decimal without loosing most and least significant digits. */ - @Test(dataProvider = "testReadingRescaledDecimalsProvider") + @Test + public void testReadingRescaledDecimals() + { + for (WriterVersion writerVersion : WriterVersion.values()) { + testReadingRescaledDecimals( + 10, + 2, + false, + 12, + 4, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRescaledDecimals( + 10, + 2, + true, + 13, + 5, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.01000", "10.0000", "1.23000", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRescaledDecimals( + 4, + 2, + false, + 6, + 4, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingRescaledDecimals( + 4, + 2, + false, + 6, + 2, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingRescaledDecimals( + 10, + 2, + false, + 11, + 3, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.010", "10.000", "1.230", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRescaledDecimals( + 10, 2, + true, + 12, + 4, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRescaledDecimals( + 4, + 2, + false, + 10, + 5, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01000", "10.00000", "1.23000", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingRescaledDecimals( + 4, + 2, + true, + 10, + 5, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01000", "10.00000", "1.23000", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingRescaledDecimals( + 14, + 2, + false, + 20, + 3, + ImmutableList.of("10.01", "10", "1.23", maximumValue(14, 2), minimumValue(14, 2)), + ImmutableList.of("10.010", "10.000", "1.230", maximumValue(14, 2), minimumValue(14, 2)), + writerVersion); + + testReadingRescaledDecimals( + 6, + 3, + false, + 9, + 6, + ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), + ImmutableList.of("10.010000", "10.000000", "1.230000", maximumValue(6, 3), minimumValue(6, 3)), + writerVersion); + + testReadingRescaledDecimals( + 6, + 3, + true, + 9, + 6, + 
ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), + ImmutableList.of("10.010000", "10.000000", "1.230000", maximumValue(6, 3), minimumValue(6, 3)), + writerVersion); + + testReadingRescaledDecimals( + 10, + 2, + false, + 38, + 4, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRescaledDecimals( + 18, + 4, + false, + 38, + 14, + ImmutableList.of("10.01", "10", "1.23", maximumValue(18, 4), minimumValue(18, 4)), + ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(18, 4), minimumValue(18, 4)), + writerVersion); + } + } + public void testReadingRescaledDecimals( int precision, int scale, @@ -183,58 +351,56 @@ public void testReadingRescaledDecimals( dropTable(tableName); } - @DataProvider - public Object[][] testReadingRescaledDecimalsProvider() - { - // parquetPrecision, parquetScale, useFixedLengthArray, schemaPrecision, schemaScale, writeValues, expectedValues - return withWriterVersion(new Object[][] { - {10, 2, false, 12, 4, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2))}, - {10, 2, true, 13, 5, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.01000", "10.0000", "1.23000", maximumValue(10, 2), minimumValue(10, 2))}, - {4, 2, false, 6, 4, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(4, 2), minimumValue(4, 2))}, - {4, 2, false, 6, 2, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01", "10.00", "1.23", maximumValue(4, 2), minimumValue(4, 2))}, - {10, 2, false, 11, 3, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.010", "10.000", "1.230", maximumValue(10, 2), minimumValue(10, 2))}, - {10, 2, true, 12, 4, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2))}, - {4, 2, false, 10, 5, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01000", "10.00000", "1.23000", maximumValue(4, 2), minimumValue(4, 2))}, - {4, 2, true, 10, 5, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01000", "10.00000", "1.23000", maximumValue(4, 2), minimumValue(4, 2))}, - {14, 2, false, 20, 3, - ImmutableList.of("10.01", "10", "1.23", maximumValue(14, 2), minimumValue(14, 2)), - ImmutableList.of("10.010", "10.000", "1.230", maximumValue(14, 2), minimumValue(14, 2))}, - {6, 3, false, 9, 6, - ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), - ImmutableList.of("10.010000", "10.000000", "1.230000", maximumValue(6, 3), minimumValue(6, 3))}, - {6, 3, true, 9, 6, - ImmutableList.of("10.01", "10", "1.23", maximumValue(6, 3), minimumValue(6, 3)), - ImmutableList.of("10.010000", "10.000000", "1.230000", maximumValue(6, 3), minimumValue(6, 3))}, - {10, 2, false, 38, 4, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(10, 2), minimumValue(10, 2))}, - {18, 4, 
false, 38, 14, - ImmutableList.of("10.01", "10", "1.23", maximumValue(18, 4), minimumValue(18, 4)), - ImmutableList.of("10.0100", "10.0000", "1.2300", maximumValue(18, 4), minimumValue(18, 4))}, - }); - } - /** * Tests if Parquet decimal with given precision and scale can be read into Trino decimal with different precision and scale * if Parquet decimal value will be rounded to fit into Trino decimal. */ - @Test(dataProvider = "testReadingRoundedDecimalsProvider") + @Test + public void testReadingRoundedDecimals() + { + for (WriterVersion writerVersion : WriterVersion.values()) { + testReadingRoundedDecimals( + 10, + 2, + false, + 12, + 1, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + + testReadingRoundedDecimals( + 9, + 2, + true, + 12, + 1, + ImmutableList.of("10.01", "10", "1.23", maximumValue(9, 2), minimumValue(9, 2)), + ImmutableList.of("10.01", "10", "1.23", maximumValue(9, 2), minimumValue(9, 2)), + writerVersion); + + testReadingRoundedDecimals( + 4, + 2, + false, + 7, + 1, + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), + writerVersion); + + testReadingRoundedDecimals( + 10, + 2, + false, + 12, + 1, + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), + writerVersion); + } + } + public void testReadingRoundedDecimals( int precision, int scale, @@ -259,32 +425,25 @@ public void testReadingRoundedDecimals( dropTable(tableName); } - @DataProvider - public Object[][] testReadingRoundedDecimalsProvider() - { - // parquetPrecision, parquetScale, useFixedLengthArray, schemaPrecision, schemaScale, writeValues, expectedValues - return withWriterVersion(new Object[][] { - {10, 2, false, 12, 1, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2))}, - {9, 2, true, 12, 1, - ImmutableList.of("10.01", "10", "1.23", maximumValue(9, 2), minimumValue(9, 2)), - ImmutableList.of("10.01", "10", "1.23", maximumValue(9, 2), minimumValue(9, 2))}, - {4, 2, false, 7, 1, - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2)), - ImmutableList.of("10.01", "10", "1.23", maximumValue(4, 2), minimumValue(4, 2))}, - {10, 2, false, 12, 1, - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2)), - ImmutableList.of("10.01", "10", "1.23", maximumValue(10, 2), minimumValue(10, 2))}, - }); - } - /** * Tests if Parquet decimal with given precision and scale cannot be read into Presto decimal with different precision and scale * because when rescaling decimal we would loose most significant digits. 
*/ - @Test(dataProvider = "testReadingNonRescalableDecimalsProvider") - public void testReadingNonRescalableDecimals( + @Test + public void testReadingNonRescalableDecimals() + { + for (WriterVersion writerVersion : WriterVersion.values()) { + testReadingNonRescalableDecimals(4, 2, false, 4, 3, ImmutableList.of("10.01"), writerVersion); + testReadingNonRescalableDecimals(10, 2, false, 10, 3, ImmutableList.of("12345678.91"), writerVersion); + testReadingNonRescalableDecimals(10, 2, false, 3, 2, ImmutableList.of("10.01"), writerVersion); + testReadingNonRescalableDecimals(10, 2, true, 14, 7, ImmutableList.of("99999999.99"), writerVersion); + testReadingNonRescalableDecimals(10, 2, false, 10, 4, ImmutableList.of("99999999.99"), writerVersion); + testReadingNonRescalableDecimals(18, 8, false, 32, 23, ImmutableList.of("1234567890.12345678"), writerVersion); + testReadingNonRescalableDecimals(20, 8, false, 32, 21, ImmutableList.of("123456789012.12345678"), writerVersion); + } + } + + private void testReadingNonRescalableDecimals( int precision, int scale, boolean forceFixedLengthArray, @@ -310,23 +469,22 @@ public void testReadingNonRescalableDecimals( dropTable(tableName); } - @DataProvider - public Object[][] testReadingNonRescalableDecimalsProvider() + @Test + public void testParquetLongFixedLenByteArrayWithTrinoShortDecimal() { - // parquetPrecision, parquetScale, useFixedLengthArray, schemaPrecision, schemaScale, writeValues - return withWriterVersion(new Object[][] { - {4, 2, false, 4, 3, ImmutableList.of("10.01")}, - {10, 2, false, 10, 3, ImmutableList.of("12345678.91")}, - {10, 2, false, 3, 2, ImmutableList.of("10.01")}, - {10, 2, true, 14, 7, ImmutableList.of("99999999.99")}, - {10, 2, false, 10, 4, ImmutableList.of("99999999.99")}, - {18, 8, false, 32, 23, ImmutableList.of("1234567890.12345678")}, - {20, 8, false, 32, 21, ImmutableList.of("123456789012.12345678")}, - }); + for (WriterVersion writerVersion : WriterVersion.values()) { + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(5, 2, 19, 2, "-5", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(5, 2, 20, 2, "999.99", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(7, 2, 24, 2, "-99999.99", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(10, 2, 26, 2, "99999999.99", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(14, 4, 30, 4, "99999999.99", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(18, 8, 32, 8, "1234567890.12345678", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(18, 8, 32, 8, "123456789012.12345678", writerVersion); + testParquetLongFixedLenByteArrayWithTrinoShortDecimal(18, 8, 38, 8, "4989875563210.12345678", writerVersion); + } } - @Test(dataProvider = "testParquetLongFixedLenByteArrayWithTrinoShortDecimalProvider") - public void testParquetLongFixedLenByteArrayWithTrinoShortDecimal( + private void testParquetLongFixedLenByteArrayWithTrinoShortDecimal( int schemaPrecision, int schemaScale, int parquetPrecision, @@ -368,22 +526,6 @@ public void testParquetLongFixedLenByteArrayWithTrinoShortDecimal( dropTable(tableName); } - @DataProvider - public Object[][] testParquetLongFixedLenByteArrayWithTrinoShortDecimalProvider() - { - // schemaPrecision, schemaScale, parquetPrecision, parquetScale, writeValue - return withWriterVersion(new Object[][] { - {5, 2, 19, 2, "-5"}, - {5, 2, 20, 2, "999.99"}, - {7, 2, 24, 2, "-99999.99"}, - {10, 2, 26, 2, "99999999.99"}, - {14, 4, 30, 4, 
"99999999.99"}, - {18, 8, 32, 8, "1234567890.12345678"}, - {18, 8, 32, 8, "123456789012.12345678"}, - {18, 8, 38, 8, "4989875563210.12345678"}, - }); - } - protected void createTable(String tableName, int precision, int scale) { assertUpdate(format("CREATE TABLE tpch.%s (value decimal(%d, %d)) WITH (format = 'PARQUET')", tableName, precision, scale)); @@ -518,13 +660,6 @@ private static String generateTableName(String testCase, int precision, int scal return format("%s_%d_%d_%d", testCase, precision, scale, ThreadLocalRandom.current().nextInt(1, MAX_VALUE)); } - private static Object[][] withWriterVersion(Object[][] args) - { - Object[][] versions = Stream.of(WriterVersion.values()) - .collect(toDataProvider()); - return cartesianProduct(args, versions); - } - protected static class ParquetDecimalInsert { private final String columnName; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetPageSourceFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetPageSourceFactory.java index 2a24941ea5c0e..2e67a4838f861 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetPageSourceFactory.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetPageSourceFactory.java @@ -22,8 +22,7 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.Optional; @@ -37,8 +36,14 @@ public class TestParquetPageSourceFactory { - @Test(dataProvider = "useColumnNames") - public void testGetNestedMixedRepetitionColumnType(boolean useColumnNames) + @Test + public void testGetNestedMixedRepetitionColumnType() + { + testGetNestedMixedRepetitionColumnType(true); + testGetNestedMixedRepetitionColumnType(false); + } + + private void testGetNestedMixedRepetitionColumnType(boolean useColumnNames) { RowType rowType = rowType( RowType.field( @@ -66,13 +71,4 @@ public void testGetNestedMixedRepetitionColumnType(boolean useColumnNames) new PrimitiveType(REQUIRED, INT32, "required_level3")))); assertThat(ParquetPageSourceFactory.getColumnType(columnHandle, fileSchema, useColumnNames).get()).isEqualTo(fileSchema.getType("optional_level1")); } - - @DataProvider - public Object[][] useColumnNames() - { - return new Object[][] { - {true}, // use column name - {false} // use column index - }; - } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestQuickParquetReader.java similarity index 64% rename from plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReader.java rename to plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestQuickParquetReader.java index b8b43f2f9877e..9488344d59185 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestQuickParquetReader.java @@ -13,25 +13,19 @@ */ package io.trino.plugin.hive.parquet; -import org.testng.annotations.Test; +import org.junit.jupiter.api.parallel.Execution; + +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; // Failing on multiple threads because of org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper // uses a single record writer across all threads. 
// For example org.apache.parquet.column.values.factory.DefaultValuesWriterFactory#DEFAULT_V1_WRITER_FACTORY is shared mutable state. -@Test(singleThreaded = true) -public class TestParquetReader +@Execution(SAME_THREAD) +public class TestQuickParquetReader extends AbstractTestParquetReader { - public TestParquetReader() + public TestQuickParquetReader() { super(ParquetTester.quickParquetTester()); } - - @Test - public void forceTestNgToRespectSingleThreaded() - { - // TODO: Remove after updating TestNG to 7.4.0+ (https://github.com/trinodb/trino/issues/8571) - // TestNG doesn't enforce @Test(singleThreaded = true) when tests are defined in base class. According to - // https://github.com/cbeust/testng/issues/2361#issuecomment-688393166 a workaround it to add a dummy test to the leaf test class. - } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestReadingTimeLogicalAnnotation.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestReadingTimeLogicalAnnotation.java index b229f111478d5..2808a6da3de73 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestReadingTimeLogicalAnnotation.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestReadingTimeLogicalAnnotation.java @@ -14,16 +14,22 @@ package io.trino.plugin.hive.parquet; import com.google.common.io.Resources; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.HiveQueryRunner; +import io.trino.spi.security.ConnectorIdentity; import io.trino.sql.query.QueryAssertions; import io.trino.testing.DistributedQueryRunner; import org.junit.jupiter.api.Test; -import java.io.File; +import java.io.OutputStream; +import java.net.URL; +import java.util.UUID; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.testing.MaterializedResult.resultBuilder; -import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; public class TestReadingTimeLogicalAnnotation @@ -32,17 +38,27 @@ public class TestReadingTimeLogicalAnnotation public void testReadingTimeLogicalAnnotationAsBigInt() throws Exception { - File parquetFile = new File(Resources.getResource("parquet_file_with_time_logical_annotation").toURI()); try (DistributedQueryRunner queryRunner = HiveQueryRunner.builder().build(); QueryAssertions assertions = new QueryAssertions(queryRunner)) { - queryRunner.execute(format(""" + URL resourceLocation = Resources.getResource("parquet_file_with_time_logical_annotation/time-micros.parquet"); + + TrinoFileSystem fileSystem = getConnectorService(queryRunner, TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + + Location tempDir = Location.of("local:///temp_" + UUID.randomUUID()); + fileSystem.createDirectory(tempDir); + Location dataFile = tempDir.appendPath("data.parquet"); + try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) { + Resources.copy(resourceLocation, out); + } + + queryRunner.execute(""" CREATE TABLE table_with_time_logical_annotation ( "opens" row(member0 bigint, member_1 varchar)) WITH ( external_location = '%s', format = 'PARQUET') - """, - parquetFile.getAbsolutePath())); + """.formatted(dataFile.parentDirectory())); assertThat(assertions.query("SELECT opens.member0 FROM table_with_time_logical_annotation GROUP BY 1 ORDER BY 1 LIMIT 5")) 
.matches(resultBuilder(queryRunner.getDefaultSession(), BIGINT) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/predicate/TestParquetPredicateUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/predicate/TestParquetPredicateUtils.java index 9aec2afa6bc60..8a64032f6c4bc 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/predicate/TestParquetPredicateUtils.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/predicate/TestParquetPredicateUtils.java @@ -29,8 +29,7 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.Map; @@ -53,8 +52,8 @@ public class TestParquetPredicateUtils { - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainPrimitiveArray(boolean useColumnNames) + @Test + public void testParquetTupleDomainPrimitiveArray() { HiveColumnHandle columnHandle = createBaseColumn("my_array", 0, HiveType.valueOf("array"), new ArrayType(INTEGER), REGULAR, Optional.empty()); TupleDomain domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(INTEGER)))); @@ -64,12 +63,13 @@ public void testParquetTupleDomainPrimitiveArray(boolean useColumnNames) new GroupType(REPEATED, "bag", new PrimitiveType(OPTIONAL, INT32, "array_element")))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); - TupleDomain tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, useColumnNames); - assertThat(tupleDomain.isAll()).isTrue(); + + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, false).isAll()).isTrue(); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainStructArray(boolean useColumnNames) + @Test + public void testParquetTupleDomainStructArray() { RowType.Field rowField = new RowType.Field(Optional.of("a"), INTEGER); RowType rowType = RowType.from(ImmutableList.of(rowField)); @@ -84,12 +84,19 @@ public void testParquetTupleDomainStructArray(boolean useColumnNames) new GroupType(OPTIONAL, "array_element", new PrimitiveType(OPTIONAL, INT32, "a"))))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); - TupleDomain tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, useColumnNames); - assertThat(tupleDomain.isAll()).isTrue(); + + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, false).isAll()).isTrue(); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainPrimitive(boolean useColumnNames) + @Test + public void testParquetTupleDomainPrimitive() + { + testParquetTupleDomainPrimitive(true); + testParquetTupleDomainPrimitive(false); + } + + private void testParquetTupleDomainPrimitive(boolean useColumnNames) { HiveColumnHandle columnHandle = createBaseColumn("my_primitive", 0, HiveType.valueOf("bigint"), BIGINT, REGULAR, Optional.empty()); Domain singleValueDomain = Domain.singleValue(BIGINT, 123L); @@ -109,8 +116,8 @@ public void testParquetTupleDomainPrimitive(boolean useColumnNames) 
assertThat(predicateDomain).isEqualTo(singleValueDomain); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainStruct(boolean useColumnNames) + @Test + public void testParquetTupleDomainStruct() { RowType rowType = rowType( RowType.field("a", INTEGER), @@ -125,12 +132,19 @@ public void testParquetTupleDomainStruct(boolean useColumnNames) new PrimitiveType(OPTIONAL, INT32, "b"), new PrimitiveType(OPTIONAL, INT32, "c"))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); - TupleDomain tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, useColumnNames); - assertThat(tupleDomain.isAll()).isTrue(); + + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, false).isAll()).isTrue(); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainStructWithPrimitiveColumnPredicate(boolean useColumNames) + @Test + public void testParquetTupleDomainStructWithPrimitiveColumnPredicate() + { + testParquetTupleDomainStructWithPrimitiveColumnPredicate(true); + testParquetTupleDomainStructWithPrimitiveColumnPredicate(false); + } + + private void testParquetTupleDomainStructWithPrimitiveColumnPredicate(boolean useColumNames) { RowType baseType = rowType( RowType.field("a", INTEGER), @@ -167,8 +181,8 @@ public void testParquetTupleDomainStructWithPrimitiveColumnPredicate(boolean use assertThat(calculatedTupleDomain.getDomains().get()).containsEntry(selectedColumnDescriptor, predicateDomain); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainStructWithComplexColumnPredicate(boolean useColumNames) + @Test + public void testParquetTupleDomainStructWithComplexColumnPredicate() { RowType c1Type = rowType( RowType.field("c1", INTEGER), @@ -206,12 +220,12 @@ public void testParquetTupleDomainStructWithComplexColumnPredicate(boolean useCo new PrimitiveType(OPTIONAL, INT32, "c2")))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); // skip looking up predicates for complex types as Parquet only stores stats for primitives - TupleDomain calculatedTupleDomain = getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, useColumNames); - assertThat(calculatedTupleDomain.isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, false).isAll()).isTrue(); } - @Test(dataProvider = "useColumnNames") - public void testParquetTupleDomainStructWithMissingPrimitiveColumn(boolean useColumnNames) + @Test + public void testParquetTupleDomainStructWithMissingPrimitiveColumn() { RowType baseType = rowType( RowType.field("a", INTEGER), @@ -241,12 +255,12 @@ public void testParquetTupleDomainStructWithMissingPrimitiveColumn(boolean useCo new PrimitiveType(OPTIONAL, INT32, "a"), new PrimitiveType(OPTIONAL, INT32, "b"))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); - TupleDomain calculatedTupleDomain = getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, useColumnNames); - assertThat(calculatedTupleDomain.isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, tupleDomain, fileSchema, false).isAll()).isTrue(); } - @Test(dataProvider = 
"useColumnNames") - public void testParquetTupleDomainMap(boolean useColumnNames) + @Test + public void testParquetTupleDomainMap() { MapType mapType = new MapType(INTEGER, INTEGER, new TypeOperators()); @@ -261,16 +275,7 @@ public void testParquetTupleDomainMap(boolean useColumnNames) new PrimitiveType(OPTIONAL, INT32, "value")))); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema); - TupleDomain tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, useColumnNames); - assertThat(tupleDomain.isAll()).isTrue(); - } - - @DataProvider - public Object[][] useColumnNames() - { - return new Object[][] { - {true}, // use column name - {false} // use column index - }; + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true).isAll()).isTrue(); + assertThat(getParquetTupleDomain(descriptorsByPath, domain, fileSchema, false).isAll()).isTrue(); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestHiveS3MinioQueries.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestHiveS3MinioQueries.java index ecac36e437fb8..0f40e6abb7b02 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestHiveS3MinioQueries.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestHiveS3MinioQueries.java @@ -15,21 +15,15 @@ import com.google.common.collect.ImmutableMap; import io.trino.plugin.hive.HiveQueryRunner; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.QueryRunner; import io.trino.testing.containers.Minio; import org.junit.jupiter.api.Test; -import java.io.File; import java.util.regex.Matcher; import java.util.regex.Pattern; import static com.google.common.base.Verify.verify; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; import static io.trino.testing.containers.Minio.MINIO_SECRET_KEY; @@ -49,18 +43,8 @@ protected QueryRunner createQueryRunner() minio.start(); return HiveQueryRunner.builder() - .setMetastore(queryRunner -> { - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toFile(); - return new FileHiveMetastore( - new NodeVersion("testversion"), - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreConfig().isHideDeltaLakeTables(), - new FileHiveMetastoreConfig() - .setCatalogDirectory(baseDir.toURI().toString()) - .setDisableLocationChecks(true) // matches Glue behavior - .setMetastoreUser("test")); - }) .setHiveProperties(ImmutableMap.builder() + .put("hive.metastore.disable-location-checks", "true") .put("hive.s3.aws-access-key", MINIO_ACCESS_KEY) .put("hive.s3.aws-secret-key", MINIO_SECRET_KEY) .put("hive.s3.endpoint", minio.getMinioAddress()) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3FileSystemAccessOperations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3FileSystemAccessOperations.java index a57b47e67f5b3..4e41f64437152 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3FileSystemAccessOperations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3FileSystemAccessOperations.java @@ -18,21 +18,9 @@ import 
com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; import io.airlift.units.DataSize; -import io.opentelemetry.api.OpenTelemetry; -import io.opentelemetry.sdk.OpenTelemetrySdk; -import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; -import io.opentelemetry.sdk.trace.SdkTracerProvider; import io.opentelemetry.sdk.trace.data.SpanData; -import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; import io.trino.Session; -import io.trino.filesystem.s3.S3FileSystemConfig; -import io.trino.filesystem.s3.S3FileSystemFactory; import io.trino.plugin.hive.HiveQueryRunner; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; -import io.trino.plugin.hive.metastore.tracing.TracingHiveMetastore; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; @@ -46,7 +34,7 @@ import java.util.List; import java.util.Map; -import java.util.Optional; +import java.util.function.Predicate; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.Maps.uniqueIndex; @@ -67,7 +55,6 @@ public class TestS3FileSystemAccessOperations private static final String BUCKET = "test-bucket"; private Minio minio; - private InMemorySpanExporter spanExporter; @Override protected QueryRunner createQueryRunner() @@ -77,33 +64,9 @@ protected QueryRunner createQueryRunner() minio.start(); minio.createBucket(BUCKET); - spanExporter = closeAfterClass(InMemorySpanExporter.create()); - - SdkTracerProvider tracerProvider = SdkTracerProvider.builder() - .addSpanProcessor(SimpleSpanProcessor.create(spanExporter)) - .build(); - - OpenTelemetry openTelemetry = OpenTelemetrySdk.builder() - .setTracerProvider(tracerProvider) - .build(); - return HiveQueryRunner.builder() - .setMetastore(ignored -> new TracingHiveMetastore( - openTelemetry.getTracer("test"), - new FileHiveMetastore( - new NodeVersion("testversion"), - new S3FileSystemFactory(openTelemetry, new S3FileSystemConfig() - .setAwsAccessKey(MINIO_ACCESS_KEY) - .setAwsSecretKey(MINIO_SECRET_KEY) - .setRegion(MINIO_REGION) - .setEndpoint(minio.getMinioAddress()) - .setPathStyleAccess(true)), - new HiveMetastoreConfig().isHideDeltaLakeTables(), - new FileHiveMetastoreConfig() - .setCatalogDirectory("s3://%s/catalog".formatted(BUCKET)) - .setDisableLocationChecks(true) // matches Glue behavior - .setMetastoreUser("test")))) .setHiveProperties(ImmutableMap.builder() + .put("hive.metastore.disable-location-checks", "true") .put("fs.hadoop.enabled", "false") .put("fs.native-s3.enabled", "true") .put("s3.aws-access-key", MINIO_ACCESS_KEY) @@ -112,8 +75,9 @@ protected QueryRunner createQueryRunner() .put("s3.endpoint", minio.getMinioAddress()) .put("s3.path-style-access", "true") .put("hive.non-managed-table-writes-enabled", "true") + .put("hive.metastore", "file") + .put("hive.metastore.catalog.dir", "s3://%s/catalog".formatted(BUCKET)) .buildOrThrow()) - .setOpenTelemetry(openTelemetry) .setInitialSchemasLocationBase("s3://" + BUCKET) .build(); } @@ -122,7 +86,6 @@ protected QueryRunner createQueryRunner() public void tearDown() { // closed by closeAfterClass - spanExporter = null; minio = null; } @@ -202,25 +165,34 @@ private void assertFileSystemAccesses(@Language("SQL") String query, Multiset expectedAccesses) { DistributedQueryRunner 
queryRunner = getDistributedQueryRunner(); - spanExporter.reset(); queryRunner.executeWithQueryId(session, query); - assertMultisetsEqual(getOperations(), expectedAccesses); + assertMultisetsEqual(getOperations(queryRunner.getSpans()), expectedAccesses); } - private Multiset getOperations() + private static Multiset getOperations(List items) { - List items = spanExporter.getFinishedSpanItems(); Map spansById = uniqueIndex(items, SpanData::getSpanId); return items.stream() .filter(span -> span.getName().startsWith("S3.")) - .filter(span -> Optional.ofNullable(span.getParentSpanId()) - .map(spansById::get) - .map(parent -> !parent.getName().startsWith("HiveMetastore.")) - .orElse(true)) + .filter(span -> !hasAncestor(span, spansById, parent -> parent.getName().startsWith("HiveMetastore."))) .map(SpanData::getName) .collect(toCollection(HashMultiset::create)); } + private static boolean hasAncestor(SpanData span, Map spansById, Predicate predicate) + { + while (true) { + SpanData parent = spansById.get(span.getParentSpanId()); + if (parent == null) { + return false; + } + if (predicate.test(parent)) { + return true; + } + span = parent; + } + } + private static int occurrences(StorageFormat tableType, int orcValue, int parquetValue) { checkArgument(!(orcValue == parquetValue), "No need to use Occurrences when ORC and Parquet"); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/util/TestLazyMap.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/util/TestLazyMap.java index 6fbe9fffc81ec..34bf2bd0bf20d 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/util/TestLazyMap.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/util/TestLazyMap.java @@ -28,7 +28,7 @@ import static org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject; import static org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory.getLazySimpleMapObjectInspector; import static org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector; -import static org.testng.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class TestLazyMap { @@ -45,7 +45,7 @@ public void test() assertMapDecode("\\N\u0003ignored\u0002\u0003", ImmutableMap.of(lazyString(""), lazyString(""))); HashMap expectedMap = new HashMap<>(); - expectedMap.put("null", null); + expectedMap.put(lazyString("null"), null); assertMapDecode("\\N\u0003ignored\u0002null\u0003\\N", expectedMap); } @@ -63,7 +63,7 @@ public static void assertMapDecode(String encodedMap, Map map = lazyMap.getMap(); - assertEquals(map, expectedMap); + assertThat(map).isEqualTo(expectedMap); } private static LazyString lazyString(String string) diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/column_name_with_dot/20230725_101306_00056_6ramm_28cb680f-d745-40c6-98ad-b56c8ee94ac6 b/plugin/trino-hive/src/test/resources/parquet_page_skipping/column_name_with_dot/data.parquet similarity index 100% rename from plugin/trino-hive/src/test/resources/parquet_page_skipping/column_name_with_dot/20230725_101306_00056_6ramm_28cb680f-d745-40c6-98ad-b56c8ee94ac6 rename to plugin/trino-hive/src/test/resources/parquet_page_skipping/column_name_with_dot/data.parquet diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/lineitem_sorted_by_suppkey/000000_0_a94130b9-2234-4000-9162-4114aefcd919_20230725_103128_00063_6ramm 
b/plugin/trino-hive/src/test/resources/parquet_page_skipping/lineitem_sorted_by_suppkey/data.parquet similarity index 100% rename from plugin/trino-hive/src/test/resources/parquet_page_skipping/lineitem_sorted_by_suppkey/000000_0_a94130b9-2234-4000-9162-4114aefcd919_20230725_103128_00063_6ramm rename to plugin/trino-hive/src/test/resources/parquet_page_skipping/lineitem_sorted_by_suppkey/data.parquet diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/orders_sorted_by_totalprice/000000_0_ca5374d9-007e-4bbd-8717-bac6677b6ee7_20230725_074756_00016_6ramm b/plugin/trino-hive/src/test/resources/parquet_page_skipping/orders_sorted_by_totalprice/data.parquet similarity index 100% rename from plugin/trino-hive/src/test/resources/parquet_page_skipping/orders_sorted_by_totalprice/000000_0_ca5374d9-007e-4bbd-8717-bac6677b6ee7_20230725_074756_00016_6ramm rename to plugin/trino-hive/src/test/resources/parquet_page_skipping/orders_sorted_by_totalprice/data.parquet diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/random/20230725_092119_00042_6ramm_25f11bb4-b7f7-4d05-afff-ba6b72bfb531 b/plugin/trino-hive/src/test/resources/parquet_page_skipping/random/data.parquet similarity index 100% rename from plugin/trino-hive/src/test/resources/parquet_page_skipping/random/20230725_092119_00042_6ramm_25f11bb4-b7f7-4d05-afff-ba6b72bfb531 rename to plugin/trino-hive/src/test/resources/parquet_page_skipping/random/data.parquet diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/issue-16801.parquet b/plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/data.parquet similarity index 100% rename from plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/issue-16801.parquet rename to plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/data.parquet diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a89..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc deleted file mode 100644 index df1434b32cf94..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc deleted file mode 100644 index 5eda6fd9ac791..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc.crc 
b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc.crc deleted file mode 100644 index 574388cf75d0c..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc.crc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc deleted file mode 100644 index f06bb44a68dc9..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc deleted file mode 100644 index eb4387fc03ac4..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc deleted file mode 100644 index c2aa001763ea7..0000000000000 Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc and /dev/null differ diff --git a/plugin/trino-hive/src/test/resources/with_short_zone_id/data/part-00000-cab83205-643e-4b22-9846-54395fde4199-c000 b/plugin/trino-hive/src/test/resources/with_short_zone_id/data/data.orc similarity index 100% rename from plugin/trino-hive/src/test/resources/with_short_zone_id/data/part-00000-cab83205-643e-4b22-9846-54395fde4199-c000 rename to plugin/trino-hive/src/test/resources/with_short_zone_id/data/data.orc diff --git a/plugin/trino-http-event-listener/pom.xml b/plugin/trino-http-event-listener/pom.xml index 4f81b6d59a6ce..8e1fea2806ab6 100644 --- a/plugin/trino-http-event-listener/pom.xml +++ b/plugin/trino-http-event-listener/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-hudi/pom.xml b/plugin/trino-hudi/pom.xml index 5e6c486cf3e17..0f08bcd4cc63b 100644 --- a/plugin/trino-hudi/pom.xml +++ b/plugin/trino-hudi/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,7 +15,8 @@ ${project.parent.basedir} - 0.12.3 + 0.14.0 + 2.4.9 @@ -188,6 +189,13 @@ runtime + + com.esotericsoftware + kryo-shaded + 4.0.2 + test + + io.airlift junit-extensions @@ -217,6 +225,12 @@ io.trino trino-main test + + + io.trino + re2j + + @@ -224,6 +238,12 @@ trino-main test-jar test + + + io.trino + re2j + + @@ -275,6 +295,32 @@ test + + org.apache.hbase + hbase-client + ${dep.hbase.version} + test + + + * + * + + + + + + org.apache.hbase + hbase-server + ${dep.hbase.version} + test + + + * + * + + + + org.apache.hudi hudi-client-common diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConfig.java 
b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConfig.java index c7fd2f84e4bbc..306287f9ff9fd 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConfig.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConfig.java @@ -50,6 +50,7 @@ public class HudiConfig private int splitLoaderParallelism = 4; private int splitGeneratorParallelism = 4; private long perTransactionMetastoreCacheMaximumSize = 2000; + private boolean queryPartitionFilterRequired; public List getColumnsToHide() { @@ -193,4 +194,17 @@ public HudiConfig setPerTransactionMetastoreCacheMaximumSize(long perTransaction this.perTransactionMetastoreCacheMaximumSize = perTransactionMetastoreCacheMaximumSize; return this; } + + @Config("hudi.query-partition-filter-required") + @ConfigDescription("Require a filter on at least one partition column") + public HudiConfig setQueryPartitionFilterRequired(boolean queryPartitionFilterRequired) + { + this.queryPartitionFilterRequired = queryPartitionFilterRequired; + return this; + } + + public boolean isQueryPartitionFilterRequired() + { + return queryPartitionFilterRequired; + } } diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnector.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnector.java index a4da3a1eae4f1..18bb5a85308e6 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnector.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnector.java @@ -13,8 +13,10 @@ */ package io.trino.plugin.hudi; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.inject.Injector; import io.airlift.bootstrap.LifeCycleManager; import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorMetadata; import io.trino.plugin.base.session.SessionPropertiesProvider; @@ -42,6 +44,7 @@ public class HudiConnector implements Connector { + private final Injector injector; private final LifeCycleManager lifeCycleManager; private final HudiTransactionManager transactionManager; private final ConnectorSplitManager splitManager; @@ -52,6 +55,7 @@ public class HudiConnector private final List> tableProperties; public HudiConnector( + Injector injector, LifeCycleManager lifeCycleManager, HudiTransactionManager transactionManager, ConnectorSplitManager splitManager, @@ -61,6 +65,7 @@ public HudiConnector( Set sessionPropertiesProviders, List> tableProperties) { + this.injector = requireNonNull(injector, "injector is null"); this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); this.splitManager = requireNonNull(splitManager, "splitManager is null"); @@ -144,4 +149,10 @@ public final void shutdown() { lifeCycleManager.stop(); } + + @VisibleForTesting + public Injector getInjector() + { + return injector; + } } diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java index 21c7a6c03e3e7..e084e2ceadacf 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java @@ -36,6 +36,6 @@ public String getName() public Connector create(String catalogName, Map config, ConnectorContext context) { checkStrictSpiVersionMatch(context, this); - 
return createConnector(catalogName, config, context, Optional.empty(), Optional.empty()); + return createConnector(catalogName, config, context, Optional.empty()); } } diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java index fbbc5e09cb4e7..e732474cae63c 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java @@ -45,23 +45,29 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.function.Function; +import java.util.stream.Stream; import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS; import static io.trino.plugin.hive.util.HiveUtil.columnMetadataGetter; +import static io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles; import static io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles; import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; import static io.trino.plugin.hive.util.HiveUtil.isHudiTable; import static io.trino.plugin.hudi.HudiErrorCode.HUDI_BAD_DATA; import static io.trino.plugin.hudi.HudiSessionProperties.getColumnsToHide; +import static io.trino.plugin.hudi.HudiSessionProperties.isQueryPartitionFilterRequired; import static io.trino.plugin.hudi.HudiTableProperties.LOCATION_PROPERTY; import static io.trino.plugin.hudi.HudiTableProperties.PARTITIONED_BY_PROPERTY; import static io.trino.plugin.hudi.HudiUtil.hudiMetadataExists; import static io.trino.plugin.hudi.model.HudiTableType.COPY_ON_WRITE; +import static io.trino.spi.StandardErrorCode.QUERY_REJECTED; import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE; import static io.trino.spi.connector.SchemaTableName.schemaTableName; import static java.lang.String.format; @@ -114,6 +120,7 @@ public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName.getTableName(), table.get().getStorage().getLocation(), COPY_ON_WRITE, + getPartitionKeyColumnHandles(table.get(), typeManager), TupleDomain.all(), TupleDomain.all()); } @@ -162,12 +169,30 @@ public Optional> applyFilter(C { HudiTableHandle handle = (HudiTableHandle) tableHandle; HudiPredicates predicates = HudiPredicates.from(constraint.getSummary()); + TupleDomain regularColumnPredicates = predicates.getRegularColumnPredicates(); + TupleDomain partitionColumnPredicates = predicates.getPartitionColumnPredicates(); + + // TODO Since the constraint#predicate isn't utilized during split generation. So, + // Let's not add constraint#predicateColumns to newConstraintColumns. 
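The constraint columns collected here feed the new partition-filter validation: when hudi.query-partition-filter-required is enabled, a scan whose constrained columns are disjoint from the table's partition columns is rejected. A small self-contained sketch of that check, using illustrative names rather than the connector's actual classes:

import java.util.Collections;
import java.util.Set;

final class PartitionFilterCheck
{
    // Mirrors the shape of the validateScan check: reject the query when none of the
    // constrained columns is a partition column of the Hudi table.
    static void requirePartitionFilter(String tableName, Set<String> partitionColumns, Set<String> constraintColumns)
    {
        if (!partitionColumns.isEmpty() && Collections.disjoint(constraintColumns, partitionColumns)) {
            throw new IllegalArgumentException(
                    "Filter required on " + tableName + " for at least one of the partition columns: " + String.join(", ", partitionColumns));
        }
    }

    public static void main(String[] args)
    {
        requirePartitionFilter("hudi.tpch.orders", Set.of("ds"), Set.of("ds", "orderkey"));  // passes: a partition column is constrained
        requirePartitionFilter("hudi.tpch.orders", Set.of("ds"), Set.of("orderkey"));        // throws: no partition column is constrained
    }
}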
+ Set newConstraintColumns = Stream.concat( + Stream.concat( + regularColumnPredicates.getDomains().stream() + .map(Map::keySet) + .flatMap(Collection::stream), + partitionColumnPredicates.getDomains().stream() + .map(Map::keySet) + .flatMap(Collection::stream)), + handle.getConstraintColumns().stream()) + .collect(toImmutableSet()); + HudiTableHandle newHudiTableHandle = handle.applyPredicates( - predicates.getPartitionColumnPredicates(), - predicates.getRegularColumnPredicates()); + newConstraintColumns, + partitionColumnPredicates, + regularColumnPredicates); if (handle.getPartitionPredicates().equals(newHudiTableHandle.getPartitionPredicates()) - && handle.getRegularPredicates().equals(newHudiTableHandle.getRegularPredicates())) { + && handle.getRegularPredicates().equals(newHudiTableHandle.getRegularPredicates()) + && handle.getConstraintColumns().equals(newHudiTableHandle.getConstraintColumns())) { return Optional.empty(); } @@ -205,7 +230,7 @@ public List listTables(ConnectorSession session, Optional tableNames = ImmutableList.builder(); for (String schemaName : listSchemas(session, optionalSchemaName)) { - for (String tableName : metastore.getAllTables(schemaName)) { + for (String tableName : metastore.getTables(schemaName)) { tableNames.add(new SchemaTableName(schemaName, tableName)); } } @@ -224,6 +249,27 @@ public Iterator streamTableColumns(ConnectorSession sessio .iterator(); } + @Override + public void validateScan(ConnectorSession session, ConnectorTableHandle handle) + { + HudiTableHandle hudiTableHandle = (HudiTableHandle) handle; + if (isQueryPartitionFilterRequired(session)) { + if (!hudiTableHandle.getPartitionColumns().isEmpty()) { + Set partitionColumns = hudiTableHandle.getPartitionColumns().stream() + .map(HiveColumnHandle::getName) + .collect(toImmutableSet()); + Set constraintColumns = hudiTableHandle.getConstraintColumns().stream() + .map(HiveColumnHandle::getBaseColumnName) + .collect(toImmutableSet()); + if (Collections.disjoint(constraintColumns, partitionColumns)) { + throw new TrinoException( + QUERY_REJECTED, + format("Filter required on %s for at least one of the partition columns: %s", hudiTableHandle.getSchemaTableName(), String.join(", ", partitionColumns))); + } + } + } + } + HiveMetastore getMetastore() { return metastore; diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java index ede43ec3386b6..f7946ff3ef9fa 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java @@ -50,6 +50,7 @@ public class HudiSessionProperties private static final String MAX_SPLITS_PER_SECOND = "max_splits_per_second"; private static final String MAX_OUTSTANDING_SPLITS = "max_outstanding_splits"; private static final String SPLIT_GENERATOR_PARALLELISM = "split_generator_parallelism"; + private static final String QUERY_PARTITION_FILTER_REQUIRED = "query_partition_filter_required"; private final List> sessionProperties; @@ -113,6 +114,11 @@ public HudiSessionProperties(HudiConfig hudiConfig, ParquetReaderConfig parquetR SPLIT_GENERATOR_PARALLELISM, "Number of threads to generate splits from partitions", hudiConfig.getSplitGeneratorParallelism(), + false), + booleanProperty( + QUERY_PARTITION_FILTER_REQUIRED, + "Require a filter on at least one partition column", + hudiConfig.isQueryPartitionFilterRequired(), false)); } @@ -167,4 +173,9 @@ 
public static int getSplitGeneratorParallelism(ConnectorSession session) { return session.getProperty(SPLIT_GENERATOR_PARALLELISM, Integer.class); } + + public static boolean isQueryPartitionFilterRequired(ConnectorSession session) + { + return session.getProperty(QUERY_PARTITION_FILTER_REQUIRED, Boolean.class); + } } diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java index 0da9f2d897a77..9101deb4de0bc 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java @@ -14,13 +14,18 @@ package io.trino.plugin.hudi; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableSet; import io.trino.plugin.hive.HiveColumnHandle; import io.trino.plugin.hudi.model.HudiTableType; import io.trino.spi.connector.ConnectorTableHandle; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.predicate.TupleDomain; +import java.util.List; +import java.util.Set; + import static io.trino.spi.connector.SchemaTableName.schemaTableName; import static java.util.Objects.requireNonNull; @@ -31,6 +36,9 @@ public class HudiTableHandle private final String tableName; private final String basePath; private final HudiTableType tableType; + private final List partitionColumns; + // Used only for validation when config property hudi.query-partition-filter-required is enabled + private final Set constraintColumns; private final TupleDomain partitionPredicates; private final TupleDomain regularPredicates; @@ -40,13 +48,29 @@ public HudiTableHandle( @JsonProperty("tableName") String tableName, @JsonProperty("basePath") String basePath, @JsonProperty("tableType") HudiTableType tableType, + @JsonProperty("partitionColumns") List partitionColumns, @JsonProperty("partitionPredicates") TupleDomain partitionPredicates, @JsonProperty("regularPredicates") TupleDomain regularPredicates) + { + this(schemaName, tableName, basePath, tableType, partitionColumns, ImmutableSet.of(), partitionPredicates, regularPredicates); + } + + public HudiTableHandle( + String schemaName, + String tableName, + String basePath, + HudiTableType tableType, + List partitionColumns, + Set constraintColumns, + TupleDomain partitionPredicates, + TupleDomain regularPredicates) { this.schemaName = requireNonNull(schemaName, "schemaName is null"); this.tableName = requireNonNull(tableName, "tableName is null"); this.basePath = requireNonNull(basePath, "basePath is null"); this.tableType = requireNonNull(tableType, "tableType is null"); + this.partitionColumns = requireNonNull(partitionColumns, "partitionColumns is null"); + this.constraintColumns = requireNonNull(constraintColumns, "constraintColumns is null"); this.partitionPredicates = requireNonNull(partitionPredicates, "partitionPredicates is null"); this.regularPredicates = requireNonNull(regularPredicates, "regularPredicates is null"); } @@ -81,6 +105,19 @@ public TupleDomain getPartitionPredicates() return partitionPredicates; } + @JsonProperty + public List getPartitionColumns() + { + return partitionColumns; + } + + // do not serialize constraint columns as they are not needed on workers + @JsonIgnore + public Set getConstraintColumns() + { + return constraintColumns; + } + @JsonProperty public TupleDomain getRegularPredicates() { @@ -93,6 
+130,7 @@ public SchemaTableName getSchemaTableName() } HudiTableHandle applyPredicates( + Set constraintColumns, TupleDomain partitionTupleDomain, TupleDomain regularTupleDomain) { @@ -101,6 +139,8 @@ HudiTableHandle applyPredicates( tableName, basePath, tableType, + partitionColumns, + constraintColumns, partitionPredicates.intersect(partitionTupleDomain), regularPredicates.intersect(regularTupleDomain)); } diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/InternalHudiConnectorFactory.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/InternalHudiConnectorFactory.java index 9a09752ce57f0..01dd53080eabe 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/InternalHudiConnectorFactory.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/InternalHudiConnectorFactory.java @@ -24,7 +24,6 @@ import io.airlift.json.JsonModule; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.Tracer; -import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.manager.FileSystemModule; import io.trino.plugin.base.CatalogName; import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider; @@ -33,7 +32,6 @@ import io.trino.plugin.base.jmx.MBeanServerModule; import io.trino.plugin.base.session.SessionPropertiesProvider; import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreModule; import io.trino.spi.NodeManager; import io.trino.spi.classloader.ThreadContextClassLoader; @@ -57,8 +55,7 @@ public static Connector createConnector( String catalogName, Map config, ConnectorContext context, - Optional metastore, - Optional fileSystemFactory) + Optional module) { ClassLoader classLoader = InternalHudiConnectorFactory.class.getClassLoader(); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { @@ -67,12 +64,11 @@ public static Connector createConnector( new MBeanModule(), new JsonModule(), new HudiModule(), - new HiveMetastoreModule(metastore), - fileSystemFactory - .map(factory -> (Module) binder -> binder.bind(TrinoFileSystemFactory.class).toInstance(factory)) - .orElseGet(() -> new FileSystemModule(catalogName, context.getNodeManager(), context.getOpenTelemetry())), + new HiveMetastoreModule(Optional.empty()), + new FileSystemModule(catalogName, context.getNodeManager(), context.getOpenTelemetry()), new MBeanServerModule(), binder -> { + module.ifPresent(binder::install); binder.bind(OpenTelemetry.class).toInstance(context.getOpenTelemetry()); binder.bind(Tracer.class).toInstance(context.getTracer()); binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); @@ -95,6 +91,7 @@ public static Connector createConnector( HudiTableProperties hudiTableProperties = injector.getInstance(HudiTableProperties.class); return new HudiConnector( + injector, lifeCycleManager, transactionManager, new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader), diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/HudiQueryRunner.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/HudiQueryRunner.java index 61b3f6b65ed92..a732f4b203920 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/HudiQueryRunner.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/HudiQueryRunner.java @@ -18,21 +18,17 @@ import io.airlift.log.Logger; import io.airlift.log.Logging; import io.trino.Session; -import 
io.trino.plugin.hive.SchemaAlreadyExistsException; +import io.trino.filesystem.Location; import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hudi.testing.HudiTablesInitializer; import io.trino.plugin.hudi.testing.ResourceHudiTablesInitializer; import io.trino.spi.security.PrincipalType; import io.trino.testing.DistributedQueryRunner; -import java.io.File; -import java.nio.file.Path; import java.util.Map; import java.util.Optional; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.testing.TestingSession.testSessionBuilder; public final class HudiQueryRunner @@ -57,28 +53,20 @@ public static DistributedQueryRunner createHudiQueryRunner( .setExtraProperties(extraProperties) .build(); - Path coordinatorBaseDir = queryRunner.getCoordinator().getBaseDataDir(); - File catalogDir = coordinatorBaseDir.resolve("catalog").toFile(); - HiveMetastore metastore = createTestingFileHiveMetastore(catalogDir); - - // create testing database - Database database = Database.builder() - .setDatabaseName(SCHEMA_NAME) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - try { - metastore.createDatabase(database); - } - catch (SchemaAlreadyExistsException e) { - // do nothing if database already exists - } - - queryRunner.installPlugin(new TestingHudiPlugin(Optional.of(metastore))); + queryRunner.installPlugin(new TestingHudiPlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hudi_data"))); queryRunner.createCatalog("hudi", "hudi", connectorProperties); - String dataDir = coordinatorBaseDir.resolve("data").toString(); - dataLoader.initializeTables(queryRunner, metastore, SCHEMA_NAME, dataDir, HDFS_ENVIRONMENT); + // Hudi connector does not support creating schema or any other write operations + ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()) + .createDatabase(Database.builder() + .setDatabaseName(SCHEMA_NAME) + .setOwnerName(Optional.of("public")) + .setOwnerType(Optional.of(PrincipalType.ROLE)) + .build()); + + dataLoader.initializeTables(queryRunner, Location.of("local:///"), SCHEMA_NAME); return queryRunner; } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/S3HudiQueryRunner.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/S3HudiQueryRunner.java index 6605ef23a31fd..27d1233298245 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/S3HudiQueryRunner.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/S3HudiQueryRunner.java @@ -14,23 +14,13 @@ package io.trino.plugin.hudi; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.airlift.log.Logging; import io.trino.Session; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.authentication.NoHdfsAuthentication; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.plugin.hive.SchemaAlreadyExistsException; +import 
io.trino.filesystem.Location; import io.trino.plugin.hive.containers.HiveMinioDataLake; import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hudi.testing.HudiTablesInitializer; import io.trino.plugin.hudi.testing.TpchHudiTablesInitializer; import io.trino.spi.security.PrincipalType; @@ -40,9 +30,6 @@ import java.util.Map; import java.util.Optional; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveTestUtils.SOCKS_PROXY; -import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; import static io.trino.testing.TestingSession.testSessionBuilder; import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; import static io.trino.testing.containers.Minio.MINIO_REGION; @@ -62,30 +49,10 @@ public static DistributedQueryRunner create( HiveMinioDataLake hiveMinioDataLake) throws Exception { - String basePath = "s3a://" + hiveMinioDataLake.getBucketName() + "/" + TPCH_SCHEMA; - HdfsEnvironment hdfsEnvironment = getHdfsEnvironment(hiveMinioDataLake); - - HiveMetastore metastore = new BridgingHiveMetastore( - testingThriftHiveMetastoreBuilder() - .metastoreClient(hiveMinioDataLake.getHiveHadoop().getHiveMetastoreEndpoint()) - .fileSystemFactory(new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS)) - .build()); - Database database = Database.builder() - .setDatabaseName(TPCH_SCHEMA) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - try { - metastore.createDatabase(database); - } - catch (SchemaAlreadyExistsException e) { - // do nothing if database already exists - } - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(createSession()) .setExtraProperties(extraProperties) .build(); - queryRunner.installPlugin(new TestingHudiPlugin(Optional.of(metastore))); + queryRunner.installPlugin(new TestingHudiPlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hudi_data"))); queryRunner.createCatalog( "hudi", "hudi", @@ -100,7 +67,18 @@ public static DistributedQueryRunner create( .putAll(connectorProperties) .buildOrThrow()); - dataLoader.initializeTables(queryRunner, metastore, TPCH_SCHEMA, basePath, hdfsEnvironment); + // Hudi connector does not support creating schema or any other write operations + ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()) + .createDatabase(Database.builder() + .setDatabaseName(TPCH_SCHEMA) + .setOwnerName(Optional.of("public")) + .setOwnerType(Optional.of(PrincipalType.ROLE)) + .build()); + + dataLoader.initializeTables(queryRunner, Location.of("s3://" + hiveMinioDataLake.getBucketName() + "/"), TPCH_SCHEMA); + return queryRunner; } @@ -112,26 +90,6 @@ private static Session createSession() .build(); } - private static HdfsEnvironment getHdfsEnvironment(HiveMinioDataLake hiveMinioDataLake) - { - DynamicHdfsConfiguration dynamicHdfsConfiguration = new DynamicHdfsConfiguration( - new HdfsConfigurationInitializer( - new HdfsConfig() - .setSocksProxy(SOCKS_PROXY.orElse(null)), - ImmutableSet.of( - new TrinoS3ConfigurationInitializer(new HiveS3Config() - .setS3AwsAccessKey(MINIO_ACCESS_KEY) - .setS3AwsSecretKey(MINIO_SECRET_KEY) - 
.setS3Endpoint(hiveMinioDataLake.getMinio().getMinioAddress()) - .setS3PathStyleAccess(true)))), - ImmutableSet.of()); - - return new HdfsEnvironment( - dynamicHdfsConfiguration, - new HdfsConfig(), - new NoHdfsAuthentication()); - } - public static void main(String[] args) throws Exception { diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiConfig.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiConfig.java index 2aaed93bcad8a..719ef64bce1a5 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiConfig.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiConfig.java @@ -39,7 +39,8 @@ public void testDefaults() .setMaxOutstandingSplits(1000) .setSplitLoaderParallelism(4) .setSplitGeneratorParallelism(4) - .setPerTransactionMetastoreCacheMaximumSize(2000)); + .setPerTransactionMetastoreCacheMaximumSize(2000) + .setQueryPartitionFilterRequired(false)); } @Test @@ -56,6 +57,7 @@ public void testExplicitPropertyMappings() .put("hudi.split-loader-parallelism", "16") .put("hudi.split-generator-parallelism", "32") .put("hudi.per-transaction-metastore-cache-maximum-size", "1000") + .put("hudi.query-partition-filter-required", "true") .buildOrThrow(); HudiConfig expected = new HudiConfig() @@ -68,7 +70,8 @@ public void testExplicitPropertyMappings() .setMaxOutstandingSplits(100) .setSplitLoaderParallelism(16) .setSplitGeneratorParallelism(32) - .setPerTransactionMetastoreCacheMaximumSize(1000); + .setPerTransactionMetastoreCacheMaximumSize(1000) + .setQueryPartitionFilterRequired(true); assertFullMapping(properties, expected); } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiPlugin.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiPlugin.java deleted file mode 100644 index 71ec2e585916a..0000000000000 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiPlugin.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hudi; - -import com.google.common.collect.ImmutableMap; -import io.airlift.bootstrap.ApplicationConfigurationException; -import io.trino.plugin.hive.HiveConfig; -import io.trino.spi.Plugin; -import io.trino.spi.connector.ConnectorFactory; -import io.trino.testing.TestingConnectorContext; -import org.junit.jupiter.api.Test; - -import java.util.Map; -import java.util.Optional; - -import static com.google.common.collect.Iterables.getOnlyElement; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestHudiPlugin -{ - @Test - public void testCreateConnector() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - Map.of( - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testCreateTestingConnector() - { - Plugin plugin = new TestingHudiPlugin(Optional.empty()); - ConnectorFactory factory = getOnlyElement(plugin.getConnectorFactories()); - factory.create( - "test", - Map.of( - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testTestingFileMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - ImmutableMap.of( - "hive.metastore", "file", - "hive.metastore.catalog.dir", "/tmp", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testThriftMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - Map.of( - "hive.metastore", "thrift", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - } - - @Test - public void testGlueMetastore() - { - ConnectorFactory factory = getConnectorFactory(); - factory.create( - "test", - Map.of( - "hive.metastore", "glue", - "hive.metastore.glue.region", "us-east-2", - "bootstrap.quiet", "true"), - new TestingConnectorContext()) - .shutdown(); - - assertThatThrownBy(() -> factory.create( - "test", - Map.of( - "hive.metastore", "glue", - "hive.metastore.uri", "thrift://foo:1234", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .isInstanceOf(ApplicationConfigurationException.class) - .hasMessageContaining("Error: Configuration property 'hive.metastore.uri' was not used"); - } - - @Test - public void testHiveConfigIsNotBound() - { - ConnectorFactory factory = getConnectorFactory(); - assertThatThrownBy(() -> factory.create("test", - Map.of( - "hive.metastore.uri", "thrift://foo:1234", - // Try setting any property provided by HiveConfig class - HiveConfig.CONFIGURATION_HIVE_PARTITION_PROJECTION_ENABLED, "true", - "bootstrap.quiet", "true"), - new TestingConnectorContext())) - .hasMessageContaining("Error: Configuration property 'hive.partition-projection-enabled' was not used"); - } - - private static ConnectorFactory getConnectorFactory() - { - return getOnlyElement(new HudiPlugin().getConnectorFactories()); - } -} diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSmokeTest.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSmokeTest.java index 71a687134880e..6313e921cdf2a 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSmokeTest.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSmokeTest.java @@ -14,13 +14,17 @@ package io.trino.plugin.hudi; import 
com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.TrinoInputFile; import io.trino.plugin.hudi.testing.ResourceHudiTablesInitializer; +import io.trino.spi.security.ConnectorIdentity; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.QueryRunner; +import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; -import java.nio.file.Files; -import java.nio.file.Path; import java.time.ZonedDateTime; import static io.trino.plugin.hudi.HudiQueryRunner.createHudiQueryRunner; @@ -44,8 +48,8 @@ protected QueryRunner createQueryRunner() public void testReadNonPartitionedTable() { assertQuery( - "SELECT rowid, name FROM " + HUDI_NON_PART_COW, - "SELECT * FROM VALUES ('row_1', 'bob'), ('row_2', 'john'), ('row_3', 'tom')"); + "SELECT id, name FROM " + HUDI_NON_PART_COW, + "SELECT * FROM VALUES (1, 'a1'), (2, 'a2')"); } @Test @@ -132,9 +136,10 @@ public void testMetaColumns() @Test public void testPathColumn() + throws Exception { String path = (String) computeScalar("SELECT \"$path\" FROM " + HUDI_COW_PT_TBL + " WHERE id = 1"); - assertThat(toPath(path)).exists(); + assertThat(toInputFile(path).exists()).isTrue(); } @Test @@ -143,7 +148,7 @@ public void testFileSizeColumn() { String path = (String) computeScalar("SELECT \"$path\" FROM " + HUDI_COW_PT_TBL + " WHERE id = 1"); long fileSize = (long) computeScalar("SELECT \"$file_size\" FROM " + HUDI_COW_PT_TBL + " WHERE id = 1"); - assertThat(fileSize).isEqualTo(Files.size(toPath(path))); + assertThat(fileSize).isEqualTo(toInputFile(path).length()); } @Test @@ -153,7 +158,7 @@ public void testFileModifiedColumn() String path = (String) computeScalar("SELECT \"$path\" FROM " + HUDI_COW_PT_TBL + " WHERE id = 1"); ZonedDateTime fileModifiedTime = (ZonedDateTime) computeScalar("SELECT \"$file_modified_time\" FROM " + HUDI_COW_PT_TBL + " WHERE id = 1"); assertThat(fileModifiedTime.toInstant().toEpochMilli()) - .isEqualTo(Files.getLastModifiedTime(toPath(path)).toInstant().toEpochMilli()); + .isEqualTo(toInputFile(path).lastModified().toEpochMilli()); } @Test @@ -165,9 +170,207 @@ public void testPartitionColumn() assertQueryFails("SELECT \"$partition\" FROM " + HUDI_NON_PART_COW, ".* Column '\\$partition' cannot be resolved"); } - private static Path toPath(String path) + @Test + public void testPartitionFilterRequired() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQueryFails( + session, + "SELECT * FROM " + HUDI_COW_PT_TBL, + "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"); + } + + @Test + public void testPartitionFilterRequiredPredicateOnNonPartitionColumn() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQueryFails( + session, + "SELECT * FROM " + HUDI_COW_PT_TBL + " WHERE id = 1", + "Filter required on tests." 
+ HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"); + } + + @Test + public void testPartitionFilterRequiredNestedQueryWithInnerPartitionPredicate() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQuery(session, "SELECT name FROM (SELECT * FROM " + HUDI_COW_PT_TBL + " WHERE dt = '2021-12-09') WHERE id = 1", "VALUES 'a1'"); + } + + @Test + public void testPartitionFilterRequiredNestedQueryWithOuterPartitionPredicate() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQuery(session, "SELECT name FROM (SELECT * FROM " + HUDI_COW_PT_TBL + " WHERE id = 1) WHERE dt = '2021-12-09'", "VALUES 'a1'"); + } + + @Test + public void testPartitionFilterRequiredNestedWithIsNotNullFilter() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE dt IS NOT null", "VALUES 'a1', 'a2'"); + } + + @Test + public void testPartitionFilterRequiredFilterRemovedByPlanner() + { + Session session = withPartitionFilterRequired(getSession()); + + assertQueryFails( + session, + "SELECT id FROM " + HUDI_COW_PT_TBL + " WHERE dt IS NOT null OR true", + "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"); + } + + @Test + public void testPartitionFilterRequiredOnJoin() + { + Session session = withPartitionFilterRequired(getSession()); + @Language("RegExp") String errorMessage = "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"; + + // ON with partition column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt)", + errorMessage); + // ON with partition column and WHERE with same left table's partition column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t1.dt = '2021-12-09'", + "VALUES ('a1', 'a1'), ('a2', 'a2'), ('a1', 'a2'), ('a2', 'a1')"); + // ON with partition column and WHERE with same right table's regular column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t2.dt = '2021-12-09'", + "VALUES ('a1', 'a1'), ('a2', 'a2'), ('a1', 'a2'), ('a2', 'a1')"); + // ON with partition column and WHERE with different left table's partition column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t1.hh = '10'", + "VALUES ('a1', 'a1'), ('a1', 'a2')"); + // ON with partition column and WHERE with different regular column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t2.hh = '10'", + errorMessage); + // ON with partition column and WHERE with regular column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t1.id = 1", + errorMessage); + + // ON with regular column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.id = t2.id)", + errorMessage); + // ON with regular column and WHERE with left table's partition column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + 
HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.id = t2.id) WHERE t1.dt = '2021-12-09'", + "VALUES ('a1', 'a1'), ('a2', 'a2')"); + // ON with partition column and WHERE with right table's regular column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_NON_PART_COW + " t2 ON (t1.dt = t2.dt) WHERE t2.id = 1", + errorMessage); + } + + @Test + public void testPartitionFilterRequiredOnJoinBothTablePartitioned() + { + Session session = withPartitionFilterRequired(getSession()); + + // ON with partition column + assertQueryFails( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt)", + "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"); + // ON with partition column and WHERE with same left table's partition column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt) WHERE t1.dt = '2021-12-09'", + "VALUES ('a1', 'a1'), ('a2', 'a2'), ('a1', 'a2'), ('a2', 'a1')"); + // ON with partition column and WHERE with same right table's partition column + assertQuery( + session, + "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt) WHERE t2.dt = '2021-12-09'", + "VALUES ('a1', 'a1'), ('a2', 'a2'), ('a1', 'a2'), ('a2', 'a1')"); + + @Language("RegExp") String errorMessage = "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"; + // ON with partition column and WHERE with different left table's partition column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt) WHERE t1.hh = '10'", errorMessage); + // ON with partition column and WHERE with different right table's partition column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt) WHERE t2.hh = '10'", errorMessage); + // ON with partition column and WHERE with regular column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.dt = t2.dt) WHERE t2.id = 1", errorMessage); + + // ON with regular column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.id = t2.id)", errorMessage); + // ON with regular column and WHERE with regular column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.id = t2.id) WHERE t1.id = 1", errorMessage); + // ON with regular column and WHERE with left table's partition column + assertQueryFails(session, "SELECT t1.name, t2.name FROM " + HUDI_COW_PT_TBL + " t1 JOIN " + HUDI_COW_PT_TBL + " t2 ON (t1.id = t2.id) WHERE t1.dt = '2021-12-09'", errorMessage); + } + + @Test + public void testPartitionFilterRequiredWithLike() + { + Session session = withPartitionFilterRequired(getSession()); + assertQueryFails( + session, + "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE name LIKE '%1'", + "Filter required on tests." 
+ HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"); + } + + @Test + public void testPartitionFilterRequiredFilterIncluded() + { + Session session = withPartitionFilterRequired(getSession()); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE hh = '10'", "VALUES 'a1'"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh < '12'", "VALUES 2"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE Hh < '11'", "VALUES 1"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE HH < '10'", "VALUES 0"); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) % 2 = 1 and hh IS NOT NULL", "VALUES 'a2'"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh IS NULL", "VALUES 0"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh IS NOT NULL", "VALUES 2"); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE hh LIKE '10'", "VALUES 'a1'"); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE hh LIKE '1%'", "VALUES 'a1', 'a2'"); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE id = 1 AND dt = '2021-12-09'", "VALUES 'a1'"); + assertQuery(session, "SELECT name FROM " + HUDI_COW_PT_TBL + " WHERE hh = '11' AND dt = '2021-12-09'", "VALUES 'a2'"); + assertQuery(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh = '12' AND dt = '2021-12-19'", "VALUES 0"); + + // Predicate which could not be translated into tuple domain + @Language("RegExp") String errorMessage = "Filter required on tests." + HUDI_COW_PT_TBL.getTableName() + " for at least one of the partition columns: dt, hh"; + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) % 2 = 0", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) - 11 = 0", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) * 2 = 20", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) % 2 > 0", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE name LIKE '%1' OR hh LIKE '%1'", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE name LIKE '%1' AND hh LIKE '%0'", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE id = 1 OR dt = '2021-12-09'", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh = '11' OR dt = '2021-12-09'", errorMessage); + assertQueryFails(session, "SELECT count(*) FROM " + HUDI_COW_PT_TBL + " WHERE hh = '12' OR dt = '2021-12-19'", errorMessage); + assertQueryFails(session, "SELECT count(*) AS COUNT FROM " + HUDI_COW_PT_TBL + " WHERE CAST(hh AS INTEGER) > 2 GROUP BY name ", errorMessage); + } + + private static Session withPartitionFilterRequired(Session session) + { + return Session.builder(session) + .setCatalogSessionProperty(session.getCatalog().orElseThrow(), "query_partition_filter_required", "true") + .build(); + } + + private TrinoInputFile toInputFile(String path) { - // Remove leading 'file:' because path column returns 'file:/path-to-file' in case of local file system - return Path.of(path.replaceFirst("^file:", "")); + return ((HudiConnector) 
getDistributedQueryRunner().getCoordinator().getConnector("hudi")).getInjector() + .getInstance(TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")) + .newInputFile(Location.of(path)); } } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingExtendedHiveMetastore.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingExtendedHiveMetastore.java deleted file mode 100644 index fbe97020b0fe6..0000000000000 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingExtendedHiveMetastore.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hudi; - -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.UnimplementedHiveMetastore; -import io.trino.spi.predicate.TupleDomain; - -import java.util.List; -import java.util.Optional; - -import static java.util.Objects.requireNonNull; - -public class TestingExtendedHiveMetastore - extends UnimplementedHiveMetastore -{ - private final Table table; - private final List partitions; - - public TestingExtendedHiveMetastore(Table table, List partitions) - { - this.table = requireNonNull(table, "table is null"); - this.partitions = requireNonNull(partitions, "partitions is null"); - } - - @Override - public Optional
getTable(String databaseName, String tableName) - { - return Optional.of(table); - } - - @Override - public Optional> getPartitionNamesByFilter(String databaseName, String tableName, List columnNames, TupleDomain partitionKeysFilter) - { - return Optional.of(partitions); - } -} diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiConnectorFactory.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiConnectorFactory.java index d221ada7e851a..73e2108084465 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiConnectorFactory.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiConnectorFactory.java @@ -13,25 +13,31 @@ */ package io.trino.plugin.hudi; -import io.trino.plugin.hive.metastore.HiveMetastore; +import com.google.common.collect.ImmutableMap; +import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; +import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorContext; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.Map; import java.util.Optional; +import static com.google.inject.multibindings.MapBinder.newMapBinder; +import static io.airlift.configuration.ConfigBinder.configBinder; import static io.trino.plugin.hudi.InternalHudiConnectorFactory.createConnector; -import static java.util.Objects.requireNonNull; public class TestingHudiConnectorFactory implements ConnectorFactory { - private final Optional metastore; + private final Path localFileSystemRootPath; - public TestingHudiConnectorFactory(Optional metastore) + public TestingHudiConnectorFactory(Path localFileSystemRootPath) { - this.metastore = requireNonNull(metastore, "metastore is null"); + localFileSystemRootPath.toFile().mkdirs(); + this.localFileSystemRootPath = localFileSystemRootPath; } @Override @@ -43,6 +49,16 @@ public String getName() @Override public Connector create(String catalogName, Map config, ConnectorContext context) { - return createConnector(catalogName, config, context, metastore, Optional.empty()); + ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .putAll(config) + .put("bootstrap.quiet", "true"); + if (!config.containsKey("hive.metastore")) { + configBuilder.put("hive.metastore", "file"); + } + return createConnector(catalogName, configBuilder.buildOrThrow(), context, Optional.of(binder -> { + newMapBinder(binder, String.class, TrinoFileSystemFactory.class) + .addBinding("local").toInstance(new LocalFileSystemFactory(localFileSystemRootPath)); + configBinder(binder).bindConfigDefaults(FileHiveMetastoreConfig.class, metastoreConfig -> metastoreConfig.setCatalogDirectory("local:///managed/")); + })); } } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiPlugin.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiPlugin.java index 42788856d895f..02744377e2dad 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiPlugin.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestingHudiPlugin.java @@ -14,11 +14,10 @@ package io.trino.plugin.hudi; import com.google.common.collect.ImmutableList; -import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.List; -import java.util.Optional; import static com.google.common.base.Verify.verify; import static 
java.util.Objects.requireNonNull; @@ -26,11 +25,11 @@ public class TestingHudiPlugin extends HudiPlugin { - private final Optional metastore; + private final Path localFileSystemRootPath; - public TestingHudiPlugin(Optional metastore) + public TestingHudiPlugin(Path localFileSystemRootPath) { - this.metastore = requireNonNull(metastore, "metastore is null"); + this.localFileSystemRootPath = requireNonNull(localFileSystemRootPath, "localFileSystemRootPath is null"); } @Override @@ -39,6 +38,6 @@ public Iterable getConnectorFactories() List connectorFactories = ImmutableList.copyOf(super.getConnectorFactories()); verify(connectorFactories.size() == 1, "Unexpected connector factories: %s", connectorFactories); - return ImmutableList.of(new TestingHudiConnectorFactory(metastore)); + return ImmutableList.of(new TestingHudiConnectorFactory(localFileSystemRootPath)); } } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/HudiTablesInitializer.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/HudiTablesInitializer.java index 770ed52f0406b..b9a64da14a2d2 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/HudiTablesInitializer.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/HudiTablesInitializer.java @@ -13,17 +13,11 @@ */ package io.trino.plugin.hudi.testing; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.testing.QueryRunner; +import io.trino.filesystem.Location; +import io.trino.testing.DistributedQueryRunner; public interface HudiTablesInitializer { - void initializeTables( - QueryRunner queryRunner, - HiveMetastore metastore, - String schemaName, - String dataDir, - HdfsEnvironment hdfsEnvironment) + void initializeTables(DistributedQueryRunner queryRunner, Location externalLocation, String schemaName) throws Exception; } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/ResourceHudiTablesInitializer.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/ResourceHudiTablesInitializer.java index 3c89326d69aea..9c664c055f6f0 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/ResourceHudiTablesInitializer.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/ResourceHudiTablesInitializer.java @@ -15,21 +15,26 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import io.airlift.log.Logger; -import io.trino.hdfs.HdfsEnvironment; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.PartitionStatistics; import io.trino.plugin.hive.metastore.Column; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.Partition; import io.trino.plugin.hive.metastore.PartitionWithStatistics; import io.trino.plugin.hive.metastore.PrincipalPrivileges; import io.trino.plugin.hive.metastore.StorageFormat; import io.trino.plugin.hive.metastore.Table; -import io.trino.testing.QueryRunner; +import io.trino.plugin.hudi.HudiConnector; +import io.trino.spi.security.ConnectorIdentity; +import io.trino.testing.DistributedQueryRunner; import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -56,27 +61,23 @@ public 
class ResourceHudiTablesInitializer implements HudiTablesInitializer { - public ResourceHudiTablesInitializer() {} - @Override - public void initializeTables( - QueryRunner queryRunner, - HiveMetastore metastore, - String schemaName, - String dataDir, - HdfsEnvironment environment) + public void initializeTables(DistributedQueryRunner queryRunner, Location externalLocation, String schemaName) throws Exception { - Path basePath = Path.of(dataDir); - copyDir(new File(getResource("hudi-testing-data").toURI()).toPath(), basePath); - Logger.get(getClass()).info("Prepared table data in %s", basePath); + TrinoFileSystem fileSystem = ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + Location baseLocation = externalLocation.appendSuffix(schemaName); + copyDir(new File(getResource("hudi-testing-data").toURI()).toPath(), fileSystem, baseLocation); for (TestingTable table : TestingTable.values()) { String tableName = table.getTableName(); + Location tablePath = baseLocation.appendPath(tableName); createTable( - metastore, + queryRunner, schemaName, - basePath.resolve(tableName), + tablePath, tableName, table.getDataColumns(), table.getPartitionColumns(), @@ -85,9 +86,9 @@ public void initializeTables( } private void createTable( - HiveMetastore metastore, + DistributedQueryRunner queryRunner, String schemaName, - Path tablePath, + Location tablePath, String tableName, List dataColumns, List partitionColumns, @@ -108,8 +109,11 @@ private void createTable( .setParameters(ImmutableMap.of("serialization.format", "1", "EXTERNAL", "TRUE")) .withStorage(storageBuilder -> storageBuilder .setStorageFormat(storageFormat) - .setLocation("file://" + tablePath)) + .setLocation(tablePath.toString())) .build(); + HiveMetastore metastore = ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); metastore.createTable(table, PrincipalPrivileges.NO_PRIVILEGES); List partitionsToAdd = new ArrayList<>(); @@ -120,7 +124,7 @@ private void createTable( .setValues(extractPartitionValues(partitionName)) .withStorage(storageBuilder -> storageBuilder .setStorageFormat(storageFormat) - .setLocation("file://" + tablePath.resolve(partitionPath))) + .setLocation(tablePath.appendPath(partitionPath).toString())) .setColumns(dataColumns) .build(); partitionsToAdd.add(new PartitionWithStatistics(partition, partitionName, PartitionStatistics.empty())); @@ -133,20 +137,25 @@ private static Column column(String name, HiveType type) return new Column(name, type, Optional.empty(), Map.of()); } - private static void copyDir(Path srcDir, Path dstDir) + public static void copyDir(Path sourceDirectory, TrinoFileSystem fileSystem, Location destinationDirectory) throws IOException { - try (Stream paths = Files.walk(srcDir)) { + try (Stream paths = Files.walk(sourceDirectory)) { for (Iterator iterator = paths.iterator(); iterator.hasNext(); ) { Path path = iterator.next(); - Path relativePath = srcDir.relativize(path); if (path.toFile().isDirectory()) { - Files.createDirectories(dstDir.resolve(relativePath)); + continue; + } + + // hudi blows up if crc files are present + if (path.toString().endsWith(".crc")) { + continue; } - else { - Path dstFile = dstDir.resolve(relativePath); - Files.createDirectories(dstFile.getParent()); - Files.copy(path, dstFile); + + Location location = 
destinationDirectory.appendPath(sourceDirectory.relativize(path).toString()); + fileSystem.createDirectory(location.parentDirectory()); + try (OutputStream out = fileSystem.newOutputFile(location).create()) { + Files.copy(path, out); } } } @@ -211,14 +220,11 @@ public Map getPartitions() private static List nonPartitionRegularColumns() { return ImmutableList.of( - column("rowid", HIVE_STRING), - column("partitionid", HIVE_STRING), - column("precomb", HIVE_LONG), + column("id", HIVE_LONG), column("name", HIVE_STRING), - column("versionid", HIVE_STRING), - column("tobedeletedstr", HIVE_STRING), - column("inttolong", HIVE_INT), - column("longtoint", HIVE_LONG)); + column("ts", HIVE_LONG), + column("dt", HIVE_STRING), + column("hh", HIVE_STRING)); } private static List stockTicksRegularColumns() diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/TpchHudiTablesInitializer.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/TpchHudiTablesInitializer.java index 7807fdf129456..243501f8459de 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/TpchHudiTablesInitializer.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/testing/TpchHudiTablesInitializer.java @@ -16,18 +16,25 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.hdfs.HdfsContext; import io.trino.hdfs.HdfsEnvironment; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.metastore.Column; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.PrincipalPrivileges; import io.trino.plugin.hive.metastore.StorageFormat; import io.trino.plugin.hive.metastore.Table; +import io.trino.plugin.hudi.HudiConnector; import io.trino.plugin.tpch.TpchPlugin; import io.trino.spi.connector.CatalogSchemaName; +import io.trino.spi.security.ConnectorIdentity; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; import io.trino.testing.MaterializedRow; -import io.trino.testing.QueryRunner; import io.trino.tpch.TpchColumn; import io.trino.tpch.TpchColumnType; import io.trino.tpch.TpchColumnTypes; @@ -40,6 +47,7 @@ import org.apache.hudi.client.HoodieJavaWriteClient; import org.apache.hudi.client.common.HoodieJavaEngineContext; import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; @@ -69,21 +77,24 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Verify.verify; +import static com.google.common.io.MoreFiles.deleteRecursively; +import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveType.HIVE_DATE; import static io.trino.plugin.hive.HiveType.HIVE_DOUBLE; import static io.trino.plugin.hive.HiveType.HIVE_INT; import static io.trino.plugin.hive.HiveType.HIVE_LONG; import static io.trino.plugin.hive.HiveType.HIVE_STRING; import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static 
io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static io.trino.plugin.hive.util.HiveClassNames.HUDI_PARQUET_INPUT_FORMAT; import static io.trino.plugin.hive.util.HiveClassNames.MAPRED_PARQUET_OUTPUT_FORMAT_CLASS; import static io.trino.plugin.hive.util.HiveClassNames.PARQUET_HIVE_SERDE_CLASS; import static io.trino.testing.TestingConnectorSession.SESSION; import static java.lang.String.format; +import static java.nio.file.Files.createTempDirectory; import static java.util.Collections.unmodifiableList; import static java.util.Objects.requireNonNull; -import static java.util.stream.Collectors.toUnmodifiableList; +import static org.apache.hudi.common.model.HoodieTableType.COPY_ON_WRITE; public class TpchHudiTablesInitializer implements HudiTablesInitializer @@ -110,29 +121,41 @@ public TpchHudiTablesInitializer(HoodieTableType tableType, List> t } @Override - public void initializeTables( - QueryRunner queryRunner, - HiveMetastore metastore, - String schemaName, - String dataDir, - HdfsEnvironment hdfsEnvironment) + public void initializeTables(DistributedQueryRunner queryRunner, Location externalLocation, String schemaName) + throws Exception { queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog(TPCH_TINY.getCatalogName(), "tpch", ImmutableMap.of()); - for (TpchTable table : tpchTables) { - load(table, queryRunner, metastore, schemaName, dataDir, hdfsEnvironment); + TrinoFileSystem fileSystem = ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); + HiveMetastore metastore = ((HudiConnector) queryRunner.getCoordinator().getConnector("hudi")).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + Location dataLocation = externalLocation.appendPath("tpch"); + + java.nio.file.Path tempDir = createTempDirectory("test"); + try { + for (TpchTable tpchTable : tpchTables) { + java.nio.file.Path tempTableDir = tempDir.resolve(tpchTable.getTableName()); + load(tpchTable, queryRunner, tempTableDir); + + Location tableLocation = dataLocation.appendPath(tpchTable.getTableName()); + ResourceHudiTablesInitializer.copyDir(tempTableDir, fileSystem, tableLocation); + + Table table = createTableDefinition(schemaName, tpchTable, tableLocation); + metastore.createTable(table, PrincipalPrivileges.NO_PRIVILEGES); + } + } + finally { + deleteRecursively(tempDir, ALLOW_INSECURE); } } - private void load( - TpchTable tpchTables, - QueryRunner queryRunner, - HiveMetastore metastore, - String schemaName, - String basePath, - HdfsEnvironment hdfsEnvironment) + public void load(TpchTable tpchTables, DistributedQueryRunner queryRunner, java.nio.file.Path tableDirectory) { - try (HoodieJavaWriteClient writeClient = createWriteClient(tpchTables, basePath, hdfsEnvironment)) { + try (HoodieJavaWriteClient writeClient = createWriteClient(tpchTables, HDFS_ENVIRONMENT, new Path(tableDirectory.toUri()))) { RecordConverter recordConverter = createRecordConverter(tpchTables); @Language("SQL") String sql = generateScanSql(TPCH_TINY, tpchTables); @@ -148,11 +171,9 @@ private void load( writeClient.startCommitWithTime(timestamp); writeClient.insert(records, timestamp); } - - metastore.createTable(createMetastoreTable(schemaName, tpchTables, basePath), NO_PRIVILEGES); } - private String generateScanSql(CatalogSchemaName catalogSchemaName, TpchTable table) + private static String generateScanSql(CatalogSchemaName catalogSchemaName, 
TpchTable table) { StringBuilder builder = new StringBuilder(); builder.append("SELECT "); @@ -166,12 +187,11 @@ private String generateScanSql(CatalogSchemaName catalogSchemaName, TpchTable return builder.toString(); } - private Table createMetastoreTable(String schemaName, TpchTable table, String basePath) + private static Table createTableDefinition(String schemaName, TpchTable table, Location location) { - String tablePath = getTablePath(table, basePath); List columns = Stream.of(HUDI_META_COLUMNS, createMetastoreColumns(table)) .flatMap(Collection::stream) - .collect(toUnmodifiableList()); + .toList(); StorageFormat storageFormat = StorageFormat.create( PARQUET_HIVE_SERDE_CLASS, HUDI_PARQUET_INPUT_FORMAT, @@ -186,51 +206,47 @@ private Table createMetastoreTable(String schemaName, TpchTable table, String .setParameters(ImmutableMap.of("serialization.format", "1", "EXTERNAL", "TRUE")) .withStorage(storageBuilder -> storageBuilder .setStorageFormat(storageFormat) - .setLocation(tablePath)) + .setLocation(location.toString())) .build(); } - private HoodieJavaWriteClient createWriteClient(TpchTable table, String basePath, HdfsEnvironment hdfsEnvironment) + private static HoodieJavaWriteClient createWriteClient(TpchTable table, HdfsEnvironment hdfsEnvironment, Path tablePath) { - String tableName = table.getTableName(); - String tablePath = getTablePath(table, basePath); Schema schema = createAvroSchema(table); - Configuration conf = hdfsEnvironment.getConfiguration(CONTEXT, new Path(tablePath)); + Configuration conf = hdfsEnvironment.getConfiguration(CONTEXT, tablePath); try { HoodieTableMetaClient.withPropertyBuilder() - .setTableType(tableType) - .setTableName(tableName) + .setTableType(COPY_ON_WRITE) + .setTableName(table.getTableName()) .setBootstrapIndexClass(NoOpBootstrapIndex.class.getName()) .setPayloadClassName(HoodieAvroPayload.class.getName()) .setRecordKeyFields(FIELD_UUID) - .initTable(conf, tablePath); + .initTable(conf, tablePath.toString()); } catch (IOException e) { - throw new RuntimeException("Could not init table " + tableName, e); + throw new RuntimeException("Could not init table " + table.getTableName(), e); } HoodieIndexConfig indexConfig = HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build(); HoodieArchivalConfig archivalConfig = HoodieArchivalConfig.newBuilder().archiveCommitsWith(20, 30).build(); HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder() - .withPath(tablePath) + .withPath(tablePath.toString()) .withSchema(schema.toString()) .withParallelism(2, 2) .withDeleteParallelism(2) - .forTable(tableName) + .forTable(table.getTableName()) .withIndexConfig(indexConfig) .withArchivalConfig(archivalConfig) .withEmbeddedTimelineServerEnabled(false) .withMarkersType(MarkerType.DIRECT.name()) + // Disabling Hudi metadata table (MDT) in tests as the support of + // reading MDT is broken after removal of Hudi dependencies from compile time + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) .build(); return new HoodieJavaWriteClient<>(new HoodieJavaEngineContext(conf), cfg); } - private String getTablePath(TpchTable table, String basePath) - { - return basePath + "/" + table.getTableName(); - } - private static RecordConverter createRecordConverter(TpchTable table) { Schema schema = createAvroSchema(table); @@ -239,11 +255,11 @@ private static RecordConverter createRecordConverter(TpchTable table) int numberOfColumns = columns.size(); List columnNames = columns.stream() .map(TpchColumn::getSimplifiedColumnName) 
- .collect(toUnmodifiableList()); + .toList(); List> columnConverters = columns.stream() .map(TpchColumn::getType) .map(TpchHudiTablesInitializer::avroEncoderOf) - .collect(toUnmodifiableList()); + .toList(); return row -> { checkArgument(row.size() == numberOfColumns); @@ -260,14 +276,7 @@ private static RecordConverter createRecordConverter(TpchTable table) // wrap to a HoodieRecord HoodieKey key = new HoodieKey(uuid, PARTITION_PATH); HoodieAvroPayload data = new HoodieAvroPayload(Option.of(record)); - return new HoodieRecord<>(key, data) - { - @Override - public HoodieRecord newInstance() - { - return new HoodieAvroRecord<>(key, data, null); - } - }; + return new HoodieAvroRecord<>(key, data, null); }; } diff --git a/plugin/trino-hudi/src/test/resources/README.md b/plugin/trino-hudi/src/test/resources/README.md new file mode 100644 index 0000000000000..26f65f3fcd78f --- /dev/null +++ b/plugin/trino-hudi/src/test/resources/README.md @@ -0,0 +1,47 @@
+# Hudi Test Resources
+
+## Generating Hudi Resources
+
+Follow these steps to create the `hudi_non_part_cow` test table and use it for testing. The `hudi_non_part_cow` resource was generated with Trino version `423`.
+
+### Start the Hudi Environment
+
+Run the following command in a terminal to start the Hudi environment:
+
+```shell
+testing/bin/ptl env up --environment singlenode-hudi
+```
+
+### Generate Resources
+
+* Open a terminal in the `ptl-spark` container and start the `spark-sql` shell.
+* Run the following Spark SQL statements to create and populate the `hudi_non_part_cow` table:
+
+```
+spark-sql> CREATE TABLE default.hudi_non_part_cow (
+             id bigint,
+             name string,
+             ts bigint,
+             dt string,
+             hh string
+           )
+           USING hudi
+           TBLPROPERTIES (
+             type = 'cow',
+             primaryKey = 'id',
+             preCombineField = 'ts'
+           )
+           LOCATION 's3://test-bucket/hudi_non_part_cow';
+
+spark-sql> INSERT INTO default.hudi_non_part_cow (id, name, ts, dt, hh) VALUES
+           (1, 'a1', 1000, '2021-12-09', '10'),
+           (2, 'a2', 2000, '2021-12-09', '11');
+```
+
+### Download Resources
+
+Download the `hudi_non_part_cow` table from the MinIO console at http://localhost:9001/buckets/test-bucket/browse. A command-line alternative is sketched at the end of this document.
+
+### Use Resources
+
+Unzip the downloaded `hudi_non_part_cow.zip` and remove any unnecessary files to prepare the resource for testing.
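+### Download with the MinIO Client CLI (optional)
+
+As an alternative to the web console, the table files can be copied with the MinIO Client (`mc`). This is a minimal sketch rather than part of the required steps; it assumes the environment's MinIO S3 API is reachable on `localhost:9000`, and the access key and secret key below are placeholders — substitute the credentials configured for the `singlenode-hudi` environment.
+
+```shell
+# Point an alias at the local MinIO endpoint (placeholder credentials)
+mc alias set hudi-test http://localhost:9000 <access-key> <secret-key>
+
+# Copy the generated table directory out of the test bucket
+mc cp --recursive hudi-test/test-bucket/hudi_non_part_cow ./
+```
+
+Copying with `mc` yields the table directory directly, so the unzip step above is not needed in that case.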
diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.commit b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.commit deleted file mode 100644 index f77eeb137f026..0000000000000 --- a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.commit +++ /dev/null @@ -1,50 +0,0 @@ -{ - "partitionToWriteStats" : { - "" : [ { - "fileId" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0", - "path" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet", - "prevCommit" : "null", - "numWrites" : 3, - "numDeletes" : 0, - "numUpdateWrites" : 0, - "numInserts" : 3, - "totalWriteBytes" : 436273, - "totalWriteErrors" : 0, - "tempPath" : null, - "partitionPath" : "", - "totalLogRecords" : 0, - "totalLogFilesCompacted" : 0, - "totalLogSizeCompacted" : 0, - "totalUpdatedRecordsCompacted" : 0, - "totalLogBlocks" : 0, - "totalCorruptLogBlock" : 0, - "totalRollbackBlocks" : 0, - "fileSizeInBytes" : 436273, - "minEventTime" : null, - "maxEventTime" : null - } ] - }, - "compacted" : false, - "extraMetadata" : { - "schema" : "{\"type\":\"record\",\"name\":\"hudi_non_part_cow_record\",\"namespace\":\"hoodie.hudi_non_part_cow\",\"fields\":[{\"name\":\"rowId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"partitionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"preComb\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"versionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"toBeDeletedStr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"intToLong\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"longToInt\",\"type\":[\"null\",\"long\"],\"default\":null}]}" - }, - "operationType" : "INSERT", - "writePartitionPaths" : [ "" ], - "fileIdAndRelativePaths" : { - "d0875d00-483d-4e8b-bbbe-c520366c47a0-0" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet" - }, - "totalRecordsDeleted" : 0, - "totalLogRecordsCompacted" : 0, - "totalLogFilesCompacted" : 0, - "totalCompactedRecordsUpdated" : 0, - "totalLogFilesSize" : 0, - "totalScanTime" : 0, - "totalCreateTime" : 1743, - "totalUpsertTime" : 0, - "minAndMaxEventTime" : { - "Optional.empty" : { - "val" : null, - "present" : false - } - } -} \ No newline at end of file diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.commit b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.commit new file mode 100644 index 0000000000000..9fc9470ff41a2 --- /dev/null +++ b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.commit @@ -0,0 +1,37 @@ +{ + "partitionToWriteStats" : { + "" : [ { + "fileId" : "05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0", + "path" : "05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0_0-27-28_20231127051653361.parquet", + "prevCommit" : "null", + "numWrites" : 2, + "numDeletes" : 0, + "numUpdateWrites" : 0, + "numInserts" : 2, + "totalWriteBytes" : 435338, + "totalWriteErrors" : 0, + "tempPath" 
: null, + "partitionPath" : "", + "totalLogRecords" : 0, + "totalLogFilesCompacted" : 0, + "totalLogSizeCompacted" : 0, + "totalUpdatedRecordsCompacted" : 0, + "totalLogBlocks" : 0, + "totalCorruptLogBlock" : 0, + "totalRollbackBlocks" : 0, + "fileSizeInBytes" : 435338, + "minEventTime" : null, + "maxEventTime" : null, + "runtimeStats" : { + "totalScanTime" : 0, + "totalUpsertTime" : 0, + "totalCreateTime" : 856 + } + } ] + }, + "compacted" : false, + "extraMetadata" : { + "schema" : "{\"type\":\"record\",\"name\":\"hudi_non_part_cow_record\",\"namespace\":\"hoodie.hudi_non_part_cow\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ts\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"dt\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hh\",\"type\":[\"null\",\"string\"],\"default\":null}]}" + }, + "operationType" : "UPSERT" +} \ No newline at end of file diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/_SUCCESS b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.commit.requested similarity index 100% rename from plugin/trino-hive/src/test/resources/spark_bucketed_nation/_SUCCESS rename to plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.commit.requested diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.inflight b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.inflight similarity index 56% rename from plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.inflight rename to plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.inflight index 6605bcaf9b36c..dad745d91bd72 100644 --- a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20211217110514527.inflight +++ b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/20231127051653361.inflight @@ -7,7 +7,7 @@ "numWrites" : 0, "numDeletes" : 0, "numUpdateWrites" : 0, - "numInserts" : 3, + "numInserts" : 2, "totalWriteBytes" : 0, "totalWriteErrors" : 0, "tempPath" : null, @@ -21,28 +21,11 @@ "totalRollbackBlocks" : 0, "fileSizeInBytes" : 0, "minEventTime" : null, - "maxEventTime" : null + "maxEventTime" : null, + "runtimeStats" : null } ] }, "compacted" : false, "extraMetadata" : { }, - "operationType" : "INSERT", - "writePartitionPaths" : [ "" ], - "fileIdAndRelativePaths" : { - "" : null - }, - "totalRecordsDeleted" : 0, - "totalLogRecordsCompacted" : 0, - "totalLogFilesCompacted" : 0, - "totalCompactedRecordsUpdated" : 0, - "totalLogFilesSize" : 0, - "totalScanTime" : 0, - "totalCreateTime" : 0, - "totalUpsertTime" : 0, - "minAndMaxEventTime" : { - "Optional.empty" : { - "val" : null, - "present" : false - } - } + "operationType" : "UPSERT" } \ No newline at end of file diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/hoodie.properties b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/hoodie.properties index 3d03fa7915c39..aa323696b299e 100644 --- a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/hoodie.properties +++ b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie/hoodie.properties @@ 
-1,14 +1,17 @@ -#Properties saved on Fri Dec 17 11:05:14 UTC 2021 -#Fri Dec 17 11:05:14 UTC 2021 -hoodie.table.precombine.field=preComb -hoodie.table.partition.fields= +#Updated at 2023-11-27T05:16:58.380652Z +#Mon Nov 27 05:16:58 UTC 2023 hoodie.table.type=COPY_ON_WRITE +hoodie.table.metadata.partitions=files +hoodie.table.precombine.field=ts hoodie.archivelog.folder=archived -hoodie.populate.meta.fields=true +hoodie.table.create.schema={"type"\:"record","name"\:"hudi_non_part_cow_record","namespace"\:"hoodie.hudi_non_part_cow","fields"\:[{"name"\:"_hoodie_commit_time","type"\:["string","null"]},{"name"\:"_hoodie_commit_seqno","type"\:["string","null"]},{"name"\:"_hoodie_record_key","type"\:["string","null"]},{"name"\:"_hoodie_partition_path","type"\:["string","null"]},{"name"\:"_hoodie_file_name","type"\:["string","null"]},{"name"\:"id","type"\:["long","null"]},{"name"\:"name","type"\:["string","null"]},{"name"\:"ts","type"\:["long","null"]},{"name"\:"dt","type"\:["string","null"]},{"name"\:"hh","type"\:["string","null"]}]} hoodie.timeline.layout.version=1 -hoodie.table.version=3 -hoodie.table.recordkey.fields=rowId -hoodie.table.base.file.format=PARQUET -hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator +hoodie.table.checksum=2968816715 +hoodie.datasource.write.drop.partition.columns=false +hoodie.table.recordkey.fields=id hoodie.table.name=hudi_non_part_cow -hoodie.datasource.write.hive_style_partitioning=false +hoodie.datasource.write.hive_style_partitioning=true +hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator +hoodie.database.name=default +hoodie.datasource.write.partitionpath.urlencode=false +hoodie.table.version=5 diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie_partition_metadata b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie_partition_metadata index f2149eb6cd5a3..e9de1b96c3ff3 100644 --- a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie_partition_metadata +++ b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/.hoodie_partition_metadata @@ -1,4 +1,4 @@ #partition metadata -#Fri Dec 17 11:05:23 UTC 2021 -commitTime=20211217110514527 +#Mon Nov 27 05:16:59 UTC 2023 +commitTime=20231127051653361 partitionDepth=0 diff --git a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0_0-27-28_20231127051653361.parquet similarity index 98% rename from plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet rename to plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0_0-27-28_20231127051653361.parquet index 52de8719bf62d..c8368bd7c33cd 100644 Binary files a/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet and b/plugin/trino-hudi/src/test/resources/hudi-testing-data/hudi_non_part_cow/05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0_0-27-28_20231127051653361.parquet differ diff --git a/plugin/trino-iceberg/pom.xml b/plugin/trino-iceberg/pom.xml index ee25b31756783..a4bff116e72b3 100644 --- a/plugin/trino-iceberg/pom.xml +++ b/plugin/trino-iceberg/pom.xml @@ -5,7 
+5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,15 +15,6 @@ ${project.parent.basedir} - - - instances 0.71.0 @@ -189,7 +180,7 @@ org.apache.datasketches datasketches-java - 4.2.0 + 5.0.1 @@ -587,7 +578,7 @@ org.keycloak keycloak-core - 22.0.5 + 23.0.3 test @@ -603,12 +594,6 @@ test - - org.testng - testng - test - - org.xerial sqlite-jdbc @@ -646,23 +631,6 @@ - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.surefire - surefire-junit-platform - ${dep.plugin.surefire.version} - - - org.apache.maven.surefire - surefire-testng - ${dep.plugin.surefire.version} - - - diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java index 5b3e0d7f8df5c..67e4b9675c2a6 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java @@ -30,6 +30,7 @@ import java.util.Optional; import static io.airlift.units.DataSize.Unit.GIGABYTE; +import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.plugin.hive.HiveCompressionCodec.ZSTD; import static io.trino.plugin.iceberg.CatalogType.HIVE_METASTORE; import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; @@ -67,6 +68,7 @@ public class IcebergConfig private Duration expireSnapshotsMinRetention = new Duration(7, DAYS); private Duration removeOrphanFilesMinRetention = new Duration(7, DAYS); private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE); + private DataSize idleWriterMinFileSize = DataSize.of(16, MEGABYTE); // This is meant to protect users who are misusing schema locations (by // putting schemas in locations with extraneous files), so default to false // to avoid deleting those files if Trino is unable to check. 
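Note on the IcebergConfig hunk above and the IcebergSessionProperties/IcebergPageSink hunks further down: together they introduce an idle-writer policy, where a partition writer that received no data since the last check and has already written more than `iceberg.idle-writer-min-file-size` (16MB by default) may be closed early by the engine. The following is a minimal, self-contained sketch of that policy, not Trino code; the names `IdleWriterPolicyDemo` and `Writer` are invented for illustration.

```java
// Sketch of the idle-writer policy: a writer is closed only if it wrote nothing since
// the last check ("idle") and already produced more than the configured minimum.
import java.util.ArrayList;
import java.util.List;

final class IdleWriterPolicyDemo
{
    // Stand-in for a partition writer; only the fields needed for the demo.
    static final class Writer
    {
        long writtenBytes;
        boolean active;   // set whenever a page is routed to this writer
        boolean closed;
    }

    private final List<Writer> writers = new ArrayList<>();
    private final long idleWriterMinFileSize;

    IdleWriterPolicyDemo(long idleWriterMinFileSize)
    {
        this.idleWriterMinFileSize = idleWriterMinFileSize;
    }

    Writer addWriter()
    {
        Writer writer = new Writer();
        writers.add(writer);
        return writer;
    }

    void write(Writer writer, long bytes)
    {
        writer.writtenBytes += bytes;
        writer.active = true;
    }

    // Mirrors the shape of closeIdleWriters(): skip writers that were active since the
    // last invocation or are still below the threshold; close the rest.
    void closeIdleWriters()
    {
        for (Writer writer : writers) {
            if (writer.active || writer.writtenBytes <= idleWriterMinFileSize) {
                writer.active = false;   // reset the flag for the next round
                continue;
            }
            writer.closed = true;
        }
    }

    public static void main(String[] args)
    {
        IdleWriterPolicyDemo demo = new IdleWriterPolicyDemo(16 * 1024 * 1024); // 16MB default
        Writer big = demo.addWriter();
        Writer small = demo.addWriter();
        demo.write(big, 64 * 1024 * 1024);
        demo.write(small, 1024);
        demo.closeIdleWriters();   // nothing closes: both writers were active in this round
        demo.closeIdleWriters();   // "big" is now idle and above the threshold, so it closes
        System.out.println("big closed=" + big.closed + ", small closed=" + small.closed);
    }
}
```

The design point mirrored here is that the active flag is reset on every check, so a writer must stay quiet for a full interval before it becomes eligible for closing.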
@@ -315,6 +317,20 @@ public IcebergConfig setTargetMaxFileSize(DataSize targetMaxFileSize) return this; } + @NotNull + public DataSize getIdleWriterMinFileSize() + { + return idleWriterMinFileSize; + } + + @Config("iceberg.idle-writer-min-file-size") + @ConfigDescription("Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine") + public IcebergConfig setIdleWriterMinFileSize(DataSize idleWriterMinFileSize) + { + this.idleWriterMinFileSize = idleWriterMinFileSize; + return this; + } + public boolean isDeleteSchemaLocationsFallback() { return this.deleteSchemaLocationsFallback; diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index f13626380ea60..d50818352c422 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -227,6 +227,7 @@ import static io.trino.plugin.iceberg.IcebergSessionProperties.isQueryPartitionFilterRequired; import static io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled; import static io.trino.plugin.iceberg.IcebergTableName.isDataTable; +import static io.trino.plugin.iceberg.IcebergTableName.isIcebergTableName; import static io.trino.plugin.iceberg.IcebergTableName.isMaterializedViewStorage; import static io.trino.plugin.iceberg.IcebergTableName.tableNameFrom; import static io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY; @@ -234,6 +235,7 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.SORTED_BY_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; +import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation; import static io.trino.plugin.iceberg.IcebergUtil.canEnforceColumnConstraintInSpecs; import static io.trino.plugin.iceberg.IcebergUtil.commit; import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; @@ -266,6 +268,7 @@ import static io.trino.spi.StandardErrorCode.COLUMN_ALREADY_EXISTS; import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; +import static io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.StandardErrorCode.QUERY_REJECTED; import static io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND; @@ -297,6 +300,7 @@ import static org.apache.iceberg.TableProperties.FORMAT_VERSION; import static org.apache.iceberg.TableProperties.WRITE_LOCATION_PROVIDER_IMPL; import static org.apache.iceberg.types.TypeUtil.indexParents; +import static org.apache.iceberg.util.LocationUtil.stripTrailingSlash; import static org.apache.iceberg.util.SnapshotUtil.schemaFor; public class IcebergMetadata @@ -387,6 +391,10 @@ public ConnectorTableHandle getTableHandle( throw new TrinoException(NOT_SUPPORTED, "Read table with start version is not supported"); } + if (!isIcebergTableName(tableName.getTableName())) { + return null; + } + if (isMaterializedViewStorage(tableName.getTableName())) { verify(endVersion.isEmpty(), "Materialized views do not support versioned queries"); @@ -547,7 +555,7 @@ public Optional getSystemTable(ConnectorSession session, SchemaTabl private Optional 
getRawSystemTable(ConnectorSession session, SchemaTableName tableName) { - if (isDataTable(tableName.getTableName()) || isMaterializedViewStorage(tableName.getTableName())) { + if (!isIcebergTableName(tableName.getTableName()) || isDataTable(tableName.getTableName()) || isMaterializedViewStorage(tableName.getTableName())) { return Optional.empty(); } @@ -565,21 +573,16 @@ private Optional getRawSystemTable(ConnectorSession session, Schema return Optional.empty(); } - Optional tableType = IcebergTableName.tableTypeFrom(tableName.getTableName()); - if (tableType.isEmpty()) { - return Optional.empty(); - } - SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), IcebergTableName.tableNameWithType(name, tableType.get())); - return switch (tableType.get()) { - case DATA -> throw new VerifyException("Unexpected DATA table type"); // Handled above. - case HISTORY -> Optional.of(new HistoryTable(systemTableName, table)); - case SNAPSHOTS -> Optional.of(new SnapshotsTable(systemTableName, typeManager, table)); - case PARTITIONS -> Optional.of(new PartitionTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); - case MANIFESTS -> Optional.of(new ManifestsTable(systemTableName, table, getCurrentSnapshotId(table))); - case FILES -> Optional.of(new FilesTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); - case PROPERTIES -> Optional.of(new PropertiesTable(systemTableName, table)); - case REFS -> Optional.of(new RefsTable(systemTableName, table)); - case MATERIALIZED_VIEW_STORAGE -> throw new VerifyException("Unexpected MATERIALIZED_VIEW_STORAGE table type"); + TableType tableType = IcebergTableName.tableTypeFrom(tableName.getTableName()); + return switch (tableType) { + case DATA, MATERIALIZED_VIEW_STORAGE -> throw new VerifyException("Unexpected table type: " + tableType); // Handled above. 
+ case HISTORY -> Optional.of(new HistoryTable(tableName, table)); + case SNAPSHOTS -> Optional.of(new SnapshotsTable(tableName, typeManager, table)); + case PARTITIONS -> Optional.of(new PartitionTable(tableName, typeManager, table, getCurrentSnapshotId(table))); + case MANIFESTS -> Optional.of(new ManifestsTable(tableName, table, getCurrentSnapshotId(table))); + case FILES -> Optional.of(new FilesTable(tableName, typeManager, table, getCurrentSnapshotId(table))); + case PROPERTIES -> Optional.of(new PropertiesTable(tableName, table)); + case REFS -> Optional.of(new RefsTable(tableName, table)); }; } @@ -934,15 +937,27 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con if (!schemaExists(session, schemaName)) { throw new SchemaNotFoundException(schemaName); } + + String tableLocation = null; if (replace) { IcebergTableHandle table = (IcebergTableHandle) getTableHandle(session, tableMetadata.getTableSchema().getTable(), Optional.empty(), Optional.empty()); if (table != null) { verifyTableVersionForUpdate(table); Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); + Optional providedTableLocation = getTableLocation(tableMetadata.getProperties()); + if (providedTableLocation.isPresent() && !stripTrailingSlash(providedTableLocation.get()).equals(icebergTable.location())) { + throw new TrinoException(INVALID_TABLE_PROPERTY, format("The provided location '%s' does not match the existing table location '%s'", providedTableLocation.get(), icebergTable.location())); + } validateNotModifyingOldSnapshot(table, icebergTable); + tableLocation = icebergTable.location(); } } - transaction = newCreateTableTransaction(catalog, tableMetadata, session, replace); + + if (tableLocation == null) { + tableLocation = getTableLocation(tableMetadata.getProperties()) + .orElseGet(() -> catalog.defaultTableLocation(session, tableMetadata.getTable())); + } + transaction = newCreateTableTransaction(catalog, tableMetadata, session, replace, tableLocation); Location location = Location.of(transaction.table().location()); TrinoFileSystem fileSystem = fileSystemFactory.create(session); try { @@ -2543,16 +2558,15 @@ else if (isMetadataColumnId(columnHandle.getId())) { remainingConstraint = TupleDomain.withColumnDomains(newUnenforced).intersect(TupleDomain.withColumnDomains(unsupported)); } - Set newConstraintColumns = constraint.getPredicateColumns() - .map(columnHandles -> columnHandles.stream() - .map(columnHandle -> (IcebergColumnHandle) columnHandle) - .collect(toImmutableSet())) - .orElse(ImmutableSet.of()); + Set newConstraintColumns = Streams.concat( + table.getConstraintColumns().stream(), + constraint.getPredicateColumns().orElseGet(ImmutableSet::of).stream() + .map(columnHandle -> (IcebergColumnHandle) columnHandle)) + .collect(toImmutableSet()); if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) && newUnenforcedConstraint.equals(table.getUnenforcedPredicate()) - && newConstraintColumns.equals(table.getConstraintColumns()) - && constraint.getPredicateColumns().isEmpty()) { + && newConstraintColumns.equals(table.getConstraintColumns())) { return Optional.empty(); } @@ -2575,7 +2589,7 @@ else if (isMetadataColumnId(columnHandle.getId())) { table.getStorageProperties(), table.isRecordScannedFiles(), table.getMaxScannedFileSize(), - Sets.union(table.getConstraintColumns(), newConstraintColumns), + newConstraintColumns, table.getForAnalyze()), remainingConstraint.transformKeys(ColumnHandle.class::cast), extractionResult.remainingExpression(), @@ 
-2750,9 +2764,15 @@ Table getIcebergTable(ConnectorSession session, SchemaTableName schemaTableName) } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map properties, + boolean replace, + boolean ignoreExisting) { - catalog.createMaterializedView(session, viewName, definition, replace, ignoreExisting); + catalog.createMaterializedView(session, viewName, definition, properties, replace, ignoreExisting); } @Override @@ -2882,6 +2902,12 @@ public Optional getMaterializedView(Connect return catalog.getMaterializedView(session, viewName); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + { + return catalog.getMaterializedViewProperties(session, viewName, definition); + } + @Override public void renameMaterializedView(ConnectorSession session, SchemaTableName source, SchemaTableName target) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java index b54d8f25fa9bd..284385d0e49b4 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Streams; import io.airlift.json.JsonCodec; +import io.airlift.log.Logger; import io.airlift.slice.Slice; import io.airlift.units.DataSize; import io.trino.filesystem.Location; @@ -91,6 +92,8 @@ public class IcebergPageSink implements ConnectorPageSink { + private static final Logger LOG = Logger.get(IcebergPageSink.class); + private static final int MAX_PAGE_POSITIONS = 4096; private final int maxOpenWriters; @@ -105,6 +108,7 @@ public class IcebergPageSink private final MetricsConfig metricsConfig; private final PagePartitioner pagePartitioner; private final long targetMaxFileSize; + private final long idleWriterMinFileSize; private final Map storageProperties; private final List sortOrder; private final boolean sortedWritingEnabled; @@ -120,6 +124,7 @@ public class IcebergPageSink private final List writers = new ArrayList<>(); private final List closedWriterRollbackActions = new ArrayList<>(); private final Collection commitTasks = new ArrayList<>(); + private final List activeWriters = new ArrayList<>(); private long writtenBytes; private long memoryUsage; @@ -157,6 +162,7 @@ public IcebergPageSink( this.maxOpenWriters = maxOpenWriters; this.pagePartitioner = new PagePartitioner(pageIndexerFactory, toPartitionColumns(inputColumns, partitionSpec)); this.targetMaxFileSize = IcebergSessionProperties.getTargetMaxFileSize(session); + this.idleWriterMinFileSize = IcebergSessionProperties.getIdleWriterMinFileSize(session); this.storageProperties = requireNonNull(storageProperties, "storageProperties is null"); this.sortOrder = requireNonNull(sortOrder, "sortOrder is null"); this.sortedWritingEnabled = isSortedWritingEnabled(session); @@ -300,7 +306,9 @@ private void writePage(Page page) pageForWriter = pageForWriter.getPositions(positions, 0, positions.length); } - IcebergFileWriter writer = writers.get(index).getWriter(); + WriteContext 
writeContext = writers.get(index); + verify(writeContext != null, "Expected writer at index %s", index); + IcebergFileWriter writer = writeContext.getWriter(); long currentWritten = writer.getWrittenBytes(); long currentMemory = writer.getMemoryUsage(); @@ -309,6 +317,8 @@ private void writePage(Page page) writtenBytes += (writer.getWrittenBytes() - currentWritten); memoryUsage += (writer.getMemoryUsage() - currentMemory); + // Mark this writer as active (i.e. not idle) + activeWriters.set(index, true); } } @@ -323,6 +333,7 @@ private int[] getWriterIndexes(Page page) // expand writers list to new size while (writers.size() <= pagePartitioner.getMaxIndex()) { writers.add(null); + activeWriters.add(false); } // create missing writers @@ -369,14 +380,30 @@ private int[] getWriterIndexes(Page page) memoryUsage += writer.getWriter().getMemoryUsage(); } verify(writers.size() == pagePartitioner.getMaxIndex() + 1); - verify(!writers.contains(null)); return writerIndexes; } + @Override + public void closeIdleWriters() + { + for (int writerIndex = 0; writerIndex < writers.size(); writerIndex++) { + WriteContext writeContext = writers.get(writerIndex); + if (activeWriters.get(writerIndex) || writeContext == null || writeContext.getWriter().getWrittenBytes() <= idleWriterMinFileSize) { + activeWriters.set(writerIndex, false); + continue; + } + LOG.debug("Closing writer %s with %s bytes written", writerIndex, writeContext.getWriter().getWrittenBytes()); + closeWriter(writerIndex); + } + } + private void closeWriter(int writerIndex) { WriteContext writeContext = writers.get(writerIndex); + if (writeContext == null) { + return; + } IcebergFileWriter writer = writeContext.getWriter(); long currentWritten = writer.getWrittenBytes(); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java index 19bde2a570789..c90e13d3df338 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java @@ -13,7 +13,6 @@ */ package io.trino.plugin.iceberg; -import com.google.common.base.Suppliers; import com.google.common.base.VerifyException; import com.google.common.collect.AbstractIterator; import com.google.common.collect.BiMap; @@ -133,6 +132,7 @@ import java.util.function.Supplier; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Suppliers.memoize; import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; @@ -167,12 +167,15 @@ import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetSmallFileThreshold; import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled; import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetIgnoreStatistics; import static io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata; import static io.trino.plugin.iceberg.IcebergSessionProperties.useParquetBloomFilter; import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; +import static io.trino.plugin.iceberg.IcebergSplitSource.partitionMatchesPredicate; import static 
io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; +import static io.trino.plugin.iceberg.IcebergUtil.getPartitionValues; import static io.trino.plugin.iceberg.IcebergUtil.schemaFromHandles; import static io.trino.plugin.iceberg.delete.EqualityDeleteFilter.readEqualityDeletes; import static io.trino.plugin.iceberg.delete.PositionDeleteFilter.readPositionDeletes; @@ -194,6 +197,7 @@ import static java.lang.String.format; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; +import static java.util.function.Function.identity; import static java.util.function.Predicate.not; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.mapping; @@ -332,8 +336,11 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { } }); - TupleDomain effectivePredicate = unenforcedPredicate - .intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)) + TupleDomain effectivePredicate = getEffectivePredicate( + tableSchema, + partitionKeys, + dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast), + unenforcedPredicate) .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD); if (effectivePredicate.isNone()) { return new EmptyPageSource(); @@ -386,7 +393,7 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { .map(readerColumns -> readerColumns.get().stream().map(IcebergColumnHandle.class::cast).collect(toList())) .orElse(requiredColumns); - Supplier> deletePredicate = Suppliers.memoize(() -> { + Supplier> deletePredicate = memoize(() -> { List deleteFilters = readDeletes( session, tableSchema, @@ -408,6 +415,28 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { deletePredicate); } + private TupleDomain getEffectivePredicate( + Schema tableSchema, + Map> partitionKeys, + TupleDomain dynamicFilterPredicate, + TupleDomain unenforcedPredicate) + { + TupleDomain effectivePredicate = unenforcedPredicate.intersect(dynamicFilterPredicate); + if (dynamicFilterPredicate.isAll() || dynamicFilterPredicate.isNone() || partitionKeys.isEmpty()) { + return effectivePredicate; + } + Set partitionColumns = partitionKeys.keySet().stream() + .map(fieldId -> getColumnHandle(tableSchema.findField(fieldId), typeManager)) + .collect(toImmutableSet()); + Supplier> partitionValues = memoize(() -> getPartitionValues(partitionColumns, partitionKeys)); + if (!partitionMatchesPredicate(partitionColumns, partitionValues, effectivePredicate)) { + return TupleDomain.none(); + } + // Filter out partition columns domains from the dynamic filter because they should be irrelevant at data file level + return effectivePredicate + .filter((columnHandle, domain) -> !partitionKeys.containsKey(columnHandle.getId())); + } + private Set requiredColumnsForDeletes(Schema schema, List deletes) { ImmutableSet.Builder requiredColumns = ImmutableSet.builder(); @@ -584,6 +613,7 @@ public ReaderPageSourceWithRowPositions createDataPageSource( .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) .withSmallFileThreshold(getParquetSmallFileThreshold(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)) .withBloomFilter(useParquetBloomFilter(session)) // TODO https://github.com/trinodb/trino/issues/11000 .withUseColumnIndex(false), @@ -957,7 +987,7 @@ private static 
ReaderPageSourceWithRowPositions createParquetPageSource( MessageType requestedSchema = getMessageType(regularColumns, fileSchema.getName(), parquetIdToField); Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema); - TupleDomain parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate); + TupleDomain parquetTupleDomain = options.isIgnoreStatistics() ? TupleDomain.all() : getParquetTupleDomain(descriptorsByPath, effectivePredicate); TupleDomainParquetPredicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath, UTC); List rowGroups = getFilteredRowGroups( @@ -1442,12 +1472,14 @@ private static TupleDomain getParquetTupleDomain(Map descriptorsById = descriptorsByPath.values().stream() + .collect(toImmutableMap(descriptor -> descriptor.getPrimitiveType().getId().intValue(), identity())); ImmutableMap.Builder predicate = ImmutableMap.builder(); effectivePredicate.getDomains().orElseThrow().forEach((columnHandle, domain) -> { String baseType = columnHandle.getType().getTypeSignature().getBase(); // skip looking up predicates for complex types as Parquet only stores stats for primitives if (columnHandle.isBaseColumn() && (!baseType.equals(StandardTypes.MAP) && !baseType.equals(StandardTypes.ARRAY) && !baseType.equals(StandardTypes.ROW))) { - ColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); + ColumnDescriptor descriptor = descriptorsById.get(columnHandle.getId()); if (descriptor != null) { predicate.put(descriptor, domain); } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java index 42868452dc522..b499cd212ed1d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java @@ -76,6 +76,7 @@ public final class IcebergSessionProperties private static final String PARQUET_USE_BLOOM_FILTER = "parquet_use_bloom_filter"; private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count"; private static final String PARQUET_SMALL_FILE_THRESHOLD = "parquet_small_file_threshold"; + private static final String PARQUET_IGNORE_STATISTICS = "parquet_ignore_statistics"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; private static final String PARQUET_WRITER_BATCH_SIZE = "parquet_writer_batch_size"; @@ -84,6 +85,7 @@ public final class IcebergSessionProperties public static final String EXTENDED_STATISTICS_ENABLED = "extended_statistics_enabled"; private static final String PROJECTION_PUSHDOWN_ENABLED = "projection_pushdown_enabled"; private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size"; + private static final String IDLE_WRITER_MIN_FILE_SIZE = "idle_writer_min_file_size"; public static final String COLLECT_EXTENDED_STATISTICS_ON_WRITE = "collect_extended_statistics_on_write"; private static final String HIVE_CATALOG_NAME = "hive_catalog_name"; private static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight"; @@ -234,6 +236,11 @@ public IcebergSessionProperties( parquetReaderConfig.getSmallFileThreshold(), value -> validateMaxDataSize(PARQUET_SMALL_FILE_THRESHOLD, value, 
DataSize.valueOf(PARQUET_READER_MAX_SMALL_FILE_THRESHOLD)), false)) + .add(booleanProperty( + PARQUET_IGNORE_STATISTICS, + "Ignore statistics from Parquet to allow querying files with corrupted or incorrect statistics", + parquetReaderConfig.isIgnoreStatistics(), + false)) .add(dataSizeProperty( PARQUET_WRITER_BLOCK_SIZE, "Parquet: Writer block size", @@ -279,6 +286,11 @@ public IcebergSessionProperties( "Target maximum size of written files; the actual size may be larger", icebergConfig.getTargetMaxFileSize(), false)) + .add(dataSizeProperty( + IDLE_WRITER_MIN_FILE_SIZE, + "Minimum data written by a single partition writer before it can be consider as 'idle' and could be closed by the engine", + icebergConfig.getIdleWriterMinFileSize(), + false)) .add(booleanProperty( COLLECT_EXTENDED_STATISTICS_ON_WRITE, COLLECT_EXTENDED_STATISTICS_ON_WRITE_DESCRIPTION, @@ -442,6 +454,11 @@ public static DataSize getParquetSmallFileThreshold(ConnectorSession session) return session.getProperty(PARQUET_SMALL_FILE_THRESHOLD, DataSize.class); } + public static boolean isParquetIgnoreStatistics(ConnectorSession session) + { + return session.getProperty(PARQUET_IGNORE_STATISTICS, Boolean.class); + } + public static DataSize getParquetWriterPageSize(ConnectorSession session) { return session.getProperty(PARQUET_WRITER_PAGE_SIZE, DataSize.class); @@ -492,6 +509,11 @@ public static long getTargetMaxFileSize(ConnectorSession session) return session.getProperty(TARGET_MAX_FILE_SIZE, DataSize.class).toBytes(); } + public static long getIdleWriterMinFileSize(ConnectorSession session) + { + return session.getProperty(IDLE_WRITER_MIN_FILE_SIZE, DataSize.class).toBytes(); + } + public static Optional getHiveCatalogName(ConnectorSession session) { return Optional.ofNullable(session.getProperty(HIVE_CATALOG_NAME, String.class)); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java index b18feb4c66767..5212772b99e90 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java @@ -15,11 +15,15 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; +import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import com.google.common.io.Closer; import io.airlift.units.DataSize; import io.airlift.units.Duration; +import io.trino.cache.NonEvictableCache; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.TrinoInputFile; @@ -61,6 +65,7 @@ import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.function.Predicate; import java.util.function.Supplier; import static com.google.common.base.Preconditions.checkArgument; @@ -72,6 +77,8 @@ import static com.google.common.collect.Sets.intersection; import static com.google.common.math.LongMath.saturatedAdd; import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.cache.CacheUtils.uncheckedCacheGet; +import static io.trino.cache.SafeCaches.buildNonEvictableCache; import static io.trino.plugin.iceberg.ExpressionConverter.isConvertableToIcebergExpression; import static 
io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle; @@ -109,7 +116,7 @@ public class IcebergSplitSource private final DynamicFilter dynamicFilter; private final long dynamicFilteringWaitTimeoutMillis; private final Stopwatch dynamicFilterWaitStopwatch; - private final Constraint constraint; + private final PartitionConstraintMatcher partitionConstraintMatcher; private final TypeManager typeManager; private final Closer closer = Closer.create(); private final double minimumAssignedSplitWeight; @@ -124,7 +131,6 @@ public class IcebergSplitSource private long targetSplitSize; private CloseableIterator fileScanIterator; private Iterator fileTasksIterator = emptyIterator(); - private boolean fileHasAnyDeletions; private final boolean recordScannedFiles; private final ImmutableSet.Builder scannedFiles = ImmutableSet.builder(); @@ -152,7 +158,7 @@ public IcebergSplitSource( this.dynamicFilter = requireNonNull(dynamicFilter, "dynamicFilter is null"); this.dynamicFilteringWaitTimeoutMillis = dynamicFilteringWaitTimeout.toMillis(); this.dynamicFilterWaitStopwatch = Stopwatch.createStarted(); - this.constraint = requireNonNull(constraint, "constraint is null"); + this.partitionConstraintMatcher = new PartitionConstraintMatcher(constraint); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.recordScannedFiles = recordScannedFiles; this.minimumAssignedSplitWeight = minimumAssignedSplitWeight; @@ -229,83 +235,88 @@ public CompletableFuture getNextBatch(int maxSize) List splits = new ArrayList<>(maxSize); while (splits.size() < maxSize && (fileTasksIterator.hasNext() || fileScanIterator.hasNext())) { if (!fileTasksIterator.hasNext()) { + if (limit.isPresent() && limit.getAsLong() <= outputRowsLowerBound) { + finish(); + break; + } FileScanTask wholeFileTask = fileScanIterator.next(); - if (wholeFileTask.deletes().isEmpty() && noDataColumnsProjected(wholeFileTask)) { + boolean fileHasNoDeletions = wholeFileTask.deletes().isEmpty(); + + if (pruneFileScanTask(wholeFileTask, fileHasNoDeletions, dynamicFilterPredicate)) { + continue; + } + + if (recordScannedFiles) { + // Positional and Equality deletes can only be cleaned up if the whole table has been optimized. + // Equality deletes may apply to many files, and position deletes may be grouped together. This makes it difficult to know if they are obsolete. + List fullyAppliedDeletes = tableHandle.getEnforcedPredicate().isAll() ? wholeFileTask.deletes() : ImmutableList.of(); + scannedFiles.add(new DataFileWithDeleteFiles(wholeFileTask.file(), fullyAppliedDeletes)); + } + + if (fileHasNoDeletions) { + // There were no deletions, so we will produce splits covering the whole file + outputRowsLowerBound = saturatedAdd(outputRowsLowerBound, wholeFileTask.file().recordCount()); + } + + if (fileHasNoDeletions && noDataColumnsProjected(wholeFileTask)) { fileTasksIterator = List.of(wholeFileTask).iterator(); } else { fileTasksIterator = wholeFileTask.split(targetSplitSize).iterator(); } - fileHasAnyDeletions = false; // In theory, .split() could produce empty iterator, so let's evaluate the outer loop condition again. 
continue; } - FileScanTask scanTask = fileTasksIterator.next(); - fileHasAnyDeletions = fileHasAnyDeletions || !scanTask.deletes().isEmpty(); - if (scanTask.deletes().isEmpty() && - maxScannedFileSizeInBytes.isPresent() && - scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) { - continue; - } + splits.add(toIcebergSplit(fileTasksIterator.next())); + } + return completedFuture(new ConnectorSplitBatch(splits, isFinished())); + } - if (!pathDomain.includesNullableValue(utf8Slice(scanTask.file().path().toString()))) { - continue; - } - if (!fileModifiedTimeDomain.isAll()) { - long fileModifiedTime = getModificationTime(scanTask.file().path().toString()); - if (!fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY))) { - continue; - } + private boolean pruneFileScanTask(FileScanTask fileScanTask, boolean fileHasNoDeletions, TupleDomain dynamicFilterPredicate) + { + if (fileHasNoDeletions && + maxScannedFileSizeInBytes.isPresent() && + fileScanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) { + return true; + } + + if (!pathDomain.isAll() && !pathDomain.includesNullableValue(utf8Slice(fileScanTask.file().path().toString()))) { + return true; + } + if (!fileModifiedTimeDomain.isAll()) { + long fileModifiedTime = getModificationTime(fileScanTask.file().path().toString()); + if (!fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY))) { + return true; } - IcebergSplit icebergSplit = toIcebergSplit(scanTask); + } - Schema fileSchema = scanTask.spec().schema(); - Map> partitionKeys = getPartitionKeys(scanTask); + Schema fileSchema = fileScanTask.spec().schema(); + Map> partitionKeys = getPartitionKeys(fileScanTask); - Set identityPartitionColumns = partitionKeys.keySet().stream() - .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)) - .collect(toImmutableSet()); + Set identityPartitionColumns = partitionKeys.keySet().stream() + .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)) + .collect(toImmutableSet()); - Supplier> partitionValues = memoize(() -> getPartitionValues(identityPartitionColumns, partitionKeys)); + Supplier> partitionValues = memoize(() -> getPartitionValues(identityPartitionColumns, partitionKeys)); - if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) { - if (!partitionMatchesPredicate( - identityPartitionColumns, - partitionValues, - dynamicFilterPredicate)) { - continue; - } - if (!fileMatchesPredicate( - fieldIdToType, - dynamicFilterPredicate, - scanTask.file().lowerBounds(), - scanTask.file().upperBounds(), - scanTask.file().nullValueCounts())) { - continue; - } + if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) { + if (!partitionMatchesPredicate( + identityPartitionColumns, + partitionValues, + dynamicFilterPredicate)) { + return true; } - if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) { - continue; + if (!fileMatchesPredicate( + fieldIdToType, + dynamicFilterPredicate, + fileScanTask.file().lowerBounds(), + fileScanTask.file().upperBounds(), + fileScanTask.file().nullValueCounts())) { + return true; } - if (recordScannedFiles) { - // Positional and Equality deletes can only be cleaned up if the whole table has been optimized. - // Equality deletes may apply to many files, and position deletes may be grouped together. 
This makes it difficult to know if they are obsolete. - List fullyAppliedDeletes = tableHandle.getEnforcedPredicate().isAll() ? scanTask.deletes() : ImmutableList.of(); - scannedFiles.add(new DataFileWithDeleteFiles(scanTask.file(), fullyAppliedDeletes)); - } - if (!fileTasksIterator.hasNext()) { - // This is the last task for this file - if (!fileHasAnyDeletions) { - // There were no deletions, so we produced splits covering the whole file - outputRowsLowerBound = saturatedAdd(outputRowsLowerBound, scanTask.file().recordCount()); - if (limit.isPresent() && limit.getAsLong() <= outputRowsLowerBound) { - finish(); - } - } - } - splits.add(icebergSplit); } - return completedFuture(new ConnectorSplitBatch(splits, isFinished())); + + return !partitionConstraintMatcher.matches(identityPartitionColumns, partitionValues); } private boolean noDataColumnsProjected(FileScanTask fileScanTask) @@ -432,19 +443,38 @@ else if (upperBound != null) { return Domain.create(ValueSet.ofRanges(statisticsRange), mayContainNulls); } - static boolean partitionMatchesConstraint( - Set identityPartitionColumns, - Supplier> partitionValues, - Constraint constraint) + private static class PartitionConstraintMatcher { - // We use Constraint just to pass functional predicate here from DistributedExecutionPlanner - verify(constraint.getSummary().isAll()); + private final NonEvictableCache, Boolean> partitionConstraintResults; + private final Optional>> predicate; + private final Optional> predicateColumns; + + private PartitionConstraintMatcher(Constraint constraint) + { + // We use Constraint just to pass functional predicate here from DistributedExecutionPlanner + verify(constraint.getSummary().isAll()); + this.predicate = constraint.predicate(); + this.predicateColumns = constraint.getPredicateColumns(); + this.partitionConstraintResults = buildNonEvictableCache(CacheBuilder.newBuilder().maximumSize(1000)); + } - if (constraint.predicate().isEmpty() || - intersection(constraint.getPredicateColumns().orElseThrow(), identityPartitionColumns).isEmpty()) { - return true; + boolean matches( + Set identityPartitionColumns, + Supplier> partitionValuesSupplier) + { + if (predicate.isEmpty()) { + return true; + } + Set predicatePartitionColumns = intersection(predicateColumns.orElseThrow(), identityPartitionColumns); + if (predicatePartitionColumns.isEmpty()) { + return true; + } + Map partitionValues = partitionValuesSupplier.get(); + return uncheckedCacheGet( + partitionConstraintResults, + ImmutableMap.copyOf(Maps.filterKeys(partitionValues, predicatePartitionColumns::contains)), + () -> predicate.orElseThrow().test(partitionValues)); } - return constraint.predicate().get().test(partitionValues.get()); } @VisibleForTesting diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableName.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableName.java index 8e5582cc2fb9c..63790f48bb050 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableName.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableName.java @@ -13,15 +13,15 @@ */ package io.trino.plugin.iceberg; -import io.trino.spi.TrinoException; - -import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; import static 
io.trino.plugin.iceberg.TableType.DATA; import static io.trino.plugin.iceberg.TableType.MATERIALIZED_VIEW_STORAGE; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; @@ -29,9 +29,22 @@ public final class IcebergTableName { private IcebergTableName() {} - private static final Pattern TABLE_PATTERN = Pattern.compile("" + - "(?
[^$@]+)" + - "(?:\\$(?[^@]+))?"); + private static final Pattern TABLE_PATTERN; + + static { + String referencableTableTypes = Stream.of(TableType.values()) + .filter(tableType -> tableType != DATA) + .map(tableType -> tableType.name().toLowerCase(ENGLISH)) + .collect(Collectors.joining("|")); + TABLE_PATTERN = Pattern.compile("" + + "(?
[^$@]+)" + + "(?:\\$(?(?i:" + referencableTableTypes + ")))?"); + } + + public static boolean isIcebergTableName(String tableName) + { + return TABLE_PATTERN.matcher(tableName).matches(); + } public static String tableNameWithType(String tableName, TableType tableType) { @@ -39,52 +52,38 @@ public static String tableNameWithType(String tableName, TableType tableType) return tableName + "$" + tableType.name().toLowerCase(ENGLISH); } - public static String tableNameFrom(String name) + public static String tableNameFrom(String validIcebergTableName) { - Matcher match = TABLE_PATTERN.matcher(name); - if (!match.matches()) { - throw new TrinoException(NOT_SUPPORTED, "Invalid Iceberg table name: " + name); - } - + Matcher match = TABLE_PATTERN.matcher(validIcebergTableName); + checkArgument(match.matches(), "Invalid Iceberg table name: %s", validIcebergTableName); return match.group("table"); } - public static Optional tableTypeFrom(String name) + public static TableType tableTypeFrom(String validIcebergTableName) { - Matcher match = TABLE_PATTERN.matcher(name); - if (!match.matches()) { - throw new TrinoException(NOT_SUPPORTED, "Invalid Iceberg table name: " + name); - } + Matcher match = TABLE_PATTERN.matcher(validIcebergTableName); + checkArgument(match.matches(), "Invalid Iceberg table name: %s", validIcebergTableName); + String typeString = match.group("type"); if (typeString == null) { - return Optional.of(DATA); - } - try { - TableType parsedType = TableType.valueOf(typeString.toUpperCase(ENGLISH)); - if (parsedType == DATA) { - // $data cannot be encoded in table name - return Optional.empty(); - } - return Optional.of(parsedType); - } - catch (IllegalArgumentException e) { - return Optional.empty(); + return DATA; } + TableType parsedType = TableType.valueOf(typeString.toUpperCase(ENGLISH)); + // $data cannot be encoded in table name + verify(parsedType != DATA, "parsedType is unexpectedly DATA"); + return parsedType; } - public static boolean isDataTable(String name) + public static boolean isDataTable(String validIcebergTableName) { - Matcher match = TABLE_PATTERN.matcher(name); - if (!match.matches()) { - throw new TrinoException(NOT_SUPPORTED, "Invalid Iceberg table name: " + name); - } + Matcher match = TABLE_PATTERN.matcher(validIcebergTableName); + checkArgument(match.matches(), "Invalid Iceberg table name: %s", validIcebergTableName); String typeString = match.group("type"); return typeString == null; } - public static boolean isMaterializedViewStorage(String name) + public static boolean isMaterializedViewStorage(String validIcebergTableName) { - Optional tableType = tableTypeFrom(name); - return tableType.isPresent() && tableType.get() == MATERIALIZED_VIEW_STORAGE; + return tableTypeFrom(validIcebergTableName) == MATERIALIZED_VIEW_STORAGE; } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index fe71cdbad39b4..3613ec3023d86 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -116,7 +116,6 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.getOrcBloomFilterFpp; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; import static io.trino.plugin.iceberg.IcebergTableProperties.getSortOrder; -import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation; import static 
io.trino.plugin.iceberg.PartitionFields.parsePartitionFields; import static io.trino.plugin.iceberg.PartitionFields.toPartitionFields; import static io.trino.plugin.iceberg.SortFieldUtils.parseSortFields; @@ -638,19 +637,17 @@ public static Schema schemaFromMetadata(List columns) return new Schema(icebergSchema.asStructType().fields()); } - public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace) + public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace, String tableLocation) { SchemaTableName schemaTableName = tableMetadata.getTable(); Schema schema = schemaFromMetadata(tableMetadata.getColumns()); PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties())); SortOrder sortOrder = parseSortFields(schema, getSortOrder(tableMetadata.getProperties())); - String targetPath = getTableLocation(tableMetadata.getProperties()) - .orElseGet(() -> catalog.defaultTableLocation(session, schemaTableName)); if (replace) { - return catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, targetPath, createTableProperties(tableMetadata)); + return catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata)); } - return catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, targetPath, createTableProperties(tableMetadata)); + return catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata)); } public static Map createTableProperties(ConnectorTableMetadata tableMetadata) diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java index 5920798b8a106..708d48d45a304 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java @@ -40,6 +40,7 @@ import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; +import java.util.function.Function; import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -233,6 +234,13 @@ protected String writeNewMetadata(TableMetadata metadata, int newVersion) } protected void refreshFromMetadataLocation(String newLocation) + { + refreshFromMetadataLocation( + newLocation, + metadataLocation -> TableMetadataParser.read(fileIo, fileIo.newInputFile(metadataLocation))); + } + + protected void refreshFromMetadataLocation(String newLocation, Function metadataLoader) { // use null-safe equality check because new tables have a null metadata location if (Objects.equals(currentMetadataLocation, newLocation)) { @@ -254,7 +262,7 @@ protected void refreshFromMetadataLocation(String newLocation) .withMaxDuration(Duration.ofMinutes(10)) .abortOn(failure -> failure instanceof ValidationException || isNotFoundException(failure)) .build()) - .get(() -> TableMetadataParser.read(fileIo, io().newInputFile(newLocation))); + .get(() -> metadataLoader.apply(newLocation)); } catch 
(Throwable failure) { if (isNotFoundException(failure)) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java index f9e80d3eb42f6..d461164a5e707 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java @@ -77,6 +77,7 @@ import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.mappedCopy; import static io.trino.plugin.hive.util.HiveUtil.escapeTableName; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; import static io.trino.plugin.iceberg.IcebergMaterializedViewAdditionalProperties.STORAGE_SCHEMA; import static io.trino.plugin.iceberg.IcebergMaterializedViewAdditionalProperties.getStorageSchema; import static io.trino.plugin.iceberg.IcebergMaterializedViewDefinition.decodeMaterializedViewData; @@ -201,6 +202,25 @@ public Optional getMaterializedView(Connect protected abstract Optional doGetMaterializedView(ConnectorSession session, SchemaTableName schemaViewName); + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + { + SchemaTableName storageTableName = definition.getStorageTable() + .orElseThrow(() -> new TrinoException(ICEBERG_INVALID_METADATA, "Materialized view definition is missing a storage table")) + .getSchemaTableName(); + + try { + Table storageTable = loadTable(session, definition.getStorageTable().orElseThrow().getSchemaTableName()); + return ImmutableMap.builder() + .putAll(getIcebergTableProperties(storageTable)) + .put(STORAGE_SCHEMA, storageTableName.getSchemaName()) + .buildOrThrow(); + } + catch (RuntimeException e) { + throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Unable to load storage table metadata for materialized view: " + viewName); + } + } + protected Transaction newCreateTableTransaction( ConnectorSession session, SchemaTableName schemaTableName, @@ -281,20 +301,24 @@ protected void deleteTableDirectory(TrinoFileSystem fileSystem, SchemaTableName } } - protected Location createMaterializedViewStorage(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + protected Location createMaterializedViewStorage( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties) { - if (getStorageSchema(definition.getProperties()).isPresent()) { + if (getStorageSchema(materializedViewProperties).isPresent()) { throw new TrinoException(NOT_SUPPORTED, "Materialized view property '%s' is not supported when hiding materialized view storage tables is enabled".formatted(STORAGE_SCHEMA)); } SchemaTableName storageTableName = new SchemaTableName(viewName.getSchemaName(), tableNameWithType(viewName.getTableName(), MATERIALIZED_VIEW_STORAGE)); - String tableLocation = getTableLocation(definition.getProperties()) + String tableLocation = getTableLocation(materializedViewProperties) .orElseGet(() -> defaultTableLocation(session, viewName)); - List columns = columnsForMaterializedView(definition); + List columns = columnsForMaterializedView(definition, materializedViewProperties); Schema schema = schemaFromMetadata(columns); - 
PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(definition.getProperties())); - SortOrder sortOrder = parseSortFields(schema, getSortOrder(definition.getProperties())); - Map properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, definition.getProperties(), Optional.empty())); + PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(materializedViewProperties)); + SortOrder sortOrder = parseSortFields(schema, getSortOrder(materializedViewProperties)); + Map properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty())); TableMetadata metadata = newTableMetadata(schema, partitionSpec, sortOrder, tableLocation, properties); @@ -307,25 +331,31 @@ protected Location createMaterializedViewStorage(ConnectorSession session, Schem return metadataFileLocation; } - protected SchemaTableName createMaterializedViewStorageTable(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + protected SchemaTableName createMaterializedViewStorageTable( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties) { // Generate a storage table name and create a storage table. The properties in the definition are table properties for the // storage table as indicated in the materialized view definition. String storageTableName = "st_" + randomUUID().toString().replace("-", ""); - String storageSchema = getStorageSchema(definition.getProperties()).orElse(viewName.getSchemaName()); + String storageSchema = getStorageSchema(materializedViewProperties).orElse(viewName.getSchemaName()); SchemaTableName storageTable = new SchemaTableName(storageSchema, storageTableName); - List columns = columnsForMaterializedView(definition); + List columns = columnsForMaterializedView(definition, materializedViewProperties); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(storageTable, columns, definition.getProperties(), Optional.empty()); - Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false); + ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(storageTable, columns, materializedViewProperties, Optional.empty()); + String tableLocation = getTableLocation(tableMetadata.getProperties()) + .orElseGet(() -> defaultTableLocation(session, tableMetadata.getTable())); + Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation); AppendFiles appendFiles = transaction.newAppend(); commit(appendFiles, session); transaction.commitTransaction(); return storageTable; } - private List columnsForMaterializedView(ConnectorMaterializedViewDefinition definition) + private List columnsForMaterializedView(ConnectorMaterializedViewDefinition definition, Map materializedViewProperties) { Schema schemaWithTimestampTzPreserved = schemaFromMetadata(mappedCopy( definition.getColumns(), @@ -340,7 +370,7 @@ private List columnsForMaterializedView(ConnectorMaterializedVie } return new ColumnMetadata(column.getName(), type); })); - PartitionSpec partitionSpec = parsePartitionFields(schemaWithTimestampTzPreserved, getPartitioning(definition.getProperties())); + PartitionSpec partitionSpec = parsePartitionFields(schemaWithTimestampTzPreserved, getPartitioning(materializedViewProperties)); Set temporalPartitioningSources = partitionSpec.fields().stream() 
.flatMap(partitionField -> { Types.NestedField sourceField = schemaWithTimestampTzPreserved.findField(partitionField.sourceId()); @@ -422,7 +452,6 @@ private Type typeForMaterializedViewStorageTable(Type type) } protected ConnectorMaterializedViewDefinition getMaterializedViewDefinition( - Table icebergTable, Optional owner, String viewOriginalText, SchemaTableName storageTableName) @@ -437,11 +466,7 @@ protected ConnectorMaterializedViewDefinition getMaterializedViewDefinition( definition.getGracePeriod(), definition.getComment(), owner, - definition.getPath(), - ImmutableMap.builder() - .putAll(getIcebergTableProperties(icebergTable)) - .put(STORAGE_SCHEMA, storageTableName.getSchemaName()) - .buildOrThrow()); + definition.getPath()); } protected List toSpiMaterializedViewColumns(List columns) diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java index 8d44190ee3079..e2aa957148ae4 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java @@ -163,6 +163,7 @@ void createMaterializedView( ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, boolean replace, boolean ignoreExisting); @@ -172,6 +173,8 @@ void createMaterializedView( Optional getMaterializedView(ConnectorSession session, SchemaTableName viewName); + Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition); + Optional getMaterializedViewStorageTable(ConnectorSession session, SchemaTableName viewName); void renameMaterializedView(ConnectorSession session, SchemaTableName source, SchemaTableName target); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index fd79f33587d37..b901158e8d0d8 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -866,7 +866,7 @@ else if (isTrinoMaterializedView(tableType, parameters)) { try { // Note: this is racy from cache invalidation perspective, but it should not matter here uncheckedCacheGet(materializedViewCache, schemaTableName, () -> { - ConnectorMaterializedViewDefinition materializedView = createMaterializedViewDefinition(session, schemaTableName, table); + ConnectorMaterializedViewDefinition materializedView = createMaterializedViewDefinition(schemaTableName, table); return new MaterializedViewData( materializedView, Optional.ofNullable(parameters.get(METADATA_LOCATION_PROP))); @@ -1128,6 +1128,7 @@ public void createMaterializedView( ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, boolean replace, boolean ignoreExisting) { @@ -1146,7 +1147,7 @@ public void createMaterializedView( } if (hideMaterializedViewStorageTable) { - Location storageMetadataLocation = createMaterializedViewStorage(session, viewName, definition); + Location storageMetadataLocation = createMaterializedViewStorage(session, viewName, definition, materializedViewProperties); TableInput materializedViewTableInput = 
getMaterializedViewTableInput( viewName.getTableName(), encodeMaterializedViewData(fromConnectorMaterializedViewDefinition(definition)), @@ -1160,7 +1161,7 @@ public void createMaterializedView( } } else { - createMaterializedViewWithStorageTable(session, viewName, definition, existing); + createMaterializedViewWithStorageTable(session, viewName, definition, materializedViewProperties, existing); } } @@ -1168,10 +1169,11 @@ private void createMaterializedViewWithStorageTable( ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, Optional existing) { // Create the storage table - SchemaTableName storageTable = createMaterializedViewStorageTable(session, viewName, definition); + SchemaTableName storageTable = createMaterializedViewStorageTable(session, viewName, definition, materializedViewProperties); // Create a view indicating the storage table TableInput materializedViewTableInput = getMaterializedViewTableInput( viewName.getTableName(), @@ -1218,8 +1220,7 @@ public void updateMaterializedViewColumnComment(ConnectorSession session, Schema definition.getGracePeriod(), definition.getComment(), definition.getOwner(), - definition.getPath(), - definition.getProperties()); + definition.getPath()); updateMaterializedView(viewName, newDefinition); } @@ -1293,11 +1294,10 @@ protected Optional doGetMaterializedView(Co return Optional.empty(); } - return Optional.of(createMaterializedViewDefinition(session, viewName, table)); + return Optional.of(createMaterializedViewDefinition(viewName, table)); } private ConnectorMaterializedViewDefinition createMaterializedViewDefinition( - ConnectorSession session, SchemaTableName viewName, com.amazonaws.services.glue.model.Table table) { @@ -1315,54 +1315,18 @@ private ConnectorMaterializedViewDefinition createMaterializedViewDefinition( .orElse(viewName.getSchemaName()); SchemaTableName storageTableName = new SchemaTableName(storageSchema, storageTable); - Table icebergTable; - try { - icebergTable = loadTable(session, storageTableName); - } - catch (RuntimeException e) { - // The materialized view could be removed concurrently. This may manifest in a number of ways, e.g. - // - io.trino.spi.connector.TableNotFoundException - // - org.apache.iceberg.exceptions.NotFoundException when accessing manifest file - // - other failures when reading storage table's metadata files - // Retry, as we're catching broadly. 
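// [Illustrative aside, not part of this diff] With property population and storage-table loading removed
// from getMaterializedViewDefinition, the broad catch-and-wrap above only applies where the storage table
// is still read, such as the new getMaterializedViewProperties. A minimal caller-side sketch, assuming the
// catalog keeps wrapping concurrent-removal failures in MaterializedViewMayBeBeingRemovedException
// ("catalog" stands for whichever TrinoCatalog is in use):
private Map<String, Object> materializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition)
{
    try {
        return catalog.getMaterializedViewProperties(session, viewName, definition);
    }
    catch (MaterializedViewMayBeBeingRemovedException e) {
        // The view or its storage table may be getting dropped concurrently; surface it as a missing view
        throw new MaterializedViewNotFoundException(viewName);
    }
}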
- throw new MaterializedViewMayBeBeingRemovedException(e); - } - String viewOriginalText = table.getViewOriginalText(); if (viewOriginalText == null) { throw new TrinoException(ICEBERG_BAD_DATA, "Materialized view did not have original text " + viewName); } return getMaterializedViewDefinition( - icebergTable, Optional.ofNullable(table.getOwner()), viewOriginalText, storageTableName); } SchemaTableName storageTableName = new SchemaTableName(viewName.getSchemaName(), tableNameWithType(viewName.getTableName(), MATERIALIZED_VIEW_STORAGE)); - Table icebergTable; - try { - TableMetadata metadata = getMaterializedViewTableMetadata(session, storageTableName, storageMetadataLocation); - IcebergTableOperations operations = tableOperationsProvider.createTableOperations( - this, - session, - storageTableName.getSchemaName(), - storageTableName.getTableName(), - Optional.empty(), - Optional.empty()); - operations.initializeFromMetadata(metadata); - icebergTable = new BaseTable(operations, quotedTableName(storageTableName), TRINO_METRICS_REPORTER); - } - catch (RuntimeException e) { - // The materialized view could be removed concurrently. This may manifest in a number of ways, e.g. - // - org.apache.iceberg.exceptions.NotFoundException when accessing manifest file - // - other failures when reading storage table's metadata files - // Retry, as we're catching broadly. - throw new MaterializedViewMayBeBeingRemovedException(e); - } - return getMaterializedViewDefinition( - icebergTable, Optional.ofNullable(table.getOwner()), table.getViewOriginalText(), storageTableName); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java index 702fd0ccd699b..c2de70ba78e10 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java @@ -360,7 +360,7 @@ public List listTables(ConnectorSession session, Optional tablesListBuilder = ImmutableSet.builder(); for (String schemaName : listNamespaces(session, namespace)) { - metastore.getAllTables(schemaName).forEach(tableName -> tablesListBuilder.add(new SchemaTableName(schemaName, tableName))); + metastore.getTables(schemaName).forEach(tableName -> tablesListBuilder.add(new SchemaTableName(schemaName, tableName))); } return tablesListBuilder.build().asList(); } @@ -568,6 +568,7 @@ public void createMaterializedView( ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, boolean replace, boolean ignoreExisting) { @@ -586,7 +587,7 @@ public void createMaterializedView( } if (hideMaterializedViewStorageTable) { - Location storageMetadataLocation = createMaterializedViewStorage(session, viewName, definition); + Location storageMetadataLocation = createMaterializedViewStorage(session, viewName, definition, materializedViewProperties); Map viewProperties = createMaterializedViewProperties(session, storageMetadataLocation); Column dummyColumn = new Column("dummy", HIVE_STRING, Optional.empty(), ImmutableMap.of()); @@ -614,7 +615,7 @@ public void createMaterializedView( } } else { - createMaterializedViewWithStorageTable(session, viewName, definition, existing); + createMaterializedViewWithStorageTable(session, viewName, definition, materializedViewProperties, existing); } } @@ -622,9 +623,10 @@ private void 
createMaterializedViewWithStorageTable( ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, Optional existing) { - SchemaTableName storageTable = createMaterializedViewStorageTable(session, viewName, definition); + SchemaTableName storageTable = createMaterializedViewStorageTable(session, viewName, definition, materializedViewProperties); // Create a view indicating the storage table Map viewProperties = createMaterializedViewProperties(session, storageTable); @@ -686,8 +688,7 @@ public void updateMaterializedViewColumnComment(ConnectorSession session, Schema definition.getGracePeriod(), definition.getComment(), definition.getOwner(), - definition.getPath(), - definition.getProperties()); + definition.getPath()); replaceMaterializedView(session, viewName, existing, newDefinition); } @@ -766,23 +767,7 @@ protected Optional doGetMaterializedView(Co String storageSchema = Optional.ofNullable(materializedView.getParameters().get(STORAGE_SCHEMA)) .orElse(viewName.getSchemaName()); SchemaTableName storageTableName = new SchemaTableName(storageSchema, storageTable); - - Table icebergTable; - try { - icebergTable = loadTable(session, storageTableName); - } - catch (RuntimeException e) { - // The materialized view could be removed concurrently. This may manifest in a number of ways, e.g. - // - io.trino.spi.connector.TableNotFoundException - // - org.apache.iceberg.exceptions.NotFoundException when accessing manifest file - // - other failures when reading storage table's metadata files - // Retry, as we're catching broadly. - metastore.invalidateTable(viewName.getSchemaName(), viewName.getTableName()); - metastore.invalidateTable(storageSchema, storageTable); - throw new MaterializedViewMayBeBeingRemovedException(e); - } return Optional.of(getMaterializedViewDefinition( - icebergTable, materializedView.getOwner(), materializedView.getViewOriginalText() .orElseThrow(() -> new TrinoException(HIVE_INVALID_METADATA, "No view original text: " + viewName)), @@ -790,33 +775,11 @@ protected Optional doGetMaterializedView(Co } SchemaTableName storageTableName = new SchemaTableName(viewName.getSchemaName(), IcebergTableName.tableNameWithType(viewName.getTableName(), MATERIALIZED_VIEW_STORAGE)); - IcebergTableOperations operations = tableOperationsProvider.createTableOperations( - this, - session, - storageTableName.getSchemaName(), - storageTableName.getTableName(), - Optional.empty(), - Optional.empty()); - try { - TableMetadata metadata = getMaterializedViewTableMetadata(session, storageTableName, materializedView); - operations.initializeFromMetadata(metadata); - Table icebergTable = new BaseTable(operations, quotedTableName(storageTableName), TRINO_METRICS_REPORTER); - - return Optional.of(getMaterializedViewDefinition( - icebergTable, - materializedView.getOwner(), - materializedView.getViewOriginalText() - .orElseThrow(() -> new TrinoException(HIVE_INVALID_METADATA, "No view original text: " + viewName)), - storageTableName)); - } - catch (RuntimeException e) { - // The materialized view could be removed concurrently. This may manifest in a number of ways, e.g. - // - org.apache.iceberg.exceptions.NotFoundException when accessing manifest file - // - other failures when reading storage table's metadata files - // Retry, as we're catching broadly. 
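// [Illustrative sketch, not part of this diff] A possible shape for the new TrinoCatalog method that
// replaces the eager property population removed above; the identifiers (getIcebergTableProperties,
// STORAGE_SCHEMA, loadTable, ICEBERG_BAD_DATA) come from this patch, but the body itself is an assumption:
@Override
public Map<String, Object> getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition)
{
    SchemaTableName storageTableName = definition.getStorageTable()
            .map(CatalogSchemaTableName::getSchemaTableName)
            .orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "Materialized view has no storage table: " + viewName));
    // Loading the storage table is the only step that can fail while the view is being dropped concurrently
    Table icebergTable = loadTable(session, storageTableName);
    return ImmutableMap.<String, Object>builder()
            .putAll(getIcebergTableProperties(icebergTable))
            .put(STORAGE_SCHEMA, storageTableName.getSchemaName())
            .buildOrThrow();
}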
- metastore.invalidateTable(viewName.getSchemaName(), viewName.getTableName()); - throw new MaterializedViewMayBeBeingRemovedException(e); - } + return Optional.of(getMaterializedViewDefinition( + materializedView.getOwner(), + materializedView.getViewOriginalText() + .orElseThrow(() -> new TrinoException(HIVE_INVALID_METADATA, "No view original text: " + viewName)), + storageTableName)); } @Override diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/jdbc/TrinoJdbcCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/jdbc/TrinoJdbcCatalog.java index 86a8d3191e9a6..1bde8682c8644 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/jdbc/TrinoJdbcCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/jdbc/TrinoJdbcCatalog.java @@ -445,7 +445,13 @@ public Optional getMaterializedViewStorageTable(ConnectorSession sess } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName schemaViewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName schemaViewName, + ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, + boolean replace, + boolean ignoreExisting) { throw new TrinoException(NOT_SUPPORTED, "createMaterializedView is not supported for Iceberg JDBC catalogs"); } @@ -468,6 +474,12 @@ public Optional getMaterializedView(Connect return Optional.empty(); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + { + throw new TrinoException(NOT_SUPPORTED, "getMaterializedViewProperties is not supported for Iceberg JDBC catalogs"); + } + @Override public void renameMaterializedView(ConnectorSession session, SchemaTableName source, SchemaTableName target) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/IcebergNessieTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/IcebergNessieTableOperations.java index 602e526a213e0..3fe3c5b33dc44 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/IcebergNessieTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/IcebergNessieTableOperations.java @@ -19,9 +19,11 @@ import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.TableNotFoundException; import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.nessie.NessieIcebergClient; +import org.apache.iceberg.nessie.NessieUtil; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; import org.projectnessie.model.ContentKey; @@ -80,6 +82,16 @@ public TableMetadata refresh(boolean invalidateCaches) return super.refresh(invalidateCaches); } + @Override + protected void refreshFromMetadataLocation(String newLocation) + { + super.refreshFromMetadataLocation( + newLocation, + location -> NessieUtil.updateTableMetadataWithNessieSpecificProperties( + TableMetadataParser.read(fileIo, location), + location, table, getSchemaTableName().toString(), nessieClient.getReference())); + } + @Override protected String getRefreshedLocation(boolean 
invalidateCaches) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/TrinoNessieCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/TrinoNessieCatalog.java index 361c2cfd6986a..1a8c3544920eb 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/TrinoNessieCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/nessie/TrinoNessieCatalog.java @@ -405,6 +405,7 @@ public void createMaterializedView( ConnectorSession session, SchemaTableName schemaViewName, ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, boolean replace, boolean ignoreExisting) { @@ -429,6 +430,12 @@ public Optional getMaterializedView(Connect return Optional.empty(); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + { + throw new TrinoException(NOT_SUPPORTED, "The Iceberg Nessie catalog does not support materialized views"); + } + @Override public Optional getMaterializedViewStorageTable(ConnectorSession session, SchemaTableName viewName) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/rest/TrinoRestCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/rest/TrinoRestCatalog.java index 74d296d574edd..71cd72315e401 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/rest/TrinoRestCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/rest/TrinoRestCatalog.java @@ -183,16 +183,7 @@ public void renameNamespace(ConnectorSession session, String source, String targ public List listTables(ConnectorSession session, Optional namespace) { SessionContext sessionContext = convert(session); - List namespaces; - - if (namespace.isPresent() && namespaceExists(session, namespace.get())) { - namespaces = ImmutableList.of(Namespace.of(namespace.get())); - } - else { - namespaces = listNamespaces(session).stream() - .map(Namespace::of) - .collect(toImmutableList()); - } + List namespaces = listNamespaces(session, namespace); ImmutableList.Builder tables = ImmutableList.builder(); for (Namespace restNamespace : namespaces) { @@ -438,7 +429,13 @@ public List listMaterializedViews(ConnectorSession session, Opt } @Override - public void createMaterializedView(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition, boolean replace, boolean ignoreExisting) + public void createMaterializedView( + ConnectorSession session, + SchemaTableName viewName, + ConnectorMaterializedViewDefinition definition, + Map materializedViewProperties, + boolean replace, + boolean ignoreExisting) { throw new TrinoException(NOT_SUPPORTED, "createMaterializedView is not supported for Iceberg REST catalog"); } @@ -461,6 +458,12 @@ public Optional getMaterializedView(Connect return Optional.empty(); } + @Override + public Map getMaterializedViewProperties(ConnectorSession session, SchemaTableName viewName, ConnectorMaterializedViewDefinition definition) + { + throw new TrinoException(NOT_SUPPORTED, "The Iceberg REST catalog does not support materialized views"); + } + @Override public Optional getMaterializedViewStorageTable(ConnectorSession session, SchemaTableName viewName) { @@ -543,4 +546,15 @@ private static TableIdentifier toIdentifier(SchemaTableName schemaTableName) { return TableIdentifier.of(schemaTableName.getSchemaName(), 
schemaTableName.getTableName()); } + + private List listNamespaces(ConnectorSession session, Optional namespace) + { + if (namespace.isEmpty()) { + return listNamespaces(session).stream() + .map(Namespace::of) + .collect(toImmutableList()); + } + + return ImmutableList.of(Namespace.of(namespace.get())); + } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/procedure/MigrateProcedure.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/procedure/MigrateProcedure.java index 91283376cbb68..7ff3a65bf742e 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/procedure/MigrateProcedure.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/procedure/MigrateProcedure.java @@ -232,7 +232,7 @@ public void doMigrate(ConnectorSession session, String schemaName, String tableN ImmutableList.Builder dataFilesBuilder = ImmutableList.builder(); if (hiveTable.getPartitionColumns().isEmpty()) { log.debug("Building data files from %s", location); - dataFilesBuilder.addAll(buildDataFiles(session, recursive, storageFormat, location, partitionSpec, new PartitionData(new Object[]{}), schema)); + dataFilesBuilder.addAll(buildDataFiles(session, recursive, storageFormat, location, partitionSpec, new PartitionData(new Object[0]), schema)); } else { Map> partitions = listAllPartitions(metastore, hiveTable); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java index df10f40055a19..d196f209612e9 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java @@ -132,7 +132,7 @@ public void testDeleteRowsConcurrently() ExecutorService executor = newFixedThreadPool(threads); List rows = ImmutableList.of("(1, 0, 0, 0)", "(0, 1, 0, 0)", "(0, 0, 1, 0)", "(0, 0, 0, 1)"); - String[] expectedErrors = new String[]{"Failed to commit Iceberg update to table:", "Failed to replace table due to concurrent updates:"}; + String[] expectedErrors = new String[] {"Failed to commit Iceberg update to table:", "Failed to replace table due to concurrent updates:"}; try (TestTable table = new TestTable( getQueryRunner()::execute, "test_concurrent_delete", diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 958a82ea15c0b..a6f825ee77701 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -31,6 +31,7 @@ import io.trino.operator.OperatorStats; import io.trino.plugin.hive.TestingHivePlugin; import io.trino.plugin.iceberg.fileio.ForwardingFileIo; +import io.trino.server.DynamicFilterService; import io.trino.spi.QueryId; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.Constraint; @@ -78,7 +79,6 @@ import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.NoSuchElementException; import java.util.Optional; @@ -101,11 +101,11 @@ import static com.google.common.collect.MoreCollectors.onlyElement; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; 
import static io.trino.SystemSessionProperties.DETERMINE_PARTITION_COUNT_FOR_WRITE_ENABLED; +import static io.trino.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING; import static io.trino.SystemSessionProperties.SCALE_WRITERS; import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT; import static io.trino.SystemSessionProperties.TASK_MIN_WRITER_COUNT; import static io.trino.SystemSessionProperties.USE_PREFERRED_WRITE_PARTITIONING; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.IcebergFileFormat.AVRO; import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; @@ -133,14 +133,15 @@ import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.testing.assertions.Assert.assertEventually; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.lang.String.format; import static java.lang.String.join; import static java.nio.charset.StandardCharsets.UTF_8; import static java.time.ZoneOffset.UTC; import static java.time.format.DateTimeFormatter.ISO_OFFSET_DATE_TIME; import static java.util.Collections.nCopies; +import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; import static java.util.UUID.randomUUID; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -306,7 +307,7 @@ public void testShowCreateSchema() .matches("CREATE SCHEMA iceberg.tpch\n" + "AUTHORIZATION USER user\n" + "WITH \\(\n" + - "\\s+location = '.*/iceberg_data/tpch'\n" + + "\\s+location = '.*/tpch'\n" + "\\)"); } @@ -345,7 +346,7 @@ public void testShowCreateTable() "WITH (\n" + " format = '" + format.name() + "',\n" + " format_version = 2,\n" + - " location = '\\E.*/iceberg_data/tpch/orders-.*\\Q'\n" + + " location = '\\E.*/tpch/orders-.*\\Q'\n" + ")\\E"); } @@ -3220,7 +3221,7 @@ protected void testBucketTransformForType( String greaterValueInSameBucket, String valueInOtherBucket) { - String tableName = format("test_bucket_transform%s", type.toLowerCase(Locale.ENGLISH)); + String tableName = format("test_bucket_transform%s", type.toLowerCase(ENGLISH)); assertUpdate(format("CREATE TABLE %s (d %s) WITH (partitioning = ARRAY['bucket(d, 2)'])", tableName, type)); assertUpdate(format("INSERT INTO %s VALUES (NULL), (%s), (%s), (%s)", tableName, value, greaterValueInSameBucket, valueInOtherBucket), 4); @@ -4195,7 +4196,7 @@ public void testIncorrectIcebergFileSizes() // Using Iceberg provided file size fails the query assertQueryFails( "SELECT * FROM test_iceberg_file_size", - "(Malformed ORC file\\. Invalid file metadata.*)|(.*Error opening Iceberg split.* Incorrect file size \\(%s\\) for file .*)".formatted(alteredValue)); + "(Malformed ORC file\\. 
Invalid file metadata.*)|(.*Malformed Parquet file.*)"); dropTable("test_iceberg_file_size"); } @@ -4235,6 +4236,18 @@ public void testSplitPruningForFilterOnPartitionColumn() verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey % 5 = 3", 1); assertUpdate("DROP TABLE " + tableName); + + // Partition by multiple columns + assertUpdate(noRedistributeWrites, "CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey', 'nationkey']) AS SELECT * FROM nation", 25); + // Create 2 files per partition + assertUpdate(noRedistributeWrites, "INSERT INTO " + tableName + " SELECT * FROM nation", 25); + // sanity check that table contains exactly 50 files + assertThat(computeScalar("SELECT count(*) FROM \"" + tableName + "$files\"")).isEqualTo(50L); + + verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey % 5 = 3", 10); + verifySplitCount("SELECT * FROM " + tableName + " WHERE (regionkey * 2) - nationkey = 0", 6); + + assertUpdate("DROP TABLE " + tableName); } @Test @@ -4775,7 +4788,7 @@ private void verifySplitCount(QueryId queryId, long expectedSplitCount) } } - private OperatorStats getOperatorStats(QueryId queryId) + protected OperatorStats getOperatorStats(QueryId queryId) { try { return getDistributedQueryRunner().getCoordinator() @@ -5026,7 +5039,7 @@ public void testOptimizeTimePartitionedTable() private void testOptimizeTimePartitionedTable(String dataType, String partitioningFormat, int expectedFilesAfterOptimize) { String tableName = "test_optimize_time_partitioned_" + - (dataType + "_" + partitioningFormat).toLowerCase(Locale.ENGLISH).replaceAll("[^a-z0-9_]", ""); + (dataType + "_" + partitioningFormat).toLowerCase(ENGLISH).replaceAll("[^a-z0-9_]", ""); assertUpdate(format("CREATE TABLE %s(p %s, val varchar) WITH (partitioning = ARRAY['%s'])", tableName, dataType, format(partitioningFormat, "p"))); // Do several inserts so ensure more than one input file @@ -5188,7 +5201,7 @@ public void testOptimizeSnapshot() long snapshotId = getCurrentSnapshotId(tableName); assertUpdate("INSERT INTO " + tableName + " VALUES 22", 1); assertThatThrownBy(() -> query("ALTER TABLE \"%s@%d\" EXECUTE OPTIMIZE".formatted(tableName, snapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, snapshotId)); + .hasMessage(format("line 1:7: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, snapshotId)); assertThat(query("SELECT * FROM " + tableName)) .matches("VALUES 11, 22"); @@ -5600,7 +5613,7 @@ public void testExpireSnapshotsOnSnapshot() long snapshotId = getCurrentSnapshotId(tableName); assertUpdate("INSERT INTO " + tableName + " VALUES 22", 1); assertThatThrownBy(() -> query("ALTER TABLE \"%s@%d\" EXECUTE EXPIRE_SNAPSHOTS".formatted(tableName, snapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, snapshotId)); + .hasMessage(format("line 1:7: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, snapshotId)); assertThat(query("SELECT * FROM " + tableName)) .matches("VALUES 11, 22"); @@ -5792,7 +5805,7 @@ public void testRemoveOrphanFilesOnSnapshot() long snapshotId = getCurrentSnapshotId(tableName); assertUpdate("INSERT INTO " + tableName + " VALUES 22", 1); assertThatThrownBy(() -> query("ALTER TABLE \"%s@%d\" EXECUTE REMOVE_ORPHAN_FILES".formatted(tableName, snapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, snapshotId)); + .hasMessage(format("line 1:7: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, snapshotId)); assertThat(query("SELECT * FROM " + tableName)) 
.matches("VALUES 11, 22"); @@ -5935,19 +5948,19 @@ public void testModifyingOldSnapshotIsNotPossible() assertUpdate(format("INSERT INTO %s VALUES 4,5,6", tableName), 3); assertQuery(format("SELECT * FROM %s FOR VERSION AS OF %d", tableName, oldSnapshotId), "VALUES 1,2,3"); assertThatThrownBy(() -> query(format("INSERT INTO \"%s@%d\" VALUES 7,8,9", tableName, oldSnapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, oldSnapshotId)); + .hasMessage(format("Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, oldSnapshotId)); assertThatThrownBy(() -> query(format("DELETE FROM \"%s@%d\" WHERE col = 5", tableName, oldSnapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, oldSnapshotId)); + .hasMessage(format("line 1:1: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, oldSnapshotId)); assertThatThrownBy(() -> query(format("UPDATE \"%s@%d\" SET col = 50 WHERE col = 5", tableName, oldSnapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, oldSnapshotId)); + .hasMessage(format("line 1:1: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, oldSnapshotId)); assertThatThrownBy(() -> query(format("INSERT INTO \"%s@%d\" VALUES 7,8,9", tableName, getCurrentSnapshotId(tableName)))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, getCurrentSnapshotId(tableName))); + .hasMessage(format("Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, getCurrentSnapshotId(tableName))); assertThatThrownBy(() -> query(format("DELETE FROM \"%s@%d\" WHERE col = 9", tableName, getCurrentSnapshotId(tableName)))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, getCurrentSnapshotId(tableName))); + .hasMessage(format("line 1:1: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, getCurrentSnapshotId(tableName))); assertThatThrownBy(() -> assertUpdate(format("UPDATE \"%s@%d\" set col = 50 WHERE col = 5", tableName, getCurrentSnapshotId(tableName)))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, getCurrentSnapshotId(tableName))); + .hasMessage(format("line 1:1: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, getCurrentSnapshotId(tableName))); assertThatThrownBy(() -> query(format("ALTER TABLE \"%s@%d\" EXECUTE OPTIMIZE", tableName, oldSnapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, oldSnapshotId)); + .hasMessage(format("line 1:7: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, oldSnapshotId)); assertQuery(format("SELECT * FROM %s", tableName), "VALUES 1,2,3,4,5,6"); assertUpdate("DROP TABLE " + tableName); @@ -6355,6 +6368,55 @@ public void testCreateOrReplaceTableWithComments() } } + @Test + public void testCreateOrReplaceTableWithSameLocation() + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_create_or_replace_with_same_location_", + "(a integer)")) { + String initialTableLocation = getTableLocation(table.getName()); + assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); + assertThat(query("SELECT * FROM " + table.getName())) + .matches("VALUES 1"); + long v1SnapshotId = getCurrentSnapshotId(table.getName()); + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " (a integer)"); + assertThat(getTableLocation(table.getName())) + .isEqualTo(initialTableLocation); + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " (a integer) WITH (location = '" + initialTableLocation + "')"); + String initialTableLocationWithTrailingSlash = 
initialTableLocation.endsWith("/") ? initialTableLocation : initialTableLocation + "/"; + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " (a integer) WITH (location = '" + initialTableLocationWithTrailingSlash + "')"); + assertThat(getTableLocation(table.getName())) + .isEqualTo(initialTableLocation); + assertThat(query("SELECT * FROM " + table.getName())) + .returnsEmptyResult(); + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " WITH (location = '" + initialTableLocation + "') AS SELECT 2 as a", 1); + assertThat(query("SELECT * FROM " + table.getName())) + .matches("VALUES 2"); + assertThat(getTableLocation(table.getName())) + .isEqualTo(initialTableLocation); + assertThat(query("SELECT * FROM " + table.getName() + " FOR VERSION AS OF " + v1SnapshotId)) + .matches("VALUES 1"); + } + } + + @Test + public void testCreateOrReplaceTableWithChangeInLocation() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_create_or_replace_change_location_", "(a integer) ")) { + String initialTableLocation = getTableLocation(table.getName()) + randomNameSuffix(); + long v1SnapshotId = getCurrentSnapshotId(table.getName()); + assertQueryFails( + "CREATE OR REPLACE TABLE " + table.getName() + " (a integer) WITH (location = '%s')".formatted(initialTableLocation), + "The provided location '%s' does not match the existing table location '.*'".formatted(initialTableLocation)); + assertQueryFails( + "CREATE OR REPLACE TABLE " + table.getName() + " WITH (location = '%s') AS SELECT 1 AS a".formatted(initialTableLocation), + "The provided location '%s' does not match the existing table location '.*'".formatted(initialTableLocation)); + assertThat(getCurrentSnapshotId(table.getName())) + .isEqualTo(v1SnapshotId); + } + } + @Test public void testMergeSimpleSelectPartitioned() { @@ -6961,7 +7023,7 @@ public void testDropCorruptedTableWithHiveRedirection() "iceberg.catalog.type", "TESTING_FILE_METASTORE", "hive.metastore.catalog.dir", dataDirectory.getPath())); - queryRunner.installPlugin(new TestingHivePlugin(createTestingFileHiveMetastore(dataDirectory))); + queryRunner.installPlugin(new TestingHivePlugin(dataDirectory.toPath())); queryRunner.createCatalog( hiveRedirectionCatalog, "hive", @@ -7305,6 +7367,65 @@ public void testUuidDynamicFilter() } } + @Test + public void testDynamicFilterWithExplicitPartitionFilter() + { + String catalog = getSession().getCatalog().orElseThrow(); + try (TestTable salesTable = new TestTable(getQueryRunner()::execute, "sales_table", "(date date, receipt_id varchar, amount decimal(10,2)) with (partitioning=array['date'])"); + TestTable dimensionTable = new TestTable(getQueryRunner()::execute, "dimension_table", "(date date, following_holiday boolean, year int)")) { + assertUpdate(""" + INSERT INTO %s + VALUES + (DATE '2023-01-01' , false, 2023), + (DATE '2023-01-02' , true, 2023), + (DATE '2023-01-03' , false, 2023)""".formatted(dimensionTable.getName()), 3); + assertUpdate(""" + INSERT INTO %s + VALUES + (DATE '2023-01-02' , '#2023#1', DECIMAL '122.12'), + (DATE '2023-01-02' , '#2023#2', DECIMAL '124.12'), + (DATE '2023-01-02' , '#2023#3', DECIMAL '99.99'), + (DATE '2023-01-02' , '#2023#4', DECIMAL '95.12'), + (DATE '2023-01-03' , '#2023#5', DECIMAL '199.12'), + (DATE '2023-01-04' , '#2023#6', DECIMAL '99.55'), + (DATE '2023-01-05' , '#2023#7', DECIMAL '50.11'), + (DATE '2023-01-05' , '#2023#8', DECIMAL '60.20'), + (DATE '2023-01-05' , '#2023#9', DECIMAL '70.75'), + (DATE '2023-01-05' , '#2023#10', DECIMAL 
'80.12')""".formatted(salesTable.getName()), 10); + + String selectQuery = """ + SELECT receipt_id + FROM %s s + JOIN %s d + ON s.date = d.date + WHERE + d.following_holiday = true AND + d.date BETWEEN DATE '2023-01-01' AND DATE '2024-01-01'""".formatted(salesTable.getName(), dimensionTable.getName()); + MaterializedResultWithQueryId result = getDistributedQueryRunner().executeWithQueryId( + Session.builder(getSession()) + .setCatalogSessionProperty(catalog, DYNAMIC_FILTERING_WAIT_TIMEOUT, "10s") + .build(), + selectQuery); + MaterializedResult expected = computeActual( + Session.builder(getSession()) + .setSystemProperty(ENABLE_DYNAMIC_FILTERING, "false") + .build(), + selectQuery); + assertEqualsIgnoreOrder(result.getResult(), expected); + + DynamicFilterService.DynamicFiltersStats dynamicFiltersStats = getDistributedQueryRunner().getCoordinator() + .getQueryManager() + .getFullQueryInfo(result.getQueryId()) + .getQueryStats() + .getDynamicFiltersStats(); + // The dynamic filter reduces the range specified for the partition column `date` from `date :: [[2023-01-01, 2024-01-01]]` to `date :: {[2023-01-02]}` + assertThat(dynamicFiltersStats.getTotalDynamicFilters()).isEqualTo(1L); + assertThat(dynamicFiltersStats.getLazyDynamicFilters()).isEqualTo(1L); + assertThat(dynamicFiltersStats.getReplicatedDynamicFilters()).isEqualTo(0L); + assertThat(dynamicFiltersStats.getDynamicFiltersCompleted()).isEqualTo(1L); + } + } + @Override protected void verifyTableNameLengthFailurePermissible(Throwable e) { @@ -7482,6 +7603,26 @@ private void testTimePrecisionOnCreateTableAsSelectWithNoData(String inputType, } } + @Test + public void testSystemTables() + { + String catalog = getSession().getCatalog().orElseThrow(); + String schema = getSession().getSchema().orElseThrow(); + for (TableType tableType : TableType.values()) { + if (tableType != TableType.DATA) { + // Like a system table. Make sure this is "table not found". 
+ assertQueryFails( + "TABLE \"$%s\"".formatted(tableType.name().toLowerCase(ENGLISH)), + "\\Qline 1:1: Table '%s.%s.\"$%s\"' does not exist".formatted(catalog, schema, tableType.name().toLowerCase(ENGLISH))); + } + } + + // given the base table exists + assertQuerySucceeds("TABLE nation"); + // verify that $ results in table not found + assertQueryFails("TABLE \"nation$foo\"", "\\Qline 1:1: Table '%s.%s.\"nation$foo\"' does not exist".formatted(catalog, schema)); + } + @Override protected Optional filterSetColumnTypesDataProvider(SetColumnTypeSetup setup) { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java index 9f3820d214109..6250594e79092 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java @@ -16,11 +16,14 @@ import com.google.common.collect.ImmutableSet; import io.trino.Session; import io.trino.filesystem.Location; -import io.trino.filesystem.local.LocalFileSystem; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.iceberg.fileio.ForwardingFileIo; import io.trino.spi.QueryId; +import io.trino.spi.security.ConnectorIdentity; import io.trino.sql.tree.ExplainType; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedRow; import org.apache.iceberg.PartitionField; import org.apache.iceberg.TableMetadata; @@ -31,7 +34,6 @@ import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.parallel.Execution; -import java.nio.file.Path; import java.util.Optional; import java.util.Set; @@ -745,8 +747,12 @@ protected String getColumnComment(String tableName, String columnName) private TableMetadata getStorageTableMetadata(String materializedViewName) { + DistributedQueryRunner queryRunner = (DistributedQueryRunner) getQueryRunner(); + TrinoFileSystem fileSystemFactory = ((IcebergConnector) queryRunner.getCoordinator().getConnector("iceberg")).getInjector() + .getInstance(TrinoFileSystemFactory.class) + .create(ConnectorIdentity.ofUser("test")); Location metadataLocation = Location.of(getStorageMetadataLocation(materializedViewName)); - return TableMetadataParser.read(new ForwardingFileIo(new LocalFileSystem(Path.of(metadataLocation.parentDirectory().toString()))), "local:///" + metadataLocation); + return TableMetadataParser.read(new ForwardingFileIo(fileSystemFactory), metadataLocation.toString()); } private long getLatestSnapshotId(String tableName) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergQueryRunner.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergQueryRunner.java index b38adf6aea7f8..14ef2d855e0e4 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergQueryRunner.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergQueryRunner.java @@ -36,7 +36,6 @@ import java.nio.file.attribute.FileAttribute; import java.nio.file.attribute.PosixFilePermission; import java.nio.file.attribute.PosixFilePermissions; -import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -138,16 +137,9 @@ public DistributedQueryRunner build() queryRunner.installPlugin(new TpchPlugin()); 
queryRunner.createCatalog("tpch", "tpch"); - queryRunner.installPlugin(new IcebergPlugin()); - Map icebergProperties = new HashMap<>(this.icebergProperties.buildOrThrow()); - String catalogType = icebergProperties.get("iceberg.catalog.type"); Path dataDir = metastoreDirectory.map(File::toPath).orElseGet(() -> queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data")); - if (catalogType == null) { - icebergProperties.put("iceberg.catalog.type", "TESTING_FILE_METASTORE"); - icebergProperties.put("hive.metastore.catalog.dir", dataDir.toString()); - } - - queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties); + queryRunner.installPlugin(new TestingIcebergPlugin(dataDir)); + queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties.buildOrThrow()); schemaInitializer.orElseGet(() -> SchemaInitializer.builder().build()).accept(queryRunner); return queryRunner; diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java index 39199092f2c35..a4716d6b5701c 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java @@ -19,7 +19,6 @@ import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.TrinoInputFile; -import io.trino.filesystem.local.LocalInputFile; import io.trino.orc.OrcDataSource; import io.trino.orc.OrcReader; import io.trino.orc.OrcReaderOptions; @@ -35,7 +34,6 @@ import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.hadoop.metadata.ParquetMetadata; -import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; @@ -113,11 +111,6 @@ private static boolean checkOrcFileSorting(Supplier dataSourceSup } } - public static boolean checkParquetFileSorting(String path, String sortColumnName) - { - return checkParquetFileSorting(new LocalInputFile(new File(path)), sortColumnName); - } - @SuppressWarnings({"unchecked", "rawtypes"}) public static boolean checkParquetFileSorting(TrinoInputFile inputFile, String sortColumnName) { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestCloseIdleWriters.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestCloseIdleWriters.java new file mode 100644 index 0000000000000..869555a555094 --- /dev/null +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestCloseIdleWriters.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.iceberg; + +import com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.QueryRunner; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.Test; + +import static io.trino.SystemSessionProperties.IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD; +import static io.trino.SystemSessionProperties.SCALE_WRITERS; +import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT; +import static io.trino.SystemSessionProperties.TASK_MIN_WRITER_COUNT; +import static io.trino.SystemSessionProperties.TASK_SCALE_WRITERS_ENABLED; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestCloseIdleWriters + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.builder() + .setNodeCount(1) + // Set the target max file size to 100GB so that we don't close writers due to file size during + // page append. + .setIcebergProperties(ImmutableMap.of( + "iceberg.target-max-file-size", "100GB", + "iceberg.idle-writer-min-file-size", "0.1MB")) + .build(); + } + + @Test + public void testCloseIdleWriters() + { + String tableName = "task_close_idle_writers_" + randomNameSuffix(); + try { + // Create a table with two partitions (0 and 1). Using the order by trick we will write the partitions in + // this order 0, 1, and then again 0. This way we are sure that during partition 1 write there will + // be an idle writer for partition 0. Additionally, during second partition 0 write, there will be an idle + // writer for partition 1. + @Language("SQL") String createTableSql = """ + CREATE TABLE %s WITH (format = 'ORC', partitioning = ARRAY['shipmodeVal']) + AS SELECT orderkey, partkey, suppkey, linenumber, quantity, extendedprice, + discount, tax, returnflag, linestatus, commitdate, receiptdate, shipinstruct, + comment, shipdate, + CASE + WHEN shipmode IN ('AIR', 'FOB', 'SHIP', 'TRUCK') THEN 0 + WHEN shipmode IN ('MAIL', 'RAIL', 'REG AIR') THEN 1 + ELSE 2 + END AS shipmodeVal + FROM tpch.tiny.lineitem + ORDER BY shipmode + LIMIT 60174 + """.formatted(tableName); + + // Disable all kinds of scaling and set the idle writer threshold to 0.1MB + assertUpdate( + Session.builder(getSession()) + .setSystemProperty(SCALE_WRITERS, "false") + .setSystemProperty(TASK_SCALE_WRITERS_ENABLED, "false") + .setSystemProperty(TASK_MAX_WRITER_COUNT, "1") + .setSystemProperty(TASK_MIN_WRITER_COUNT, "1") + .setSystemProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, "0.1MB") + .build(), + createTableSql, + 60174); + long files = (long) computeScalar("SELECT count(DISTINCT \"$path\") FROM " + tableName); + // There should be more than 2 files since we triggered closing of idle writers.
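// [Illustrative note, not part of this diff] Expected layout: one file for partition 0 written before the
// partition 1 rows arrive, one file for partition 1, and a fresh file when partition 0 rows resume after its
// idle writer was closed, so the count of distinct "$path" values must exceed 2.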
+ assertThat(files).isGreaterThan(2); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + } + } +} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java index 89a5d16e14f4b..4ca2417beba46 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java @@ -60,6 +60,7 @@ public void testDefaults() .setRemoveOrphanFilesMinRetention(new Duration(7, DAYS)) .setDeleteSchemaLocationsFallback(false) .setTargetMaxFileSize(DataSize.of(1, GIGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(16, MEGABYTE)) .setMinimumAssignedSplitWeight(0.05) .setHideMaterializedViewStorageTable(true) .setMaterializedViewsStorageSchema(null) @@ -89,6 +90,7 @@ public void testExplicitPropertyMappings() .put("iceberg.remove_orphan_files.min-retention", "14h") .put("iceberg.delete-schema-locations-fallback", "true") .put("iceberg.target-max-file-size", "1MB") + .put("iceberg.idle-writer-min-file-size", "1MB") .put("iceberg.minimum-assigned-split-weight", "0.01") .put("iceberg.materialized-views.hide-storage-table", "false") .put("iceberg.materialized-views.storage-schema", "mv_storage_schema") @@ -115,6 +117,7 @@ public void testExplicitPropertyMappings() .setRemoveOrphanFilesMinRetention(new Duration(14, HOURS)) .setDeleteSchemaLocationsFallback(true) .setTargetMaxFileSize(DataSize.of(1, MEGABYTE)) + .setIdleWriterMinFileSize(DataSize.of(1, MEGABYTE)) .setMinimumAssignedSplitWeight(0.01) .setHideMaterializedViewStorageTable(false) .setMaterializedViewsStorageSchema("mv_storage_schema") diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConnectorSmokeTest.java index a397934fee860..ca81ed98fe685 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConnectorSmokeTest.java @@ -16,21 +16,17 @@ import com.google.common.collect.ImmutableMap; import io.trino.filesystem.Location; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.TestInstance; -import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; +import java.util.Optional; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.checkOrcFileSorting; -import static java.lang.String.format; import static org.apache.iceberg.FileFormat.ORC; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -42,7 +38,6 @@ public class TestIcebergConnectorSmokeTest extends BaseIcebergConnectorSmokeTest { private HiveMetastore metastore; - private File metastoreDir; public 
TestIcebergConnectorSmokeTest() { @@ -53,24 +48,17 @@ public TestIcebergConnectorSmokeTest() protected QueryRunner createQueryRunner() throws Exception { - this.metastoreDir = Files.createTempDirectory("test_iceberg_table_smoke_test").toFile(); - this.metastoreDir.deleteOnExit(); - this.metastore = createTestingFileHiveMetastore(metastoreDir); - return IcebergQueryRunner.builder() + DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .setInitialTables(REQUIRED_TPCH_TABLES) - .setMetastoreDirectory(metastoreDir) .setIcebergProperties(ImmutableMap.of( "iceberg.file-format", format.name(), "iceberg.register-table-procedure.enabled", "true", "iceberg.writer-sort-buffer-size", "1MB")) .build(); - } - - @AfterAll - public void tearDown() - throws IOException - { - deleteRecursively(metastoreDir.toPath(), ALLOW_INSECURE); + metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + return queryRunner; } @Override @@ -91,20 +79,25 @@ protected String getMetadataLocation(String tableName) @Override protected String schemaPath() { - return format("%s/%s", metastoreDir, getSession().getSchema().orElseThrow()); + return "local:///%s".formatted(getSession().getSchema().orElseThrow()); } @Override protected boolean locationExists(String location) { - return Files.exists(Path.of(location)); + try { + return fileSystem.newInputFile(Location.of(location)).exists(); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } } @Override protected void deleteDirectory(String location) { try { - deleteRecursively(Path.of(location), ALLOW_INSECURE); + fileSystem.deleteDirectory(Location.of(location)); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergFileOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergFileOperations.java index 8b2b79cc6de2d..236ca6d515a0e 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergFileOperations.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergFileOperations.java @@ -21,9 +21,7 @@ import io.trino.SystemSessionProperties; import io.trino.filesystem.TrackingFileSystemFactory; import io.trino.filesystem.TrackingFileSystemFactory.OperationType; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.filesystem.local.LocalFileSystemFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; @@ -35,7 +33,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import java.io.File; +import java.nio.file.Path; import java.util.Optional; import java.util.function.Predicate; @@ -47,14 +45,12 @@ import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE_OR_OVERWRITE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static 
io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergSessionProperties.COLLECT_EXTENDED_STATISTICS_ON_WRITE; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.DATA; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.MANIFEST; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.METADATA_JSON; +import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.METASTORE; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.SNAPSHOT; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.STATS; import static io.trino.plugin.iceberg.TestIcebergFileOperations.FileType.fromFilePath; @@ -97,12 +93,12 @@ protected DistributedQueryRunner createQueryRunner() .addCoordinatorProperty("optimizer.experimental-max-prefetched-information-schema-prefixes", Integer.toString(MAX_PREFIXES_COUNT)) .build(); - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - - trackingFileSystemFactory = new TrackingFileSystemFactory(new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS)); + Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data"); + dataDirectory.toFile().mkdirs(); + trackingFileSystemFactory = new TrackingFileSystemFactory(new LocalFileSystemFactory(dataDirectory)); queryRunner.installPlugin(new TestingIcebergPlugin( - Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), + dataDirectory, + Optional.empty(), Optional.of(trackingFileSystemFactory), binder -> { newOptionalBinder(binder, Key.get(boolean.class, AsyncIcebergSplitProducer.class)) @@ -766,10 +762,10 @@ public void testSystemMetadataMaterializedViews() .build()); // Bulk retrieval without selecting freshness - assertFileSystemAccesses(session, "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA", - ImmutableMultiset.builder() - .addCopies(new FileOperation(METADATA_JSON, INPUT_FILE_NEW_STREAM), 2) - .build()); + assertFileSystemAccesses( + session, + "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA", + ImmutableMultiset.of()); // Bulk retrieval for two schemas assertFileSystemAccesses(session, "SELECT * FROM system.metadata.materialized_views WHERE schema_name IN (CURRENT_SCHEMA, 'non_existent')", @@ -784,10 +780,15 @@ public void testSystemMetadataMaterializedViews() .build()); // Pointed lookup without selecting freshness - assertFileSystemAccesses(session, "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA AND name = 'mv1'", - ImmutableMultiset.builder() - .add(new FileOperation(METADATA_JSON, INPUT_FILE_NEW_STREAM)) - .build()); + assertFileSystemAccesses( + session, + "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA AND name = 'mv1'", + ImmutableMultiset.of()); + + assertFileSystemAccesses( + session, + "SELECT * FROM iceberg.information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name = 'mv1'", + ImmutableMultiset.of()); assertUpdate("DROP SCHEMA " + schemaName + " CASCADE"); } @@ -868,14 +869,14 @@ enum Scope @Override public boolean test(FileOperation fileOperation) { - return 
fileOperation.fileType() != DATA; + return fileOperation.fileType() != DATA && fileOperation.fileType() != METASTORE; } }, ALL_FILES { @Override public boolean test(FileOperation fileOperation) { - return true; + return fileOperation.fileType() != METASTORE; } }, } @@ -887,6 +888,7 @@ enum FileType MANIFEST, STATS, DATA, + METASTORE, /**/; public static FileType fromFilePath(String path) @@ -906,6 +908,9 @@ public static FileType fromFilePath(String path) if (path.contains("/data/") && (path.endsWith(".orc") || path.endsWith(".parquet"))) { return DATA; } + if (path.endsWith(".trinoSchema") || path.contains("/.trinoPermissions/")) { + return METASTORE; + } throw new IllegalArgumentException("File not recognized: " + path); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergGetTableStatisticsOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergGetTableStatisticsOperations.java index 46260b4121039..36bdc4ba43ae7 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergGetTableStatisticsOperations.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergGetTableStatisticsOperations.java @@ -14,14 +14,12 @@ package io.trino.plugin.iceberg; import com.google.common.collect.ImmutableMap; -import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; -import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension; import io.opentelemetry.sdk.trace.data.SpanData; -import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; import io.trino.metadata.InternalFunctionBundle; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.spi.security.PrincipalType; import io.trino.testing.AbstractTestQueryFramework; @@ -32,19 +30,19 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.api.parallel.Execution; -import java.io.File; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; import java.util.Optional; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.trino.execution.querystats.PlanOptimizersStatsCollector.createPlanOptimizersStatsCollector; import static io.trino.execution.warnings.WarningCollector.NOOP; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.sql.planner.LogicalPlanner.Stage.OPTIMIZED_AND_VALIDATED; import static io.trino.testing.TestingSession.testSessionBuilder; import static org.assertj.core.api.Assertions.assertThat; @@ -58,22 +56,18 @@ public class TestIcebergGetTableStatisticsOperations extends AbstractTestQueryFramework { + @RegisterExtension + static final OpenTelemetryExtension TELEMETRY = OpenTelemetryExtension.create(); + private LocalQueryRunner localQueryRunner; - private InMemorySpanExporter spanExporter; - private File metastoreDir; + private Path 
metastoreDir; @Override protected QueryRunner createQueryRunner() throws Exception { - spanExporter = closeAfterClass(InMemorySpanExporter.create()); - - SdkTracerProvider tracerProvider = SdkTracerProvider.builder() - .addSpanProcessor(SimpleSpanProcessor.create(spanExporter)) - .build(); - localQueryRunner = LocalQueryRunner.builder(testSessionBuilder().build()) - .withMetadataDecorator(metadata -> new TracingMetadata(tracerProvider.get("test"), metadata)) + .withMetadataDecorator(metadata -> new TracingMetadata(TELEMETRY.getOpenTelemetry().getTracer("test"), metadata)) .build(); localQueryRunner.installPlugin(new TpchPlugin()); localQueryRunner.createCatalog("tpch", "tpch", ImmutableMap.of()); @@ -82,12 +76,16 @@ protected QueryRunner createQueryRunner() new IcebergPlugin().getFunctions().forEach(functions::functions); localQueryRunner.addFunctions(functions.build()); - metastoreDir = Files.createTempDirectory("test_iceberg_get_table_statistics_operations").toFile(); - HiveMetastore metastore = createTestingFileHiveMetastore(metastoreDir); + metastoreDir = Files.createTempDirectory("test_iceberg_get_table_statistics_operations"); localQueryRunner.createCatalog( "iceberg", - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE), + new TestingIcebergConnectorFactory(metastoreDir), ImmutableMap.of()); + + HiveMetastore metastore = ((IcebergConnector) localQueryRunner.getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + Database database = Database.builder() .setDatabaseName("tiny") .setOwnerName(Optional.of("public")) @@ -106,20 +104,13 @@ protected QueryRunner createQueryRunner() public void tearDown() throws IOException { - deleteRecursively(metastoreDir.toPath(), ALLOW_INSECURE); + deleteRecursively(metastoreDir, ALLOW_INSECURE); localQueryRunner.close(); } - private void resetCounters() - { - spanExporter.reset(); - } - @Test public void testTwoWayJoin() { - resetCounters(); - planDistributedQuery("SELECT * " + "FROM iceberg.tiny.orders o, iceberg.tiny.lineitem l " + "WHERE o.orderkey = l.orderkey"); @@ -129,8 +120,6 @@ public void testTwoWayJoin() @Test public void testThreeWayJoin() { - resetCounters(); - planDistributedQuery("SELECT * " + "FROM iceberg.tiny.customer c, iceberg.tiny.orders o, iceberg.tiny.lineitem l " + "WHERE o.orderkey = l.orderkey AND c.custkey = o.custkey"); @@ -148,9 +137,9 @@ private void planDistributedQuery(@Language("SQL") String sql) createPlanOptimizersStatsCollector())); } - private long getTableStatisticsMethodInvocations() + private static long getTableStatisticsMethodInvocations() { - return spanExporter.getFinishedSpanItems().stream() + return TELEMETRY.getSpans().stream() .map(SpanData::getName) .filter(name -> name.equals("Metadata.getTableStatistics")) .count(); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMaterializedView.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMaterializedView.java index 7b8a2aa6dd714..ba916ffaca313 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMaterializedView.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMaterializedView.java @@ -15,18 +15,17 @@ import io.trino.Session; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.Table; 
import io.trino.sql.tree.ExplainType; import io.trino.testing.DistributedQueryRunner; import org.junit.jupiter.api.Test; -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.Map; +import java.util.Optional; import static io.trino.plugin.base.util.Closables.closeAllSuppress; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; import static org.assertj.core.api.Assertions.assertThat; @@ -34,21 +33,19 @@ public class TestIcebergMaterializedView extends BaseIcebergMaterializedViewTest { private Session secondIceberg; - private String fileMetastoreDirectory; private HiveMetastore metastore; @Override protected DistributedQueryRunner createQueryRunner() throws Exception { - File metastoreDir = Files.createTempDirectory("test_iceberg_table_smoke_test").toFile(); - metastoreDir.deleteOnExit(); - this.fileMetastoreDirectory = metastoreDir.getAbsolutePath(); - this.metastore = createTestingFileHiveMetastore(metastoreDir); DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() - .setMetastoreDirectory(metastoreDir) .build(); try { + metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + queryRunner.createCatalog("iceberg2", "iceberg", Map.of( "iceberg.catalog.type", "TESTING_FILE_METASTORE", "hive.metastore.catalog.dir", queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg2-catalog").toString(), @@ -70,7 +67,7 @@ protected DistributedQueryRunner createQueryRunner() @Override protected String getSchemaDirectory() { - return Path.of(fileMetastoreDirectory, "tpch").toString(); + return "local:///tpch"; } @Override diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java index 33e15cedb9a85..355cb21ee25c0 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java @@ -16,6 +16,8 @@ import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.plugin.base.CatalogName; import io.trino.plugin.hive.TrinoViewHiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; @@ -31,10 +33,10 @@ import org.apache.iceberg.Table; import org.junit.jupiter.api.Test; -import java.io.File; +import java.util.Optional; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static org.assertj.core.api.Assertions.assertThat; @@ -49,8 +51,10 @@ protected QueryRunner createQueryRunner() throws Exception { DistributedQueryRunner queryRunner = IcebergQueryRunner.createIcebergQueryRunner(); - File 
baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - CachingHiveMetastore cachingHiveMetastore = createPerTransactionCache(createTestingFileHiveMetastore(baseDir), 1000); + HiveMetastore metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + CachingHiveMetastore cachingHiveMetastore = createPerTransactionCache(metastore, 1000); TrinoFileSystemFactory fileSystemFactory = getFileSystemFactory(queryRunner); tableOperationsProvider = new FileMetastoreTableOperationsProvider(fileSystemFactory); trinoCatalog = new TrinoHiveCatalog( diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataListing.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataListing.java index d05e92225477b..98e5aa57a4a23 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataListing.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataListing.java @@ -15,18 +15,13 @@ import com.google.common.collect.ImmutableMap; import io.trino.Session; -import io.trino.metadata.MaterializedViewDefinition; -import io.trino.metadata.QualifiedObjectName; import io.trino.plugin.hive.TestingHivePlugin; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; -import io.trino.spi.connector.SchemaTableName; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.security.Identity; import io.trino.spi.security.SelectedRole; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; -import io.trino.transaction.TransactionId; -import io.trino.transaction.TransactionManager; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -35,8 +30,7 @@ import java.io.File; import java.util.Optional; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.TestingHiveUtils.getConnectorService; import static io.trino.spi.security.SelectedRole.Type.ROLE; import static io.trino.testing.TestingSession.testSessionBuilder; import static org.assertj.core.api.Assertions.assertThat; @@ -46,8 +40,7 @@ public class TestIcebergMetadataListing extends AbstractTestQueryFramework { - private FileHiveMetastore metastore; - private SchemaTableName storageTable; + private HiveMetastore metastore; @Override protected DistributedQueryRunner createQueryRunner() @@ -62,13 +55,14 @@ protected DistributedQueryRunner createQueryRunner() File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - metastore = createTestingFileHiveMetastore(baseDir); - - queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingIcebergPlugin(baseDir.toPath())); queryRunner.createCatalog("iceberg", "iceberg"); - queryRunner.installPlugin(new TestingHivePlugin(metastore)); + queryRunner.installPlugin(new TestingHivePlugin(baseDir.toPath())); queryRunner.createCatalog("hive", "hive", 
ImmutableMap.of("hive.security", "sql-standard")); + metastore = getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + return queryRunner; } @@ -80,7 +74,6 @@ public void setUp() assertQuerySucceeds("CREATE TABLE iceberg.test_schema.iceberg_table2 (_double DOUBLE) WITH (partitioning = ARRAY['_double'])"); assertQuerySucceeds("CREATE MATERIALIZED VIEW iceberg.test_schema.iceberg_materialized_view AS " + "SELECT * FROM iceberg.test_schema.iceberg_table1"); - storageTable = getStorageTable("iceberg", "test_schema", "iceberg_materialized_view"); assertQuerySucceeds("CREATE VIEW iceberg.test_schema.iceberg_view AS SELECT * FROM iceberg.test_schema.iceberg_table1"); assertQuerySucceeds("CREATE TABLE hive.test_schema.hive_table (_double DOUBLE)"); @@ -102,7 +95,7 @@ public void tearDown() @Test public void testTableListing() { - assertThat(metastore.getAllTables("test_schema")) + assertThat(metastore.getTables("test_schema")) .containsExactlyInAnyOrder( "iceberg_table1", "iceberg_table2", @@ -151,15 +144,4 @@ public void testTableValidation() assertQuerySucceeds("SELECT * FROM iceberg.test_schema.iceberg_table1"); assertQueryFails("SELECT * FROM iceberg.test_schema.hive_table", "Not an Iceberg table: test_schema.hive_table"); } - - private SchemaTableName getStorageTable(String catalogName, String schemaName, String objectName) - { - TransactionManager transactionManager = getQueryRunner().getTransactionManager(); - TransactionId transactionId = transactionManager.beginTransaction(false); - Session session = getSession().beginTransactionId(transactionId, transactionManager, getQueryRunner().getAccessControl()); - Optional materializedView = getQueryRunner().getMetadata() - .getMaterializedView(session, new QualifiedObjectName(catalogName, schemaName, objectName)); - assertThat(materializedView).isPresent(); - return materializedView.get().getStorageTable().get().getSchemaTableName(); - } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetastoreAccessOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetastoreAccessOperations.java index 005a0cfecccec..b79b2cecce2aa 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetastoreAccessOperations.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetastoreAccessOperations.java @@ -16,9 +16,7 @@ import com.google.common.collect.ImmutableMultiset; import com.google.common.collect.Multiset; import io.trino.Session; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastore; -import io.trino.plugin.hive.metastore.CountingAccessHiveMetastoreUtil; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.intellij.lang.annotations.Language; @@ -28,18 +26,16 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import java.io.File; import java.util.Optional; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.CREATE_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.DROP_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_ALL_TABLES_FROM_DATABASE; -import static 
io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_DATABASE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLE; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.GET_TABLES_WITH_PARAMETER; -import static io.trino.plugin.hive.metastore.CountingAccessHiveMetastore.Method.REPLACE_TABLE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.DROP_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_DATABASE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES; +import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES_WITH_PARAMETER; +import static io.trino.plugin.hive.metastore.MetastoreMethod.REPLACE_TABLE; import static io.trino.plugin.iceberg.IcebergSessionProperties.COLLECT_EXTENDED_STATISTICS_ON_WRITE; import static io.trino.plugin.iceberg.TableType.DATA; import static io.trino.plugin.iceberg.TableType.FILES; @@ -51,7 +47,6 @@ import static io.trino.plugin.iceberg.TableType.REFS; import static io.trino.plugin.iceberg.TableType.SNAPSHOTS; import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingSession.testSessionBuilder; import static org.assertj.core.api.Assertions.assertThat; @Execution(ExecutionMode.SAME_THREAD) // metastore invocation counters shares mutable state so can't be run from many threads simultaneously @@ -59,28 +54,14 @@ public class TestIcebergMetastoreAccessOperations extends AbstractTestQueryFramework { private static final int MAX_PREFIXES_COUNT = 10; - private static final Session TEST_SESSION = testSessionBuilder() - .setCatalog("iceberg") - .setSchema("test_schema") - .build(); - - private CountingAccessHiveMetastore metastore; @Override protected DistributedQueryRunner createQueryRunner() throws Exception { - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(TEST_SESSION) + return IcebergQueryRunner.builder() .addCoordinatorProperty("optimizer.experimental-max-prefetched-information-schema-prefixes", Integer.toString(MAX_PREFIXES_COUNT)) .build(); - - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - metastore = new CountingAccessHiveMetastore(createTestingFileHiveMetastore(baseDir)); - queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE)); - queryRunner.createCatalog("iceberg", "iceberg"); - - queryRunner.execute("CREATE SCHEMA test_schema"); - return queryRunner; } @Test @@ -93,7 +74,7 @@ public void testUse() .setSchema(Optional.empty()) .build(); assertMetastoreInvocations(session, "USE %s.%s".formatted(catalog, schema), - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .build()); } @@ -102,7 +83,7 @@ public void testUse() public void testCreateTable() { assertMetastoreInvocations("CREATE TABLE test_create (id VARCHAR, age INT)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(CREATE_TABLE) .add(GET_DATABASE) .add(GET_TABLE) @@ -113,13 +94,13 @@ public void testCreateTable() public void 
testCreateOrReplaceTable() { assertMetastoreInvocations("CREATE OR REPLACE TABLE test_create_or_replace (id VARCHAR, age INT)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(CREATE_TABLE) .add(GET_DATABASE) .add(GET_TABLE) .build()); assertMetastoreInvocations("CREATE OR REPLACE TABLE test_create_or_replace (id VARCHAR, age INT)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(REPLACE_TABLE) .add(GET_TABLE) @@ -132,7 +113,7 @@ public void testCreateTableAsSelect() assertMetastoreInvocations( withStatsOnWrite(getSession(), false), "CREATE TABLE test_ctas AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(CREATE_TABLE) .add(GET_TABLE) @@ -141,7 +122,7 @@ public void testCreateTableAsSelect() assertMetastoreInvocations( withStatsOnWrite(getSession(), true), "CREATE TABLE test_ctas_with_stats AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(CREATE_TABLE) .addCopies(GET_TABLE, 4) @@ -154,7 +135,7 @@ public void testCreateOrReplaceTableAsSelect() { assertMetastoreInvocations( "CREATE OR REPLACE TABLE test_cortas AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(CREATE_TABLE) .addCopies(GET_TABLE, 4) @@ -163,7 +144,7 @@ public void testCreateOrReplaceTableAsSelect() assertMetastoreInvocations( "CREATE OR REPLACE TABLE test_cortas AS SELECT 1 AS age", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .addCopies(GET_TABLE, 3) .addCopies(REPLACE_TABLE, 2) @@ -176,7 +157,7 @@ public void testSelect() assertUpdate("CREATE TABLE test_select_from (id VARCHAR, age INT)"); assertMetastoreInvocations("SELECT * FROM test_select_from", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -187,7 +168,7 @@ public void testSelectWithFilter() assertUpdate("CREATE TABLE test_select_from_where AS SELECT 2 as age", 1); assertMetastoreInvocations("SELECT * FROM test_select_from_where WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -199,7 +180,7 @@ public void testSelectFromView() assertUpdate("CREATE VIEW test_select_view_view AS SELECT id, age FROM test_select_view_table"); assertMetastoreInvocations("SELECT * FROM test_select_view_view", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -211,7 +192,7 @@ public void testSelectFromViewWithFilter() assertUpdate("CREATE VIEW test_select_view_where_view AS SELECT age FROM test_select_view_where_table"); assertMetastoreInvocations("SELECT * FROM test_select_view_where_view WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -223,7 +204,7 @@ public void testSelectFromMaterializedView() assertUpdate("CREATE MATERIALIZED VIEW test_select_mview_view AS SELECT id, age FROM test_select_mview_table"); assertMetastoreInvocations("SELECT * FROM test_select_mview_view", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -235,7 +216,7 @@ public void testSelectFromMaterializedViewWithFilter() assertUpdate("CREATE MATERIALIZED VIEW test_select_mview_where_view AS SELECT age FROM test_select_mview_where_table"); assertMetastoreInvocations("SELECT * FROM test_select_mview_where_view WHERE age = 2", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 
2) .build()); } @@ -247,7 +228,7 @@ public void testRefreshMaterializedView() assertUpdate("CREATE MATERIALIZED VIEW test_refresh_mview_view AS SELECT id, age FROM test_refresh_mview_table"); assertMetastoreInvocations("REFRESH MATERIALIZED VIEW test_refresh_mview_view", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .addCopies(REPLACE_TABLE, 1) .build()); @@ -260,7 +241,7 @@ public void testJoin() assertUpdate("CREATE TABLE test_join_t2 AS SELECT 'name1' as name, 'id1' AS id", 1); assertMetastoreInvocations("SELECT name, age FROM test_join_t1 JOIN test_join_t2 ON test_join_t2.id = test_join_t1.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 2) .build()); } @@ -271,7 +252,7 @@ public void testSelfJoin() assertUpdate("CREATE TABLE test_self_join_table AS SELECT 2 as age, 0 parent, 3 AS id", 1); assertMetastoreInvocations("SELECT child.age, parent.age FROM test_self_join_table child JOIN test_self_join_table parent ON child.parent = parent.id", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -282,7 +263,7 @@ public void testExplainSelect() assertUpdate("CREATE TABLE test_explain AS SELECT 2 as age", 1); assertMetastoreInvocations("EXPLAIN SELECT * FROM test_explain", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -293,7 +274,7 @@ public void testShowStatsForTable() assertUpdate("CREATE TABLE test_show_stats AS SELECT 2 as age", 1); assertMetastoreInvocations("SHOW STATS FOR test_show_stats", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -304,7 +285,7 @@ public void testShowStatsForTableWithFilter() assertUpdate("CREATE TABLE test_show_stats_with_filter AS SELECT 2 as age", 1); assertMetastoreInvocations("SHOW STATS FOR (SELECT * FROM test_show_stats_with_filter where age >= 2)", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); } @@ -316,42 +297,42 @@ public void testSelectSystemTable() // select from $history assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$history\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); // select from $snapshots assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$snapshots\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); // select from $manifests assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$manifests\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); // select from $partitions assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$partitions\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); // select from $files assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$files\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); // select from $properties assertMetastoreInvocations("SELECT * FROM \"test_select_snapshots$properties\"", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); assertQueryFails("SELECT * FROM \"test_select_snapshots$materialized_view_storage\"", - "Table 'test_schema.test_select_snapshots\\$materialized_view_storage' not found"); + "Table 'tpch.test_select_snapshots\\$materialized_view_storage' not found"); // This test should get updated if 
a new system table is added. assertThat(TableType.values()) @@ -364,7 +345,7 @@ public void testUnregisterTable() assertUpdate("CREATE TABLE test_unregister_table AS SELECT 2 as age", 1); assertMetastoreInvocations("CALL system.unregister_table(CURRENT_SCHEMA, 'test_unregister_table')", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(GET_TABLE) .add(DROP_TABLE) @@ -392,21 +373,21 @@ public void testInformationSchemaColumns(int tables) // Bulk retrieval assertMetastoreInvocations(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name LIKE 'test_select_i_s_columns%'", - ImmutableMultiset.builder() - .add(GET_ALL_TABLES_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_TABLES) .addCopies(GET_TABLE, tables * 2) .addCopies(GET_TABLES_WITH_PARAMETER, 2) .build()); // Pointed lookup assertMetastoreInvocations(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name = 'test_select_i_s_columns0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLE) .build()); // Pointed lookup via DESCRIBE (which does some additional things before delegating to information_schema.columns) assertMetastoreInvocations(session, "DESCRIBE test_select_i_s_columns0", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_DATABASE) .add(GET_TABLE) .build()); @@ -438,23 +419,23 @@ public void testSystemMetadataTableComments(int tables) // Bulk retrieval assertMetastoreInvocations(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name LIKE 'test_select_s_m_t_comments%'", - ImmutableMultiset.builder() - .add(GET_ALL_TABLES_FROM_DATABASE) + ImmutableMultiset.builder() + .add(GET_TABLES) .addCopies(GET_TABLE, tables * 2) .addCopies(GET_TABLES_WITH_PARAMETER, 2) .build()); // Bulk retrieval for two schemas assertMetastoreInvocations(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name IN (CURRENT_SCHEMA, 'non_existent') AND table_name LIKE 'test_select_s_m_t_comments%'", - ImmutableMultiset.builder() - .addCopies(GET_ALL_TABLES_FROM_DATABASE, 2) + ImmutableMultiset.builder() + .addCopies(GET_TABLES, 2) .addCopies(GET_TABLES_WITH_PARAMETER, 4) .addCopies(GET_TABLE, tables * 2) .build()); // Pointed lookup assertMetastoreInvocations(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name = 'test_select_s_m_t_comments0'", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLE, 1) .build()); @@ -493,34 +474,34 @@ public void testSystemMetadataMaterializedViews() // Bulk retrieval assertMetastoreInvocations(session, "SELECT * FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLES_WITH_PARAMETER) .addCopies(GET_TABLE, 4) .build()); // Bulk retrieval without selecting freshness assertMetastoreInvocations(session, "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .add(GET_TABLES_WITH_PARAMETER) .addCopies(GET_TABLE, 2) .build()); // Bulk retrieval for two schemas assertMetastoreInvocations(session, "SELECT * FROM system.metadata.materialized_views WHERE schema_name IN (CURRENT_SCHEMA, 'non_existent')", - ImmutableMultiset.builder() + ImmutableMultiset.builder() .addCopies(GET_TABLES_WITH_PARAMETER, 2) 
                        .addCopies(GET_TABLE, 4)
                        .build());

        // Pointed lookup
        assertMetastoreInvocations(session, "SELECT * FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA AND name = 'mv1'",
-                ImmutableMultiset.<CountingAccessHiveMetastore.Method>builder()
+                ImmutableMultiset.<MetastoreMethod>builder()
                        .addCopies(GET_TABLE, 3)
                        .build());

        // Pointed lookup without selecting freshness
        assertMetastoreInvocations(session, "SELECT schema_name, name FROM system.metadata.materialized_views WHERE schema_name = CURRENT_SCHEMA AND name = 'mv1'",
-                ImmutableMultiset.<CountingAccessHiveMetastore.Method>builder()
+                ImmutableMultiset.<MetastoreMethod>builder()
                        .add(GET_TABLE)
                        .build());
@@ -531,20 +512,20 @@ public void testShowTables()
    {
        assertMetastoreInvocations("SHOW TABLES",
-                ImmutableMultiset.<CountingAccessHiveMetastore.Method>builder()
+                ImmutableMultiset.<MetastoreMethod>builder()
                        .add(GET_DATABASE)
-                        .add(GET_ALL_TABLES_FROM_DATABASE)
+                        .add(GET_TABLES)
                        .build());
    }

-    private void assertMetastoreInvocations(@Language("SQL") String query, Multiset<CountingAccessHiveMetastore.Method> expectedInvocations)
+    private void assertMetastoreInvocations(@Language("SQL") String query, Multiset<MetastoreMethod> expectedInvocations)
    {
        assertMetastoreInvocations(getSession(), query, expectedInvocations);
    }

-    private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset<CountingAccessHiveMetastore.Method> expectedInvocations)
+    private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset<MetastoreMethod> expectedInvocations)
    {
-        CountingAccessHiveMetastoreUtil.assertMetastoreInvocations(metastore, getQueryRunner(), session, query, expectedInvocations);
+        assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), session, query, expectedInvocations);
    }

    private static Session withStatsOnWrite(Session session, boolean enabled)
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMigrateProcedure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMigrateProcedure.java
index 126b116dcb497..f814f1829bdf2 100644
--- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMigrateProcedure.java
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMigrateProcedure.java
@@ -45,10 +45,8 @@ protected QueryRunner createQueryRunner()
    {
        dataDirectory = Files.createTempDirectory("_test_hidden");
        DistributedQueryRunner queryRunner = IcebergQueryRunner.builder().setMetastoreDirectory(dataDirectory.toFile()).build();
-        queryRunner.installPlugin(new TestingHivePlugin());
+        queryRunner.installPlugin(new TestingHivePlugin(dataDirectory));
        queryRunner.createCatalog("hive", "hive", ImmutableMap.<String, String>builder()
-                .put("hive.metastore", "file")
-                .put("hive.metastore.catalog.dir", dataDirectory.toString())
                .put("hive.security", "allow-all")
                .buildOrThrow());
        return queryRunner;
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java
index 686e76eed7b07..5657b21b840c9 100644
--- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java
@@ -30,7 +30,6 @@ import io.trino.orc.OutputStreamOrcDataSink;
import io.trino.plugin.hive.FileFormatDataSourceStats;
import io.trino.plugin.hive.HiveTransactionHandle;
-import io.trino.plugin.hive.metastore.Column;
import 
io.trino.plugin.hive.orc.OrcReaderConfig; import io.trino.plugin.hive.orc.OrcWriterConfig; import io.trino.plugin.hive.parquet.ParquetReaderConfig; @@ -43,7 +42,11 @@ import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.connector.DynamicFilter; import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.predicate.ValueSet; +import io.trino.spi.type.DecimalType; +import io.trino.spi.type.SqlDecimal; import io.trino.spi.type.Type; import io.trino.testing.TestingConnectorSession; import org.apache.iceberg.PartitionSpec; @@ -54,7 +57,9 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.math.BigDecimal; import java.nio.file.Files; +import java.time.LocalDate; import java.util.List; import java.util.Map; import java.util.Optional; @@ -65,11 +70,11 @@ import static io.trino.orc.metadata.CompressionKind.NONE; import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; import static io.trino.plugin.iceberg.ColumnIdentity.TypeCategory.PRIMITIVE; import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; import static io.trino.plugin.iceberg.util.OrcTypeConverter.toOrcType; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.Decimals.writeShortDecimal; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.testing.TestingHandles.TEST_CATALOG_HANDLE; @@ -80,28 +85,28 @@ public class TestIcebergNodeLocalDynamicSplitPruning { - private static final String SCHEMA_NAME = "test"; - private static final String TABLE_NAME = "test"; - private static final Column KEY_COLUMN = new Column("a_integer", HIVE_INT, Optional.empty(), Map.of()); - private static final ColumnIdentity KEY_COLUMN_IDENTITY = new ColumnIdentity(1, KEY_COLUMN.getName(), PRIMITIVE, ImmutableList.of()); - private static final IcebergColumnHandle KEY_ICEBERG_COLUMN_HANDLE = new IcebergColumnHandle(KEY_COLUMN_IDENTITY, INTEGER, ImmutableList.of(), INTEGER, Optional.empty()); - private static final int KEY_COLUMN_VALUE = 42; - private static final Column DATA_COLUMN = new Column("a_varchar", HIVE_STRING, Optional.empty(), Map.of()); - private static final ColumnIdentity DATA_COLUMN_IDENTITY = new ColumnIdentity(2, DATA_COLUMN.getName(), PRIMITIVE, ImmutableList.of()); - private static final IcebergColumnHandle DATA_ICEBERG_COLUMN_HANDLE = new IcebergColumnHandle(DATA_COLUMN_IDENTITY, VARCHAR, ImmutableList.of(), VARCHAR, Optional.empty()); - private static final String DATA_COLUMN_VALUE = "hello world"; - private static final Schema TABLE_SCHEMA = new Schema( - optional(KEY_COLUMN_IDENTITY.getId(), KEY_COLUMN.getName(), Types.IntegerType.get()), - optional(DATA_COLUMN_IDENTITY.getId(), DATA_COLUMN.getName(), Types.StringType.get())); private static final OrcReaderConfig ORC_READER_CONFIG = new OrcReaderConfig(); private static final OrcWriterConfig ORC_WRITER_CONFIG = new OrcWriterConfig(); private static final ParquetReaderConfig PARQUET_READER_CONFIG = new ParquetReaderConfig(); private static final ParquetWriterConfig PARQUET_WRITER_CONFIG = new ParquetWriterConfig(); @Test - public void testDynamicSplitPruning() + public void testDynamicSplitPruningOnUnpartitionedTable() throws IOException { + String tableName = 
"unpartitioned_table"; + String keyColumnName = "a_integer"; + ColumnIdentity keyColumnIdentity = new ColumnIdentity(1, keyColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle keyColumnHandle = new IcebergColumnHandle(keyColumnIdentity, INTEGER, ImmutableList.of(), INTEGER, Optional.empty()); + int keyColumnValue = 42; + String dataColumnName = "a_varchar"; + ColumnIdentity dataColumnIdentity = new ColumnIdentity(2, dataColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle dataColumnHandle = new IcebergColumnHandle(dataColumnIdentity, VARCHAR, ImmutableList.of(), VARCHAR, Optional.empty()); + String dataColumnValue = "hello world"; + Schema tableSchema = new Schema( + optional(keyColumnIdentity.getId(), keyColumnName, Types.IntegerType.get()), + optional(dataColumnIdentity.getId(), dataColumnName, Types.StringType.get())); + IcebergConfig icebergConfig = new IcebergConfig(); HiveTransactionHandle transaction = new HiveTransactionHandle(false); try (TempFile file = new TempFile()) { @@ -109,88 +114,400 @@ public void testDynamicSplitPruning() TrinoOutputFile outputFile = new LocalOutputFile(file.file()); TrinoInputFile inputFile = new LocalInputFile(file.file()); - writeOrcContent(outputFile); + List columnNames = ImmutableList.of(keyColumnName, dataColumnName); + List types = ImmutableList.of(INTEGER, VARCHAR); + + try (OrcWriter writer = new OrcWriter( + OutputStreamOrcDataSink.create(outputFile), + columnNames, + types, + toOrcType(tableSchema), + NONE, + new OrcWriterOptions(), + ImmutableMap.of(), + true, + OrcWriteValidation.OrcWriteValidationMode.BOTH, + new OrcWriterStats())) { + BlockBuilder keyBuilder = INTEGER.createBlockBuilder(null, 1); + INTEGER.writeLong(keyBuilder, keyColumnValue); + BlockBuilder dataBuilder = VARCHAR.createBlockBuilder(null, 1); + VARCHAR.writeString(dataBuilder, dataColumnValue); + writer.write(new Page(keyBuilder.build(), dataBuilder.build())); + } + + IcebergSplit split = new IcebergSplit( + inputFile.toString(), + 0, + inputFile.length(), + inputFile.length(), + -1, // invalid; normally known + ORC, + PartitionSpecParser.toJson(PartitionSpec.unpartitioned()), + PartitionData.toJson(new PartitionData(new Object[] {})), + ImmutableList.of(), + SplitWeight.standard()); - try (ConnectorPageSource emptyPageSource = createTestingPageSource(transaction, icebergConfig, inputFile, getDynamicFilter(getTupleDomainForSplitPruning()))) { + String tablePath = inputFile.location().fileName(); + TableHandle tableHandle = new TableHandle( + TEST_CATALOG_HANDLE, + new IcebergTableHandle( + CatalogHandle.fromId("iceberg:NORMAL:v12345"), + "test_schema", + tableName, + TableType.DATA, + Optional.empty(), + SchemaParser.toJson(tableSchema), + Optional.of(PartitionSpecParser.toJson(PartitionSpec.unpartitioned())), + 2, + TupleDomain.withColumnDomains(ImmutableMap.of(keyColumnHandle, Domain.singleValue(INTEGER, (long) keyColumnValue))), + TupleDomain.all(), + OptionalLong.empty(), + ImmutableSet.of(keyColumnHandle), + Optional.empty(), + tablePath, + ImmutableMap.of(), + false, + Optional.empty(), + ImmutableSet.of(), + Optional.of(false)), + transaction); + + TupleDomain splitPruningPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + keyColumnHandle, + Domain.singleValue(INTEGER, 1L))); + try (ConnectorPageSource emptyPageSource = createTestingPageSource(transaction, icebergConfig, split, tableHandle, ImmutableList.of(keyColumnHandle, dataColumnHandle), getDynamicFilter(splitPruningPredicate))) { 
assertThat(emptyPageSource.getNextPage()).isNull(); } - try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource(transaction, icebergConfig, inputFile, getDynamicFilter(getNonSelectiveTupleDomain()))) { + TupleDomain nonSelectivePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + keyColumnHandle, + Domain.singleValue(INTEGER, (long) keyColumnValue))); + try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource(transaction, icebergConfig, split, tableHandle, ImmutableList.of(keyColumnHandle, dataColumnHandle), getDynamicFilter(nonSelectivePredicate))) { Page page = nonEmptyPageSource.getNextPage(); assertThat(page).isNotNull(); - assertThat(page.getBlock(0).getPositionCount()).isEqualTo(1); - assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(KEY_COLUMN_VALUE); - assertThat(page.getBlock(1).getPositionCount()).isEqualTo(1); - assertThat(page.getBlock(1).getSlice(0, 0, page.getBlock(1).getSliceLength(0)).toStringUtf8()).isEqualTo(DATA_COLUMN_VALUE); + assertThat(page.getPositionCount()).isEqualTo(1); + assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(keyColumnValue); + assertThat(page.getBlock(1).getSlice(0, 0, page.getBlock(1).getSliceLength(0)).toStringUtf8()).isEqualTo(dataColumnValue); } } } - private static void writeOrcContent(TrinoOutputFile outputFile) + @Test + public void testDynamicSplitPruningWithExplicitPartitionFilter() throws IOException { - List columnNames = ImmutableList.of(KEY_COLUMN.getName(), DATA_COLUMN.getName()); - List types = ImmutableList.of(INTEGER, VARCHAR); - - try (OrcWriter writer = new OrcWriter( - OutputStreamOrcDataSink.create(outputFile), - columnNames, - types, - toOrcType(TABLE_SCHEMA), - NONE, - new OrcWriterOptions(), - ImmutableMap.of(), - true, - OrcWriteValidation.OrcWriteValidationMode.BOTH, - new OrcWriterStats())) { - BlockBuilder keyBuilder = INTEGER.createBlockBuilder(null, 1); - INTEGER.writeLong(keyBuilder, KEY_COLUMN_VALUE); - BlockBuilder dataBuilder = VARCHAR.createBlockBuilder(null, 1); - VARCHAR.writeString(dataBuilder, DATA_COLUMN_VALUE); - writer.write(new Page(keyBuilder.build(), dataBuilder.build())); + String tableName = "sales_table"; + String dateColumnName = "date"; + ColumnIdentity dateColumnIdentity = new ColumnIdentity(1, dateColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle dateColumnHandle = new IcebergColumnHandle(dateColumnIdentity, DATE, ImmutableList.of(), DATE, Optional.empty()); + long dateColumnValue = LocalDate.of(2023, 1, 10).toEpochDay(); + String receiptColumnName = "receipt"; + ColumnIdentity receiptColumnIdentity = new ColumnIdentity(2, receiptColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle receiptColumnHandle = new IcebergColumnHandle(receiptColumnIdentity, VARCHAR, ImmutableList.of(), VARCHAR, Optional.empty()); + String receiptColumnValue = "#12345"; + String amountColumnName = "amount"; + ColumnIdentity amountColumnIdentity = new ColumnIdentity(3, amountColumnName, PRIMITIVE, ImmutableList.of()); + DecimalType amountColumnType = DecimalType.createDecimalType(10, 2); + IcebergColumnHandle amountColumnHandle = new IcebergColumnHandle(amountColumnIdentity, amountColumnType, ImmutableList.of(), amountColumnType, Optional.empty()); + BigDecimal amountColumnValue = new BigDecimal("1234567.65"); + Schema tableSchema = new Schema( + optional(dateColumnIdentity.getId(), dateColumnName, Types.DateType.get()), + optional(receiptColumnIdentity.getId(), receiptColumnName, Types.StringType.get()), + optional(amountColumnIdentity.getId(), 
amountColumnName, Types.DecimalType.of(10, 2))); + PartitionSpec partitionSpec = PartitionSpec.builderFor(tableSchema) + .identity(dateColumnName) + .build(); + + IcebergConfig icebergConfig = new IcebergConfig(); + HiveTransactionHandle transaction = new HiveTransactionHandle(false); + try (TempFile file = new TempFile()) { + Files.delete(file.path()); + + TrinoOutputFile outputFile = new LocalOutputFile(file.file()); + TrinoInputFile inputFile = new LocalInputFile(file.file()); + List columnNames = ImmutableList.of(dateColumnName, receiptColumnName, amountColumnName); + List types = ImmutableList.of(DATE, VARCHAR, amountColumnType); + + try (OrcWriter writer = new OrcWriter( + OutputStreamOrcDataSink.create(outputFile), + columnNames, + types, + toOrcType(tableSchema), + NONE, + new OrcWriterOptions(), + ImmutableMap.of(), + true, + OrcWriteValidation.OrcWriteValidationMode.BOTH, + new OrcWriterStats())) { + BlockBuilder dateBuilder = DATE.createBlockBuilder(null, 1); + DATE.writeLong(dateBuilder, dateColumnValue); + BlockBuilder receiptBuilder = VARCHAR.createBlockBuilder(null, 1); + VARCHAR.writeString(receiptBuilder, receiptColumnValue); + BlockBuilder amountBuilder = amountColumnType.createBlockBuilder(null, 1); + writeShortDecimal(amountBuilder, amountColumnValue.unscaledValue().longValueExact()); + writer.write(new Page(dateBuilder.build(), receiptBuilder.build(), amountBuilder.build())); + } + + IcebergSplit split = new IcebergSplit( + inputFile.toString(), + 0, + inputFile.length(), + inputFile.length(), + -1, // invalid; normally known + ORC, + PartitionSpecParser.toJson(partitionSpec), + PartitionData.toJson(new PartitionData(new Object[] {dateColumnValue})), + ImmutableList.of(), + SplitWeight.standard()); + + String tablePath = inputFile.location().fileName(); + TableHandle tableHandle = new TableHandle( + TEST_CATALOG_HANDLE, + new IcebergTableHandle( + CatalogHandle.fromId("iceberg:NORMAL:v12345"), + "test_schema", + tableName, + TableType.DATA, + Optional.empty(), + SchemaParser.toJson(tableSchema), + Optional.of(PartitionSpecParser.toJson(partitionSpec)), + 2, + TupleDomain.all(), + TupleDomain.all(), + OptionalLong.empty(), + ImmutableSet.of(dateColumnHandle), + Optional.empty(), + tablePath, + ImmutableMap.of(), + false, + Optional.empty(), + ImmutableSet.of(), + Optional.of(false)), + transaction); + + // Simulate situations where the dynamic filter (e.g.: while performing a JOIN with another table) reduces considerably + // the amount of data to be processed from the current table + + TupleDomain differentDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.singleValue(DATE, LocalDate.of(2023, 2, 2).toEpochDay()))); + TupleDomain nonOverlappingDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(DATE, LocalDate.of(2023, 2, 2).toEpochDay())), true))); + for (TupleDomain partitionPredicate : List.of(differentDatePredicate, nonOverlappingDatePredicate)) { + try (ConnectorPageSource emptyPageSource = createTestingPageSource( + transaction, + icebergConfig, + split, + tableHandle, + ImmutableList.of(dateColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + assertThat(emptyPageSource.getNextPage()).isNull(); + } + } + + TupleDomain sameDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.singleValue(DATE, dateColumnValue))); + TupleDomain 
overlappingDatePredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + dateColumnHandle, + Domain.create(ValueSet.ofRanges(Range.range(DATE, LocalDate.of(2023, 1, 1).toEpochDay(), true, LocalDate.of(2023, 2, 1).toEpochDay(), false)), true))); + for (TupleDomain partitionPredicate : List.of(sameDatePredicate, overlappingDatePredicate)) { + try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource( + transaction, + icebergConfig, + split, + tableHandle, + ImmutableList.of(dateColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + Page page = nonEmptyPageSource.getNextPage(); + assertThat(page).isNotNull(); + assertThat(page.getPositionCount()).isEqualTo(1); + assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(dateColumnValue); + assertThat(page.getBlock(1).getSlice(0, 0, page.getBlock(1).getSliceLength(0)).toStringUtf8()).isEqualTo(receiptColumnValue); + assertThat(((SqlDecimal) amountColumnType.getObjectValue(null, page.getBlock(2), 0)).toBigDecimal()).isEqualTo(amountColumnValue); + } + } } } - private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle transaction, IcebergConfig icebergConfig, TrinoInputFile inputFile, DynamicFilter dynamicFilter) + @Test + public void testDynamicSplitPruningWithExplicitPartitionFilterPartitionEvolution() throws IOException { - IcebergSplit split = new IcebergSplit( - inputFile.toString(), - 0, - inputFile.length(), - inputFile.length(), - -1, // invalid; normally known - ORC, - PartitionSpecParser.toJson(PartitionSpec.unpartitioned()), - PartitionData.toJson(new PartitionData(new Object[] {})), - ImmutableList.of(), - SplitWeight.standard()); - - String tablePath = inputFile.location().fileName(); - TableHandle tableHandle = new TableHandle( - TEST_CATALOG_HANDLE, - new IcebergTableHandle( - CatalogHandle.fromId("iceberg:NORMAL:v12345"), - SCHEMA_NAME, - TABLE_NAME, - TableType.DATA, - Optional.empty(), - SchemaParser.toJson(TABLE_SCHEMA), - Optional.of(PartitionSpecParser.toJson(PartitionSpec.unpartitioned())), - 2, - TupleDomain.withColumnDomains(ImmutableMap.of(KEY_ICEBERG_COLUMN_HANDLE, Domain.singleValue(INTEGER, (long) KEY_COLUMN_VALUE))), - TupleDomain.all(), - OptionalLong.empty(), - ImmutableSet.of(KEY_ICEBERG_COLUMN_HANDLE), - Optional.empty(), - tablePath, - ImmutableMap.of(), - false, - Optional.empty(), - ImmutableSet.of(), - Optional.of(false)), - transaction); + String tableName = "sales_table"; + String yearColumnName = "year"; + ColumnIdentity yearColumnIdentity = new ColumnIdentity(1, yearColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle yearColumnHandle = new IcebergColumnHandle(yearColumnIdentity, INTEGER, ImmutableList.of(), INTEGER, Optional.empty()); + long yearColumnValue = 2023L; + String monthColumnName = "month"; + ColumnIdentity monthColumnIdentity = new ColumnIdentity(2, monthColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle monthColumnHandle = new IcebergColumnHandle(monthColumnIdentity, INTEGER, ImmutableList.of(), INTEGER, Optional.empty()); + long monthColumnValue = 1L; + String receiptColumnName = "receipt"; + ColumnIdentity receiptColumnIdentity = new ColumnIdentity(3, receiptColumnName, PRIMITIVE, ImmutableList.of()); + IcebergColumnHandle receiptColumnHandle = new IcebergColumnHandle(receiptColumnIdentity, VARCHAR, ImmutableList.of(), VARCHAR, Optional.empty()); + String receiptColumnValue = "#12345"; + String amountColumnName = "amount"; + ColumnIdentity amountColumnIdentity = new ColumnIdentity(4, 
amountColumnName, PRIMITIVE, ImmutableList.of()); + DecimalType amountColumnType = DecimalType.createDecimalType(10, 2); + IcebergColumnHandle amountColumnHandle = new IcebergColumnHandle(amountColumnIdentity, amountColumnType, ImmutableList.of(), amountColumnType, Optional.empty()); + BigDecimal amountColumnValue = new BigDecimal("1234567.65"); + Schema tableSchema = new Schema( + optional(yearColumnIdentity.getId(), yearColumnName, Types.IntegerType.get()), + optional(monthColumnIdentity.getId(), monthColumnName, Types.IntegerType.get()), + optional(receiptColumnIdentity.getId(), receiptColumnName, Types.StringType.get()), + optional(amountColumnIdentity.getId(), amountColumnName, Types.DecimalType.of(10, 2))); + PartitionSpec partitionSpec = PartitionSpec.builderFor(tableSchema) + .identity(yearColumnName) + .build(); + IcebergConfig icebergConfig = new IcebergConfig(); + HiveTransactionHandle transaction = new HiveTransactionHandle(false); + try (TempFile file = new TempFile()) { + Files.delete(file.path()); + + TrinoOutputFile outputFile = new LocalOutputFile(file.file()); + TrinoInputFile inputFile = new LocalInputFile(file.file()); + List columnNames = ImmutableList.of(yearColumnName, monthColumnName, receiptColumnName, amountColumnName); + List types = ImmutableList.of(INTEGER, INTEGER, VARCHAR, amountColumnType); + + try (OrcWriter writer = new OrcWriter( + OutputStreamOrcDataSink.create(outputFile), + columnNames, + types, + toOrcType(tableSchema), + NONE, + new OrcWriterOptions(), + ImmutableMap.of(), + true, + OrcWriteValidation.OrcWriteValidationMode.BOTH, + new OrcWriterStats())) { + BlockBuilder yearBuilder = INTEGER.createBlockBuilder(null, 1); + INTEGER.writeLong(yearBuilder, yearColumnValue); + BlockBuilder monthBuilder = INTEGER.createBlockBuilder(null, 1); + INTEGER.writeLong(monthBuilder, monthColumnValue); + BlockBuilder receiptBuilder = VARCHAR.createBlockBuilder(null, 1); + VARCHAR.writeString(receiptBuilder, receiptColumnValue); + BlockBuilder amountBuilder = amountColumnType.createBlockBuilder(null, 1); + writeShortDecimal(amountBuilder, amountColumnValue.unscaledValue().longValueExact()); + writer.write(new Page(yearBuilder.build(), monthBuilder.build(), receiptBuilder.build(), amountBuilder.build())); + } + + IcebergSplit split = new IcebergSplit( + inputFile.toString(), + 0, + inputFile.length(), + inputFile.length(), + -1, // invalid; normally known + ORC, + PartitionSpecParser.toJson(partitionSpec), + PartitionData.toJson(new PartitionData(new Object[] {yearColumnValue})), + ImmutableList.of(), + SplitWeight.standard()); + + String tablePath = inputFile.location().fileName(); + // Simulate the situation where `month` column is added at a later phase as partitioning column + // in addition to the `year` column, which leads to use it as unenforced predicate in the table handle + // after applying the filter + TableHandle tableHandle = new TableHandle( + TEST_CATALOG_HANDLE, + new IcebergTableHandle( + CatalogHandle.fromId("iceberg:NORMAL:v12345"), + "test_schema", + tableName, + TableType.DATA, + Optional.empty(), + SchemaParser.toJson(tableSchema), + Optional.of(PartitionSpecParser.toJson(partitionSpec)), + 2, + TupleDomain.withColumnDomains( + ImmutableMap.of( + yearColumnHandle, + Domain.create(ValueSet.ofRanges(Range.range(INTEGER, 2023L, true, 2024L, true)), true))), + TupleDomain.withColumnDomains( + ImmutableMap.of( + monthColumnHandle, + Domain.create(ValueSet.ofRanges(Range.range(INTEGER, 1L, true, 12L, true)), true))), + OptionalLong.empty(), + 
ImmutableSet.of(yearColumnHandle, monthColumnHandle, receiptColumnHandle, amountColumnHandle), + Optional.empty(), + tablePath, + ImmutableMap.of(), + false, + Optional.empty(), + ImmutableSet.of(), + Optional.of(false)), + transaction); + + // Simulate situations where the dynamic filter (e.g.: while performing a JOIN with another table) reduces considerably + // the amount of data to be processed from the current table + TupleDomain differentYearPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + yearColumnHandle, + Domain.singleValue(INTEGER, 2024L))); + TupleDomain sameYearAndDifferentMonthPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + yearColumnHandle, + Domain.singleValue(INTEGER, 2023L), + monthColumnHandle, + Domain.singleValue(INTEGER, 2L))); + for (TupleDomain partitionPredicate : List.of(differentYearPredicate, sameYearAndDifferentMonthPredicate)) { + try (ConnectorPageSource emptyPageSource = createTestingPageSource( + transaction, + icebergConfig, + split, + tableHandle, + ImmutableList.of(yearColumnHandle, monthColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + assertThat(emptyPageSource.getNextPage()).isNull(); + } + } + + TupleDomain sameYearPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + yearColumnHandle, + Domain.singleValue(INTEGER, 2023L))); + TupleDomain sameYearAndMonthPredicate = TupleDomain.withColumnDomains( + ImmutableMap.of( + yearColumnHandle, + Domain.singleValue(INTEGER, 2023L), + monthColumnHandle, + Domain.singleValue(INTEGER, 1L))); + for (TupleDomain partitionPredicate : List.of(sameYearPredicate, sameYearAndMonthPredicate)) { + try (ConnectorPageSource nonEmptyPageSource = createTestingPageSource( + transaction, + icebergConfig, + split, + tableHandle, + ImmutableList.of(yearColumnHandle, monthColumnHandle, receiptColumnHandle, amountColumnHandle), + getDynamicFilter(partitionPredicate))) { + Page page = nonEmptyPageSource.getNextPage(); + assertThat(page).isNotNull(); + assertThat(page.getPositionCount()).isEqualTo(1); + assertThat(page.getBlock(0).getInt(0, 0)).isEqualTo(2023L); + assertThat(page.getBlock(1).getInt(0, 0)).isEqualTo(1L); + assertThat(page.getBlock(2).getSlice(0, 0, page.getBlock(2).getSliceLength(0)).toStringUtf8()).isEqualTo(receiptColumnValue); + assertThat(((SqlDecimal) amountColumnType.getObjectValue(null, page.getBlock(3), 0)).toBigDecimal()).isEqualTo(amountColumnValue); + } + } + } + } + + private static ConnectorPageSource createTestingPageSource( + HiveTransactionHandle transaction, + IcebergConfig icebergConfig, + IcebergSplit split, + TableHandle tableHandle, + List columns, + DynamicFilter dynamicFilter) + { FileFormatDataSourceStats stats = new FileFormatDataSourceStats(); IcebergPageSourceProvider provider = new IcebergPageSourceProvider( new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS), @@ -204,26 +521,10 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle getSession(icebergConfig), split, tableHandle.getConnectorHandle(), - ImmutableList.of(KEY_ICEBERG_COLUMN_HANDLE, DATA_ICEBERG_COLUMN_HANDLE), + columns, dynamicFilter); } - private static TupleDomain getTupleDomainForSplitPruning() - { - return TupleDomain.withColumnDomains( - ImmutableMap.of( - KEY_ICEBERG_COLUMN_HANDLE, - Domain.singleValue(INTEGER, 1L))); - } - - private static TupleDomain getNonSelectiveTupleDomain() - { - return TupleDomain.withColumnDomains( - ImmutableMap.of( - KEY_ICEBERG_COLUMN_HANDLE, - 
Domain.singleValue(INTEGER, (long) KEY_COLUMN_VALUE))); - } - private static TestingConnectorSession getSession(IcebergConfig icebergConfig) { return TestingConnectorSession.builder() diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java index 0371c613249a4..03f440472ac52 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java @@ -19,11 +19,11 @@ import io.trino.plugin.base.CatalogName; import io.trino.plugin.hive.TrinoViewHiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; import io.trino.plugin.tpch.TpchPlugin; import io.trino.spi.connector.SchemaTableName; @@ -43,13 +43,11 @@ import java.util.Map; import java.util.Optional; -import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.trino.SystemSessionProperties.MAX_DRIVERS_PER_TASK; import static io.trino.SystemSessionProperties.TASK_CONCURRENCY; import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT; import static io.trino.SystemSessionProperties.TASK_MIN_WRITER_COUNT; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.DataFileRecord.toDataFileRecord; import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; @@ -80,13 +78,17 @@ protected QueryRunner createQueryRunner() .build(); File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingIcebergPlugin(baseDir.toPath())); queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.of("iceberg.file-format", "ORC")); TrinoFileSystemFactory fileSystemFactory = getFileSystemFactory(queryRunner); tableOperationsProvider = new FileMetastoreTableOperationsProvider(fileSystemFactory); + + HiveMetastore metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + CachingHiveMetastore cachingHiveMetastore = createPerTransactionCache(metastore, 1000); trinoCatalog = new TrinoHiveCatalog( new CatalogName("catalog"), @@ -178,10 +180,8 @@ public void testMetrics() assertThat(datafile.getRecordCount()).isEqualTo(1); assertThat(datafile.getValueCounts().size()).isEqualTo(1); assertThat(datafile.getNullValueCounts().size()).isEqualTo(1); - 
datafile.getUpperBounds().forEach((k, v) -> { - assertThat(v.length()).isEqualTo(10); }); - datafile.getLowerBounds().forEach((k, v) -> { - assertThat(v.length()).isEqualTo(10); }); + datafile.getUpperBounds().forEach((k, v) -> assertThat(v.length()).isEqualTo(10)); + datafile.getLowerBounds().forEach((k, v) -> assertThat(v.length()).isEqualTo(10)); // keep both c1 and c2 metrics assertUpdate("create table c_metrics (c1 varchar, c2 varchar)"); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java index f2830b0408c12..8af23930e067d 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java @@ -13,8 +13,14 @@ */ package io.trino.plugin.iceberg; +import io.trino.Session; +import io.trino.filesystem.Location; +import io.trino.operator.OperatorStats; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; +import io.trino.testing.MaterializedResultWithQueryId; import io.trino.testing.sql.TestTable; +import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import java.util.Optional; @@ -24,6 +30,7 @@ import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; import static io.trino.plugin.iceberg.IcebergTestUtils.withSmallRowGroups; +import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -90,9 +97,72 @@ public void testDropAmbiguousRowFieldCaseSensitivity() .hasStackTraceContaining("Multiple entries with same key"); } + @Test + public void testIgnoreParquetStatistics() + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_ignore_parquet_statistics", + "WITH (sorted_by = ARRAY['custkey']) AS TABLE tpch.tiny.customer WITH NO DATA")) { + assertUpdate( + withSmallRowGroups(getSession()), + "INSERT INTO " + table.getName() + " TABLE tpch.tiny.customer", + "VALUES 1500"); + + @Language("SQL") String query = "SELECT * FROM " + table.getName() + " WHERE custkey = 100"; + + DistributedQueryRunner queryRunner = getDistributedQueryRunner(); + MaterializedResultWithQueryId resultWithoutParquetStatistics = queryRunner.executeWithQueryId( + Session.builder(getSession()) + .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "parquet_ignore_statistics", "true") + .build(), + query); + OperatorStats queryStatsWithoutParquetStatistics = getOperatorStats(resultWithoutParquetStatistics.getQueryId()); + assertThat(queryStatsWithoutParquetStatistics.getPhysicalInputPositions()).isGreaterThan(0); + + MaterializedResultWithQueryId resultWithParquetStatistics = queryRunner.executeWithQueryId(getSession(), query); + OperatorStats queryStatsWithParquetStatistics = getOperatorStats(resultWithParquetStatistics.getQueryId()); + assertThat(queryStatsWithParquetStatistics.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(queryStatsWithParquetStatistics.getPhysicalInputPositions()) + .isLessThan(queryStatsWithoutParquetStatistics.getPhysicalInputPositions()); + + assertEqualsIgnoreOrder(resultWithParquetStatistics.getResult(), resultWithoutParquetStatistics.getResult()); + } + } + + @Test + 
public void testPushdownPredicateToParquetAfterColumnRename() + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_pushdown_predicate_statistics", + "WITH (sorted_by = ARRAY['custkey']) AS TABLE tpch.tiny.customer WITH NO DATA")) { + assertUpdate( + withSmallRowGroups(getSession()), + "INSERT INTO " + table.getName() + " TABLE tpch.tiny.customer", + "VALUES 1500"); + + assertUpdate("ALTER TABLE " + table.getName() + " RENAME COLUMN custkey TO custkey1"); + + DistributedQueryRunner queryRunner = getDistributedQueryRunner(); + MaterializedResultWithQueryId resultWithoutPredicate = queryRunner.executeWithQueryId(getSession(), "TABLE " + table.getName()); + OperatorStats queryStatsWithoutPredicate = getOperatorStats(resultWithoutPredicate.getQueryId()); + assertThat(queryStatsWithoutPredicate.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(resultWithoutPredicate.getResult()).hasSize(1500); + + @Language("SQL") String selectiveQuery = "SELECT * FROM " + table.getName() + " WHERE custkey1 = 100"; + MaterializedResultWithQueryId selectiveQueryResult = queryRunner.executeWithQueryId(getSession(), selectiveQuery); + OperatorStats queryStatsSelectiveQuery = getOperatorStats(selectiveQueryResult.getQueryId()); + assertThat(queryStatsSelectiveQuery.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(queryStatsSelectiveQuery.getPhysicalInputPositions()) + .isLessThan(queryStatsWithoutPredicate.getPhysicalInputPositions()); + assertThat(selectiveQueryResult.getResult()).hasSize(1); + } + } + @Override protected boolean isFileSorted(String path, String sortColumnName) { - return checkParquetFileSorting(path, sortColumnName); + return checkParquetFileSorting(fileSystem.newInputFile(Location.of(path)), sortColumnName); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetWithBloomFilters.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetWithBloomFilters.java index 54b3c936b1d14..fa8d634704c49 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetWithBloomFilters.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetWithBloomFilters.java @@ -39,10 +39,8 @@ protected QueryRunner createQueryRunner() dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data"); // create hive catalog - queryRunner.installPlugin(new TestingHivePlugin()); + queryRunner.installPlugin(new TestingHivePlugin(dataDirectory)); queryRunner.createCatalog("hive", "hive", ImmutableMap.builder() - .put("hive.metastore", "file") - .put("hive.metastore.catalog.dir", dataDirectory.toString()) .put("hive.security", "allow-all") .buildOrThrow()); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergPlugin.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergPlugin.java index 775dad7252973..0f055ebed57d7 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergPlugin.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergPlugin.java @@ -25,7 +25,6 @@ import java.util.Map; import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.plugin.hive.HiveConfig.HIVE_VIEWS_ENABLED; import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestIcebergPlugin @@ -214,7 +213,7 @@ public void testIcebergPluginFailsWhenIncorrectPropertyProvided() "test", Map.of( "iceberg.catalog.type", 
"HIVE_METASTORE", - HIVE_VIEWS_ENABLED, "true", + "hive.hive-views.enabled", "true", "hive.metastore.uri", "thrift://foo:1234", "bootstrap.quiet", "true"), new TestingConnectorContext()) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergProjectionPushdownPlans.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergProjectionPushdownPlans.java index 6ab65da9f99fd..cd5de6b18f3a3 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergProjectionPushdownPlans.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergProjectionPushdownPlans.java @@ -22,7 +22,7 @@ import io.trino.metadata.TableHandle; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; @@ -43,8 +43,6 @@ import static com.google.common.base.Predicates.equalTo; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.sql.planner.assertions.PlanMatchPattern.any; import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree; @@ -80,7 +78,6 @@ protected LocalQueryRunner createLocalQueryRunner() catch (IOException e) { throw new UncheckedIOException(e); } - HiveMetastore metastore = createTestingFileHiveMetastore(metastoreDir); LocalQueryRunner queryRunner = LocalQueryRunner.create(session); InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); @@ -89,9 +86,13 @@ protected LocalQueryRunner createLocalQueryRunner() queryRunner.createCatalog( CATALOG, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE), + new TestingIcebergConnectorFactory(metastoreDir.toPath()), ImmutableMap.of()); + HiveMetastore metastore = ((IcebergConnector) queryRunner.getConnector(CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + Database database = Database.builder() .setDatabaseName(SCHEMA) .setOwnerName(Optional.of("public")) @@ -217,8 +218,14 @@ public void testDereferencePushdown() "expr_0", expression("expr_0"), "expr_0_y", expression("expr_0[2]")), join(INNER, builder -> builder - .equiCriteria("t_expr_1", "s_expr_1") + .equiCriteria("s_expr_1", "t_expr_1") .left( + anyTree( + tableScan( + equalTo(((IcebergTableHandle) tableHandle.get().getConnectorHandle()).withProjectedColumns(Set.of(column1Handle))), + TupleDomain.all(), + ImmutableMap.of("s_expr_1", equalTo(column1Handle))))) + .right( anyTree( filter( "x = BIGINT '2'", @@ -233,12 +240,6 @@ public void testDereferencePushdown() unenforcedConstraint.equals(expectedUnenforcedConstraint); }, TupleDomain.all(), - ImmutableMap.of("x", equalTo(columnX), "expr_0", equalTo(column0Handle), "t_expr_1", equalTo(column1Handle)))))) - .right( - anyTree( - tableScan( - equalTo(((IcebergTableHandle) tableHandle.get().getConnectorHandle()).withProjectedColumns(Set.of(column1Handle))), - 
TupleDomain.all(), - ImmutableMap.of("s_expr_1", equalTo(column1Handle))))))))); + ImmutableMap.of("x", equalTo(columnX), "expr_0", equalTo(column0Handle), "t_expr_1", equalTo(column1Handle)))))))))); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergReadVersionedTable.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergReadVersionedTable.java index 92374ea8742aa..eb33197680b1f 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergReadVersionedTable.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergReadVersionedTable.java @@ -90,10 +90,10 @@ public void testSelectTableWithEndLongTimestampWithTimezone() public void testEndVersionInTableNameAndForClauseShouldFail() { assertQueryFails("SELECT * FROM \"test_iceberg_read_versioned_table@" + v1SnapshotId + "\" FOR VERSION AS OF " + v1SnapshotId, - "Invalid Iceberg table name: test_iceberg_read_versioned_table@%d".formatted(v1SnapshotId)); + "line 1:15: Table 'iceberg.tpch.\"test_iceberg_read_versioned_table@%d\"' does not exist".formatted(v1SnapshotId)); assertQueryFails("SELECT * FROM \"test_iceberg_read_versioned_table@" + v1SnapshotId + "\" FOR TIMESTAMP AS OF " + timestampLiteral(v1EpochMillis, 9), - "Invalid Iceberg table name: test_iceberg_read_versioned_table@%d".formatted(v1SnapshotId)); + "line 1:15: Table 'iceberg.tpch.\"test_iceberg_read_versioned_table@%d\"' does not exist".formatted(v1SnapshotId)); } @Test diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergRegisterTableProcedure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergRegisterTableProcedure.java index 079f9af759d5d..a8550d28f94d8 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergRegisterTableProcedure.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergRegisterTableProcedure.java @@ -20,7 +20,10 @@ import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.plugin.tpch.TpchPlugin; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; import io.trino.testing.QueryRunner; import org.apache.hadoop.conf.Configuration; @@ -43,18 +46,23 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import static com.google.common.base.Verify.verify; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static com.google.inject.util.Modules.EMPTY_MODULE; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.plugin.iceberg.IcebergUtil.METADATA_FOLDER_NAME; import static io.trino.plugin.iceberg.procedure.RegisterTableProcedure.getLatestMetadataLocation; import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.TestingNames.randomNameSuffix; 
+import static io.trino.testing.TestingSession.testSessionBuilder; import static java.lang.String.format; import static java.util.Locale.ENGLISH; import static org.apache.iceberg.Files.localInput; @@ -73,11 +81,26 @@ protected QueryRunner createQueryRunner() { metastoreDir = Files.createTempDirectory("test_iceberg_register_table").toFile(); metastoreDir.deleteOnExit(); - metastore = createTestingFileHiveMetastore(metastoreDir); - return IcebergQueryRunner.builder() - .setMetastoreDirectory(metastoreDir) - .setIcebergProperties(ImmutableMap.of("iceberg.register-table-procedure.enabled", "true")) - .build(); + metastore = createTestingFileHiveMetastore(HDFS_FILE_SYSTEM_FACTORY, Location.of(metastoreDir.getAbsolutePath())); + + // TODO: convert to IcebergQueryRunner when there is a replacement for HadoopTables that works with TrinoFileSystem + DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(testSessionBuilder() + .setCatalog(ICEBERG_CATALOG) + .setSchema("tpch") + .build()).build(); + + queryRunner.installPlugin(new TpchPlugin()); + queryRunner.createCatalog("tpch", "tpch"); + + Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data"); + queryRunner.installPlugin(new TestingIcebergPlugin( + dataDir, + Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), + Optional.empty(), + EMPTY_MODULE)); + queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.of("iceberg.register-table-procedure.enabled", "true")); + queryRunner.execute("CREATE SCHEMA iceberg.tpch"); + return queryRunner; } @BeforeAll diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java index 0f54199c85e79..98504649f63ce 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java @@ -21,6 +21,7 @@ import io.trino.plugin.base.CatalogName; import io.trino.plugin.hive.TrinoViewHiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; import io.trino.plugin.hive.orc.OrcReaderConfig; import io.trino.plugin.hive.orc.OrcWriterConfig; @@ -69,7 +70,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.spi.connector.Constraint.alwaysTrue; import static io.trino.spi.type.BigintType.BIGINT; @@ -102,13 +103,16 @@ protected QueryRunner createQueryRunner() { File tempDir = Files.createTempDirectory("test_iceberg_split_source").toFile(); this.metastoreDir = new File(tempDir, "iceberg_data"); - HiveMetastore metastore = createTestingFileHiveMetastore(metastoreDir); DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .setInitialTables(NATION) .setMetastoreDirectory(metastoreDir) .build(); + HiveMetastore metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() 
+ .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + this.fileSystemFactory = getFileSystemFactory(queryRunner); CachingHiveMetastore cachingHiveMetastore = createPerTransactionCache(metastore, 1000); this.catalog = new TrinoHiveCatalog( diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergStatistics.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergStatistics.java index dc86ff1beb924..8b2a0632de086 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergStatistics.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergStatistics.java @@ -597,7 +597,7 @@ public void testAnalyzeSnapshot() long snapshotId = getCurrentSnapshotId(tableName); assertUpdate("INSERT INTO " + tableName + " VALUES 22", 1); assertThatThrownBy(() -> query("ANALYZE \"%s@%d\"".formatted(tableName, snapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, snapshotId)); + .hasMessage(format("line 1:1: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, snapshotId)); assertThat(query("SELECT * FROM " + tableName)) .matches("VALUES 11, 22"); @@ -695,7 +695,7 @@ public void testDropStatsSnapshot() long snapshotId = getCurrentSnapshotId(tableName); assertUpdate("INSERT INTO " + tableName + " VALUES 22", 1); assertThatThrownBy(() -> query("ALTER TABLE \"%s@%d\" EXECUTE DROP_EXTENDED_STATS".formatted(tableName, snapshotId))) - .hasMessage(format("Invalid Iceberg table name: %s@%d", tableName, snapshotId)); + .hasMessage(format("line 1:7: Table 'iceberg.tpch.\"%s@%s\"' does not exist", tableName, snapshotId)); assertThat(query("SELECT * FROM " + tableName)) .matches("VALUES 11, 22"); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableName.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableName.java index 5758307909a38..2be5163468466 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableName.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableName.java @@ -15,11 +15,8 @@ import org.junit.jupiter.api.Test; -import java.util.Optional; - -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestIcebergTableName { @@ -30,16 +27,16 @@ public void testParse() assertParseNameAndType("abc$history", "abc", TableType.HISTORY); assertParseNameAndType("abc$snapshots", "abc", TableType.SNAPSHOTS); - assertNoValidTableType("abc$data"); - assertInvalid("abc@123", "Invalid Iceberg table name: abc@123"); - assertInvalid("abc@xyz", "Invalid Iceberg table name: abc@xyz"); - assertNoValidTableType("abc$what"); - assertInvalid("abc@123$data@456", "Invalid Iceberg table name: abc@123$data@456"); - assertInvalid("abc@123$snapshots", "Invalid Iceberg table name: abc@123$snapshots"); - assertInvalid("abc$snapshots@456", "Invalid Iceberg table name: abc$snapshots@456"); - assertInvalid("xyz$data@456", "Invalid Iceberg table name: xyz$data@456"); - assertInvalid("abc$partitions@456", "Invalid Iceberg table name: abc$partitions@456"); - assertInvalid("abc$manifests@456", "Invalid Iceberg table name: abc$manifests@456"); + assertInvalid("abc$data"); + assertInvalid("abc@123"); + assertInvalid("abc@xyz"); + 
assertInvalid("abc$what"); + assertInvalid("abc@123$data@456"); + assertInvalid("abc@123$snapshots"); + assertInvalid("abc$snapshots@456"); + assertInvalid("xyz$data@456"); + assertInvalid("abc$partitions@456"); + assertInvalid("abc$manifests@456"); } @Test @@ -47,28 +44,47 @@ public void testIsDataTable() { assertThat(IcebergTableName.isDataTable("abc")).isTrue(); - assertThat(IcebergTableName.isDataTable("abc$data")).isFalse(); // it's invalid + assertThatThrownBy(() -> IcebergTableName.isDataTable("abc$data")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$data"); + assertThat(IcebergTableName.isDataTable("abc$history")).isFalse(); - assertThat(IcebergTableName.isDataTable("abc$invalid")).isFalse(); + + assertThatThrownBy(() -> IcebergTableName.isDataTable("abc$invalid")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$invalid"); } @Test public void testTableNameFrom() { assertThat(IcebergTableName.tableNameFrom("abc")).isEqualTo("abc"); - assertThat(IcebergTableName.tableNameFrom("abc$data")).isEqualTo("abc"); + + assertThatThrownBy(() -> IcebergTableName.tableNameFrom("abc$data")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$data"); + assertThat(IcebergTableName.tableNameFrom("abc$history")).isEqualTo("abc"); - assertThat(IcebergTableName.tableNameFrom("abc$invalid")).isEqualTo("abc"); + + assertThatThrownBy(() -> IcebergTableName.tableNameFrom("abc$invalid")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$invalid"); } @Test public void testTableTypeFrom() { - assertThat(IcebergTableName.tableTypeFrom("abc")).isEqualTo(Optional.of(TableType.DATA)); - assertThat(IcebergTableName.tableTypeFrom("abc$data")).isEqualTo(Optional.empty()); // it's invalid - assertThat(IcebergTableName.tableTypeFrom("abc$history")).isEqualTo(Optional.of(TableType.HISTORY)); + assertThat(IcebergTableName.tableTypeFrom("abc")).isEqualTo(TableType.DATA); + + assertThatThrownBy(() -> IcebergTableName.tableTypeFrom("abc$data")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$data"); + + assertThat(IcebergTableName.tableTypeFrom("abc$history")).isEqualTo(TableType.HISTORY); - assertThat(IcebergTableName.tableTypeFrom("abc$invalid")).isEqualTo(Optional.empty()); + assertThatThrownBy(() -> IcebergTableName.tableTypeFrom("abc$invalid")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: abc$invalid"); } @Test @@ -78,22 +94,19 @@ public void testTableNameWithType() assertThat(IcebergTableName.tableNameWithType("abc", TableType.HISTORY)).isEqualTo("abc$history"); } - private static void assertInvalid(String inputName, String message) + private static void assertInvalid(String inputName) { - assertTrinoExceptionThrownBy(() -> IcebergTableName.tableTypeFrom(inputName)) - .hasErrorCode(NOT_SUPPORTED) - .hasMessage(message); - } + assertThat(IcebergTableName.isIcebergTableName(inputName)).isFalse(); - private static void assertNoValidTableType(String inputName) - { - assertThat(IcebergTableName.tableTypeFrom(inputName)) - .isEmpty(); + assertThatThrownBy(() -> IcebergTableName.tableTypeFrom(inputName)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid Iceberg table name: " + inputName); } private static void assertParseNameAndType(String inputName, String tableName, TableType tableType) { + 
assertThat(IcebergTableName.isIcebergTableName(inputName)).isTrue(); assertThat(IcebergTableName.tableNameFrom(inputName)).isEqualTo(tableName); - assertThat(IcebergTableName.tableTypeFrom(inputName)).isEqualTo(Optional.of(tableType)); + assertThat(IcebergTableName.tableTypeFrom(inputName)).isEqualTo(tableType); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithCustomLocation.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithCustomLocation.java index eb2cf82fe9e90..93786b701d8a8 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithCustomLocation.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithCustomLocation.java @@ -15,26 +15,22 @@ import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.File; import java.io.IOException; -import java.nio.file.Files; import java.util.Map; import java.util.Optional; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.DataFileRecord.toDataFileRecord; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.testing.TestingConnectorSession.SESSION; import static java.lang.String.format; @@ -43,21 +39,22 @@ public class TestIcebergTableWithCustomLocation extends AbstractTestQueryFramework { - private FileHiveMetastore metastore; - private File metastoreDir; + private HiveMetastore metastore; private TrinoFileSystem fileSystem; @Override protected DistributedQueryRunner createQueryRunner() throws Exception { - metastoreDir = Files.createTempDirectory("test_iceberg").toFile(); - metastore = createTestingFileHiveMetastore(metastoreDir); - - return IcebergQueryRunner.builder() + DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .setIcebergProperties(Map.of("iceberg.unique-table-location", "true")) - .setMetastoreDirectory(metastoreDir) .build(); + + metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + return queryRunner; } @BeforeAll @@ -66,13 +63,6 @@ public void initFileSystem() fileSystem = getFileSystemFactory(getDistributedQueryRunner()).create(SESSION); } - @AfterAll - public void tearDown() - throws IOException - { - deleteRecursively(metastoreDir.toPath(), ALLOW_INSECURE); - } - @Test public void testTableHasUuidSuffixInLocation() { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithExternalLocation.java 
b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithExternalLocation.java index a8d8a88cf42ba..1c297f3edac57 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithExternalLocation.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergTableWithExternalLocation.java @@ -15,25 +15,23 @@ import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; +import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import java.io.File; import java.io.IOException; -import java.nio.file.Files; +import java.util.Optional; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.DataFileRecord.toDataFileRecord; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.TestingNames.randomNameSuffix; @@ -45,20 +43,21 @@ public class TestIcebergTableWithExternalLocation extends AbstractTestQueryFramework { - private FileHiveMetastore metastore; - private File metastoreDir; + private HiveMetastore metastore; private TrinoFileSystem fileSystem; @Override protected DistributedQueryRunner createQueryRunner() throws Exception { - metastoreDir = Files.createTempDirectory("test_iceberg").toFile(); - metastore = createTestingFileHiveMetastore(metastoreDir); - - return IcebergQueryRunner.builder() - .setMetastoreDirectory(metastoreDir) + DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .build(); + + metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + + return queryRunner; } @BeforeAll @@ -67,13 +66,6 @@ public void initFileSystem() fileSystem = getFileSystemFactory(getDistributedQueryRunner()).create(SESSION); } - @AfterAll - public void tearDown() - throws IOException - { - deleteRecursively(metastoreDir.toPath(), ALLOW_INSECURE); - } - @Test public void testCreateAndDrop() throws IOException diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java index 89afa7897ee25..b75eef99f21dc 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java @@ -22,6 +22,7 @@ import io.trino.plugin.blackhole.BlackHolePlugin; import io.trino.plugin.hive.TrinoViewHiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import 
io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; @@ -40,7 +41,6 @@ import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import io.trino.testing.sql.TestTable; -import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; @@ -62,16 +62,12 @@ import org.apache.iceberg.io.FileIO; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import java.io.Closeable; -import java.io.File; -import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.file.Files; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -86,10 +82,8 @@ import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory; import static io.trino.plugin.iceberg.IcebergUtil.loadIcebergTable; import static io.trino.spi.type.IntegerType.INTEGER; @@ -112,23 +106,20 @@ public class TestIcebergV2 extends AbstractTestQueryFramework { private HiveMetastore metastore; - private java.nio.file.Path tempDir; - private File metastoreDir; private TrinoFileSystemFactory fileSystemFactory; @Override protected QueryRunner createQueryRunner() throws Exception { - tempDir = Files.createTempDirectory("test_iceberg_v2"); - metastoreDir = tempDir.resolve("iceberg_data").toFile(); - metastore = createTestingFileHiveMetastore(metastoreDir); - DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .setInitialTables(NATION) - .setMetastoreDirectory(metastoreDir) .build(); + metastore = ((IcebergConnector) queryRunner.getCoordinator().getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + try { queryRunner.installPlugin(new BlackHolePlugin()); queryRunner.createCatalog("blackhole", "blackhole"); @@ -147,13 +138,6 @@ public void initFileSystemFactory() fileSystemFactory = getFileSystemFactory(getDistributedQueryRunner()); } - @AfterAll - public void tearDown() - throws IOException - { - deleteRecursively(tempDir, ALLOW_INSECURE); - } - @Test public void testSettingFormatVersion() { @@ -195,12 +179,9 @@ public void testV2TableWithPositionDelete() String dataFilePath = (String) computeActual("SELECT file_path FROM \"" + tableName + "$files\" LIMIT 1").getOnlyValue(); - Path metadataDir = new Path(metastoreDir.toURI()); - String deleteFileName = "delete_file_" + UUID.randomUUID(); FileIO fileIo = new ForwardingFileIo(fileSystemFactory.create(SESSION)); - Path path = new Path(metadataDir, deleteFileName); - PositionDeleteWriter writer = Parquet.writeDeletes(fileIo.newOutputFile(path.toString())) + 
PositionDeleteWriter writer = Parquet.writeDeletes(fileIo.newOutputFile("local:///delete_file_" + UUID.randomUUID())) .createWriterFunc(GenericParquetWriter::buildWriter) .forTable(icebergTable) .overwrite() @@ -225,7 +206,7 @@ public void testV2TableWithEqualityDelete() String tableName = "test_v2_equality_delete" + randomNameSuffix(); assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1"); // nationkey is before the equality delete column in the table schema, comment is after assertQuery("SELECT nationkey, comment FROM " + tableName, "SELECT nationkey, comment FROM nation WHERE regionkey != 1"); @@ -254,7 +235,7 @@ public void testV2TableWithEqualityDeleteWhenColumnIsNested() "SELECT regionkey, ARRAY[1,2] array_column, MAP(ARRAY[1], ARRAY[2]) map_column, " + "CAST(ROW(1, 2e0) AS ROW(x BIGINT, y DOUBLE)) row_column FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); assertQuery("SELECT array_column[1], map_column[1], row_column.x FROM " + tableName, "SELECT 1, 2, 1 FROM nation WHERE regionkey != 1"); } @@ -266,7 +247,7 @@ public void testOptimizingV2TableRemovesEqualityDeletesWhenWholeTableIsScanned() assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey']) AS SELECT * FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); assertThat(icebergTable.currentSnapshot().summary()).containsEntry("total-equality-deletes", "0"); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); List initialActiveFiles = getActiveFiles(tableName); query("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1"); @@ -286,7 +267,7 @@ public void testOptimizingV2TableDoesntRemoveEqualityDeletesWhenOnlyPartOfTheTab Table icebergTable = loadTable(tableName); assertThat(icebergTable.currentSnapshot().summary()).containsEntry("total-equality-deletes", "0"); List initialActiveFiles = getActiveFiles(tableName); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); query("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE WHERE regionkey != 1"); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1"); // nationkey is before the equality delete column in the table schema, comment is after @@ -303,7 +284,7 @@ public void testSelectivelyOptimizingLeavesEqualityDeletes() String tableName = "test_selectively_optimizing_leaves_eq_deletes_" + randomNameSuffix(); 
assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['nationkey']) AS SELECT * FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); query("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE WHERE nationkey < 5"); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1 OR nationkey != 1"); assertThat(loadTable(tableName).currentSnapshot().summary()).containsEntry("total-equality-deletes", "1"); @@ -439,7 +420,7 @@ public void testOptimizingWholeTableRemovesEqualityDeletes() String tableName = "test_optimizing_whole_table_removes_eq_deletes_" + randomNameSuffix(); assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['nationkey']) AS SELECT * FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); query("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1 OR nationkey != 1"); assertThat(loadTable(tableName).currentSnapshot().summary()).containsEntry("total-equality-deletes", "0"); @@ -472,7 +453,7 @@ public void testOptimizingPartitionsOfV2TableWithGlobalEqualityDeleteFile() assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey']) AS SELECT * FROM tpch.tiny.nation", 25); Table icebergTable = loadTable(tableName); assertThat(icebergTable.currentSnapshot().summary()).containsEntry("total-equality-deletes", "0"); - writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[]{1L}))); + writeEqualityDeleteToNationTable(icebergTable, Optional.of(icebergTable.spec()), Optional.of(new PartitionData(new Long[] {1L}))); List initialActiveFiles = getActiveFiles(tableName); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation WHERE regionkey != 1"); query("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE WHERE regionkey != 1"); @@ -483,8 +464,8 @@ public void testOptimizingPartitionsOfV2TableWithGlobalEqualityDeleteFile() List updatedFiles = getActiveFiles(tableName); assertThat(updatedFiles) .doesNotContain(initialActiveFiles.stream() - .filter(path -> !path.contains("regionkey=1")) - .toArray(String[]::new)); + .filter(path -> !path.contains("regionkey=1")) + .toArray(String[]::new)); } @Test @@ -761,7 +742,7 @@ public void testFilesTable() throws Exception { String tableName = "test_files_table_" + randomNameSuffix(); - String tableLocation = metastoreDir.getPath() + "/" + tableName; + String tableLocation = "local:///" + tableName; assertUpdate("CREATE TABLE " + tableName + " WITH (location = '" + tableLocation + "', format_version = 2) AS SELECT * FROM tpch.tiny.nation", 25); BaseTable table = loadTable(tableName); Metrics metrics = new Metrics( @@ -974,7 +955,7 @@ private void writeEqualityDeleteToNationTableWithDeleteColumns( List equalityDeleteFieldIds = deleteColumns.stream() .map(name -> deleteRowSchema.findField(name).fieldId()) .collect(toImmutableList()); - 
writeEqualityDeleteToNationTableWithDeleteColumns(icebergTable, partitionSpec, partitionData, overwriteValues, deleteRowSchema, equalityDeleteFieldIds); + writeEqualityDeleteToNationTableWithDeleteColumns(icebergTable, partitionSpec, partitionData, overwriteValues, deleteRowSchema, equalityDeleteFieldIds); } private void writeEqualityDeleteToNationTableWithDeleteColumns( @@ -986,11 +967,9 @@ private void writeEqualityDeleteToNationTableWithDeleteColumns( List equalityDeleteFieldIds) throws Exception { - Path metadataDir = new Path(metastoreDir.toURI()); - String deleteFileName = "delete_file_" + UUID.randomUUID(); FileIO fileIo = new ForwardingFileIo(fileSystemFactory.create(SESSION)); - Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(fileIo.newOutputFile(new Path(metadataDir, deleteFileName).toString())) + Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(fileIo.newOutputFile("local:///delete_file_" + UUID.randomUUID())) .forTable(icebergTable) .rowSchema(deleteRowSchema) .createWriterFunc(GenericParquetWriter::buildWriter) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java index 6f1365a367904..cd10f096b7759 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java @@ -19,7 +19,7 @@ import io.trino.metadata.InternalFunctionBundle; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.spi.security.PrincipalType; import io.trino.sql.planner.assertions.BasePushdownPlanTest; import io.trino.sql.tree.LongLiteral; @@ -35,9 +35,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; import static io.trino.SystemSessionProperties.TASK_MAX_WRITER_COUNT; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree; import static io.trino.sql.planner.assertions.PlanMatchPattern.values; import static io.trino.testing.TestingSession.testSessionBuilder; @@ -66,7 +64,6 @@ protected LocalQueryRunner createLocalQueryRunner() catch (IOException e) { throw new UncheckedIOException(e); } - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); LocalQueryRunner queryRunner = LocalQueryRunner.create(session); InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); @@ -75,9 +72,13 @@ protected LocalQueryRunner createLocalQueryRunner() queryRunner.createCatalog( ICEBERG_CATALOG, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE), + new TestingIcebergConnectorFactory(baseDir.toPath()), ImmutableMap.of()); + HiveMetastore metastore = ((IcebergConnector) queryRunner.getConnector(ICEBERG_CATALOG)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + Database database = Database.builder() .setDatabaseName(SCHEMA_NAME) 
.setOwnerName(Optional.of("public")) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSharedHiveMetastore.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSharedHiveMetastore.java index 130edc81cd825..73c16750d31da 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSharedHiveMetastore.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSharedHiveMetastore.java @@ -27,7 +27,6 @@ import java.nio.file.Path; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; import static io.trino.testing.QueryAssertions.copyTpchTables; @@ -80,7 +79,7 @@ protected QueryRunner createQueryRunner() "hive.metastore.catalog.dir", dataDirectory.toString(), "iceberg.hive-catalog-name", "hive")); - queryRunner.installPlugin(new TestingHivePlugin(createTestingFileHiveMetastore(dataDirectory.toFile()))); + queryRunner.installPlugin(new TestingHivePlugin(dataDirectory)); queryRunner.createCatalog(HIVE_CATALOG, "hive", ImmutableMap.of("hive.allow-drop-table", "true")); queryRunner.createCatalog( "hive_with_redirections", @@ -105,12 +104,12 @@ public void cleanup() @Override protected String getExpectedHiveCreateSchema(String catalogName) { - String expectedHiveCreateSchema = "CREATE SCHEMA %s.%s\n" + - "WITH (\n" + - " location = 'file:%s/%s'\n" + - ")"; - - return format(expectedHiveCreateSchema, catalogName, schema, dataDirectory, schema); + return """ + CREATE SCHEMA %s.%s + WITH ( + location = 'local:///%s' + )""" + .formatted(catalogName, schema, schema); } @Override diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java index fbc412b548c2e..3e61a6e831a92 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java @@ -13,15 +13,22 @@ */ package io.trino.plugin.iceberg; +import com.google.common.collect.ImmutableMap; import com.google.inject.Module; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; +import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorContext; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.Map; import java.util.Optional; +import static com.google.inject.multibindings.MapBinder.newMapBinder; +import static com.google.inject.util.Modules.EMPTY_MODULE; +import static io.airlift.configuration.ConfigBinder.configBinder; import static io.trino.plugin.iceberg.InternalIcebergConnectorFactory.createConnector; import static java.util.Objects.requireNonNull; @@ -32,11 +39,27 @@ public class TestingIcebergConnectorFactory private final Optional fileSystemFactory; private final Module module; - public TestingIcebergConnectorFactory(Optional icebergCatalogModule, Optional fileSystemFactory, Module module) + public TestingIcebergConnectorFactory(Path localFileSystemRootPath) { + this(localFileSystemRootPath, Optional.empty(), Optional.empty(), EMPTY_MODULE); + } + + @Deprecated + public TestingIcebergConnectorFactory( + Path 
localFileSystemRootPath, + Optional icebergCatalogModule, + Optional fileSystemFactory, + Module module) + { + localFileSystemRootPath.toFile().mkdirs(); this.icebergCatalogModule = requireNonNull(icebergCatalogModule, "icebergCatalogModule is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); - this.module = requireNonNull(module, "module is null"); + this.module = binder -> { + binder.install(module); + newMapBinder(binder, String.class, TrinoFileSystemFactory.class) + .addBinding("local").toInstance(new LocalFileSystemFactory(localFileSystemRootPath)); + configBinder(binder).bindConfigDefaults(FileHiveMetastoreConfig.class, config -> config.setCatalogDirectory("local:///")); + }; } @Override @@ -48,6 +71,12 @@ public String getName() @Override public Connector create(String catalogName, Map config, ConnectorContext context) { + if (!config.containsKey("iceberg.catalog.type")) { + config = ImmutableMap.builder() + .putAll(config) + .put("iceberg.catalog.type", "TESTING_FILE_METASTORE") + .buildOrThrow(); + } return createConnector(catalogName, config, context, module, icebergCatalogModule, fileSystemFactory); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java index 7ed7cb4c18c81..3ea6e645d32d3 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java @@ -18,21 +18,31 @@ import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.spi.connector.ConnectorFactory; +import java.nio.file.Path; import java.util.List; import java.util.Optional; import static com.google.common.base.Verify.verify; +import static com.google.inject.util.Modules.EMPTY_MODULE; import static java.util.Objects.requireNonNull; public class TestingIcebergPlugin extends IcebergPlugin { + private final Path localFileSystemRootPath; private final Optional icebergCatalogModule; private final Optional fileSystemFactory; private final Module module; - public TestingIcebergPlugin(Optional icebergCatalogModule, Optional fileSystemFactory, Module module) + public TestingIcebergPlugin(Path localFileSystemRootPath) { + this(localFileSystemRootPath, Optional.empty(), Optional.empty(), EMPTY_MODULE); + } + + @Deprecated + public TestingIcebergPlugin(Path localFileSystemRootPath, Optional icebergCatalogModule, Optional fileSystemFactory, Module module) + { + this.localFileSystemRootPath = requireNonNull(localFileSystemRootPath, "localFileSystemRootPath is null"); this.icebergCatalogModule = requireNonNull(icebergCatalogModule, "icebergCatalogModule is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.module = requireNonNull(module, "module is null"); @@ -44,6 +54,6 @@ public Iterable getConnectorFactories() List connectorFactories = ImmutableList.copyOf(super.getConnectorFactories()); verify(connectorFactories.size() == 1, "Unexpected connector factories: %s", connectorFactories); - return ImmutableList.of(new TestingIcebergConnectorFactory(icebergCatalogModule, fileSystemFactory, module)); + return ImmutableList.of(new TestingIcebergConnectorFactory(localFileSystemRootPath, icebergCatalogModule, fileSystemFactory, module)); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/BaseTrinoCatalogTest.java 
b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/BaseTrinoCatalogTest.java index 36b15e3942180..8ed61c8af764a 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/BaseTrinoCatalogTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/BaseTrinoCatalogTest.java @@ -29,6 +29,7 @@ import io.trino.spi.security.PrincipalType; import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.type.VarcharType; +import io.trino.util.AutoCloseableCloser; import org.apache.iceberg.NullOrder; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; @@ -39,12 +40,12 @@ import org.junit.jupiter.api.Test; import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Map; import java.util.Optional; +import java.util.UUID; import static io.airlift.json.JsonCodec.jsonCodec; import static io.trino.plugin.hive.HiveErrorCode.HIVE_DATABASE_LOCATION_ERROR; @@ -73,7 +74,7 @@ public void testCreateNamespaceWithLocation() TrinoCatalog catalog = createTrinoCatalog(false); String namespace = "test_create_namespace_with_location_" + randomNameSuffix(); Map namespaceProperties = new HashMap<>(defaultNamespaceProperties(namespace)); - String namespaceLocation = (String) namespaceProperties.computeIfAbsent(LOCATION_PROPERTY, ignored -> "/a/path/"); + String namespaceLocation = (String) namespaceProperties.computeIfAbsent(LOCATION_PROPERTY, ignored -> "local:///a/path/"); namespaceProperties = ImmutableMap.copyOf(namespaceProperties); catalog.createNamespace(SESSION, namespace, namespaceProperties, new TrinoPrincipal(PrincipalType.USER, SESSION.getUser())); assertThat(catalog.listNamespaces(SESSION)).contains(namespace); @@ -300,16 +301,9 @@ public void testUseUniqueTableLocations() String table = "tableName"; SchemaTableName schemaTableName = new SchemaTableName(namespace, table); Map namespaceProperties = new HashMap<>(defaultNamespaceProperties(namespace)); - String namespaceLocation = (String) namespaceProperties.computeIfAbsent(LOCATION_PROPERTY, ignored -> { - try { - Path tmpDirectory = Files.createTempDirectory("iceberg_catalog_test_rename_table_"); - tmpDirectory.toFile().deleteOnExit(); - return tmpDirectory.toString(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - }); + String namespaceLocation = (String) namespaceProperties.computeIfAbsent( + LOCATION_PROPERTY, + ignored -> "local:///iceberg_catalog_test_rename_table_" + UUID.randomUUID()); catalog.createNamespace(SESSION, namespace, namespaceProperties, new TrinoPrincipal(PrincipalType.USER, SESSION.getUser())); try { @@ -392,6 +386,54 @@ public void testView() } } + @Test + public void testListTables() + throws Exception + { + TrinoCatalog catalog = createTrinoCatalog(false); + TrinoPrincipal principal = new TrinoPrincipal(PrincipalType.USER, SESSION.getUser()); + + try (AutoCloseableCloser closer = AutoCloseableCloser.create()) { + String ns1 = "ns1" + randomNameSuffix(); + String ns2 = "ns2" + randomNameSuffix(); + catalog.createNamespace(SESSION, ns1, defaultNamespaceProperties(ns1), principal); + closer.register(() -> catalog.dropNamespace(SESSION, ns1)); + catalog.createNamespace(SESSION, ns2, defaultNamespaceProperties(ns2), principal); + closer.register(() -> catalog.dropNamespace(SESSION, ns2)); + + SchemaTableName table1 = new SchemaTableName(ns1, "t1"); + SchemaTableName table2 = new SchemaTableName(ns2, "t2"); + 
catalog.newCreateTableTransaction( + SESSION, + table1, + new Schema(Types.NestedField.of(1, true, "col1", Types.LongType.get())), + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + arbitraryTableLocation(catalog, SESSION, table1), + ImmutableMap.of()) + .commitTransaction(); + closer.register(() -> catalog.dropTable(SESSION, table1)); + + catalog.newCreateTableTransaction( + SESSION, + table2, + new Schema(Types.NestedField.of(1, true, "col1", Types.LongType.get())), + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + arbitraryTableLocation(catalog, SESSION, table2), + ImmutableMap.of()) + .commitTransaction(); + closer.register(() -> catalog.dropTable(SESSION, table2)); + + // No namespace provided, all tables across all namespaces should be returned + assertThat(catalog.listTables(SESSION, Optional.empty())).containsAll(ImmutableList.of(table1, table2)); + // Namespace is provided and exists + assertThat(catalog.listTables(SESSION, Optional.of(ns1))).isEqualTo(ImmutableList.of(table1)); + // Namespace is provided and does not exist + assertThat(catalog.listTables(SESSION, Optional.of("non_existing"))).isEmpty(); + } + } + private String arbitraryTableLocation(TrinoCatalog catalog, ConnectorSession session, SchemaTableName schemaTableName) throws Exception { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreCreateTableFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreCreateTableFailure.java deleted file mode 100644 index 373b44231e546..0000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreCreateTableFailure.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.iceberg.catalog.file; - -import io.trino.Session; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; -import io.trino.plugin.iceberg.TestingIcebergPlugin; -import io.trino.spi.connector.SchemaNotFoundException; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.DistributedQueryRunner; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; - -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicReference; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; - -@TestInstance(PER_CLASS) -@Execution(SAME_THREAD) // testException is shared mutable state -public class TestIcebergFileMetastoreCreateTableFailure - extends AbstractTestQueryFramework -{ - private static final String ICEBERG_CATALOG = "iceberg"; - private static final String SCHEMA_NAME = "test_schema"; - - private static final String METADATA_GLOB = "glob:**.metadata.json"; - - private Path dataDirectory; - private HiveMetastore metastore; - private final AtomicReference testException = new AtomicReference<>(); - - @Override - protected DistributedQueryRunner createQueryRunner() - throws Exception - { - this.dataDirectory = Files.createTempDirectory("test_iceberg_create_table_failure"); - // Using FileHiveMetastore as approximation of HMS - this.metastore = new FileHiveMetastore( - new NodeVersion("testversion"), - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreConfig().isHideDeltaLakeTables(), - new FileHiveMetastoreConfig() - .setCatalogDirectory(dataDirectory.toString())) - { - @Override - public synchronized void createTable(Table table, PrincipalPrivileges principalPrivileges) - { - throw testException.get(); - } - }; - - Session session = testSessionBuilder() - .setCatalog(ICEBERG_CATALOG) - .setSchema(SCHEMA_NAME) - .build(); - - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); - queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE)); - queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg"); - queryRunner.execute("CREATE SCHEMA " + SCHEMA_NAME); - - return queryRunner; - } - - @AfterAll - public void cleanup() - throws Exception - { - if (metastore != null) { - metastore.dropDatabase(SCHEMA_NAME, true); - } - if (dataDirectory != null) { - deleteRecursively(dataDirectory, ALLOW_INSECURE); - } - } - - @Test 
- public void testCreateTableFailureMetadataCleanedUp() - { - String exceptionMessage = "Test-simulated metastore schema not found exception"; - testException.set(new SchemaNotFoundException("simulated_test_schema", exceptionMessage)); - testCreateTableFailure(exceptionMessage, false); - } - - @Test - public void testCreateTableFailureMetadataNotCleanedUp() - { - String exceptionMessage = "Test-simulated metastore runtime exception"; - testException.set(new RuntimeException(exceptionMessage)); - testCreateTableFailure(exceptionMessage, true); - } - - protected void testCreateTableFailure(String expectedExceptionMessage, boolean shouldMetadataFileExist) - { - String tableName = "test_create_failure_" + randomNameSuffix(); - String tableLocation = Path.of(dataDirectory.toString(), tableName).toString(); - assertThatThrownBy(() -> getQueryRunner().execute("CREATE TABLE " + tableName + " (a varchar) WITH (location = '" + tableLocation + "')")) - .hasMessageContaining(expectedExceptionMessage); - - Path metadataDirectory = Path.of(tableLocation, "metadata"); - if (shouldMetadataFileExist) { - assertThat(metadataDirectory).as("Metadata file should exist").isDirectoryContaining(METADATA_GLOB); - } - else { - // file cleanup is more conservative since https://github.com/apache/iceberg/pull/8599 - assertThat(metadataDirectory).as("Metadata file should not exist").isDirectoryNotContaining(METADATA_GLOB); - } - } -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreTableOperationsInsertFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreTableOperationsInsertFailure.java deleted file mode 100644 index 8bd537a466e9c..0000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestIcebergFileMetastoreTableOperationsInsertFailure.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.iceberg.catalog.file; - -import com.google.common.collect.ImmutableMap; -import io.trino.Session; -import io.trino.metadata.InternalFunctionBundle; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; -import io.trino.plugin.iceberg.IcebergPlugin; -import io.trino.plugin.iceberg.TestingIcebergConnectorFactory; -import io.trino.spi.security.PrincipalType; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.LocalQueryRunner; -import org.apache.iceberg.exceptions.CommitStateUnknownException; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.io.File; -import java.nio.file.Files; -import java.util.Optional; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestIcebergFileMetastoreTableOperationsInsertFailure - extends AbstractTestQueryFramework -{ - private static final String ICEBERG_CATALOG = "iceberg"; - private static final String SCHEMA_NAME = "test_schema"; - private File baseDir; - - @Override - protected LocalQueryRunner createQueryRunner() - throws Exception - { - Session session = testSessionBuilder() - .setCatalog(ICEBERG_CATALOG) - .setSchema(SCHEMA_NAME) - .build(); - - baseDir = Files.createTempDirectory(null).toFile(); - - HiveMetastore metastore = new FileHiveMetastore( - new NodeVersion("testversion"), - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreConfig().isHideDeltaLakeTables(), - new FileHiveMetastoreConfig() - .setCatalogDirectory(baseDir.toURI().toString()) - .setMetastoreUser("test")) - { - @Override - public synchronized void replaceTable(String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges) - { - super.replaceTable(databaseName, tableName, newTable, principalPrivileges); - throw new RuntimeException("Test-simulated metastore timeout exception"); - } - }; - LocalQueryRunner queryRunner = LocalQueryRunner.create(session); - - InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); - new IcebergPlugin().getFunctions().forEach(functions::functions); - queryRunner.addFunctions(functions.build()); - - queryRunner.createCatalog( - ICEBERG_CATALOG, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE), - ImmutableMap.of()); - - Database database = Database.builder() - .setDatabaseName(SCHEMA_NAME) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - metastore.createDatabase(database); - - return queryRunner; - } - - @AfterAll - public 
void cleanup() - throws Exception - { - if (baseDir != null) { - deleteRecursively(baseDir.toPath(), ALLOW_INSECURE); - } - } - - @Test - public void testInsertFailureDoesNotCorruptTheTableMetadata() - { - String tableName = "test_insert_failure"; - - getQueryRunner().execute(format("CREATE TABLE %s (a_varchar) AS VALUES ('Trino')", tableName)); - assertThatThrownBy(() -> getQueryRunner().execute("INSERT INTO " + tableName + " VALUES 'rocks'")) - .isInstanceOf(CommitStateUnknownException.class) - .hasMessageContaining("Test-simulated metastore timeout exception"); - assertQuery("SELECT * FROM " + tableName, "VALUES 'Trino', 'rocks'"); - } -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestTrinoHiveCatalogWithFileMetastore.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestTrinoHiveCatalogWithFileMetastore.java index 2798d5ce6d0fb..474fd270a7bca 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestTrinoHiveCatalogWithFileMetastore.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/file/TestTrinoHiveCatalogWithFileMetastore.java @@ -13,7 +13,9 @@ */ package io.trino.plugin.iceberg.catalog.file; +import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.local.LocalFileSystemFactory; import io.trino.plugin.base.CatalogName; import io.trino.plugin.hive.TrinoViewHiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastore; @@ -35,7 +37,6 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createPerTransactionCache; import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -46,8 +47,9 @@ public class TestTrinoHiveCatalogWithFileMetastore extends BaseTrinoCatalogTest { - private HiveMetastore metastore; private Path tempDir; + private TrinoFileSystemFactory fileSystemFactory; + private HiveMetastore metastore; @BeforeAll public void setUp() @@ -55,7 +57,9 @@ public void setUp() { tempDir = Files.createTempDirectory("test_trino_hive_catalog"); File metastoreDir = tempDir.resolve("iceberg_data").toFile(); - metastore = createTestingFileHiveMetastore(metastoreDir); + metastoreDir.mkdirs(); + fileSystemFactory = new LocalFileSystemFactory(metastoreDir.toPath()); + metastore = createTestingFileHiveMetastore(fileSystemFactory, Location.of("local:///")); } @AfterAll @@ -68,7 +72,6 @@ public void tearDown() @Override protected TrinoCatalog createTrinoCatalog(boolean useUniqueTableLocations) { - TrinoFileSystemFactory fileSystemFactory = HDFS_FILE_SYSTEM_FACTORY; CachingHiveMetastore cachingHiveMetastore = createPerTransactionCache(metastore, 1000); return new TrinoHiveCatalog( new CatalogName("catalog"), diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java index 317441595a764..ce44f319d69f0 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java +++ 
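The TestTrinoHiveCatalogWithFileMetastore change above shows the pattern the Iceberg tests move to throughout this diff: a file-backed metastore addressed through a LocalFileSystemFactory and "local:///" locations instead of the HDFS test environment. A minimal sketch of that setup, with the temporary-directory handling as an assumption:

    import io.trino.filesystem.Location;
    import io.trino.filesystem.TrinoFileSystemFactory;
    import io.trino.filesystem.local.LocalFileSystemFactory;
    import io.trino.plugin.hive.metastore.HiveMetastore;

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;

    final class LocalMetastores
    {
        private LocalMetastores() {}

        // Sketch: a FileHiveMetastore whose catalog directory is the root of a
        // local file system, so all metastore paths use the "local:///" scheme.
        static HiveMetastore createLocalMetastore()
                throws IOException
        {
            Path metastoreDir = Files.createTempDirectory("test_metastore");
            TrinoFileSystemFactory fileSystemFactory = new LocalFileSystemFactory(metastoreDir);
            return createTestingFileHiveMetastore(fileSystemFactory, Location.of("local:///"));
        }
    }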
b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogAccessOperations.java @@ -107,6 +107,7 @@ protected QueryRunner createQueryRunner() trackingFileSystemFactory = new TrackingFileSystemFactory(new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS)); queryRunner.installPlugin(new TestingIcebergPlugin( + tmp.toPath(), Optional.empty(), Optional.of(trackingFileSystemFactory), EMPTY_MODULE)); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCreateTableFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCreateTableFailure.java deleted file mode 100644 index 6ee8c7a30fd13..0000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCreateTableFailure.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.iceberg.catalog.glue; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.model.InvalidInputException; -import com.amazonaws.services.glue.model.OperationTimeoutException; -import com.google.common.collect.ImmutableMap; -import io.airlift.log.Logger; -import io.trino.Session; -import io.trino.filesystem.FileEntry; -import io.trino.filesystem.FileIterator; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.metadata.InternalFunctionBundle; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.glue.GlueHiveMetastore; -import io.trino.plugin.iceberg.IcebergPlugin; -import io.trino.plugin.iceberg.TestingIcebergConnectorFactory; -import io.trino.spi.security.PrincipalType; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.LocalQueryRunner; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.lang.reflect.InvocationTargetException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicReference; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.common.reflect.Reflection.newProxy; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingGlueHiveMetastore; -import static io.trino.testing.TestingConnectorSession.SESSION; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -/* - * The test 
currently uses AWS Default Credential Provider Chain, - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default - * on ways to set your AWS credentials which will be needed to run this test. - */ -@TestInstance(PER_CLASS) -public class TestIcebergGlueCreateTableFailure - extends AbstractTestQueryFramework -{ - private static final Logger LOG = Logger.get(TestIcebergGlueCreateTableFailure.class); - - private static final String ICEBERG_CATALOG = "iceberg"; - - private final String schemaName = "test_iceberg_glue_" + randomNameSuffix(); - - private Path dataDirectory; - private TrinoFileSystem fileSystem; - private GlueHiveMetastore glueHiveMetastore; - private final AtomicReference testException = new AtomicReference<>(); - - @Override - protected LocalQueryRunner createQueryRunner() - throws Exception - { - Session session = testSessionBuilder() - .setCatalog(ICEBERG_CATALOG) - .setSchema(schemaName) - .build(); - LocalQueryRunner queryRunner = LocalQueryRunner.create(session); - - AWSGlueAsyncAdapterProvider awsGlueAsyncAdapterProvider = delegate -> newProxy(AWSGlueAsync.class, (proxy, method, methodArgs) -> { - Object result; - if (method.getName().equals("createTable")) { - throw testException.get(); - } - try { - result = method.invoke(delegate, methodArgs); - } - catch (InvocationTargetException e) { - throw e.getCause(); - } - return result; - }); - - InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); - new IcebergPlugin().getFunctions().forEach(functions::functions); - queryRunner.addFunctions(functions.build()); - - queryRunner.createCatalog( - ICEBERG_CATALOG, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergGlueCatalogModule(awsGlueAsyncAdapterProvider)), Optional.empty(), EMPTY_MODULE), - ImmutableMap.of()); - - dataDirectory = Files.createTempDirectory("test_iceberg_create_table_failure"); - dataDirectory.toFile().deleteOnExit(); - - glueHiveMetastore = createTestingGlueHiveMetastore(dataDirectory); - fileSystem = HDFS_FILE_SYSTEM_FACTORY.create(SESSION); - - Database database = Database.builder() - .setDatabaseName(schemaName) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .setLocation(Optional.of(dataDirectory.toString())) - .build(); - glueHiveMetastore.createDatabase(database); - - return queryRunner; - } - - @AfterAll - public void cleanup() - { - try { - if (glueHiveMetastore != null) { - glueHiveMetastore.dropDatabase(schemaName, false); - } - if (dataDirectory != null) { - deleteRecursively(dataDirectory, ALLOW_INSECURE); - } - } - catch (Exception e) { - LOG.error(e, "Failed to clean up Glue database: %s", schemaName); - } - } - - @Test - public void testCreateTableFailureMetadataCleanedUp() - throws Exception - { - final String exceptionMessage = "Test-simulated metastore invalid input exception"; - testException.set(new InvalidInputException(exceptionMessage)); - testCreateTableFailure(exceptionMessage, false); - } - - @Test - public void testCreateTableFailureMetadataNotCleanedUp() - throws Exception - { - final String exceptionMessage = "Test-simulated metastore operation timeout exception"; - testException.set(new OperationTimeoutException(exceptionMessage)); - testCreateTableFailure(exceptionMessage, true); - } - - private void testCreateTableFailure(String expectedExceptionMessage, boolean shouldMetadataFileExist) - throws Exception - { - String tableName = "test_create_failure_" + randomNameSuffix(); - 
assertThatThrownBy(() -> getQueryRunner().execute("CREATE TABLE " + tableName + " (a_varchar) AS VALUES ('Trino')")) - .hasMessageContaining(expectedExceptionMessage); - - assertMetadataLocation(tableName, shouldMetadataFileExist); - } - - protected void assertMetadataLocation(String tableName, boolean shouldMetadataFileExist) - throws Exception - { - FileIterator fileIterator = fileSystem.listFiles(Location.of(dataDirectory.toString())); - String tableLocationPrefix = Path.of(dataDirectory.toString(), tableName).toString(); - boolean metadataFileFound = false; - while (fileIterator.hasNext()) { - FileEntry fileEntry = fileIterator.next(); - String location = fileEntry.location().toString(); - if (location.startsWith(tableLocationPrefix) && location.endsWith(".metadata.json")) { - metadataFileFound = true; - break; - } - } - if (shouldMetadataFileExist) { - assertThat(metadataFileFound).as("Metadata file should exist").isTrue(); - } - else { - assertThat(metadataFileFound).as("Metadata file should not exist").isFalse(); - } - } -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java index cc358d58da676..10a3447da4354 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueTableOperationsInsertFailure.java @@ -89,14 +89,14 @@ protected LocalQueryRunner createQueryRunner() new IcebergPlugin().getFunctions().forEach(functions::functions); queryRunner.addFunctions(functions.build()); + Path dataDirectory = Files.createTempDirectory("iceberg_data"); + dataDirectory.toFile().deleteOnExit(); + queryRunner.createCatalog( ICEBERG_CATALOG, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergGlueCatalogModule(awsGlueAsyncAdapterProvider)), Optional.empty(), EMPTY_MODULE), + new TestingIcebergConnectorFactory(dataDirectory, Optional.of(new TestingIcebergGlueCatalogModule(awsGlueAsyncAdapterProvider)), Optional.empty(), EMPTY_MODULE), ImmutableMap.of()); - Path dataDirectory = Files.createTempDirectory("iceberg_data"); - dataDirectory.toFile().deleteOnExit(); - glueHiveMetastore = createTestingGlueHiveMetastore(dataDirectory); Database database = Database.builder() diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java index 326b38011ba3b..bc63f29b491ce 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java @@ -19,7 +19,7 @@ import io.trino.plugin.iceberg.IcebergQueryRunner; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import java.nio.file.Path; import java.util.Set; @@ -104,8 +104,16 @@ protected Set getAllDataFilesFromTableDirectory(String tableLocation) .collect(Collectors.toUnmodifiableSet()); } - @Test(dataProvider = "locationPatternsDataProvider") - public void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern 
locationPattern) + @Test + public void testAnalyzeWithProvidedTableLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + testAnalyzeWithProvidedTableLocation(false, locationPattern); + testAnalyzeWithProvidedTableLocation(true, locationPattern); + } + } + + private void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_analyze_" + randomNameSuffix(); String location = locationPattern.locationForTable(bucketName, schemaName, tableName); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestSharedGlueMetastore.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestSharedGlueMetastore.java index 8b18b345f2e34..a82525f3b53e6 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestSharedGlueMetastore.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestSharedGlueMetastore.java @@ -94,7 +94,7 @@ protected QueryRunner createQueryRunner() "iceberg.hive-catalog-name", "hive")); this.glueMetastore = createTestingGlueHiveMetastore(dataDirectory); - queryRunner.installPlugin(new TestingHivePlugin(glueMetastore)); + queryRunner.installPlugin(new TestingHivePlugin(queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data"), glueMetastore)); queryRunner.createCatalog(HIVE_CATALOG, "hive"); queryRunner.createCatalog( "hive_with_redirections", diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/hms/TestIcebergHiveMetastoreTableOperationsReleaseLockFailure.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/hms/TestIcebergHiveMetastoreTableOperationsReleaseLockFailure.java deleted file mode 100644 index 0ee57e0c3f1ff..0000000000000 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/hms/TestIcebergHiveMetastoreTableOperationsReleaseLockFailure.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.iceberg.catalog.hms; - -import com.google.common.collect.ImmutableMap; -import io.trino.Session; -import io.trino.hive.thrift.metastore.Table; -import io.trino.metadata.InternalFunctionBundle; -import io.trino.plugin.hive.metastore.AcidTransactionOwner; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; -import io.trino.plugin.hive.metastore.thrift.InMemoryThriftMetastore; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastore; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreFactory; -import io.trino.plugin.iceberg.IcebergPlugin; -import io.trino.plugin.iceberg.TestingIcebergConnectorFactory; -import io.trino.spi.security.ConnectorIdentity; -import io.trino.spi.security.PrincipalType; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.LocalQueryRunner; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.nio.file.Files; -import java.util.Optional; - -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.lang.String.format; - -public class TestIcebergHiveMetastoreTableOperationsReleaseLockFailure - extends AbstractTestQueryFramework -{ - private static final String ICEBERG_CATALOG = "iceberg"; - private static final String SCHEMA_NAME = "test_schema"; - private File baseDir; - - @Override - protected LocalQueryRunner createQueryRunner() - throws Exception - { - Session session = testSessionBuilder() - .setCatalog(ICEBERG_CATALOG) - .setSchema(SCHEMA_NAME) - .build(); - - baseDir = Files.createTempDirectory(null).toFile(); - baseDir.deleteOnExit(); - - LocalQueryRunner queryRunner = LocalQueryRunner.create(session); - - InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); - new IcebergPlugin().getFunctions().forEach(functions::functions); - queryRunner.addFunctions(functions.build()); - - ThriftMetastore thriftMetastore = createMetastoreWithReleaseLockFailure(); - HiveMetastore hiveMetastore = new BridgingHiveMetastore(thriftMetastore); - TestingIcebergHiveMetastoreCatalogModule testModule = new TestingIcebergHiveMetastoreCatalogModule(hiveMetastore, buildThriftMetastoreFactory(thriftMetastore)); - - queryRunner.createCatalog( - ICEBERG_CATALOG, - new TestingIcebergConnectorFactory(Optional.of(testModule), Optional.empty(), EMPTY_MODULE), - ImmutableMap.of()); - - Database database = Database.builder() - .setDatabaseName(SCHEMA_NAME) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - hiveMetastore.createDatabase(database); - - return queryRunner; - } - - @Test - public void testReleaseLockFailureDoesNotCorruptTheTable() - { - String tableName = "test_release_lock_failure"; - query(format("CREATE TABLE %s (a_varchar) AS VALUES ('Trino')", tableName)); - query(format("INSERT INTO %s VALUES 'rocks'", tableName)); - assertQuery("SELECT * FROM " + tableName, "VALUES 'Trino', 'rocks'"); - } - - private InMemoryThriftMetastore createMetastoreWithReleaseLockFailure() - { - return new InMemoryThriftMetastore(new File(baseDir + "/metastore"), new ThriftMetastoreConfig()) { - @Override - public long acquireTableExclusiveLock(AcidTransactionOwner transactionOwner, String queryId, String dbName, String tableName) - { - // returning dummy 
lock - return 100; - } - - @Override - public void releaseTableLock(long lockId) - { - throw new RuntimeException("Release table lock has failed!"); - } - - @Override - public synchronized void createTable(Table table) - { - // InMemoryThriftMetastore throws an exception if the table has any privileges set - table.setPrivileges(null); - super.createTable(table); - } - }; - } - - private static ThriftMetastoreFactory buildThriftMetastoreFactory(ThriftMetastore thriftMetastore) - { - return new ThriftMetastoreFactory() - { - @Override - public boolean isImpersonationEnabled() - { - return false; - } - - @Override - public ThriftMetastore createMetastore(Optional identity) - { - return thriftMetastore; - } - }; - } -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/containers/KeycloakContainer.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/containers/KeycloakContainer.java index a43074b990629..4ba4fa4ea869c 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/containers/KeycloakContainer.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/containers/KeycloakContainer.java @@ -30,7 +30,7 @@ public class KeycloakContainer extends BaseTestContainer { - public static final String DEFAULT_IMAGE = "quay.io/keycloak/keycloak:21.1.2"; + public static final String DEFAULT_IMAGE = "quay.io/keycloak/keycloak:23.0.3"; public static final String DEFAULT_HOST_NAME = "keycloak"; public static final String DEFAULT_USER_NAME = "admin"; diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java index 900c649508e63..d2f22f1591503 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java @@ -23,12 +23,13 @@ import io.trino.plugin.hive.HiveTransactionHandle; import io.trino.plugin.hive.metastore.Database; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.iceberg.ColumnIdentity; import io.trino.plugin.iceberg.IcebergColumnHandle; +import io.trino.plugin.iceberg.IcebergConnector; import io.trino.plugin.iceberg.IcebergPlugin; import io.trino.plugin.iceberg.IcebergTableHandle; import io.trino.plugin.iceberg.TestingIcebergConnectorFactory; -import io.trino.plugin.iceberg.catalog.file.TestingIcebergFileMetastoreCatalogModule; import io.trino.spi.connector.CatalogHandle; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; @@ -61,8 +62,6 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static com.google.inject.util.Modules.EMPTY_MODULE; -import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.ColumnIdentity.TypeCategory.STRUCT; import static io.trino.plugin.iceberg.ColumnIdentity.primitiveColumnIdentity; import static io.trino.plugin.iceberg.TableType.DATA; @@ -106,16 +105,6 @@ protected Optional createLocalQueryRunner() catch (IOException e) { throw new UncheckedIOException(e); } - metastore = createTestingFileHiveMetastore(baseDir); - Database database = Database.builder() - 
.setDatabaseName(SCHEMA_NAME) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build(); - - metastore.createDatabase(database); - - HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); LocalQueryRunner queryRunner = LocalQueryRunner.create(ICEBERG_SESSION); InternalFunctionBundle.InternalFunctionBundleBuilder functions = InternalFunctionBundle.builder(); @@ -124,9 +113,21 @@ protected Optional createLocalQueryRunner() queryRunner.createCatalog( TEST_CATALOG_NAME, - new TestingIcebergConnectorFactory(Optional.of(new TestingIcebergFileMetastoreCatalogModule(metastore)), Optional.empty(), EMPTY_MODULE), + new TestingIcebergConnectorFactory(baseDir.toPath()), ImmutableMap.of()); catalogHandle = queryRunner.getCatalogHandle(TEST_CATALOG_NAME); + + metastore = ((IcebergConnector) queryRunner.getConnector(TEST_CATALOG_NAME)).getInjector() + .getInstance(HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + Database database = Database.builder() + .setDatabaseName(SCHEMA_NAME) + .setOwnerName(Optional.of("public")) + .setOwnerType(Optional.of(PrincipalType.ROLE)) + .build(); + + metastore.createDatabase(database); + return Optional.of(queryRunner); } diff --git a/plugin/trino-ignite/pom.xml b/plugin/trino-ignite/pom.xml index 211ae7925dc08..a9a5ae86d8954 100644 --- a/plugin/trino-ignite/pom.xml +++ b/plugin/trino-ignite/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-ignite/src/main/java/io/trino/plugin/ignite/IgniteClient.java b/plugin/trino-ignite/src/main/java/io/trino/plugin/ignite/IgniteClient.java index 9309cd5f259cc..9fa2f946b2ade 100644 --- a/plugin/trino-ignite/src/main/java/io/trino/plugin/ignite/IgniteClient.java +++ b/plugin/trino-ignite/src/main/java/io/trino/plugin/ignite/IgniteClient.java @@ -45,6 +45,7 @@ import io.trino.plugin.jdbc.aggregation.ImplementCountDistinct; import io.trino.plugin.jdbc.aggregation.ImplementMinMax; import io.trino.plugin.jdbc.aggregation.ImplementSum; +import io.trino.plugin.jdbc.expression.ComparisonOperator; import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder; import io.trino.plugin.jdbc.expression.ParameterizedExpression; import io.trino.plugin.jdbc.expression.RewriteComparison; @@ -163,8 +164,15 @@ public IgniteClient( JdbcTypeHandle bigintTypeHandle = new JdbcTypeHandle(Types.BIGINT, Optional.of("bigint"), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() - .add(new RewriteComparison(ImmutableSet.of(RewriteComparison.ComparisonOperator.EQUAL, RewriteComparison.ComparisonOperator.NOT_EQUAL))) + .add(new RewriteComparison(ImmutableSet.of(ComparisonOperator.EQUAL, ComparisonOperator.NOT_EQUAL))) .addStandardRules(this::quoted) + .map("$equal(left, right)").to("left = right") + .map("$not_equal(left, right)").to("left <> right") + .map("$is_distinct_from(left, right)").to("left IS DISTINCT FROM right") + .map("$less_than(left, right)").to("left < right") + .map("$less_than_or_equal(left, right)").to("left <= right") + .map("$greater_than(left, right)").to("left > right") + .map("$greater_than_or_equal(left, right)").to("left >= right") .map("$like(value: varchar, pattern: varchar): boolean").to("value LIKE pattern") .map("$not($is_null(value))").to("value IS NOT NULL") .map("$not(value: boolean)").to("NOT value") @@ -570,6 +578,25 @@ public String buildInsertSql(JdbcOutputTableHandle 
handle, List c @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + // Ignite does not support FULL JOIN + if (joinType == JoinType.FULL_OUTER) { + return Optional.empty(); + } + + return super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -584,7 +611,7 @@ public Optional implementJoin( return Optional.empty(); } - return super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); } @Override diff --git a/plugin/trino-jmx/pom.xml b/plugin/trino-jmx/pom.xml index ad2464afa78a2..dd6973b4ac180 100644 --- a/plugin/trino-jmx/pom.xml +++ b/plugin/trino-jmx/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-kafka/pom.xml b/plugin/trino-kafka/pom.xml index 879684c3ced73..3fe24be379bdc 100644 --- a/plugin/trino-kafka/pom.xml +++ b/plugin/trino-kafka/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-kafka/src/test/java/io/trino/plugin/kafka/protobuf/TestProtobufEncoder.java b/plugin/trino-kafka/src/test/java/io/trino/plugin/kafka/protobuf/TestProtobufEncoder.java index 41dc2fae1c8ec..da9b8bae14ca9 100644 --- a/plugin/trino-kafka/src/test/java/io/trino/plugin/kafka/protobuf/TestProtobufEncoder.java +++ b/plugin/trino-kafka/src/test/java/io/trino/plugin/kafka/protobuf/TestProtobufEncoder.java @@ -396,7 +396,7 @@ private void testNestedStructuralDataTypes(String stringData, Integer integerDat BlockBuilder mapBlockBuilder = mapType.createBlockBuilder(null, 1); Block mapBlock = mapType.createBlockFromKeyValue( Optional.empty(), - new int[]{0, 1}, + new int[] {0, 1}, nativeValueToBlock(VARCHAR, utf8Slice("Key")), rowBlockBuilder.build()); mapType.appendTo( @@ -409,12 +409,12 @@ private void testNestedStructuralDataTypes(String stringData, Integer integerDat Block arrayBlock = fromElementBlock( 1, Optional.empty(), - new int[]{0, rowBlockBuilder.getPositionCount()}, + new int[] {0, rowBlockBuilder.getPositionCount()}, rowBlockBuilder.build()); listType.appendTo(arrayBlock, 0, listBlockBuilder); BlockBuilder nestedBlockBuilder = nestedRowType.createBlockBuilder(null, 1); - Block rowBlock = fromFieldBlocks(1, new Block[]{listBlockBuilder.build(), mapBlockBuilder.build(), rowBlockBuilder.build()}); + Block rowBlock = fromFieldBlocks(1, new Block[] {listBlockBuilder.build(), mapBlockBuilder.build(), rowBlockBuilder.build()}); nestedRowType.appendTo(rowBlock, 0, nestedBlockBuilder); rowEncoder.appendColumnValue(nestedBlockBuilder.build(), 0); diff --git a/plugin/trino-kinesis/pom.xml b/plugin/trino-kinesis/pom.xml index 5988c20a55e20..733d4706230a2 100644 --- a/plugin/trino-kinesis/pom.xml +++ b/plugin/trino-kinesis/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestMinimalFunctionality.java 
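The rewrite rules added to IgniteClient above (and to MariaDbClient further down) use a small pattern language: each .map(...).to(...) pair translates one Trino ConnectorExpression call into remote SQL text, and a type class can restrict a rule to particular operand types. A sketch of such a rewriter in isolation, assuming the builder API shown in the diff and a hypothetical double-quote identifier quoting function:

    import com.google.common.collect.ImmutableSet;
    import io.trino.plugin.base.expression.ConnectorExpressionRewriter;
    import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder;
    import io.trino.plugin.jdbc.expression.ParameterizedExpression;

    final class ComparisonPushdownRules
    {
        private ComparisonPushdownRules() {}

        // Sketch: comparisons between numeric operands are rewritten to remote SQL;
        // expressions not matched by any rule simply stay unpushed.
        static ConnectorExpressionRewriter<ParameterizedExpression> build()
        {
            return JdbcConnectorExpressionRewriterBuilder.newBuilder()
                    .addStandardRules(identifier -> "\"" + identifier + "\"") // assumed quoting function
                    .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "double"))
                    .map("$equal(left: numeric_type, right: numeric_type)").to("left = right")
                    .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right")
                    .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right")
                    .build();
        }
    }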
b/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestMinimalFunctionality.java index 18a3297bf2c73..cfa8372decd0f 100644 --- a/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestMinimalFunctionality.java +++ b/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestMinimalFunctionality.java @@ -45,7 +45,7 @@ import java.util.stream.Stream; import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Locale.ENGLISH; import static org.assertj.core.api.Assertions.assertThat; diff --git a/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestRecordAccess.java b/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestRecordAccess.java index 4ee7baffd22e7..0030cd70d5af1 100644 --- a/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestRecordAccess.java +++ b/plugin/trino-kinesis/src/test/java/io/trino/plugin/kinesis/TestRecordAccess.java @@ -43,7 +43,7 @@ import java.util.zip.GZIPOutputStream; import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/plugin/trino-kudu/pom.xml b/plugin/trino-kudu/pom.xml index 0e944903a4f08..61874946a4fc7 100644 --- a/plugin/trino-kudu/pom.xml +++ b/plugin/trino-kudu/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -228,12 +228,6 @@ toxiproxy test - - - org.testng - testng - test - diff --git a/plugin/trino-kudu/src/main/java/io/trino/plugin/kudu/TypeHelper.java b/plugin/trino-kudu/src/main/java/io/trino/plugin/kudu/TypeHelper.java index 5fdd31d30ab66..88763e9525e8d 100644 --- a/plugin/trino-kudu/src/main/java/io/trino/plugin/kudu/TypeHelper.java +++ b/plugin/trino-kudu/src/main/java/io/trino/plugin/kudu/TypeHelper.java @@ -50,23 +50,20 @@ private TypeHelper() {} public static org.apache.kudu.Type toKuduClientType(Type type) { - if (type instanceof VarcharType) { - return org.apache.kudu.Type.STRING; + if (type == BooleanType.BOOLEAN) { + return org.apache.kudu.Type.BOOL; } - if (type.equals(TIMESTAMP_MILLIS)) { - return org.apache.kudu.Type.UNIXTIME_MICROS; + if (type == TinyintType.TINYINT) { + return org.apache.kudu.Type.INT8; } - if (type == BigintType.BIGINT) { - return org.apache.kudu.Type.INT64; + if (type == SmallintType.SMALLINT) { + return org.apache.kudu.Type.INT16; } if (type == IntegerType.INTEGER) { return org.apache.kudu.Type.INT32; } - if (type == SmallintType.SMALLINT) { - return org.apache.kudu.Type.INT16; - } - if (type == TinyintType.TINYINT) { - return org.apache.kudu.Type.INT8; + if (type == BigintType.BIGINT) { + return org.apache.kudu.Type.INT64; } if (type == RealType.REAL) { return org.apache.kudu.Type.FLOAT; @@ -74,20 +71,23 @@ public static org.apache.kudu.Type toKuduClientType(Type type) if (type == DoubleType.DOUBLE) { return org.apache.kudu.Type.DOUBLE; } - if (type == BooleanType.BOOLEAN) { - return org.apache.kudu.Type.BOOL; + if (type instanceof DecimalType) { + return org.apache.kudu.Type.DECIMAL; + } + if (type instanceof CharType) { + return org.apache.kudu.Type.STRING; + } + if (type instanceof VarcharType) { + return 
org.apache.kudu.Type.STRING; } if (type instanceof VarbinaryType) { return org.apache.kudu.Type.BINARY; } - if (type instanceof DecimalType) { - return org.apache.kudu.Type.DECIMAL; - } if (type == DateType.DATE) { return org.apache.kudu.Type.STRING; } - if (type instanceof CharType) { - return org.apache.kudu.Type.STRING; + if (type.equals(TIMESTAMP_MILLIS)) { + return org.apache.kudu.Type.UNIXTIME_MICROS; } throw new TrinoException(NOT_SUPPORTED, "Unsupported type: " + type); } @@ -100,29 +100,29 @@ public static Type fromKuduColumn(ColumnSchema column) private static Type fromKuduClientType(org.apache.kudu.Type ktype, ColumnTypeAttributes attributes) { switch (ktype) { - case STRING: - return VarcharType.VARCHAR; - case UNIXTIME_MICROS: - return TIMESTAMP_MILLIS; - case INT64: - return BigintType.BIGINT; - case INT32: - return IntegerType.INTEGER; - case INT16: - return SmallintType.SMALLINT; + case BOOL: + return BooleanType.BOOLEAN; case INT8: return TinyintType.TINYINT; + case INT16: + return SmallintType.SMALLINT; + case INT32: + return IntegerType.INTEGER; + case INT64: + return BigintType.BIGINT; case FLOAT: return RealType.REAL; case DOUBLE: return DoubleType.DOUBLE; - case BOOL: - return BooleanType.BOOLEAN; - case BINARY: - return VarbinaryType.VARBINARY; case DECIMAL: return DecimalType.createDecimalType(attributes.getPrecision(), attributes.getScale()); // TODO: add support for varchar and date types: https://github.com/trinodb/trino/issues/11009 + case STRING: + return VarcharType.VARCHAR; + case BINARY: + return VarbinaryType.VARBINARY; + case UNIXTIME_MICROS: + return TIMESTAMP_MILLIS; case VARCHAR: case DATE: break; @@ -132,104 +132,68 @@ private static Type fromKuduClientType(org.apache.kudu.Type ktype, ColumnTypeAtt public static Object getJavaValue(Type type, Object nativeValue) { - if (type instanceof VarcharType) { - return ((Slice) nativeValue).toStringUtf8(); - } - if (type.equals(TIMESTAMP_MILLIS)) { - // Kudu's native format is in microseconds - return nativeValue; - } - if (type == BigintType.BIGINT) { + if (type == BooleanType.BOOLEAN) { return nativeValue; } - if (type == IntegerType.INTEGER) { - return ((Long) nativeValue).intValue(); + if (type == TinyintType.TINYINT) { + return ((Long) nativeValue).byteValue(); } if (type == SmallintType.SMALLINT) { return ((Long) nativeValue).shortValue(); } - if (type == TinyintType.TINYINT) { - return ((Long) nativeValue).byteValue(); + if (type == IntegerType.INTEGER) { + return ((Long) nativeValue).intValue(); } - if (type == DoubleType.DOUBLE) { + if (type == BigintType.BIGINT) { return nativeValue; } if (type == RealType.REAL) { // conversion can result in precision lost return intBitsToFloat(((Long) nativeValue).intValue()); } - if (type == BooleanType.BOOLEAN) { + if (type == DoubleType.DOUBLE) { return nativeValue; } - if (type instanceof VarbinaryType) { - return ((Slice) nativeValue).toByteBuffer(); - } if (type instanceof DecimalType decimalType) { if (decimalType.isShort()) { return new BigDecimal(BigInteger.valueOf((long) nativeValue), decimalType.getScale()); } return new BigDecimal(((Int128) nativeValue).toBigInteger(), decimalType.getScale()); } + if (type instanceof VarcharType) { + return ((Slice) nativeValue).toStringUtf8(); + } + if (type instanceof VarbinaryType) { + return ((Slice) nativeValue).toByteBuffer(); + } + if (type.equals(TIMESTAMP_MILLIS)) { + // Kudu's native format is in microseconds + return nativeValue; + } throw new IllegalStateException("Back conversion not implemented for " + type); 
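The TypeHelper hunks above only reorder the type checks by width; the Trino-to-Kudu mapping itself is unchanged. A small sketch of what that mapping means in practice, written as an assumed test that is not part of the diff:

    import org.apache.kudu.Type;
    import org.junit.jupiter.api.Test;

    import static io.trino.plugin.kudu.TypeHelper.toKuduClientType;
    import static io.trino.spi.type.BigintType.BIGINT;
    import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
    import static io.trino.spi.type.VarcharType.VARCHAR;
    import static org.assertj.core.api.Assertions.assertThat;

    class TestKuduTypeMapping
    {
        @Test
        void testTrinoToKuduClientType()
        {
            // Same mapping before and after the reordering in this diff.
            assertThat(toKuduClientType(BIGINT)).isEqualTo(Type.INT64);
            assertThat(toKuduClientType(VARCHAR)).isEqualTo(Type.STRING);
            assertThat(toKuduClientType(TIMESTAMP_MILLIS)).isEqualTo(Type.UNIXTIME_MICROS);
        }
    }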
} public static Object getObject(Type type, RowResult row, int field) { - if (row.isNull(field)) { - return null; - } - if (type instanceof VarcharType) { - return row.getString(field); - } - if (type.equals(TIMESTAMP_MILLIS)) { - return truncateEpochMicrosToMillis(row.getLong(field)); - } - if (type == BigintType.BIGINT) { - return row.getLong(field); - } - if (type == IntegerType.INTEGER) { - return row.getInt(field); - } - if (type == SmallintType.SMALLINT) { - return row.getShort(field); - } - if (type == TinyintType.TINYINT) { - return row.getByte(field); - } - if (type == DoubleType.DOUBLE) { - return row.getDouble(field); - } - if (type == RealType.REAL) { - return row.getFloat(field); - } - if (type == BooleanType.BOOLEAN) { - return row.getBoolean(field); - } - if (type instanceof VarbinaryType) { - return Slices.wrappedHeapBuffer(row.getBinary(field)); - } - if (type instanceof DecimalType) { - return Decimals.encodeScaledValue(row.getDecimal(field), ((DecimalType) type).getScale()); + if (type instanceof DecimalType decimalType) { + return Decimals.encodeScaledValue(row.getDecimal(field), decimalType.getScale()); } throw new IllegalStateException("getObject not implemented for " + type); } public static long getLong(Type type, RowResult row, int field) { - if (type.equals(TIMESTAMP_MILLIS)) { - return truncateEpochMicrosToMillis(row.getLong(field)); + if (type == TinyintType.TINYINT) { + return row.getByte(field); } - if (type == BigintType.BIGINT) { - return row.getLong(field); + if (type == SmallintType.SMALLINT) { + return row.getShort(field); } if (type == IntegerType.INTEGER) { return row.getInt(field); } - if (type == SmallintType.SMALLINT) { - return row.getShort(field); - } - if (type == TinyintType.TINYINT) { - return row.getByte(field); + if (type == BigintType.BIGINT) { + return row.getLong(field); } if (type == RealType.REAL) { return floatToRawIntBits(row.getFloat(field)); @@ -240,6 +204,9 @@ public static long getLong(Type type, RowResult row, int field) } throw new IllegalStateException("getLong not supported for long decimal: " + type); } + if (type.equals(TIMESTAMP_MILLIS)) { + return truncateEpochMicrosToMillis(row.getLong(field)); + } throw new IllegalStateException("getLong not implemented for " + type); } diff --git a/plugin/trino-local-file/pom.xml b/plugin/trino-local-file/pom.xml index cea93e66eb001..e25ca18de4fcc 100644 --- a/plugin/trino-local-file/pom.xml +++ b/plugin/trino-local-file/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-mariadb/pom.xml b/plugin/trino-mariadb/pom.xml index 4116f5e8ccd42..517b94b472b0f 100644 --- a/plugin/trino-mariadb/pom.xml +++ b/plugin/trino-mariadb/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-mariadb/src/main/java/io/trino/plugin/mariadb/MariaDbClient.java b/plugin/trino-mariadb/src/main/java/io/trino/plugin/mariadb/MariaDbClient.java index 23dbb45e3310f..18b6e2ec02b51 100644 --- a/plugin/trino-mariadb/src/main/java/io/trino/plugin/mariadb/MariaDbClient.java +++ b/plugin/trino-mariadb/src/main/java/io/trino/plugin/mariadb/MariaDbClient.java @@ -59,6 +59,7 @@ import io.trino.spi.connector.JoinStatistics; import io.trino.spi.connector.JoinType; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.statistics.ColumnStatistics; import io.trino.spi.statistics.Estimate; import io.trino.spi.statistics.TableStatistics; @@ -173,6 
+174,7 @@ public class MariaDbClient private static final int PARSE_ERROR = 1064; private final boolean statisticsEnabled; + private final ConnectorExpressionRewriter connectorExpressionRewriter; private final AggregateFunctionRewriter aggregateFunctionRewriter; @Inject @@ -187,8 +189,17 @@ public MariaDbClient( super("`", connectionFactory, queryBuilder, config.getJdbcTypesMappedToVarchar(), identifierMapping, queryModifier, false); JdbcTypeHandle bigintTypeHandle = new JdbcTypeHandle(Types.BIGINT, Optional.of("bigint"), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); - ConnectorExpressionRewriter connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() + this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) + // No "real" on the list; pushdown on REAL is disabled also in toColumnMapping + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "double")) + .map("$equal(left: numeric_type, right: numeric_type)").to("left = right") + .map("$not_equal(left: numeric_type, right: numeric_type)").to("left <> right") + // .map("$is_distinct_from(left: numeric_type, right: numeric_type)").to("left IS DISTINCT FROM right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") .build(); this.statisticsEnabled = statisticsConfig.isEnabled(); this.aggregateFunctionRewriter = new AggregateFunctionRewriter<>( @@ -222,6 +233,12 @@ public boolean supportsAggregationPushdown(ConnectorSession session, JdbcTableHa return preventTextualTypeAggregationPushdown(groupingSets); } + @Override + public Optional convertPredicate(ConnectorSession session, ConnectorExpression expression, Map assignments) + { + return connectorExpressionRewriter.rewrite(session, expression, assignments); + } + private static Optional toTypeHandle(DecimalType decimalType) { return Optional.of(new JdbcTypeHandle(Types.NUMERIC, Optional.of("decimal"), Optional.of(decimalType.getPrecision()), Optional.of(decimalType.getScale()), Optional.empty(), Optional.empty())); @@ -339,6 +356,8 @@ public Optional toColumnMapping(ConnectorSession session, Connect case Types.BIGINT: return Optional.of(bigintColumnMapping()); case Types.REAL: + // Disable pushdown because floating-point values are approximate and not stored as exact values, + // attempts to treat them as exact in comparisons may lead to problems return Optional.of(ColumnMapping.longMapping( REAL, (resultSet, columnIndex) -> floatToRawIntBits(resultSet.getFloat(columnIndex)), @@ -617,6 +636,24 @@ public boolean isTopNGuaranteed(ConnectorSession session) @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + if (joinType == JoinType.FULL_OUTER) { + // Not supported in MariaDB + return Optional.empty(); + } + return super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, 
PreparedQuery leftSource, @@ -630,7 +667,7 @@ public Optional implementJoin( // Not supported in MariaDB return Optional.empty(); } - return super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); } @Override diff --git a/plugin/trino-memory/pom.xml b/plugin/trino-memory/pom.xml index 860f99a46ddd5..93add17f06dc9 100644 --- a/plugin/trino-memory/pom.xml +++ b/plugin/trino-memory/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-ml/pom.xml b/plugin/trino-ml/pom.xml index 5e85c2fe5b0e5..75aeb4ec3b896 100644 --- a/plugin/trino-ml/pom.xml +++ b/plugin/trino-ml/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-mongodb/pom.xml b/plugin/trino-mongodb/pom.xml index ef823ea6c80e0..dfb821c671ef0 100644 --- a/plugin/trino-mongodb/pom.xml +++ b/plugin/trino-mongodb/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientConfig.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientConfig.java index fa1e82640a52c..9d87b72ff47c2 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientConfig.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientConfig.java @@ -45,6 +45,7 @@ public class MongoClientConfig private String requiredReplicaSetName; private String implicitRowFieldPrefix = "_pos"; private boolean projectionPushDownEnabled = true; + private boolean allowLocalScheduling; @NotNull public String getSchemaCollection() @@ -251,4 +252,17 @@ public MongoClientConfig setProjectionPushdownEnabled(boolean projectionPushDown this.projectionPushDownEnabled = projectionPushDownEnabled; return this; } + + public boolean isAllowLocalScheduling() + { + return allowLocalScheduling; + } + + @Config("mongodb.allow-local-scheduling") + @ConfigDescription("Assign MongoDB splits to a specific host if worker and MongoDB share the same cluster") + public MongoClientConfig setAllowLocalScheduling(boolean allowLocalScheduling) + { + this.allowLocalScheduling = allowLocalScheduling; + return this; + } } diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientModule.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientModule.java index f856c7d21f1ae..f08d728027556 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientModule.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoClientModule.java @@ -13,6 +13,7 @@ */ package io.trino.plugin.mongodb; +import com.google.common.collect.ImmutableList; import com.google.inject.Binder; import com.google.inject.Provides; import com.google.inject.Scopes; @@ -45,6 +46,7 @@ public class MongoClientModule public void setup(Binder binder) { binder.bind(MongoConnector.class).in(Scopes.SINGLETON); + binder.bind(MongoTransactionManager.class).in(Scopes.SINGLETON); binder.bind(MongoSplitManager.class).in(Scopes.SINGLETON); binder.bind(MongoPageSourceProvider.class).in(Scopes.SINGLETON); binder.bind(MongoPageSinkProvider.class).in(Scopes.SINGLETON); @@ -59,6 +61,12 @@ public void setup(Binder binder) MongoClientConfig::getTlsEnabled, new MongoSslModule())); + 
install(conditionalModule( + MongoClientConfig.class, + MongoClientConfig::isAllowLocalScheduling, + internalBinder -> internalBinder.bind(MongoServerDetailsProvider.class).toInstance(ImmutableList::of), + internalBinder -> internalBinder.bind(MongoServerDetailsProvider.class).to(SessionBasedMongoServerDetailsProvider.class).in(Scopes.SINGLETON))); + newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(Query.class).in(Scopes.SINGLETON); } diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoConnector.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoConnector.java index 03e16a0554e40..b90901e3d1147 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoConnector.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoConnector.java @@ -29,43 +29,36 @@ import java.util.List; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.spi.transaction.IsolationLevel.READ_UNCOMMITTED; -import static io.trino.spi.transaction.IsolationLevel.checkConnectorSupports; import static java.util.Objects.requireNonNull; public class MongoConnector implements Connector { private final MongoSession mongoSession; + private final MongoTransactionManager transactionManager; private final MongoSplitManager splitManager; private final MongoPageSourceProvider pageSourceProvider; private final MongoPageSinkProvider pageSinkProvider; - private final MongoMetadataFactory mongoMetadataFactory; private final Set connectorTableFunctions; private final List> sessionProperties; - private final ConcurrentMap transactions = new ConcurrentHashMap<>(); - @Inject public MongoConnector( MongoSession mongoSession, + MongoTransactionManager transactionManager, MongoSplitManager splitManager, MongoPageSourceProvider pageSourceProvider, MongoPageSinkProvider pageSinkProvider, - MongoMetadataFactory mongoMetadataFactory, Set connectorTableFunctions, Set sessionPropertiesProviders) { this.mongoSession = mongoSession; + this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null"); this.pageSinkProvider = requireNonNull(pageSinkProvider, "pageSinkProvider is null"); - this.mongoMetadataFactory = requireNonNull(mongoMetadataFactory, "mongoMetadataFactory is null"); this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null")); this.sessionProperties = sessionPropertiesProviders.stream() .flatMap(sessionPropertiesProvider -> sessionPropertiesProvider.getSessionProperties().stream()) @@ -75,32 +68,25 @@ public MongoConnector( @Override public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) { - checkConnectorSupports(READ_UNCOMMITTED, isolationLevel); - MongoTransactionHandle transaction = new MongoTransactionHandle(); - transactions.put(transaction, mongoMetadataFactory.create()); - return transaction; + return transactionManager.beginTransaction(isolationLevel); } @Override public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transaction) { - MongoMetadata 
metadata = transactions.get(transaction); - checkArgument(metadata != null, "no such transaction: %s", transaction); - return metadata; + return transactionManager.getMetadata(transaction); } @Override public void commit(ConnectorTransactionHandle transaction) { - checkArgument(transactions.remove(transaction) != null, "no such transaction: %s", transaction); + transactionManager.commit(transaction); } @Override public void rollback(ConnectorTransactionHandle transaction) { - MongoMetadata metadata = transactions.remove(transaction); - checkArgument(metadata != null, "no such transaction: %s", transaction); - metadata.rollback(); + transactionManager.rollback(transaction); } @Override diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoPageSource.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoPageSource.java index 130f4d344ca8f..3bdf2d61f9f64 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoPageSource.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoPageSource.java @@ -411,7 +411,7 @@ private static Object getDbRefValue(DBRef dbRefValue, MongoColumnHandle columnHa if (columnHandle.getType() instanceof RowType) { return dbRefValue; } - checkArgument(columnHandle.isDbRefField(), "columnHandle is not a dbRef field: " + columnHandle); + checkArgument(columnHandle.isDbRefField(), "columnHandle is not a dbRef field: %s", columnHandle); List dereferenceNames = columnHandle.getDereferenceNames(); checkState(!dereferenceNames.isEmpty(), "dereferenceNames is empty"); String leafColumnName = dereferenceNames.get(dereferenceNames.size() - 1); diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoServerDetailsProvider.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoServerDetailsProvider.java new file mode 100644 index 0000000000000..d072a47b699b7 --- /dev/null +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoServerDetailsProvider.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.mongodb; + +import io.trino.spi.HostAddress; + +import java.util.List; + +public interface MongoServerDetailsProvider +{ + List getServerAddress(); +} diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoSplitManager.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoSplitManager.java index 2695498cde0db..979422b75bc25 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoSplitManager.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoSplitManager.java @@ -14,7 +14,6 @@ package io.trino.plugin.mongodb; import com.google.inject.Inject; -import io.trino.spi.HostAddress; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorSplitManager; import io.trino.spi.connector.ConnectorSplitSource; @@ -24,17 +23,17 @@ import io.trino.spi.connector.DynamicFilter; import io.trino.spi.connector.FixedSplitSource; -import java.util.List; +import static java.util.Objects.requireNonNull; public class MongoSplitManager implements ConnectorSplitManager { - private final List addresses; + private final MongoServerDetailsProvider serverDetailsProvider; @Inject - public MongoSplitManager(MongoSession session) + public MongoSplitManager(MongoServerDetailsProvider serverDetailsProvider) { - this.addresses = session.getAddresses(); + this.serverDetailsProvider = requireNonNull(serverDetailsProvider, "serverDetailsProvider is null"); } @Override @@ -45,7 +44,7 @@ public ConnectorSplitSource getSplits( DynamicFilter dynamicFilter, Constraint constraint) { - MongoSplit split = new MongoSplit(addresses); + MongoSplit split = new MongoSplit(serverDetailsProvider.getServerAddress()); return new FixedSplitSource(split); } diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoTransactionManager.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoTransactionManager.java new file mode 100644 index 0000000000000..b2f074f19e1a6 --- /dev/null +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/MongoTransactionManager.java @@ -0,0 +1,65 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
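Editor's note: the new `mongodb.allow-local-scheduling` property and the `MongoServerDetailsProvider` interface decouple split creation from `MongoSession`. `MongoSplitManager` now asks the injected provider for host addresses instead of always attaching the session's server addresses to every split, and the Guice `conditionalModule` shown earlier picks the binding based on the config flag. A hypothetical alternative provider, just to show how the seam could be reused; the class name, fixed port, and static host list are invented, and the sketch assumes it lives in the same `io.trino.plugin.mongodb` package:

```java
import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;

import java.util.List;

// Hypothetical provider returning a fixed set of addresses, e.g. for a test
// or a deployment where the MongoDB hosts are known statically.
public class StaticMongoServerDetailsProvider
        implements MongoServerDetailsProvider
{
    private final List<HostAddress> addresses;

    public StaticMongoServerDetailsProvider(List<String> hosts)
    {
        this.addresses = hosts.stream()
                .map(host -> HostAddress.fromParts(host, 27017))
                .collect(ImmutableList.toImmutableList());
    }

    @Override
    public List<HostAddress> getServerAddress()
    {
        return addresses;
    }
}
```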
+ */ +package io.trino.plugin.mongodb; + +import com.google.inject.Inject; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.transaction.IsolationLevel; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import static com.google.common.base.Preconditions.checkArgument; +import static io.trino.spi.transaction.IsolationLevel.READ_COMMITTED; +import static io.trino.spi.transaction.IsolationLevel.checkConnectorSupports; +import static java.util.Objects.requireNonNull; + +public class MongoTransactionManager +{ + private final ConcurrentMap transactions = new ConcurrentHashMap<>(); + private final MongoMetadataFactory metadataFactory; + + @Inject + public MongoTransactionManager(MongoMetadataFactory metadataFactory) + { + this.metadataFactory = requireNonNull(metadataFactory, "metadataFactory is null"); + } + + public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel) + { + checkConnectorSupports(READ_COMMITTED, isolationLevel); + MongoTransactionHandle transaction = new MongoTransactionHandle(); + transactions.put(transaction, metadataFactory.create()); + return transaction; + } + + public MongoMetadata getMetadata(ConnectorTransactionHandle transaction) + { + MongoMetadata metadata = transactions.get(transaction); + checkArgument(metadata != null, "no such transaction: %s", transaction); + return metadata; + } + + public void commit(ConnectorTransactionHandle transaction) + { + checkArgument(transactions.remove(transaction) != null, "no such transaction: %s", transaction); + } + + public void rollback(ConnectorTransactionHandle transaction) + { + MongoMetadata metadata = transactions.remove(transaction); + checkArgument(metadata != null, "no such transaction: %s", transaction); + metadata.rollback(); + } +} diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/SessionBasedMongoServerDetailsProvider.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/SessionBasedMongoServerDetailsProvider.java new file mode 100644 index 0000000000000..0a0020274b01d --- /dev/null +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/SessionBasedMongoServerDetailsProvider.java @@ -0,0 +1,39 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
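Editor's note: extracting `MongoTransactionManager` moves the per-transaction `MongoMetadata` map out of `MongoConnector`, so both the connector and the `query` table function (below) can resolve metadata for the same transaction; the supported isolation level also moves from READ_UNCOMMITTED to READ_COMMITTED. A simplified sketch of the lifecycle the engine drives through this class, assuming the same package; in reality the calls arrive via `MongoConnector`, which now just forwards them:

```java
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.transaction.IsolationLevel;

final class TransactionLifecycleSketch
{
    private TransactionLifecycleSketch() {}

    static void run(MongoTransactionManager transactionManager)
    {
        ConnectorTransactionHandle transaction = transactionManager.beginTransaction(IsolationLevel.READ_COMMITTED);
        try {
            MongoMetadata metadata = transactionManager.getMetadata(transaction);
            // ... plan and execute the query against this per-transaction metadata instance ...
        }
        catch (RuntimeException e) {
            // On failure the metadata is removed and rolled back.
            transactionManager.rollback(transaction);
            throw e;
        }
        // On success the transaction entry is simply discarded.
        transactionManager.commit(transaction);
    }
}
```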
+ */ +package io.trino.plugin.mongodb; + +import com.google.inject.Inject; +import io.trino.spi.HostAddress; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public class SessionBasedMongoServerDetailsProvider + implements MongoServerDetailsProvider +{ + private final MongoSession mongoSession; + + @Inject + public SessionBasedMongoServerDetailsProvider(MongoSession mongoSession) + { + this.mongoSession = requireNonNull(mongoSession, "mongoSession is null"); + } + + @Override + public List getServerAddress() + { + return mongoSession.getAddresses(); + } +} diff --git a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/ptf/Query.java b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/ptf/Query.java index 413faba8d0653..1634cd0b8e97a 100644 --- a/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/ptf/Query.java +++ b/plugin/trino-mongodb/src/main/java/io/trino/plugin/mongodb/ptf/Query.java @@ -21,9 +21,8 @@ import io.airlift.slice.Slice; import io.trino.plugin.mongodb.MongoColumnHandle; import io.trino.plugin.mongodb.MongoMetadata; -import io.trino.plugin.mongodb.MongoMetadataFactory; -import io.trino.plugin.mongodb.MongoSession; import io.trino.plugin.mongodb.MongoTableHandle; +import io.trino.plugin.mongodb.MongoTransactionManager; import io.trino.plugin.mongodb.RemoteTableName; import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnHandle; @@ -34,6 +33,7 @@ import io.trino.spi.connector.ConnectorTableSchema; import io.trino.spi.connector.ConnectorTransactionHandle; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.TableNotFoundException; import io.trino.spi.function.table.AbstractConnectorTableFunction; import io.trino.spi.function.table.Argument; import io.trino.spi.function.table.ConnectorTableFunction; @@ -62,30 +62,26 @@ public class Query public static final String SCHEMA_NAME = "system"; public static final String NAME = "query"; - private final MongoMetadata metadata; - private final MongoSession session; + private final MongoTransactionManager transactionManager; @Inject - public Query(MongoMetadataFactory mongoMetadataFactory, MongoSession session) + public Query(MongoTransactionManager transactionManager) { - requireNonNull(session, "session is null"); - this.metadata = mongoMetadataFactory.create(); - this.session = session; + this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); } @Override public ConnectorTableFunction get() { - return new QueryFunction(metadata, session); + return new QueryFunction(transactionManager); } public static class QueryFunction extends AbstractConnectorTableFunction { - private final MongoMetadata metadata; - private final MongoSession mongoSession; + private final MongoTransactionManager transactionManager; - public QueryFunction(MongoMetadata metadata, MongoSession mongoSession) + public QueryFunction(MongoTransactionManager transactionManager) { super( SCHEMA_NAME, @@ -104,8 +100,7 @@ public QueryFunction(MongoMetadata metadata, MongoSession mongoSession) .type(VARCHAR) .build()), GENERIC_TABLE); - this.metadata = requireNonNull(metadata, "metadata is null"); - this.mongoSession = requireNonNull(mongoSession, "mongoSession is null"); + this.transactionManager = requireNonNull(transactionManager, "transactionManager is null"); } @Override @@ -115,6 +110,7 @@ public TableFunctionAnalysis analyze( Map arguments, ConnectorAccessControl accessControl) { + MongoMetadata metadata = 
transactionManager.getMetadata(transaction); String database = ((Slice) ((ScalarArgument) arguments.get("DATABASE")).getValue()).toStringUtf8(); String collection = ((Slice) ((ScalarArgument) arguments.get("COLLECTION")).getValue()).toStringUtf8(); String filter = ((Slice) ((ScalarArgument) arguments.get("FILTER")).getValue()).toStringUtf8(); @@ -124,11 +120,17 @@ public TableFunctionAnalysis analyze( if (!collection.equals(collection.toLowerCase(ENGLISH))) { throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Only lowercase collection name is supported"); } - RemoteTableName remoteTableName = mongoSession.toRemoteSchemaTableName(new SchemaTableName(database, collection)); + SchemaTableName schemaTableName = new SchemaTableName(database, collection); + MongoTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName); + if (tableHandle == null) { + throw new TableNotFoundException(schemaTableName); + } + + RemoteTableName remoteTableName = tableHandle.getRemoteTableName(); // Don't store Document object to MongoTableHandle for avoiding serialization issue parseFilter(filter); - MongoTableHandle tableHandle = new MongoTableHandle(new SchemaTableName(database, collection), remoteTableName, Optional.of(filter)); + tableHandle = new MongoTableHandle(schemaTableName, remoteTableName, Optional.of(filter)); ConnectorTableSchema tableSchema = metadata.getTableSchema(session, tableHandle); Map columnsByName = metadata.getColumnHandles(session, tableHandle); List columns = tableSchema.getColumns().stream() diff --git a/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/TestMongoClientConfig.java b/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/TestMongoClientConfig.java index 13bd693dfaf61..abbbc4424a1c3 100644 --- a/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/TestMongoClientConfig.java +++ b/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/TestMongoClientConfig.java @@ -43,7 +43,8 @@ public void testDefaults() .setWriteConcern(WriteConcernType.ACKNOWLEDGED) .setRequiredReplicaSetName(null) .setImplicitRowFieldPrefix("_pos") - .setProjectionPushdownEnabled(true)); + .setProjectionPushdownEnabled(true) + .setAllowLocalScheduling(false)); } @Test @@ -67,6 +68,7 @@ public void testExplicitPropertyMappings() .put("mongodb.required-replica-set", "replica_set") .put("mongodb.implicit-row-field-prefix", "_prefix") .put("mongodb.projection-pushdown-enabled", "false") + .put("mongodb.allow-local-scheduling", "true") .buildOrThrow(); MongoClientConfig expected = new MongoClientConfig() @@ -85,7 +87,8 @@ public void testExplicitPropertyMappings() .setWriteConcern(WriteConcernType.UNACKNOWLEDGED) .setRequiredReplicaSetName("replica_set") .setImplicitRowFieldPrefix("_prefix") - .setProjectionPushdownEnabled(false); + .setProjectionPushdownEnabled(false) + .setAllowLocalScheduling(true); assertFullMapping(properties, expected); } diff --git a/plugin/trino-mysql-event-listener/pom.xml b/plugin/trino-mysql-event-listener/pom.xml index b7904f9a45165..6dbafc9f4f074 100644 --- a/plugin/trino-mysql-event-listener/pom.xml +++ b/plugin/trino-mysql-event-listener/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-mysql/pom.xml b/plugin/trino-mysql/pom.xml index bdf9e9b22a3c5..8b2d51b2d12bf 100644 --- a/plugin/trino-mysql/pom.xml +++ b/plugin/trino-mysql/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git 
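Editor's note: with the transaction manager in place, the `query` table function no longer builds its own `MongoMetadata`. It resolves the table handle through the active transaction's metadata, so an unknown collection now fails with `TableNotFoundException`, and the remote (case-preserving) table name comes from the handle rather than a separate `MongoSession` lookup. A hedged usage sketch over JDBC; the coordinator URL, user, catalog name, and the example collection and filter are placeholders, and the trino-jdbc driver is assumed to be on the classpath:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class MongoQueryFunctionExample
{
    public static void main(String[] args)
            throws SQLException
    {
        try (Connection connection = DriverManager.getConnection("jdbc:trino://localhost:8080", "admin", null);
                Statement statement = connection.createStatement();
                ResultSet rs = statement.executeQuery("""
                        SELECT *
                        FROM TABLE(mongodb.system.query(
                            database => 'orders_db',
                            collection => 'orders',
                            filter => '{ "status": "open" }'))
                        """)) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }
}
```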
a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java index a53870ed76ce8..241ffd0cf0514 100644 --- a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java +++ b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java @@ -270,6 +270,15 @@ public MySqlClient( this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) + // No "real" on the list; pushdown on REAL is disabled also in toColumnMapping + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "double")) + .map("$equal(left: numeric_type, right: numeric_type)").to("left = right") + .map("$not_equal(left: numeric_type, right: numeric_type)").to("left <> right") + // .map("$is_distinct_from(left: numeric_type, right: numeric_type)").to("left IS DISTINCT FROM right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") .add(new RewriteLikeWithCaseSensitivity()) .add(new RewriteLikeEscapeWithCaseSensitivity()) .build(); @@ -1002,6 +1011,30 @@ public boolean isTopNGuaranteed(ConnectorSession session) @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + if (joinType == JoinType.FULL_OUTER) { + // Not supported in MySQL + return Optional.empty(); + } + return implementJoinCostAware( + session, + joinType, + leftSource, + rightSource, + statistics, + () -> super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics)); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -1021,7 +1054,7 @@ public Optional implementJoin( leftSource, rightSource, statistics, - () -> super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); + () -> super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } @Override diff --git a/plugin/trino-opensearch/pom.xml b/plugin/trino-opensearch/pom.xml new file mode 100644 index 0000000000000..5808a50ef4255 --- /dev/null +++ b/plugin/trino-opensearch/pom.xml @@ -0,0 +1,379 @@ + + + 4.0.0 + + + io.trino + trino-root + 436-SNAPSHOT + ../../pom.xml + + + trino-opensearch + trino-plugin + Trino - OpenSearch Connector + + + ${project.parent.basedir} + 2.11.1 + + + + + com.amazonaws + aws-java-sdk-core + + + + com.amazonaws + aws-java-sdk-sts + + + + com.fasterxml.jackson.core + jackson-core + + + + com.fasterxml.jackson.core + jackson-databind + + + + com.google.guava + guava + + + + com.google.inject + guice + + + + dev.failsafe + failsafe + + + + io.airlift + bootstrap + + + + io.airlift + concurrent + + + + io.airlift + configuration + + + + io.airlift + json + + + + io.airlift + log + + + + io.airlift + stats + + + + io.airlift + units + + + + io.trino + trino-plugin-toolkit + + + + jakarta.annotation + 
jakarta.annotation-api + + + + jakarta.validation + jakarta.validation-api + + + + org.apache.httpcomponents + httpasyncclient + 4.1.5 + + + commons-logging + commons-logging + + + + + + org.apache.httpcomponents + httpclient + 4.5.14 + + + commons-logging + commons-logging + + + + + + org.apache.httpcomponents + httpcore + 4.4.16 + + + + org.apache.httpcomponents + httpcore-nio + 4.4.16 + + + + org.opensearch + opensearch + ${dep.opensearch.version} + + + org.hdrhistogram + HdrHistogram + + + + + + org.opensearch + opensearch-common + ${dep.opensearch.version} + + + + org.opensearch + opensearch-core + ${dep.opensearch.version} + + + org.hdrhistogram + HdrHistogram + + + + + + org.opensearch.client + opensearch-rest-client + ${dep.opensearch.version} + + + commons-logging + commons-logging + + + + + + org.opensearch.client + opensearch-rest-high-level-client + ${dep.opensearch.version} + + + org.hdrhistogram + HdrHistogram + + + + + + org.weakref + jmxutils + + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + + io.airlift + slice + provided + + + + io.opentelemetry + opentelemetry-api + provided + + + + io.opentelemetry + opentelemetry-context + provided + + + + io.trino + trino-spi + provided + + + + org.openjdk.jol + jol-core + provided + + + + io.airlift + log-manager + runtime + + + + io.airlift + node + runtime + + + + org.opensearch + opensearch-x-content + ${dep.opensearch.version} + runtime + + + + io.airlift + http-server + test + + + + io.airlift + junit-extensions + test + + + + io.airlift + testing + test + + + + io.trino + trino-client + test + + + + io.trino + trino-jmx + test + + + + io.trino + trino-main + test + + + commons-codec + commons-codec + + + + + + io.trino + trino-main + test-jar + test + + + commons-codec + commons-codec + + + + + + io.trino + trino-testing + test + + + + io.trino + trino-testing-services + test + + + + io.trino + trino-tpch + test + + + + io.trino.tpch + tpch + test + + + + org.assertj + assertj-core + test + + + + org.eclipse.jetty.toolchain + jetty-jakarta-servlet-api + test + + + + org.jetbrains + annotations + test + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.jupiter + junit-jupiter-engine + test + + + + org.opensearch + opensearch-testcontainers + 2.0.1 + test + + + + org.testcontainers + testcontainers + test + + + diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/AwsSecurityConfig.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/AwsSecurityConfig.java new file mode 100644 index 0000000000000..16f86187def2b --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/AwsSecurityConfig.java @@ -0,0 +1,97 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; +import io.airlift.configuration.ConfigSecuritySensitive; +import jakarta.validation.constraints.NotNull; + +import java.util.Optional; + +public class AwsSecurityConfig +{ + private String accessKey; + private String secretKey; + private String region; + private String iamRole; + private String externalId; + + @NotNull + public Optional getAccessKey() + { + return Optional.ofNullable(accessKey); + } + + @Config("opensearch.aws.access-key") + public AwsSecurityConfig setAccessKey(String key) + { + this.accessKey = key; + return this; + } + + @NotNull + public Optional getSecretKey() + { + return Optional.ofNullable(secretKey); + } + + @Config("opensearch.aws.secret-key") + @ConfigSecuritySensitive + public AwsSecurityConfig setSecretKey(String key) + { + this.secretKey = key; + return this; + } + + public String getRegion() + { + return region; + } + + @Config("opensearch.aws.region") + public AwsSecurityConfig setRegion(String region) + { + this.region = region; + return this; + } + + @NotNull + public Optional getIamRole() + { + return Optional.ofNullable(iamRole); + } + + @Config("opensearch.aws.iam-role") + @ConfigDescription("Optional AWS IAM role to assume for authenticating. If set, this role will be used to get credentials to sign requests to ES.") + public AwsSecurityConfig setIamRole(String iamRole) + { + this.iamRole = iamRole; + return this; + } + + @NotNull + public Optional getExternalId() + { + return Optional.ofNullable(externalId); + } + + @Config("opensearch.aws.external-id") + @ConfigDescription("Optional external id to pass to AWS STS while assuming a role") + public AwsSecurityConfig setExternalId(String externalId) + { + this.externalId = externalId; + return this; + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/BuiltinColumns.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/BuiltinColumns.java new file mode 100644 index 0000000000000..57a3be5176b38 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/BuiltinColumns.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
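Editor's note: `AwsSecurityConfig` follows the usual airlift config pattern. The access key, secret key, IAM role, and external id are exposed as `Optional`, the secret key is marked security sensitive, and the role plus external id enable STS role assumption. A sketch of how such a class is typically exercised with airlift's `ConfigAssertions`; the test class itself is illustrative, but the property names and setters are the ones defined above:

```java
import com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Test;

import java.util.Map;

import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping;
import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults;
import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults;

class TestAwsSecurityConfigSketch
{
    @Test
    void testDefaults()
    {
        assertRecordedDefaults(recordDefaults(AwsSecurityConfig.class)
                .setAccessKey(null)
                .setSecretKey(null)
                .setRegion(null)
                .setIamRole(null)
                .setExternalId(null));
    }

    @Test
    void testExplicitPropertyMappings()
    {
        Map<String, String> properties = ImmutableMap.<String, String>builder()
                .put("opensearch.aws.access-key", "access")
                .put("opensearch.aws.secret-key", "secret")
                .put("opensearch.aws.region", "us-east-1")
                .put("opensearch.aws.iam-role", "arn:aws:iam::123456789012:role/example")
                .put("opensearch.aws.external-id", "external-id")
                .buildOrThrow();

        AwsSecurityConfig expected = new AwsSecurityConfig()
                .setAccessKey("access")
                .setSecretKey("secret")
                .setRegion("us-east-1")
                .setIamRole("arn:aws:iam::123456789012:role/example")
                .setExternalId("external-id");

        assertFullMapping(properties, expected);
    }
}
```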
+ */ +package io.trino.plugin.opensearch; + +import io.trino.plugin.opensearch.decoders.IdColumnDecoder; +import io.trino.plugin.opensearch.decoders.ScoreColumnDecoder; +import io.trino.plugin.opensearch.decoders.SourceColumnDecoder; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.type.Type; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static io.trino.spi.type.RealType.REAL; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.util.Arrays.stream; +import static java.util.function.Function.identity; + +enum BuiltinColumns +{ + ID("_id", VARCHAR, new IdColumnDecoder.Descriptor(), true), + SOURCE("_source", VARCHAR, new SourceColumnDecoder.Descriptor(), false), + SCORE("_score", REAL, new ScoreColumnDecoder.Descriptor(), false); + + private static final Map COLUMNS_BY_NAME = stream(values()) + .collect(toImmutableMap(BuiltinColumns::getName, identity())); + + private final String name; + private final Type type; + private final DecoderDescriptor decoderDescriptor; + private final boolean supportsPredicates; + + BuiltinColumns(String name, Type type, DecoderDescriptor decoderDescriptor, boolean supportsPredicates) + { + this.name = name; + this.type = type; + this.decoderDescriptor = decoderDescriptor; + this.supportsPredicates = supportsPredicates; + } + + public static Optional of(String name) + { + return Optional.ofNullable(COLUMNS_BY_NAME.get(name)); + } + + public static boolean isBuiltinColumn(String name) + { + return COLUMNS_BY_NAME.containsKey(name); + } + + public String getName() + { + return name; + } + + public Type getType() + { + return type; + } + + public ColumnMetadata getMetadata() + { + return ColumnMetadata.builder() + .setName(name) + .setType(type) + .setHidden(true) + .build(); + } + + public ColumnHandle getColumnHandle() + { + return new OpenSearchColumnHandle( + name, + type, + decoderDescriptor, + supportsPredicates); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/CountQueryPageSource.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/CountQueryPageSource.java new file mode 100644 index 0000000000000..9487574234fbc --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/CountQueryPageSource.java @@ -0,0 +1,92 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.spi.Page; +import io.trino.spi.connector.ConnectorPageSource; + +import static java.lang.Math.toIntExact; +import static java.util.Objects.requireNonNull; + +class CountQueryPageSource + implements ConnectorPageSource +{ + // This implementation of the page source is used whenever a query doesn't reference any columns + // from the ES table. 
We need to limit the number of rows per page in case there are projections + // in the query that can cause page sizes to explode. For example: SELECT rand() FROM some_table + // TODO (https://github.com/trinodb/trino/issues/16824) allow connector to return pages of arbitrary row count and handle this gracefully in engine + private static final int BATCH_SIZE = 10000; + + private final long readTimeNanos; + private long remaining; + + public CountQueryPageSource(OpenSearchClient client, OpenSearchTableHandle table, OpenSearchSplit split) + { + requireNonNull(client, "client is null"); + requireNonNull(table, "table is null"); + requireNonNull(split, "split is null"); + + long start = System.nanoTime(); + long count = client.count( + split.getIndex(), + split.getShard(), + OpenSearchQueryBuilder.buildSearchQuery(table.getConstraint().transformKeys(OpenSearchColumnHandle.class::cast), table.getQuery(), table.getRegexes())); + readTimeNanos = System.nanoTime() - start; + + if (table.getLimit().isPresent()) { + count = Math.min(table.getLimit().getAsLong(), count); + } + + remaining = count; + } + + @Override + public boolean isFinished() + { + return remaining == 0; + } + + @Override + public Page getNextPage() + { + int batch = toIntExact(Math.min(BATCH_SIZE, remaining)); + remaining -= batch; + + return new Page(batch); + } + + @Override + public long getReadTimeNanos() + { + return readTimeNanos; + } + + @Override + public long getCompletedBytes() + { + return 0; + } + + @Override + public long getMemoryUsage() + { + return 0; + } + + @Override + public void close() + { + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/DecoderDescriptor.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/DecoderDescriptor.java new file mode 100644 index 0000000000000..899dedae04248 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/DecoderDescriptor.java @@ -0,0 +1,62 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
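Editor's note: `CountQueryPageSource` handles queries that read no columns at all, for example `SELECT count(*)` or `SELECT rand() FROM t`. It issues a single count request to OpenSearch in the constructor and then emits empty pages that carry only a position count, capped at 10,000 rows per page so projections cannot blow up page sizes. The trick relies on `Page` accepting a position count with zero channels; a tiny illustration:

```java
import io.trino.spi.Page;

class CountOnlyPageExample
{
    public static void main(String[] args)
    {
        // A page with positions but no blocks: enough for count(*)-style queries,
        // where the engine only needs the number of rows, not any values.
        Page page = new Page(10_000);
        System.out.println(page.getPositionCount()); // 10000
        System.out.println(page.getChannelCount());  // 0
    }
}
```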
+ */ +package io.trino.plugin.opensearch; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import io.trino.plugin.opensearch.decoders.ArrayDecoder; +import io.trino.plugin.opensearch.decoders.BigintDecoder; +import io.trino.plugin.opensearch.decoders.BooleanDecoder; +import io.trino.plugin.opensearch.decoders.Decoder; +import io.trino.plugin.opensearch.decoders.DoubleDecoder; +import io.trino.plugin.opensearch.decoders.IdColumnDecoder; +import io.trino.plugin.opensearch.decoders.IntegerDecoder; +import io.trino.plugin.opensearch.decoders.IpAddressDecoder; +import io.trino.plugin.opensearch.decoders.RawJsonDecoder; +import io.trino.plugin.opensearch.decoders.RealDecoder; +import io.trino.plugin.opensearch.decoders.RowDecoder; +import io.trino.plugin.opensearch.decoders.ScoreColumnDecoder; +import io.trino.plugin.opensearch.decoders.SmallintDecoder; +import io.trino.plugin.opensearch.decoders.SourceColumnDecoder; +import io.trino.plugin.opensearch.decoders.TimestampDecoder; +import io.trino.plugin.opensearch.decoders.TinyintDecoder; +import io.trino.plugin.opensearch.decoders.VarbinaryDecoder; +import io.trino.plugin.opensearch.decoders.VarcharDecoder; + +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = BooleanDecoder.Descriptor.class, name = "boolean"), + @JsonSubTypes.Type(value = SmallintDecoder.Descriptor.class, name = "smallint"), + @JsonSubTypes.Type(value = TinyintDecoder.Descriptor.class, name = "tinyint"), + @JsonSubTypes.Type(value = IntegerDecoder.Descriptor.class, name = "integer"), + @JsonSubTypes.Type(value = BigintDecoder.Descriptor.class, name = "bigint"), + @JsonSubTypes.Type(value = TimestampDecoder.Descriptor.class, name = "timestamp"), + @JsonSubTypes.Type(value = RealDecoder.Descriptor.class, name = "real"), + @JsonSubTypes.Type(value = DoubleDecoder.Descriptor.class, name = "double"), + @JsonSubTypes.Type(value = VarcharDecoder.Descriptor.class, name = "varchar"), + @JsonSubTypes.Type(value = VarbinaryDecoder.Descriptor.class, name = "varbinary"), + @JsonSubTypes.Type(value = IpAddressDecoder.Descriptor.class, name = "ipAddress"), + @JsonSubTypes.Type(value = RowDecoder.Descriptor.class, name = "row"), + @JsonSubTypes.Type(value = ArrayDecoder.Descriptor.class, name = "array"), + @JsonSubTypes.Type(value = RawJsonDecoder.Descriptor.class, name = "rawJson"), + @JsonSubTypes.Type(value = IdColumnDecoder.Descriptor.class, name = "idColumn"), + @JsonSubTypes.Type(value = ScoreColumnDecoder.Descriptor.class, name = "scoreColumn"), + @JsonSubTypes.Type(value = SourceColumnDecoder.Descriptor.class, name = "sourceColumn"), +}) +public interface DecoderDescriptor +{ + Decoder createDecoder(); +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/NodesSystemTable.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/NodesSystemTable.java new file mode 100644 index 0000000000000..c2369f2290090 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/NodesSystemTable.java @@ -0,0 +1,105 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
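Editor's note: `DecoderDescriptor` is what lets `OpenSearchColumnHandle` travel between coordinator and workers as JSON. Each decoder registers a named subtype, and Jackson records the concrete class in an `@type` field so the matching `Decoder` can be rebuilt on the other side. A generic, self-contained illustration of the same Jackson mechanism; the `Shape` hierarchy is invented and is not part of the connector:

```java
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public class PolymorphicJsonExample
{
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "@type")
    @JsonSubTypes({
            @JsonSubTypes.Type(value = Circle.class, name = "circle"),
            @JsonSubTypes.Type(value = Square.class, name = "square"),
    })
    interface Shape {}

    record Circle(double radius) implements Shape {}

    record Square(double side) implements Shape {}

    public static void main(String[] args)
            throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();

        // Serializing through the interface writes the type id alongside the fields,
        // e.g. {"@type":"circle","radius":2.5}
        String json = mapper.writerFor(Shape.class).writeValueAsString(new Circle(2.5));
        System.out.println(json);

        // The "@type" field tells Jackson which concrete class to instantiate,
        // exactly as it does for the decoder descriptors above.
        Shape restored = mapper.readValue(json, Shape.class);
        System.out.println(restored);
    }
}
```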
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.plugin.opensearch.client.OpenSearchNode; +import io.trino.spi.Node; +import io.trino.spi.NodeManager; +import io.trino.spi.Page; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorTableMetadata; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.connector.FixedPageSource; +import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.SystemTable; +import io.trino.spi.predicate.TupleDomain; + +import java.util.Set; + +import static io.trino.spi.type.VarcharType.VARCHAR; +import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; +import static java.util.Objects.requireNonNull; + +public class NodesSystemTable + implements SystemTable +{ + private static final ConnectorTableMetadata METADATA = new ConnectorTableMetadata( + new SchemaTableName("system", "nodes"), + ImmutableList.builder() + .add(new ColumnMetadata("trino_node_id", createUnboundedVarcharType())) + .add(new ColumnMetadata("trino_node_address", createUnboundedVarcharType())) + .add(new ColumnMetadata("opensearch_node_id", createUnboundedVarcharType())) + .add(new ColumnMetadata("opensearch_node_address", createUnboundedVarcharType())) + .build()); + + private final OpenSearchClient client; + private final Node currentNode; + + @Inject + public NodesSystemTable(NodeManager nodeManager, OpenSearchClient client) + { + requireNonNull(nodeManager, "nodeManager is null"); + + this.client = requireNonNull(client, "client is null"); + currentNode = nodeManager.getCurrentNode(); + } + + @Override + public Distribution getDistribution() + { + return Distribution.ALL_NODES; + } + + @Override + public ConnectorTableMetadata getTableMetadata() + { + return METADATA; + } + + @Override + public ConnectorPageSource pageSource(ConnectorTransactionHandle transaction, ConnectorSession session, TupleDomain constraint) + { + Set nodes = client.getNodes(); + + BlockBuilder nodeId = VARCHAR.createBlockBuilder(null, nodes.size()); + BlockBuilder trinoAddress = VARCHAR.createBlockBuilder(null, nodes.size()); + BlockBuilder opensearchNodeId = VARCHAR.createBlockBuilder(null, nodes.size()); + BlockBuilder opensearchAddress = VARCHAR.createBlockBuilder(null, nodes.size()); + + for (OpenSearchNode node : nodes) { + VARCHAR.writeString(nodeId, currentNode.getNodeIdentifier()); + VARCHAR.writeString(trinoAddress, currentNode.getHostAndPort().toString()); + VARCHAR.writeString(opensearchNodeId, node.getId()); + + if (node.getAddress().isPresent()) { + VARCHAR.writeString(opensearchAddress, node.getAddress().get()); + } + else { + opensearchAddress.appendNull(); + } + } + + return new FixedPageSource(ImmutableList.of(new Page( + nodeId.build(), + trinoAddress.build(), + opensearchNodeId.build(), + 
opensearchAddress.build()))); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchColumnHandle.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchColumnHandle.java new file mode 100644 index 0000000000000..c3a9ac340ba71 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchColumnHandle.java @@ -0,0 +1,98 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.type.Type; + +import java.util.Objects; + +import static java.util.Objects.requireNonNull; + +public final class OpenSearchColumnHandle + implements ColumnHandle +{ + private final String name; + private final Type type; + private final DecoderDescriptor decoderDescriptor; + private final boolean supportsPredicates; + + @JsonCreator + public OpenSearchColumnHandle( + @JsonProperty("name") String name, + @JsonProperty("type") Type type, + @JsonProperty("decoderDescriptor") DecoderDescriptor decoderDescriptor, + @JsonProperty("supportsPredicates") boolean supportsPredicates) + { + this.name = requireNonNull(name, "name is null"); + this.type = requireNonNull(type, "type is null"); + this.decoderDescriptor = requireNonNull(decoderDescriptor, "decoderDescriptor is null"); + this.supportsPredicates = supportsPredicates; + } + + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public Type getType() + { + return type; + } + + @JsonProperty + public DecoderDescriptor getDecoderDescriptor() + { + return decoderDescriptor; + } + + @JsonProperty + public boolean isSupportsPredicates() + { + return supportsPredicates; + } + + @Override + public int hashCode() + { + return Objects.hash(name, type, decoderDescriptor, supportsPredicates); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if ((obj == null) || (getClass() != obj.getClass())) { + return false; + } + + OpenSearchColumnHandle other = (OpenSearchColumnHandle) obj; + return this.supportsPredicates == other.supportsPredicates && + Objects.equals(this.getName(), other.getName()) && + Objects.equals(this.getType(), other.getType()) && + Objects.equals(this.getDecoderDescriptor(), other.getDecoderDescriptor()); + } + + @Override + public String toString() + { + return getName() + "::" + getType(); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConfig.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConfig.java new file mode 100644 index 0000000000000..febbe8dfc0c15 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConfig.java @@ -0,0 +1,344 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
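Editor's note: `OpenSearchColumnHandle` carries everything a worker needs for a column: the Trino type, the serialized decoder descriptor, and a `supportsPredicates` flag that controls whether domains on that column may be pushed into the OpenSearch query. The hidden built-in columns defined earlier use the same handle: `_id` allows predicate pushdown while `_score` and `_source` do not, and `system.nodes` exposes the Trino-to-OpenSearch node mapping. A minimal construction example mirroring `BuiltinColumns.ID`, assuming the same `io.trino.plugin.opensearch` package:

```java
import io.trino.plugin.opensearch.decoders.IdColumnDecoder;

import static io.trino.spi.type.VarcharType.VARCHAR;

final class BuiltinColumnHandleExample
{
    private BuiltinColumnHandleExample() {}

    // Equivalent to BuiltinColumns.ID.getColumnHandle(): the hidden _id column
    // is typed VARCHAR, decoded by IdColumnDecoder, and allows predicate pushdown,
    // unlike _score and _source, which set the last flag to false.
    static OpenSearchColumnHandle idColumn()
    {
        return new OpenSearchColumnHandle(
                "_id",
                VARCHAR,
                new IdColumnDecoder.Descriptor(),
                true);
    }
}
```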
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; +import io.airlift.configuration.ConfigSecuritySensitive; +import io.airlift.configuration.validation.FileExists; +import io.airlift.units.Duration; +import io.airlift.units.MinDuration; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotNull; + +import java.io.File; +import java.util.List; +import java.util.Optional; + +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; + +public class OpenSearchConfig +{ + public enum Security + { + AWS, + PASSWORD, + } + + private List hosts; + private int port = 9200; + private String defaultSchema = "default"; + private int scrollSize = 1_000; + private Duration scrollTimeout = new Duration(1, MINUTES); + private Duration requestTimeout = new Duration(10, SECONDS); + private Duration connectTimeout = new Duration(1, SECONDS); + private Duration backoffInitDelay = new Duration(500, MILLISECONDS); + private Duration backoffMaxDelay = new Duration(20, SECONDS); + private Duration maxRetryTime = new Duration(30, SECONDS); + private Duration nodeRefreshInterval = new Duration(1, MINUTES); + private int maxHttpConnections = 25; + private int httpThreadCount = Runtime.getRuntime().availableProcessors(); + + private boolean tlsEnabled; + private File keystorePath; + private File trustStorePath; + private String keystorePassword; + private String truststorePassword; + private boolean ignorePublishAddress; + private boolean verifyHostnames = true; + + private Security security; + + @NotNull + public List getHosts() + { + return hosts; + } + + @Config("opensearch.host") + public OpenSearchConfig setHosts(List hosts) + { + this.hosts = hosts; + return this; + } + + public int getPort() + { + return port; + } + + @Config("opensearch.port") + public OpenSearchConfig setPort(int port) + { + this.port = port; + return this; + } + + @NotNull + public String getDefaultSchema() + { + return defaultSchema; + } + + @Config("opensearch.default-schema-name") + @ConfigDescription("Default schema name to use") + public OpenSearchConfig setDefaultSchema(String defaultSchema) + { + this.defaultSchema = defaultSchema; + return this; + } + + @NotNull + @Min(1) + public int getScrollSize() + { + return scrollSize; + } + + @Config("opensearch.scroll-size") + @ConfigDescription("Scroll batch size") + public OpenSearchConfig setScrollSize(int scrollSize) + { + this.scrollSize = scrollSize; + return this; + } + + @NotNull + public Duration getScrollTimeout() + { + return scrollTimeout; + } + + @Config("opensearch.scroll-timeout") + @ConfigDescription("Scroll timeout") + public OpenSearchConfig setScrollTimeout(Duration scrollTimeout) + { + this.scrollTimeout = scrollTimeout; + return this; + } + + @NotNull + public Duration getRequestTimeout() + { + return requestTimeout; + } + + @Config("opensearch.request-timeout") + @ConfigDescription("OpenSearch request 
timeout") + public OpenSearchConfig setRequestTimeout(Duration requestTimeout) + { + this.requestTimeout = requestTimeout; + return this; + } + + @NotNull + public Duration getConnectTimeout() + { + return connectTimeout; + } + + @Config("opensearch.connect-timeout") + @ConfigDescription("OpenSearch connect timeout") + public OpenSearchConfig setConnectTimeout(Duration timeout) + { + this.connectTimeout = timeout; + return this; + } + + @NotNull + public Duration getBackoffInitDelay() + { + return backoffInitDelay; + } + + @Config("opensearch.backoff-init-delay") + @ConfigDescription("Initial delay to wait between backpressure retries") + public OpenSearchConfig setBackoffInitDelay(Duration backoffInitDelay) + { + this.backoffInitDelay = backoffInitDelay; + return this; + } + + @NotNull + public Duration getBackoffMaxDelay() + { + return backoffMaxDelay; + } + + @Config("opensearch.backoff-max-delay") + @ConfigDescription("Maximum delay to wait between backpressure retries") + public OpenSearchConfig setBackoffMaxDelay(Duration backoffMaxDelay) + { + this.backoffMaxDelay = backoffMaxDelay; + return this; + } + + @NotNull + public Duration getMaxRetryTime() + { + return maxRetryTime; + } + + @Config("opensearch.max-retry-time") + @ConfigDescription("Maximum timeout in case of multiple retries") + public OpenSearchConfig setMaxRetryTime(Duration maxRetryTime) + { + this.maxRetryTime = maxRetryTime; + return this; + } + + @NotNull + @MinDuration("1ms") + public Duration getNodeRefreshInterval() + { + return nodeRefreshInterval; + } + + @Config("opensearch.node-refresh-interval") + @ConfigDescription("How often to refresh the list of available nodes in the OpenSearch cluster") + public OpenSearchConfig setNodeRefreshInterval(Duration nodeRefreshInterval) + { + this.nodeRefreshInterval = nodeRefreshInterval; + return this; + } + + @Config("opensearch.max-http-connections") + @ConfigDescription("Maximum number of persistent HTTP connections to OpenSearch cluster") + public OpenSearchConfig setMaxHttpConnections(int size) + { + this.maxHttpConnections = size; + return this; + } + + @NotNull + public int getMaxHttpConnections() + { + return maxHttpConnections; + } + + @Config("opensearch.http-thread-count") + @ConfigDescription("Number of threads handling HTTP connections to OpenSearch cluster") + public OpenSearchConfig setHttpThreadCount(int count) + { + this.httpThreadCount = count; + return this; + } + + @NotNull + public int getHttpThreadCount() + { + return httpThreadCount; + } + + public boolean isTlsEnabled() + { + return tlsEnabled; + } + + @Config("opensearch.tls.enabled") + public OpenSearchConfig setTlsEnabled(boolean tlsEnabled) + { + this.tlsEnabled = tlsEnabled; + return this; + } + + public Optional<@FileExists File> getKeystorePath() + { + return Optional.ofNullable(keystorePath); + } + + @Config("opensearch.tls.keystore-path") + public OpenSearchConfig setKeystorePath(File path) + { + this.keystorePath = path; + return this; + } + + public Optional getKeystorePassword() + { + return Optional.ofNullable(keystorePassword); + } + + @Config("opensearch.tls.keystore-password") + @ConfigSecuritySensitive + public OpenSearchConfig setKeystorePassword(String password) + { + this.keystorePassword = password; + return this; + } + + public Optional<@FileExists File> getTrustStorePath() + { + return Optional.ofNullable(trustStorePath); + } + + @Config("opensearch.tls.truststore-path") + public OpenSearchConfig setTrustStorePath(File path) + { + this.trustStorePath = path; + return this; + } 
+ + public Optional getTruststorePassword() + { + return Optional.ofNullable(truststorePassword); + } + + @Config("opensearch.tls.truststore-password") + @ConfigSecuritySensitive + public OpenSearchConfig setTruststorePassword(String password) + { + this.truststorePassword = password; + return this; + } + + public boolean isVerifyHostnames() + { + return verifyHostnames; + } + + @Config("opensearch.tls.verify-hostnames") + public OpenSearchConfig setVerifyHostnames(boolean verify) + { + this.verifyHostnames = verify; + return this; + } + + public boolean isIgnorePublishAddress() + { + return ignorePublishAddress; + } + + @Config("opensearch.ignore-publish-address") + public OpenSearchConfig setIgnorePublishAddress(boolean ignorePublishAddress) + { + this.ignorePublishAddress = ignorePublishAddress; + return this; + } + + @NotNull + public Optional getSecurity() + { + return Optional.ofNullable(security); + } + + @Config("opensearch.security") + public OpenSearchConfig setSecurity(Security security) + { + this.security = security; + return this; + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnector.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnector.java new file mode 100644 index 0000000000000..0387fed5d8e7a --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnector.java @@ -0,0 +1,104 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package io.trino.plugin.opensearch;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.inject.Inject;
+import io.airlift.bootstrap.LifeCycleManager;
+import io.trino.spi.connector.Connector;
+import io.trino.spi.connector.ConnectorMetadata;
+import io.trino.spi.connector.ConnectorPageSourceProvider;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.connector.ConnectorSplitManager;
+import io.trino.spi.connector.ConnectorTransactionHandle;
+import io.trino.spi.connector.SystemTable;
+import io.trino.spi.function.table.ConnectorTableFunction;
+import io.trino.spi.transaction.IsolationLevel;
+
+import java.util.Set;
+
+import static io.trino.spi.transaction.IsolationLevel.READ_COMMITTED;
+import static io.trino.spi.transaction.IsolationLevel.checkConnectorSupports;
+import static java.util.Objects.requireNonNull;
+
+public class OpenSearchConnector
+        implements Connector
+{
+    private final LifeCycleManager lifeCycleManager;
+    private final OpenSearchMetadata metadata;
+    private final OpenSearchSplitManager splitManager;
+    private final OpenSearchPageSourceProvider pageSourceProvider;
+    private final NodesSystemTable nodesSystemTable;
+    private final Set<ConnectorTableFunction> connectorTableFunctions;
+
+    @Inject
+    public OpenSearchConnector(
+            LifeCycleManager lifeCycleManager,
+            OpenSearchMetadata metadata,
+            OpenSearchSplitManager splitManager,
+            OpenSearchPageSourceProvider pageSourceProvider,
+            NodesSystemTable nodesSystemTable,
+            Set<ConnectorTableFunction> connectorTableFunctions)
+    {
+        this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
+        this.metadata = requireNonNull(metadata, "metadata is null");
+        this.splitManager = requireNonNull(splitManager, "splitManager is null");
+        this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null");
+        this.nodesSystemTable = requireNonNull(nodesSystemTable, "nodesSystemTable is null");
+        this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null"));
+    }
+
+    @Override
+    public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit)
+    {
+        checkConnectorSupports(READ_COMMITTED, isolationLevel);
+        return OpenSearchTransactionHandle.INSTANCE;
+    }
+
+    @Override
+    public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transactionHandle)
+    {
+        return metadata;
+    }
+
+    @Override
+    public ConnectorSplitManager getSplitManager()
+    {
+        return splitManager;
+    }
+
+    @Override
+    public ConnectorPageSourceProvider getPageSourceProvider()
+    {
+        return pageSourceProvider;
+    }
+
+    @Override
+    public Set<SystemTable> getSystemTables()
+    {
+        return ImmutableSet.of(nodesSystemTable);
+    }
+
+    @Override
+    public Set<ConnectorTableFunction> getTableFunctions()
+    {
+        return connectorTableFunctions;
+    }
+
+    @Override
+    public final void shutdown()
+    {
+        lifeCycleManager.stop();
+    }
+}
diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorFactory.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorFactory.java
new file mode 100644
index 0000000000000..7479f1cd1f07f
--- /dev/null
+++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorFactory.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.inject.Injector; +import io.airlift.bootstrap.Bootstrap; +import io.airlift.json.JsonModule; +import io.trino.plugin.base.CatalogName; +import io.trino.plugin.base.TypeDeserializerModule; +import io.trino.plugin.base.jmx.ConnectorObjectNameGeneratorModule; +import io.trino.plugin.base.jmx.MBeanServerModule; +import io.trino.spi.NodeManager; +import io.trino.spi.connector.Connector; +import io.trino.spi.connector.ConnectorContext; +import io.trino.spi.connector.ConnectorFactory; +import org.weakref.jmx.guice.MBeanModule; + +import java.util.Map; + +import static io.trino.plugin.base.Versions.checkStrictSpiVersionMatch; +import static java.util.Objects.requireNonNull; + +public class OpenSearchConnectorFactory + implements ConnectorFactory +{ + OpenSearchConnectorFactory() {} + + @Override + public String getName() + { + return "opensearch"; + } + + @Override + public Connector create(String catalogName, Map config, ConnectorContext context) + { + requireNonNull(catalogName, "catalogName is null"); + requireNonNull(config, "config is null"); + checkStrictSpiVersionMatch(context, this); + + Bootstrap app = new Bootstrap( + new MBeanModule(), + new MBeanServerModule(), + new ConnectorObjectNameGeneratorModule("io.trino.plugin.opensearch", "trino.plugin.opensearch"), + new JsonModule(), + new TypeDeserializerModule(context.getTypeManager()), + new OpenSearchConnectorModule(), + binder -> { + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); + }); + + Injector injector = app + .doNotInitializeLogging() + .setRequiredConfigurationProperties(config) + .initialize(); + + return injector.getInstance(OpenSearchConnector.class); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorModule.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorModule.java new file mode 100644 index 0000000000000..4e4a4a5cfcfce --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchConnectorModule.java @@ -0,0 +1,66 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.inject.Binder; +import com.google.inject.Scopes; +import io.airlift.configuration.AbstractConfigurationAwareModule; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.plugin.opensearch.ptf.RawQuery; +import io.trino.spi.function.table.ConnectorTableFunction; + +import static com.google.inject.multibindings.Multibinder.newSetBinder; +import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; +import static io.airlift.configuration.ConditionalModule.conditionalModule; +import static io.airlift.configuration.ConfigBinder.configBinder; +import static java.util.function.Predicate.isEqual; +import static org.weakref.jmx.guice.ExportBinder.newExporter; + +public class OpenSearchConnectorModule + extends AbstractConfigurationAwareModule +{ + @Override + protected void setup(Binder binder) + { + binder.bind(OpenSearchConnector.class).in(Scopes.SINGLETON); + binder.bind(OpenSearchMetadata.class).in(Scopes.SINGLETON); + binder.bind(OpenSearchSplitManager.class).in(Scopes.SINGLETON); + binder.bind(OpenSearchPageSourceProvider.class).in(Scopes.SINGLETON); + binder.bind(OpenSearchClient.class).in(Scopes.SINGLETON); + binder.bind(NodesSystemTable.class).in(Scopes.SINGLETON); + + newExporter(binder).export(OpenSearchClient.class).withGeneratedName(); + + configBinder(binder).bindConfig(OpenSearchConfig.class); + + newOptionalBinder(binder, AwsSecurityConfig.class); + newOptionalBinder(binder, PasswordConfig.class); + + newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(RawQuery.class).in(Scopes.SINGLETON); + + install(conditionalModule( + OpenSearchConfig.class, + config -> config.getSecurity() + .filter(isEqual(OpenSearchConfig.Security.AWS)) + .isPresent(), + conditionalBinder -> configBinder(conditionalBinder).bindConfig(AwsSecurityConfig.class))); + + install(conditionalModule( + OpenSearchConfig.class, + config -> config.getSecurity() + .filter(isEqual(OpenSearchConfig.Security.PASSWORD)) + .isPresent(), + conditionalBinder -> configBinder(conditionalBinder).bindConfig(PasswordConfig.class))); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchErrorCode.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchErrorCode.java new file mode 100644 index 0000000000000..c72e83f4d5041 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchErrorCode.java @@ -0,0 +1,44 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import io.trino.spi.ErrorCode; +import io.trino.spi.ErrorCodeSupplier; +import io.trino.spi.ErrorType; + +import static io.trino.spi.ErrorType.EXTERNAL; +import static io.trino.spi.ErrorType.USER_ERROR; + +public enum OpenSearchErrorCode + implements ErrorCodeSupplier +{ + OPENSEARCH_CONNECTION_ERROR(0, EXTERNAL), + OPENSEARCH_INVALID_RESPONSE(1, EXTERNAL), + OPENSEARCH_SSL_INITIALIZATION_FAILURE(2, EXTERNAL), + OPENSEARCH_QUERY_FAILURE(3, USER_ERROR), + OPENSEARCH_INVALID_METADATA(4, USER_ERROR); + + private final ErrorCode errorCode; + + OpenSearchErrorCode(int code, ErrorType type) + { + errorCode = new ErrorCode(code + 0x0503_0000, name(), type); + } + + @Override + public ErrorCode toErrorCode() + { + return errorCode; + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchMetadata.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchMetadata.java new file mode 100644 index 0000000000000..d013f117e7c8b --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchMetadata.java @@ -0,0 +1,707 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.inject.Inject; +import io.airlift.slice.Slice; +import io.trino.plugin.base.expression.ConnectorExpressions; +import io.trino.plugin.opensearch.client.IndexMetadata; +import io.trino.plugin.opensearch.client.IndexMetadata.DateTimeType; +import io.trino.plugin.opensearch.client.IndexMetadata.ObjectType; +import io.trino.plugin.opensearch.client.IndexMetadata.PrimitiveType; +import io.trino.plugin.opensearch.client.IndexMetadata.ScaledFloatType; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.plugin.opensearch.decoders.ArrayDecoder; +import io.trino.plugin.opensearch.decoders.BigintDecoder; +import io.trino.plugin.opensearch.decoders.BooleanDecoder; +import io.trino.plugin.opensearch.decoders.DoubleDecoder; +import io.trino.plugin.opensearch.decoders.IntegerDecoder; +import io.trino.plugin.opensearch.decoders.IpAddressDecoder; +import io.trino.plugin.opensearch.decoders.RawJsonDecoder; +import io.trino.plugin.opensearch.decoders.RealDecoder; +import io.trino.plugin.opensearch.decoders.RowDecoder; +import io.trino.plugin.opensearch.decoders.SmallintDecoder; +import io.trino.plugin.opensearch.decoders.TimestampDecoder; +import io.trino.plugin.opensearch.decoders.TinyintDecoder; +import io.trino.plugin.opensearch.decoders.VarbinaryDecoder; +import io.trino.plugin.opensearch.decoders.VarcharDecoder; +import io.trino.plugin.opensearch.ptf.RawQuery.RawQueryFunctionHandle; +import io.trino.spi.TrinoException; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.connector.ConnectorMetadata; +import 
io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.ConnectorTableMetadata; +import io.trino.spi.connector.ConnectorTableProperties; +import io.trino.spi.connector.Constraint; +import io.trino.spi.connector.ConstraintApplicationResult; +import io.trino.spi.connector.LimitApplicationResult; +import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.SchemaTablePrefix; +import io.trino.spi.connector.TableColumnsMetadata; +import io.trino.spi.connector.TableFunctionApplicationResult; +import io.trino.spi.expression.Call; +import io.trino.spi.expression.ConnectorExpression; +import io.trino.spi.expression.Constant; +import io.trino.spi.expression.Variable; +import io.trino.spi.function.table.ConnectorTableFunctionHandle; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.type.ArrayType; +import io.trino.spi.type.RowType; +import io.trino.spi.type.StandardTypes; +import io.trino.spi.type.Type; +import io.trino.spi.type.TypeManager; +import io.trino.spi.type.TypeSignature; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verifyNotNull; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.Iterators.singletonIterator; +import static io.airlift.slice.SliceUtf8.getCodePointAt; +import static io.airlift.slice.SliceUtf8.lengthOfCodePoint; +import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static io.trino.spi.expression.StandardFunctions.LIKE_FUNCTION_NAME; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.RealType.REAL; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; +import static io.trino.spi.type.TinyintType.TINYINT; +import static io.trino.spi.type.VarbinaryType.VARBINARY; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.lang.String.format; +import static java.util.Collections.emptyIterator; +import static java.util.Locale.ENGLISH; +import static java.util.Objects.requireNonNull; + +public class OpenSearchMetadata + implements ConnectorMetadata +{ + private static final String PASSTHROUGH_QUERY_RESULT_COLUMN_NAME = "result"; + private static final ColumnMetadata PASSTHROUGH_QUERY_RESULT_COLUMN_METADATA = ColumnMetadata.builder() + .setName(PASSTHROUGH_QUERY_RESULT_COLUMN_NAME) + .setType(VARCHAR) + .setNullable(true) + .setHidden(false) + .build(); + + private static final Map PASSTHROUGH_QUERY_COLUMNS = ImmutableMap.of( + PASSTHROUGH_QUERY_RESULT_COLUMN_NAME, + new OpenSearchColumnHandle( + PASSTHROUGH_QUERY_RESULT_COLUMN_NAME, + VARCHAR, + new VarcharDecoder.Descriptor(PASSTHROUGH_QUERY_RESULT_COLUMN_NAME), + false)); + + // See https://opensearch.org/docs/latest/query-dsl/term/regexp/ + private static final Set REGEXP_RESERVED_CHARACTERS = IntStream.of('.', 
'?', '+', '*', '|', '{', '}', '[', ']', '(', ')', '"', '#', '@', '&', '<', '>', '~') + .boxed() + .collect(toImmutableSet()); + + private final Type ipAddressType; + private final OpenSearchClient client; + private final String schemaName; + + @Inject + public OpenSearchMetadata(TypeManager typeManager, OpenSearchClient client, OpenSearchConfig config) + { + this.ipAddressType = typeManager.getType(new TypeSignature(StandardTypes.IPADDRESS)); + this.client = requireNonNull(client, "client is null"); + this.schemaName = config.getDefaultSchema(); + } + + @Override + public List listSchemaNames(ConnectorSession session) + { + return ImmutableList.of(schemaName); + } + + @Override + public OpenSearchTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) + { + requireNonNull(tableName, "tableName is null"); + + if (tableName.getSchemaName().equals(schemaName)) { + if (client.indexExists(tableName.getTableName()) && !client.getIndexMetadata(tableName.getTableName()).getSchema().getFields().isEmpty()) { + return new OpenSearchTableHandle(OpenSearchTableHandle.Type.SCAN, schemaName, tableName.getTableName(), Optional.empty()); + } + } + + return null; + } + + @Override + public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) + { + OpenSearchTableHandle handle = (OpenSearchTableHandle) table; + + if (isPassthroughQuery(handle)) { + return new ConnectorTableMetadata( + new SchemaTableName(handle.getSchema(), handle.getIndex()), + ImmutableList.of(PASSTHROUGH_QUERY_RESULT_COLUMN_METADATA)); + } + return getTableMetadata(handle.getSchema(), handle.getIndex()); + } + + private ConnectorTableMetadata getTableMetadata(String schemaName, String tableName) + { + InternalTableMetadata internalTableMetadata = makeInternalTableMetadata(schemaName, tableName); + return new ConnectorTableMetadata(new SchemaTableName(schemaName, tableName), internalTableMetadata.getColumnMetadata()); + } + + private InternalTableMetadata makeInternalTableMetadata(ConnectorTableHandle table) + { + OpenSearchTableHandle handle = (OpenSearchTableHandle) table; + return makeInternalTableMetadata(handle.getSchema(), handle.getIndex()); + } + + private InternalTableMetadata makeInternalTableMetadata(String schema, String tableName) + { + IndexMetadata metadata = client.getIndexMetadata(tableName); + List fields = getColumnFields(metadata); + return new InternalTableMetadata(new SchemaTableName(schema, tableName), makeColumnMetadata(fields), makeColumnHandles(fields)); + } + + private List getColumnFields(IndexMetadata metadata) + { + Map counts = metadata.getSchema() + .getFields().stream() + .collect(Collectors.groupingBy(f -> f.getName().toLowerCase(ENGLISH), Collectors.counting())); + + return metadata.getSchema().getFields().stream() + .filter(field -> toTrino(field) != null && counts.get(field.getName().toLowerCase(ENGLISH)) <= 1) + .collect(toImmutableList()); + } + + private List makeColumnMetadata(List fields) + { + ImmutableList.Builder result = ImmutableList.builder(); + + for (BuiltinColumns builtinColumn : BuiltinColumns.values()) { + result.add(builtinColumn.getMetadata()); + } + + for (IndexMetadata.Field field : fields) { + result.add(ColumnMetadata.builder() + .setName(field.getName()) + .setType(toTrino(field).getType()) + .build()); + } + return result.build(); + } + + private Map makeColumnHandles(List fields) + { + ImmutableMap.Builder result = ImmutableMap.builder(); + + for (BuiltinColumns builtinColumn : BuiltinColumns.values()) { + 
result.put(builtinColumn.getName(), builtinColumn.getColumnHandle()); + } + + for (IndexMetadata.Field field : fields) { + TypeAndDecoder converted = toTrino(field); + result.put(field.getName(), new OpenSearchColumnHandle( + field.getName(), + converted.getType(), + converted.getDecoderDescriptor(), + supportsPredicates(field.getType()))); + } + + return result.buildOrThrow(); + } + + private static boolean supportsPredicates(IndexMetadata.Type type) + { + if (type instanceof DateTimeType) { + return true; + } + + if (type instanceof PrimitiveType) { + switch (((PrimitiveType) type).getName().toLowerCase(ENGLISH)) { + case "boolean": + case "byte": + case "short": + case "integer": + case "long": + case "double": + case "float": + case "keyword": + return true; + } + } + + return false; + } + + private TypeAndDecoder toTrino(IndexMetadata.Field field) + { + return toTrino("", field); + } + + private TypeAndDecoder toTrino(String prefix, IndexMetadata.Field field) + { + String path = appendPath(prefix, field.getName()); + + checkArgument(!field.asRawJson() || !field.isArray(), format("A column, (%s) cannot be declared as a Trino array and also be rendered as json.", path)); + + if (field.asRawJson()) { + return new TypeAndDecoder(VARCHAR, new RawJsonDecoder.Descriptor(path)); + } + + if (field.isArray()) { + TypeAndDecoder element = toTrino(path, elementField(field)); + return new TypeAndDecoder(new ArrayType(element.getType()), new ArrayDecoder.Descriptor(element.getDecoderDescriptor())); + } + + IndexMetadata.Type type = field.getType(); + if (type instanceof PrimitiveType primitiveType) { + switch (primitiveType.getName()) { + case "float": + return new TypeAndDecoder(REAL, new RealDecoder.Descriptor(path)); + case "double": + return new TypeAndDecoder(DOUBLE, new DoubleDecoder.Descriptor(path)); + case "byte": + return new TypeAndDecoder(TINYINT, new TinyintDecoder.Descriptor(path)); + case "short": + return new TypeAndDecoder(SMALLINT, new SmallintDecoder.Descriptor(path)); + case "integer": + return new TypeAndDecoder(INTEGER, new IntegerDecoder.Descriptor(path)); + case "long": + return new TypeAndDecoder(BIGINT, new BigintDecoder.Descriptor(path)); + case "text": + case "keyword": + return new TypeAndDecoder(VARCHAR, new VarcharDecoder.Descriptor(path)); + case "ip": + return new TypeAndDecoder(ipAddressType, new IpAddressDecoder.Descriptor(path, ipAddressType)); + case "boolean": + return new TypeAndDecoder(BOOLEAN, new BooleanDecoder.Descriptor(path)); + case "binary": + return new TypeAndDecoder(VARBINARY, new VarbinaryDecoder.Descriptor(path)); + } + } + else if (type instanceof ScaledFloatType) { + return new TypeAndDecoder(DOUBLE, new DoubleDecoder.Descriptor(path)); + } + else if (type instanceof DateTimeType dateTimeType) { + if (dateTimeType.getFormats().isEmpty()) { + return new TypeAndDecoder(TIMESTAMP_MILLIS, new TimestampDecoder.Descriptor(path)); + } + // otherwise, skip -- we don't support custom formats, yet + } + else if (type instanceof ObjectType objectType) { + ImmutableList.Builder rowFieldsBuilder = ImmutableList.builder(); + ImmutableList.Builder decoderFields = ImmutableList.builder(); + for (IndexMetadata.Field rowField : objectType.getFields()) { + String name = rowField.getName(); + TypeAndDecoder child = toTrino(path, rowField); + + if (child != null) { + decoderFields.add(new RowDecoder.NameAndDescriptor(name, child.getDecoderDescriptor())); + rowFieldsBuilder.add(RowType.field(name, child.getType())); + } + } + + List rowFields = 
rowFieldsBuilder.build(); + if (!rowFields.isEmpty()) { + return new TypeAndDecoder(RowType.from(rowFields), new RowDecoder.Descriptor(path, decoderFields.build())); + } + + // otherwise, skip -- row types must have at least 1 field + } + + return null; + } + + private static String appendPath(String base, String element) + { + if (base.isEmpty()) { + return element; + } + + return base + "." + element; + } + + public static IndexMetadata.Field elementField(IndexMetadata.Field field) + { + checkArgument(field.isArray(), "Cannot get element field from a non-array field"); + return new IndexMetadata.Field(field.asRawJson(), false, field.getName(), field.getType()); + } + + @Override + public List listTables(ConnectorSession session, Optional schemaName) + { + if (schemaName.isPresent() && !schemaName.get().equals(this.schemaName)) { + return ImmutableList.of(); + } + + ImmutableList.Builder result = ImmutableList.builder(); + Set indexes = ImmutableSet.copyOf(client.getIndexes()); + + indexes.stream() + .map(index -> new SchemaTableName(this.schemaName, index)) + .forEach(result::add); + + client.getAliases().entrySet().stream() + .filter(entry -> indexes.contains(entry.getKey())) + .flatMap(entry -> entry.getValue().stream() + .map(alias -> new SchemaTableName(this.schemaName, alias))) + .distinct() + .forEach(result::add); + + return result.build(); + } + + @Override + public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) + { + OpenSearchTableHandle table = (OpenSearchTableHandle) tableHandle; + + if (isPassthroughQuery(table)) { + return PASSTHROUGH_QUERY_COLUMNS; + } + + InternalTableMetadata tableMetadata = makeInternalTableMetadata(tableHandle); + return tableMetadata.getColumnHandles(); + } + + @Override + public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) + { + OpenSearchTableHandle table = (OpenSearchTableHandle) tableHandle; + OpenSearchColumnHandle column = (OpenSearchColumnHandle) columnHandle; + + if (isPassthroughQuery(table)) { + if (column.getName().equals(PASSTHROUGH_QUERY_RESULT_COLUMN_METADATA.getName())) { + return PASSTHROUGH_QUERY_RESULT_COLUMN_METADATA; + } + + throw new IllegalArgumentException(format("Unexpected column for table '%s$query': %s", table.getIndex(), column.getName())); + } + + return BuiltinColumns.of(column.getName()) + .map(BuiltinColumns::getMetadata) + .orElse(ColumnMetadata.builder() + .setName(column.getName()) + .setType(column.getType()) + .build()); + } + + @Override + public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) + { + throw new UnsupportedOperationException("The deprecated listTableColumns is not supported because streamTableColumns is implemented instead"); + } + + @Override + public Iterator streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) + { + if (prefix.getSchema().isPresent() && !prefix.getSchema().get().equals(schemaName)) { + return emptyIterator(); + } + + if (prefix.getSchema().isPresent() && prefix.getTable().isPresent()) { + ConnectorTableMetadata metadata = getTableMetadata(prefix.getSchema().get(), prefix.getTable().get()); + return singletonIterator(TableColumnsMetadata.forTable(metadata.getTable(), metadata.getColumns())); + } + + return listTables(session, prefix.getSchema()).stream() + .map(name -> getTableMetadata(name.getSchemaName(), name.getTableName())) + .map(tableMetadata -> TableColumnsMetadata.forTable(tableMetadata.getTable(), 
tableMetadata.getColumns())) + .iterator(); + } + + @Override + public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) + { + OpenSearchTableHandle handle = (OpenSearchTableHandle) table; + + return new ConnectorTableProperties( + handle.getConstraint(), + Optional.empty(), + Optional.empty(), + ImmutableList.of()); + } + + @Override + public Optional> applyLimit(ConnectorSession session, ConnectorTableHandle table, long limit) + { + OpenSearchTableHandle handle = (OpenSearchTableHandle) table; + + if (isPassthroughQuery(handle)) { + // limit pushdown currently not supported passthrough query + return Optional.empty(); + } + + if (handle.getLimit().isPresent() && handle.getLimit().getAsLong() <= limit) { + return Optional.empty(); + } + + handle = new OpenSearchTableHandle( + handle.getType(), + handle.getSchema(), + handle.getIndex(), + handle.getConstraint(), + handle.getRegexes(), + handle.getQuery(), + OptionalLong.of(limit)); + + return Optional.of(new LimitApplicationResult<>(handle, false, false)); + } + + @Override + public Optional> applyFilter(ConnectorSession session, ConnectorTableHandle table, Constraint constraint) + { + OpenSearchTableHandle handle = (OpenSearchTableHandle) table; + + if (isPassthroughQuery(handle)) { + // filter pushdown currently not supported for passthrough query + return Optional.empty(); + } + + Map supported = new HashMap<>(); + Map unsupported = new HashMap<>(); + Map domains = constraint.getSummary().getDomains().orElseThrow(() -> new IllegalArgumentException("constraint summary is NONE")); + for (Map.Entry entry : domains.entrySet()) { + OpenSearchColumnHandle column = (OpenSearchColumnHandle) entry.getKey(); + + if (column.isSupportsPredicates()) { + supported.put(column, entry.getValue()); + } + else { + unsupported.put(column, entry.getValue()); + } + } + + TupleDomain oldDomain = handle.getConstraint(); + TupleDomain newDomain = oldDomain.intersect(TupleDomain.withColumnDomains(supported)); + + ConnectorExpression oldExpression = constraint.getExpression(); + Map newRegexes = new HashMap<>(handle.getRegexes()); + List expressions = ConnectorExpressions.extractConjuncts(constraint.getExpression()); + List notHandledExpressions = new ArrayList<>(); + for (ConnectorExpression expression : expressions) { + if (expression instanceof Call call) { + if (isSupportedLikeCall(call)) { + List arguments = call.getArguments(); + String variableName = ((Variable) arguments.get(0)).getName(); + OpenSearchColumnHandle column = (OpenSearchColumnHandle) constraint.getAssignments().get(variableName); + verifyNotNull(column, "No assignment for %s", variableName); + String columnName = column.getName(); + Object pattern = ((Constant) arguments.get(1)).getValue(); + Optional escape = Optional.empty(); + if (arguments.size() == 3) { + escape = Optional.of((Slice) ((Constant) arguments.get(2)).getValue()); + } + + if (!newRegexes.containsKey(columnName) && pattern instanceof Slice) { + IndexMetadata metadata = client.getIndexMetadata(handle.getIndex()); + if (metadata.getSchema() + .getFields().stream() + .anyMatch(field -> columnName.equals(field.getName()) && field.getType() instanceof PrimitiveType && "keyword".equals(((PrimitiveType) field.getType()).getName()))) { + newRegexes.put(columnName, likeToRegexp((Slice) pattern, escape)); + continue; + } + } + } + } + notHandledExpressions.add(expression); + } + + ConnectorExpression newExpression = ConnectorExpressions.and(notHandledExpressions); + if 
(oldDomain.equals(newDomain) && oldExpression.equals(newExpression)) { + return Optional.empty(); + } + + handle = new OpenSearchTableHandle( + handle.getType(), + handle.getSchema(), + handle.getIndex(), + newDomain, + newRegexes, + handle.getQuery(), + handle.getLimit()); + + return Optional.of(new ConstraintApplicationResult<>(handle, TupleDomain.withColumnDomains(unsupported), newExpression, false)); + } + + protected static boolean isSupportedLikeCall(Call call) + { + if (!LIKE_FUNCTION_NAME.equals(call.getFunctionName())) { + return false; + } + + List arguments = call.getArguments(); + if (arguments.size() < 2 || arguments.size() > 3) { + return false; + } + + if (!(arguments.get(0) instanceof Variable) || !(arguments.get(1) instanceof Constant)) { + return false; + } + + if (arguments.size() == 3) { + return arguments.get(2) instanceof Constant; + } + + return true; + } + + protected static String likeToRegexp(Slice pattern, Optional escape) + { + Optional escapeChar = escape.map(OpenSearchMetadata::getEscapeChar); + StringBuilder regex = new StringBuilder(); + boolean escaped = false; + int position = 0; + while (position < pattern.length()) { + int currentChar = getCodePointAt(pattern, position); + position += lengthOfCodePoint(currentChar); + checkEscape(!escaped || currentChar == '%' || currentChar == '_' || currentChar == escapeChar.get()); + if (!escaped && escapeChar.isPresent() && currentChar == escapeChar.get()) { + escaped = true; + } + else { + switch (currentChar) { + case '%': + regex.append(escaped ? "%" : ".*"); + escaped = false; + break; + case '_': + regex.append(escaped ? "_" : "."); + escaped = false; + break; + case '\\': + regex.append("\\\\"); + break; + default: + // escape special regex characters + if (REGEXP_RESERVED_CHARACTERS.contains(currentChar)) { + regex.append('\\'); + } + + regex.appendCodePoint(currentChar); + escaped = false; + } + } + } + + checkEscape(!escaped); + return regex.toString(); + } + + private static void checkEscape(boolean condition) + { + if (!condition) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Escape character must be followed by '%', '_' or the escape character itself"); + } + } + + private static char getEscapeChar(Slice escape) + { + String escapeString = escape.toStringUtf8(); + if (escapeString.length() == 1) { + return escapeString.charAt(0); + } + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Escape string must be a single character"); + } + + private static boolean isPassthroughQuery(OpenSearchTableHandle table) + { + return table.getType().equals(OpenSearchTableHandle.Type.QUERY); + } + + @Override + public Optional> applyTableFunction(ConnectorSession session, ConnectorTableFunctionHandle handle) + { + if (!(handle instanceof RawQueryFunctionHandle)) { + return Optional.empty(); + } + + ConnectorTableHandle tableHandle = ((RawQueryFunctionHandle) handle).getTableHandle(); + List columnHandles = ImmutableList.copyOf(getColumnHandles(session, tableHandle).values()); + return Optional.of(new TableFunctionApplicationResult<>(tableHandle, columnHandles)); + } + + private static class InternalTableMetadata + { + private final SchemaTableName tableName; + private final List columnMetadata; + private final Map columnHandles; + + public InternalTableMetadata( + SchemaTableName tableName, + List columnMetadata, + Map columnHandles) + { + this.tableName = tableName; + this.columnMetadata = columnMetadata; + this.columnHandles = columnHandles; + } + + public SchemaTableName getTableName() + { + return 
tableName; + } + + public List getColumnMetadata() + { + return columnMetadata; + } + + public Map getColumnHandles() + { + return columnHandles; + } + } + + private static class TypeAndDecoder + { + private final Type type; + private final DecoderDescriptor decoderDescriptor; + + public TypeAndDecoder(Type type, DecoderDescriptor decoderDescriptor) + { + this.type = type; + this.decoderDescriptor = decoderDescriptor; + } + + public Type getType() + { + return type; + } + + public DecoderDescriptor getDecoderDescriptor() + { + return decoderDescriptor; + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPageSourceProvider.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPageSourceProvider.java new file mode 100644 index 0000000000000..d4a6d3ae4c8c0 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPageSourceProvider.java @@ -0,0 +1,79 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.inject.Inject; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.ConnectorPageSourceProvider; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorSplit; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.connector.DynamicFilter; +import io.trino.spi.type.TypeManager; + +import java.util.List; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.opensearch.OpenSearchTableHandle.Type.QUERY; +import static java.util.Objects.requireNonNull; + +public class OpenSearchPageSourceProvider + implements ConnectorPageSourceProvider +{ + private final OpenSearchClient client; + private final TypeManager typeManager; + + @Inject + public OpenSearchPageSourceProvider(OpenSearchClient client, TypeManager typeManager) + { + this.client = requireNonNull(client, "client is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + } + + @Override + public ConnectorPageSource createPageSource( + ConnectorTransactionHandle transaction, + ConnectorSession session, + ConnectorSplit split, + ConnectorTableHandle table, + List columns, + DynamicFilter dynamicFilter) + { + requireNonNull(split, "split is null"); + requireNonNull(table, "table is null"); + + OpenSearchTableHandle opensearchTable = (OpenSearchTableHandle) table; + OpenSearchSplit opensearchSplit = (OpenSearchSplit) split; + + if (opensearchTable.getType().equals(QUERY)) { + return new PassthroughQueryPageSource(client, opensearchTable); + } + + if (columns.isEmpty()) { + return new CountQueryPageSource(client, opensearchTable, opensearchSplit); + } + + return new ScanQueryPageSource( + client, + typeManager, + opensearchTable, + opensearchSplit, + 
columns.stream() + .map(OpenSearchColumnHandle.class::cast) + .collect(toImmutableList())); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPlugin.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPlugin.java new file mode 100644 index 0000000000000..bd44b5d688f4c --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchPlugin.java @@ -0,0 +1,44 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import io.trino.spi.Plugin; +import io.trino.spi.connector.ConnectorFactory; + +import static java.util.Objects.requireNonNull; + +public class OpenSearchPlugin + implements Plugin +{ + private final ConnectorFactory connectorFactory; + + public OpenSearchPlugin() + { + connectorFactory = new OpenSearchConnectorFactory(); + } + + @VisibleForTesting + OpenSearchPlugin(OpenSearchConnectorFactory factory) + { + connectorFactory = requireNonNull(factory, "factory is null"); + } + + @Override + public synchronized Iterable getConnectorFactories() + { + return ImmutableList.of(connectorFactory); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchQueryBuilder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchQueryBuilder.java new file mode 100644 index 0000000000000..9873c46e90102 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchQueryBuilder.java @@ -0,0 +1,172 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import io.airlift.slice.Slice; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.type.Type; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.RegexpQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; + +import java.time.Instant; +import java.time.ZoneOffset; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.RealType.REAL; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; +import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; +import static io.trino.spi.type.TinyintType.TINYINT; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.lang.Math.floorDiv; +import static java.lang.Math.toIntExact; +import static java.time.format.DateTimeFormatter.ISO_DATE_TIME; + +public final class OpenSearchQueryBuilder +{ + private OpenSearchQueryBuilder() {} + + public static QueryBuilder buildSearchQuery(TupleDomain constraint, Optional query, Map regexes) + { + BoolQueryBuilder queryBuilder = new BoolQueryBuilder(); + if (constraint.getDomains().isPresent()) { + for (Map.Entry entry : constraint.getDomains().get().entrySet()) { + OpenSearchColumnHandle column = entry.getKey(); + Domain domain = entry.getValue(); + + checkArgument(!domain.isNone(), "Unexpected NONE domain for %s", column.getName()); + if (!domain.isAll()) { + addPredicateToQueryBuilder(queryBuilder, column.getName(), domain, column.getType()); + } + } + } + + regexes.forEach((name, value) -> queryBuilder.filter(new BoolQueryBuilder().must(new RegexpQueryBuilder(name, value)))); + + query.map(QueryStringQueryBuilder::new) + .ifPresent(queryBuilder::must); + + if (queryBuilder.hasClauses()) { + return queryBuilder; + } + return new MatchAllQueryBuilder(); + } + + private static void addPredicateToQueryBuilder(BoolQueryBuilder queryBuilder, String columnName, Domain domain, Type type) + { + checkArgument(domain.getType().isOrderable(), "Domain type must be orderable"); + + if (domain.getValues().isNone()) { + queryBuilder.mustNot(new ExistsQueryBuilder(columnName)); + return; + } + + if (domain.getValues().isAll()) { + queryBuilder.filter(new ExistsQueryBuilder(columnName)); + return; + } + + List shouldClauses = getShouldClauses(columnName, domain, type); + if (shouldClauses.size() == 1) { + queryBuilder.filter(getOnlyElement(shouldClauses)); + return; + } + if (shouldClauses.size() > 1) { + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + shouldClauses.forEach(boolQueryBuilder::should); + queryBuilder.filter(boolQueryBuilder); + return; + } + } + + private 
static List getShouldClauses(String columnName, Domain domain, Type type) + { + ImmutableList.Builder shouldClauses = ImmutableList.builder(); + for (Range range : domain.getValues().getRanges().getOrderedRanges()) { + checkState(!range.isAll(), "Invalid range for column: %s", columnName); + if (range.isSingleValue()) { + shouldClauses.add(new TermQueryBuilder(columnName, getValue(type, range.getSingleValue()))); + } + else { + RangeQueryBuilder rangeQueryBuilder = new RangeQueryBuilder(columnName); + if (!range.isLowUnbounded()) { + Object lowBound = getValue(type, range.getLowBoundedValue()); + if (range.isLowInclusive()) { + rangeQueryBuilder.gte(lowBound); + } + else { + rangeQueryBuilder.gt(lowBound); + } + } + if (!range.isHighUnbounded()) { + Object highBound = getValue(type, range.getHighBoundedValue()); + if (range.isHighInclusive()) { + rangeQueryBuilder.lte(highBound); + } + else { + rangeQueryBuilder.lt(highBound); + } + } + shouldClauses.add(rangeQueryBuilder); + } + } + if (domain.isNullAllowed()) { + shouldClauses.add(new BoolQueryBuilder().mustNot(new ExistsQueryBuilder(columnName))); + } + return shouldClauses.build(); + } + + private static Object getValue(Type type, Object value) + { + if (type.equals(BOOLEAN) || + type.equals(TINYINT) || + type.equals(SMALLINT) || + type.equals(INTEGER) || + type.equals(BIGINT) || + type.equals(DOUBLE)) { + return value; + } + if (type.equals(REAL)) { + return Float.intBitsToFloat(toIntExact(((Long) value))); + } + if (type.equals(VARCHAR)) { + return ((Slice) value).toStringUtf8(); + } + if (type.equals(TIMESTAMP_MILLIS)) { + return Instant.ofEpochMilli(floorDiv((Long) value, MICROSECONDS_PER_MILLISECOND)) + .atZone(ZoneOffset.UTC) + .toLocalDateTime() + .format(ISO_DATE_TIME); + } + throw new IllegalArgumentException("Unhandled type: " + type); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplit.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplit.java new file mode 100644 index 0000000000000..f3b63af37a52f --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplit.java @@ -0,0 +1,99 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import io.airlift.slice.SizeOf; +import io.trino.spi.HostAddress; +import io.trino.spi.connector.ConnectorSplit; + +import java.util.List; +import java.util.Optional; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static io.airlift.slice.SizeOf.estimatedSizeOf; +import static io.airlift.slice.SizeOf.instanceSize; +import static io.airlift.slice.SizeOf.sizeOf; +import static java.util.Objects.requireNonNull; + +public class OpenSearchSplit + implements ConnectorSplit +{ + private static final int INSTANCE_SIZE = instanceSize(OpenSearchSplit.class); + + private final String index; + private final int shard; + private final Optional address; + + @JsonCreator + public OpenSearchSplit( + @JsonProperty("index") String index, + @JsonProperty("shard") int shard, + @JsonProperty("address") Optional address) + { + this.index = requireNonNull(index, "index is null"); + this.shard = shard; + this.address = requireNonNull(address, "address is null"); + } + + @JsonProperty + public String getIndex() + { + return index; + } + + @JsonProperty + public int getShard() + { + return shard; + } + + @JsonProperty + public Optional getAddress() + { + return address; + } + + @Override + public List getAddresses() + { + return address.map(host -> ImmutableList.of(HostAddress.fromString(host))) + .orElseGet(ImmutableList::of); + } + + @Override + public Object getInfo() + { + return this; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + estimatedSizeOf(index) + + sizeOf(address, SizeOf::estimatedSizeOf); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("index", index) + .add("shard", shard) + .toString(); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplitManager.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplitManager.java new file mode 100644 index 0000000000000..110a9245afa62 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchSplitManager.java @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.inject.Inject; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorSplitManager; +import io.trino.spi.connector.ConnectorSplitSource; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.connector.Constraint; +import io.trino.spi.connector.DynamicFilter; +import io.trino.spi.connector.FixedSplitSource; + +import java.util.List; +import java.util.Optional; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; + +public class OpenSearchSplitManager + implements ConnectorSplitManager +{ + private final OpenSearchClient client; + + @Inject + public OpenSearchSplitManager(OpenSearchClient client) + { + this.client = requireNonNull(client, "client is null"); + } + + @Override + public ConnectorSplitSource getSplits( + ConnectorTransactionHandle transaction, + ConnectorSession session, + ConnectorTableHandle table, + DynamicFilter dynamicFilter, + Constraint constraint) + { + OpenSearchTableHandle tableHandle = (OpenSearchTableHandle) table; + + if (tableHandle.getType().equals(OpenSearchTableHandle.Type.QUERY)) { + return new FixedSplitSource(new OpenSearchSplit(tableHandle.getIndex(), 0, Optional.empty())); + } + List splits = client.getSearchShards(tableHandle.getIndex()).stream() + .map(shard -> new OpenSearchSplit(shard.getIndex(), shard.getId(), shard.getAddress())) + .collect(toImmutableList()); + + return new FixedSplitSource(splits); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTableHandle.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTableHandle.java new file mode 100644 index 0000000000000..e95cf612e7124 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTableHandle.java @@ -0,0 +1,170 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.predicate.TupleDomain; + +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.stream.Collectors; + +import static java.util.Objects.requireNonNull; + +public final class OpenSearchTableHandle + implements ConnectorTableHandle +{ + public enum Type + { + SCAN, QUERY + } + + private final Type type; + private final String schema; + private final String index; + private final TupleDomain constraint; + private final Map regexes; + private final Optional query; + private final OptionalLong limit; + + public OpenSearchTableHandle(Type type, String schema, String index, Optional query) + { + this.type = requireNonNull(type, "type is null"); + this.schema = requireNonNull(schema, "schema is null"); + this.index = requireNonNull(index, "index is null"); + this.query = requireNonNull(query, "query is null"); + + constraint = TupleDomain.all(); + regexes = ImmutableMap.of(); + limit = OptionalLong.empty(); + } + + @JsonCreator + public OpenSearchTableHandle( + @JsonProperty("type") Type type, + @JsonProperty("schema") String schema, + @JsonProperty("index") String index, + @JsonProperty("constraint") TupleDomain constraint, + @JsonProperty("regexes") Map regexes, + @JsonProperty("query") Optional query, + @JsonProperty("limit") OptionalLong limit) + { + this.type = requireNonNull(type, "type is null"); + this.schema = requireNonNull(schema, "schema is null"); + this.index = requireNonNull(index, "index is null"); + this.constraint = requireNonNull(constraint, "constraint is null"); + this.regexes = ImmutableMap.copyOf(requireNonNull(regexes, "regexes is null")); + this.query = requireNonNull(query, "query is null"); + this.limit = requireNonNull(limit, "limit is null"); + } + + @JsonProperty + public Type getType() + { + return type; + } + + @JsonProperty + public String getSchema() + { + return schema; + } + + @JsonProperty + public String getIndex() + { + return index; + } + + @JsonProperty + public TupleDomain getConstraint() + { + return constraint; + } + + @JsonProperty + public Map getRegexes() + { + return regexes; + } + + @JsonProperty + public OptionalLong getLimit() + { + return limit; + } + + @JsonProperty + public Optional getQuery() + { + return query; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + OpenSearchTableHandle that = (OpenSearchTableHandle) o; + return type == that.type && + schema.equals(that.schema) && + index.equals(that.index) && + constraint.equals(that.constraint) && + regexes.equals(that.regexes) && + query.equals(that.query) && + limit.equals(that.limit); + } + + @Override + public int hashCode() + { + return Objects.hash(type, schema, index, constraint, regexes, query, limit); + } + + @Override + public String toString() + { + StringBuilder builder = new StringBuilder(); + builder.append(type + ":" + index); + + StringBuilder attributes = new StringBuilder(); + if (!regexes.isEmpty()) { + attributes.append("regexes=["); + attributes.append(regexes.entrySet().stream() + .map(regex -> regex.getKey() + ":" + regex.getValue()) + .collect(Collectors.joining(", "))); + 
attributes.append("]"); + } + limit.ifPresent(value -> attributes.append("limit=" + value)); + query.ifPresent(value -> attributes.append("query" + value)); + + if (attributes.length() > 0) { + builder.append("("); + builder.append(attributes); + builder.append(")"); + } + + return builder.toString(); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTransactionHandle.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTransactionHandle.java new file mode 100644 index 0000000000000..ebef379d43895 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/OpenSearchTransactionHandle.java @@ -0,0 +1,22 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import io.trino.spi.connector.ConnectorTransactionHandle; + +public enum OpenSearchTransactionHandle + implements ConnectorTransactionHandle +{ + INSTANCE +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PassthroughQueryPageSource.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PassthroughQueryPageSource.java new file mode 100644 index 0000000000000..f29269259d3d2 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PassthroughQueryPageSource.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.spi.Page; +import io.trino.spi.PageBuilder; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.connector.ConnectorPageSource; + +import java.io.IOException; + +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.util.Objects.requireNonNull; + +public class PassthroughQueryPageSource + implements ConnectorPageSource +{ + private final long readTimeNanos; + private final String result; + private boolean done; + + public PassthroughQueryPageSource(OpenSearchClient client, OpenSearchTableHandle table) + { + requireNonNull(client, "client is null"); + requireNonNull(table, "table is null"); + + long start = System.nanoTime(); + result = client.executeQuery(table.getIndex(), table.getQuery().get()); + readTimeNanos = System.nanoTime() - start; + } + + @Override + public long getCompletedBytes() + { + return result.length(); + } + + @Override + public long getReadTimeNanos() + { + return readTimeNanos; + } + + @Override + public boolean isFinished() + { + return done; + } + + @Override + public Page getNextPage() + { + if (done) { + return null; + } + + done = true; + + PageBuilder page = new PageBuilder(1, ImmutableList.of(VARCHAR)); + page.declarePosition(); + BlockBuilder column = page.getBlockBuilder(0); + VARCHAR.writeSlice(column, Slices.utf8Slice(result)); + return page.build(); + } + + @Override + public long getMemoryUsage() + { + return 0; + } + + @Override + public void close() + throws IOException + { + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PasswordConfig.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PasswordConfig.java new file mode 100644 index 0000000000000..392f7507ce063 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/PasswordConfig.java @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
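PassthroughQueryPageSource above hands the raw JSON response of a user-provided query back to Trino as a single VARCHAR cell and then reports itself finished. A minimal consumption sketch (illustrative; assumes an OpenSearchClient and a QUERY-type handle are already available):

    ConnectorPageSource source = new PassthroughQueryPageSource(client, table);
    Page page = source.getNextPage();   // exactly one page: one position, one VARCHAR column
    String rawJson = VARCHAR.getSlice(page.getBlock(0), 0).toStringUtf8();
    // Subsequent getNextPage() calls return null, because the source marks itself done
    // after emitting the first page.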
+ */ +package io.trino.plugin.opensearch; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigSecuritySensitive; +import jakarta.validation.constraints.NotNull; + +public class PasswordConfig +{ + private String user; + private String password; + + @NotNull + public String getUser() + { + return user; + } + + @Config("opensearch.auth.user") + public PasswordConfig setUser(String user) + { + this.user = user; + return this; + } + + @NotNull + public String getPassword() + { + return password; + } + + @Config("opensearch.auth.password") + @ConfigSecuritySensitive + public PasswordConfig setPassword(String password) + { + this.password = password; + return this; + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ScanQueryPageSource.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ScanQueryPageSource.java new file mode 100644 index 0000000000000..eee688f9d5b4b --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ScanQueryPageSource.java @@ -0,0 +1,323 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.ImmutableList; +import io.airlift.log.Logger; +import io.trino.plugin.opensearch.client.OpenSearchClient; +import io.trino.plugin.opensearch.decoders.Decoder; +import io.trino.spi.Page; +import io.trino.spi.block.Block; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.block.PageBuilderStatus; +import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.type.RowType; +import io.trino.spi.type.Type; +import io.trino.spi.type.TypeManager; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.function.Supplier; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.opensearch.BuiltinColumns.SOURCE; +import static io.trino.plugin.opensearch.BuiltinColumns.isBuiltinColumn; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; +import static java.util.Objects.requireNonNull; +import static java.util.function.Predicate.isEqual; +import static java.util.stream.Collectors.toList; + +public class ScanQueryPageSource + implements ConnectorPageSource +{ + private static final Logger LOG = Logger.get(ScanQueryPageSource.class); + + private final List decoders; + + private final SearchHitIterator iterator; + private final BlockBuilder[] columnBuilders; + private final List columns; + private long totalBytes; + private long readTimeNanos; + + public ScanQueryPageSource( + OpenSearchClient client, + TypeManager typeManager, + OpenSearchTableHandle table, + OpenSearchSplit split, + List columns) + { + 
requireNonNull(client, "client is null"); + requireNonNull(typeManager, "typeManager is null"); + requireNonNull(columns, "columns is null"); + + this.columns = ImmutableList.copyOf(columns); + + decoders = createDecoders(columns); + + // When the _source field is requested, we need to bypass column pruning when fetching the document + boolean needAllFields = columns.stream() + .map(OpenSearchColumnHandle::getName) + .anyMatch(isEqual(SOURCE.getName())); + + // Columns to fetch as doc_fields instead of pulling them out of the JSON source + // This is convenient for types such as DATE, TIMESTAMP, etc, which have multiple possible + // representations in JSON, but a single normalized representation as doc_field. + List documentFields = flattenFields(columns).entrySet().stream() + .filter(entry -> entry.getValue().equals(TIMESTAMP_MILLIS)) + .map(Map.Entry::getKey) + .collect(toImmutableList()); + + columnBuilders = columns.stream() + .map(OpenSearchColumnHandle::getType) + .map(type -> type.createBlockBuilder(null, 1)) + .toArray(BlockBuilder[]::new); + + List requiredFields = columns.stream() + .map(OpenSearchColumnHandle::getName) + .filter(name -> !isBuiltinColumn(name)) + .collect(toList()); + + // sorting by _doc (index order) get special treatment in OpenSearch and is more efficient + Optional sort = Optional.of("_doc"); + + if (table.getQuery().isPresent()) { + // However, if we're using a custom OpenSearch query, use default sorting. + // Documents will be scored and returned based on relevance + sort = Optional.empty(); + } + + long start = System.nanoTime(); + SearchResponse searchResponse = client.beginSearch( + split.getIndex(), + split.getShard(), + OpenSearchQueryBuilder.buildSearchQuery(table.getConstraint().transformKeys(OpenSearchColumnHandle.class::cast), table.getQuery(), table.getRegexes()), + needAllFields ? 
Optional.empty() : Optional.of(requiredFields), + documentFields, + sort, + table.getLimit()); + readTimeNanos += System.nanoTime() - start; + this.iterator = new SearchHitIterator(client, () -> searchResponse, table.getLimit()); + } + + @Override + public long getCompletedBytes() + { + return totalBytes; + } + + @Override + public long getReadTimeNanos() + { + return readTimeNanos + iterator.getReadTimeNanos(); + } + + @Override + public boolean isFinished() + { + return !iterator.hasNext(); + } + + @Override + public long getMemoryUsage() + { + return 0; + } + + @Override + public void close() + { + iterator.close(); + } + + @Override + public Page getNextPage() + { + long size = 0; + while (size < PageBuilderStatus.DEFAULT_MAX_PAGE_SIZE_IN_BYTES && iterator.hasNext()) { + SearchHit hit = iterator.next(); + Map document = hit.getSourceAsMap(); + + for (int i = 0; i < decoders.size(); i++) { + String field = columns.get(i).getName(); + decoders.get(i).decode(hit, () -> getField(document, field), columnBuilders[i]); + } + + if (hit.getSourceRef() != null) { + totalBytes += hit.getSourceRef().length(); + } + + size = Arrays.stream(columnBuilders) + .mapToLong(BlockBuilder::getSizeInBytes) + .sum(); + } + + Block[] blocks = new Block[columnBuilders.length]; + for (int i = 0; i < columnBuilders.length; i++) { + blocks[i] = columnBuilders[i].build(); + columnBuilders[i] = columnBuilders[i].newBlockBuilderLike(null); + } + + return new Page(blocks); + } + + public static Object getField(Map document, String field) + { + Object value = document.get(field); + if (value == null) { + Map result = new HashMap<>(); + String prefix = field + "."; + for (Map.Entry entry : document.entrySet()) { + String key = entry.getKey(); + if (key.startsWith(prefix)) { + result.put(key.substring(prefix.length()), entry.getValue()); + } + } + + if (!result.isEmpty()) { + return result; + } + } + + return value; + } + + private Map flattenFields(List columns) + { + Map result = new HashMap<>(); + + for (OpenSearchColumnHandle column : columns) { + flattenFields(result, column.getName(), column.getType()); + } + + return result; + } + + private void flattenFields(Map result, String fieldName, Type type) + { + if (type instanceof RowType) { + for (RowType.Field field : ((RowType) type).getFields()) { + flattenFields(result, appendPath(fieldName, field.getName().get()), field.getType()); + } + } + else { + result.put(fieldName, type); + } + } + + private List createDecoders(List columns) + { + return columns.stream() + .map(OpenSearchColumnHandle::getDecoderDescriptor) + .map(DecoderDescriptor::createDecoder) + .collect(toImmutableList()); + } + + private static String appendPath(String base, String element) + { + if (base.isEmpty()) { + return element; + } + + return base + "." 
+ element; + } + + private static class SearchHitIterator + extends AbstractIterator + { + private final OpenSearchClient client; + private final Supplier first; + private final OptionalLong limit; + + private SearchHits searchHits; + private String scrollId; + private int currentPosition; + + private long readTimeNanos; + private long totalRecordCount; + + public SearchHitIterator(OpenSearchClient client, Supplier first, OptionalLong limit) + { + this.client = client; + this.first = first; + this.limit = limit; + this.totalRecordCount = 0; + } + + public long getReadTimeNanos() + { + return readTimeNanos; + } + + @Override + protected SearchHit computeNext() + { + if (limit.isPresent() && totalRecordCount == limit.getAsLong()) { + // No more record is necessary. + return endOfData(); + } + + if (scrollId == null) { + long start = System.nanoTime(); + SearchResponse response = first.get(); + readTimeNanos += System.nanoTime() - start; + reset(response); + } + else if (currentPosition == searchHits.getHits().length) { + long start = System.nanoTime(); + SearchResponse response = client.nextPage(scrollId); + readTimeNanos += System.nanoTime() - start; + reset(response); + } + + if (currentPosition == searchHits.getHits().length) { + return endOfData(); + } + + SearchHit hit = searchHits.getAt(currentPosition); + currentPosition++; + totalRecordCount++; + + return hit; + } + + private void reset(SearchResponse response) + { + scrollId = response.getScrollId(); + searchHits = response.getHits(); + currentPosition = 0; + } + + public void close() + { + if (scrollId != null) { + try { + client.clearScroll(scrollId); + } + catch (Exception e) { + // ignore + LOG.debug(e, "Error clearing scroll"); + } + } + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/AwsRequestSigner.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/AwsRequestSigner.java new file mode 100644 index 0000000000000..e93017aae848c --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/AwsRequestSigner.java @@ -0,0 +1,114 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
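A detail worth noting in ScanQueryPageSource above: getField first looks the field up directly and only then reassembles dotted keys, which covers documents whose nested objects were flattened into "parent.child" entries in _source. An illustrative call (the document literal is invented):

    // Hypothetical flattened document: the nested object "address" was stored as dotted keys.
    Map<String, Object> document = Map.of("address.city", "Warsaw", "address.zip", "00-001");
    // There is no direct "address" entry, so getField collects every "address."-prefixed
    // entry into a new map: {city=Warsaw, zip=00-001}.
    Object value = ScanQueryPageSource.getField(document, "address");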
+ */ +package io.trino.plugin.opensearch.client; + +import com.amazonaws.DefaultRequest; +import com.amazonaws.auth.AWS4Signer; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.http.HttpMethodName; +import org.apache.http.Header; +import org.apache.http.HttpEntityEnclosingRequest; +import org.apache.http.HttpHost; +import org.apache.http.HttpRequest; +import org.apache.http.HttpRequestInterceptor; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.entity.BasicHttpEntity; +import org.apache.http.message.BasicHeader; +import org.apache.http.protocol.HttpContext; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.lang.String.CASE_INSENSITIVE_ORDER; +import static org.apache.http.protocol.HttpCoreContext.HTTP_TARGET_HOST; + +class AwsRequestSigner + implements HttpRequestInterceptor +{ + private static final String SERVICE_NAME = "aoss"; + private final AWSCredentialsProvider credentialsProvider; + private final AWS4Signer signer; + + public AwsRequestSigner(String region, AWSCredentialsProvider credentialsProvider) + { + this.credentialsProvider = credentialsProvider; + this.signer = new AWS4Signer(); + + signer.setServiceName(SERVICE_NAME); + signer.setRegionName(region); + } + + @Override + public void process(HttpRequest request, HttpContext context) + throws IOException + { + String method = request.getRequestLine().getMethod(); + + URI uri = URI.create(request.getRequestLine().getUri()); + URIBuilder uriBuilder = new URIBuilder(uri); + + Map> parameters = new TreeMap<>(CASE_INSENSITIVE_ORDER); + for (NameValuePair parameter : uriBuilder.getQueryParams()) { + parameters.computeIfAbsent(parameter.getName(), key -> new ArrayList<>()) + .add(parameter.getValue()); + } + + Map headers = Arrays.stream(request.getAllHeaders()) + .collect(toImmutableMap(Header::getName, Header::getValue)); + + InputStream content = null; + if (request instanceof HttpEntityEnclosingRequest enclosingRequest) { + if (enclosingRequest.getEntity() != null) { + content = enclosingRequest.getEntity().getContent(); + } + } + + DefaultRequest awsRequest = new DefaultRequest<>(SERVICE_NAME); + + HttpHost host = (HttpHost) context.getAttribute(HTTP_TARGET_HOST); + if (host != null) { + awsRequest.setEndpoint(URI.create(host.toURI())); + } + awsRequest.setHttpMethod(HttpMethodName.fromValue(method)); + awsRequest.setResourcePath(uri.getRawPath()); + awsRequest.setContent(content); + awsRequest.setParameters(parameters); + awsRequest.setHeaders(headers); + + signer.sign(awsRequest, credentialsProvider.getCredentials()); + + Header[] newHeaders = awsRequest.getHeaders().entrySet().stream() + .map(entry -> new BasicHeader(entry.getKey(), entry.getValue())) + .toArray(Header[]::new); + + request.setHeaders(newHeaders); + + InputStream newContent = awsRequest.getContent(); + checkState(newContent == null || request instanceof HttpEntityEnclosingRequest); + if (newContent != null) { + BasicHttpEntity entity = new BasicHttpEntity(); + entity.setContent(newContent); + ((HttpEntityEnclosingRequest) request).setEntity(entity); + } + } +} diff --git 
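AwsRequestSigner above applies SigV4 signing (service name "aoss") to every outgoing request and is wired in as an ordinary Apache HttpRequestInterceptor; OpenSearchClient further down registers it exactly this way when AWS security is configured. A hedged wiring sketch (region and credentials provider are placeholders):

    // Illustrative only; OpenSearchClient.createClient() performs this registration when
    // an AwsSecurityConfig is present.
    AWSCredentialsProvider credentials = DefaultAWSCredentialsProviderChain.getInstance();
    HttpAsyncClientBuilder clientBuilder = HttpAsyncClientBuilder.create()
            .addInterceptorLast(new AwsRequestSigner("us-east-1", credentials));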
a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestClient.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestClient.java new file mode 100644 index 0000000000000..7c6d0538cc17f --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestClient.java @@ -0,0 +1,158 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.google.common.base.Stopwatch; +import dev.failsafe.Failsafe; +import dev.failsafe.FailsafeException; +import dev.failsafe.RetryPolicy; +import dev.failsafe.event.ExecutionAttemptedEvent; +import dev.failsafe.event.ExecutionCompletedEvent; +import dev.failsafe.function.CheckedSupplier; +import io.airlift.log.Logger; +import io.airlift.stats.TimeStat; +import io.trino.plugin.opensearch.OpenSearchConfig; +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.opensearch.client.Node; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; +import org.opensearch.client.RestClient; +import org.opensearch.core.rest.RestStatus; + +import java.io.IOException; +import java.util.Map; + +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.base.Throwables.throwIfUnchecked; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.util.Arrays.stream; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.MILLISECONDS; + +public class BackpressureRestClient +{ + private static final Logger log = Logger.get(BackpressureRestClient.class); + + private final RestClient delegate; + private final RetryPolicy retryPolicy; + private final TimeStat backpressureStats; + private final ThreadLocal stopwatch = ThreadLocal.withInitial(Stopwatch::createUnstarted); + + public BackpressureRestClient(RestClient delegate, OpenSearchConfig config, TimeStat backpressureStats) + { + this.delegate = requireNonNull(delegate, "restClient is null"); + this.backpressureStats = requireNonNull(backpressureStats, "backpressureStats is null"); + retryPolicy = RetryPolicy.builder() + .withMaxAttempts(-1) + .withMaxDuration(java.time.Duration.ofMillis(config.getMaxRetryTime().toMillis())) + .withBackoff(config.getBackoffInitDelay().toMillis(), config.getBackoffMaxDelay().toMillis(), MILLIS) + .withJitter(0.125) + .handleIf(BackpressureRestClient::isBackpressure) + .onFailedAttempt(this::onFailedAttempt) + .onSuccess(this::onComplete) + .onFailure(this::onComplete) + .build(); + } + + public void setHosts(HttpHost... 
hosts) + { + delegate.setNodes(stream(hosts) + .map(Node::new) + .collect(toImmutableList())); + } + + public Response performRequest(String method, String endpoint, Header... headers) + throws IOException + { + return executeWithRetries(() -> delegate.performRequest(toRequest(method, endpoint, headers))); + } + + public Response performRequest(String method, String endpoint, Map params, HttpEntity entity, Header... headers) + throws IOException + { + return executeWithRetries(() -> delegate.performRequest(toRequest(method, endpoint, params, entity, headers))); + } + + private static Request toRequest(String method, String endpoint, Map params, HttpEntity entity, Header... headers) + { + Request request = toRequest(method, endpoint, headers); + requireNonNull(params, "parameters cannot be null"); + for (Map.Entry entry : params.entrySet()) { + request.addParameter(entry.getKey(), entry.getValue()); + } + request.setEntity(entity); + return request; + } + + private static Request toRequest(String method, String endpoint, Header... headers) + { + requireNonNull(headers, "headers cannot be null"); + Request request = new Request(method, endpoint); + RequestOptions.Builder options = request.getOptions().toBuilder(); + for (Header header : headers) { + options.addHeader(header.getName(), header.getValue()); + } + request.setOptions(options); + return request; + } + + public void close() + throws IOException + { + delegate.close(); + } + + private static boolean isBackpressure(Throwable throwable) + { + return (throwable instanceof ResponseException) && + (((ResponseException) throwable).getResponse().getStatusLine().getStatusCode() == RestStatus.TOO_MANY_REQUESTS.getStatus()); + } + + private void onComplete(ExecutionCompletedEvent executionCompletedEvent) + { + if (stopwatch.get().isRunning()) { + long delayMillis = stopwatch.get().elapsed(MILLISECONDS); + log.debug("Adding %s milliseconds to backpressure stats", delayMillis); + stopwatch.get().reset(); + backpressureStats.add(delayMillis, MILLISECONDS); + } + } + + private Response executeWithRetries(CheckedSupplier supplier) + throws IOException + { + try { + return Failsafe.with(retryPolicy).get(supplier); + } + catch (FailsafeException e) { + Throwable throwable = e.getCause(); + throwIfInstanceOf(throwable, IOException.class); + throwIfUnchecked(throwable); + throw new RuntimeException("Unexpected cause from FailsafeException", throwable); + } + } + + private void onFailedAttempt(ExecutionAttemptedEvent executionAttemptedEvent) + { + log.debug("REST attempt failed: %s", executionAttemptedEvent.getLastException()); + if (!stopwatch.get().isRunning()) { + stopwatch.get().start(); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestHighLevelClient.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestHighLevelClient.java new file mode 100644 index 0000000000000..b75a0caf01f46 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/BackpressureRestHighLevelClient.java @@ -0,0 +1,142 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
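Both backpressure wrappers (the low-level client above and the high-level client that follows) share one Failsafe pattern: unlimited attempts bounded by a maximum duration, jittered exponential backoff, and retries only when the server answered 429 TOO_MANY_REQUESTS. A condensed sketch of that pattern with placeholder durations (the real values come from OpenSearchConfig in this patch):

    RetryPolicy<Response> policy = RetryPolicy.<Response>builder()
            .withMaxAttempts(-1)                          // retry until the max duration elapses
            .withMaxDuration(Duration.ofSeconds(30))      // placeholder for the configured max retry time
            .withBackoff(500, 5_000, ChronoUnit.MILLIS)   // placeholder initial/max delay
            .withJitter(0.125)
            .handleIf(t -> t instanceof ResponseException e
                    && e.getResponse().getStatusLine().getStatusCode() == 429)
            .build();
    Response response = Failsafe.with(policy).get(() -> restClient.performRequest(request));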
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.google.common.base.Stopwatch; +import dev.failsafe.Failsafe; +import dev.failsafe.FailsafeException; +import dev.failsafe.RetryPolicy; +import dev.failsafe.event.ExecutionAttemptedEvent; +import dev.failsafe.event.ExecutionCompletedEvent; +import dev.failsafe.function.CheckedSupplier; +import io.airlift.log.Logger; +import io.airlift.stats.TimeStat; +import io.trino.plugin.opensearch.OpenSearchConfig; +import org.opensearch.OpenSearchStatusException; +import org.opensearch.action.search.ClearScrollRequest; +import org.opensearch.action.search.ClearScrollResponse; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.search.SearchScrollRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.core.action.ActionResponse; +import org.opensearch.core.rest.RestStatus; + +import java.io.Closeable; +import java.io.IOException; + +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.base.Throwables.throwIfUnchecked; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.MILLISECONDS; + +public class BackpressureRestHighLevelClient + implements Closeable +{ + private static final Logger log = Logger.get(BackpressureRestHighLevelClient.class); + + private final RestHighLevelClient delegate; + private final BackpressureRestClient backpressureRestClient; + private final RetryPolicy retryPolicy; + private final TimeStat backpressureStats; + private final ThreadLocal stopwatch = ThreadLocal.withInitial(Stopwatch::createUnstarted); + + public BackpressureRestHighLevelClient(RestClientBuilder restClientBuilder, OpenSearchConfig config, TimeStat backpressureStats) + { + this.backpressureStats = requireNonNull(backpressureStats, "backpressureStats is null"); + delegate = new RestHighLevelClient(requireNonNull(restClientBuilder, "restClientBuilder is null")); + backpressureRestClient = new BackpressureRestClient(delegate.getLowLevelClient(), config, backpressureStats); + retryPolicy = RetryPolicy.builder() + .withMaxAttempts(-1) + .withMaxDuration(java.time.Duration.ofMillis(config.getMaxRetryTime().toMillis())) + .withBackoff(config.getBackoffInitDelay().toMillis(), config.getBackoffMaxDelay().toMillis(), MILLIS) + .withJitter(0.125) + .handleIf(BackpressureRestHighLevelClient::isBackpressure) + .onFailedAttempt(this::onFailedAttempt) + .onSuccess(this::onComplete) + .onFailure(this::onComplete) + .build(); + } + + public BackpressureRestClient getLowLevelClient() + { + return backpressureRestClient; + } + + @Override + public void close() + throws IOException + { + delegate.close(); + } + + public SearchResponse search(SearchRequest searchRequest) + throws IOException + { + return executeWithRetries(() -> delegate.search(searchRequest, RequestOptions.DEFAULT)); + } + + public SearchResponse 
searchScroll(SearchScrollRequest searchScrollRequest) + throws IOException + { + return executeWithRetries(() -> delegate.scroll(searchScrollRequest, RequestOptions.DEFAULT)); + } + + public ClearScrollResponse clearScroll(ClearScrollRequest clearScrollRequest) + throws IOException + { + return executeWithRetries(() -> delegate.clearScroll(clearScrollRequest, RequestOptions.DEFAULT)); + } + + private static boolean isBackpressure(Throwable throwable) + { + return (throwable instanceof OpenSearchStatusException) && + (((OpenSearchStatusException) throwable).status() == RestStatus.TOO_MANY_REQUESTS); + } + + private void onComplete(ExecutionCompletedEvent executionCompletedEvent) + { + if (stopwatch.get().isRunning()) { + long delayMillis = stopwatch.get().elapsed(MILLISECONDS); + log.debug("Adding %s milliseconds to backpressure stats", delayMillis); + stopwatch.get().reset(); + backpressureStats.add(delayMillis, MILLISECONDS); + } + } + + private T executeWithRetries(CheckedSupplier supplier) + throws IOException + { + try { + return Failsafe.with(retryPolicy).get(supplier); + } + catch (FailsafeException e) { + Throwable throwable = e.getCause(); + throwIfInstanceOf(throwable, IOException.class); + throwIfUnchecked(throwable); + throw new RuntimeException("Unexpected cause from FailsafeException", throwable); + } + } + + private void onFailedAttempt(ExecutionAttemptedEvent executionAttemptedEvent) + { + log.debug("REST attempt failed: %s", executionAttemptedEvent.getLastException()); + if (!stopwatch.get().isRunning()) { + stopwatch.get().start(); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/CountResponse.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/CountResponse.java new file mode 100644 index 0000000000000..f6db36a1012e2 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/CountResponse.java @@ -0,0 +1,34 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +public class CountResponse +{ + private final long count; + + @JsonCreator + public CountResponse(@JsonProperty("count") long count) + { + this.count = count; + } + + @JsonProperty + public long getCount() + { + return count; + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/IndexMetadata.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/IndexMetadata.java new file mode 100644 index 0000000000000..ff0d956595ee6 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/IndexMetadata.java @@ -0,0 +1,145 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
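CountResponse above is a minimal Jackson binding for the _count endpoint; OpenSearchClient further down decodes it through an Airlift JsonCodec. For illustration (the JSON literal is invented):

    JsonCodec<CountResponse> codec = JsonCodec.jsonCodec(CountResponse.class);
    long count = codec.fromJson("{\"count\": 42}").getCount();   // 42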
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class IndexMetadata +{ + private final ObjectType schema; + + public IndexMetadata(ObjectType schema) + { + this.schema = requireNonNull(schema, "schema is null"); + } + + public ObjectType getSchema() + { + return schema; + } + + public static class Field + { + private final boolean asRawJson; + private final boolean isArray; + private final String name; + private final Type type; + + public Field(boolean asRawJson, boolean isArray, String name, Type type) + { + checkArgument(!asRawJson || !isArray, + format("A column, (%s) cannot be declared as a Trino array and also be rendered as json.", name)); + this.asRawJson = asRawJson; + this.isArray = isArray; + this.name = requireNonNull(name, "name is null"); + this.type = requireNonNull(type, "type is null"); + } + + public boolean asRawJson() + { + return asRawJson; + } + + public boolean isArray() + { + return isArray; + } + + public String getName() + { + return name; + } + + public Type getType() + { + return type; + } + } + + public interface Type {} + + public static class PrimitiveType + implements Type + { + private final String name; + + public PrimitiveType(String name) + { + this.name = requireNonNull(name, "name is null"); + } + + public String getName() + { + return name; + } + } + + public static class DateTimeType + implements Type + { + private final List formats; + + public DateTimeType(List formats) + { + requireNonNull(formats, "formats is null"); + + this.formats = ImmutableList.copyOf(formats); + } + + public List getFormats() + { + return formats; + } + } + + public static class ObjectType + implements Type + { + private final List fields; + + public ObjectType(List fields) + { + requireNonNull(fields, "fields is null"); + + this.fields = ImmutableList.copyOf(fields); + } + + public List getFields() + { + return fields; + } + } + + public static class ScaledFloatType + implements Type + { + private final double scale; + + public ScaledFloatType(double scale) + { + this.scale = scale; + } + + public double getScale() + { + return scale; + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/NodesResponse.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/NodesResponse.java new file mode 100644 index 0000000000000..3ed2b84b16bc8 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/NodesResponse.java @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
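IndexMetadata above is the connector's in-memory mirror of an index mapping: primitive, date (with its formats), scaled_float, and object types, plus per-field isArray and asRawJson flags. As a small illustration of the model (field names are invented), a mapping with a keyword column, a date column, and a nested object would be represented as:

    IndexMetadata metadata = new IndexMetadata(new IndexMetadata.ObjectType(ImmutableList.of(
            new IndexMetadata.Field(false, false, "name", new IndexMetadata.PrimitiveType("keyword")),
            new IndexMetadata.Field(false, false, "created", new IndexMetadata.DateTimeType(ImmutableList.of("strict_date_optional_time"))),
            new IndexMetadata.Field(false, false, "address", new IndexMetadata.ObjectType(ImmutableList.of(
                    new IndexMetadata.Field(false, false, "city", new IndexMetadata.PrimitiveType("keyword"))))))));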
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +public class NodesResponse +{ + private final Map nodes; + + @JsonCreator + public NodesResponse(@JsonProperty("nodes") Map nodes) + { + requireNonNull(nodes, "nodes is null"); + + this.nodes = ImmutableMap.copyOf(nodes); + } + + public Map getNodes() + { + return nodes; + } + + public static class Node + { + private final Set roles; + private final Optional http; + + @JsonCreator + public Node( + @JsonProperty("roles") Set roles, + @JsonProperty("http") Optional http) + { + this.roles = ImmutableSet.copyOf(roles); + this.http = requireNonNull(http, "http is null"); + } + + public Set getRoles() + { + return roles; + } + + public Optional getAddress() + { + return http.map(Http::getAddress); + } + } + + public static class Http + { + private final String address; + + @JsonCreator + public Http(@JsonProperty("publish_address") String address) + { + this.address = address; + } + + public String getAddress() + { + return address; + } + + @Override + public String toString() + { + return address; + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchClient.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchClient.java new file mode 100644 index 0000000000000..b86da88413f9d --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchClient.java @@ -0,0 +1,790 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.client; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; +import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; +import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.NullNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import com.google.inject.Inject; +import io.airlift.json.JsonCodec; +import io.airlift.json.ObjectMapperProvider; +import io.airlift.log.Logger; +import io.airlift.stats.TimeStat; +import io.airlift.units.Duration; +import io.trino.plugin.opensearch.AwsSecurityConfig; +import io.trino.plugin.opensearch.OpenSearchConfig; +import io.trino.plugin.opensearch.OpenSearchErrorCode; +import io.trino.plugin.opensearch.PasswordConfig; +import io.trino.spi.TrinoException; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.http.impl.nio.reactor.IOReactorConfig; +import org.apache.http.message.BasicHeader; +import org.apache.http.util.EntityUtils; +import org.opensearch.OpenSearchStatusException; +import org.opensearch.action.search.ClearScrollRequest; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.search.SearchScrollRequest; +import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.weakref.jmx.Managed; +import org.weakref.jmx.Nested; + +import javax.net.ssl.SSLContext; + +import java.io.File; +import java.io.IOException; +import java.security.GeneralSecurityException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static io.airlift.concurrent.Threads.daemonThreadsNamed; +import static 
io.airlift.json.JsonCodec.jsonCodec; +import static io.trino.plugin.base.ssl.SslUtils.createSSLContext; +import static java.lang.StrictMath.toIntExact; +import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.opensearch.action.search.SearchType.QUERY_THEN_FETCH; + +public class OpenSearchClient +{ + private static final Logger LOG = Logger.get(OpenSearchClient.class); + + private static final JsonCodec SEARCH_SHARDS_RESPONSE_CODEC = jsonCodec(SearchShardsResponse.class); + private static final JsonCodec NODES_RESPONSE_CODEC = jsonCodec(NodesResponse.class); + private static final JsonCodec COUNT_RESPONSE_CODEC = jsonCodec(CountResponse.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapperProvider().get(); + + private static final Pattern ADDRESS_PATTERN = Pattern.compile("((?[^/]+)/)?(?.+):(?\\d+)"); + private static final Set NODE_ROLES = ImmutableSet.of("data", "data_content", "data_hot", "data_warm", "data_cold", "data_frozen"); + + private final BackpressureRestHighLevelClient client; + private final int scrollSize; + private final Duration scrollTimeout; + + private final AtomicReference> nodes = new AtomicReference<>(ImmutableSet.of()); + private final ScheduledExecutorService executor = newSingleThreadScheduledExecutor(daemonThreadsNamed("NodeRefresher")); + private final AtomicBoolean started = new AtomicBoolean(); + private final Duration refreshInterval; + private final boolean tlsEnabled; + private final boolean ignorePublishAddress; + + private final TimeStat searchStats = new TimeStat(MILLISECONDS); + private final TimeStat nextPageStats = new TimeStat(MILLISECONDS); + private final TimeStat countStats = new TimeStat(MILLISECONDS); + private final TimeStat backpressureStats = new TimeStat(MILLISECONDS); + + @Inject + public OpenSearchClient( + OpenSearchConfig config, + Optional awsSecurityConfig, + Optional passwordConfig) + { + client = createClient(config, awsSecurityConfig, passwordConfig, backpressureStats); + + this.ignorePublishAddress = config.isIgnorePublishAddress(); + this.scrollSize = config.getScrollSize(); + this.scrollTimeout = config.getScrollTimeout(); + this.refreshInterval = config.getNodeRefreshInterval(); + this.tlsEnabled = config.isTlsEnabled(); + } + + @PostConstruct + public void initialize() + { + if (!started.getAndSet(true)) { + // do the first refresh eagerly + refreshNodes(); + + executor.scheduleWithFixedDelay(this::refreshNodes, refreshInterval.toMillis(), refreshInterval.toMillis(), MILLISECONDS); + } + } + + @PreDestroy + public void close() + throws IOException + { + executor.shutdownNow(); + client.close(); + } + + private void refreshNodes() + { + // discover other nodes in the cluster and add them to the client + try { + Set nodes = fetchNodes(); + + HttpHost[] hosts = nodes.stream() + .map(OpenSearchNode::getAddress) + .filter(Optional::isPresent) + .map(Optional::get) + .map(address -> HttpHost.create(format("%s://%s", tlsEnabled ? 
"https" : "http", address))) + .toArray(HttpHost[]::new); + + if (hosts.length > 0 && !ignorePublishAddress) { + client.getLowLevelClient().setHosts(hosts); + } + + this.nodes.set(nodes); + } + catch (Throwable e) { + // Catch all exceptions here since throwing an exception from executor#scheduleWithFixedDelay method + // suppresses all future scheduled invocations + LOG.error(e, "Error refreshing nodes"); + } + } + + private static BackpressureRestHighLevelClient createClient( + OpenSearchConfig config, + Optional awsSecurityConfig, + Optional passwordConfig, + TimeStat backpressureStats) + { + RestClientBuilder builder = RestClient.builder( + config.getHosts().stream() + .map(httpHost -> new HttpHost(httpHost, config.getPort(), config.isTlsEnabled() ? "https" : "http")) + .toArray(HttpHost[]::new)); + + builder.setHttpClientConfigCallback(ignored -> { + RequestConfig requestConfig = RequestConfig.custom() + .setConnectTimeout(toIntExact(config.getConnectTimeout().toMillis())) + .setSocketTimeout(toIntExact(config.getRequestTimeout().toMillis())) + .build(); + + IOReactorConfig reactorConfig = IOReactorConfig.custom() + .setIoThreadCount(config.getHttpThreadCount()) + .build(); + + // the client builder passed to the call-back is configured to use system properties, which makes it + // impossible to configure concurrency settings, so we need to build a new one from scratch + HttpAsyncClientBuilder clientBuilder = HttpAsyncClientBuilder.create() + .setDefaultRequestConfig(requestConfig) + .setDefaultIOReactorConfig(reactorConfig) + .setMaxConnPerRoute(config.getMaxHttpConnections()) + .setMaxConnTotal(config.getMaxHttpConnections()); + if (config.isTlsEnabled()) { + buildSslContext(config.getKeystorePath(), config.getKeystorePassword(), config.getTrustStorePath(), config.getTruststorePassword()) + .ifPresent(clientBuilder::setSSLContext); + + if (!config.isVerifyHostnames()) { + clientBuilder.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE); + } + } + + passwordConfig.ifPresent(securityConfig -> { + CredentialsProvider credentials = new BasicCredentialsProvider(); + credentials.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(securityConfig.getUser(), securityConfig.getPassword())); + clientBuilder.setDefaultCredentialsProvider(credentials); + }); + + awsSecurityConfig.ifPresent(securityConfig -> clientBuilder.addInterceptorLast(new AwsRequestSigner( + securityConfig.getRegion(), + getAwsCredentialsProvider(securityConfig)))); + + return clientBuilder; + }); + + return new BackpressureRestHighLevelClient(builder, config, backpressureStats); + } + + private static AWSCredentialsProvider getAwsCredentialsProvider(AwsSecurityConfig config) + { + AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); + + if (config.getAccessKey().isPresent() && config.getSecretKey().isPresent()) { + credentialsProvider = new AWSStaticCredentialsProvider(new BasicAWSCredentials( + config.getAccessKey().get(), + config.getSecretKey().get())); + } + + if (config.getIamRole().isPresent()) { + STSAssumeRoleSessionCredentialsProvider.Builder credentialsProviderBuilder = new STSAssumeRoleSessionCredentialsProvider.Builder(config.getIamRole().get(), "trino-session") + .withStsClient(AWSSecurityTokenServiceClientBuilder.standard() + .withRegion(config.getRegion()) + .withCredentials(credentialsProvider) + .build()); + config.getExternalId().ifPresent(credentialsProviderBuilder::withExternalId); + credentialsProvider = credentialsProviderBuilder.build(); + } + + 
return credentialsProvider; + } + + private static Optional buildSslContext( + Optional keyStorePath, + Optional keyStorePassword, + Optional trustStorePath, + Optional trustStorePassword) + { + if (keyStorePath.isEmpty() && trustStorePath.isEmpty()) { + return Optional.empty(); + } + + try { + return Optional.of(createSSLContext(keyStorePath, keyStorePassword, trustStorePath, trustStorePassword)); + } + catch (GeneralSecurityException | IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_SSL_INITIALIZATION_FAILURE, e); + } + } + + private Set fetchNodes() + { + NodesResponse nodesResponse = doRequest("/_nodes/http", NODES_RESPONSE_CODEC::fromJson); + + ImmutableSet.Builder result = ImmutableSet.builder(); + for (Map.Entry entry : nodesResponse.getNodes().entrySet()) { + String nodeId = entry.getKey(); + NodesResponse.Node node = entry.getValue(); + + if (!Sets.intersection(node.getRoles(), NODE_ROLES).isEmpty()) { + Optional address = node.getAddress() + .flatMap(OpenSearchClient::extractAddress); + + result.add(new OpenSearchNode(nodeId, address)); + } + } + + return result.build(); + } + + public Set getNodes() + { + return nodes.get(); + } + + public List getSearchShards(String index) + { + Map nodeById = getNodes().stream() + .collect(toImmutableMap(OpenSearchNode::getId, Function.identity())); + + SearchShardsResponse shardsResponse = doRequest(format("/%s/_search_shards", index), SEARCH_SHARDS_RESPONSE_CODEC::fromJson); + + ImmutableList.Builder shards = ImmutableList.builder(); + List nodes = ImmutableList.copyOf(nodeById.values()); + + for (List shardGroup : shardsResponse.getShardGroups()) { + Optional candidate = shardGroup.stream() + .filter(shard -> shard.getNode() != null && nodeById.containsKey(shard.getNode())) + .min(this::shardPreference); + + SearchShardsResponse.Shard chosen; + OpenSearchNode node; + if (candidate.isEmpty()) { + // pick an arbitrary shard with and assign to an arbitrary node + chosen = shardGroup.stream() + .min(this::shardPreference) + .get(); + node = nodes.get(chosen.getShard() % nodes.size()); + } + else { + chosen = candidate.get(); + node = nodeById.get(chosen.getNode()); + } + + shards.add(new Shard(chosen.getIndex(), chosen.getShard(), node.getAddress())); + } + + return shards.build(); + } + + private int shardPreference(SearchShardsResponse.Shard left, SearchShardsResponse.Shard right) + { + // Favor non-primary shards + if (left.isPrimary() == right.isPrimary()) { + return 0; + } + + return left.isPrimary() ? 
1 : -1; + } + + public boolean indexExists(String index) + { + String path = format("/%s/_mappings", index); + + try { + Response response = client.getLowLevelClient() + .performRequest("GET", path); + + return response.getStatusLine().getStatusCode() == 200; + } + catch (ResponseException e) { + if (e.getResponse().getStatusLine().getStatusCode() == 404) { + return false; + } + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + } + + public List getIndexes() + { + return doRequest("/_cat/indices?h=index,docs.count,docs.deleted&format=json&s=index:asc", body -> { + try { + ImmutableList.Builder result = ImmutableList.builder(); + JsonNode root = OBJECT_MAPPER.readTree(body); + for (int i = 0; i < root.size(); i++) { + String index = root.get(i).get("index").asText(); + // make sure the index has mappings we can use to derive the schema + int docsCount = root.get(i).get("docs.count").asInt(); + int deletedDocsCount = root.get(i).get("docs.deleted").asInt(); + if (docsCount == 0 && deletedDocsCount == 0) { + // without documents, the index won't have any dynamic mappings, but maybe there are some explicit ones + if (getIndexMetadata(index).getSchema().getFields().isEmpty()) { + continue; + } + } + result.add(index); + } + return result.build(); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + }); + } + + public Map> getAliases() + { + return doRequest("/_aliases", body -> { + try { + ImmutableMap.Builder> result = ImmutableMap.builder(); + JsonNode root = OBJECT_MAPPER.readTree(body); + + Iterator> elements = root.fields(); + while (elements.hasNext()) { + Map.Entry element = elements.next(); + JsonNode aliases = element.getValue().get("aliases"); + Iterator aliasNames = aliases.fieldNames(); + if (aliasNames.hasNext()) { + result.put(element.getKey(), ImmutableList.copyOf(aliasNames)); + } + } + return result.buildOrThrow(); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + }); + } + + public IndexMetadata getIndexMetadata(String index) + { + String path = format("/%s/_mappings", index); + + return doRequest(path, body -> { + try { + JsonNode mappings = OBJECT_MAPPER.readTree(body) + .elements().next() + .get("mappings"); + + if (!mappings.elements().hasNext()) { + return new IndexMetadata(new IndexMetadata.ObjectType(ImmutableList.of())); + } + if (!mappings.has("properties")) { + // Older versions of OpenSearch supported multiple "type" mappings + // for a given index. Newer versions support only one and don't + // expose it in the document. Here we skip it if it's present. 
+ mappings = mappings.elements().next(); + + if (!mappings.has("properties")) { + return new IndexMetadata(new IndexMetadata.ObjectType(ImmutableList.of())); + } + } + + JsonNode metaNode = nullSafeNode(mappings, "_meta"); + + JsonNode metaProperties = nullSafeNode(metaNode, "trino"); + + //stay backwards compatible with _meta.presto namespace for meta properties for some releases + if (metaProperties.isNull()) { + metaProperties = nullSafeNode(metaNode, "presto"); + } + + return new IndexMetadata(parseType(mappings.get("properties"), metaProperties)); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + }); + } + + private IndexMetadata.ObjectType parseType(JsonNode properties, JsonNode metaProperties) + { + Iterator> entries = properties.fields(); + + ImmutableList.Builder result = ImmutableList.builder(); + while (entries.hasNext()) { + Map.Entry field = entries.next(); + + String name = field.getKey(); + JsonNode value = field.getValue(); + + //default type is object + String type = "object"; + if (value.has("type")) { + type = value.get("type").asText(); + } + JsonNode metaNode = nullSafeNode(metaProperties, name); + boolean isArray = !metaNode.isNull() && metaNode.has("isArray") && metaNode.get("isArray").asBoolean(); + boolean asRawJson = !metaNode.isNull() && metaNode.has("asRawJson") && metaNode.get("asRawJson").asBoolean(); + + // While it is possible to handle isArray and asRawJson in the same column by creating a ARRAY(VARCHAR) type, we chose not to take + // this route, as it will likely lead to confusion in dealing with array syntax in Trino and potentially nested array and other + // syntax when parsing the raw json. + if (isArray && asRawJson) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_METADATA, + format("A column, (%s) cannot be declared as a Trino array and also be rendered as json.", name)); + } + + switch (type) { + case "date": + List formats = ImmutableList.of(); + if (value.has("format")) { + formats = Arrays.asList(value.get("format").asText().split("\\|\\|")); + } + result.add(new IndexMetadata.Field(asRawJson, isArray, name, new IndexMetadata.DateTimeType(formats))); + break; + case "scaled_float": + result.add(new IndexMetadata.Field(asRawJson, isArray, name, new IndexMetadata.ScaledFloatType(value.get("scaling_factor").asDouble()))); + break; + case "nested": + case "object": + if (value.has("properties")) { + result.add(new IndexMetadata.Field(asRawJson, isArray, name, parseType(value.get("properties"), metaNode))); + } + else { + LOG.debug("Ignoring empty object field: %s", name); + } + break; + + default: + result.add(new IndexMetadata.Field(asRawJson, isArray, name, new IndexMetadata.PrimitiveType(type))); + } + } + + return new IndexMetadata.ObjectType(result.build()); + } + + private JsonNode nullSafeNode(JsonNode jsonNode, String name) + { + if (jsonNode == null || jsonNode.isNull() || jsonNode.get(name) == null) { + return NullNode.getInstance(); + } + return jsonNode.get(name); + } + + public String executeQuery(String index, String query) + { + String path = format("/%s/_search", index); + + Response response; + try { + response = client.getLowLevelClient() + .performRequest( + "GET", + path, + ImmutableMap.of(), + new ByteArrayEntity(query.getBytes(UTF_8)), + new BasicHeader("Content-Type", "application/json"), + new BasicHeader("Accept-Encoding", "application/json")); + } + catch (IOException e) { + throw new 
TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + + String body; + try { + body = EntityUtils.toString(response.getEntity()); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + + return body; + } + + public SearchResponse beginSearch(String index, int shard, QueryBuilder query, Optional> fields, List documentFields, Optional sort, OptionalLong limit) + { + SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource() + .query(query); + + if (limit.isPresent() && limit.getAsLong() < scrollSize) { + // Safe to cast it to int because scrollSize is int. + sourceBuilder.size(toIntExact(limit.getAsLong())); + } + else { + sourceBuilder.size(scrollSize); + } + + sort.ifPresent(sourceBuilder::sort); + + fields.ifPresent(values -> { + if (values.isEmpty()) { + sourceBuilder.fetchSource(false); + } + else { + sourceBuilder.fetchSource(values.toArray(new String[0]), null); + } + }); + documentFields.forEach(sourceBuilder::docValueField); + + LOG.debug("Begin search: %s:%s, query: %s", index, shard, sourceBuilder); + + SearchRequest request = new SearchRequest(index) + .searchType(QUERY_THEN_FETCH) + .preference("_shards:" + shard) + .scroll(new TimeValue(scrollTimeout.toMillis())) + .source(sourceBuilder); + + long start = System.nanoTime(); + try { + return client.search(request); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + catch (OpenSearchStatusException e) { + Throwable[] suppressed = e.getSuppressed(); + if (suppressed.length > 0) { + Throwable cause = suppressed[0]; + if (cause instanceof ResponseException) { + throw propagate((ResponseException) cause); + } + } + + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + finally { + searchStats.add(Duration.nanosSince(start)); + } + } + + public SearchResponse nextPage(String scrollId) + { + LOG.debug("Next page: %s", scrollId); + + SearchScrollRequest request = new SearchScrollRequest(scrollId) + .scroll(new TimeValue(scrollTimeout.toMillis())); + + long start = System.nanoTime(); + try { + return client.searchScroll(request); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + finally { + nextPageStats.add(Duration.nanosSince(start)); + } + } + + public long count(String index, int shard, QueryBuilder query) + { + SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource() + .query(query); + + LOG.debug("Count: %s:%s, query: %s", index, shard, sourceBuilder); + + long start = System.nanoTime(); + try { + Response response; + try { + response = client.getLowLevelClient() + .performRequest( + "GET", + format("/%s/_count?preference=_shards:%s", index, shard), + ImmutableMap.of(), + new StringEntity(sourceBuilder.toString()), + new BasicHeader("Content-Type", "application/json")); + } + catch (ResponseException e) { + throw propagate(e); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + + try { + return COUNT_RESPONSE_CODEC.fromJson(EntityUtils.toByteArray(response.getEntity())) + .getCount(); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + } + finally { + countStats.add(Duration.nanosSince(start)); + } + } + + public void clearScroll(String scrollId) + { + ClearScrollRequest request = new ClearScrollRequest(); + request.addScrollId(scrollId); + 
try { + client.clearScroll(request); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + } + + @Managed + @Nested + public TimeStat getSearchStats() + { + return searchStats; + } + + @Managed + @Nested + public TimeStat getNextPageStats() + { + return nextPageStats; + } + + @Managed + @Nested + public TimeStat getCountStats() + { + return countStats; + } + + @Managed + @Nested + public TimeStat getBackpressureStats() + { + return backpressureStats; + } + + private T doRequest(String path, ResponseHandler handler) + { + checkArgument(path.startsWith("/"), "path must be an absolute path"); + + Response response; + try { + response = client.getLowLevelClient() + .performRequest("GET", path); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_CONNECTION_ERROR, e); + } + + String body; + try { + body = EntityUtils.toString(response.getEntity()); + } + catch (IOException e) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_INVALID_RESPONSE, e); + } + + return handler.process(body); + } + + private static TrinoException propagate(ResponseException exception) + { + HttpEntity entity = exception.getResponse().getEntity(); + + if (entity != null && entity.getContentType() != null) { + try { + JsonNode reason = OBJECT_MAPPER.readTree(entity.getContent()).path("error") + .path("root_cause") + .path(0) + .path("reason"); + + if (!reason.isMissingNode()) { + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_QUERY_FAILURE, reason.asText(), exception); + } + } + catch (IOException e) { + TrinoException result = new TrinoException(OpenSearchErrorCode.OPENSEARCH_QUERY_FAILURE, exception); + result.addSuppressed(e); + throw result; + } + } + + throw new TrinoException(OpenSearchErrorCode.OPENSEARCH_QUERY_FAILURE, exception); + } + + @VisibleForTesting + static Optional extractAddress(String address) + { + Matcher matcher = ADDRESS_PATTERN.matcher(address); + + if (!matcher.matches()) { + return Optional.empty(); + } + + String cname = matcher.group("cname"); + String ip = matcher.group("ip"); + String port = matcher.group("port"); + + if (cname != null) { + return Optional.of(cname + ":" + port); + } + + return Optional.of(ip + ":" + port); + } + + private interface ResponseHandler + { + T process(String body); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchNode.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchNode.java new file mode 100644 index 0000000000000..c41a4228b4002 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/OpenSearchNode.java @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
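
// Illustrative sketch (not part of this change): expected behavior of extractAddress() above. The
// regex groups (cname, ip, port) suggest it parses publish addresses of the form "hostname/ip:port"
// or plain "ip:port"; the exact pattern is declared earlier in OpenSearchClient and is assumed here.
// extractAddress is package-private, so a caller like this would sit in the same package.
class ExtractAddressExample
{
    public static void main(String[] args)
    {
        // hostname plus IP: the hostname (cname group) wins -> "search-node-1:9200"
        var withHostname = OpenSearchClient.extractAddress("search-node-1/10.0.0.5:9200");

        // bare ip:port: falls back to the IP -> "10.0.0.5:9200"
        var bareIp = OpenSearchClient.extractAddress("10.0.0.5:9200");

        // anything that does not match the pattern yields Optional.empty()
        var unparsable = OpenSearchClient.extractAddress("not-an-address");

        System.out.println(withHostname + " " + bareIp + " " + unparsable);
    }
}
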
+ */ +package io.trino.plugin.opensearch.client; + +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +public class OpenSearchNode +{ + private final String id; + private final Optional address; + + public OpenSearchNode(String id, Optional address) + { + this.id = requireNonNull(id, "id is null"); + this.address = requireNonNull(address, "address is null"); + } + + public String getId() + { + return id; + } + + public Optional getAddress() + { + return address; + } + + @Override + public String toString() + { + return id + "@" + address.orElse(""); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/SearchShardsResponse.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/SearchShardsResponse.java new file mode 100644 index 0000000000000..75b3b447520d6 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/SearchShardsResponse.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public class SearchShardsResponse +{ + private final List> shardGroups; + + @JsonCreator + public SearchShardsResponse(@JsonProperty("shards") List> shardGroups) + { + requireNonNull(shardGroups, "shardGroups is null"); + + this.shardGroups = ImmutableList.copyOf(shardGroups); + } + + public List> getShardGroups() + { + return shardGroups; + } + + public static class Shard + { + private final String index; + private final boolean primary; + private final String node; + private final int shard; + + @JsonCreator + public Shard( + @JsonProperty("index") String index, + @JsonProperty("shard") int shard, + @JsonProperty("primary") boolean primary, + @JsonProperty("node") String node) + { + this.index = requireNonNull(index, "index is null"); + this.shard = shard; + this.primary = primary; + this.node = requireNonNull(node, "node is null"); + } + + public String getIndex() + { + return index; + } + + public boolean isPrimary() + { + return primary; + } + + public String getNode() + { + return node; + } + + public int getShard() + { + return shard; + } + + @Override + public String toString() + { + return index + ":" + shard + "@" + node + (primary ? "[primary]" : "[replica]"); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/Shard.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/Shard.java new file mode 100644 index 0000000000000..ac0e76fd2b98c --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/client/Shard.java @@ -0,0 +1,53 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +public class Shard +{ + private final String index; + private final int id; + private final Optional address; + + public Shard(String index, int id, Optional address) + { + this.index = requireNonNull(index, "index is null"); + this.id = id; + this.address = requireNonNull(address, "address is null"); + } + + public String getIndex() + { + return index; + } + + public int getId() + { + return id; + } + + public Optional getAddress() + { + return address; + } + + @Override + public String toString() + { + return index + ":" + id + "@" + address.orElse(""); + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ArrayDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ArrayDecoder.java new file mode 100644 index 0000000000000..d7b1c6a823bf6 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ArrayDecoder.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
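
// Illustrative sketch (not part of this change): how the three value classes above fit together.
// A SearchShardsResponse.Shard names the index, shard id and node id; resolving the node id against
// the known OpenSearchNode instances yields the optional address carried by Shard. The method name
// and map shape are assumptions for the example; the real wiring lives in OpenSearchClient.
import java.util.Map;
import java.util.Optional;

class ShardAssemblyExample
{
    static Shard toShard(SearchShardsResponse.Shard source, Map<String, OpenSearchNode> nodesById)
    {
        Optional<String> address = Optional.ofNullable(nodesById.get(source.getNode()))
                .flatMap(OpenSearchNode::getAddress);
        return new Shard(source.getIndex(), source.getShard(), address);
    }
}
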
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.block.ArrayBlockBuilder; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.List; +import java.util.function.Supplier; + +public class ArrayDecoder + implements Decoder +{ + private final Decoder elementDecoder; + + public ArrayDecoder(Decoder elementDecoder) + { + this.elementDecoder = elementDecoder; + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object data = getter.get(); + + if (data == null) { + output.appendNull(); + } + else if (data instanceof List list) { + ((ArrayBlockBuilder) output).buildEntry(elementBuilder -> list.forEach(element -> elementDecoder.decode(hit, () -> element, elementBuilder))); + } + else { + ((ArrayBlockBuilder) output).buildEntry(elementBuilder -> elementDecoder.decode(hit, () -> data, elementBuilder)); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final DecoderDescriptor elementDescriptor; + + @JsonCreator + public Descriptor(DecoderDescriptor elementDescriptor) + { + this.elementDescriptor = elementDescriptor; + } + + @JsonProperty + public DecoderDescriptor getElementDescriptor() + { + return elementDescriptor; + } + + @Override + public Decoder createDecoder() + { + return new ArrayDecoder(elementDescriptor.createDecoder()); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BigintDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BigintDecoder.java new file mode 100644 index 0000000000000..0d08a8db2bce7 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BigintDecoder.java @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
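
// Illustrative sketch (not part of this change): the Descriptor/Decoder split keeps the plan-time
// piece JSON-serializable while workers materialize the runtime Decoder. For a bigint field whose
// _meta.trino.<field>.isArray flag is true, wiring an array of BIGINT could look like this
// (BigintDecoder is defined further down in this change; the field name is invented).
import io.trino.plugin.opensearch.DecoderDescriptor;
import io.trino.plugin.opensearch.decoders.ArrayDecoder;
import io.trino.plugin.opensearch.decoders.BigintDecoder;
import io.trino.plugin.opensearch.decoders.Decoder;

class ArrayDecoderExample
{
    static Decoder bigintArrayDecoder(String fieldName)
    {
        DecoderDescriptor descriptor = new ArrayDecoder.Descriptor(new BigintDecoder.Descriptor(fieldName));
        // On the worker, createDecoder() rebuilds the runtime decoder; decode() then writes either a
        // single value or every element of a List into the nested BIGINT block builder.
        return descriptor.createDecoder();
    }
}
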
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.BigintType.BIGINT; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class BigintDecoder + implements Decoder +{ + private final String path; + + public BigintDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else if (value instanceof Number) { + BIGINT.writeLong(output, ((Number) value).longValue()); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + BIGINT.writeLong(output, Long.parseLong(stringValue)); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as BIGINT: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field '%s' of type BIGINT: %s [%s]", path, value, value.getClass().getSimpleName())); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new BigintDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BooleanDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BooleanDecoder.java new file mode 100644 index 0000000000000..57fdddb4e0627 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/BooleanDecoder.java @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class BooleanDecoder + implements Decoder +{ + private final String path; + + public BooleanDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else if (value instanceof Boolean) { + BOOLEAN.writeBoolean(output, (Boolean) value); + } + else if (value instanceof String) { + if (value.equals("true")) { + BOOLEAN.writeBoolean(output, true); + } + else if (value.equals("false") || value.equals("")) { + BOOLEAN.writeBoolean(output, false); + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as BOOLEAN: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a boolean value for field %s of type BOOLEAN: %s [%s]", path, value, value.getClass().getSimpleName())); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new BooleanDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/Decoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/Decoder.java new file mode 100644 index 0000000000000..1199d5fc42f2d --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/Decoder.java @@ -0,0 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +public interface Decoder +{ + void decode(SearchHit hit, Supplier getter, BlockBuilder output); +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/DoubleDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/DoubleDecoder.java new file mode 100644 index 0000000000000..908ec9345c510 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/DoubleDecoder.java @@ -0,0 +1,95 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class DoubleDecoder + implements Decoder +{ + private final String path; + + public DoubleDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + return; + } + + double decoded; + if (value instanceof Number number) { + decoded = number.doubleValue(); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + decoded = Double.parseDouble(stringValue); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as DOUBLE: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field %s of type DOUBLE: %s [%s]", path, value, value.getClass().getSimpleName())); + } + + DOUBLE.writeDouble(output, decoded); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new DoubleDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IdColumnDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IdColumnDecoder.java new file mode 100644 index 0000000000000..264f60f632bb0 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IdColumnDecoder.java @@ -0,0 +1,43 
@@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.decoders; + +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.type.VarcharType.VARCHAR; + +public class IdColumnDecoder + implements Decoder +{ + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + VARCHAR.writeSlice(output, Slices.utf8Slice(hit.getId())); + } + + public static class Descriptor + implements DecoderDescriptor + { + @Override + public Decoder createDecoder() + { + return new IdColumnDecoder(); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IntegerDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IntegerDecoder.java new file mode 100644 index 0000000000000..fd09f8543e755 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IntegerDecoder.java @@ -0,0 +1,100 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.IntegerType.INTEGER; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class IntegerDecoder + implements Decoder +{ + private final String path; + + public IntegerDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + + if (value == null) { + output.appendNull(); + return; + } + + long decoded; + if (value instanceof Number number) { + decoded = number.longValue(); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + decoded = Long.parseLong(stringValue); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as INTEGER: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field '%s' of type INTEGER: %s [%s]", path, value, value.getClass().getSimpleName())); + } + + if (decoded < Integer.MIN_VALUE || decoded > Integer.MAX_VALUE) { + throw new TrinoException(TYPE_MISMATCH, format("Value out of range for field '%s' of type INTEGER: %s", path, decoded)); + } + + INTEGER.writeLong(output, decoded); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new IntegerDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IpAddressDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IpAddressDecoder.java new file mode 100644 index 0000000000000..9b50ae4e5bc54 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/IpAddressDecoder.java @@ -0,0 +1,124 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.net.InetAddresses; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.type.Type; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.airlift.slice.Slices.wrappedBuffer; +import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; +import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT; +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static java.lang.String.format; +import static java.lang.System.arraycopy; +import static java.util.Objects.requireNonNull; + +public class IpAddressDecoder + implements Decoder +{ + private final String path; + private final Type ipAddressType; + + public IpAddressDecoder(String path, Type type) + { + this.path = requireNonNull(path, "path is null"); + this.ipAddressType = requireNonNull(type, "type is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else if (value instanceof String address) { + Slice slice = castToIpAddress(Slices.utf8Slice(address)); + ipAddressType.writeSlice(output, slice); + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a string value for field '%s' of type IP: %s [%s]", path, value, value.getClass().getSimpleName())); + } + } + + // This is a copy of IpAddressOperators.castFromVarcharToIpAddress method + private Slice castToIpAddress(Slice slice) + { + byte[] address; + try { + address = InetAddresses.forString(slice.toStringUtf8()).getAddress(); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_CAST_ARGUMENT, "Cannot cast value to IPADDRESS: " + slice.toStringUtf8()); + } + + byte[] bytes; + if (address.length == 4) { + bytes = new byte[16]; + bytes[10] = (byte) 0xff; + bytes[11] = (byte) 0xff; + arraycopy(address, 0, bytes, 12, 4); + } + else if (address.length == 16) { + bytes = address; + } + else { + throw new TrinoException(GENERIC_INTERNAL_ERROR, "Invalid InetAddress length: " + address.length); + } + + return wrappedBuffer(bytes); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + private final Type ipAddressType; + + @JsonCreator + public Descriptor(String path, Type ipAddressType) + { + this.path = path; + this.ipAddressType = ipAddressType; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @JsonProperty + public Type getIpAddressType() + { + return ipAddressType; + } + + @Override + public Decoder createDecoder() + { + return new IpAddressDecoder(path, ipAddressType); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RawJsonDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RawJsonDecoder.java new file mode 100644 index 0000000000000..a1290b2ecef01 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RawJsonDecoder.java @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
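
// Illustrative sketch (not part of this change): castToIpAddress() above normalizes every address to
// the 16-byte form Trino's IPADDRESS type expects. IPv4 addresses become IPv4-mapped IPv6 addresses:
// ten zero bytes, two 0xff bytes, then the four IPv4 bytes.
import com.google.common.net.InetAddresses;

class IpAddressMappingExample
{
    static byte[] toIpAddressBytes(String literal)
    {
        byte[] address = InetAddresses.forString(literal).getAddress();
        if (address.length == 16) {
            return address; // already a 16-byte IPv6 address
        }
        byte[] bytes = new byte[16];
        bytes[10] = (byte) 0xff;
        bytes[11] = (byte) 0xff;
        System.arraycopy(address, 0, bytes, 12, 4);
        return bytes; // e.g. "10.1.2.3" -> ::ffff:10.1.2.3
    }
}
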
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.airlift.json.ObjectMapperProvider; +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class RawJsonDecoder + implements Decoder +{ + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapperProvider().get(); + private final String path; + + public RawJsonDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else { + try { + String rawJsonValue = OBJECT_MAPPER.writeValueAsString(value); + VARCHAR.writeSlice(output, Slices.utf8Slice(rawJsonValue)); + } + catch (JsonProcessingException e) { + throw new TrinoException( + TYPE_MISMATCH, + format("Expected valid json for field '%s' marked to be rendered as JSON: %s [%s]", path, value, value.getClass().getSimpleName()), + e); + } + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new RawJsonDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RealDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RealDecoder.java new file mode 100644 index 0000000000000..22276f919c65e --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RealDecoder.java @@ -0,0 +1,95 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
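
// Illustrative sketch (not part of this change): a field annotated with _meta.trino.<field>.asRawJson
// set to true is not mapped to a structured Trino type; whatever shape the document carries is
// re-serialized with Jackson and surfaced as VARCHAR, which is what RawJsonDecoder above does.
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;
import java.util.Map;

class RawJsonExample
{
    public static void main(String[] args) throws Exception
    {
        Object value = Map.of("unit", "ms", "samples", List.of(1, 2, 3));
        String rawJson = new ObjectMapper().writeValueAsString(value);
        // rawJson -> {"unit":"ms","samples":[1,2,3]} (key order may differ); stored as a VARCHAR value
        System.out.println(rawJson);
    }
}
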
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.RealType.REAL; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class RealDecoder + implements Decoder +{ + private final String path; + + public RealDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + return; + } + + float decoded; + if (value instanceof Number number) { + decoded = number.floatValue(); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + decoded = Float.parseFloat(stringValue); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as REAL: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field %s of type REAL: %s [%s]", path, value, value.getClass().getSimpleName())); + } + + REAL.writeLong(output, Float.floatToRawIntBits(decoded)); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new RealDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RowDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RowDecoder.java new file mode 100644 index 0000000000000..522243a3d8ad4 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/RowDecoder.java @@ -0,0 +1,132 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.plugin.opensearch.ScanQueryPageSource; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.block.RowBlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class RowDecoder + implements Decoder +{ + private final String path; + private final List fieldNames; + private final List decoders; + + public RowDecoder(String path, List fieldNames, List decoders) + { + this.path = requireNonNull(path, "path is null"); + this.fieldNames = fieldNames; + this.decoders = decoders; + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object data = getter.get(); + + if (data == null) { + output.appendNull(); + } + else if (data instanceof Map) { + ((RowBlockBuilder) output).buildEntry(fieldBuilders -> { + for (int i = 0; i < decoders.size(); i++) { + String field = fieldNames.get(i); + decoders.get(i).decode(hit, () -> ScanQueryPageSource.getField((Map) data, field), fieldBuilders.get(i)); + } + }); + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected object for field '%s' of type ROW: %s [%s]", path, data, data.getClass().getSimpleName())); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + private final List fields; + + @JsonCreator + public Descriptor(String path, List fields) + { + this.path = path; + this.fields = fields; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @JsonProperty + public List getFields() + { + return fields; + } + + @Override + public Decoder createDecoder() + { + return new RowDecoder( + path, + fields.stream() + .map(NameAndDescriptor::getName) + .collect(toImmutableList()), + fields.stream() + .map(field -> field.getDescriptor().createDecoder()) + .collect(toImmutableList())); + } + } + + public static class NameAndDescriptor + { + private final String name; + private final DecoderDescriptor descriptor; + + @JsonCreator + public NameAndDescriptor(String name, DecoderDescriptor descriptor) + { + this.name = name; + this.descriptor = descriptor; + } + + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public DecoderDescriptor getDescriptor() + { + return descriptor; + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ScoreColumnDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ScoreColumnDecoder.java new file mode 100644 index 0000000000000..d7dfa71eece8c --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/ScoreColumnDecoder.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
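
// Illustrative sketch (not part of this change): a nested OpenSearch object maps to a Trino ROW, and
// RowDecoder.Descriptor pairs each row field with its own DecoderDescriptor. For a document shaped
// like {"address": {"city": "...", "zip": 12345}} the descriptor could be assembled as below
// (field names and dotted paths are invented for the example).
import com.google.common.collect.ImmutableList;
import io.trino.plugin.opensearch.decoders.Decoder;
import io.trino.plugin.opensearch.decoders.IntegerDecoder;
import io.trino.plugin.opensearch.decoders.RowDecoder;
import io.trino.plugin.opensearch.decoders.VarcharDecoder;

class RowDecoderExample
{
    static Decoder addressDecoder()
    {
        RowDecoder.Descriptor descriptor = new RowDecoder.Descriptor(
                "address",
                ImmutableList.of(
                        new RowDecoder.NameAndDescriptor("city", new VarcharDecoder.Descriptor("address.city")),
                        new RowDecoder.NameAndDescriptor("zip", new IntegerDecoder.Descriptor("address.zip"))));
        // decode() walks the Map returned for "address" and dispatches each entry to its field decoder.
        return descriptor.createDecoder();
    }
}
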
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.decoders; + +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.type.RealType.REAL; + +public class ScoreColumnDecoder + implements Decoder +{ + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + REAL.writeLong(output, Float.floatToRawIntBits(hit.getScore())); + } + + public static class Descriptor + implements DecoderDescriptor + { + @Override + public Decoder createDecoder() + { + return new ScoreColumnDecoder(); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SmallintDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SmallintDecoder.java new file mode 100644 index 0000000000000..61198f34e1f40 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SmallintDecoder.java @@ -0,0 +1,99 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class SmallintDecoder + implements Decoder +{ + private final String path; + + public SmallintDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + return; + } + + long decoded; + if (value instanceof Number number) { + decoded = number.longValue(); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + decoded = Long.parseLong(stringValue); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as SMALLINT: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field '%s' of type SMALLINT: %s [%s]", path, value, value.getClass().getSimpleName())); + } + + if (decoded < Short.MIN_VALUE || decoded > Short.MAX_VALUE) { + throw new TrinoException(TYPE_MISMATCH, format("Value out of range for field '%s' of type SMALLINT: %s", path, decoded)); + } + + SMALLINT.writeLong(output, decoded); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new SmallintDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SourceColumnDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SourceColumnDecoder.java new file mode 100644 index 0000000000000..dd00f28906845 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/SourceColumnDecoder.java @@ -0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.type.VarcharType.VARCHAR; + +public class SourceColumnDecoder + implements Decoder +{ + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + VARCHAR.writeSlice(output, Slices.utf8Slice(hit.getSourceAsString())); + } + + public static class Descriptor + implements DecoderDescriptor + { + @Override + public Decoder createDecoder() + { + return new SourceColumnDecoder(); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TimestampDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TimestampDecoder.java new file mode 100644 index 0000000000000..d13203266682b --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TimestampDecoder.java @@ -0,0 +1,118 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.primitives.Longs; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.common.document.DocumentField; +import org.opensearch.search.SearchHit; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; +import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; +import static java.lang.String.format; +import static java.time.ZoneOffset.UTC; +import static java.time.format.DateTimeFormatter.ISO_DATE_TIME; +import static java.util.Objects.requireNonNull; + +public class TimestampDecoder + implements Decoder +{ + private final String path; + + public TimestampDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + DocumentField documentField = hit.getFields().get(path); + Object value; + + if (documentField != null) { + if (documentField.getValues().size() > 1) { + throw new TrinoException(TYPE_MISMATCH, format("Expected single value for column '%s', found: %s", path, documentField.getValues().size())); + } + value = documentField.getValue(); + } + else { + value = getter.get(); + } + + if (value == null) { + output.appendNull(); + } + else { + LocalDateTime timestamp; + if (value instanceof String valueString) { + Long epochMillis = Longs.tryParse(valueString); + if (epochMillis != null) { + timestamp = 
LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC); + } + else { + timestamp = ISO_DATE_TIME.parse(valueString, LocalDateTime::from); + } + } + else if (value instanceof Number) { + timestamp = LocalDateTime.ofInstant(Instant.ofEpochMilli(((Number) value).longValue()), UTC); + } + else { + throw new TrinoException(NOT_SUPPORTED, format( + "Unsupported representation for field '%s' of type TIMESTAMP: %s [%s]", + path, + value, + value.getClass().getSimpleName())); + } + + long epochMicros = timestamp.atOffset(UTC).toInstant().toEpochMilli() * MICROSECONDS_PER_MILLISECOND; + + TIMESTAMP_MILLIS.writeLong(output, epochMicros); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new TimestampDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TinyintDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TinyintDecoder.java new file mode 100644 index 0000000000000..2e8dc67322def --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/TinyintDecoder.java @@ -0,0 +1,99 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
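
// Illustrative sketch (not part of this change): TimestampDecoder above accepts either an epoch-millis
// number/string or an ISO-8601 string and writes TIMESTAMP(3) as epoch microseconds. The conversion
// it performs is roughly the following.
import com.google.common.primitives.Longs;
import java.time.Instant;
import java.time.LocalDateTime;
import static java.time.ZoneOffset.UTC;
import static java.time.format.DateTimeFormatter.ISO_DATE_TIME;

class TimestampConversionExample
{
    static long toEpochMicros(String value)
    {
        Long epochMillis = Longs.tryParse(value);
        LocalDateTime timestamp = epochMillis != null
                ? LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC)
                : ISO_DATE_TIME.parse(value, LocalDateTime::from);
        // e.g. "2015-01-01T12:10:30Z" and "1420114230000" both yield the same microsecond value
        return timestamp.atOffset(UTC).toInstant().toEpochMilli() * 1_000L;
    }
}
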
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.TinyintType.TINYINT; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class TinyintDecoder + implements Decoder +{ + private final String path; + + public TinyintDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + return; + } + + long decoded; + if (value instanceof Number number) { + decoded = number.longValue(); + } + else if (value instanceof String stringValue) { + if (stringValue.isEmpty()) { + output.appendNull(); + return; + } + try { + decoded = Long.parseLong(stringValue); + } + catch (NumberFormatException e) { + throw new TrinoException(TYPE_MISMATCH, format("Cannot parse value for field '%s' as TINYINT: %s", path, value)); + } + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a numeric value for field '%s' of type TINYINT: %s [%s]", path, value, value.getClass().getSimpleName())); + } + + if (decoded < Byte.MIN_VALUE || decoded > Byte.MAX_VALUE) { + throw new TrinoException(TYPE_MISMATCH, format("Value out of range for field '%s' of type TINYINT: %s", path, decoded)); + } + + TINYINT.writeLong(output, decoded); + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new TinyintDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarbinaryDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarbinaryDecoder.java new file mode 100644 index 0000000000000..6af55652f199e --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarbinaryDecoder.java @@ -0,0 +1,80 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.Base64; +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.VarbinaryType.VARBINARY; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class VarbinaryDecoder + implements Decoder +{ + private final String path; + + public VarbinaryDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else if (value instanceof String) { + VARBINARY.writeSlice(output, Slices.wrappedBuffer(Base64.getDecoder().decode(value.toString()))); + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a string value for field '%s' of type VARBINARY: %s [%s]", path, value, value.getClass().getSimpleName())); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new VarbinaryDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarcharDecoder.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarcharDecoder.java new file mode 100644 index 0000000000000..468a9f09671a0 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/decoders/VarcharDecoder.java @@ -0,0 +1,79 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.decoders; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.airlift.slice.Slices; +import io.trino.plugin.opensearch.DecoderDescriptor; +import io.trino.spi.TrinoException; +import io.trino.spi.block.BlockBuilder; +import org.opensearch.search.SearchHit; + +import java.util.function.Supplier; + +import static io.trino.spi.StandardErrorCode.TYPE_MISMATCH; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class VarcharDecoder + implements Decoder +{ + private final String path; + + public VarcharDecoder(String path) + { + this.path = requireNonNull(path, "path is null"); + } + + @Override + public void decode(SearchHit hit, Supplier getter, BlockBuilder output) + { + Object value = getter.get(); + if (value == null) { + output.appendNull(); + } + else if (value instanceof String || value instanceof Number) { + VARCHAR.writeSlice(output, Slices.utf8Slice(value.toString())); + } + else { + throw new TrinoException(TYPE_MISMATCH, format("Expected a string or numeric value for field '%s' of type VARCHAR: %s [%s]", path, value, value.getClass().getSimpleName())); + } + } + + public static class Descriptor + implements DecoderDescriptor + { + private final String path; + + @JsonCreator + public Descriptor(String path) + { + this.path = path; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @Override + public Decoder createDecoder() + { + return new VarcharDecoder(path); + } + } +} diff --git a/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ptf/RawQuery.java b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ptf/RawQuery.java new file mode 100644 index 0000000000000..ae0b1013b0b70 --- /dev/null +++ b/plugin/trino-opensearch/src/main/java/io/trino/plugin/opensearch/ptf/RawQuery.java @@ -0,0 +1,147 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch.ptf; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.inject.Inject; +import com.google.inject.Provider; +import io.airlift.slice.Slice; +import io.trino.plugin.opensearch.OpenSearchColumnHandle; +import io.trino.plugin.opensearch.OpenSearchMetadata; +import io.trino.plugin.opensearch.OpenSearchTableHandle; +import io.trino.spi.connector.ColumnHandle; +import io.trino.spi.connector.ColumnSchema; +import io.trino.spi.connector.ConnectorAccessControl; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.ConnectorTableSchema; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.function.table.AbstractConnectorTableFunction; +import io.trino.spi.function.table.Argument; +import io.trino.spi.function.table.ConnectorTableFunction; +import io.trino.spi.function.table.ConnectorTableFunctionHandle; +import io.trino.spi.function.table.Descriptor; +import io.trino.spi.function.table.ScalarArgument; +import io.trino.spi.function.table.ScalarArgumentSpecification; +import io.trino.spi.function.table.TableFunctionAnalysis; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.toList; + +public class RawQuery + implements Provider +{ + public static final String SCHEMA_NAME = "system"; + public static final String NAME = "raw_query"; + + private final OpenSearchMetadata metadata; + + @Inject + public RawQuery(OpenSearchMetadata metadata) + { + this.metadata = requireNonNull(metadata, "metadata is null"); + } + + @Override + public ConnectorTableFunction get() + { + return new RawQueryFunction(metadata); + } + + public static class RawQueryFunction + extends AbstractConnectorTableFunction + { + private final OpenSearchMetadata metadata; + + public RawQueryFunction(OpenSearchMetadata metadata) + { + super( + SCHEMA_NAME, + NAME, + List.of( + ScalarArgumentSpecification.builder() + .name("SCHEMA") + .type(VARCHAR) + .build(), + ScalarArgumentSpecification.builder() + .name("INDEX") + .type(VARCHAR) + .build(), + ScalarArgumentSpecification.builder() + .name("QUERY") + .type(VARCHAR) + .build()), + GENERIC_TABLE); + this.metadata = requireNonNull(metadata, "metadata is null"); + } + + @Override + public TableFunctionAnalysis analyze( + ConnectorSession session, + ConnectorTransactionHandle transaction, + Map arguments, + ConnectorAccessControl accessControl) + { + String schema = ((Slice) ((ScalarArgument) arguments.get("SCHEMA")).getValue()).toStringUtf8(); + String index = ((Slice) ((ScalarArgument) arguments.get("INDEX")).getValue()).toStringUtf8(); + String query = ((Slice) ((ScalarArgument) arguments.get("QUERY")).getValue()).toStringUtf8(); + + OpenSearchTableHandle tableHandle = new OpenSearchTableHandle(OpenSearchTableHandle.Type.QUERY, schema, index, Optional.of(query)); + ConnectorTableSchema tableSchema = metadata.getTableSchema(session, tableHandle); + Map columnsByName = metadata.getColumnHandles(session, tableHandle); + List columns = tableSchema.getColumns().stream() + .map(ColumnSchema::getName) + .map(columnsByName::get) + 
.collect(toImmutableList()); + + Descriptor returnedType = new Descriptor(columns.stream() + .map(OpenSearchColumnHandle.class::cast) + .map(column -> new Descriptor.Field(column.getName(), Optional.of(column.getType()))) + .collect(toList())); + + RawQueryFunctionHandle handle = new RawQueryFunctionHandle(tableHandle); + + return TableFunctionAnalysis.builder() + .returnedType(returnedType) + .handle(handle) + .build(); + } + } + + public static class RawQueryFunctionHandle + implements ConnectorTableFunctionHandle + { + private final OpenSearchTableHandle tableHandle; + + @JsonCreator + public RawQueryFunctionHandle(@JsonProperty("tableHandle") OpenSearchTableHandle tableHandle) + { + this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); + } + + @JsonProperty + public ConnectorTableHandle getTableHandle() + { + return tableHandle; + } + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/BaseOpenSearchConnectorTest.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/BaseOpenSearchConnectorTest.java new file mode 100644 index 0000000000000..3e727a8facb06 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/BaseOpenSearchConnectorTest.java @@ -0,0 +1,1954 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import io.trino.Session; +import io.trino.spi.type.VarcharType; +import io.trino.sql.planner.plan.LimitNode; +import io.trino.testing.AbstractTestQueries; +import io.trino.testing.BaseConnectorTest; +import io.trino.testing.MaterializedResult; +import io.trino.testing.QueryRunner; +import io.trino.testing.TestingConnectorBehavior; +import io.trino.tpch.TpchTable; +import org.apache.http.HttpHost; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.opensearch.client.Request; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestHighLevelClient; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.util.List; +import java.util.Map; + +import static io.trino.plugin.opensearch.OpenSearchQueryRunner.createOpenSearchQueryRunner; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static io.trino.testing.MaterializedResult.resultBuilder; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@TestInstance(PER_CLASS) +public abstract class BaseOpenSearchConnectorTest + extends BaseConnectorTest +{ + private final String image; + private final String catalogName; + private OpenSearchServer opensearch; + protected RestHighLevelClient client; + + BaseOpenSearchConnectorTest(String image, String catalogName) + { + this.image = image; + this.catalogName = catalogName; + } + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + opensearch = new OpenSearchServer(image, false, ImmutableMap.of()); + HostAndPort address = opensearch.getAddress(); + client = new RestHighLevelClient(RestClient.builder(new HttpHost(address.getHost(), address.getPort()))); + + return createOpenSearchQueryRunner( + opensearch.getAddress(), + TpchTable.getTables(), + ImmutableMap.of(), + ImmutableMap.of(), + 3, + catalogName); + } + + @AfterAll + public final void destroy() + throws IOException + { + opensearch.stop(); + opensearch = null; + client.close(); + client = null; + } + + @Override + protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior) + { + return switch (connectorBehavior) { + case SUPPORTS_ADD_COLUMN, + SUPPORTS_COMMENT_ON_COLUMN, + SUPPORTS_COMMENT_ON_TABLE, + SUPPORTS_CREATE_MATERIALIZED_VIEW, + SUPPORTS_CREATE_SCHEMA, + SUPPORTS_CREATE_TABLE, + SUPPORTS_CREATE_VIEW, + SUPPORTS_DELETE, + SUPPORTS_INSERT, + SUPPORTS_LIMIT_PUSHDOWN, + SUPPORTS_MERGE, + SUPPORTS_RENAME_COLUMN, + SUPPORTS_RENAME_TABLE, + SUPPORTS_ROW_TYPE, + SUPPORTS_SET_COLUMN_TYPE, + SUPPORTS_TOPN_PUSHDOWN, + SUPPORTS_UPDATE -> false; + default -> super.hasBehavior(connectorBehavior); + }; + } + + /** + * This method overrides the default values used for the data provider + * of the test {@link AbstractTestQueries#testLargeIn()} by taking + * into account that by default OpenSearch 2.x supports only up 
to `1024` + clauses in a query. +

+ * Consult the `index.query.bool.max_clause_count` setting in opensearch.yml + * for more details. + * + * @return the number of clauses to be used in large queries + */ + @Override + protected List<Integer> largeInValuesCountData() + { + return ImmutableList.of(200, 500, 1000); + } + + @Test + public void testWithoutBackpressure() + { + assertQuerySucceeds("SELECT * FROM orders"); + // Check that JMX stats show no sign of backpressure + assertQueryReturnsEmptyResult(format("SELECT 1 FROM jmx.current.\"trino.plugin.opensearch.client:*name=%s*\" WHERE \"backpressurestats.alltime.count\" > 0", catalogName)); + assertQueryReturnsEmptyResult(format("SELECT 1 FROM jmx.current.\"trino.plugin.opensearch.client:*name=%s*\" WHERE \"backpressurestats.alltime.max\" > 0", catalogName)); + } + + @Test + @Override + public void testSelectAll() + { + // List columns explicitly, as there's no defined order in OpenSearch + assertQuery("SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment FROM orders"); + } + + @Override + protected MaterializedResult getDescribeOrdersResult() + { + // The column metadata for the OpenSearch connector tables is provided + // ordered alphabetically by column name. + return resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("clerk", "varchar", "", "") + .row("comment", "varchar", "", "") + .row("custkey", "bigint", "", "") + .row("orderdate", "timestamp(3)", "", "") + .row("orderkey", "bigint", "", "") + .row("orderpriority", "varchar", "", "") + .row("orderstatus", "varchar", "", "") + .row("shippriority", "bigint", "", "") + .row("totalprice", "real", "", "") + .build(); + } + + @Test + @Override + public void testPredicateReflectedInExplain() + { + // The format of the string representation of what gets shown in the table scan is connector-specific + // and there's no requirement that they conform to a specific shape or contain certain keywords. + + assertExplain( + "EXPLAIN SELECT name FROM nation WHERE nationkey = 42", + "nationkey::bigint", "::\\s\\[\\[42\\]\\]"); + } + + @Test + @Override + public void testSortItemsReflectedInExplain() + { + // The format of the string representation of what gets shown in the table scan is connector-specific + // and there's no requirement that they conform to a specific shape or contain certain keywords.
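+ // TopN pushdown is disabled in hasBehavior above, so the ORDER BY ... LIMIT is expected to surface as a TopNPartial node in the plan rather than in the connector's table scan.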
+ assertExplain( + "EXPLAIN SELECT name FROM nation ORDER BY nationkey DESC NULLS LAST LIMIT 5", + "TopNPartial\\[count = 5, orderBy = \\[nationkey DESC"); + } + + @Test + @Override + public void testShowCreateTable() + { + assertThat(computeActual("SHOW CREATE TABLE orders").getOnlyValue()) + .isEqualTo(format("CREATE TABLE %s.tpch.orders (\n", catalogName) + + " clerk varchar,\n" + + " comment varchar,\n" + + " custkey bigint,\n" + + " orderdate timestamp(3),\n" + + " orderkey bigint,\n" + + " orderpriority varchar,\n" + + " orderstatus varchar,\n" + + " shippriority bigint,\n" + + " totalprice real\n" + + ")"); + } + + @Test + @Override + public void testShowColumns() + { + assertThat(query("SHOW COLUMNS FROM orders")).matches(getDescribeOrdersResult()); + } + + @Test + public void testNullPredicate() + throws IOException + { + String indexName = "null_predicate1"; + @Language("JSON") + String properties = "" + + "{" + + " \"properties\":{" + + " \"null_keyword\": { \"type\": \"keyword\" }," + + " \"custkey\": { \"type\": \"keyword\" }" + + " }" + + "}"; + createIndex(indexName, properties); + index(indexName, ImmutableMap.builder() + .put("null_keyword", 32) + .put("custkey", 1301) + .buildOrThrow()); + + assertQueryReturnsEmptyResult("SELECT * FROM null_predicate1 WHERE null_keyword IS NULL"); + assertQueryReturnsEmptyResult("SELECT * FROM null_predicate1 WHERE null_keyword = '10' OR null_keyword IS NULL"); + + assertQuery("SELECT custkey, null_keyword FROM null_predicate1 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES (1301, 32)"); + assertQuery("SELECT custkey FROM null_predicate1 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES (1301)"); + + // not null filter + // filtered column is selected + assertQuery("SELECT custkey, null_keyword FROM null_predicate1 WHERE null_keyword IS NOT NULL", "VALUES (1301, 32)"); + assertQuery("SELECT custkey, null_keyword FROM null_predicate1 WHERE null_keyword = '32' OR null_keyword IS NOT NULL", "VALUES (1301, 32)"); + + // filtered column is not selected + assertQuery("SELECT custkey FROM null_predicate1 WHERE null_keyword = '32' OR null_keyword IS NOT NULL", "VALUES (1301)"); + + indexName = "null_predicate2"; + properties = "" + + "{" + + " \"properties\":{" + + " \"null_keyword\": { \"type\": \"keyword\" }," + + " \"custkey\": { \"type\": \"keyword\" }" + + " }" + + "}"; + createIndex(indexName, properties); + index(indexName, ImmutableMap.of("custkey", 1301)); + + // not null filter + assertQueryReturnsEmptyResult("SELECT * FROM null_predicate2 WHERE null_keyword IS NOT NULL"); + assertQueryReturnsEmptyResult("SELECT * FROM null_predicate2 WHERE null_keyword = '10' OR null_keyword IS NOT NULL"); + + // filtered column is selected + assertQuery("SELECT custkey, null_keyword FROM null_predicate2 WHERE null_keyword IS NULL", "VALUES (1301, NULL)"); + assertQuery("SELECT custkey, null_keyword FROM null_predicate2 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES (1301, NULL)"); + + // filtered column is not selected + assertQuery("SELECT custkey FROM null_predicate2 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES (1301)"); + + index(indexName, ImmutableMap.builder() + .put("null_keyword", 32) + .put("custkey", 1302) + .buildOrThrow()); + + assertQuery("SELECT custkey, null_keyword FROM null_predicate2 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES (1301, NULL), (1302, 32)"); + assertQuery("SELECT custkey FROM null_predicate2 WHERE null_keyword = '32' OR null_keyword IS NULL", "VALUES 
(1301), (1302)"); + } + + @Test + public void testNestedFields() + throws IOException + { + String indexName = "data"; + index(indexName, ImmutableMap.builder() + .put("name", "nestfield") + .put("fields.fielda", 32) + .put("fields.fieldb", "valueb") + .buildOrThrow()); + + assertQuery( + "SELECT name, fields.fielda, fields.fieldb FROM data", + "VALUES ('nestfield', 32, 'valueb')"); + } + + @Test + public void testNameConflict() + throws IOException + { + String indexName = "name_conflict"; + index(indexName, ImmutableMap.builder() + .put("field", "value") + .put("Conflict", "conflict1") + .put("conflict", "conflict2") + .buildOrThrow()); + + assertQuery( + "SELECT * FROM name_conflict", + "VALUES ('value')"); + } + + @Test + public void testArrayFields() + throws IOException + { + String indexName = "test_arrays"; + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"a\": {" + + " \"b\": {" + + " \"y\": {" + + " \"isArray\": true" + + " }" + + " }" + + " }," + + " \"c\": {" + + " \"f\": {" + + " \"g\": {" + + " \"isArray\": true" + + " }," + + " \"isArray\": true" + + " }" + + " }," + + " \"j\": {" + + " \"isArray\": true" + + " }," + + " \"k\": {" + + " \"isArray\": true" + + " }" + + " }" + + " }," + + " \"properties\":{" + + " \"a\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"b\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"x\": {" + + " \"type\": \"integer\"" + + " }," + + " \"y\": {" + + " \"type\": \"keyword\"" + + " }" + + " } " + + " }" + + " }" + + " }," + + " \"c\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"d\": {" + + " \"type\": \"keyword\"" + + " }," + + " \"e\": {" + + " \"type\": \"keyword\"" + + " }," + + " \"f\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"g\": {" + + " \"type\": \"integer\"" + + " }," + + " \"h\": {" + + " \"type\": \"integer\"" + + " }" + + " } " + + " }" + + " }" + + " }," + + " \"i\": {" + + " \"type\": \"long\"" + + " }," + + " \"j\": {" + + " \"type\": \"long\"" + + " }," + + " \"k\": {" + + " \"type\": \"long\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.builder() + .put("a", ImmutableMap.builder() + .put("b", ImmutableMap.builder() + .put("x", 1) + .put("y", ImmutableList.builder() + .add("hello") + .add("world") + .build()) + .buildOrThrow()) + .buildOrThrow()) + .put("c", ImmutableMap.builder() + .put("d", "foo") + .put("e", "bar") + .put("f", ImmutableList.>builder() + .add(ImmutableMap.builder() + .put("g", ImmutableList.builder() + .add(10) + .add(20) + .build()) + .put("h", 100) + .buildOrThrow()) + .add(ImmutableMap.builder() + .put("g", ImmutableList.builder() + .add(30) + .add(40) + .build()) + .put("h", 200) + .buildOrThrow()) + .build()) + .buildOrThrow()) + .put("j", ImmutableList.builder() + .add(50L) + .add(60L) + .build()) + .buildOrThrow()); + + assertQuery( + "SELECT a.b.y[1], c.f[1].g[2], c.f[2].g[1], j[2], k[1] FROM test_arrays", + "VALUES ('hello', 20, 30, 60, NULL)"); + } + + @Test + public void testAsRawJson() + throws IOException + { + String indexName = "raw_json_" + randomNameSuffix(); + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"es_object\": {" + + " \"array_of_string_arrays\": {" + + " \"asRawJson\": true" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"asRawJson\": true" + + " }" + + " }," + + " \"es_array_object\": {" + + " \"isArray\": true," + + " \"array_of_string_arrays\": {" 
+ + " \"asRawJson\": true" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"asRawJson\": true" + + " }" + + " }," + + " \"es_raw_object\": {" + + " \"asRawJson\": true," + + " \"array_of_string_arrays\": {" + + " \"isArray\": true" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"isArray\": true" + + " }" + + " }," + + " \"array_of_string_arrays\": {" + + " \"asRawJson\": true" + + " }," + + " \"array_of_long_arrays\": {" + + " \"asRawJson\": true" + + " }" + + " }" + + " }," + + " \"properties\": {" + + " \"es_object\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"array_of_string_arrays\": {" + + " \"type\": \"keyword\"" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"type\": \"integer\"" + + " }" + + " }" + + " }," + + " \"es_array_object\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"array_of_string_arrays\": {" + + " \"type\": \"keyword\"" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"type\": \"integer\"" + + " }" + + " }" + + " }," + + " \"es_raw_object\": {" + + " \"type\": \"object\"," + + " \"properties\": {" + + " \"array_of_string_arrays\": {" + + " \"type\": \"keyword\"" + + " }," + + " \"arrayOfIntArrays\": {" + + " \"type\": \"integer\"" + + " }" + + " }" + + " }," + + " \"array_of_string_arrays\": {" + + " \"type\": \"text\"" + + " }," + + " \"array_of_long_arrays\": {" + + " \"type\": \"long\"" + + " }," + + " \"order_field\": {" + + " \"type\": \"integer\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.builder() + .put("es_object", ImmutableMap.builder() + .put("array_of_string_arrays", ImmutableList.>builder() + .add(ImmutableList.builder() + .add("abc") + .add("def") + .build()) + .build()) + .put("arrayOfIntArrays", ImmutableList.builder() + .add(123) + .add(ImmutableList.builder() + .add(234) + .add(345) + .build()) + .build()) + .buildOrThrow()) + .put("es_array_object", ImmutableMap.builder() + .put("array_of_string_arrays", ImmutableList.>builder() + .add(ImmutableList.builder() + .add("abc") + .add("def") + .build()) + .build()) + .put("arrayOfIntArrays", ImmutableList.builder() + .add(123) + .add(ImmutableList.builder() + .add(234) + .add(345) + .build()) + .build()) + .buildOrThrow()) + .put("es_raw_object", ImmutableMap.builder() + .put("array_of_string_arrays", ImmutableList.>builder() + .add(ImmutableList.builder() + .add("abc") + .add("def") + .build()) + .build()) + .put("arrayOfIntArrays", ImmutableList.builder() + .add(123) + .add(ImmutableList.builder() + .add(234) + .add(345) + .build()) + .build()) + .buildOrThrow()) + .put("array_of_string_arrays", ImmutableList.>builder() + .add(ImmutableList.builder() + .add("abc") + .add("def") + .build()) + .build()) + .put("array_of_long_arrays", ImmutableList.builder() + .add(123L) + .add(ImmutableList.builder() + .add(234L) + .add(345L) + .build()) + .build()) + .put("order_field", 1) + .buildOrThrow()); + + index(indexName, ImmutableMap.builder() + .put("es_object", ImmutableMap.builder() + .put("array_of_string_arrays", "Join the Trino Slack: https://trino.io/slack.html") + .put("arrayOfIntArrays", 867) + .buildOrThrow()) + .put("es_array_object", ImmutableMap.builder() + .put("array_of_string_arrays", "If you like Presto, you'll love Trino: https://trino.io/slack.html") + .put("arrayOfIntArrays", 321) + .buildOrThrow()) + .put("es_raw_object", ImmutableMap.builder() + .put("array_of_string_arrays", "The founders and core contributors of Presto, and are now working on Trino: 
https://trino.io/blog/2020/12/27/announcing-trino.html") + .put("arrayOfIntArrays", 654) + .buildOrThrow()) + .put("array_of_string_arrays", "Check out the bi-weekly Trino Community Broadcast https://trino.io/broadcast/") + .put("array_of_long_arrays", 5309L) + .put("order_field", 2) + .buildOrThrow()); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "json_extract(array_of_string_arrays, '$[0][0]'), " + + "json_extract(array_of_string_arrays, '$[0][1]'), " + + "array_of_string_arrays, " + + "json_extract(array_of_long_arrays, '$[0]'), " + + "try(json_extract(array_of_long_arrays, '$[1][0]')), " + + "try(json_extract(array_of_long_arrays, '$[1][1]')), " + + "array_of_long_arrays " + + "FROM " + indexName + " " + + "ORDER BY order_field"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row("\"abc\"", "\"def\"", "[[\"abc\",\"def\"]]", "123", "234", "345", "[123,[234,345]]") + .row(null, null, "\"Check out the bi-weekly Trino Community Broadcast https://trino.io/broadcast/\"", null, null, null, "5309") + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + + MaterializedResult nestedRows = computeActual("" + + "SELECT " + + "json_extract(es_object.array_of_string_arrays, '$[0][0]'), " + + "json_extract(es_object.array_of_string_arrays, '$[0][1]'), " + + "es_object.array_of_string_arrays, " + + "json_extract(es_object.arrayOfIntArrays, '$[0]'), " + + "try(json_extract(es_object.arrayOfIntArrays, '$[1][0]')), " + + "try(json_extract(es_object.arrayOfIntArrays, '$[1][1]')), " + + "es_object.arrayOfIntArrays " + + "FROM " + indexName + " " + + "ORDER BY order_field"); + + MaterializedResult nestedExpected = resultBuilder(getSession(), nestedRows.getTypes()) + .row("\"abc\"", "\"def\"", "[[\"abc\",\"def\"]]", "123", "234", "345", "[123,[234,345]]") + .row(null, null, "\"Join the Trino Slack: https://trino.io/slack.html\"", null, null, null, "867") + .build(); + + assertThat(nestedRows.getMaterializedRows()).isEqualTo(nestedExpected.getMaterializedRows()); + + MaterializedResult arrayRows = computeActual("" + + "SELECT " + + "json_extract(es_array_object[1].array_of_string_arrays, '$[0][0]'), " + + "json_extract(es_array_object[1].array_of_string_arrays, '$[0][1]'), " + + "es_array_object[1].array_of_string_arrays, " + + "json_extract(es_array_object[1].arrayOfIntArrays, '$[0]'), " + + "try(json_extract(es_array_object[1].arrayOfIntArrays, '$[1][0]')), " + + "try(json_extract(es_array_object[1].arrayOfIntArrays, '$[1][1]')), " + + "es_array_object[1].arrayOfIntArrays " + + "FROM " + indexName + " " + + "ORDER BY order_field"); + + MaterializedResult arrayExpected = resultBuilder(getSession(), arrayRows.getTypes()) + .row("\"abc\"", "\"def\"", "[[\"abc\",\"def\"]]", "123", "234", "345", "[123,[234,345]]") + .row(null, null, "\"If you like Presto, you'll love Trino: https://trino.io/slack.html\"", null, null, null, "321") + .build(); + + assertThat(arrayRows.getMaterializedRows()).isEqualTo(arrayExpected.getMaterializedRows()); + + MaterializedResult rawRows = computeActual("" + + "SELECT " + + "json_extract(es_raw_object, '$.array_of_string_arrays[0][0]'), " + + "json_extract(es_raw_object, '$.array_of_string_arrays[0][1]'), " + + "json_extract(es_raw_object, '$.array_of_string_arrays'), " + + "json_extract(es_raw_object, '$.arrayOfIntArrays[0]'), " + + "try(json_extract(es_raw_object, '$.arrayOfIntArrays[1][0]')), " + + "try(json_extract(es_raw_object, '$.arrayOfIntArrays[1][1]')), " + + 
"json_extract(es_raw_object, '$.arrayOfIntArrays') " + + "FROM " + indexName + " " + + "ORDER BY order_field"); + + MaterializedResult rawRowsExpected = resultBuilder(getSession(), rawRows.getTypes()) + .row("\"abc\"", "\"def\"", "[[\"abc\",\"def\"]]", "123", "234", "345", "[123,[234,345]]") + .row(null, null, "\"The founders and core contributors of Presto, and are now working on Trino: https://trino.io/blog/2020/12/27/announcing-trino.html\"", null, null, null, "654") + .build(); + + assertThat(rawRows.getMaterializedRows()).isEqualTo(rawRowsExpected.getMaterializedRows()); + } + + @Test + public void testAsRawJsonForAllPrimitiveTypes() + throws IOException + { + String indexName = "raw_json_primitive_" + randomNameSuffix(); + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"es_binary\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_boolean\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_long\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_integer\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_short\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_byte\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_double\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_float\": {" + + " \"asRawJson\": true" + + " }" + + " }" + + " }," + + " \"properties\": {" + + " \"es_binary\": {" + + " \"type\": \"binary\"" + + " }," + + " \"es_boolean\": {" + + " \"type\": \"boolean\"" + + " }," + + " \"es_long\": {" + + " \"type\": \"long\"" + + " }," + + " \"es_integer\": {" + + " \"type\": \"integer\"" + + " }," + + " \"es_short\": {" + + " \"type\": \"short\"" + + " }," + + " \"es_byte\": {" + + " \"type\": \"byte\"" + + " }," + + " \"es_double\": {" + + " \"type\": \"double\"" + + " }," + + " \"es_float\": {" + + " \"type\": \"float\"" + + " }," + + " \"order_field\": {" + + " \"type\": \"integer\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.builder() + .put("es_binary", "test".getBytes(UTF_8)) + .put("es_boolean", true) + .put("es_long", (long) 123) + .put("es_integer", 123) + .put("es_short", (short) 123) + .put("es_byte", (byte) 123) + .put("es_double", (double) 123) + .put("es_float", (float) 123) + .put("order_field", 1) + .buildOrThrow()); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "es_binary, " + + "es_boolean, " + + "es_long, " + + "es_integer, " + + "es_short, " + + "es_byte, " + + "es_double, " + + "es_float " + + "FROM " + indexName + " " + + "ORDER BY order_field"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row("\"dGVzdA==\"", "true", "123", "123", "123", "123", "123.0", "123.0") + .build(); + assertThat(rows.getTypes()) + .hasOnlyElementsOfType(VarcharType.class); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + + deleteIndex(indexName); + } + + @Test + public void testAsRawJsonCases() + throws IOException + { + String indexName = "raw_json_cases_" + randomNameSuffix(); + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"es_binary\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_boolean\": {" + + " \"asRawJson\": true" + + " }," + + " \"es_timestamp\": {" + + " \"asRawJson\": true" + + " }" + + " }" + + " }," + + " \"properties\": {" + + " \"es_binary\": {" + + " \"type\": \"binary\"" + + " }," + + " \"es_boolean\": {" + + " \"type\": \"boolean\"" + + " }," + + " \"es_timestamp\": {" + + " \"type\": 
\"date\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.builder() + .put("es_binary", "test".getBytes(UTF_8)) + .put("es_boolean", true) + .put("es_timestamp", 123) + .buildOrThrow()); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "es_binary, " + + "es_boolean, " + + "es_timestamp " + + "FROM " + indexName); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row("\"dGVzdA==\"", "true", "123") + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + assertThat(rows.getTypes()) + .hasOnlyElementsOfType(VarcharType.class); + + deleteIndex(indexName); + } + + @Test + public void testAsRawJsonAndIsArraySameFieldException() + throws IOException + { + String indexName = "raw_json_array_exception" + randomNameSuffix(); + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"array_raw_field\": {" + + " \"asRawJson\": true," + + " \"isArray\": true" + + " }" + + " }" + + " }," + + " \"properties\": {" + + " \"array_raw_field\": {" + + " \"type\": \"text\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.of("array_raw_field", "test")); + + assertThatThrownBy(() -> computeActual("SELECT array_raw_field FROM " + indexName)) + .hasMessage("A column, (array_raw_field) cannot be declared as a Trino array and also be rendered as json."); + + deleteIndex(indexName); + } + + @Test + public void testMixedArray() + throws IOException + { + String indexName = "test_mixed_arrays"; + + @Language("JSON") + String mapping = "" + + "{" + + " \"_meta\": {" + + " \"trino\": {" + + " \"a\": {" + + " \"isArray\": true" + + " }" + + " }" + + " }," + + " \"properties\": {" + + " \"a\": {" + + " \"type\": \"keyword\"" + + " }" + + " }" + + "}"; + + createIndex(indexName, mapping); + + index(indexName, ImmutableMap.of()); + + index(indexName, ImmutableMap.of("a", "hello")); + + index(indexName, ImmutableMap.of("a", ImmutableList.of("foo", "bar"))); + + assertQuery( + "SELECT a FROM test_mixed_arrays", + "VALUES NULL, ARRAY['hello'], ARRAY['foo', 'bar']"); + } + + @Test + public void testEmptyNumericFields() + throws IOException + { + String indexName = "emptynumeric"; + + @Language("JSON") + String mapping = "" + + "{" + + " \"properties\": { " + + " \"byte_column\": {\"type\": \"byte\"}," + + " \"short_column\": {\"type\": \"short\"}," + + " \"integer_column\": {\"type\": \"integer\"}," + + " \"long_column\": {\"type\": \"long\"}," + + " \"float_column\": {\"type\": \"float\"}," + + " \"scaled_float_column\": {\"type\": \"scaled_float\", \"scaling_factor\": 100}," + + " \"double_column\": {\"type\": \"double\"}" + + " }" + + "}"; + + createIndex(indexName, mapping); + index(indexName, ImmutableMap.builder() + .put("byte_column", "") + .put("short_column", "") + .put("integer_column", "") + .put("long_column", "") + .put("float_column", "") + .put("scaled_float_column", "") + .put("double_column", "") + .buildOrThrow()); + + assertQuery( + "SELECT byte_column, short_column, integer_column, long_column, float_column, scaled_float_column, double_column FROM emptynumeric", + "VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL)"); + + deleteIndex(indexName); + } + + @Test + public void testEmptyObjectFields() + throws IOException + { + String indexName = "emptyobject"; + index(indexName, ImmutableMap.builder() + .put("name", "stringfield") + .put("emptyobject", ImmutableMap.of()) + 
.put("fields.fielda", 32) + .put("fields.fieldb", ImmutableMap.of()) + .buildOrThrow()); + + assertQuery( + "SELECT name, fields.fielda FROM emptyobject", + "VALUES ('stringfield', 32)"); + } + + @Test + public void testNestedVariants() + throws IOException + { + String indexName = "nested_variants"; + + index(indexName, + ImmutableMap.of("a", + ImmutableMap.of("b", + ImmutableMap.of("c", + "value1")))); + + index(indexName, + ImmutableMap.of("a.b", + ImmutableMap.of("c", + "value2"))); + + index(indexName, + ImmutableMap.of("a", + ImmutableMap.of("b.c", + "value3"))); + + index(indexName, + ImmutableMap.of("a.b.c", "value4")); + + assertQuery( + "SELECT a.b.c FROM nested_variants", + "VALUES 'value1', 'value2', 'value3', 'value4'"); + } + + @Test + public void testLike() + throws IOException + { + String indexName = "like_test"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"keyword_column\": { \"type\": \"keyword\" }," + + " \"text_column\": { \"type\": \"text\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("keyword_column", "so.me tex\\t") + .put("text_column", "so.me tex\\t") + .buildOrThrow()); + + // Add another document to make sure '.' is escaped and not treated as any character + index(indexName, ImmutableMap.builder() + .put("keyword_column", "soome tex\\t") + .put("text_column", "soome tex\\t") + .buildOrThrow()); + + // Add another document to make sure '%' can be escaped and not treated as any character + index(indexName, ImmutableMap.builder() + .put("keyword_column", "soome%text") + .put("text_column", "soome%text") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "中文") + .put("text_column", "中文") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "こんにちは") + .put("text_column", "こんにちは") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "안녕하세요") + .put("text_column", "안녕하세요") + .buildOrThrow()); + + // Add another document to make sure utf8 character sequence length is right + index(indexName, ImmutableMap.builder() + .put("keyword_column", "Привет") + .put("text_column", "Привет") + .buildOrThrow()); + + assertThat(query("" + + "SELECT " + + "keyword_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 's_.m%ex\\t'")) + .matches("VALUES VARCHAR 'so.me tex\\t'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE text_column LIKE 's_.m%ex\\t'")) + .matches("VALUES VARCHAR 'so.me tex\\t'"); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 'soome$%%' ESCAPE '$'")) + .matches("VALUES VARCHAR 'soome%text'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE '中%'")) + .matches("VALUES VARCHAR '中文'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 'こんに%'")) + .matches("VALUES VARCHAR 'こんにちは'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + 
" " + + "WHERE keyword_column LIKE '안녕하%'")) + .matches("VALUES VARCHAR '안녕하세요'") + .isFullyPushedDown(); + + assertThat(query("" + + "SELECT " + + "text_column " + + "FROM " + indexName + " " + + "WHERE keyword_column LIKE 'При%'")) + .matches("VALUES VARCHAR 'Привет'") + .isFullyPushedDown(); + } + + @Test + public void testDataTypes() + throws IOException + { + String indexName = "types"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"boolean_column\": { \"type\": \"boolean\" }," + + " \"float_column\": { \"type\": \"float\" }," + + " \"double_column\": { \"type\": \"double\" }," + + " \"integer_column\": { \"type\": \"integer\" }," + + " \"long_column\": { \"type\": \"long\" }," + + " \"keyword_column\": { \"type\": \"keyword\" }," + + " \"text_column\": { \"type\": \"text\" }," + + " \"binary_column\": { \"type\": \"binary\" }," + + " \"timestamp_column\": { \"type\": \"date\" }," + + " \"ipv4_column\": { \"type\": \"ip\" }," + + " \"ipv6_column\": { \"type\": \"ip\" }," + + " \"scaled_float_column\": { \"type\": \"scaled_float\", \"scaling_factor\": 100 }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("boolean_column", true) + .put("float_column", 1.0f) + .put("double_column", 1.0d) + .put("integer_column", 1) + .put("long_column", 1L) + .put("keyword_column", "cool") + .put("text_column", "some text") + .put("binary_column", new byte[] {(byte) 0xCA, (byte) 0xFE}) + .put("timestamp_column", 0) + .put("ipv4_column", "1.2.3.4") + .put("ipv6_column", "2001:db8:0:0:1:0:0:1") + .put("scaled_float_column", 123456.78d) + .buildOrThrow()); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "boolean_column, " + + "float_column, " + + "double_column, " + + "integer_column, " + + "long_column, " + + "keyword_column, " + + "text_column, " + + "binary_column, " + + "timestamp_column, " + + "ipv4_column, " + + "ipv6_column, " + + "scaled_float_column " + + "FROM types"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row( + true, + 1.0f, + 1.0d, + 1, + 1L, + "cool", + "some text", + new byte[] {(byte) 0xCA, (byte) 0xFE}, + LocalDateTime.of(1970, 1, 1, 0, 0), + "1.2.3.4", + "2001:db8::1:0:0:1", + 123456.78d) + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + } + + @Test + public void testTableWithUnsupportedTypes() + throws IOException + { + String indexName = "unsupported_types"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"long_column\": { \"type\": \"long\" }," + + " \"unsupported_type\": { \"type\": \"completion\"}" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("long_column", 1L) + .put("unsupported_type", ImmutableList.of("foo", "bar")) + .buildOrThrow()); + + MaterializedResult rows = computeActual("SELECT * FROM unsupported_types"); + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(1L) + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + } + + @Test + public void testBoolean() + throws IOException + { + String indexName = "booleans"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"boolean_column\": { \"type\": \"boolean\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.of("boolean_column", true)); + + index(indexName, 
ImmutableMap.of("boolean_column", "true")); + + index(indexName, ImmutableMap.of("boolean_column", false)); + + index(indexName, ImmutableMap.of("boolean_column", "false")); + + index(indexName, ImmutableMap.of("boolean_column", "")); + + MaterializedResult rows = computeActual("SELECT boolean_column FROM booleans"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(true) + .row(true) + .row(false) + .row(false) + .row(false) + .build(); + + assertThat(rows.getMaterializedRows()).containsExactlyInAnyOrderElementsOf(expected.getMaterializedRows()); + } + + @Test + public void testTimestamps() + throws IOException + { + String indexName = "timestamps"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"timestamp_column\": { \"type\": \"date\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.of("timestamp_column", "2015-01-01")); + + index(indexName, ImmutableMap.of("timestamp_column", "2015-01-01T12:10:30Z")); + + index(indexName, ImmutableMap.of("timestamp_column", 1420070400001L)); + + index(indexName, ImmutableMap.of("timestamp_column", "1420070400001")); + + MaterializedResult rows = computeActual("SELECT timestamp_column FROM timestamps"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(LocalDateTime.parse("2015-01-01T00:00:00")) + .row(LocalDateTime.parse("2015-01-01T12:10:30")) + .row(LocalDateTime.parse("2015-01-01T00:00:00.001")) + .row(LocalDateTime.parse("2015-01-01T00:00:00.001")) + .build(); + + assertThat(rows.getMaterializedRows()).containsExactlyInAnyOrderElementsOf(expected.getMaterializedRows()); + } + + @Test + public void testNestedTimestamps() + throws IOException + { + String indexName = "nested_timestamps"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\":{" + + " \"field\": {" + + " \"properties\": {" + + " \"timestamp_column\": { \"type\": \"date\" }" + + " }" + + " }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.of("field", ImmutableMap.of("timestamp_column", 0))); + index(indexName, ImmutableMap.of("field", ImmutableMap.of("timestamp_column", "1"))); + index(indexName, ImmutableMap.of("field", ImmutableMap.of("timestamp_column", "1970-01-01T01:01:00+0000"))); + + assertThat(query("SELECT field.timestamp_column FROM " + indexName)) + .matches("VALUES " + + "(TIMESTAMP '1970-01-01 00:00:00.000')," + + "(TIMESTAMP '1970-01-01 00:00:00.001')," + + "(TIMESTAMP '1970-01-01 01:01:00.000')"); + } + + @Test + public void testScaledFloat() + throws Exception + { + String indexName = "scaled_float_type"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"text_column\": { \"type\": \"text\" }," + + " \"scaled_float_column\": { \"type\": \"scaled_float\", \"scaling_factor\": 100 }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("text_column", "foo") + .put("scaled_float_column", 123.4567d) + .buildOrThrow()); + + index(indexName, ImmutableMap.builder() + .put("text_column", "bar") + .put("scaled_float_column", 123.46d) + .buildOrThrow()); + + index(indexName, ImmutableMap.builder() + .put("text_column", "random value") + .put("scaled_float_column", 9.8d) + .buildOrThrow()); + + // Trino query filters in the engine, so the rounding (dependent on scaling factor) does not impact results + assertThat(query(""" + SELECT text_column, scaled_float_column + FROM 
scaled_float_type + WHERE scaled_float_column = 123.46 + """)) + .matches(resultBuilder(getSession(), ImmutableList.of(VARCHAR, DOUBLE)) + .row("bar", 123.46d) + .build()); + } + + @Test + public void testCoercions() + throws IOException + { + String indexName = "coercions"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"float_column\": { \"type\": \"float\" }," + + " \"double_column\": { \"type\": \"double\" }," + + " \"integer_column\": { \"type\": \"integer\" }," + + " \"long_column\": { \"type\": \"long\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("float_column", "1.0") + .put("double_column", "1.0") + .put("integer_column", "1") + .put("long_column", "1") + .buildOrThrow()); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "float_column, " + + "double_column, " + + "integer_column, " + + "long_column " + + "FROM coercions"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(1.0f, 1.0d, 1, 1L) + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + } + + @Test + public void testFilters() + throws IOException + { + String indexName = "filter_pushdown"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"boolean_column\": { \"type\": \"boolean\" }," + + " \"float_column\": { \"type\": \"float\" }," + + " \"double_column\": { \"type\": \"double\" }," + + " \"integer_column\": { \"type\": \"integer\" }," + + " \"long_column\": { \"type\": \"long\" }," + + " \"keyword_column\": { \"type\": \"keyword\" }," + + " \"text_column\": { \"type\": \"text\" }," + + " \"binary_column\": { \"type\": \"binary\" }," + + " \"timestamp_column\": { \"type\": \"date\" }," + + " \"ipv4_column\": { \"type\": \"ip\" }," + + " \"ipv6_column\": { \"type\": \"ip\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.builder() + .put("boolean_column", true) + .put("byte_column", 1) + .put("short_column", 2) + .put("integer_column", 3) + .put("long_column", 4L) + .put("float_column", 1.0f) + .put("double_column", 1.0d) + .put("keyword_column", "cool") + .put("text_column", "some text") + .put("binary_column", new byte[] {(byte) 0xCA, (byte) 0xFE}) + .put("timestamp_column", 1569888000000L) + .put("ipv4_column", "1.2.3.4") + .put("ipv6_column", "2001:db8:0:0:1:0:0:1") + .buildOrThrow()); + + // boolean + assertQuery("SELECT count(*) FROM filter_pushdown WHERE boolean_column = true", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE boolean_column = false", "VALUES 0"); + + // tinyint + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column = 1", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column = 0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column > 1", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column < 1", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column > 0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE byte_column < 10", "VALUES 1"); + + // smallint + assertQuery("SELECT count(*) FROM filter_pushdown WHERE short_column = 2", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE short_column > 2", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE short_column < 2", "VALUES 0"); + assertQuery("SELECT count(*) 
FROM filter_pushdown WHERE short_column = 0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE short_column > 0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE short_column < 10", "VALUES 1"); + + // integer + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column = 3", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column > 3", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column < 3", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column = 0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column > 0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE integer_column < 10", "VALUES 1"); + + // bigint + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column = 4", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column > 4", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column < 4", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column = 0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column > 0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE long_column < 10", "VALUES 1"); + + // real + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column = 1.0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column > 1.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column < 1.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column = 0.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column > 0.0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE float_column < 10.0", "VALUES 1"); + + // double + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column = 1.0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column > 1.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column < 1.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column = 0.0", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column > 0.0", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE double_column < 10.0", "VALUES 1"); + + // varchar + assertQuery("SELECT count(*) FROM filter_pushdown WHERE keyword_column = 'cool'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE keyword_column = 'bar'", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE text_column = 'some text'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE text_column = 'some'", "VALUES 0"); + + // binary + assertQuery("SELECT count(*) FROM filter_pushdown WHERE binary_column = x'CAFE'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE binary_column = x'ABCD'", "VALUES 0"); + + // timestamp + assertQuery("SELECT count(*) FROM filter_pushdown WHERE timestamp_column = TIMESTAMP '2019-10-01 00:00:00'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE timestamp_column > TIMESTAMP '2019-10-01 00:00:00'", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE timestamp_column < TIMESTAMP '2019-10-01 00:00:00'", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE 
timestamp_column = TIMESTAMP '2019-10-02 00:00:00'", "VALUES 0"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE timestamp_column > TIMESTAMP '2001-01-01 00:00:00'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE timestamp_column < TIMESTAMP '2030-01-01 00:00:00'", "VALUES 1"); + + // ipaddress + assertQuery("SELECT count(*) FROM filter_pushdown WHERE ipv4_column = IPADDRESS '1.2.3.4'", "VALUES 1"); + assertQuery("SELECT count(*) FROM filter_pushdown WHERE ipv6_column = IPADDRESS '2001:db8::1:0:0:1'", "VALUES 1"); + } + + @Test + public void testLimitPushdown() + throws IOException + { + assertThat(query("SELECT name FROM nation LIMIT 30")).isNotFullyPushedDown(LimitNode.class); // Use high limit for result determinism + } + + @Test + public void testDataTypesNested() + throws IOException + { + String indexName = "types_nested"; + + @Language("JSON") + String properties = "" + + "{" + + " \"properties\":{" + + " \"field\": {" + + " \"properties\": {" + + " \"boolean_column\": { \"type\": \"boolean\" }," + + " \"float_column\": { \"type\": \"float\" }," + + " \"double_column\": { \"type\": \"double\" }," + + " \"integer_column\": { \"type\": \"integer\" }," + + " \"long_column\": { \"type\": \"long\" }," + + " \"keyword_column\": { \"type\": \"keyword\" }," + + " \"text_column\": { \"type\": \"text\" }," + + " \"binary_column\": { \"type\": \"binary\" }," + + " \"timestamp_column\": { \"type\": \"date\" }," + + " \"ipv4_column\": { \"type\": \"ip\" }," + + " \"ipv6_column\": { \"type\": \"ip\" }" + + " }" + + " }" + + " }" + + "}"; + + createIndex(indexName, properties); + + index(indexName, ImmutableMap.of( + "field", + ImmutableMap.builder() + .put("boolean_column", true) + .put("float_column", 1.0f) + .put("double_column", 1.0d) + .put("integer_column", 1) + .put("long_column", 1L) + .put("keyword_column", "cool") + .put("text_column", "some text") + .put("binary_column", new byte[] {(byte) 0xCA, (byte) 0xFE}) + .put("timestamp_column", 0) + .put("ipv4_column", "1.2.3.4") + .put("ipv6_column", "2001:db8:0:0:1:0:0:1") + .buildOrThrow())); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "field.boolean_column, " + + "field.float_column, " + + "field.double_column, " + + "field.integer_column, " + + "field.long_column, " + + "field.keyword_column, " + + "field.text_column, " + + "field.binary_column, " + + "field.timestamp_column, " + + "field.ipv4_column, " + + "field.ipv6_column " + + "FROM types_nested"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(true, 1.0f, 1.0d, 1, 1L, "cool", "some text", new byte[] {(byte) 0xCA, (byte) 0xFE}, + LocalDateTime.of(1970, 1, 1, 0, 0), "1.2.3.4", "2001:db8::1:0:0:1") + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + } + + @Test + public void testNestedTypeDataTypesNested() + throws IOException + { + String indexName = "nested_type_nested"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\":{" + + " \"nested_field\": {" + + " \"type\":\"nested\"," + + " \"properties\": {" + + " \"boolean_column\": { \"type\": \"boolean\" }," + + " \"float_column\": { \"type\": \"float\" }," + + " \"double_column\": { \"type\": \"double\" }," + + " \"integer_column\": { \"type\": \"integer\" }," + + " \"long_column\": { \"type\": \"long\" }," + + " \"keyword_column\": { \"type\": \"keyword\" }," + + " \"text_column\": { \"type\": \"text\" }," + + " \"binary_column\": { \"type\": \"binary\" }," + + " 
\"timestamp_column\": { \"type\": \"date\" }," + + " \"ipv4_column\": { \"type\": \"ip\" }," + + " \"ipv6_column\": { \"type\": \"ip\" }" + + " }" + + " }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + index(indexName, ImmutableMap.of( + "nested_field", + ImmutableMap.builder() + .put("boolean_column", true) + .put("float_column", 1.0f) + .put("double_column", 1.0d) + .put("integer_column", 1) + .put("long_column", 1L) + .put("keyword_column", "cool") + .put("text_column", "some text") + .put("binary_column", new byte[] {(byte) 0xCA, (byte) 0xFE}) + .put("timestamp_column", 0) + .put("ipv4_column", "1.2.3.4") + .put("ipv6_column", "2001:db8:0:0:1:0:0:1") + .buildOrThrow())); + + MaterializedResult rows = computeActual("" + + "SELECT " + + "nested_field.boolean_column, " + + "nested_field.float_column, " + + "nested_field.double_column, " + + "nested_field.integer_column, " + + "nested_field.long_column, " + + "nested_field.keyword_column, " + + "nested_field.text_column, " + + "nested_field.binary_column, " + + "nested_field.timestamp_column, " + + "nested_field.ipv4_column, " + + "nested_field.ipv6_column " + + "FROM nested_type_nested"); + + MaterializedResult expected = resultBuilder(getSession(), rows.getTypes()) + .row(true, 1.0f, 1.0d, 1, 1L, "cool", "some text", new byte[] {(byte) 0xCA, (byte) 0xFE}, + LocalDateTime.of(1970, 1, 1, 0, 0), "1.2.3.4", "2001:db8::1:0:0:1") + .build(); + + assertThat(rows.getMaterializedRows()).isEqualTo(expected.getMaterializedRows()); + } + + @Test + public void testMixedCase() + throws IOException + { + String indexName = "mixed_case"; + index(indexName, ImmutableMap.builder() + .put("Name", "john") + .put("AGE", 32) + .buildOrThrow()); + + assertQuery( + "SELECT name, age FROM mixed_case", + "VALUES ('john', 32)"); + + assertQuery( + "SELECT name, age FROM mixed_case WHERE name = 'john'", + "VALUES ('john', 32)"); + } + + @Test + public void testNumericKeyword() + throws IOException + { + String indexName = "numeric_keyword"; + @Language("JSON") + String properties = "" + + "{" + + " \"properties\":{" + + " \"numeric_keyword\": { \"type\": \"keyword\" }" + + " }" + + "}"; + createIndex(indexName, properties); + index(indexName, ImmutableMap.of("numeric_keyword", 20)); + + assertQuery( + "SELECT numeric_keyword FROM numeric_keyword", + "VALUES 20"); + assertQuery( + "SELECT numeric_keyword FROM numeric_keyword where numeric_keyword = '20'", + "VALUES 20"); + } + + @Test + public void testAlias() + throws IOException + { + String aliasName = format("alias_%s", randomNameSuffix()); + addAlias("orders", aliasName); + + assertQuery( + "SELECT count(*) FROM " + aliasName, + "SELECT count(*) FROM orders"); + } + + @Test + public void testSelectInformationSchemaForMultiIndexAlias() + throws IOException + { + addAlias("nation", "multi_alias"); + addAlias("region", "multi_alias"); + + // No duplicate entries should be found in information_schema.tables or information_schema.columns. 
+ testSelectInformationSchemaTables(); + testSelectInformationSchemaColumns(); + } + + @Test // TODO (https://github.com/trinodb/trino/issues/2428) + @Disabled + public void testMultiIndexAlias() + throws IOException + { + addAlias("nation", "multi_alias"); + addAlias("region", "multi_alias"); + + assertQuery( + "SELECT count(*) FROM multi_alias", + "SELECT (SELECT count(*) FROM region) + (SELECT count(*) FROM nation)"); + } + + @Test + public void testEmptyIndexWithMappings() + throws IOException + { + String indexName = "test_empty_index_with_mappings"; + + @Language("JSON") + String mappings = "" + + "{" + + " \"properties\": { " + + " \"dummy_column\": { \"type\": \"long\" }" + + " }" + + "}"; + + createIndex(indexName, mappings); + + assertQuery(format("SELECT column_name FROM information_schema.columns WHERE table_name = '%s'", indexName), "VALUES ('dummy_column')"); + assertThat(computeActual("SHOW TABLES").getOnlyColumnAsSet()).contains(indexName); + assertQueryReturnsEmptyResult("SELECT * FROM " + indexName); + } + + @Test + public void testEmptyIndexNoMappings() + throws IOException + { + String indexName = "test_empty_index"; + + createIndex(indexName); + assertTableDoesNotExist(indexName); + } + + @Test + public void testEmptyAliasNoMappings() + throws IOException + { + String indexName = "test_empty_index_for_alias"; + String aliasName = "test_empty_alias"; + + createIndex(indexName); + addAlias(indexName, aliasName); + assertTableDoesNotExist(aliasName); + } + + @Test + public void testMissingIndex() + { + assertTableDoesNotExist("nonexistent_table"); + } + + @Test + public void testQueryTableFunction() + { + // select single record + assertQuery("SELECT json_query(result, 'lax $[0][0].hits.hits._source') " + + format("FROM TABLE(%s.system.raw_query(", catalogName) + + "schema => 'tpch', " + + "index => 'nation', " + + "query => '{\"query\": {\"match\": {\"name\": \"ALGERIA\"}}}')) t(result)", + "VALUES '{\"nationkey\":0,\"name\":\"ALGERIA\",\"regionkey\":0,\"comment\":\" haggle. carefully final deposits detect slyly agai\"}'"); + + // parameters + Session session = Session.builder(getSession()) + .addPreparedStatement( + "my_query", + format("SELECT json_query(result, 'lax $[0][0].hits.hits._source') FROM TABLE(%s.system.raw_query(schema => ?, index => ?, query => ?))", catalogName)) + .build(); + assertQuery( + session, + "EXECUTE my_query USING 'tpch', 'nation', '{\"query\": {\"match\": {\"name\": \"ALGERIA\"}}}'", + "VALUES '{\"nationkey\":0,\"name\":\"ALGERIA\",\"regionkey\":0,\"comment\":\" haggle. carefully final deposits detect slyly agai\"}'"); + + // select multiple records by range. 
Use array wrapper to wrap multiple results + assertQuery("SELECT array_sort(CAST(json_parse(json_query(result, 'lax $[0][0].hits.hits._source.name' WITH ARRAY WRAPPER)) AS array(varchar))) " + + format("FROM TABLE(%s.system.raw_query(", catalogName) + + "schema => 'tpch', " + + "index => 'nation', " + + "query => '{\"query\": {\"range\": {\"nationkey\": {\"gte\": 0,\"lte\": 3}}}}')) t(result)", + "VALUES ARRAY['ALGERIA', 'ARGENTINA', 'BRAZIL', 'CANADA']"); + + // use aggregations + @Language("JSON") + String query = "{\n" + + " \"size\": 0,\n" + + " \"aggs\" : {\n" + + " \"max_orderkey\" : { \"max\" : { \"field\" : \"orderkey\" } },\n" + + " \"sum_orderkey\" : { \"sum\" : { \"field\" : \"orderkey\" } }\n" + + " }\n" + + "}"; + + assertQuery( + format("WITH data(r) AS (" + + " SELECT CAST(json_parse(result) AS ROW(aggregations ROW(max_orderkey ROW(value BIGINT), sum_orderkey ROW(value BIGINT)))) " + + " FROM TABLE(%s.system.raw_query(" + + " schema => 'tpch', " + + " index => 'orders', " + + " query => '%s'))) " + + "SELECT r.aggregations.max_orderkey.value, r.aggregations.sum_orderkey.value " + + "FROM data", catalogName, query), + "VALUES (60000, 449872500)"); + + // no matches + assertQuery("SELECT json_query(result, 'lax $[0][0].hits.hits') " + + format("FROM TABLE(%s.system.raw_query(", catalogName) + + "schema => 'tpch', " + + "index => 'nation', " + + "query => '{\"query\": {\"match\": {\"name\": \"UTOPIA\"}}}')) t(result)", + "VALUES '[]'"); + + // syntax error + assertThatThrownBy(() -> query("SELECT * " + + format("FROM TABLE(%s.system.raw_query(", catalogName) + + "schema => 'tpch', " + + "index => 'nation', " + + "query => 'wrong syntax')) t(result)")) + .hasMessageContaining("json_parse_exception"); + } + + protected void assertTableDoesNotExist(String name) + { + assertQueryReturnsEmptyResult(format("SELECT * FROM information_schema.columns WHERE table_name = '%s'", name)); + assertThat(computeActual("SHOW TABLES").getOnlyColumnAsSet().contains(name)).isFalse(); + assertQueryFails("SELECT * FROM " + name, ".*Table '" + catalogName + ".tpch." 
+ name + "' does not exist"); + } + + protected String indexEndpoint(String index, String docId) + { + return format("/%s/_doc/%s", index, docId); + } + + private void index(String index, Map document) + throws IOException + { + String json = new ObjectMapper().writeValueAsString(document); + String endpoint = format("%s?refresh", indexEndpoint(index, String.valueOf(System.nanoTime()))); + + Request request = new Request("PUT", endpoint); + request.setJsonEntity(json); + client.getLowLevelClient().performRequest(request); + } + + private void addAlias(String index, String alias) + throws IOException + { + Request request = new Request("PUT", format("/%s/_alias/%s", index, alias)); + client.getLowLevelClient().performRequest(request); + + refreshIndex(alias); + } + + protected String indexMapping(@Language("JSON") String properties) + { + return "{\"mappings\": " + properties + "}"; + } + + private void createIndex(String indexName) + throws IOException + { + Request request = new Request("PUT", "/" + indexName); + client.getLowLevelClient().performRequest(request); + } + + private void createIndex(String indexName, @Language("JSON") String properties) + throws IOException + { + String mappings = indexMapping(properties); + Request request = new Request("PUT", "/" + indexName); + request.setJsonEntity(mappings); + client.getLowLevelClient().performRequest(request); + } + + private void refreshIndex(String index) + throws IOException + { + Request request = new Request("GET", format("/%s/_refresh", index)); + client.getLowLevelClient().performRequest(request); + } + + private void deleteIndex(String indexName) + throws IOException + { + Request request = new Request("DELETE", "/" + indexName); + client.getLowLevelClient().performRequest(request); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchLoader.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchLoader.java new file mode 100644 index 0000000000000..22cc2e980b346 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchLoader.java @@ -0,0 +1,147 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package io.trino.plugin.opensearch;
+
+import io.trino.Session;
+import io.trino.client.Column;
+import io.trino.client.QueryData;
+import io.trino.client.QueryStatusInfo;
+import io.trino.server.testing.TestingTrinoServer;
+import io.trino.spi.type.Type;
+import io.trino.spi.type.VarcharType;
+import io.trino.testing.AbstractTestingTrinoClient;
+import io.trino.testing.ResultsSession;
+import org.opensearch.action.bulk.BulkRequest;
+import org.opensearch.action.index.IndexRequest;
+import org.opensearch.action.support.WriteRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static com.google.common.base.Preconditions.checkState;
+import static io.trino.spi.type.BigintType.BIGINT;
+import static io.trino.spi.type.BooleanType.BOOLEAN;
+import static io.trino.spi.type.DateType.DATE;
+import static io.trino.spi.type.DoubleType.DOUBLE;
+import static io.trino.spi.type.IntegerType.INTEGER;
+import static java.util.Objects.requireNonNull;
+import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
+
+public class OpenSearchLoader
+        extends AbstractTestingTrinoClient<Void>
+{
+    private final String tableName;
+    private final RestHighLevelClient client;
+
+    public OpenSearchLoader(
+            RestHighLevelClient client,
+            String tableName,
+            TestingTrinoServer trinoServer,
+            Session defaultSession)
+    {
+        super(trinoServer, defaultSession);
+
+        this.tableName = requireNonNull(tableName, "tableName is null");
+        this.client = requireNonNull(client, "client is null");
+    }
+
+    @Override
+    public ResultsSession<Void> getResultSession(Session session)
+    {
+        requireNonNull(session, "session is null");
+        return new OpenSearchLoadingSession();
+    }
+
+    private class OpenSearchLoadingSession
+            implements ResultsSession<Void>
+    {
+        private final AtomicReference<List<Type>> types = new AtomicReference<>();
+
+        private OpenSearchLoadingSession() {}
+
+        @Override
+        public void addResults(QueryStatusInfo statusInfo, QueryData data)
+        {
+            if (types.get() == null && statusInfo.getColumns() != null) {
+                types.set(getTypes(statusInfo.getColumns()));
+            }
+
+            if (data.getData() == null) {
+                return;
+            }
+            checkState(types.get() != null, "Type information is missing");
+            List<Column> columns = statusInfo.getColumns();
+
+            BulkRequest request = new BulkRequest();
+            for (List<Object> fields : data.getData()) {
+                try {
+                    XContentBuilder dataBuilder = jsonBuilder().startObject();
+                    for (int i = 0; i < fields.size(); i++) {
+                        Type type = types.get().get(i);
+                        Object value = convertValue(fields.get(i), type);
+                        dataBuilder.field(columns.get(i).getName(), value);
+                    }
+                    dataBuilder.endObject();
+
+                    request.add(new IndexRequest(tableName).source(dataBuilder));
+                }
+                catch (IOException e) {
+                    throw new UncheckedIOException("Error loading data into OpenSearch index: " + tableName, e);
+                }
+            }
+
+            request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+            try {
+                client.bulk(request, RequestOptions.DEFAULT);
+            }
+            catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        @Override
+        public Void build(Map<String, String> setSessionProperties, Set<String> resetSessionProperties)
+        {
+            return null;
+        }
+
+        private Object convertValue(Object value, Type type)
+        {
+            if (value == null) {
+                return null;
+            }
+
+            if (type == BOOLEAN || type == DATE || type instanceof
VarcharType) { + return value; + } + if (type == BIGINT) { + return ((Number) value).longValue(); + } + if (type == INTEGER) { + return ((Number) value).intValue(); + } + if (type == DOUBLE) { + return ((Number) value).doubleValue(); + } + throw new IllegalArgumentException("Unhandled type: " + type); + } + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchQueryRunner.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchQueryRunner.java new file mode 100644 index 0000000000000..a4ea3d90b5691 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchQueryRunner.java @@ -0,0 +1,162 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import io.airlift.log.Level; +import io.airlift.log.Logger; +import io.airlift.log.Logging; +import io.trino.metadata.QualifiedObjectName; +import io.trino.plugin.jmx.JmxPlugin; +import io.trino.plugin.tpch.TpchPlugin; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.QueryRunner; +import io.trino.testing.TestingTrinoClient; +import io.trino.tpch.TpchTable; +import org.apache.http.HttpHost; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestHighLevelClient; + +import java.util.Map; + +import static io.airlift.testing.Closeables.closeAllSuppress; +import static io.airlift.units.Duration.nanosSince; +import static io.trino.plugin.opensearch.OpenSearchServer.OPENSEARCH_IMAGE; +import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.lang.String.format; +import static java.util.Locale.ENGLISH; +import static java.util.concurrent.TimeUnit.SECONDS; + +public final class OpenSearchQueryRunner +{ + static { + Logging logging = Logging.initialize(); + logging.setLevel("org.opensearch.client.RestClient", Level.OFF); + } + + private OpenSearchQueryRunner() {} + + private static final Logger LOG = Logger.get(OpenSearchQueryRunner.class); + private static final String TPCH_SCHEMA = "tpch"; + + public static DistributedQueryRunner createOpenSearchQueryRunner( + HostAndPort address, + Iterable> tables, + Map extraProperties, + Map extraConnectorProperties, + int nodeCount) + throws Exception + { + return createOpenSearchQueryRunner(address, tables, extraProperties, extraConnectorProperties, nodeCount, "opensearch"); + } + + public static DistributedQueryRunner createOpenSearchQueryRunner( + HostAndPort address, + Iterable> tables, + Map extraProperties, + Map extraConnectorProperties, + int nodeCount, + String catalogName) + throws Exception + { + RestHighLevelClient client = null; + DistributedQueryRunner queryRunner = null; + try { + queryRunner = DistributedQueryRunner.builder(testSessionBuilder() + .setCatalog(catalogName) + .setSchema(TPCH_SCHEMA) + .build()) + 
.setExtraProperties(extraProperties) + .setNodeCount(nodeCount) + .build(); + + queryRunner.installPlugin(new JmxPlugin()); + queryRunner.createCatalog("jmx", "jmx"); + + queryRunner.installPlugin(new TpchPlugin()); + queryRunner.createCatalog("tpch", "tpch"); + + OpenSearchConnectorFactory testFactory = new OpenSearchConnectorFactory(); + + installOpenSearchPlugin(address, queryRunner, catalogName, testFactory, extraConnectorProperties); + + TestingTrinoClient trinoClient = queryRunner.getClient(); + + LOG.info("Loading data..."); + + client = new RestHighLevelClient(RestClient.builder(HttpHost.create(address.toString()))); + long startTime = System.nanoTime(); + for (TpchTable table : tables) { + loadTpchTopic(client, trinoClient, table); + } + LOG.info("Loading complete in %s", nanosSince(startTime).toString(SECONDS)); + + return queryRunner; + } + catch (Exception e) { + closeAllSuppress(e, queryRunner, client); + throw e; + } + } + + private static void installOpenSearchPlugin( + HostAndPort address, + QueryRunner queryRunner, + String catalogName, + OpenSearchConnectorFactory factory, + Map extraConnectorProperties) + { + queryRunner.installPlugin(new OpenSearchPlugin(factory)); + Map config = ImmutableMap.builder() + .put("opensearch.host", address.getHost()) + .put("opensearch.port", Integer.toString(address.getPort())) + // Node discovery relies on the publish_address exposed via the OpenSearch API + // This doesn't work well within a docker environment that maps OpenSearch port to a random public port + .put("opensearch.ignore-publish-address", "true") + .put("opensearch.default-schema-name", TPCH_SCHEMA) + .put("opensearch.scroll-size", "1000") + .put("opensearch.scroll-timeout", "1m") + .put("opensearch.request-timeout", "2m") + .putAll(extraConnectorProperties) + .buildOrThrow(); + + queryRunner.createCatalog(catalogName, "opensearch", config); + } + + private static void loadTpchTopic(RestHighLevelClient client, TestingTrinoClient trinoClient, TpchTable table) + { + long start = System.nanoTime(); + LOG.info("Running import for %s", table.getTableName()); + OpenSearchLoader loader = new OpenSearchLoader(client, table.getTableName().toLowerCase(ENGLISH), trinoClient.getServer(), trinoClient.getDefaultSession()); + loader.execute(format("SELECT * from %s", new QualifiedObjectName(TPCH_SCHEMA, TINY_SCHEMA_NAME, table.getTableName().toLowerCase(ENGLISH)))); + LOG.info("Imported %s in %s", table.getTableName(), nanosSince(start).convertToMostSuccinctTimeUnit()); + } + + public static void main(String[] args) + throws Exception + { + DistributedQueryRunner queryRunner = createOpenSearchQueryRunner( + new OpenSearchServer(OPENSEARCH_IMAGE, false, ImmutableMap.of()).getAddress(), + TpchTable.getTables(), + ImmutableMap.of("http-server.http.port", "8080"), + ImmutableMap.of(), + 3); + + Logger log = Logger.get(OpenSearchQueryRunner.class); + log.info("======== SERVER STARTED ========"); + log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchServer.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchServer.java new file mode 100644 index 0000000000000..8139098faffb9 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/OpenSearchServer.java @@ -0,0 +1,84 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.net.HostAndPort; +import io.trino.testing.ResourcePresence; +import org.opensearch.testcontainers.OpensearchContainer; +import org.testcontainers.containers.Network; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; + +import static com.google.common.io.MoreFiles.deleteRecursively; +import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.file.Files.createTempDirectory; +import static org.testcontainers.utility.MountableFile.forHostPath; + +public class OpenSearchServer +{ + public static final String OPENSEARCH_IMAGE = "opensearchproject/opensearch:2.11.0"; + + private final Path configurationPath; + private final OpensearchContainer container; + + public OpenSearchServer(String image, boolean secured, Map configurationFiles) + throws IOException + { + this(Network.SHARED, image, secured, configurationFiles); + } + + public OpenSearchServer(Network network, String image, boolean secured, Map configurationFiles) + throws IOException + { + container = new OpensearchContainer<>(image); + container.withNetwork(network); + if (secured) { + container.withSecurityEnabled(); + } + + configurationPath = createTempDirectory(null); + for (Map.Entry entry : configurationFiles.entrySet()) { + String name = entry.getKey(); + String contents = entry.getValue(); + + Path path = configurationPath.resolve(name); + Files.writeString(path, contents, UTF_8); + container.withCopyFileToContainer(forHostPath(path, 0777), "/usr/share/opensearch/config/" + name); + } + + container.start(); + } + + public void stop() + throws IOException + { + container.close(); + deleteRecursively(configurationPath, ALLOW_INSECURE); + } + + @ResourcePresence + public boolean isRunning() + { + return container.isRunning(); + } + + public HostAndPort getAddress() + { + return HostAndPort.fromParts(container.getHost(), container.getMappedPort(9200)); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestAwsSecurityConfig.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestAwsSecurityConfig.java new file mode 100644 index 0000000000000..ce17f71e506b2 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestAwsSecurityConfig.java @@ -0,0 +1,58 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +import java.util.Map; + +import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; +import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; +import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; + +public class TestAwsSecurityConfig +{ + @Test + public void testDefaults() + { + assertRecordedDefaults(recordDefaults(AwsSecurityConfig.class) + .setAccessKey(null) + .setSecretKey(null) + .setRegion(null) + .setIamRole(null) + .setExternalId(null)); + } + + @Test + public void testExplicitPropertyMappings() + { + Map properties = ImmutableMap.builder() + .put("opensearch.aws.access-key", "access") + .put("opensearch.aws.secret-key", "secret") + .put("opensearch.aws.region", "region") + .put("opensearch.aws.iam-role", "iamRole") + .put("opensearch.aws.external-id", "externalId") + .buildOrThrow(); + + AwsSecurityConfig expected = new AwsSecurityConfig() + .setAccessKey("access") + .setSecretKey("secret") + .setRegion("region") + .setIamRole("iamRole") + .setExternalId("externalId"); + + assertFullMapping(properties, expected); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearch2ConnectorTest.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearch2ConnectorTest.java new file mode 100644 index 0000000000000..a07c25b1bb448 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearch2ConnectorTest.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.parallel.Isolated; + +import java.util.List; + +@Isolated +public class TestOpenSearch2ConnectorTest + extends BaseOpenSearchConnectorTest +{ + public TestOpenSearch2ConnectorTest() + { + super("opensearchproject/opensearch:2.2.0", "opensearch"); + } + + @Override + protected List largeInValuesCountData() + { + // 1000 IN fails with "Query contains too many nested clauses; maxClauseCount is set to 1024" + return ImmutableList.of(200, 500); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConfig.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConfig.java new file mode 100644 index 0000000000000..c563d766ea84f --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConfig.java @@ -0,0 +1,119 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableMap; +import io.airlift.units.Duration; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Map; + +import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; +import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; +import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; +import static io.trino.plugin.opensearch.OpenSearchConfig.Security.AWS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; + +public class TestOpenSearchConfig +{ + @Test + public void testDefaults() + { + assertRecordedDefaults(recordDefaults(OpenSearchConfig.class) + .setHosts(null) + .setPort(9200) + .setDefaultSchema("default") + .setScrollSize(1000) + .setScrollTimeout(new Duration(1, MINUTES)) + .setRequestTimeout(new Duration(10, SECONDS)) + .setConnectTimeout(new Duration(1, SECONDS)) + .setBackoffInitDelay(new Duration(500, MILLISECONDS)) + .setBackoffMaxDelay(new Duration(20, SECONDS)) + .setMaxRetryTime(new Duration(30, SECONDS)) + .setNodeRefreshInterval(new Duration(1, MINUTES)) + .setMaxHttpConnections(25) + .setHttpThreadCount(Runtime.getRuntime().availableProcessors()) + .setTlsEnabled(false) + .setKeystorePath(null) + .setKeystorePassword(null) + .setTrustStorePath(null) + .setTruststorePassword(null) + .setVerifyHostnames(true) + .setIgnorePublishAddress(false) + .setSecurity(null)); + } + + @Test + public void testExplicitPropertyMappings() + throws IOException + { + Path keystoreFile = Files.createTempFile(null, null); + Path truststoreFile = Files.createTempFile(null, null); + + Map properties = ImmutableMap.builder() + .put("opensearch.host", "example.com") + .put("opensearch.port", "9999") + .put("opensearch.default-schema-name", "test") + .put("opensearch.scroll-size", "4000") + .put("opensearch.scroll-timeout", "20s") + .put("opensearch.request-timeout", "1s") + .put("opensearch.connect-timeout", "10s") + .put("opensearch.backoff-init-delay", "100ms") + .put("opensearch.backoff-max-delay", "15s") + .put("opensearch.max-retry-time", "10s") + .put("opensearch.node-refresh-interval", "10m") + .put("opensearch.max-http-connections", "100") + .put("opensearch.http-thread-count", "30") + .put("opensearch.tls.enabled", "true") + .put("opensearch.tls.keystore-path", keystoreFile.toString()) + .put("opensearch.tls.keystore-password", "keystore-password") + .put("opensearch.tls.truststore-path", truststoreFile.toString()) + .put("opensearch.tls.truststore-password", "truststore-password") + .put("opensearch.tls.verify-hostnames", "false") + .put("opensearch.ignore-publish-address", "true") + .put("opensearch.security", "AWS") + .buildOrThrow(); + + OpenSearchConfig expected = new OpenSearchConfig() + .setHosts(Arrays.asList("example.com")) + .setPort(9999) + .setDefaultSchema("test") + 
.setScrollSize(4000) + .setScrollTimeout(new Duration(20, SECONDS)) + .setRequestTimeout(new Duration(1, SECONDS)) + .setConnectTimeout(new Duration(10, SECONDS)) + .setBackoffInitDelay(new Duration(100, MILLISECONDS)) + .setBackoffMaxDelay(new Duration(15, SECONDS)) + .setMaxRetryTime(new Duration(10, SECONDS)) + .setNodeRefreshInterval(new Duration(10, MINUTES)) + .setMaxHttpConnections(100) + .setHttpThreadCount(30) + .setTlsEnabled(true) + .setKeystorePath(keystoreFile.toFile()) + .setKeystorePassword("keystore-password") + .setTrustStorePath(truststoreFile.toFile()) + .setTruststorePassword("truststore-password") + .setVerifyHostnames(false) + .setIgnorePublishAddress(true) + .setSecurity(AWS); + + assertFullMapping(properties, expected); + } +} diff --git a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchOpenSearchConnectorTest.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConnectorTest.java similarity index 58% rename from plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchOpenSearchConnectorTest.java rename to plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConnectorTest.java index ee0dd999729df..bdafc4822dd15 100644 --- a/plugin/trino-elasticsearch/src/test/java/io/trino/plugin/elasticsearch/TestElasticsearchOpenSearchConnectorTest.java +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchConnectorTest.java @@ -11,28 +11,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.plugin.elasticsearch; +package io.trino.plugin.opensearch; -import static java.lang.String.format; - -public class TestElasticsearchOpenSearchConnectorTest - extends BaseElasticsearchConnectorTest +public class TestOpenSearchConnectorTest + extends BaseOpenSearchConnectorTest { - public TestElasticsearchOpenSearchConnectorTest() + public TestOpenSearchConnectorTest() { // 1.0.0 and 1.0.1 causes NotSslRecordException during the initialization super("opensearchproject/opensearch:1.1.0", "opensearch"); } - - @Override - protected String indexEndpoint(String index, String docId) - { - return format("/%s/_doc/%s", index, docId); - } - - @Override - protected String indexMapping(String properties) - { - return "{\"mappings\": " + properties + "}"; - } } diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchLatestConnectorTest.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchLatestConnectorTest.java new file mode 100644 index 0000000000000..5fd7c1d0196ab --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchLatestConnectorTest.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.parallel.Isolated; + +import java.util.List; + +@Isolated +public class TestOpenSearchLatestConnectorTest + extends BaseOpenSearchConnectorTest +{ + public TestOpenSearchLatestConnectorTest() + { + super("opensearchproject/opensearch:latest", "opensearch"); + } + + @Override + protected List largeInValuesCountData() + { + // 1000 IN fails with "Query contains too many nested clauses; maxClauseCount is set to 1024" + return ImmutableList.of(200, 500); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchMetadata.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchMetadata.java new file mode 100644 index 0000000000000..2bccee951465d --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchMetadata.java @@ -0,0 +1,47 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import io.airlift.slice.Slices; +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestOpenSearchMetadata +{ + @Test + public void testLikeToRegexp() + { + assertThat(likeToRegexp("a_b_c", Optional.empty())).isEqualTo("a.b.c"); + assertThat(likeToRegexp("a%b%c", Optional.empty())).isEqualTo("a.*b.*c"); + assertThat(likeToRegexp("a%b_c", Optional.empty())).isEqualTo("a.*b.c"); + assertThat(likeToRegexp("a[b", Optional.empty())).isEqualTo("a\\[b"); + assertThat(likeToRegexp("a_\\_b", Optional.of("\\"))).isEqualTo("a._b"); + assertThat(likeToRegexp("a$_b", Optional.of("$"))).isEqualTo("a_b"); + assertThat(likeToRegexp("s_.m%ex\\t", Optional.of("$"))).isEqualTo("s.\\.m.*ex\\\\t"); + assertThat(likeToRegexp("\000%", Optional.empty())).isEqualTo("\000.*"); + assertThat(likeToRegexp("\000%", Optional.of("\000"))).isEqualTo("%"); + assertThat(likeToRegexp("中文%", Optional.empty())).isEqualTo("中文.*"); + assertThat(likeToRegexp("こんにちは%", Optional.empty())).isEqualTo("こんにちは.*"); + assertThat(likeToRegexp("안녕하세요%", Optional.empty())).isEqualTo("안녕하세요.*"); + assertThat(likeToRegexp("Привет%", Optional.empty())).isEqualTo("Привет.*"); + } + + private static String likeToRegexp(String pattern, Optional escapeChar) + { + return OpenSearchMetadata.likeToRegexp(Slices.utf8Slice(pattern), escapeChar.map(Slices::utf8Slice)); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchQueryBuilder.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchQueryBuilder.java new file mode 100644 index 0000000000000..4a8407ff125dc --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestOpenSearchQueryBuilder.java @@ -0,0 +1,137 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.trino.plugin.opensearch.decoders.DoubleDecoder; +import io.trino.plugin.opensearch.decoders.IntegerDecoder; +import io.trino.plugin.opensearch.decoders.VarcharDecoder; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.predicate.ValueSet; +import org.junit.jupiter.api.Test; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; + +import java.util.Map; +import java.util.Optional; + +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestOpenSearchQueryBuilder +{ + private static final OpenSearchColumnHandle NAME = new OpenSearchColumnHandle("name", VARCHAR, new VarcharDecoder.Descriptor("name"), true); + private static final OpenSearchColumnHandle AGE = new OpenSearchColumnHandle("age", INTEGER, new IntegerDecoder.Descriptor("age"), true); + private static final OpenSearchColumnHandle SCORE = new OpenSearchColumnHandle("score", DOUBLE, new DoubleDecoder.Descriptor("score"), true); + private static final OpenSearchColumnHandle LENGTH = new OpenSearchColumnHandle("length", DOUBLE, new DoubleDecoder.Descriptor("length"), true); + + @Test + public void testMatchAll() + { + assertQueryBuilder( + ImmutableMap.of(), + new MatchAllQueryBuilder()); + } + + @Test + public void testOneConstraint() + { + // SingleValue + assertQueryBuilder( + ImmutableMap.of(AGE, Domain.singleValue(INTEGER, 1L)), + new BoolQueryBuilder().filter(new TermQueryBuilder(AGE.getName(), 1L))); + + // Range + assertQueryBuilder( + ImmutableMap.of(SCORE, Domain.create(ValueSet.ofRanges(Range.range(DOUBLE, 65.0, false, 80.0, true)), false)), + new BoolQueryBuilder().filter(new RangeQueryBuilder(SCORE.getName()).gt(65.0).lte(80.0))); + + // List + assertQueryBuilder( + ImmutableMap.of(NAME, Domain.multipleValues(VARCHAR, ImmutableList.of("alice", "bob"))), + new BoolQueryBuilder().filter( + new BoolQueryBuilder() + .should(new TermQueryBuilder(NAME.getName(), "alice")) + .should(new TermQueryBuilder(NAME.getName(), "bob")))); + // all + assertQueryBuilder( + ImmutableMap.of(AGE, Domain.all(INTEGER)), + new MatchAllQueryBuilder()); + + // notNull + assertQueryBuilder( + ImmutableMap.of(AGE, Domain.notNull(INTEGER)), + new BoolQueryBuilder().filter(new ExistsQueryBuilder(AGE.getName()))); + + // isNull + assertQueryBuilder( + ImmutableMap.of(AGE, Domain.onlyNull(INTEGER)), + new BoolQueryBuilder().mustNot(new ExistsQueryBuilder(AGE.getName()))); + + // isNullAllowed + assertQueryBuilder( + ImmutableMap.of(AGE, 
Domain.singleValue(INTEGER, 1L, true)), + new BoolQueryBuilder().filter( + new BoolQueryBuilder() + .should(new TermQueryBuilder(AGE.getName(), 1L)) + .should(new BoolQueryBuilder().mustNot(new ExistsQueryBuilder(AGE.getName()))))); + } + + @Test + public void testMultiConstraint() + { + assertQueryBuilder( + ImmutableMap.of( + AGE, Domain.singleValue(INTEGER, 1L), + SCORE, Domain.create(ValueSet.ofRanges(Range.range(DOUBLE, 65.0, false, 80.0, true)), false)), + new BoolQueryBuilder() + .filter(new TermQueryBuilder(AGE.getName(), 1L)) + .filter(new RangeQueryBuilder(SCORE.getName()).gt(65.0).lte(80.0))); + + assertQueryBuilder( + ImmutableMap.of( + LENGTH, Domain.create(ValueSet.ofRanges(Range.range(DOUBLE, 160.0, true, 180.0, true)), false), + SCORE, Domain.create(ValueSet.ofRanges( + Range.range(DOUBLE, 65.0, false, 80.0, true), + Range.equal(DOUBLE, 90.0)), false)), + new BoolQueryBuilder() + .filter(new RangeQueryBuilder(LENGTH.getName()).gte(160.0).lte(180.0)) + .filter(new BoolQueryBuilder() + .should(new RangeQueryBuilder(SCORE.getName()).gt(65.0).lte(80.0)) + .should(new TermQueryBuilder(SCORE.getName(), 90.0)))); + + assertQueryBuilder( + ImmutableMap.of( + AGE, Domain.singleValue(INTEGER, 10L), + SCORE, Domain.onlyNull(DOUBLE)), + new BoolQueryBuilder() + .filter(new TermQueryBuilder(AGE.getName(), 10L)) + .mustNot(new ExistsQueryBuilder(SCORE.getName()))); + } + + private static void assertQueryBuilder(Map domains, QueryBuilder expected) + { + QueryBuilder actual = OpenSearchQueryBuilder.buildSearchQuery(TupleDomain.withColumnDomains(domains), Optional.empty(), Map.of()); + assertThat(actual).isEqualTo(expected); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordAuthentication.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordAuthentication.java new file mode 100644 index 0000000000000..9375ba1511f8e --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordAuthentication.java @@ -0,0 +1,146 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.opensearch; + +import com.amazonaws.util.Base64; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; +import com.google.common.net.HostAndPort; +import io.trino.sql.query.QueryAssertions; +import io.trino.testing.DistributedQueryRunner; +import org.apache.http.HttpHost; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Isolated; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestHighLevelClient; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.security.GeneralSecurityException; +import java.util.Optional; + +import static com.google.common.io.Resources.getResource; +import static io.airlift.testing.Closeables.closeAll; +import static io.trino.plugin.base.ssl.SslUtils.createSSLContext; +import static io.trino.plugin.opensearch.OpenSearchQueryRunner.createOpenSearchQueryRunner; +import static io.trino.plugin.opensearch.OpenSearchServer.OPENSEARCH_IMAGE; +import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@Isolated +@TestInstance(PER_CLASS) +public class TestPasswordAuthentication +{ + private static final String USER = "admin"; + private static final String PASSWORD = "admin"; + + private OpenSearchServer opensearch; + private RestHighLevelClient client; + private QueryAssertions assertions; + + @BeforeAll + public void setUp() + throws Exception + { + opensearch = new OpenSearchServer(OPENSEARCH_IMAGE, true, ImmutableMap.builder() + .put("opensearch.yml", loadResource("opensearch.yml")) + .put("esnode.pem", loadResource("esnode.pem")) + .put("esnode-key.pem", loadResource("esnode-key.pem")) + .put("root-ca.pem", loadResource("root-ca.pem")) + .buildOrThrow()); + + HostAndPort address = opensearch.getAddress(); + client = new RestHighLevelClient(RestClient.builder(new HttpHost(address.getHost(), address.getPort(), "https")) + .setHttpClientConfigCallback(this::setupSslContext)); + + DistributedQueryRunner runner = createOpenSearchQueryRunner( + opensearch.getAddress(), + ImmutableList.of(), + ImmutableMap.of(), + ImmutableMap.builder() + .put("opensearch.security", "PASSWORD") + .put("opensearch.auth.user", USER) + .put("opensearch.auth.password", PASSWORD) + .put("opensearch.tls.enabled", "true") + .put("opensearch.tls.verify-hostnames", "false") + .put("opensearch.tls.truststore-path", new File(getResource("truststore.jks").toURI()).getPath()) + .put("opensearch.tls.truststore-password", "123456") + .buildOrThrow(), + 3); + + assertions = new QueryAssertions(runner); + } + + private HttpAsyncClientBuilder setupSslContext(HttpAsyncClientBuilder clientBuilder) + { + try { + return clientBuilder.setSSLContext(createSSLContext( + Optional.empty(), + Optional.empty(), + Optional.of(new File(Resources.getResource("truststore.jks").toURI())), + Optional.of("123456"))); + } + catch (GeneralSecurityException | IOException | URISyntaxException e) { 
+ throw new RuntimeException(e); + } + } + + @AfterAll + public final void destroy() + throws IOException + { + closeAll( + () -> assertions.close(), + () -> opensearch.stop(), + () -> client.close()); + + assertions = null; + opensearch = null; + client = null; + } + + @Test + public void test() + throws IOException + { + String json = new ObjectMapper().writeValueAsString(ImmutableMap.of("value", 42L)); + + Request request = new Request("POST", "/test/_doc?refresh"); + request.setJsonEntity(json); + request.setOptions(RequestOptions.DEFAULT.toBuilder() + .addHeader("Authorization", format("Basic %s", Base64.encodeAsString(format("%s:%s", USER, PASSWORD).getBytes(StandardCharsets.UTF_8))))); + client.getLowLevelClient().performRequest(request); + + assertThat(assertions.query("SELECT * FROM test")) + .matches("VALUES BIGINT '42'"); + } + + private static String loadResource(String file) + throws IOException + { + return Resources.toString(getResource(file), UTF_8); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordConfig.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordConfig.java new file mode 100644 index 0000000000000..f115069b15388 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/TestPasswordConfig.java @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch; + +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +import java.util.Map; + +import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; +import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; +import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; + +public class TestPasswordConfig +{ + @Test + public void testDefaults() + { + assertRecordedDefaults(recordDefaults(PasswordConfig.class) + .setUser(null) + .setPassword(null)); + } + + @Test + public void testExplicitPropertyMappings() + { + Map properties = ImmutableMap.builder() + .put("opensearch.auth.user", "user") + .put("opensearch.auth.password", "password") + .buildOrThrow(); + + PasswordConfig expected = new PasswordConfig() + .setUser("user") + .setPassword("password"); + + assertFullMapping(properties, expected); + } +} diff --git a/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/client/TestExtractAddress.java b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/client/TestExtractAddress.java new file mode 100644 index 0000000000000..28ec3f08b0a26 --- /dev/null +++ b/plugin/trino-opensearch/src/test/java/io/trino/plugin/opensearch/client/TestExtractAddress.java @@ -0,0 +1,39 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.opensearch.client; + +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +import static io.trino.plugin.opensearch.client.OpenSearchClient.extractAddress; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestExtractAddress +{ + @Test + public void test() + { + assertThat(extractAddress("node/1.2.3.4:9200")).isEqualTo(Optional.of("node:9200")); + assertThat(extractAddress("1.2.3.4:9200")).isEqualTo(Optional.of("1.2.3.4:9200")); + assertThat(extractAddress("node/1.2.3.4:9200")).isEqualTo(Optional.of("node:9200")); + assertThat(extractAddress("node/[fe80::1]:9200")).isEqualTo(Optional.of("node:9200")); + assertThat(extractAddress("[fe80::1]:9200")).isEqualTo(Optional.of("[fe80::1]:9200")); + + assertThat(extractAddress("")).isEqualTo(Optional.empty()); + assertThat(extractAddress("node/1.2.3.4")).isEqualTo(Optional.empty()); + assertThat(extractAddress("node/1.2.3.4:xxxx")).isEqualTo(Optional.empty()); + assertThat(extractAddress("1.2.3.4:xxxx")).isEqualTo(Optional.empty()); + } +} diff --git a/plugin/trino-opensearch/src/test/resources/esnode-key.pem b/plugin/trino-opensearch/src/test/resources/esnode-key.pem new file mode 100644 index 0000000000000..6bbe34db2e2b2 --- /dev/null +++ b/plugin/trino-opensearch/src/test/resources/esnode-key.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3lnDqYCsnVWLx +lokBBhM4kJsOc+Dwp7FQWvcEEDQYIPHBUDoE1b1uPO10iwDy41+vCd+L3tkt5vgz +hkU+W+AMYOLVJSDUTU1oaFT3gQdAPBLssMZJ+mrVVysxK/BOkB1r4GRCj3+DaL0/ +YiIm+VT4fmt1bQ46C9AqPtGBNjjAheOfBVvGsp8j9zs296OtUnlDLYhtEPF5bSpj +yTn0rlKuW9Vc1zYNFW+oshLMJxmiLcFCFzLKRKdNLGNXs0JGy7BfjOGJ7b8zgzBe +8phua7VvDoYw4Hda8z8uNSBWd0bOOE0DT4mbbiC+M0G3c76TNceeZMGlkw6g0Vt4 +G5jFYTIXAgMBAAECggEAJE/GvKzRzEFYxST7qy6q7ZS4fobP9nmMmjKryet0ilM1 +zgESwLMP+TZm1xbJVMFBQytGQnGM7kXRUMuddYSRj5E4PGVQFX3GskB0kdG8/+kB +6zvZtHcp21GW8u5YZQmfMB+dZQbh9zlnuRotXB0Tv+StV3f/CN8eSMTNBmwVlIpw +N//eAC3/F9VbY6L3EhtSWwcqONPITsfetvYMb3VMQNgdrm23xYHN3LIqJeDLxYO6 +AdasfwLqs1KEChy4y0+aST9bYvIC2Kgpjm6oT5cIOmMUUmr3194lDCGRtU3J+kQN +fqzqNTMnrummvd2QSzESRWlAXyleduj6gHR+W+wa9QKBgQC7LRgPHoYJzbys71i9 +owv3ZGG9KDBA3wnbu1oCklGL1V/x6fuw/OTBt7ajP/zRJKczcg7Po0Z//fL4y0PD +L5npej9+hAaeARHiiNi7Seo5Qb/DtWj+TcwkIWVr3SikzG/md29kfMIHiEpc2kHY +qYMw7aXHBLUkx1DnS3mVAQXZdQKBgQD7F4/77ktEsas7GxbFycQ7YNMs4UgxcjRa +ezGKmQEJhLJU799WWg0LYpHR1r1SzZ5rp6vqIo0JDwjssZu/Hfr4hMtsjlU3XL7Q +1Ne9C45BrmSJ/VKETUTel5FFU9K136fMmyxUhAxKJeBonHNkt8s6AeU1QX4qnynd +ZlmoM38f2wKBgCOFQjyUSVGQ9o5G+as7RukS5q1z8/xLl15YppM4WztTKdsJ0Vbc +Fzlf+MvwN7Uk5IoRyhfDX3yon7RxNpHS9b1mEqiau3q+rzszRxX7x/RHsvGb+qnP +776ni8y1cqIbndy5q60BM/0j8yC91qMcQCZH4x7qmMm6/XJ3U0JgqFCFAoGBALvn +/TvHMBLfd0pLUnaAMVdSdZbvnQxL8DwKV5/mMu2BnceMn7HKwFpV0NSdhJ9z+2uL +Vk9ove0akE45+FlFQW08S9QYjFNbF/3GWxFBlZs3vEu8BUW3YaPG/9NHELhFARlQ +wxMYou3WfKMs+fpUxUTK6FI7igSxii4DC3Oh3i/jAoGBAJxvc5WwNEyogvCJ7uSy +dVT2PUeGmMDuuEf3C1Flk/ShzetpuvmKuC8DQsbEopIE92tAWwf3gLVIfAmH6bQy +xyuMjjVdl6L37sHX4jUzdf8hw5OF45LgiKfTiBVl2PsD2UT9RJEisk7Z5IvvW45W +RxzOk4AwW2uSgJD8NGTDmwLU +-----END PRIVATE KEY----- diff --git 
a/plugin/trino-opensearch/src/test/resources/esnode.pem b/plugin/trino-opensearch/src/test/resources/esnode.pem new file mode 100644 index 0000000000000..6b033c7f8de27 --- /dev/null +++ b/plugin/trino-opensearch/src/test/resources/esnode.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDYTCCAkmgAwIBAgIVALANPI8Y3YDiH0EsyazPC0a305zjMA0GCSqGSIb3DQEB +CwUAMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlmaWNhdGUgVG9vbCBBdXRvZ2Vu +ZXJhdGVkIENBMB4XDTIzMTIzMDE3MzIyM1oXDTI2MTIyOTE3MzIyM1owHzEdMBsG +A1UEAxMUZWxhc3RpY3NlYXJjaC1zZXJ2ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQC3lnDqYCsnVWLxlokBBhM4kJsOc+Dwp7FQWvcEEDQYIPHBUDoE +1b1uPO10iwDy41+vCd+L3tkt5vgzhkU+W+AMYOLVJSDUTU1oaFT3gQdAPBLssMZJ ++mrVVysxK/BOkB1r4GRCj3+DaL0/YiIm+VT4fmt1bQ46C9AqPtGBNjjAheOfBVvG +sp8j9zs296OtUnlDLYhtEPF5bSpjyTn0rlKuW9Vc1zYNFW+oshLMJxmiLcFCFzLK +RKdNLGNXs0JGy7BfjOGJ7b8zgzBe8phua7VvDoYw4Hda8z8uNSBWd0bOOE0DT4mb +biC+M0G3c76TNceeZMGlkw6g0Vt4G5jFYTIXAgMBAAGjfzB9MB0GA1UdDgQWBBSA +l2wp8zAFBrBMH+Zf+tS0dznNnjAfBgNVHSMEGDAWgBTqhtqQcPqrVyIj2evQlUs3 +iON1BjAwBgNVHREEKTAngglsb2NhbGhvc3SHBH8AAAGCFGVsYXN0aWNzZWFyY2gt +c2VydmVyMAkGA1UdEwQCMAAwDQYJKoZIhvcNAQELBQADggEBACYQKkoSJrQsFWpR +zTD+5visP/W/wUeHdQxU5iEesLsd43r7jKkZLRJzZgDtPSfYw3LLL/dZHVCtzdUg +NtarP+EaMp5YlGyEvMRiaIy2pFajHN9+wElImQhi5iw5CY6R/YL6m4ap2mVmIeMi +nvWKYaibMzNx2wouahIrEvANb3N93lr8T5LDfLPPLXmHAa0ebk2GbWt90mVdk6UZ +QQqI0VK8YlzR9ybp8jJ5ZRgXIegtn91Tts164+61wQQ2M6fV1le41m/1NENAzkIW +Q7LJvvqe+Q3YO8yBvxYP7ru/kKgBWHPyof+6mocqirwvrVLb+MPL+sKolcuXP0sS +c52vk0Q= +-----END CERTIFICATE----- diff --git a/plugin/trino-opensearch/src/test/resources/opensearch.yml b/plugin/trino-opensearch/src/test/resources/opensearch.yml new file mode 100644 index 0000000000000..d158eb73c4b06 --- /dev/null +++ b/plugin/trino-opensearch/src/test/resources/opensearch.yml @@ -0,0 +1,32 @@ +cluster.name: docker-cluster + +# Bind to all interfaces because we don't know what IP address Docker will assign to us. +network.host: 0.0.0.0 + +# # minimum_master_nodes need to be explicitly set when bound on a public IP +# # set to 1 to allow single node clusters +# discovery.zen.minimum_master_nodes: 1 + +# Setting network.host to a non-loopback address enables the annoying bootstrap checks. "Single-node" mode disables them again. 
+# discovery.type: single-node + +######## Start OpenSearch Security Demo Configuration ######## +# WARNING: revise all the lines below before you go into production +plugins.security.ssl.transport.pemcert_filepath: esnode.pem +plugins.security.ssl.transport.pemkey_filepath: esnode-key.pem +plugins.security.ssl.transport.pemtrustedcas_filepath: root-ca.pem +plugins.security.ssl.transport.enforce_hostname_verification: false +plugins.security.ssl.http.enabled: true +plugins.security.ssl.http.pemcert_filepath: esnode.pem +plugins.security.ssl.http.pemkey_filepath: esnode-key.pem +plugins.security.ssl.http.pemtrustedcas_filepath: root-ca.pem +plugins.security.ssl.http.clientauth_mode: OPTIONAL +plugins.security.allow_unsafe_democertificates: true +plugins.security.allow_default_init_securityindex: true +plugins.security.audit.type: internal_opensearch +plugins.security.enable_snapshot_restore_privilege: true +plugins.security.check_snapshot_restore_write_privileges: true +plugins.security.restapi.roles_enabled: ["all_access", "security_rest_api_access"] +plugins.security.system_indices.enabled: true +plugins.security.system_indices.indices: [".plugins-ml-model", ".plugins-ml-task", ".opendistro-alerting-config", ".opendistro-alerting-alert*", ".opendistro-anomaly-results*", ".opendistro-anomaly-detector*", ".opendistro-anomaly-checkpoints", ".opendistro-anomaly-detection-state", ".opendistro-reports-*", ".opensearch-notifications-*", ".opensearch-notebooks", ".opensearch-observability", ".opendistro-asynchronous-search-response*", ".replication-metadata-store"] +node.max_local_storage_nodes: 3 \ No newline at end of file diff --git a/plugin/trino-opensearch/src/test/resources/root-ca.pem b/plugin/trino-opensearch/src/test/resources/root-ca.pem new file mode 100644 index 0000000000000..4cf446a6cc277 --- /dev/null +++ b/plugin/trino-opensearch/src/test/resources/root-ca.pem @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDSTCCAjGgAwIBAgIUByq1K6VbZFocXfpnEohIw8NgEW4wDQYJKoZIhvcNAQEL +BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l +cmF0ZWQgQ0EwHhcNMjMxMjMwMTczMjIxWhcNMjYxMjI5MTczMjIxWjA0MTIwMAYD +VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC +ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAImUzmkzHoVgREpIVHcjvtkV +oT4QjNvciH0bZOcnIUBpXq5ywBoSS3CjckF9mirkAJXZHS+M13155+JKO085Ewy7 +U684jsEVRS8GmNiKrKNYKFiwdKeQKIYtwmwgHMfJCEwcFaVOmJ2PhbiHiQj+8lMr +mRMwS+Wy7deci9uJuzykSHQ7LW7MNcZBCBkWBtctI2p4h21yYZzwn4UzsDGD7i02 +GNJ/CHG4M5QjLY9P/tCHoss9kHDUn7k/rnezk8nHZgf2XAwVYdJBbVeYvUe7HgtK ++35FeSACslOtgV2kQJpULwvh8wiqgP+/oIhNoNPW/NpyoOT4luQmJfxZV5SKJ08C +AwEAAaNTMFEwHQYDVR0OBBYEFOqG2pBw+qtXIiPZ69CVSzeI43UGMB8GA1UdIwQY +MBaAFOqG2pBw+qtXIiPZ69CVSzeI43UGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI +hvcNAQELBQADggEBABKr0j2hk5qzWs8uWR4yqxjL4+MQTSaBtvcMHCF6w1rCCRLt +4sIz7Gy0ecnqjfXscjrrrEA4ruYP4CpAvRXyYgmJ2F1jLYrMcmYnYupCwRl88ygO +dvjVTdAjOib0NBUsE1DA8l0idFYHipCV2izpKjF/AB1HnhWm1A++pC3nZ++x7o6u +lqOuTwEMDBMnwpc+CQpQPSa9QCIl60LNpbhH+rWAL2xm5fdLV/Hs3hFQvABGorgR +78F/kBfk30c/sM2tQog7By/ic6KIEFQhFGvDz2fAqt2BAAyO22nWEQZQI+JTBAH4 +tEBodOialM5LlPFAOU0gmr6q7g8lFIkVYwxadUw= +-----END CERTIFICATE----- diff --git a/plugin/trino-opensearch/src/test/resources/serverkey.pem b/plugin/trino-opensearch/src/test/resources/serverkey.pem new file mode 100644 index 0000000000000..6bbe34db2e2b2 --- /dev/null +++ b/plugin/trino-opensearch/src/test/resources/serverkey.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3lnDqYCsnVWLx 
+lokBBhM4kJsOc+Dwp7FQWvcEEDQYIPHBUDoE1b1uPO10iwDy41+vCd+L3tkt5vgz +hkU+W+AMYOLVJSDUTU1oaFT3gQdAPBLssMZJ+mrVVysxK/BOkB1r4GRCj3+DaL0/ +YiIm+VT4fmt1bQ46C9AqPtGBNjjAheOfBVvGsp8j9zs296OtUnlDLYhtEPF5bSpj +yTn0rlKuW9Vc1zYNFW+oshLMJxmiLcFCFzLKRKdNLGNXs0JGy7BfjOGJ7b8zgzBe +8phua7VvDoYw4Hda8z8uNSBWd0bOOE0DT4mbbiC+M0G3c76TNceeZMGlkw6g0Vt4 +G5jFYTIXAgMBAAECggEAJE/GvKzRzEFYxST7qy6q7ZS4fobP9nmMmjKryet0ilM1 +zgESwLMP+TZm1xbJVMFBQytGQnGM7kXRUMuddYSRj5E4PGVQFX3GskB0kdG8/+kB +6zvZtHcp21GW8u5YZQmfMB+dZQbh9zlnuRotXB0Tv+StV3f/CN8eSMTNBmwVlIpw +N//eAC3/F9VbY6L3EhtSWwcqONPITsfetvYMb3VMQNgdrm23xYHN3LIqJeDLxYO6 +AdasfwLqs1KEChy4y0+aST9bYvIC2Kgpjm6oT5cIOmMUUmr3194lDCGRtU3J+kQN +fqzqNTMnrummvd2QSzESRWlAXyleduj6gHR+W+wa9QKBgQC7LRgPHoYJzbys71i9 +owv3ZGG9KDBA3wnbu1oCklGL1V/x6fuw/OTBt7ajP/zRJKczcg7Po0Z//fL4y0PD +L5npej9+hAaeARHiiNi7Seo5Qb/DtWj+TcwkIWVr3SikzG/md29kfMIHiEpc2kHY +qYMw7aXHBLUkx1DnS3mVAQXZdQKBgQD7F4/77ktEsas7GxbFycQ7YNMs4UgxcjRa +ezGKmQEJhLJU799WWg0LYpHR1r1SzZ5rp6vqIo0JDwjssZu/Hfr4hMtsjlU3XL7Q +1Ne9C45BrmSJ/VKETUTel5FFU9K136fMmyxUhAxKJeBonHNkt8s6AeU1QX4qnynd +ZlmoM38f2wKBgCOFQjyUSVGQ9o5G+as7RukS5q1z8/xLl15YppM4WztTKdsJ0Vbc +Fzlf+MvwN7Uk5IoRyhfDX3yon7RxNpHS9b1mEqiau3q+rzszRxX7x/RHsvGb+qnP +776ni8y1cqIbndy5q60BM/0j8yC91qMcQCZH4x7qmMm6/XJ3U0JgqFCFAoGBALvn +/TvHMBLfd0pLUnaAMVdSdZbvnQxL8DwKV5/mMu2BnceMn7HKwFpV0NSdhJ9z+2uL +Vk9ove0akE45+FlFQW08S9QYjFNbF/3GWxFBlZs3vEu8BUW3YaPG/9NHELhFARlQ +wxMYou3WfKMs+fpUxUTK6FI7igSxii4DC3Oh3i/jAoGBAJxvc5WwNEyogvCJ7uSy +dVT2PUeGmMDuuEf3C1Flk/ShzetpuvmKuC8DQsbEopIE92tAWwf3gLVIfAmH6bQy +xyuMjjVdl6L37sHX4jUzdf8hw5OF45LgiKfTiBVl2PsD2UT9RJEisk7Z5IvvW45W +RxzOk4AwW2uSgJD8NGTDmwLU +-----END PRIVATE KEY----- diff --git a/plugin/trino-opensearch/src/test/resources/truststore.jks b/plugin/trino-opensearch/src/test/resources/truststore.jks new file mode 100644 index 0000000000000..7f756310f03d1 Binary files /dev/null and b/plugin/trino-opensearch/src/test/resources/truststore.jks differ diff --git a/plugin/trino-oracle/pom.xml b/plugin/trino-oracle/pom.xml index 19f7be5c195ad..614570a100129 100644 --- a/plugin/trino-oracle/pom.xml +++ b/plugin/trino-oracle/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -58,6 +58,11 @@ trino-base-jdbc + + io.trino + trino-matching + + io.trino trino-plugin-toolkit diff --git a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java index 47837cff4e3ef..e621317714080 100644 --- a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java +++ b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java @@ -62,6 +62,7 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorTableMetadata; import io.trino.spi.connector.JoinCondition; +import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.type.CharType; import io.trino.spi.type.DecimalType; import io.trino.spi.type.Decimals; @@ -231,6 +232,14 @@ public OracleClient( this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "real", "double")) + .map("$equal(left: numeric_type, right: numeric_type)").to("left = right") + .map("$not_equal(left: numeric_type, right: numeric_type)").to("left <> right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: 
numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") + .add(new RewriteStringComparison()) .build(); JdbcTypeHandle bigintTypeHandle = new JdbcTypeHandle(TRINO_BIGINT_TYPE, Optional.of("NUMBER"), Optional.of(0), Optional.of(0), Optional.empty(), Optional.empty()); @@ -538,6 +547,12 @@ public Optional implementAggregation(ConnectorSession session, A return aggregateFunctionRewriter.rewrite(session, aggregate, assignments); } + @Override + public Optional convertPredicate(ConnectorSession session, ConnectorExpression expression, Map assignments) + { + return connectorExpressionRewriter.rewrite(session, expression, assignments); + } + private static Optional toTypeHandle(DecimalType decimalType) { return Optional.of(new JdbcTypeHandle(OracleTypes.NUMBER, Optional.of("NUMBER"), Optional.of(decimalType.getPrecision()), Optional.of(decimalType.getScale()), Optional.empty(), Optional.empty())); diff --git a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/RewriteStringComparison.java b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/RewriteStringComparison.java new file mode 100644 index 0000000000000..1e3f8c73bf7d9 --- /dev/null +++ b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/RewriteStringComparison.java @@ -0,0 +1,93 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.oracle; + +import com.google.common.collect.ImmutableList; +import io.trino.matching.Capture; +import io.trino.matching.Captures; +import io.trino.matching.Pattern; +import io.trino.plugin.base.expression.ConnectorExpressionRule; +import io.trino.plugin.jdbc.JdbcColumnHandle; +import io.trino.plugin.jdbc.QueryParameter; +import io.trino.plugin.jdbc.expression.ComparisonOperator; +import io.trino.plugin.jdbc.expression.ParameterizedExpression; +import io.trino.spi.expression.Call; +import io.trino.spi.expression.Variable; +import io.trino.spi.type.CharType; +import io.trino.spi.type.VarcharType; +import oracle.jdbc.OracleTypes; + +import java.util.Optional; +import java.util.stream.Stream; + +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static io.trino.matching.Capture.newCapture; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.argument; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.argumentCount; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.call; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.functionName; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.type; +import static io.trino.plugin.base.expression.ConnectorExpressionPatterns.variable; +import static io.trino.spi.type.BooleanType.BOOLEAN; + +public class RewriteStringComparison + implements ConnectorExpressionRule +{ + private static final Capture FIRST_ARGUMENT = newCapture(); + private static final Capture SECOND_ARGUMENT = newCapture(); + private static final Pattern PATTERN = call() + .with(type().equalTo(BOOLEAN)) + .with(functionName().matching(Stream.of(ComparisonOperator.values()) + .filter(comparison -> comparison != ComparisonOperator.IS_DISTINCT_FROM) + .map(ComparisonOperator::getFunctionName) + .collect(toImmutableSet()) + ::contains)) + .with(argumentCount().equalTo(2)) + .with(argument(0).matching(variable().with(type().matching(type -> type instanceof CharType || type instanceof VarcharType)).capturedAs(FIRST_ARGUMENT))) + .with(argument(1).matching(variable().with(type().matching(type -> type instanceof CharType || type instanceof VarcharType)).capturedAs(SECOND_ARGUMENT))); + + @Override + public Pattern getPattern() + { + return PATTERN; + } + + @Override + public Optional rewrite(Call expression, Captures captures, RewriteContext context) + { + ComparisonOperator comparison = ComparisonOperator.forFunctionName(expression.getFunctionName()); + Variable firstArgument = captures.get(FIRST_ARGUMENT); + Variable secondArgument = captures.get(SECOND_ARGUMENT); + + if (isClob(firstArgument, context) || isClob(secondArgument, context)) { + return Optional.empty(); + } + return context.defaultRewrite(firstArgument).flatMap(first -> + context.defaultRewrite(secondArgument).map(second -> + new ParameterizedExpression( + "(%s) %s (%s)".formatted(first.expression(), comparison.getOperator(), second.expression()), + ImmutableList.builder() + .addAll(first.parameters()) + .addAll(second.parameters()) + .build()))); + } + + private static boolean isClob(Variable variable, RewriteContext context) + { + return switch (((JdbcColumnHandle) context.getAssignment(variable.getName())).getJdbcTypeHandle().getJdbcType()) { + case OracleTypes.CLOB, OracleTypes.NCLOB -> true; + default -> false; + }; + } +} diff --git a/plugin/trino-password-authenticators/pom.xml b/plugin/trino-password-authenticators/pom.xml index 
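[Editor's note] The Oracle changes above teach the connector to push comparison predicates into the remote query: the numeric_type mapping rules cover =, <>, <, <=, >, >= on numeric operands, and the new RewriteStringComparison rule covers char/varchar column-to-column comparisons, bailing out when either side is backed by a CLOB/NCLOB (Oracle does not support comparison operators on LOBs). A hedged sketch of the resulting behaviour, using hypothetical columns that are not part of the patch:

    // Hypothetical Oracle columns: C1 varchar2(25), C2 varchar2(25), DOC clob
    //   Trino predicate            fragment produced by RewriteStringComparison
    //   c1 < c2               ->   ("C1") < ("C2")
    //   c1 <> c2              ->   ("C1") <> ("C2")
    //   c1 IS DISTINCT FROM c2     not rewritten (IS_DISTINCT_FROM is filtered out of the pattern)
    //   c1 = doc                   not rewritten (Optional.empty()): DOC maps to OracleTypes.CLOB, so the comparison stays in Trino
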
6306c9f15c13c..3d0e2cbd98b79 100644 --- a/plugin/trino-password-authenticators/pom.xml +++ b/plugin/trino-password-authenticators/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -164,11 +164,5 @@ toxiproxy test - - - org.testng - testng - test - diff --git a/plugin/trino-phoenix5/pom.xml b/plugin/trino-phoenix5/pom.xml index 97a834b0a75be..89c7ec6174365 100644 --- a/plugin/trino-phoenix5/pom.xml +++ b/plugin/trino-phoenix5/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-phoenix5/src/main/java/io/trino/plugin/phoenix5/PhoenixClient.java b/plugin/trino-phoenix5/src/main/java/io/trino/plugin/phoenix5/PhoenixClient.java index 9a381e6b1043e..7bd91251c851a 100644 --- a/plugin/trino-phoenix5/src/main/java/io/trino/plugin/phoenix5/PhoenixClient.java +++ b/plugin/trino-phoenix5/src/main/java/io/trino/plugin/phoenix5/PhoenixClient.java @@ -39,6 +39,7 @@ import io.trino.plugin.jdbc.RemoteTableName; import io.trino.plugin.jdbc.WriteFunction; import io.trino.plugin.jdbc.WriteMapping; +import io.trino.plugin.jdbc.expression.ComparisonOperator; import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder; import io.trino.plugin.jdbc.expression.ParameterizedExpression; import io.trino.plugin.jdbc.expression.RewriteComparison; @@ -50,6 +51,8 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorTableHandle; import io.trino.spi.connector.ConnectorTableMetadata; +import io.trino.spi.connector.JoinStatistics; +import io.trino.spi.connector.JoinType; import io.trino.spi.connector.SchemaNotFoundException; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.expression.ConnectorExpression; @@ -246,7 +249,7 @@ public PhoenixClient(PhoenixConfig config, ConnectionFactory connectionFactory, getConnectionProperties(config).forEach((k, v) -> configuration.set((String) k, (String) v)); this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) - .add(new RewriteComparison(ImmutableSet.of(RewriteComparison.ComparisonOperator.EQUAL, RewriteComparison.ComparisonOperator.NOT_EQUAL))) + .add(new RewriteComparison(ImmutableSet.of(ComparisonOperator.EQUAL, ComparisonOperator.NOT_EQUAL))) .withTypeClass("integer_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint")) .map("$add(left: integer_type, right: integer_type)").to("left + right") .map("$subtract(left: integer_type, right: integer_type)").to("left - right") @@ -263,6 +266,21 @@ public Optional convertPredicate(ConnectorSession sessi return connectorExpressionRewriter.rewrite(session, expression, assignments); } + @Override + public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + // Joins are currently not supported + return Optional.empty(); + } + public Connection getConnection(ConnectorSession session) throws SQLException { diff --git a/plugin/trino-pinot/pom.xml b/plugin/trino-pinot/pom.xml index 7d2e84cf60606..f530198220fb5 100755 --- a/plugin/trino-pinot/pom.xml +++ b/plugin/trino-pinot/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,13 +15,6 @@ ${project.parent.basedir} 0.12.1 - - instances @@ -658,12 +651,6 @@ testcontainers test - - - org.testng - testng - test - diff --git 
a/plugin/trino-postgresql/pom.xml b/plugin/trino-postgresql/pom.xml index cd51a4ee159ef..15e205edd9196 100644 --- a/plugin/trino-postgresql/pom.xml +++ b/plugin/trino-postgresql/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java b/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java index affd261a925bf..d6247d866ed82 100644 --- a/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java +++ b/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java @@ -68,7 +68,6 @@ import io.trino.plugin.jdbc.aggregation.ImplementVarianceSamp; import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder; import io.trino.plugin.jdbc.expression.ParameterizedExpression; -import io.trino.plugin.jdbc.expression.RewriteComparison; import io.trino.plugin.jdbc.expression.RewriteIn; import io.trino.plugin.jdbc.logging.RemoteQueryModifier; import io.trino.plugin.postgresql.PostgreSqlConfig.ArrayMapping; @@ -135,6 +134,7 @@ import java.util.OptionalLong; import java.util.UUID; import java.util.function.BiFunction; +import java.util.function.Predicate; import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkArgument; @@ -303,12 +303,19 @@ public PostgreSqlClient( this.statisticsEnabled = statisticsConfig.isEnabled(); + Predicate pushdownWithCollateEnabled = PostgreSqlSessionProperties::isEnableStringPushdownWithCollate; this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) - // TODO allow all comparison operators for numeric types - .add(new RewriteComparison(ImmutableSet.of(RewriteComparison.ComparisonOperator.EQUAL, RewriteComparison.ComparisonOperator.NOT_EQUAL))) .add(new RewriteIn()) .withTypeClass("integer_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint")) + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "real", "double")) + .map("$equal(left, right)").to("left = right") + .map("$not_equal(left, right)").to("left <> right") + .map("$is_distinct_from(left, right)").to("left IS DISTINCT FROM right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") .map("$add(left: integer_type, right: integer_type)").to("left + right") .map("$subtract(left: integer_type, right: integer_type)").to("left - right") .map("$multiply(left: integer_type, right: integer_type)").to("left * right") @@ -321,6 +328,11 @@ public PostgreSqlClient( .map("$not(value: boolean)").to("NOT value") .map("$is_null(value)").to("value IS NULL") .map("$nullif(first, second)").to("NULLIF(first, second)") + .withTypeClass("collatable_type", ImmutableSet.of("char", "varchar")) + .when(pushdownWithCollateEnabled).map("$less_than(left: collatable_type, right: collatable_type)").to("left < right COLLATE \"C\"") + .when(pushdownWithCollateEnabled).map("$less_than_or_equal(left: collatable_type, right: collatable_type)").to("left <= right COLLATE \"C\"") + .when(pushdownWithCollateEnabled).map("$greater_than(left: collatable_type, right: 
collatable_type)").to("left > right COLLATE \"C\"") + .when(pushdownWithCollateEnabled).map("$greater_than_or_equal(left: collatable_type, right: collatable_type)").to("left >= right COLLATE \"C\"") .build(); JdbcTypeHandle bigintTypeHandle = new JdbcTypeHandle(Types.BIGINT, Optional.of("bigint"), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); @@ -1036,6 +1048,30 @@ private static Optional readRowCountTableStat(StatisticsDao statisticsDao, @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + if (joinType == JoinType.FULL_OUTER) { + // FULL JOIN is only supported with merge-joinable or hash-joinable join conditions + return Optional.empty(); + } + return implementJoinCostAware( + session, + joinType, + leftSource, + rightSource, + statistics, + () -> super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics)); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -1055,7 +1091,7 @@ public Optional implementJoin( leftSource, rightSource, statistics, - () -> super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); + () -> super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } @Override @@ -1314,7 +1350,7 @@ private ObjectReadFunction varcharMapReadFunction() varcharMapType.getValueType().writeSlice(valueBlockBuilder, utf8Slice(entry.getValue())); } } - MapBlock mapBlock = varcharMapType.createBlockFromKeyValue(Optional.empty(), new int[]{0, map.size()}, keyBlockBuilder.build(), valueBlockBuilder.build()); + MapBlock mapBlock = varcharMapType.createBlockFromKeyValue(Optional.empty(), new int[] {0, map.size()}, keyBlockBuilder.build(), valueBlockBuilder.build()); return varcharMapType.getObject(mapBlock, 0); }); } diff --git a/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlClient.java b/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlClient.java index fd47c631f77d3..90692c075be72 100644 --- a/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlClient.java +++ b/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlClient.java @@ -13,6 +13,7 @@ */ package io.trino.plugin.postgresql; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.trino.plugin.base.mapping.DefaultIdentifierMapping; import io.trino.plugin.jdbc.BaseJdbcConfig; @@ -33,6 +34,7 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Variable; +import io.trino.spi.session.PropertyMetadata; import io.trino.spi.type.Type; import io.trino.sql.planner.ConnectorExpressionTranslator; import io.trino.sql.planner.LiteralEncoder; @@ -118,7 +120,10 @@ public class TestPostgreSqlClient private static final ConnectorSession SESSION = TestingConnectorSession .builder() - .setPropertyMetadata(new JdbcMetadataSessionProperties(new JdbcMetadataConfig(), Optional.empty()).getSessionProperties()) + .setPropertyMetadata(ImmutableList.>builder() + .addAll(new JdbcMetadataSessionProperties(new 
JdbcMetadataConfig(), Optional.empty()).getSessionProperties()) + .addAll(new PostgreSqlSessionProperties(new PostgreSqlConfig()).getSessionProperties()) + .build()) .build(); @Test diff --git a/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlConnectorTest.java b/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlConnectorTest.java index 8170cdcff5f2e..77d01fc9de471 100644 --- a/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlConnectorTest.java +++ b/plugin/trino-postgresql/src/test/java/io/trino/plugin/postgresql/TestPostgreSqlConnectorTest.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import io.airlift.log.Logger; import io.airlift.units.Duration; import io.trino.Session; import io.trino.plugin.jdbc.BaseJdbcConnectorTest; @@ -80,6 +81,8 @@ public class TestPostgreSqlConnectorTest extends BaseJdbcConnectorTest { + private static final Logger log = Logger.get(TestPostgreSqlConnectorTest.class); + protected TestingPostgreSqlServer postgreSqlServer; @Override @@ -641,6 +644,8 @@ public void testStringJoinPushdownWithCollate() // inequality for (String operator : nonEqualities) { + log.info("Testing operator=%s", operator); + // bigint inequality predicate assertThat(query(withoutDynamicFiltering, format("SELECT r.name, n.name FROM nation n JOIN region r ON n.regionkey %s r.regionkey", operator))) // Currently no pushdown as inequality predicate is removed from Join to maintain Cross Join and Filter as separate nodes @@ -654,6 +659,7 @@ public void testStringJoinPushdownWithCollate() // inequality along with an equality, which constitutes an equi-condition and allows filter to remain as part of the Join for (String operator : nonEqualities) { + log.info("Testing operator=%s", operator); assertConditionallyPushedDown( session, format("SELECT n.name, c.name FROM nation n JOIN customer c ON n.nationkey = c.nationkey AND n.regionkey %s c.custkey", operator), @@ -663,6 +669,7 @@ public void testStringJoinPushdownWithCollate() // varchar inequality along with an equality, which constitutes an equi-condition and allows filter to remain as part of the Join for (String operator : nonEqualities) { + log.info("Testing operator=%s", operator); assertConditionallyPushedDown( session, format("SELECT n.name, nl.name FROM nation n JOIN %s nl ON n.regionkey = nl.regionkey AND n.name %s nl.name", nationLowercaseTable.getName(), operator), diff --git a/plugin/trino-prometheus/pom.xml b/plugin/trino-prometheus/pom.xml index 083b7ddf1644f..2b33f26c168d5 100644 --- a/plugin/trino-prometheus/pom.xml +++ b/plugin/trino-prometheus/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -257,11 +257,5 @@ testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusIntegration.java b/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusIntegration.java index 9e771bb78791e..0fae916c36e5f 100644 --- a/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusIntegration.java +++ b/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusIntegration.java @@ -30,7 +30,6 @@ import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertEquals; 
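[Editor's note] In PostgreSqlClient above, the blanket RewriteComparison(EQUAL, NOT_EQUAL) rule is replaced by explicit mappings: =, <> and IS DISTINCT FROM are pushed down for any operands, ordering comparisons are pushed down for numeric_type operands, and for char/varchar operands only when string pushdown with collate is enabled for the session (PostgreSqlSessionProperties.isEnableStringPushdownWithCollate), in which case COLLATE "C" is appended. The "C" collation compares byte-wise, which matches Trino's varchar ordering, whereas locale collations may not. A hedged illustration with literal values that are not taken from the patch:

    // Trino orders varchars by their UTF-8 bytes, so 'a' (0x61) sorts after 'B' (0x42):
    //   Trino:                      SELECT 'a' > 'B'              -> true
    //   PostgreSQL, "C" collation:  SELECT 'a' > 'B' COLLATE "C"  -> true
    //   PostgreSQL, en_US locale:   SELECT 'a' > 'B'              -> false
    // hence the .when(pushdownWithCollateEnabled) guard on the collatable_type rules.
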
public class TestPrometheusIntegration extends AbstractTestQueryFramework @@ -129,7 +128,6 @@ public void testCorrectNumberOfSplitsCreated() (DynamicFilter) null, Constraint.alwaysTrue()); int numSplits = splits.getNextBatch(NUMBER_MORE_THAN_EXPECTED_NUMBER_SPLITS).getNow(null).getSplits().size(); - assertEquals(numSplits, config.getMaxQueryRangeDuration().getValue(TimeUnit.SECONDS) / config.getQueryChunkSizeDuration().getValue(TimeUnit.SECONDS), - 0.001); + assertThat((double) numSplits).isEqualTo(config.getMaxQueryRangeDuration().getValue(TimeUnit.SECONDS) / config.getQueryChunkSizeDuration().getValue(TimeUnit.SECONDS)); } } diff --git a/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusSplit.java b/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusSplit.java index dc156c3fe0561..4ac86b8a344e8 100644 --- a/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusSplit.java +++ b/plugin/trino-prometheus/src/test/java/io/trino/plugin/prometheus/TestPrometheusSplit.java @@ -62,9 +62,9 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.http.client.utils.URLEncodedUtils.parse; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.data.Offset.offset; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; -import static org.testng.Assert.assertEquals; @TestInstance(PER_CLASS) @Execution(CONCURRENT) @@ -210,7 +210,7 @@ public void testQueryDividedIntoSplitsShouldHaveCorrectSpacingBetweenTimes() assertThat(paramsMap1).containsEntry("query", "up[1d]"); assertThat(paramsMap2).containsEntry("query", "up[1d]"); long diff = Double.valueOf(paramsMap2.get("time")).longValue() - Double.valueOf(paramsMap1.get("time")).longValue(); - assertEquals(config.getQueryChunkSizeDuration().getValue(TimeUnit.SECONDS), diff, 0.0001); + assertThat(config.getQueryChunkSizeDuration().getValue(TimeUnit.SECONDS)).isCloseTo(diff, offset(0.0001)); } @Test diff --git a/plugin/trino-raptor-legacy/pom.xml b/plugin/trino-raptor-legacy/pom.xml index 47e4c75853928..eeff307695e7f 100644 --- a/plugin/trino-raptor-legacy/pom.xml +++ b/plugin/trino-raptor-legacy/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -313,11 +313,5 @@ testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-redis/pom.xml b/plugin/trino-redis/pom.xml index e85e49cda7f93..831f54a920ff4 100644 --- a/plugin/trino-redis/pom.xml +++ b/plugin/trino-redis/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-redis/src/test/java/io/trino/plugin/redis/TestMinimalFunctionality.java b/plugin/trino-redis/src/test/java/io/trino/plugin/redis/TestMinimalFunctionality.java index ac67e6d9d1086..1f85511d69450 100644 --- a/plugin/trino-redis/src/test/java/io/trino/plugin/redis/TestMinimalFunctionality.java +++ b/plugin/trino-redis/src/test/java/io/trino/plugin/redis/TestMinimalFunctionality.java @@ -18,14 +18,17 @@ import io.trino.metadata.TableHandle; import io.trino.security.AllowAllAccessControl; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; import java.util.Map; import java.util.Optional; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static 
org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; +@Execution(SAME_THREAD) // clearData(), populateData() looks like shared mutable state public class TestMinimalFunctionality extends AbstractTestMinimalFunctionality { diff --git a/plugin/trino-redshift/pom.xml b/plugin/trino-redshift/pom.xml index 3535e4ab43bb9..b9defa27b006a 100644 --- a/plugin/trino-redshift/pom.xml +++ b/plugin/trino-redshift/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -21,7 +21,7 @@ com.amazon.redshift redshift-jdbc42 - 2.1.0.23 + 2.1.0.24 diff --git a/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java b/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java index ba04e2693f02c..c1cc81b720332 100644 --- a/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java +++ b/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java @@ -405,6 +405,29 @@ protected boolean isSupportedJoinCondition(ConnectorSession session, JdbcJoinCon @Override public Optional implementJoin(ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + if (joinType == JoinType.FULL_OUTER) { + // FULL JOIN is only supported with merge-joinable or hash-joinable join conditions + return Optional.empty(); + } + return implementJoinCostAware( + session, + joinType, + leftSource, + rightSource, + statistics, + () -> super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics)); + } + + @Override + public Optional legacyImplementJoin(ConnectorSession session, JoinType joinType, PreparedQuery leftSource, PreparedQuery rightSource, @@ -423,7 +446,7 @@ public Optional implementJoin(ConnectorSession session, leftSource, rightSource, statistics, - () -> super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); + () -> super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } @Override diff --git a/plugin/trino-redshift/src/test/java/io/trino/plugin/redshift/TestRedshiftTypeMapping.java b/plugin/trino-redshift/src/test/java/io/trino/plugin/redshift/TestRedshiftTypeMapping.java index b2ea6798863b9..52a96949499cc 100644 --- a/plugin/trino-redshift/src/test/java/io/trino/plugin/redshift/TestRedshiftTypeMapping.java +++ b/plugin/trino-redshift/src/test/java/io/trino/plugin/redshift/TestRedshiftTypeMapping.java @@ -462,8 +462,7 @@ public void testTimestamp() testTimestamp(testZone); } - @Test - public void testTimestamp(ZoneId sessionZone) + private void testTimestamp(ZoneId sessionZone) { Session session = Session.builder(getSession()) .setTimeZoneKey(getTimeZoneKey(sessionZone.getId())) @@ -837,7 +836,7 @@ private void runTestCases(String tableName, List testCases) } @Test - public static void checkIllegalRedshiftTimePrecision() + public void checkIllegalRedshiftTimePrecision() { assertRedshiftCreateFails( "check_redshift_time_precision_error", @@ -846,7 +845,7 @@ public static void checkIllegalRedshiftTimePrecision() } @Test - public static void checkIllegalRedshiftTimestampPrecision() + public void checkIllegalRedshiftTimestampPrecision() { assertRedshiftCreateFails( 
"check_redshift_timestamp_precision_error", diff --git a/plugin/trino-resource-group-managers/pom.xml b/plugin/trino-resource-group-managers/pom.xml index 97e276cf212d6..730877fe81ce6 100644 --- a/plugin/trino-resource-group-managers/pom.xml +++ b/plugin/trino-resource-group-managers/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -257,11 +257,5 @@ testcontainers test - - - org.testng - testng - test - diff --git a/plugin/trino-session-property-managers/pom.xml b/plugin/trino-session-property-managers/pom.xml index 7838a5bb9aacb..fb09231697036 100644 --- a/plugin/trino-session-property-managers/pom.xml +++ b/plugin/trino-session-property-managers/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -180,12 +180,24 @@ io.trino trino-main test + + + io.trino + re2j + + io.trino trino-testing test + + + io.trino + re2j + + diff --git a/plugin/trino-singlestore/pom.xml b/plugin/trino-singlestore/pom.xml index 1b46a94680771..4f6a2d76605a5 100644 --- a/plugin/trino-singlestore/pom.xml +++ b/plugin/trino-singlestore/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-singlestore/src/main/java/io/trino/plugin/singlestore/SingleStoreClient.java b/plugin/trino-singlestore/src/main/java/io/trino/plugin/singlestore/SingleStoreClient.java index b715c1b293d83..2f2afd7884cb4 100644 --- a/plugin/trino-singlestore/src/main/java/io/trino/plugin/singlestore/SingleStoreClient.java +++ b/plugin/trino-singlestore/src/main/java/io/trino/plugin/singlestore/SingleStoreClient.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; +import io.trino.plugin.base.expression.ConnectorExpressionRewriter; import io.trino.plugin.base.mapping.IdentifierMapping; import io.trino.plugin.jdbc.BaseJdbcClient; import io.trino.plugin.jdbc.BaseJdbcConfig; @@ -31,6 +32,8 @@ import io.trino.plugin.jdbc.QueryBuilder; import io.trino.plugin.jdbc.RemoteTableName; import io.trino.plugin.jdbc.WriteMapping; +import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder; +import io.trino.plugin.jdbc.expression.ParameterizedExpression; import io.trino.plugin.jdbc.logging.RemoteQueryModifier; import io.trino.spi.TrinoException; import io.trino.spi.connector.AggregateFunction; @@ -40,6 +43,7 @@ import io.trino.spi.connector.JoinStatistics; import io.trino.spi.connector.JoinType; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.type.CharType; import io.trino.spi.type.DecimalType; import io.trino.spi.type.Decimals; @@ -151,6 +155,7 @@ public class SingleStoreClient private static final Pattern UNSIGNED_TYPE_REGEX = Pattern.compile("(?i).*unsigned$"); private final Type jsonType; + private final ConnectorExpressionRewriter connectorExpressionRewriter; @Inject public SingleStoreClient( @@ -183,6 +188,19 @@ protected SingleStoreClient( super("`", connectionFactory, queryBuilder, config.getJdbcTypesMappedToVarchar(), identifierMapping, queryModifier, supportsRetries); requireNonNull(typeManager, "typeManager is null"); this.jsonType = typeManager.getType(new TypeSignature(StandardTypes.JSON)); + + this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() + .addStandardRules(this::quoted) + // No "real" on the list; pushdown on REAL is disabled also in toColumnMapping + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", 
"bigint", "decimal", "double")) + .map("$equal(left: numeric_type, right: numeric_type)").to("left = right") + .map("$not_equal(left: numeric_type, right: numeric_type)").to("left <> right") + // .map("$is_distinct_from(left: numeric_type, right: numeric_type)").to("left IS DISTINCT FROM right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") + .build(); } @Override @@ -553,8 +571,32 @@ public boolean isTopNGuaranteed(ConnectorSession session) return true; } + @Override + public Optional convertPredicate(ConnectorSession session, ConnectorExpression expression, Map assignments) + { + return connectorExpressionRewriter.rewrite(session, expression, assignments); + } + @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + if (joinType == JoinType.FULL_OUTER) { + // Not supported in SingleStore + return Optional.empty(); + } + return super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -568,7 +610,7 @@ public Optional implementJoin( // Not supported in SingleStore return Optional.empty(); } - return super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); } @Override diff --git a/plugin/trino-snowflake/pom.xml b/plugin/trino-snowflake/pom.xml index 33f14d2a93990..b99a536a724e6 100644 --- a/plugin/trino-snowflake/pom.xml +++ b/plugin/trino-snowflake/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -93,6 +93,12 @@ + + io.airlift + junit-extensions + test + + io.airlift testing diff --git a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClient.java b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClient.java index c68e786e2efeb..5ffc2de254b0c 100644 --- a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClient.java +++ b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClient.java @@ -17,7 +17,6 @@ import com.google.common.collect.ImmutableSet; import com.google.inject.Inject; import io.airlift.log.Logger; -import io.airlift.slice.Slices; import io.trino.plugin.base.aggregation.AggregateFunctionRewriter; import io.trino.plugin.base.aggregation.AggregateFunctionRule; import io.trino.plugin.base.expression.ConnectorExpressionRewriter; @@ -92,10 +91,10 @@ import java.util.function.BiFunction; import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.util.concurrent.MoreExecutors.directExecutor; +import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.jdbc.JdbcErrorCode.JDBC_ERROR; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static 
io.trino.spi.type.DecimalType.createDecimalType; +import static io.trino.spi.type.TimestampWithTimeZoneType.MAX_SHORT_PRECISION; import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType; import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_SECOND; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MILLISECOND; @@ -110,11 +109,11 @@ public class SnowflakeClient /* TIME supports an optional precision parameter for fractional seconds, e.g. TIME(3). Time precision can range from 0 (seconds) to 9 (nanoseconds). The default precision is 9. All TIME values must be between 00:00:00 and 23:59:59.999999999. TIME internally stores “wallclock” time, and all operations on TIME values are performed without taking any time zone into consideration. */ - private static final int SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION = 9; + private static final int MAX_SUPPORTED_TEMPORAL_PRECISION = 9; private static final Logger log = Logger.get(SnowflakeClient.class); - private static final DateTimeFormatter SNOWFLAKE_DATETIME_FORMATTER = DateTimeFormatter.ofPattern("y-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX"); + private static final DateTimeFormatter SNOWFLAKE_DATETIME_FORMATTER = DateTimeFormatter.ofPattern("u-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX"); private static final DateTimeFormatter SNOWFLAKE_DATE_FORMATTER = DateTimeFormatter.ofPattern("uuuu-MM-dd"); - private static final DateTimeFormatter SNOWFLAKE_TIMESTAMP_FORMATTER = DateTimeFormatter.ofPattern("y-MM-dd'T'HH:mm:ss.SSSSSSSSS"); + private static final DateTimeFormatter SNOWFLAKE_TIMESTAMP_FORMATTER = DateTimeFormatter.ofPattern("u-MM-dd'T'HH:mm:ss.SSSSSSSSS"); private static final DateTimeFormatter SNOWFLAKE_TIME_FORMATTER = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSSSSS"); private final AggregateFunctionRewriter aggregateFunctionRewriter; @@ -129,81 +128,6 @@ private interface ColumnMappingFunction } private static final TimeZone UTC_TZ = TimeZone.getTimeZone(ZoneId.of("UTC")); - // Mappings for JDBC column types to internal Trino types - private static final Map STANDARD_COLUMN_MAPPINGS = ImmutableMap.builder() - .put(Types.BOOLEAN, StandardColumnMappings.booleanColumnMapping()) - .put(Types.TINYINT, StandardColumnMappings.tinyintColumnMapping()) - .put(Types.SMALLINT, StandardColumnMappings.smallintColumnMapping()) - .put(Types.INTEGER, StandardColumnMappings.integerColumnMapping()) - .put(Types.BIGINT, StandardColumnMappings.bigintColumnMapping()) - .put(Types.REAL, StandardColumnMappings.realColumnMapping()) - .put(Types.DOUBLE, StandardColumnMappings.doubleColumnMapping()) - .put(Types.FLOAT, StandardColumnMappings.doubleColumnMapping()) - .put(Types.BINARY, StandardColumnMappings.varbinaryColumnMapping()) - .put(Types.VARBINARY, StandardColumnMappings.varbinaryColumnMapping()) - .put(Types.LONGVARBINARY, StandardColumnMappings.varbinaryColumnMapping()) - .buildOrThrow(); - - private static final Map SHOWFLAKE_COLUMN_MAPPINGS = ImmutableMap.builder() - .put("time", typeHandle -> Optional.of(timeColumnMapping(typeHandle))) - .put("timestampntz", typeHandle -> Optional.of(timestampColumnMapping(typeHandle))) - .put("timestamptz", typeHandle -> Optional.of(timestampTzColumnMapping(typeHandle))) - .put("timestampltz", typeHandle -> Optional.of(timestampTzColumnMapping(typeHandle))) - .put("date", typeHandle -> Optional.of(ColumnMapping.longMapping( - DateType.DATE, - (resultSet, columnIndex) -> LocalDate.ofEpochDay(resultSet.getLong(columnIndex)).toEpochDay(), - snowFlakeDateWriter()))) - .put("object", typeHandle -> 
Optional.of(ColumnMapping.sliceMapping( - createUnboundedVarcharType(), - StandardColumnMappings.varcharReadFunction(createUnboundedVarcharType()), - StandardColumnMappings.varcharWriteFunction(), - PredicatePushdownController.DISABLE_PUSHDOWN))) - .put("array", typeHandle -> Optional.of(ColumnMapping.sliceMapping( - createUnboundedVarcharType(), - StandardColumnMappings.varcharReadFunction(createUnboundedVarcharType()), - StandardColumnMappings.varcharWriteFunction(), - PredicatePushdownController.DISABLE_PUSHDOWN))) - .put("variant", typeHandle -> Optional.of(ColumnMapping.sliceMapping( - createUnboundedVarcharType(), - variantReadFunction(), - StandardColumnMappings.varcharWriteFunction(), - PredicatePushdownController.FULL_PUSHDOWN))) - .put("varchar", typeHandle -> Optional.of(varcharColumnMapping(typeHandle.getRequiredColumnSize()))) - .put("number", typeHandle -> { - int decimalDigits = typeHandle.getRequiredDecimalDigits(); - int precision = typeHandle.getRequiredColumnSize() + Math.max(-decimalDigits, 0); - if (precision > 38) { - return Optional.empty(); - } - return Optional.of(columnMappingPushdown( - StandardColumnMappings.decimalColumnMapping(createDecimalType(precision, Math.max(decimalDigits, 0)), RoundingMode.UNNECESSARY))); - }) - .buildOrThrow(); - - // Mappings for internal Trino types to JDBC column types - private static final Map STANDARD_WRITE_MAPPINGS = ImmutableMap.builder() - .put("BooleanType", WriteMapping.booleanMapping("boolean", StandardColumnMappings.booleanWriteFunction())) - .put("BigintType", WriteMapping.longMapping("number(19)", StandardColumnMappings.bigintWriteFunction())) - .put("IntegerType", WriteMapping.longMapping("number(10)", StandardColumnMappings.integerWriteFunction())) - .put("SmallintType", WriteMapping.longMapping("number(5)", StandardColumnMappings.smallintWriteFunction())) - .put("TinyintType", WriteMapping.longMapping("number(3)", StandardColumnMappings.tinyintWriteFunction())) - .put("DoubleType", WriteMapping.doubleMapping("double precision", StandardColumnMappings.doubleWriteFunction())) - .put("RealType", WriteMapping.longMapping("real", StandardColumnMappings.realWriteFunction())) - .put("VarbinaryType", WriteMapping.sliceMapping("varbinary", StandardColumnMappings.varbinaryWriteFunction())) - .put("DateType", WriteMapping.longMapping("date", snowFlakeDateWriter())) - .buildOrThrow(); - - private static final Map SNOWFLAKE_WRITE_MAPPINGS = ImmutableMap.builder() - .put("TimeType", type -> WriteMapping.longMapping("time", SnowflakeClient.snowFlaketimeWriter(type))) - .put("ShortTimestampType", SnowflakeClient::snowFlakeTimestampWriter) - .put("ShortTimestampWithTimeZoneType", SnowflakeClient::snowFlakeTimestampWithTZWriter) - .put("LongTimestampType", SnowflakeClient::snowFlakeTimestampWithTZWriter) - .put("LongTimestampWithTimeZoneType", SnowflakeClient::snowFlakeTimestampWithTZWriter) - .put("VarcharType", SnowflakeClient::snowFlakeVarCharWriter) - .put("CharType", SnowflakeClient::snowFlakeCharWriter) - .put("LongDecimalType", SnowflakeClient::snowFlakeDecimalWriter) - .put("ShortDecimalType", SnowflakeClient::snowFlakeDecimalWriter) - .buildOrThrow(); @Inject public SnowflakeClient( @@ -232,23 +156,9 @@ public SnowflakeClient( .build()); } - @Override - public void abortReadConnection(Connection connection, ResultSet resultSet) - throws SQLException - { - // Abort connection before closing. Without this, the Snowflake driver - // attempts to drain the connection by reading all the results. 
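[Editor's note] Among the SnowflakeClient changes above, the datetime formatter patterns switch from 'y' (year-of-era, always positive) to 'u' (signed proleptic year). Presumably this is about round-tripping years at or before 1 BC, where 'y' drops the sign; the patch itself does not state the motivation. A small self-contained java.time illustration:

    import java.time.LocalDate;
    import java.time.format.DateTimeFormatter;

    public class YearPatternDemo
    {
        public static void main(String[] args)
        {
            LocalDate beforeCommonEra = LocalDate.of(-3, 1, 1); // proleptic year -3 is 4 BC
            // 'y' prints the year-of-era, so the sign/era is lost: 4-01-01
            System.out.println(DateTimeFormatter.ofPattern("y-MM-dd").format(beforeCommonEra));
            // 'u' prints the signed proleptic year, so the value round-trips: -3-01-01
            System.out.println(DateTimeFormatter.ofPattern("u-MM-dd").format(beforeCommonEra));
        }
    }
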
- connection.abort(directExecutor()); - } - @Override public Optional toColumnMapping(ConnectorSession session, Connection connection, JdbcTypeHandle typeHandle) { - Optional mapping = getForcedMappingToVarchar(typeHandle); - if (mapping.isPresent()) { - return mapping; - } - String jdbcTypeName = typeHandle.getJdbcTypeName() .orElseThrow(() -> new TrinoException(JDBC_ERROR, "Type name is missing: " + typeHandle)); jdbcTypeName = jdbcTypeName.toLowerCase(Locale.ENGLISH); @@ -275,18 +185,9 @@ public Optional toColumnMapping(ConnectorSession session, Connect } final Map snowflakeColumnMappings = ImmutableMap.builder() - .put("time", handle -> { - return Optional.of(timeColumnMapping(handle)); - }) - .put("date", handle -> { - return Optional.of(ColumnMapping.longMapping( - DateType.DATE, (resultSet, columnIndex) -> - LocalDate.ofEpochDay(resultSet.getLong(columnIndex)).toEpochDay(), - snowFlakeDateWriter())); - }) - .put("varchar", handle -> { - return Optional.of(varcharColumnMapping(handle.getRequiredColumnSize())); - }) + .put("time", handle -> { return Optional.of(timeColumnMapping(handle.getRequiredDecimalDigits())); }) + .put("date", handle -> { return Optional.of(ColumnMapping.longMapping(DateType.DATE, (resultSet, columnIndex) -> LocalDate.ofEpochDay(resultSet.getLong(columnIndex)).toEpochDay(), snowFlakeDateWriter())); }) + .put("varchar", handle -> { return Optional.of(varcharColumnMapping(handle.getRequiredColumnSize())); }) .put("number", handle -> { int decimalDigits = handle.getRequiredDecimalDigits(); int precision = handle.getRequiredColumnSize() + Math.max(-decimalDigits, 0); @@ -294,8 +195,7 @@ public Optional toColumnMapping(ConnectorSession session, Connect return Optional.empty(); } return Optional.of(columnMappingPushdown( - StandardColumnMappings.decimalColumnMapping(DecimalType.createDecimalType( - precision, Math.max(decimalDigits, 0)), RoundingMode.UNNECESSARY))); + StandardColumnMappings.decimalColumnMapping(DecimalType.createDecimalType(precision, Math.max(decimalDigits, 0)), RoundingMode.UNNECESSARY))); }) .buildOrThrow(); @@ -305,7 +205,7 @@ public Optional toColumnMapping(ConnectorSession session, Connect } // Code should never reach here so throw an error. 
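[Editor's note] The "number" mapping kept above computes the Trino decimal type from Snowflake's reported size and scale: precision = columnSize + max(-decimalDigits, 0), scale = max(decimalDigits, 0), and anything exceeding precision 38 is not mapped. Worked examples of that arithmetic, with hypothetical columns:

    // NUMBER(10, 2)   -> decimal(10, 2)
    // NUMBER(10, -2)  -> decimal(12, 0)   // a negative scale widens the precision instead
    // NUMBER(38, -5)  -> precision 43 > 38 -> Optional.empty(), the column is not mapped
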
- throw new TrinoException(NOT_SUPPORTED, "SNOWFLAKE_CONNECTOR_COLUMN_TYPE_NOT_SUPPORTED: Unsupported column type(" + type + "):" + jdbcTypeName); + throw new TrinoException(NOT_SUPPORTED, "Unsupported column type(" + type + "):" + jdbcTypeName); } @Override @@ -334,7 +234,7 @@ public WriteMapping toWriteMapping(ConnectorSession session, Type type) final Map snowflakeWriteMappings = ImmutableMap.builder() .put("TimeType", writeType -> { - return WriteMapping.longMapping("time", SnowflakeClient.snowFlaketimeWriter(writeType)); + return WriteMapping.longMapping("time", timeWriteFunction(((TimeType) writeType).getPrecision())); }) .put("ShortTimestampType", writeType -> { WriteMapping myMap = SnowflakeClient.snowFlakeTimestampWriter(writeType); @@ -375,7 +275,7 @@ public WriteMapping toWriteMapping(ConnectorSession session, Type type) return writeMappingFunction.convert(type); } - throw new TrinoException(NOT_SUPPORTED, "SNOWFLAKE_CONNECTOR_COLUMN_TYPE_NOT_SUPPORTED: Unsupported column type: " + type.getDisplayName() + ", simple:" + simple); + throw new TrinoException(NOT_SUPPORTED, "Unsupported column type: " + type.getDisplayName() + ", simple:" + simple); } @Override @@ -422,10 +322,9 @@ private static ColumnMapping columnMappingPushdown(ColumnMapping mapping) return new ColumnMapping(mapping.getType(), mapping.getReadFunction(), mapping.getWriteFunction(), PredicatePushdownController.FULL_PUSHDOWN); } - private static ColumnMapping timeColumnMapping(JdbcTypeHandle typeHandle) + private static ColumnMapping timeColumnMapping(int precision) { - int precision = typeHandle.getRequiredDecimalDigits(); - checkArgument(precision <= SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION, "The max timestamp precision in Snowflake is " + SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION); + checkArgument(precision <= MAX_SUPPORTED_TEMPORAL_PRECISION, "The max timestamp precision in Snowflake is " + MAX_SUPPORTED_TEMPORAL_PRECISION); return ColumnMapping.longMapping( TimeType.createTimeType(precision), (resultSet, columnIndex) -> { @@ -436,21 +335,15 @@ private static ColumnMapping timeColumnMapping(JdbcTypeHandle typeHandle) PredicatePushdownController.FULL_PUSHDOWN); } - private static LongWriteFunction snowFlaketimeWriter(Type type) - { - return timeWriteFunction(((TimeType) type).getPrecision()); - } - private static LongWriteFunction timeWriteFunction(int precision) { - checkArgument(precision <= SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION, "Unsupported precision: %s", precision); - String bindExpression = format("CAST(? AS time(%s))", precision); + checkArgument(precision <= MAX_SUPPORTED_TEMPORAL_PRECISION, "Unsupported precision: %s", precision); return new LongWriteFunction() { @Override public String getBindExpression() { - return bindExpression; + return format("CAST(? 
AS time(%s))", precision); } @Override @@ -474,16 +367,14 @@ private static ColumnMapping timestampTzColumnMapping(JdbcTypeHandle typeHandle) String jdbcTypeName = typeHandle.getJdbcTypeName() .orElseThrow(() -> new TrinoException(JDBC_ERROR, "Type name is missing: " + typeHandle)); int type = typeHandle.getJdbcType(); - log.debug("timestampTZColumnMapping: jdbcTypeName(%s):%s precision:%s", type, jdbcTypeName, precision); - - if (precision <= 3) { + if (precision <= MAX_SHORT_PRECISION) { return ColumnMapping.longMapping( createTimestampWithTimeZoneType(precision), (resultSet, columnIndex) -> { ZonedDateTime timestamp = SNOWFLAKE_DATETIME_FORMATTER.parse(resultSet.getString(columnIndex), ZonedDateTime::from); return DateTimeEncoding.packDateTimeWithZone(timestamp.toInstant().toEpochMilli(), timestamp.getZone().getId()); }, - timestampWithTZWriter(), + timestampWithTimezoneWriteFunction(), PredicatePushdownController.FULL_PUSHDOWN); } else { @@ -568,8 +459,8 @@ private static WriteMapping snowFlakeTimestampWriter(Type type) { TimestampType timestampType = (TimestampType) type; checkArgument( - timestampType.getPrecision() <= SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION, - "The max timestamp precision in Snowflake is " + SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION); + timestampType.getPrecision() <= MAX_SUPPORTED_TEMPORAL_PRECISION, + "The max timestamp precision in Snowflake is " + MAX_SUPPORTED_TEMPORAL_PRECISION); if (timestampType.isShort()) { return WriteMapping.longMapping(format("timestamp_ntz(%d)", timestampType.getPrecision()), timestampWriteFunction()); @@ -593,14 +484,14 @@ private static WriteMapping snowFlakeTimestampWithTZWriter(Type type) { TimestampWithTimeZoneType timeTZType = (TimestampWithTimeZoneType) type; - checkArgument(timeTZType.getPrecision() <= SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION, "Max Snowflake precision is is " + SNOWFLAKE_MAX_SUPPORTED_TIMESTAMP_PRECISION); + checkArgument(timeTZType.getPrecision() <= MAX_SUPPORTED_TEMPORAL_PRECISION, "Max Snowflake precision is is " + MAX_SUPPORTED_TEMPORAL_PRECISION); if (timeTZType.isShort()) { - return WriteMapping.longMapping(format("timestamp_tz(%d)", timeTZType.getPrecision()), timestampWithTZWriter()); + return WriteMapping.longMapping(format("timestamp_tz(%d)", timeTZType.getPrecision()), timestampWithTimezoneWriteFunction()); } return WriteMapping.objectMapping(format("timestamp_tz(%d)", timeTZType.getPrecision()), longTimestampWithTzWriteFunction()); } - private static LongWriteFunction timestampWithTZWriter() + private static LongWriteFunction timestampWithTimezoneWriteFunction() { return (statement, index, encodedTimeWithZone) -> { Instant instant = Instant.ofEpochMilli(DateTimeEncoding.unpackMillisUtc(encodedTimeWithZone)); diff --git a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClientModule.java b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClientModule.java index 19fc358471915..587ca8d11faab 100644 --- a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClientModule.java +++ b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeClientModule.java @@ -18,6 +18,7 @@ import com.google.inject.Provides; import com.google.inject.Scopes; import com.google.inject.Singleton; +import io.opentelemetry.api.OpenTelemetry; import io.trino.plugin.jdbc.BaseJdbcConfig; import io.trino.plugin.jdbc.ConnectionFactory; import io.trino.plugin.jdbc.DriverConnectionFactory; @@ -49,7 +50,7 @@ public void configure(Binder binder) 
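[Editor's note] Replacing the hard-coded 3 with MAX_SHORT_PRECISION in timestampTzColumnMapping above spells out why the branch exists: up to precision 3, Trino represents timestamp with time zone as a single long that packs epoch millis together with the zone key (the "short" form read via DateTimeEncoding.packDateTimeWithZone in the code above), while higher precisions need the object-based LongTimestampWithTimeZone mapping. A hedged summary of the split:

    // precision 0-3   -> long mapping: DateTimeEncoding.packDateTimeWithZone(epochMillis, zoneId), as in the read function above
    // precision 4-12  -> object mapping carrying LongTimestampWithTimeZone values
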
@Singleton @Provides @ForBaseJdbc - public ConnectionFactory getConnectionFactory(BaseJdbcConfig baseJdbcConfig, SnowflakeConfig snowflakeConfig, CredentialProvider credentialProvider) + public ConnectionFactory getConnectionFactory(BaseJdbcConfig baseJdbcConfig, SnowflakeConfig snowflakeConfig, CredentialProvider credentialProvider, OpenTelemetry openTelemetry) throws MalformedURLException { Properties properties = new Properties(); @@ -90,6 +91,6 @@ public ConnectionFactory getConnectionFactory(BaseJdbcConfig baseJdbcConfig, Sno } } - return new DriverConnectionFactory(new SnowflakeDriver(), baseJdbcConfig.getConnectionUrl(), properties, credentialProvider); + return new DriverConnectionFactory(new SnowflakeDriver(), baseJdbcConfig.getConnectionUrl(), properties, credentialProvider, openTelemetry); } } diff --git a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeConfig.java b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeConfig.java index 6dbf125201774..c002728f85b76 100644 --- a/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeConfig.java +++ b/plugin/trino-snowflake/src/main/java/io/trino/plugin/snowflake/SnowflakeConfig.java @@ -79,13 +79,6 @@ public Optional getTimestampNoTimezoneAsUTC() return Optional.ofNullable(timestampNoTimezoneAsUTC); } - @Config("snowflake.timestamp-no-timezone-as-utc") - public SnowflakeConfig setTimestampNoTimezoneAsUTC(Boolean timestampNoTimezoneAsUTC) - { - this.timestampNoTimezoneAsUTC = timestampNoTimezoneAsUTC; - return this; - } - public Optional getHTTPProxy() { return Optional.ofNullable(httpProxy); diff --git a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/BaseSnowflakeConnectorTest.java b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/BaseSnowflakeConnectorTest.java index b547624cebeac..0b64ddd61ee1c 100644 --- a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/BaseSnowflakeConnectorTest.java +++ b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/BaseSnowflakeConnectorTest.java @@ -59,12 +59,10 @@ protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior) return false; case SUPPORTS_COMMENT_ON_COLUMN: case SUPPORTS_ADD_COLUMN_WITH_COMMENT: - case SUPPORTS_COMMENT_ON_TABLE: case SUPPORTS_CREATE_TABLE_WITH_TABLE_COMMENT: case SUPPORTS_CREATE_TABLE_WITH_COLUMN_COMMENT: case SUPPORTS_SET_COLUMN_TYPE: return false; - case SUPPORTS_DROP_FIELD: case SUPPORTS_ROW_TYPE: case SUPPORTS_ARRAY: return false; @@ -323,7 +321,6 @@ public void testCreateTableAsSelect() "SELECT 1234567890, 123", "SELECT count(*) + 1 FROM nation"); - // TODO: BigQuery throws table not found at BigQueryClient.insert if we reuse the same table name tableName = "test_ctas" + randomNameSuffix(); assertExplainAnalyze("EXPLAIN ANALYZE CREATE TABLE " + tableName + " AS SELECT name FROM nation"); assertQuery("SELECT * from " + tableName, "SELECT name FROM nation"); @@ -357,7 +354,6 @@ public void testCreateTable() assertQueryFails("CREATE TABLE " + tableName + " (a bad_type)", ".* Unknown type 'bad_type' for column 'a'"); assertFalse(getQueryRunner().tableExists(getSession(), tableName)); - // TODO (https://github.com/trinodb/trino/issues/5901) revert to longer name when Oracle version is updated tableName = "test_cr_not_exists_" + randomNameSuffix(); assertUpdate("CREATE TABLE " + tableName + " (a bigint, b varchar(50), c double)"); assertTrue(getQueryRunner().tableExists(getSession(), tableName)); diff --git 
a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/SnowflakeQueryRunner.java b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/SnowflakeQueryRunner.java index a50debaf003b6..2f877068f88af 100644 --- a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/SnowflakeQueryRunner.java +++ b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/SnowflakeQueryRunner.java @@ -36,7 +36,6 @@ public final class SnowflakeQueryRunner private SnowflakeQueryRunner() {} public static DistributedQueryRunner createSnowflakeQueryRunner( - TestingSnowflakeServer server, Map extraProperties, Map connectorProperties, Iterable> tables) @@ -85,7 +84,6 @@ public static void main(String[] args) throws Exception { DistributedQueryRunner queryRunner = createSnowflakeQueryRunner( - new TestingSnowflakeServer(), ImmutableMap.of("http-server.http.port", "8080"), ImmutableMap.of(), ImmutableList.of()); diff --git a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConfig.java b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConfig.java index eb5c32a3d063c..93b4dc8dff9b0 100644 --- a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConfig.java +++ b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConfig.java @@ -32,8 +32,7 @@ public void testDefaults() .setDatabase(null) .setRole(null) .setWarehouse(null) - .setHTTPProxy(null) - .setTimestampNoTimezoneAsUTC(null)); + .setHTTPProxy(null)); } @Test @@ -53,8 +52,7 @@ public void testExplicitPropertyMappings() .setDatabase("MYDATABASE") .setRole("MYROLE") .setWarehouse("MYWAREHOUSE") - .setHTTPProxy("MYPROXY") - .setTimestampNoTimezoneAsUTC(true); + .setHTTPProxy("MYPROXY"); assertFullMapping(properties, expected); } diff --git a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConnectorTest.java b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConnectorTest.java index 8b9b0c78c73b5..b448e5756c0b0 100644 --- a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConnectorTest.java +++ b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeConnectorTest.java @@ -26,8 +26,7 @@ public class TestSnowflakeConnectorTest protected QueryRunner createQueryRunner() throws Exception { - server = closeAfterClass(new TestingSnowflakeServer()); - return createSnowflakeQueryRunner(server, ImmutableMap.of(), ImmutableMap.of(), REQUIRED_TPCH_TABLES); + return createSnowflakeQueryRunner(ImmutableMap.of(), ImmutableMap.of(), REQUIRED_TPCH_TABLES); } @Override diff --git a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeTypeMapping.java b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeTypeMapping.java index 5377e013a6cd8..1e7a28572b6e8 100644 --- a/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeTypeMapping.java +++ b/plugin/trino-snowflake/src/test/java/io/trino/plugin/snowflake/TestSnowflakeTypeMapping.java @@ -65,7 +65,6 @@ public class TestSnowflakeTypeMapping @BeforeAll public void setUp() { - String zone = jvmZone.getId(); checkState(jvmZone.getId().equals("America/Bahia_Banderas"), "Timezone not configured correctly. 
Add -Duser.timezone=America/Bahia_Banderas to your JVM arguments"); checkIsGap(jvmZone, LocalDate.of(1970, 1, 1)); checkIsGap(vilnius, LocalDate.of(1983, 4, 1)); @@ -76,9 +75,7 @@ public void setUp() protected QueryRunner createQueryRunner() throws Exception { - snowflakeServer = new TestingSnowflakeServer(); return createSnowflakeQueryRunner( - snowflakeServer, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of()); @@ -322,7 +319,6 @@ private void testTimestamp(ZoneId sessionZone) .build(); SqlDataTypeTest.create() - // after epoch (MariaDb's timestamp type doesn't support values <= epoch) .addRoundTrip("timestamp(3)", "TIMESTAMP '2019-03-18 10:01:17.987'", createTimestampType(3), "TIMESTAMP '2019-03-18 10:01:17.987'") // time doubled in JVM zone .addRoundTrip("timestamp(3)", "TIMESTAMP '2018-10-28 01:33:17.456'", createTimestampType(3), "TIMESTAMP '2018-10-28 01:33:17.456'") diff --git a/plugin/trino-sqlserver/pom.xml b/plugin/trino-sqlserver/pom.xml index 7e6e87f22753a..441ee718262a0 100644 --- a/plugin/trino-sqlserver/pom.xml +++ b/plugin/trino-sqlserver/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,15 +15,6 @@ ${project.parent.basedir} - - - instances diff --git a/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java b/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java index a9dfb09b7d4a7..a148e184ce72d 100644 --- a/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java +++ b/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java @@ -60,9 +60,10 @@ import io.trino.plugin.jdbc.aggregation.ImplementAvgFloatingPoint; import io.trino.plugin.jdbc.aggregation.ImplementMinMax; import io.trino.plugin.jdbc.aggregation.ImplementSum; +import io.trino.plugin.jdbc.expression.ComparisonOperator; import io.trino.plugin.jdbc.expression.JdbcConnectorExpressionRewriterBuilder; import io.trino.plugin.jdbc.expression.ParameterizedExpression; -import io.trino.plugin.jdbc.expression.RewriteComparison; +import io.trino.plugin.jdbc.expression.RewriteCaseSensitiveComparison; import io.trino.plugin.jdbc.expression.RewriteIn; import io.trino.plugin.jdbc.logging.RemoteQueryModifier; import io.trino.spi.TrinoException; @@ -76,6 +77,7 @@ import io.trino.spi.connector.TableNotFoundException; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; import io.trino.spi.predicate.ValueSet; import io.trino.spi.statistics.ColumnStatistics; import io.trino.spi.statistics.Estimate; @@ -91,6 +93,7 @@ import io.trino.spi.type.Type; import io.trino.spi.type.VarbinaryType; import io.trino.spi.type.VarcharType; +import microsoft.sql.DateTimeOffset; import org.jdbi.v3.core.Handle; import org.jdbi.v3.core.Jdbi; @@ -102,6 +105,7 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; +import java.sql.Timestamp; import java.sql.Types; import java.time.Instant; import java.time.LocalDate; @@ -110,6 +114,7 @@ import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -179,6 +184,7 @@ import static io.trino.spi.type.RealType.REAL; import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TimeType.createTimeType; +import static 
io.trino.spi.type.TimeZoneKey.UTC_KEY; import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey; import static io.trino.spi.type.TimestampType.MAX_SHORT_PRECISION; import static io.trino.spi.type.TimestampType.createTimestampType; @@ -199,6 +205,7 @@ import static java.lang.String.format; import static java.lang.String.join; import static java.math.RoundingMode.UNNECESSARY; +import static java.time.temporal.ChronoField.NANO_OF_SECOND; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.joining; @@ -225,6 +232,13 @@ public class SqlServerClient private static final int MAX_SUPPORTED_TEMPORAL_PRECISION = 7; + private static final DateTimeFormatter DATE_TIME_OFFSET_FORMATTER = new DateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd HH:mm:ss") + .appendFraction(NANO_OF_SECOND, 0, MAX_SUPPORTED_TEMPORAL_PRECISION, true) + .appendPattern(" ") + .appendZoneId() + .toFormatter(); + private static final PredicatePushdownController SQLSERVER_CHARACTER_PUSHDOWN = (session, domain) -> { if (domain.isNullableSingleValue()) { return FULL_PUSHDOWN.apply(session, domain); @@ -239,12 +253,39 @@ public class SqlServerClient } // Domain#simplify can turn a discrete set into a range predicate // Push down of range predicate for varchar/char types could lead to incorrect results - // when the remote database is case insensitive + // when the remote database is case-insensitive return DISABLE_PUSHDOWN.apply(session, domain); } return FULL_PUSHDOWN.apply(session, simplifiedDomain); }; + // Dates prior to the Gregorian calendar switch in 1582 can cause incorrect results when pushed down, + // so we disable predicate push down when the domain contains values prior to 1583 + private static final Instant GREGORIAN_SWITCH_INSTANT = Instant.parse("1583-01-01T00:00:00Z"); + private static final DateTimeOffset GREGORIAN_SWITCH_DATETIMEOFFSET = DateTimeOffset.valueOf(new Timestamp(GREGORIAN_SWITCH_INSTANT.toEpochMilli()), 0); + private static final LongTimestampWithTimeZone LONG_DATETIMEOFFSET_DISABLE_VALUE = + LongTimestampWithTimeZone.fromEpochSecondsAndFraction( + GREGORIAN_SWITCH_INSTANT.getEpochSecond(), + (long) GREGORIAN_SWITCH_INSTANT.getNano() * PICOSECONDS_PER_NANOSECOND, + UTC_KEY); + private static final long SHORT_DATETIMEOFFSET_DISABLE_VALUE = GREGORIAN_SWITCH_INSTANT.toEpochMilli(); + + private static final PredicatePushdownController SQLSERVER_DATE_TIME_PUSHDOWN = (session, domain) -> { + Domain simplifiedDomain = domain.simplify(getDomainCompactionThreshold(session)); + for (Range range : simplifiedDomain.getValues().getRanges().getOrderedRanges()) { + Range disableRange = range.getType().getJavaType().equals(LongTimestampWithTimeZone.class) + ? 
Range.lessThan(range.getType(), LONG_DATETIMEOFFSET_DISABLE_VALUE) + : Range.lessThan(range.getType(), SHORT_DATETIMEOFFSET_DISABLE_VALUE); + + // If there is any overlap of any predicate range and (-inf, 1583), disable push down + if (range.overlaps(disableRange)) { + return DISABLE_PUSHDOWN.apply(session, domain); + } + } + + return FULL_PUSHDOWN.apply(session, domain); + }; + @Inject public SqlServerClient( BaseJdbcConfig config, @@ -260,9 +301,16 @@ public SqlServerClient( this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) - .add(new RewriteComparison(ImmutableSet.of(RewriteComparison.ComparisonOperator.EQUAL, RewriteComparison.ComparisonOperator.NOT_EQUAL))) .add(new RewriteIn()) .withTypeClass("integer_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint")) + .withTypeClass("numeric_type", ImmutableSet.of("tinyint", "smallint", "integer", "bigint", "decimal", "real", "double")) + .map("$equal(left: numeric_type, right: numeric_type)").to("left = right") + .map("$not_equal(left: numeric_type, right: numeric_type)").to("left <> right") + .map("$less_than(left: numeric_type, right: numeric_type)").to("left < right") + .map("$less_than_or_equal(left: numeric_type, right: numeric_type)").to("left <= right") + .map("$greater_than(left: numeric_type, right: numeric_type)").to("left > right") + .map("$greater_than_or_equal(left: numeric_type, right: numeric_type)").to("left >= right") + .add(new RewriteCaseSensitiveComparison(ImmutableSet.of(ComparisonOperator.EQUAL, ComparisonOperator.NOT_EQUAL))) .map("$add(left: integer_type, right: integer_type)").to("left + right") .map("$subtract(left: integer_type, right: integer_type)").to("left - right") .map("$multiply(left: integer_type, right: integer_type)").to("left * right") @@ -838,6 +886,26 @@ protected String escapeObjectNameForMetadataQuery(String name, String escape) @Override public Optional implementJoin( + ConnectorSession session, + JoinType joinType, + PreparedQuery leftSource, + Map leftProjections, + PreparedQuery rightSource, + Map rightProjections, + List joinConditions, + JoinStatistics statistics) + { + return implementJoinCostAware( + session, + joinType, + leftSource, + rightSource, + statistics, + () -> super.implementJoin(session, joinType, leftSource, leftProjections, rightSource, rightProjections, joinConditions, statistics)); + } + + @Override + public Optional legacyImplementJoin( ConnectorSession session, JoinType joinType, PreparedQuery leftSource, @@ -853,7 +921,7 @@ public Optional implementJoin( leftSource, rightSource, statistics, - () -> super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); + () -> super.legacyImplementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } private LongWriteFunction sqlServerTimeWriteFunction(int precision) @@ -894,19 +962,29 @@ private static ColumnMapping timestampWithTimeZoneColumnMapping(int precision) return ColumnMapping.longMapping( createTimestampWithTimeZoneType(precision), shortTimestampWithTimeZoneReadFunction(), - shortTimestampWithTimeZoneWriteFunction()); + shortTimestampWithTimeZoneWriteFunction(), + SQLSERVER_DATE_TIME_PUSHDOWN); } return ColumnMapping.objectMapping( createTimestampWithTimeZoneType(precision), longTimestampWithTimeZoneReadFunction(), - longTimestampWithTimeZoneWriteFunction()); + longTimestampWithTimeZoneWriteFunction(), + 
SQLSERVER_DATE_TIME_PUSHDOWN); } private static LongReadFunction shortTimestampWithTimeZoneReadFunction() { return (resultSet, columnIndex) -> { - OffsetDateTime offsetDateTime = resultSet.getObject(columnIndex, OffsetDateTime.class); - ZonedDateTime zonedDateTime = offsetDateTime.toZonedDateTime(); + ZonedDateTime zonedDateTime; + DateTimeOffset dateTimeOffset = resultSet.getObject(columnIndex, DateTimeOffset.class); + if (dateTimeOffset.compareTo(GREGORIAN_SWITCH_DATETIMEOFFSET) < 0) { + String stringValue = resultSet.getString(columnIndex); + zonedDateTime = ZonedDateTime.from(DATE_TIME_OFFSET_FORMATTER.parse(stringValue)); + } + else { + zonedDateTime = dateTimeOffset.getOffsetDateTime().toZonedDateTime(); + } + return packDateTimeWithZone(zonedDateTime.toInstant().toEpochMilli(), zonedDateTime.getZone().getId()); }; } @@ -925,7 +1003,16 @@ private static ObjectReadFunction longTimestampWithTimeZoneReadFunction() return ObjectReadFunction.of( LongTimestampWithTimeZone.class, (resultSet, columnIndex) -> { - OffsetDateTime offsetDateTime = resultSet.getObject(columnIndex, OffsetDateTime.class); + OffsetDateTime offsetDateTime; + DateTimeOffset dateTimeOffset = resultSet.getObject(columnIndex, DateTimeOffset.class); + if (dateTimeOffset.compareTo(GREGORIAN_SWITCH_DATETIMEOFFSET) < 0) { + String stringValue = resultSet.getString(columnIndex); + offsetDateTime = ZonedDateTime.from(DATE_TIME_OFFSET_FORMATTER.parse(stringValue)).toOffsetDateTime(); + } + else { + offsetDateTime = dateTimeOffset.getOffsetDateTime(); + } + return LongTimestampWithTimeZone.fromEpochSecondsAndFraction( offsetDateTime.toEpochSecond(), (long) offsetDateTime.getNano() * PICOSECONDS_PER_NANOSECOND, diff --git a/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerConnectorTest.java b/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerConnectorTest.java index 1ed1ec95b8eea..779d10220df45 100644 --- a/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerConnectorTest.java +++ b/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerConnectorTest.java @@ -24,6 +24,7 @@ import io.trino.sql.planner.plan.FilterNode; import io.trino.testing.TestingConnectorBehavior; import io.trino.testing.sql.TestTable; +import io.trino.testing.sql.TestView; import org.junit.jupiter.api.Test; import java.util.List; @@ -111,10 +112,10 @@ protected Optional filterDataMappingSmokeTestData(DataMapp @Test public void testReadFromView() { - onRemoteDatabase().execute("CREATE VIEW test_view AS SELECT * FROM orders"); - assertThat(getQueryRunner().tableExists(getSession(), "test_view")).isTrue(); - assertQuery("SELECT orderkey FROM test_view", "SELECT orderkey FROM orders"); - onRemoteDatabase().execute("DROP VIEW IF EXISTS test_view"); + try (TestView view = new TestView(onRemoteDatabase(), "test_view", "SELECT * FROM orders")) { + assertThat(getQueryRunner().tableExists(getSession(), view.getName())).isTrue(); + assertQuery("SELECT orderkey FROM " + view.getName(), "SELECT orderkey FROM orders"); + } } @Override @@ -459,31 +460,40 @@ private void testCreateWithDataCompression(DataCompression dataCompression) @Test public void testShowCreateForPartitionedTablesWithDataCompression() { - onRemoteDatabase().execute("CREATE PARTITION FUNCTION pfSales (DATE)\n" + - "AS RANGE LEFT FOR VALUES \n" + - "('2013-01-01', '2014-01-01', '2015-01-01')"); - onRemoteDatabase().execute("CREATE PARTITION SCHEME psSales\n" + - "AS PARTITION pfSales \n" + - "ALL TO 
([PRIMARY])"); - onRemoteDatabase().execute("CREATE TABLE partitionedsales (\n" + - " SalesDate DATE,\n" + - " Quantity INT\n" + - ") ON psSales(SalesDate) WITH (DATA_COMPRESSION = PAGE)"); - assertThat((String) computeActual("SHOW CREATE TABLE partitionedsales").getOnlyValue()) - .matches("CREATE TABLE \\w+\\.\\w+\\.partitionedsales \\Q(\n" + - " salesdate date,\n" + - " quantity integer\n" + - ")"); - assertUpdate("DROP TABLE partitionedSales"); - onRemoteDatabase().execute("DROP PARTITION SCHEME psSales"); - onRemoteDatabase().execute("DROP PARTITION FUNCTION pfSales"); + String partitionFunction = "pfSales" + randomNameSuffix(); + String partitionScheme = "psSales" + randomNameSuffix(); + String tableName = "partitionedsales" + randomNameSuffix(); + + try { + onRemoteDatabase().execute("CREATE PARTITION FUNCTION " + partitionFunction + " (DATE)\n" + + "AS RANGE LEFT FOR VALUES \n" + + "('2013-01-01', '2014-01-01', '2015-01-01')"); + onRemoteDatabase().execute("CREATE PARTITION SCHEME " + partitionScheme + "\n" + + "AS PARTITION " + partitionFunction + " \n" + + "ALL TO ([PRIMARY])"); + onRemoteDatabase().execute("CREATE TABLE " + tableName + " (\n" + + " SalesDate DATE,\n" + + " Quantity INT\n" + + ") ON " + partitionScheme + "(SalesDate) WITH (DATA_COMPRESSION = PAGE)"); + assertThat((String) computeActual("SHOW CREATE TABLE " + tableName).getOnlyValue()) + .matches("CREATE TABLE \\w+\\.\\w+\\." + tableName + " \\Q(\n" + + " salesdate date,\n" + + " quantity integer\n" + + ")"); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + onRemoteDatabase().execute("DROP PARTITION SCHEME " + partitionScheme); + onRemoteDatabase().execute("DROP PARTITION FUNCTION " + partitionFunction); + } } @Test public void testShowCreateForIndexedAndCompressedTable() { // SHOW CREATE doesn't expose data compression for Indexed tables - onRemoteDatabase().execute("CREATE TABLE test_show_indexed_table (\n" + + String tableName = "test_show_indexed_table" + randomNameSuffix(); + onRemoteDatabase().execute("CREATE TABLE " + tableName + " (\n" + " key1 BIGINT NOT NULL,\n" + " key2 BIGINT NOT NULL,\n" + " key3 BIGINT NOT NULL,\n" + @@ -493,23 +503,26 @@ public void testShowCreateForIndexedAndCompressedTable() " CONSTRAINT IX_IndexedTable UNIQUE (key2, key3),\n" + " INDEX IX_MyTable4 NONCLUSTERED (key4, key5))\n" + " WITH (DATA_COMPRESSION = PAGE)"); - - assertThat((String) computeActual("SHOW CREATE TABLE test_show_indexed_table").getOnlyValue()) - .isEqualTo("CREATE TABLE sqlserver.dbo.test_show_indexed_table (\n" + - " key1 bigint NOT NULL,\n" + - " key2 bigint NOT NULL,\n" + - " key3 bigint NOT NULL,\n" + - " key4 bigint NOT NULL,\n" + - " key5 bigint NOT NULL\n" + - ")"); - - assertUpdate("DROP TABLE test_show_indexed_table"); + try { + assertThat((String) computeActual("SHOW CREATE TABLE " + tableName).getOnlyValue()) + .isEqualTo("CREATE TABLE sqlserver.dbo." 
+ tableName + " (\n" + + " key1 bigint NOT NULL,\n" + + " key2 bigint NOT NULL,\n" + + " key3 bigint NOT NULL,\n" + + " key4 bigint NOT NULL,\n" + + " key5 bigint NOT NULL\n" + + ")"); + } + finally { + assertUpdate("DROP TABLE " + tableName); + } } @Test public void testShowCreateForUniqueConstraintCompressedTable() { - onRemoteDatabase().execute("CREATE TABLE test_show_unique_constraint_table (\n" + + String tableName = "test_show_unique_constraint_table" + randomNameSuffix(); + onRemoteDatabase().execute("CREATE TABLE " + tableName + " (\n" + " key1 BIGINT NOT NULL,\n" + " key2 BIGINT NOT NULL,\n" + " key3 BIGINT NOT NULL,\n" + @@ -518,20 +531,22 @@ public void testShowCreateForUniqueConstraintCompressedTable() " UNIQUE (key1, key4),\n" + " UNIQUE (key2, key3))\n" + " WITH (DATA_COMPRESSION = PAGE)"); - - assertThat((String) computeActual("SHOW CREATE TABLE test_show_unique_constraint_table").getOnlyValue()) - .isEqualTo("CREATE TABLE sqlserver.dbo.test_show_unique_constraint_table (\n" + - " key1 bigint NOT NULL,\n" + - " key2 bigint NOT NULL,\n" + - " key3 bigint NOT NULL,\n" + - " key4 bigint NOT NULL,\n" + - " key5 bigint NOT NULL\n" + - ")\n" + - "WITH (\n" + - " data_compression = 'PAGE'\n" + - ")"); - - assertUpdate("DROP TABLE test_show_unique_constraint_table"); + try { + assertThat((String) computeActual("SHOW CREATE TABLE " + tableName).getOnlyValue()) + .isEqualTo("CREATE TABLE sqlserver.dbo." + tableName + " (\n" + + " key1 bigint NOT NULL,\n" + + " key2 bigint NOT NULL,\n" + + " key3 bigint NOT NULL,\n" + + " key4 bigint NOT NULL,\n" + + " key5 bigint NOT NULL\n" + + ")\n" + + "WITH (\n" + + " data_compression = 'PAGE'\n" + + ")"); + } + finally { + assertUpdate("DROP TABLE " + tableName); + } } @Test diff --git a/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerTypeMapping.java b/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerTypeMapping.java index f6aae0bb6f1d7..97144a7092816 100644 --- a/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerTypeMapping.java +++ b/plugin/trino-sqlserver/src/test/java/io/trino/plugin/sqlserver/BaseSqlServerTypeMapping.java @@ -34,6 +34,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneId; +import java.util.List; import java.util.function.Function; import static com.google.common.base.Preconditions.checkState; @@ -53,9 +54,11 @@ import static io.trino.spi.type.VarbinaryType.VARBINARY; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.spi.type.VarcharType.createVarcharType; +import static io.trino.sql.planner.assertions.PlanMatchPattern.tableScan; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; import static java.time.ZoneOffset.UTC; +import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; @@ -836,6 +839,138 @@ private void testSqlServerDatetimeOffset(ZoneId sessionZone) .execute(getQueryRunner(), session, sqlServerCreateAndInsert("test_sqlserver_datetimeoffset")); } + @Test + public void testSqlServerDatetimeOffsetHistoricalDates() + { + testSqlServerDatetimeOffsetHistoricalDates(UTC); + testSqlServerDatetimeOffsetHistoricalDates(ZoneId.systemDefault()); + // using two non-JVM zones so that we don't need to worry what SQL 
Server system zone is + // no DST in 1970, but has DST in later years (e.g. 2018) + testSqlServerDatetimeOffsetHistoricalDates(ZoneId.of("Europe/Vilnius")); + // minutes offset change since 1970-01-01, no DST + testSqlServerDatetimeOffsetHistoricalDates(ZoneId.of("Asia/Kathmandu")); + testSqlServerDatetimeOffsetHistoricalDates(TestingSession.DEFAULT_TIME_ZONE_KEY.getZoneId()); + } + + public void testSqlServerDatetimeOffsetHistoricalDates(ZoneId sessionZone) + { + Session session = Session.builder(getSession()) + .setTimeZoneKey(TimeZoneKey.getTimeZoneKey(sessionZone.getId())) + .build(); + + SqlDataTypeTest.create() + .addRoundTrip("DATETIMEOFFSET(0)", "'1400-09-27 00:00:00+07:00'", createTimestampWithTimeZoneType(0), "TIMESTAMP '1400-09-27 00:00:00+07:00'") + .addRoundTrip("DATETIMEOFFSET(1)", "'1400-09-27 00:00:00.1+07:00'", createTimestampWithTimeZoneType(1), "TIMESTAMP '1400-09-27 00:00:00.1+07:00'") + .addRoundTrip("DATETIMEOFFSET(2)", "'1400-09-27 00:00:00.12+07:00'", createTimestampWithTimeZoneType(2), "TIMESTAMP '1400-09-27 00:00:00.12+07:00'") + .addRoundTrip("DATETIMEOFFSET(3)", "'1400-09-27 00:00:00.123+07:00'", createTimestampWithTimeZoneType(3), "TIMESTAMP '1400-09-27 00:00:00.123+07:00'") + .addRoundTrip("DATETIMEOFFSET(4)", "'1400-09-27 00:00:00.1234+07:00'", createTimestampWithTimeZoneType(4), "TIMESTAMP '1400-09-27 00:00:00.1234+07:00'") + .addRoundTrip("DATETIMEOFFSET(5)", "'1400-09-27 00:00:00.12345+07:00'", createTimestampWithTimeZoneType(5), "TIMESTAMP '1400-09-27 00:00:00.12345+07:00'") + .addRoundTrip("DATETIMEOFFSET(6)", "'1400-09-27 00:00:00.123456+07:00'", createTimestampWithTimeZoneType(6), "TIMESTAMP '1400-09-27 00:00:00.123456+07:00'") + .addRoundTrip("DATETIMEOFFSET(7)", "'1400-09-27 00:00:00.1234567+07:00'", createTimestampWithTimeZoneType(7), "TIMESTAMP '1400-09-27 00:00:00.1234567+07:00'") + .execute(getQueryRunner(), session, sqlServerCreateAndInsert("test_sqlserver_datetimeoffset_historical_date")); + } + + @Test + public void testSqlServerDatetimeOffsetHistoricalDatesRangeQuery() + { + // Tests the custom predicate push down controller for DATETIMEOFFSET types with values before and after 1583 + List dateTimeOffsetValues = List.of( + "'1400-01-01 00:00:00.1234567+00:00'", + "'1500-01-01 00:00:00.1234567+00:00'", + "'1582-12-31 23:59:59.9999999+00:00'", + "'1583-01-01 00:00:00+00:00'", + "'1583-01-01 00:00:00.1234567+00:00'", + "'1600-01-01 00:00:00.1234567+00:00'", + "'1700-01-01 00:00:00.1234567+00:00'", + "'1800-01-01 00:00:00.1234567+00:00'", + "'1900-01-01 00:00:00.1234567+00:00'"); + + try (TestTable table = new TestTable(onRemoteDatabase(), "test_sqlserver_datetimeoffset_historical_date_range_query", "(col0 datetimeoffset(7))", dateTimeOffsetValues)) { + assertThat(query("SELECT count(*) FROM " + table.getName())) + .matches("SELECT CAST(9 AS BIGINT)") + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 <= TIMESTAMP '1582-12-31 23:59:59.9999999+00:00'")) + .matches(""" + VALUES (TIMESTAMP '1400-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1500-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 >= TIMESTAMP '1583-01-01 00:00:00+00:00'")) + .matches(""" + VALUES (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1600-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1700-01-01 
00:00:00.1234567+00:00'), + (TIMESTAMP '1800-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1900-01-01 00:00:00.1234567+00:00')""") + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 IN (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00', TIMESTAMP '1583-01-01 00:00:00+00:00')")) + .matches(""" + VALUES (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00'), + (TIMESTAMP '1583-01-01 00:00:00+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 IN (TIMESTAMP '1583-01-01 00:00:00+00:00', TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')")) + .matches(""" + VALUES (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')""") + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 NOT IN (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00', TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')")) + .matches(""" + VALUES (TIMESTAMP '1400-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1500-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1700-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1800-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1900-01-01 00:00:00.1234567+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 NOT IN (TIMESTAMP '1583-01-01 00:00:00+00:00', TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')")) + .matches(""" + VALUES (TIMESTAMP '1400-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1500-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1700-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1800-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1900-01-01 00:00:00.1234567+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 BETWEEN TIMESTAMP '1582-12-31 23:59:59.9999999+00:00' AND TIMESTAMP '1600-01-01 00:00:00.1234567+00:00'")) + .matches(""" + VALUES (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00'), + (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 BETWEEN TIMESTAMP '1583-01-01 00:00:00+00:00' AND TIMESTAMP '1600-01-01 00:00:00.1234567+00:00'")) + .matches(""" + VALUES (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1600-01-01 00:00:00.1234567+00:00')""") + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM " + table.getName() + " WHERE col0 <= TIMESTAMP '1990-01-01 00:00:00+00:00'")) + .matches(""" + VALUES (TIMESTAMP '1400-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1500-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1582-12-31 23:59:59.9999999+00:00'), + (TIMESTAMP '1583-01-01 00:00:00+00:00'), + (TIMESTAMP '1583-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1600-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1700-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1800-01-01 00:00:00.1234567+00:00'), + (TIMESTAMP '1900-01-01 00:00:00.1234567+00:00')""") + .isNotFullyPushedDown(tableScan(table.getName())); + } + } + protected DataSetup trinoCreateAsSelect(String tableNamePrefix) { return 
trinoCreateAsSelect(getSession(), tableNamePrefix); diff --git a/plugin/trino-teradata-functions/pom.xml b/plugin/trino-teradata-functions/pom.xml index 9f6091c6856fb..3246a4c5429c9 100644 --- a/plugin/trino-teradata-functions/pom.xml +++ b/plugin/trino-teradata-functions/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-thrift-api/pom.xml b/plugin/trino-thrift-api/pom.xml index 1ccccd4bb56eb..fec2de0085624 100644 --- a/plugin/trino-thrift-api/pom.xml +++ b/plugin/trino-thrift-api/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -83,12 +83,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/plugin/trino-thrift-testing-server/pom.xml b/plugin/trino-thrift-testing-server/pom.xml index 9ac075a82a1d0..eded21b2756b2 100644 --- a/plugin/trino-thrift-testing-server/pom.xml +++ b/plugin/trino-thrift-testing-server/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-thrift/pom.xml b/plugin/trino-thrift/pom.xml index ba76784280563..0284ae57d8de9 100644 --- a/plugin/trino-thrift/pom.xml +++ b/plugin/trino-thrift/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -241,12 +241,6 @@ junit-jupiter-engine test - - - org.testng - testng - test - diff --git a/plugin/trino-tpcds/pom.xml b/plugin/trino-tpcds/pom.xml index eb484191d85b3..4d9a87f765870 100644 --- a/plugin/trino-tpcds/pom.xml +++ b/plugin/trino-tpcds/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/plugin/trino-tpch/pom.xml b/plugin/trino-tpch/pom.xml index 721574b2eab7b..5144675d01ea5 100644 --- a/plugin/trino-tpch/pom.xml +++ b/plugin/trino-tpch/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/pom.xml b/pom.xml index 97d3d383ef682..3e1c58b500535 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT pom ${project.artifactId} @@ -77,7 +77,6 @@ plugin/trino-geospatial plugin/trino-google-sheets plugin/trino-hive - plugin/trino-hive-hadoop2 plugin/trino-http-event-listener plugin/trino-hudi plugin/trino-iceberg @@ -93,6 +92,7 @@ plugin/trino-mongodb plugin/trino-mysql plugin/trino-mysql-event-listener + plugin/trino-opensearch plugin/trino-oracle plugin/trino-password-authenticators plugin/trino-phoenix5 @@ -139,12 +139,12 @@ - 17 + 21 true true true - 17.0.5 + 21.0.1 -missing ${project.basedir} 8 @@ -176,31 +176,32 @@ 1.10.2 239 4.13.1 - 14.0.1 + 14.0.2 1.11.3 - 1.12.604 + 1.12.630 4.17.0 7.5.1 87 1.21 1.0.8 2.23.0 - 10.1.0 + 10.4.1 1.43.3 - 1.4.2 - 5.13.0 + 1.4.3 + 5.14.0 + 2.12.6 0.12.3 - 3.6.0 + 3.6.1 1.9.20 3.6.0 ${dep.airlift.version} 1.13.1 3.25.1 - 2.2.19 + 2.2.20 2.1.2 2.0.62.Final 201 - 4.8.0 + 4.8.1 @@ -209,7 +210,7 @@ com.azure azure-sdk-bom - 1.2.18 + 1.2.19 pom import @@ -249,7 +250,7 @@ io.grpc grpc-bom - 1.59.1 + 1.60.1 pom import @@ -265,7 +266,7 @@ org.eclipse.jetty jetty-bom - 11.0.18 + 11.0.19 pom import @@ -273,7 +274,7 @@ org.jdbi jdbi3-bom - 3.42.0 + 3.43.0 pom import @@ -297,7 +298,7 @@ software.amazon.awssdk bom - 2.21.37 + 2.22.10 pom import @@ -305,7 +306,7 @@ com.adobe.testing s3mock-testcontainers - 3.2.0 + 3.3.0 @@ -496,13 +497,13 @@ com.github.luben zstd-jni - 1.5.5-10 + 1.5.5-11 com.github.oshi oshi-core - 6.4.7 + 6.4.10 @@ -514,7 +515,7 @@ com.google.cloud.bigdataoss gcs-connector - hadoop3-2.2.18 + 3.0.0 shaded @@ -588,13 +589,13 @@ 
com.nimbusds nimbus-jose-jwt - 9.37.2 + 9.37.3 com.nimbusds oauth2-oidc-sdk - 11.7 + 11.9 jdk11 @@ -637,7 +638,7 @@ commons-io commons-io - 2.15.0 + 2.15.1 @@ -827,7 +828,7 @@ io.dropwizard.metrics metrics-core - 4.2.22 + 4.2.23 @@ -932,7 +933,7 @@ io.projectreactor reactor-core - 3.4.33 + 3.4.34 @@ -1140,12 +1141,6 @@ ${project.version} - - io.trino - trino-hive-hadoop2 - ${project.version} - - io.trino trino-hudi @@ -1631,7 +1626,7 @@ net.bytebuddy byte-buddy - 1.14.10 + 1.14.11 @@ -1659,30 +1654,6 @@ 2.3 - - org.alluxio - alluxio-shaded-client - 2.9.3 - - - commons-logging - commons-logging - - - log4j - log4j - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - - org.antlr antlr4-runtime @@ -1953,7 +1924,7 @@ org.checkerframework checker-qual - 3.40.0 + 3.42.0 @@ -1965,7 +1936,7 @@ org.codehaus.plexus plexus-xml - 4.0.2 + 4.0.3 @@ -1996,7 +1967,7 @@ org.javassist javassist - 3.29.2-GA + 3.30.2-GA @@ -2026,7 +1997,7 @@ org.mariadb.jdbc mariadb-java-client - 3.3.1 + 3.3.2 @@ -2044,7 +2015,7 @@ org.postgresql postgresql - 42.7.0 + 42.7.1 @@ -2056,7 +2027,7 @@ org.roaringbitmap RoaringBitmap - 1.0.0 + 1.0.1 @@ -2065,6 +2036,12 @@ 1.13.1 + + org.threeten + threetenbp + 1.6.8 + + org.xerial.snappy snappy-java @@ -2214,6 +2191,7 @@ com/google/common/collect/Iterables.getOnlyElement:(Ljava/lang/Iterable;)Ljava/lang/Object; + com/google/common/collect/Iterables.getOnlyElement:(Ljava/lang/Iterable;Ljava/lang/Object;)Ljava/lang/Object; com/google/common/collect/Iterables.getLast:(Ljava/lang/Iterable;)Ljava/lang/Object; com/google/common/collect/Iterables.getLast:(Ljava/lang/Iterable;Ljava/lang/Object;)Ljava/lang/Object; @@ -2234,7 +2212,6 @@ com/google/common/collect/Iterables.getLast:(Ljava/lang/Iterable;)Ljava/lang/Object; com/google/common/collect/Iterables.cycle:(Ljava/lang/Iterable;)Ljava/lang/Iterable; com/google/common/collect/Iterables.cycle:([Ljava/lang/Object;)Ljava/lang/Iterable; - com/google/common/collect/Iterables.getOnlyElement:(Ljava/lang/Iterable;Ljava/lang/Object;)Ljava/lang/Object; com/google/common/io/BaseEncoding.base64:()Lcom/google/common/io/BaseEncoding; @@ -2408,80 +2385,6 @@ opencensus/proto/trace/v1/trace_config.proto - - - - io.grpc - grpc-services - - - org.alluxio - alluxio-shaded-client - - - - grpc/binlog/v1/binarylog.proto - grpc/health/v1/health.proto - grpc/reflection/v1alpha/reflection.proto - grpc/channelz/v1/channelz.proto - - - - - - com.google.android - annotations - - - org.alluxio - alluxio-shaded-client - - - - android.annotation.SuppressLint - android.annotation.TargetApi - - - - - - com.google.re2j - re2j - - - io.trino - re2j - - - - com.google.re2j - - - - - - org.alluxio - alluxio-shaded-client - - - com.google.protobuf - protobuf-java - - - - google/protobuf/any.proto - google/protobuf/api.proto - google/protobuf/descriptor.proto - google/protobuf/duration.proto - google/protobuf/empty.proto - google/protobuf/field_mask.proto - google/protobuf/source_context.proto - google/protobuf/struct.proto - google/protobuf/timestamp.proto - google/protobuf/type.proto - google/protobuf/wrappers.proto - - diff --git a/service/trino-proxy/pom.xml b/service/trino-proxy/pom.xml index c147ba4838656..b4adbcea150b8 100644 --- a/service/trino-proxy/pom.xml +++ b/service/trino-proxy/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/service/trino-verifier/pom.xml b/service/trino-verifier/pom.xml index a8ab89b6bfa68..61d627374ed71 100644 --- a/service/trino-verifier/pom.xml +++ 
b/service/trino-verifier/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-benchmark-queries/pom.xml b/testing/trino-benchmark-queries/pom.xml index 11777cbd2b339..250e5c2c991a8 100644 --- a/testing/trino-benchmark-queries/pom.xml +++ b/testing/trino-benchmark-queries/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-benchto-benchmarks/pom.xml b/testing/trino-benchto-benchmarks/pom.xml index 3c31fa6350dce..d4181beed75ee 100644 --- a/testing/trino-benchto-benchmarks/pom.xml +++ b/testing/trino-benchto-benchmarks/pom.xml @@ -4,7 +4,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-faulttolerant-tests/pom.xml b/testing/trino-faulttolerant-tests/pom.xml index bce534c1e5e3e..ebbeb2578c05c 100644 --- a/testing/trino-faulttolerant-tests/pom.xml +++ b/testing/trino-faulttolerant-tests/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -13,16 +13,6 @@ ${project.parent.basedir} - - - instances @@ -185,6 +175,12 @@ test + + io.trino + trino-filesystem + test + + io.trino trino-hdfs @@ -428,33 +424,10 @@ testcontainers test - - - org.testng - testng - test - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.surefire - surefire-junit-platform - ${dep.plugin.surefire.version} - - - org.apache.maven.surefire - surefire-testng - ${dep.plugin.surefire.version} - - - org.basepom.maven duplicate-finder-maven-plugin diff --git a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestFaultTolerantExecutionDynamicFiltering.java b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestFaultTolerantExecutionDynamicFiltering.java index 9247c1ce3c60f..facbec507a3c2 100644 --- a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestFaultTolerantExecutionDynamicFiltering.java +++ b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestFaultTolerantExecutionDynamicFiltering.java @@ -25,7 +25,9 @@ import io.trino.testing.FaultTolerantExecutionConnectorTestHelper; import io.trino.testing.QueryRunner; import io.trino.testing.TestingMetadata; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.Set; @@ -34,8 +36,9 @@ import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.sql.planner.OptimizerConfig.JoinDistributionType; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@Execution(SAME_THREAD) public class TestFaultTolerantExecutionDynamicFiltering extends AbstractTestCoordinatorDynamicFiltering { @@ -71,8 +74,7 @@ protected RetryPolicy getRetryPolicy() // results in each instance of DynamicFilterSourceOperator receiving fewer input rows. Therefore, testing max-distinct-values-per-driver // requires larger build side and the assertions on the collected domain are adjusted for multiple ranges instead of single range. 
@Override - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + protected void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -90,8 +92,7 @@ public void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistr } @Override - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + protected void testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -109,7 +110,8 @@ public void testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistribut } @Override - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testRightJoinWithNonSelectiveBuildSide() { assertQueryDynamicFilters( diff --git a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestOverridePartitionCountRecursively.java b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestOverridePartitionCountRecursively.java index e73ec67fafe46..98c6fb53f33bd 100644 --- a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestOverridePartitionCountRecursively.java +++ b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/TestOverridePartitionCountRecursively.java @@ -57,8 +57,8 @@ import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION; import static io.trino.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION; import static io.trino.sql.planner.TopologicalOrderSubPlanVisitor.sortPlanInTopologicalOrder; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.tpch.TpchTable.getTables; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; diff --git a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveFaultTolerantExecutionAggregations.java b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveFaultTolerantExecutionAggregations.java index 19dcd88fbaacd..da94ddc79b672 100644 --- a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveFaultTolerantExecutionAggregations.java +++ b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveFaultTolerantExecutionAggregations.java @@ -26,7 +26,6 @@ import static io.trino.plugin.exchange.filesystem.containers.MinioStorage.getExchangeManagerProperties; import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.tpch.TpchTable.getTables; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @TestInstance(PER_CLASS) @@ -49,7 +48,7 @@ protected QueryRunner createQueryRunner(Map extraProperties) runner.installPlugin(new FileSystemExchangePlugin()); runner.loadExchangeManager("filesystem", getExchangeManagerProperties(minioStorage)); }) - .setInitialTables(getTables()) + 
.setInitialTables(REQUIRED_TPCH_TABLES) .build(); } diff --git a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveRuntimeAdaptivePartitioningFaultTolerantExecutionAggregations.java b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveRuntimeAdaptivePartitioningFaultTolerantExecutionAggregations.java index e09a4ddff0c3d..55765c94c309c 100644 --- a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveRuntimeAdaptivePartitioningFaultTolerantExecutionAggregations.java +++ b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/hive/TestHiveRuntimeAdaptivePartitioningFaultTolerantExecutionAggregations.java @@ -22,8 +22,6 @@ import java.util.Map; -import static io.trino.tpch.TpchTable.getTables; - public class TestHiveRuntimeAdaptivePartitioningFaultTolerantExecutionAggregations extends AbstractTestFaultTolerantExecutionAggregations { @@ -42,7 +40,7 @@ protected QueryRunner createQueryRunner(Map extraProperties) runner.loadExchangeManager("filesystem", ImmutableMap.of("exchange.base-directories", System.getProperty("java.io.tmpdir") + "/trino-local-file-system-exchange-manager")); }) - .setInitialTables(getTables()) + .setInitialTables(REQUIRED_TPCH_TABLES) .build(); } } diff --git a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/iceberg/TestIcebergParquetFaultTolerantExecutionConnectorTest.java b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/iceberg/TestIcebergParquetFaultTolerantExecutionConnectorTest.java index 32d6c5d0a2db8..d46ecfe22a61e 100644 --- a/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/iceberg/TestIcebergParquetFaultTolerantExecutionConnectorTest.java +++ b/testing/trino-faulttolerant-tests/src/test/java/io/trino/faulttolerant/iceberg/TestIcebergParquetFaultTolerantExecutionConnectorTest.java @@ -13,6 +13,7 @@ */ package io.trino.faulttolerant.iceberg; +import io.trino.filesystem.Location; import io.trino.plugin.exchange.filesystem.FileSystemExchangePlugin; import io.trino.plugin.exchange.filesystem.containers.MinioStorage; import io.trino.plugin.iceberg.IcebergQueryRunner; @@ -78,7 +79,7 @@ public void testStatsBasedRepartitionDataOnInsert() @Override protected boolean isFileSorted(String path, String sortColumnName) { - return checkParquetFileSorting(path, sortColumnName); + return checkParquetFileSorting(fileSystem.newInputFile(Location.of(path)), sortColumnName); } @AfterAll diff --git a/testing/trino-plugin-reader/pom.xml b/testing/trino-plugin-reader/pom.xml index 078ca5cd85d58..a2defff381c46 100644 --- a/testing/trino-plugin-reader/pom.xml +++ b/testing/trino-plugin-reader/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-product-tests-launcher/pom.xml b/testing/trino-product-tests-launcher/pom.xml index 03eb0334fe483..64d87208fcf31 100644 --- a/testing/trino-product-tests-launcher/pom.xml +++ b/testing/trino-product-tests-launcher/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -149,7 +149,7 @@ com.databricks databricks-jdbc - 2.6.32 + 2.6.36 runtime @@ -199,6 +199,12 @@ + + io.airlift + junit-extensions + test + + io.trino trino-jdbc @@ -212,8 +218,14 @@ - org.testng - testng + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.jupiter + junit-jupiter-engine test diff --git 
a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/DockerContainer.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/DockerContainer.java index d204eed5854dd..0a9f567579a95 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/DockerContainer.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/DockerContainer.java @@ -87,8 +87,8 @@ public class DockerContainer @GuardedBy("this") private OptionalLong lastStartFinishTimeNanos = OptionalLong.empty(); - private List logPaths = new ArrayList<>(); - private List listeners = new ArrayList<>(); + private final List logPaths = new ArrayList<>(); + private final List listeners = new ArrayList<>(); private boolean temporary; private static final ImagePullPolicy pullPolicy = new ConditionalPullPolicy(); diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeAllConnectors.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeAllConnectors.java index 7e289ee8583c9..c44f85b98437a 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeAllConnectors.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeAllConnectors.java @@ -69,6 +69,7 @@ public void extendEnvironment(Environment.Builder builder) "singlestore", "mongodb", "mysql", + "opensearch", "oracle", "phoenix5", "pinot", diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteClients.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteClients.java index c7d63bb5c449a..deb49293cf44f 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteClients.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteClients.java @@ -32,7 +32,7 @@ public List getTestRuns(EnvironmentConfig config) return ImmutableList.of( testOnEnvironment(EnvMultinode.class) .withGroups( - "configured-features", + "configured_features", "cli", "jdbc", "trino_jdbc") diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteFunctions.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteFunctions.java index 4513988a1ad50..ad3fc9e5543d4 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteFunctions.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteFunctions.java @@ -31,7 +31,7 @@ public List getTestRuns(EnvironmentConfig config) { return ImmutableList.of( testOnEnvironment(EnvMultinode.class) - .withGroups("configured-features", "functions") + .withGroups("configured_features", "functions") .build()); } } diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteGcs.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteGcs.java index 08b5b635276c4..325a1a0aec9ca 100644 --- 
a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteGcs.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteGcs.java @@ -31,7 +31,7 @@ public List getTestRuns(EnvironmentConfig config) { return ImmutableList.of( testOnEnvironment(EnvMultinodeGcs.class) - .withGroups("delta-lake-gcs", "configured-features") + .withGroups("delta-lake-gcs", "configured_features") .build()); } } diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/Suite8NonGeneric.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHiveTransactional.java similarity index 92% rename from testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/Suite8NonGeneric.java rename to testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHiveTransactional.java index be6a4c7860e3c..fff324a09477d 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/Suite8NonGeneric.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHiveTransactional.java @@ -25,7 +25,7 @@ import static com.google.common.base.Verify.verify; import static io.trino.tests.product.launcher.suite.SuiteTestRun.testOnEnvironment; -public class Suite8NonGeneric +public class SuiteHiveTransactional extends Suite { @Override @@ -35,7 +35,7 @@ public List getTestRuns(EnvironmentConfig config) return ImmutableList.of( testOnEnvironment(EnvSinglenodeHiveAcid.class) - .withGroups("configured_features", "hdp3_only", "hive_transactional") + .withGroups("configured_features", "hive_transactional") .build()); } } diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-all/opensearch.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-all/opensearch.properties new file mode 100644 index 0000000000000..255cfc60ef9ef --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-all/opensearch.properties @@ -0,0 +1,4 @@ +connector.name=opensearch +opensearch.host=host1.invalid +opensearch.port=9200 +opensearch.default-schema-name=default diff --git a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestInvocations.java b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestInvocations.java index c3fc55911bc43..4344639ac7e85 100644 --- a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestInvocations.java +++ b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestInvocations.java @@ -14,7 +14,8 @@ package io.trino.tests.product.launcher.cli; import com.google.common.base.Splitter; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -24,8 +25,9 @@ import static io.trino.tests.product.launcher.cli.Launcher.execute; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; +import static 
org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@Execution(SAME_THREAD) public class TestInvocations { @Test diff --git a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestOptionsPrinter.java b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestOptionsPrinter.java index e1bd087930e09..a5728b650e79b 100644 --- a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestOptionsPrinter.java +++ b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/cli/TestOptionsPrinter.java @@ -14,7 +14,7 @@ package io.trino.tests.product.launcher.cli; import com.google.common.collect.ImmutableList; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import picocli.CommandLine.Option; import picocli.CommandLine.Parameters; diff --git a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/env/TestConfigurations.java b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/env/TestConfigurations.java index 2f5a744d0153e..28016a171170c 100644 --- a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/env/TestConfigurations.java +++ b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/env/TestConfigurations.java @@ -17,7 +17,7 @@ import io.trino.tests.product.launcher.suite.suites.Suite1; import io.trino.tests.product.launcher.suite.suites.Suite6NonGeneric; import io.trino.tests.product.launcher.suite.suites.SuiteTpcds; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import static io.trino.tests.product.launcher.Configurations.canonicalEnvironmentName; import static io.trino.tests.product.launcher.Configurations.nameForSuiteClass; diff --git a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/local/TestManuallyJdbcOauth2.java b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/local/TestManuallyJdbcOauth2.java index 1a87ab9de2c8a..ffd092f1a8a64 100644 --- a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/local/TestManuallyJdbcOauth2.java +++ b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/local/TestManuallyJdbcOauth2.java @@ -13,8 +13,8 @@ */ package io.trino.tests.product.launcher.local; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import java.net.InetAddress; import java.net.Socket; @@ -23,7 +23,6 @@ import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.SQLException; import java.util.Properties; import static java.lang.String.format; @@ -32,8 +31,7 @@ public class TestManuallyJdbcOauth2 { - @BeforeClass(alwaysRun = true) - public void verifyEtcHostsEntries() + private static void verifyEtcHostsEntries() throws UnknownHostException { assertThat(InetAddress.getByName("presto-master").isLoopbackAddress()).isTrue(); @@ -53,10 +51,13 @@ public void verifyEtcHostsEntries() * 127.0.0.1 hydra * 127.0.0.1 hydra-consent */ - @Test(enabled = false) + @Test + @Disabled public void shouldAuthenticateAndExecuteQuery() - throws SQLException + throws Exception { + verifyEtcHostsEntries(); + Properties properties = new Properties(); String jdbcUrl = 
format("jdbc:trino://presto-master:7778?" + "SSL=true&" diff --git a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/util/TestConsoleTable.java b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/util/TestConsoleTable.java index 99170caa82fea..91797f932ce04 100644 --- a/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/util/TestConsoleTable.java +++ b/testing/trino-product-tests-launcher/src/test/java/io/trino/tests/product/launcher/util/TestConsoleTable.java @@ -13,7 +13,7 @@ */ package io.trino.tests.product.launcher.util; -import org.testng.annotations.Test; +import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; diff --git a/testing/trino-product-tests/pom.xml b/testing/trino-product-tests/pom.xml index 8aaf3b31cfbd8..315dbdf876808 100644 --- a/testing/trino-product-tests/pom.xml +++ b/testing/trino-product-tests/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java index 1d4b1c7a32c04..980ec71c3addf 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java @@ -64,7 +64,6 @@ public final class TestGroups public static final String LDAP_CLI = "ldap_cli"; public static final String LDAP_AND_FILE_CLI = "ldap_and_file_cli"; public static final String LDAP_MULTIPLE_BINDS = "ldap_multiple_binds"; - public static final String HDP3_ONLY = "hdp3_only"; public static final String TLS = "tls"; public static final String ROLES = "roles"; public static final String CANCEL_QUERY = "cancel_query"; diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/AbstractTestHiveViews.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/AbstractTestHiveViews.java index cbf872c0d3613..a81c2dd5df042 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/AbstractTestHiveViews.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/AbstractTestHiveViews.java @@ -23,7 +23,6 @@ import io.trino.testng.services.Flaky; import io.trino.tests.product.utils.QueryExecutors; import org.intellij.lang.annotations.Language; -import org.testng.SkipException; import org.testng.annotations.Test; import java.math.BigDecimal; @@ -233,10 +232,6 @@ public void testViewWithUnsupportedCoercion() @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testOuterParentheses() { - if (getHiveVersionMajor() <= 1) { - throw new SkipException("The old Hive doesn't allow outer parentheses in a view definition"); - } - onHive().executeQuery("CREATE OR REPLACE VIEW view_outer_parentheses AS (SELECT 'parentheses' AS col FROM nation LIMIT 1)"); assertViewQuery("SELECT * FROM view_outer_parentheses", @@ -391,12 +386,10 @@ public void testHiveViewInInformationSchema() onTrino().executeQuery("CREATE TABLE test_schema.trino_table(a int)"); onTrino().executeQuery("CREATE VIEW test_schema.trino_test_view AS SELECT * FROM nation"); - boolean hiveWithTableNamesByType = getHiveVersionMajor() >= 3 || - (getHiveVersionMajor() == 2 && getHiveVersionMinor() >= 3); assertThat(onTrino().executeQuery("SELECT * FROM information_schema.tables WHERE table_schema = 
'test_schema'")).containsOnly( row("hive", "test_schema", "trino_table", "BASE TABLE"), row("hive", "test_schema", "hive_table", "BASE TABLE"), - row("hive", "test_schema", "hive_test_view", hiveWithTableNamesByType ? "VIEW" : "BASE TABLE"), + row("hive", "test_schema", "hive_test_view", "VIEW"), row("hive", "test_schema", "trino_test_view", "VIEW")); assertThat(onTrino().executeQuery("SELECT view_definition FROM information_schema.views WHERE table_schema = 'test_schema' and table_name = 'hive_test_view'")).containsOnly( @@ -588,10 +581,6 @@ public void testUnionAllViews() @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testUnionDistinctViews() { - if (getHiveVersionMajor() < 1 || (getHiveVersionMajor() == 1 && getHiveVersionMinor() < 2)) { - throw new SkipException("UNION DISTINCT and plain UNION are not supported before Hive 1.2.0"); - } - onHive().executeQuery("DROP TABLE IF EXISTS union_helper"); onHive().executeQuery("CREATE TABLE union_helper (\n" + "r_regionkey BIGINT,\n" diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java index f02df83c37c1b..4c28ec22fe7ac 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java @@ -105,13 +105,21 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) "tinyint_to_int", "tinyint_to_bigint", "tinyint_to_double", + "tinyint_to_shortdecimal", + "tinyint_to_longdecimal", "smallint_to_int", "smallint_to_bigint", "smallint_to_double", + "smallint_to_shortdecimal", + "smallint_to_longdecimal", "int_to_bigint", "int_to_double", + "int_to_shortdecimal", + "int_to_longdecimal", "bigint_to_double", "bigint_to_varchar", + "bigint_to_shortdecimal", + "bigint_to_longdecimal", "float_to_double", "double_to_float", "double_to_string", @@ -147,6 +155,8 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) "string_to_double", "varchar_to_double_infinity", "varchar_to_special_double", + "date_to_string", + "date_to_bounded_varchar", "char_to_bigger_char", "char_to_smaller_char", "timestamp_millis_to_date", @@ -195,13 +205,21 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TINYINT '2', " + " TINYINT '-3', " + " TINYINT '4', " + + " TINYINT '5', " + + " TINYINT '6', " + " SMALLINT '100', " + " SMALLINT '-101', " + " SMALLINT '1024', " + + " SMALLINT '2048', " + + " SMALLINT '4096', " + " INTEGER '2323', " + " INTEGER '16384', " + + " INTEGER '16385', " + + " INTEGER '16386', " + " 1234567890, " + " 12345, " + + " 9223372, " + + " 9223372036, " + " REAL '0.5', " + " DOUBLE '0.5', " + " DOUBLE '12345.12345', " + @@ -237,6 +255,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " '1234.01234', " + " 'Infinity'," + " 'NaN'," + + " DATE '2023-09-28', " + + " DATE '2000-04-13', " + " 'abc', " + " 'abc', " + " TIMESTAMP '2022-12-31 23:59:59.999', " + @@ -257,13 +277,21 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TINYINT '-2', " + " NULL, " + " TINYINT '-4', " + + " TINYINT '-5', " + + " TINYINT '-6', " + " SMALLINT '-100', " + " SMALLINT '101', " + " SMALLINT '-1024', " + + " SMALLINT '-2048', " + + " SMALLINT '-4096', " + " INTEGER '-2323', " + " INTEGER '-16384', " + + " INTEGER '-16385', " + + " 
INTEGER '-16386', " + " -1234567890, " + " -12345, " + + " -9223372, " + + " -9223372036, " + " REAL '-1.5', " + " DOUBLE '-1.5', " + " DOUBLE 'NaN', " + @@ -299,6 +327,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " '0', " + " '-Infinity'," + " 'Invalid Double'," + + " DATE '2123-09-27', " + + " DATE '1900-01-01', " + " '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " + " '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " + " TIMESTAMP '1970-01-01 00:00:00.123', " + @@ -323,7 +353,7 @@ protected Map> expectedValuesForEngineProvider(Engine engin if (Stream.of("rctext", "textfile", "sequencefile").anyMatch(isFormat)) { hiveValueForCaseChangeField = "\"lower2uppercase\":2"; } - else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { + else if (isFormat.test("orc")) { hiveValueForCaseChangeField = "\"LOWER2UPPERCASE\":null"; } else { @@ -405,6 +435,12 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { .put("tinyint_to_double", Arrays.asList( -4D, 4D)) + .put("tinyint_to_shortdecimal", Arrays.asList( + new BigDecimal(-5), + new BigDecimal(5))) + .put("tinyint_to_longdecimal", Arrays.asList( + new BigDecimal(-6), + new BigDecimal(6))) .put("smallint_to_int", ImmutableList.of( 100, -100)) @@ -414,18 +450,36 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { .put("smallint_to_double", ImmutableList.of( -1024D, 1024D)) + .put("smallint_to_shortdecimal", Arrays.asList( + new BigDecimal(-2048), + new BigDecimal(2048))) + .put("smallint_to_longdecimal", Arrays.asList( + new BigDecimal(-4096), + new BigDecimal(4096))) .put("int_to_bigint", ImmutableList.of( 2323L, -2323L)) .put("int_to_double", ImmutableList.of( -16384D, 16384D)) + .put("int_to_shortdecimal", Arrays.asList( + new BigDecimal(-16385), + new BigDecimal(16385))) + .put("int_to_longdecimal", Arrays.asList( + new BigDecimal(-16386), + new BigDecimal(16386))) .put("bigint_to_double", ImmutableList.of( -1234567890D, 1234567890D)) .put("bigint_to_varchar", ImmutableList.of( "12345", "-12345")) + .put("bigint_to_shortdecimal", Arrays.asList( + new BigDecimal(-9223372L), + new BigDecimal(9223372L))) + .put("bigint_to_longdecimal", Arrays.asList( + new BigDecimal(-9223372036L), + new BigDecimal(9223372036L))) .put("float_to_double", ImmutableList.of( 0.5, -1.5)) @@ -519,6 +573,12 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { .put("varchar_to_special_double", Arrays.asList( coercedNaN == null ?
null : Double.NaN, null)) + .put("date_to_string", ImmutableList.of( + "2023-09-28", + "2123-09-27")) + .put("date_to_bounded_varchar", ImmutableList.of( + "2000-04-13", + "1900-01-01")) .put("char_to_bigger_char", ImmutableList.of( "abc ", "\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0 ")) @@ -753,7 +813,7 @@ private void assertNestedSubFields(String tableName) Map> expectedNestedFieldTrino = ImmutableMap.of("nested_field", ImmutableList.of(2L, 2L)); Map> expectedNestedFieldHive; - if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { + if (isFormat.test("orc")) { expectedNestedFieldHive = ImmutableMap.of("nested_field", Arrays.asList(null, null)); } else { @@ -776,14 +836,7 @@ private void assertNestedSubFields(String tableName) } else if (isFormat.test("parquet")) { assertQueryResults(Engine.HIVE, subfieldQueryUpperCase, expectedNestedFieldHive, expectedColumns, 2, tableName); - - if (getHiveVersionMajor() == 1) { - assertThatThrownBy(() -> assertQueryResults(Engine.HIVE, subfieldQueryLowerCase, expectedNestedFieldHive, expectedColumns, 2, tableName)) - .hasMessageContaining("java.sql.SQLException"); - } - else { - assertQueryResults(Engine.HIVE, subfieldQueryLowerCase, expectedNestedFieldHive, expectedColumns, 2, tableName); - } + assertQueryResults(Engine.HIVE, subfieldQueryLowerCase, expectedNestedFieldHive, expectedColumns, 2, tableName); } else { assertQueryResults(Engine.HIVE, subfieldQueryUpperCase, expectedNestedFieldHive, expectedColumns, 2, tableName); @@ -801,11 +854,19 @@ protected Map expectedExceptionsWithHiveContext() .put(columnContext("1.1", "parquet", "map_to_map"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("1.1", "parquet", "tinyint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("1.1", "parquet", "tinyint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") + .put(columnContext("1.1", "parquet", "tinyint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") + .put(columnContext("1.1", "parquet", "tinyint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") .put(columnContext("1.1", "parquet", "smallint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") .put(columnContext("1.1", "parquet", "smallint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") + .put(columnContext("1.1", "parquet", "smallint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") + .put(columnContext("1.1", "parquet", "smallint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") .put(columnContext("1.1", "parquet", "int_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("1.1", "parquet", "int_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("1.1", "parquet", "int_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("1.1", 
"parquet", "int_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("1.1", "parquet", "bigint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("1.1", "parquet", "bigint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("1.1", "parquet", "bigint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") // Rcbinary .put(columnContext("1.1", "rcbinary", "row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct") .put(columnContext("1.1", "rcbinary", "list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray") @@ -834,11 +895,19 @@ protected Map expectedExceptionsWithHiveContext() .put(columnContext("2.1", "parquet", "map_to_map"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("2.1", "parquet", "tinyint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("2.1", "parquet", "tinyint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") + .put(columnContext("2.1", "parquet", "tinyint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") + .put(columnContext("2.1", "parquet", "tinyint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") .put(columnContext("2.1", "parquet", "smallint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") .put(columnContext("2.1", "parquet", "smallint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") + .put(columnContext("2.1", "parquet", "smallint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") + .put(columnContext("2.1", "parquet", "smallint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") .put(columnContext("2.1", "parquet", "int_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("2.1", "parquet", "int_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("2.1", "parquet", "int_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("2.1", "parquet", "int_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("2.1", "parquet", "bigint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("2.1", "parquet", "bigint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("2.1", "parquet", "bigint_to_longdecimal"), 
"org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") // Rcbinary .put(columnContext("2.1", "rcbinary", "row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct") .put(columnContext("2.1", "rcbinary", "list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray") @@ -851,11 +920,19 @@ protected Map expectedExceptionsWithHiveContext() .put(columnContext("3.1", "parquet", "map_to_map"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("3.1", "parquet", "tinyint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") .put(columnContext("3.1", "parquet", "tinyint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ByteWritable") + .put(columnContext("3.1", "parquet", "tinyint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") + .put(columnContext("3.1", "parquet", "tinyint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ByteWritable") .put(columnContext("3.1", "parquet", "smallint_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") .put(columnContext("3.1", "parquet", "smallint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.ShortWritable") + .put(columnContext("3.1", "parquet", "smallint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") + .put(columnContext("3.1", "parquet", "smallint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.ShortWritable") .put(columnContext("3.1", "parquet", "int_to_bigint"), "org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("3.1", "parquet", "int_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("3.1", "parquet", "int_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") + .put(columnContext("3.1", "parquet", "int_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.IntWritable") .put(columnContext("3.1", "parquet", "bigint_to_double"), "org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("3.1", "parquet", "bigint_to_shortdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") + .put(columnContext("3.1", "parquet", "bigint_to_longdecimal"), "org.apache.hadoop.hive.serde2.io.HiveDecimalWritable cannot be cast to org.apache.hadoop.io.LongWritable") // Rcbinary .put(columnContext("3.1", "rcbinary", "row_to_row"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct") .put(columnContext("3.1", "rcbinary", "list_to_list"), "java.util.ArrayList cannot be cast to org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray") @@ -935,13 +1012,21 @@ private void assertProperAlteredTableSchema(String tableName) row("tinyint_to_int", 
"integer"), row("tinyint_to_bigint", "bigint"), row("tinyint_to_double", "double"), + row("tinyint_to_shortdecimal", "decimal(10,2)"), + row("tinyint_to_longdecimal", "decimal(20,2)"), row("smallint_to_int", "integer"), row("smallint_to_bigint", "bigint"), row("smallint_to_double", "double"), + row("smallint_to_shortdecimal", "decimal(10,2)"), + row("smallint_to_longdecimal", "decimal(20,2)"), row("int_to_bigint", "bigint"), row("int_to_double", "double"), + row("int_to_shortdecimal", "decimal(10,2)"), + row("int_to_longdecimal", "decimal(20,2)"), row("bigint_to_double", "double"), row("bigint_to_varchar", "varchar"), + row("bigint_to_shortdecimal", "decimal(10,2)"), + row("bigint_to_longdecimal", "decimal(20,2)"), row("float_to_double", "double"), row("double_to_float", floatType), row("double_to_string", "varchar"), @@ -977,6 +1062,8 @@ private void assertProperAlteredTableSchema(String tableName) row("string_to_double", "double"), row("varchar_to_double_infinity", "double"), row("varchar_to_special_double", "double"), + row("date_to_string", "varchar"), + row("date_to_bounded_varchar", "varchar(12)"), row("char_to_bigger_char", "char(4)"), row("char_to_smaller_char", "char(2)"), row("timestamp_millis_to_date", "date"), @@ -1013,13 +1100,21 @@ private void assertColumnTypes( .put("tinyint_to_int", INTEGER) .put("tinyint_to_bigint", BIGINT) .put("tinyint_to_double", DOUBLE) + .put("tinyint_to_shortdecimal", DECIMAL) + .put("tinyint_to_longdecimal", DECIMAL) .put("smallint_to_int", INTEGER) .put("smallint_to_bigint", BIGINT) .put("smallint_to_double", DOUBLE) + .put("smallint_to_shortdecimal", DECIMAL) + .put("smallint_to_longdecimal", DECIMAL) .put("int_to_bigint", BIGINT) .put("int_to_double", DOUBLE) + .put("int_to_shortdecimal", DECIMAL) + .put("int_to_longdecimal", DECIMAL) .put("bigint_to_double", DOUBLE) .put("bigint_to_varchar", VARCHAR) + .put("bigint_to_shortdecimal", DECIMAL) + .put("bigint_to_longdecimal", DECIMAL) .put("float_to_double", DOUBLE) .put("double_to_float", floatType) .put("double_to_string", VARCHAR) @@ -1055,6 +1150,8 @@ private void assertColumnTypes( .put("string_to_double", DOUBLE) .put("varchar_to_double_infinity", DOUBLE) .put("varchar_to_special_double", DOUBLE) + .put("date_to_string", VARCHAR) + .put("date_to_bounded_varchar", VARCHAR) .put("char_to_bigger_char", CHAR) .put("char_to_smaller_char", CHAR) .put("id", BIGINT) @@ -1091,13 +1188,21 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN tinyint_to_int tinyint_to_int int", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN tinyint_to_bigint tinyint_to_bigint bigint", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN tinyint_to_double tinyint_to_double double", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN tinyint_to_shortdecimal tinyint_to_shortdecimal decimal(10,2)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN tinyint_to_longdecimal tinyint_to_longdecimal decimal(20,2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smallint_to_int smallint_to_int int", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smallint_to_bigint smallint_to_bigint bigint", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smallint_to_double smallint_to_double double", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smallint_to_shortdecimal 
smallint_to_shortdecimal decimal(10,2)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smallint_to_longdecimal smallint_to_longdecimal decimal(20,2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN int_to_bigint int_to_bigint bigint", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN int_to_double int_to_double double", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN int_to_shortdecimal int_to_shortdecimal decimal(10,2)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN int_to_longdecimal int_to_longdecimal decimal(20,2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_double bigint_to_double double", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_varchar bigint_to_varchar string", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_shortdecimal bigint_to_shortdecimal decimal(10,2)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_longdecimal bigint_to_longdecimal decimal(20,2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN float_to_double float_to_double double", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_float double_to_float %s", tableName, floatType)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_string double_to_string string", tableName)); @@ -1128,6 +1233,8 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_bigger_varchar varchar_to_bigger_varchar varchar(4)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_smaller_varchar varchar_to_smaller_varchar varchar(2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_date varchar_to_date date", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_string date_to_string string", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_bounded_varchar date_to_bounded_varchar varchar(12)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_distant_date varchar_to_distant_date date", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_double varchar_to_double double", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN string_to_double string_to_double double", tableName)); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestAvroSchemaUrl.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestAvroSchemaUrl.java index 83d9520ebc9cb..8530f9de494e1 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestAvroSchemaUrl.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestAvroSchemaUrl.java @@ -17,9 +17,7 @@ import io.trino.tempto.AfterMethodWithContext; import io.trino.tempto.BeforeMethodWithContext; import io.trino.tempto.hadoop.hdfs.HdfsClient; -import io.trino.tempto.query.QueryExecutionException; import io.trino.testng.services.Flaky; -import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -27,7 +25,6 @@ import java.io.InputStream; import java.nio.file.Paths; -import static com.google.common.base.Strings.isNullOrEmpty; import static 
io.trino.tempto.assertions.QueryAssert.Row.row; import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; import static io.trino.tests.product.TestGroups.AVRO; @@ -199,13 +196,6 @@ public void testTableWithLongColumnType() @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testPartitionedTableWithLongColumnType() { - if (isOnHdp() && getHiveVersionMajor() < 3) { - // HDP 2.6 won't allow to define a partitioned table with schema having a column with type definition over 2000 characters. - // It is possible to create table with simpler schema and then alter the schema, but that results in different end state. - // To retain proper test coverage, this test needs to be disabled on HDP 2. - throw new SkipException("Skipping on HDP 2"); - } - onHive().executeQuery("DROP TABLE IF EXISTS test_avro_schema_url_partitioned_long_column"); onHive().executeQuery("" + "CREATE TABLE test_avro_schema_url_partitioned_long_column " + @@ -263,15 +253,4 @@ public void testHiveCreatedCamelCaseColumnTable() onHive().executeQuery("DROP TABLE IF EXISTS test_camelCase_avro_schema_url_hive"); } - - private boolean isOnHdp() - { - try { - String hdpVersion = (String) onHive().executeQuery("SET system:hdp.version").getOnlyValue(); - return !isNullOrEmpty(hdpVersion); - } - catch (QueryExecutionException e) { - return false; - } - } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveBucketedTables.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveBucketedTables.java index 96631d662f01c..1a7680ae983de 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveBucketedTables.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveBucketedTables.java @@ -302,14 +302,12 @@ public void testBucketingVersion() List bucketV1NameOptions = ImmutableList.of(bucketV1); List bucketV2NameOptions = ImmutableList.of(bucketV2Standard, bucketV2DirectInsert); - testBucketingVersion(BUCKETED_DEFAULT, value, false, (getHiveVersionMajor() < 3) ? bucketV1NameOptions : bucketV2NameOptions); - testBucketingVersion(BUCKETED_DEFAULT, value, true, (getHiveVersionMajor() < 3) ? bucketV1NameOptions : bucketV2NameOptions); + testBucketingVersion(BUCKETED_DEFAULT, value, false, bucketV2NameOptions); + testBucketingVersion(BUCKETED_DEFAULT, value, true, bucketV2NameOptions); testBucketingVersion(BUCKETED_V1, value, false, bucketV1NameOptions); testBucketingVersion(BUCKETED_V1, value, true, bucketV1NameOptions); - if (getHiveVersionMajor() >= 3) { - testBucketingVersion(BUCKETED_V2, value, false, bucketV2NameOptions); - testBucketingVersion(BUCKETED_V2, value, true, bucketV2NameOptions); - } + testBucketingVersion(BUCKETED_V2, value, false, bucketV2NameOptions); + testBucketingVersion(BUCKETED_V2, value, true, bucketV2NameOptions); } @Test(dataProvider = "testBucketingWithUnsupportedDataTypesDataProvider") @@ -459,7 +457,7 @@ private String getExpectedBucketVersion(BucketingType bucketingType) { switch (bucketingType) { case BUCKETED_DEFAULT: - return getHiveVersionMajor() < 3 ? 
"1" : "2"; + return "2"; case BUCKETED_V1: return "1"; case BUCKETED_V2: diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java index 8960585ad40ae..cd5d41a22338a 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java @@ -108,13 +108,21 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui " tinyint_to_int TINYINT," + " tinyint_to_bigint TINYINT," + " tinyint_to_double TINYINT," + + " tinyint_to_shortdecimal TINYINT," + + " tinyint_to_longdecimal TINYINT," + " smallint_to_int SMALLINT," + " smallint_to_bigint SMALLINT," + " smallint_to_double SMALLINT," + + " smallint_to_shortdecimal SMALLINT," + + " smallint_to_longdecimal SMALLINT," + " int_to_bigint INT," + " int_to_double INT," + + " int_to_shortdecimal INT," + + " int_to_longdecimal INT," + " bigint_to_double BIGINT," + " bigint_to_varchar BIGINT," + + " bigint_to_shortdecimal BIGINT," + + " bigint_to_longdecimal BIGINT," + " float_to_double " + floatType + "," + " double_to_float DOUBLE," + " double_to_string DOUBLE," + @@ -150,6 +158,8 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui " string_to_double STRING," + " varchar_to_double_infinity VARCHAR(40)," + " varchar_to_special_double VARCHAR(40)," + + " date_to_string DATE," + + " date_to_bounded_varchar DATE," + " char_to_bigger_char CHAR(3)," + " char_to_smaller_char CHAR(3)," + " timestamp_millis_to_date TIMESTAMP," + diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java index f11ceb4a1520f..8820004902c22 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java @@ -57,13 +57,21 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui tinyint_to_int TINYINT, tinyint_to_bigint TINYINT, tinyint_to_double TINYINT, + tinyint_to_shortdecimal TINYINT, + tinyint_to_longdecimal TINYINT, smallint_to_int SMALLINT, smallint_to_bigint SMALLINT, smallint_to_double SMALLINT, + smallint_to_shortdecimal SMALLINT, + smallint_to_longdecimal SMALLINT, int_to_bigint INT, int_to_double INT, + int_to_shortdecimal INT, + int_to_longdecimal INT, bigint_to_double BIGINT, bigint_to_varchar BIGINT, + bigint_to_shortdecimal BIGINT, + bigint_to_longdecimal BIGINT, float_to_double FLOAT, double_to_float DOUBLE, double_to_string DOUBLE, @@ -99,6 +107,8 @@ varchar_to_double VARCHAR(40), string_to_double STRING, varchar_to_double_infinity VARCHAR(40), varchar_to_special_double VARCHAR(40), + date_to_string DATE, + date_to_bounded_varchar DATE, char_to_bigger_char CHAR(3), char_to_smaller_char CHAR(3), timestamp_millis_to_date TIMESTAMP, diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java index 1e75070b1613b..286fe88e05281 100644 --- 
a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java @@ -24,7 +24,7 @@ import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; import static io.trino.tempto.assertions.QueryAssert.Row.row; -import static io.trino.tests.product.TestGroups.HDP3_ONLY; +import static io.trino.tests.product.TestGroups.HIVE_TRANSACTIONAL; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static io.trino.tests.product.TestGroups.STORAGE_FORMATS; import static io.trino.tests.product.utils.HadoopTestUtils.RETRYABLE_FAILURES_ISSUES; @@ -81,7 +81,7 @@ public void testCreateTableAsSelect() onTrino().executeQuery("DROP TABLE test_create_table_as_select"); } - @Test(groups = {HDP3_ONLY, PROFILE_SPECIFIC_TESTS}) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) public void testVerifyEnvironmentHiveTransactionalByDefault() throws SQLException { diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMaterializedView.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMaterializedView.java index accad931f3dad..0c0bacdb41fff 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMaterializedView.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMaterializedView.java @@ -27,19 +27,9 @@ public class TestHiveMaterializedView extends HiveProductTest { - private boolean isTestEnabled() - { - // MATERIALIZED VIEW is supported since Hive 3 - return getHiveVersionMajor() >= 3; - } - @BeforeMethodWithContext public void setUp() { - if (!isTestEnabled()) { - return; - } - onHive().executeQuery("" + "CREATE TABLE test_materialized_view_table(x string) " + "STORED AS ORC " + @@ -50,10 +40,6 @@ public void setUp() @AfterMethodWithContext public void tearDown() { - if (!isTestEnabled()) { - return; - } - onHive().executeQuery("DROP TABLE IF EXISTS test_materialized_view_table"); } @@ -71,10 +57,6 @@ public void testPartitionedMaterializedView() private void testMaterializedView(boolean partitioned) { - if (!isTestEnabled()) { - return; - } - onHive().executeQuery("DROP MATERIALIZED VIEW test_materialized_view_view"); onHive().executeQuery("" + "CREATE MATERIALIZED VIEW test_materialized_view_view " + diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMerge.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMerge.java index 974d2a29f8572..52046c538a91d 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMerge.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveMerge.java @@ -16,7 +16,6 @@ import io.trino.tempto.assertions.QueryAssert; import io.trino.tempto.query.QueryResult; import io.trino.tests.product.hive.util.TemporaryHiveTable; -import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -46,12 +45,12 @@ public class TestHiveMerge @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeSimpleSelect() { - withTemporaryTable("merge_simple_select_target", true, false, NONE, targetTable -> { + withTemporaryTable("merge_simple_select_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) 
WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable("merge_simple_select_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_simple_select_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable)); @@ -71,12 +70,12 @@ public void testMergeSimpleSelect() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeSimpleSelectPartitioned() { - withTemporaryTable("merge_simple_select_partitioned_target", true, true, NONE, targetTable -> { + withTemporaryTable("merge_simple_select_partitioned_target", true, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true, partitioned_by = ARRAY['address'])", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable("merge_simple_select_partitioned_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_simple_select_partitioned_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable)); @@ -97,7 +96,7 @@ public void testMergeSimpleSelectPartitioned() public void testMergeUpdateWithVariousLayouts(boolean partitioned, String bucketing) { BucketingType bucketingType = bucketing.isEmpty() ? 
NONE : BUCKETED_V2; - withTemporaryTable("merge_update_with_various_formats", true, partitioned, bucketingType, targetTable -> { + withTemporaryTable("merge_update_with_various_formats", partitioned, bucketingType, targetTable -> { StringBuilder builder = new StringBuilder(); builder.append("CREATE TABLE ") .append(targetTable) @@ -113,7 +112,7 @@ public void testMergeUpdateWithVariousLayouts(boolean partitioned, String bucket onTrino().executeQuery(format("INSERT INTO %s (customer, purchase) VALUES ('Dave', 'dates'), ('Lou', 'limes'), ('Carol', 'candles')", targetTable)); verifySelectForTrinoAndHive("SELECT * FROM " + targetTable, row("Dave", "dates"), row("Lou", "limes"), row("Carol", "candles")); - withTemporaryTable("merge_update_with_various_formats_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_update_with_various_formats_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchase) VALUES ('Craig', 'candles'), ('Len', 'limes'), ('Joe', 'jellybeans')", sourceTable)); @@ -133,13 +132,13 @@ public void testMergeUpdateWithVariousLayouts(boolean partitioned, String bucket @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) public void testMergeUnBucketedUnPartitionedFailure() { - withTemporaryTable("merge_with_various_formats_failure", true, false, NONE, targetTable -> { + withTemporaryTable("merge_with_various_formats_failure", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchase) VALUES ('Dave', 'dates'), ('Lou', 'limes'), ('Carol', 'candles')", targetTable)); verifySelectForTrinoAndHive("SELECT * FROM " + targetTable, row("Dave", "dates"), row("Lou", "limes"), row("Carol", "candles")); - withTemporaryTable("merge_with_various_formats_failure_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_with_various_formats_failure_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchase) VALUES ('Craig', 'candles'), ('Len', 'limes'), ('Joe', 'jellybeans')", sourceTable)); @@ -170,7 +169,7 @@ public Object[][] partitionedAndBucketedProvider() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeMultipleOperationsUnbucketedUnpartitioned() { - withTemporaryTable("merge_multiple", true, false, NONE, targetTable -> { + withTemporaryTable("merge_multiple", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, zipcode INT, spouse VARCHAR, address VARCHAR) WITH (transactional = true)", targetTable)); testMergeMultipleOperationsInternal(targetTable, 32); }); @@ -179,7 +178,7 @@ public void testMergeMultipleOperationsUnbucketedUnpartitioned() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeMultipleOperationsUnbucketedPartitioned() { - withTemporaryTable("merge_multiple", true, true, NONE, targetTable -> { + withTemporaryTable("merge_multiple", true, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (purchases INT, zipcode INT, spouse VARCHAR, address VARCHAR, customer VARCHAR) WITH (transactional = 
true, partitioned_by = ARRAY['address', 'customer'])", targetTable)); testMergeMultipleOperationsInternal(targetTable, 32); }); @@ -188,7 +187,7 @@ public void testMergeMultipleOperationsUnbucketedPartitioned() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeMultipleOperationsBucketedUnpartitioned() { - withTemporaryTable("merge_multiple", true, false, BUCKETED_V2, targetTable -> { + withTemporaryTable("merge_multiple", false, BUCKETED_V2, targetTable -> { onHive().executeQuery(format("CREATE TABLE %s (customer STRING, purchases INT, zipcode INT, spouse STRING, address STRING)" + " CLUSTERED BY(customer, zipcode, address) INTO 4 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')", targetTable)); testMergeMultipleOperationsInternal(targetTable, 32); @@ -263,7 +262,7 @@ private List getRowsFromQueryResult(QueryResult result) @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeSimpleQuery() { - withTemporaryTable("merge_simple_query_target", true, false, NONE, targetTable -> { + withTemporaryTable("merge_simple_query_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); @@ -283,7 +282,7 @@ public void testMergeSimpleQuery() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeAllInserts() { - withTemporaryTable("merge_all_inserts", true, false, NONE, targetTable -> { + withTemporaryTable("merge_all_inserts", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 11, 'Antioch'), ('Bill', 7, 'Buena')", targetTable)); @@ -301,7 +300,7 @@ public void testMergeAllInserts() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeSimpleQueryPartitioned() { - withTemporaryTable("merge_simple_query_partitioned_target", true, true, NONE, targetTable -> { + withTemporaryTable("merge_simple_query_partitioned_target", true, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true, partitioned_by = ARRAY['address'])", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); @@ -322,12 +321,12 @@ public void testMergeSimpleQueryPartitioned() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeAllColumnsUpdated() { - withTemporaryTable("merge_all_columns_updated_target", true, false, NONE, targetTable -> { + withTemporaryTable("merge_all_columns_updated_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Dave', 11, 'Devon'), ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge')", targetTable)); - withTemporaryTable("merge_all_columns_updated_source", 
true, false, NONE, sourceTable -> { + withTemporaryTable("merge_all_columns_updated_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Dave', 11, 'Darbyshire'), ('Aaron', 6, 'Arches'), ('Carol', 9, 'Centreville'), ('Ed', 7, 'Etherville')", sourceTable)); @@ -343,12 +342,12 @@ public void testMergeAllColumnsUpdated() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeAllMatchesDeleted() { - withTemporaryTable("merge_all_matches_deleted_target", true, false, NONE, targetTable -> { + withTemporaryTable("merge_all_matches_deleted_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable("merge_all_matches_deleted_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_all_matches_deleted_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire'), ('Ed', 7, 'Etherville')", sourceTable)); @@ -364,12 +363,12 @@ public void testMergeAllMatchesDeleted() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000, dataProvider = "partitionedBucketedFailure") public void testMergeMultipleRowsMatchFails(String createTableSql) { - withTemporaryTable("merge_all_matches_deleted_target", true, true, NONE, targetTable -> { + withTemporaryTable("merge_all_matches_deleted_target", true, NONE, targetTable -> { onHive().executeQuery(format(createTableSql, targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Antioch')", targetTable)); - withTemporaryTable("merge_all_matches_deleted_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_all_matches_deleted_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Adelphi'), ('Aaron', 8, 'Ashland')", sourceTable)); @@ -401,12 +400,12 @@ public Object[][] partitionedBucketedFailure() public void testMergeFailingPartitioning() { String testDescription = "failing_merge"; - withTemporaryTable(format("%s_target", testDescription), true, true, NONE, targetTable -> { + withTemporaryTable(format("%s_target", testDescription), true, NONE, targetTable -> { onHive().executeQuery(format("CREATE TABLE %s (customer STRING, purchases INT, address STRING) STORED AS ORC TBLPROPERTIES ('transactional'='true')", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable(format("%s_source", testDescription), true, 
true, NONE, sourceTable -> { + withTemporaryTable(format("%s_source", testDescription), true, NONE, sourceTable -> { onHive().executeQuery(format("CREATE TABLE %s (purchases INT, address STRING) PARTITIONED BY (customer STRING) STORED AS ORC TBLPROPERTIES ('transactional'='true')", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable)); @@ -440,12 +439,12 @@ public void testMergeWithDifferentPartitioning(String testDescription, String cr private void testMergeWithDifferentPartitioningInternal(String testDescription, String createTargetTableSql, String createSourceTableSql) { - withTemporaryTable(format("%s_target", testDescription), true, true, NONE, targetTable -> { + withTemporaryTable(format("%s_target", testDescription), true, NONE, targetTable -> { onHive().executeQuery(format(createTargetTableSql, targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable(format("%s_source", testDescription), true, true, NONE, sourceTable -> { + withTemporaryTable(format("%s_source", testDescription), true, NONE, sourceTable -> { onHive().executeQuery(format(createSourceTableSql, sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable)); @@ -502,7 +501,7 @@ public Object[][] targetAndSourceWithDifferentPartitioning() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeQueryWithStrangeCapitalization() { - withTemporaryTable("test_without_aliases_target", true, false, NONE, targetTable -> { + withTemporaryTable("test_without_aliases_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); @@ -521,12 +520,12 @@ public void testMergeQueryWithStrangeCapitalization() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeWithoutTablesAliases() { - withTemporaryTable("test_without_aliases_target", true, false, NONE, targetTable -> { + withTemporaryTable("test_without_aliases_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (cusTomer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable("test_without_aliases_source", true, false, NONE, sourceTable -> { + withTemporaryTable("test_without_aliases_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", 
sourceTable)); @@ -545,12 +544,12 @@ public void testMergeWithoutTablesAliases() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeWithUnpredictablePredicates() { - withTemporaryTable("test_without_aliases_target", true, false, NONE, targetTable -> { + withTemporaryTable("test_without_aliases_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (cusTomer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable)); - withTemporaryTable("test_without_aliases_source", true, false, NONE, sourceTable -> { + withTemporaryTable("test_without_aliases_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire'), ('Ed', 7, 'Etherville')", sourceTable)); @@ -586,13 +585,13 @@ public void testMergeWithUnpredictablePredicates() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeWithSimplifiedUnpredictablePredicates() { - withTemporaryTable("test_without_aliases_target", true, false, NONE, targetTable -> { + withTemporaryTable("test_without_aliases_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address)" + " VALUES ('Dave', 11, 'Devon'), ('Dave', 11, 'Darbyshire')", targetTable)); - withTemporaryTable("test_without_aliases_source", true, false, NONE, sourceTable -> { + withTemporaryTable("test_without_aliases_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Dave', 11, 'Darbyshire')", sourceTable)); @@ -611,12 +610,12 @@ public void testMergeWithSimplifiedUnpredictablePredicates() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeCasts() { - withTemporaryTable("merge_cast_target", true, false, NONE, targetTable -> { + withTemporaryTable("merge_cast_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 SMALLINT, col3 INT, col4 BIGINT, col5 REAL, col6 DOUBLE) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s VALUES (1, 2, 3, 4, 5, 6)", targetTable)); - withTemporaryTable("test_without_aliases_source", true, false, NONE, sourceTable -> { + withTemporaryTable("test_without_aliases_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 DOUBLE, col2 REAL, col3 BIGINT, col4 INT, col5 SMALLINT, col6 TINYINT) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s VALUES (2, 3, 4, 5, 6, 7)", sourceTable)); @@ -633,12 +632,12 @@ public void testMergeCasts() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeSubqueries() { - withTemporaryTable("merge_nation_target", true, false, NONE, 
targetTable -> { + withTemporaryTable("merge_nation_target", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (nation_name VARCHAR, region_name VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (nation_name, region_name) VALUES ('FRANCE', 'EUROPE'), ('ALGERIA', 'AFRICA'), ('GERMANY', 'EUROPE')", targetTable)); - withTemporaryTable("merge_nation_source", true, false, NONE, sourceTable -> { + withTemporaryTable("merge_nation_source", false, NONE, sourceTable -> { onTrino().executeQuery(format("CREATE TABLE %s (nation_name VARCHAR, region_name VARCHAR) WITH (transactional = true)", sourceTable)); onTrino().executeQuery(format("INSERT INTO %s VALUES ('ALGERIA', 'AFRICA'), ('FRANCE', 'EUROPE'), ('EGYPT', 'MIDDLE EAST'), ('RUSSIA', 'EUROPE')", sourceTable)); @@ -658,7 +657,7 @@ public void testMergeSubqueries() @Test(groups = HIVE_TRANSACTIONAL, timeOut = 60 * 60 * 1000) public void testMergeOriginalFilesTarget() { - withTemporaryTable("region", true, false, NONE, targetTable -> { + withTemporaryTable("region", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional=true) AS TABLE tpch.tiny.region", targetTable)); // This merge is illegal, because many nations have the same region @@ -680,7 +679,7 @@ public void testMergeOriginalFilesTarget() @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) public void testMergeOverManySplits() { - withTemporaryTable("delete_select", true, false, NONE, targetTable -> { + withTemporaryTable("delete_select", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (orderkey bigint, custkey bigint, orderstatus varchar(1), totalprice double, orderdate date, orderpriority varchar(15), clerk varchar(15), shippriority integer, comment varchar(79)) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s SELECT * FROM tpch.\"sf0.1\".orders", targetTable)); @@ -698,7 +697,7 @@ public void testMergeOverManySplits() @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) public void testMergeFalseJoinCondition() { - withTemporaryTable("join_false", true, false, NONE, targetTable -> { + withTemporaryTable("join_false", false, NONE, targetTable -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (transactional = true)", targetTable)); onTrino().executeQuery(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 11, 'Antioch'), ('Bill', 7, 'Buena')", targetTable)); @@ -750,20 +749,10 @@ public Object[][] inserterAndDeleterProvider() }; } - void withTemporaryTable(String rootName, boolean transactional, boolean isPartitioned, BucketingType bucketingType, Consumer testRunner) + void withTemporaryTable(String rootName, boolean isPartitioned, BucketingType bucketingType, Consumer testRunner) { - if (transactional) { - ensureTransactionalHive(); - } try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable(tableName(rootName, isPartitioned, bucketingType))) { testRunner.accept(table.getName()); } } - - private void ensureTransactionalHive() - { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRequireQueryPartitionsFilter.java 
b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRequireQueryPartitionsFilter.java index 31e6ebefa986c..8d996b1900799 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRequireQueryPartitionsFilter.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRequireQueryPartitionsFilter.java @@ -71,7 +71,7 @@ public void testRequiresQueryPartitionFilterOnSpecificSchema(String queryPartiti @DataProvider public Object[][] queryPartitionFilterRequiredSchemasDataProvider() { - return new Object[][]{ + return new Object[][] { {"ARRAY['default']"}, {"ARRAY['DEFAULT']"}, {"ARRAY['deFAUlt']"} diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java index 1d38d71c96653..d94bfb81b3cf9 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java @@ -27,7 +27,6 @@ import io.trino.tempto.query.QueryResult; import io.trino.testng.services.Flaky; import io.trino.tests.product.hive.util.TemporaryHiveTable; -import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -56,7 +55,9 @@ import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.tests.product.TestGroups.HIVE_TRANSACTIONAL; +import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static io.trino.tests.product.TestGroups.STORAGE_FORMATS; +import static io.trino.tests.product.hive.BucketingType.BUCKETED_DEFAULT; import static io.trino.tests.product.hive.BucketingType.BUCKETED_V2; import static io.trino.tests.product.hive.BucketingType.NONE; import static io.trino.tests.product.hive.TestHiveTransactionalTable.CompactionMode.MAJOR; @@ -98,20 +99,20 @@ public class TestHiveTransactionalTable @Inject private HdfsClient hdfsClient; - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) public void testReadFullAcid() { doTestReadFullAcid(false, BucketingType.NONE); } @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) public void testReadFullAcidBucketed() { doTestReadFullAcid(false, BucketingType.BUCKETED_DEFAULT); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadFullAcidPartitioned() { @@ -120,14 +121,14 @@ public void testReadFullAcidPartitioned() // This test is in STORAGE_FORMATS group to ensure test coverage of transactional tables with various // metastore and HDFS setups (kerberized or not, impersonation or not). 
- @Test(groups = {HIVE_TRANSACTIONAL, STORAGE_FORMATS}, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, STORAGE_FORMATS, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadFullAcidPartitionedBucketed() { doTestReadFullAcid(true, BucketingType.BUCKETED_DEFAULT); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadFullAcidBucketedV1() { @@ -135,7 +136,7 @@ public void testReadFullAcidBucketedV1() } @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) public void testReadFullAcidBucketedV2() { doTestReadFullAcid(false, BucketingType.BUCKETED_V2); @@ -143,10 +144,6 @@ public void testReadFullAcidBucketedV2() private void doTestReadFullAcid(boolean isPartitioned, BucketingType bucketingType) { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable(tableName("read_full_acid", isPartitioned, bucketingType))) { String tableName = table.getName(); onHive().executeQuery("CREATE TABLE " + tableName + " (col INT, fcol INT) " + @@ -200,28 +197,28 @@ private void doTestReadFullAcid(boolean isPartitioned, BucketingType bucketingTy } } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadInsertOnlyOrc(boolean isPartitioned, BucketingType bucketingType) { testReadInsertOnly(isPartitioned, bucketingType, "STORED AS ORC"); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "partitioningAndBucketingTypeSmokeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeSmokeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadInsertOnlyParquet(boolean isPartitioned, BucketingType bucketingType) { testReadInsertOnly(isPartitioned, bucketingType, "STORED AS PARQUET"); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "partitioningAndBucketingTypeSmokeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeSmokeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadInsertOnlyText(boolean isPartitioned, BucketingType bucketingType) { testReadInsertOnly(isPartitioned, bucketingType, "STORED AS TEXTFILE"); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = 
ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadInsertOnlyTextWithCustomFormatProperties() { @@ -236,10 +233,6 @@ public void testReadInsertOnlyTextWithCustomFormatProperties() private void testReadInsertOnly(boolean isPartitioned, BucketingType bucketingType, String hiveTableFormatDefinition) { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable(tableName("insert_only", isPartitioned, bucketingType))) { String tableName = table.getName(); @@ -281,14 +274,17 @@ private void testReadInsertOnly(boolean isPartitioned, BucketingType bucketingTy } } - @Test(groups = {STORAGE_FORMATS, HIVE_TRANSACTIONAL}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {STORAGE_FORMATS, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) - public void testReadFullAcidWithOriginalFiles(boolean isPartitioned, BucketingType bucketingType) + public void testReadFullAcidWithOriginalFilesSmoke() { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Trino Hive transactional tables are supported with Hive version 3 or above"); - } + testReadFullAcidWithOriginalFiles(true, BUCKETED_DEFAULT); + } + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) + public void testReadFullAcidWithOriginalFiles(boolean isPartitioned, BucketingType bucketingType) + { String tableName = "test_full_acid_acid_converted_table_read"; onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); verify(bucketingType.getHiveTableProperties().isEmpty()); // otherwise we would need to include that in the CREATE TABLE's TBLPROPERTIES @@ -329,11 +325,11 @@ public void testReadFullAcidWithOriginalFiles(boolean isPartitioned, BucketingTy } } - @Test(groups = {STORAGE_FORMATS, HIVE_TRANSACTIONAL}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testUpdateFullAcidWithOriginalFilesTrinoInserting(boolean isPartitioned, BucketingType bucketingType) { - withTemporaryTable("trino_update_full_acid_acid_converted_table_read", true, isPartitioned, bucketingType, tableName -> { + withTemporaryTable("trino_update_full_acid_acid_converted_table_read", isPartitioned, bucketingType, tableName -> { onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); verify(bucketingType.getHiveTableProperties().isEmpty()); // otherwise we would need to include that in the CREATE TABLE's TBLPROPERTIES onHive().executeQuery("CREATE TABLE " + tableName + " (col INT, fcol INT) " + @@ -379,11 +375,18 @@ public void testUpdateFullAcidWithOriginalFilesTrinoInserting(boolean isPartitio }); } - @Test(groups = {STORAGE_FORMATS, HIVE_TRANSACTIONAL}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {STORAGE_FORMATS, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) + @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) + public void 
testUpdateFullAcidWithOriginalFilesTrinoInsertingAndDeletingSmoke() + { + testUpdateFullAcidWithOriginalFilesTrinoInsertingAndDeleting(true, BUCKETED_DEFAULT); + } + + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testUpdateFullAcidWithOriginalFilesTrinoInsertingAndDeleting(boolean isPartitioned, BucketingType bucketingType) { - withTemporaryTable("trino_update_full_acid_acid_converted_table_read", true, isPartitioned, bucketingType, tableName -> { + withTemporaryTable("trino_update_full_acid_acid_converted_table_read", isPartitioned, bucketingType, tableName -> { onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); verify(bucketingType.getHiveTableProperties().isEmpty()); // otherwise we would need to include that in the CREATE TABLE's TBLPROPERTIES onHive().executeQuery("CREATE TABLE " + tableName + " (col INT, fcol INT) " + @@ -431,14 +434,10 @@ String makeValues(int colStart, int colCount, int fcol, boolean isPartitioned, i .collect(Collectors.joining(", ")); } - @Test(groups = {STORAGE_FORMATS, HIVE_TRANSACTIONAL}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "partitioningAndBucketingTypeDataProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadInsertOnlyWithOriginalFiles(boolean isPartitioned, BucketingType bucketingType) { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Trino Hive transactional tables are supported with Hive version 3 or above"); - } - String tableName = "test_insert_only_acid_converted_table_read"; onHive().executeQuery("DROP TABLE IF EXISTS " + tableName); verify(bucketingType.getHiveTableProperties().isEmpty()); // otherwise we would need to include that in the CREATE TABLE's TBLPROPERTIES @@ -470,27 +469,6 @@ public void testReadInsertOnlyWithOriginalFiles(boolean isPartitioned, Bucketing } } - @Test(groups = HIVE_TRANSACTIONAL) - @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) - public void testFailAcidBeforeHive3() - { - if (getHiveVersionMajor() >= 3) { - throw new SkipException("This tests behavior of ACID table before Hive 3 "); - } - - try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable("test_fail_acid_before_hive3_" + randomNameSuffix())) { - String tableName = table.getName(); - onHive().executeQuery("" + - "CREATE TABLE " + tableName + "(a bigint) " + - "CLUSTERED BY(a) INTO 4 BUCKETS " + - "STORED AS ORC " + - "TBLPROPERTIES ('transactional'='true')"); - - assertQueryFailure(() -> onTrino().executeQuery("SELECT * FROM " + tableName)) - .hasMessageContaining("Failed to open transaction. 
Transactional tables support requires Hive metastore version at least 3.0"); - } - } - @DataProvider public Object[][] partitioningAndBucketingTypeDataProvider() { @@ -511,14 +489,10 @@ public Object[][] partitioningAndBucketingTypeSmokeDataProvider() }; } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "testCreateAcidTableDataProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "testCreateAcidTableDataProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testCtasAcidTable(boolean isPartitioned, BucketingType bucketingType) { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable(format("ctas_transactional_%s", randomNameSuffix()))) { String tableName = table.getName(); onTrino().executeQuery("CREATE TABLE " + tableName + " " + @@ -535,11 +509,11 @@ public void testCtasAcidTable(boolean isPartitioned, BucketingType bucketingType } } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "testCreateAcidTableDataProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "testCreateAcidTableDataProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testCreateAcidTable(boolean isPartitioned, BucketingType bucketingType) { - withTemporaryTable("create_transactional", true, isPartitioned, bucketingType, tableName -> { + withTemporaryTable("create_transactional", isPartitioned, bucketingType, tableName -> { onTrino().executeQuery("CREATE TABLE " + tableName + " (col INTEGER, fcol INTEGER, partcol INTEGER)" + trinoTableProperties(ACID, isPartitioned, bucketingType)); @@ -548,11 +522,11 @@ public void testCreateAcidTable(boolean isPartitioned, BucketingType bucketingTy }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "acidFormatColumnNames") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "acidFormatColumnNames") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidTableColumnNameConflict(String columnName) { - withTemporaryTable("acid_column_name_conflict", true, true, NONE, tableName -> { + withTemporaryTable("acid_column_name_conflict", true, NONE, tableName -> { onHive().executeQuery("CREATE TABLE " + tableName + " (`" + columnName + "` INTEGER, fcol INTEGER, partcol INTEGER) STORED AS ORC " + hiveTableProperties(ACID, NONE)); onTrino().executeQuery("INSERT INTO " + tableName + " VALUES (1, 2, 3)"); assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(row(1, 2, 3)); @@ -572,11 +546,11 @@ public Object[][] acidFormatColumnNames() }; } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testSimpleUnpartitionedTransactionalInsert() { - withTemporaryTable("unpartitioned_transactional_insert", true, false, NONE, tableName -> { + withTemporaryTable("unpartitioned_transactional_insert", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true)", tableName)); String insertQuery = format("INSERT INTO %s VALUES (11, 100), (12, 200), (13, 300)", tableName); @@ -595,11 +569,11 @@ public void 
testSimpleUnpartitionedTransactionalInsert() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testTransactionalPartitionInsert() { - withTemporaryTable("transactional_partition_insert", true, true, NONE, tableName -> { + withTemporaryTable("transactional_partition_insert", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true, partitioned_by = ARRAY['column2'])", tableName)); onTrino().executeQuery(format("INSERT INTO %s (column2, column1) VALUES %s, %s", @@ -629,14 +603,14 @@ public void testTransactionalPartitionInsert() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testTransactionalBucketedPartitionedInsert() { testTransactionalBucketedPartitioned(false); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testTransactionalBucketedPartitionedInsertOnly() { @@ -645,7 +619,7 @@ public void testTransactionalBucketedPartitionedInsertOnly() private void testTransactionalBucketedPartitioned(boolean insertOnly) { - withTemporaryTable("bucketed_partitioned_insert_only", true, true, BUCKETED_V2, tableName -> { + withTemporaryTable("bucketed_partitioned_insert_only", true, BUCKETED_V2, tableName -> { String insertOnlyProperty = insertOnly ? ", 'transactional_properties'='insert_only'" : ""; onHive().executeQuery(format("CREATE TABLE %s (purchase STRING) PARTITIONED BY (customer STRING) CLUSTERED BY (purchase) INTO 3 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional' = 'true'%s)", @@ -669,11 +643,11 @@ private void testTransactionalBucketedPartitioned(boolean insertOnly) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testTransactionalUnpartitionedDelete(Engine inserter, Engine deleter) { - withTemporaryTable("unpartitioned_delete", true, false, NONE, tableName -> { + withTemporaryTable("unpartitioned_delete", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INTEGER, column2 BIGINT) WITH (format = 'ORC', transactional = true)", tableName)); execute(inserter, format("INSERT INTO %s (column1, column2) VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500)", tableName)); execute(deleter, format("DELETE FROM %s WHERE column2 = 100", tableName)); @@ -688,11 +662,11 @@ public void testTransactionalUnpartitionedDelete(Engine inserter, Engine deleter }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testMultiDelete(Engine inserter, Engine deleter) { - withTemporaryTable("unpartitioned_multi_delete", true, 
false, NONE, tableName -> { + withTemporaryTable("unpartitioned_multi_delete", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true)", tableName)); execute(inserter, format("INSERT INTO %s VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500)", tableName)); execute(inserter, format("INSERT INTO %s VALUES (6, 600), (7, 700), (8, 800), (9, 900), (10, 1000)", tableName)); @@ -703,7 +677,7 @@ public void testMultiDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testReadAfterMultiInsertAndDelete() { @@ -714,8 +688,8 @@ public void testReadAfterMultiInsertAndDelete() // statement id, when filtering out deleted rows. // // For more context see https://issues.apache.org/jira/browse/HIVE-16832 - withTemporaryTable("partitioned_multi_insert", true, true, BucketingType.BUCKETED_V1, tableName -> { - withTemporaryTable("tmp_data_table", false, false, NONE, dataTableName -> { + withTemporaryTable("partitioned_multi_insert", true, BucketingType.BUCKETED_V1, tableName -> { + withTemporaryTable("tmp_data_table", false, NONE, dataTableName -> { onTrino().executeQuery(format("CREATE TABLE %s (a int, b int, c varchar(5)) WITH " + "(transactional = true, partitioned_by = ARRAY['c'], bucketed_by = ARRAY['a'], bucket_count = 2)", tableName)); onTrino().executeQuery(format("CREATE TABLE %s (x int)", dataTableName)); @@ -733,11 +707,11 @@ public void testReadAfterMultiInsertAndDelete() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testTransactionalMetadataDelete(Engine inserter, Engine deleter) { - withTemporaryTable("metadata_delete", true, true, NONE, tableName -> { + withTemporaryTable("metadata_delete", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true, partitioned_by = ARRAY['column2'])", tableName)); execute(inserter, format("INSERT INTO %s (column2, column1) VALUES %s, %s", tableName, @@ -749,11 +723,11 @@ public void testTransactionalMetadataDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testNonTransactionalMetadataDelete() { - withTemporaryTable("non_transactional_metadata_delete", false, true, NONE, tableName -> { + withTemporaryTable("non_transactional_metadata_delete", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column2 BIGINT, column1 INT) WITH (partitioned_by = ARRAY['column1'])", tableName)); execute(Engine.TRINO, format("INSERT INTO %s (column1, column2) VALUES %s, %s", @@ -771,11 +745,11 @@ public void testNonTransactionalMetadataDelete() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = 
"inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUnpartitionedDeleteAll(Engine inserter, Engine deleter) { - withTemporaryTable("unpartitioned_delete_all", true, false, NONE, tableName -> { + withTemporaryTable("unpartitioned_delete_all", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true)", tableName)); execute(inserter, format("INSERT INTO %s VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500)", tableName)); execute(deleter, "DELETE FROM " + tableName); @@ -783,11 +757,11 @@ public void testUnpartitionedDeleteAll(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testMultiColumnDelete(Engine inserter, Engine deleter) { - withTemporaryTable("multi_column_delete", true, false, NONE, tableName -> { + withTemporaryTable("multi_column_delete", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH (transactional = true)", tableName)); execute(inserter, format("INSERT INTO %s VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500)", tableName)); String where = " WHERE column1 >= 2 AND column2 <= 400"; @@ -796,11 +770,11 @@ public void testMultiColumnDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testPartitionAndRowsDelete(Engine inserter, Engine deleter) { - withTemporaryTable("partition_and_rows_delete", true, true, NONE, tableName -> { + withTemporaryTable("partition_and_rows_delete", true, NONE, tableName -> { onTrino().executeQuery("CREATE TABLE " + tableName + " (column2 BIGINT, column1 INT) WITH (transactional = true, partitioned_by = ARRAY['column1'])"); execute(inserter, format("INSERT INTO %s (column1, column2) VALUES (1, 100), (1, 200), (2, 300), (2, 400), (2, 500)", tableName)); @@ -810,11 +784,11 @@ public void testPartitionAndRowsDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testPartitionedInsertAndRowLevelDelete(Engine inserter, Engine deleter) { - withTemporaryTable("partitioned_row_level_delete", true, true, NONE, tableName -> { + withTemporaryTable("partitioned_row_level_delete", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column2 INT, column1 BIGINT) WITH (transactional = true, partitioned_by = ARRAY['column1'])", tableName)); execute(inserter, format("INSERT INTO %s (column1, column2) VALUES %s, %s", @@ -834,11 +808,11 @@ public void testPartitionedInsertAndRowLevelDelete(Engine 
inserter, Engine delet }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) public void testBucketedPartitionedDelete(Engine inserter, Engine deleter) { - withTemporaryTable("bucketed_partitioned_delete", true, true, NONE, tableName -> { + withTemporaryTable("bucketed_partitioned_delete", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (purchase STRING) PARTITIONED BY (customer STRING) CLUSTERED BY (purchase) INTO 3 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true')", tableName)); execute(inserter, format("INSERT INTO %s (customer, purchase) VALUES", tableName) + @@ -865,11 +839,11 @@ public void testBucketedPartitionedDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteAllRowsInPartition() { - withTemporaryTable("bucketed_partitioned_delete", true, true, NONE, tableName -> { + withTemporaryTable("bucketed_partitioned_delete", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (purchase STRING) PARTITIONED BY (customer STRING) STORED AS ORC TBLPROPERTIES ('transactional' = 'true')", tableName)); log.info("About to insert"); @@ -883,11 +857,11 @@ public void testDeleteAllRowsInPartition() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteAfterDelete() { - withTemporaryTable("delete_after_delete", true, false, NONE, tableName -> { + withTemporaryTable("delete_after_delete", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id INT) WITH (transactional = true)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (1), (2), (3)", tableName)); @@ -902,11 +876,11 @@ public void testDeleteAfterDelete() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteAfterDeleteWithPredicate() { - withTemporaryTable("delete_after_delete_predicate", true, false, NONE, tableName -> { + withTemporaryTable("delete_after_delete_predicate", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id INT) WITH (transactional = true)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (1), (2), (3)", tableName)); @@ -922,11 +896,11 @@ public void testDeleteAfterDeleteWithPredicate() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testBucketedUnpartitionedDelete(Engine inserter, Engine deleter) { - 
withTemporaryTable("bucketed_unpartitioned_delete", true, true, NONE, tableName -> { + withTemporaryTable("bucketed_unpartitioned_delete", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (customer STRING, purchase STRING) CLUSTERED BY (purchase) INTO 3 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true')", tableName)); execute(inserter, format("INSERT INTO %s (customer, purchase) VALUES", tableName) + @@ -953,11 +927,11 @@ public void testBucketedUnpartitionedDelete(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteOverManySplits() { - withTemporaryTable("delete_select", true, false, NONE, tableName -> { + withTemporaryTable("delete_select", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true) AS SELECT * FROM tpch.sf10.orders", tableName)); log.info("About to delete selected rows"); @@ -967,11 +941,11 @@ public void testDeleteOverManySplits() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "inserterAndDeleterProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testCorrectSelectCountStar(Engine inserter, Engine deleter) { - withTemporaryTable("select_count_star_delete", true, true, NONE, tableName -> { + withTemporaryTable("select_count_star_delete", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (col1 INT, col2 BIGINT) PARTITIONED BY (col3 STRING) STORED AS ORC TBLPROPERTIES ('transactional'='true')", tableName)); execute(inserter, format("INSERT INTO %s VALUES (1, 100, 'a'), (2, 200, 'b'), (3, 300, 'c'), (4, 400, 'a'), (5, 500, 'b'), (6, 600, 'c')", tableName)); @@ -980,12 +954,12 @@ public void testCorrectSelectCountStar(Engine inserter, Engine deleter) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "insertersProvider", timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "insertersProvider", timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testInsertOnlyMultipleWriters(boolean bucketed, Engine inserter1, Engine inserter2) { log.info("testInsertOnlyMultipleWriters bucketed %s, inserter1 %s, inserter2 %s", bucketed, inserter1, inserter2); - withTemporaryTable("insert_only_partitioned", true, true, NONE, tableName -> { + withTemporaryTable("insert_only_partitioned", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (col1 INT, col2 BIGINT) PARTITIONED BY (col3 STRING) %s STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')", tableName, bucketed ? 
"CLUSTERED BY (col2) INTO 3 BUCKETS" : "")); @@ -1005,11 +979,11 @@ public void testInsertOnlyMultipleWriters(boolean bucketed, Engine inserter1, En }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testInsertFailsInExplicitTrinoTransaction() { - withTemporaryTable("insert_fail_explicit_transaction", true, false, NONE, tableName -> { + withTemporaryTable("insert_fail_explicit_transaction", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (a_string varchar) WITH (format = 'ORC', transactional = true)", tableName)); onTrino().executeQuery("START TRANSACTION"); assertQueryFailure(() -> onTrino().executeQuery(format("INSERT INTO %s (a_string) VALUES ('Commander Bun Bun')", tableName))) @@ -1017,11 +991,11 @@ public void testInsertFailsInExplicitTrinoTransaction() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUpdateFailsInExplicitTrinoTransaction() { - withTemporaryTable("update_fail_explicit_transaction", true, false, NONE, tableName -> { + withTemporaryTable("update_fail_explicit_transaction", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (a_string varchar) WITH (format = 'ORC', transactional = true)", tableName)); onTrino().executeQuery("START TRANSACTION"); assertQueryFailure(() -> onTrino().executeQuery(format("UPDATE %s SET a_string = 'Commander Bun Bun'", tableName))) @@ -1029,11 +1003,11 @@ public void testUpdateFailsInExplicitTrinoTransaction() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteFailsInExplicitTrinoTransaction() { - withTemporaryTable("delete_fail_explicit_transaction", true, false, NONE, tableName -> { + withTemporaryTable("delete_fail_explicit_transaction", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (a_string varchar) WITH (format = 'ORC', transactional = true)", tableName)); onTrino().executeQuery("START TRANSACTION"); assertQueryFailure(() -> onTrino().executeQuery(format("DELETE FROM %s WHERE a_string = 'Commander Bun Bun'", tableName))) @@ -1041,12 +1015,11 @@ public void testDeleteFailsInExplicitTrinoTransaction() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "transactionModeProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "transactionModeProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testColumnRenamesOrcPartitioned(boolean transactional) { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_column_renames_partitioned", transactional, false, NONE, tableName -> { + withTemporaryTable("test_column_renames_partitioned", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id BIGINT, old_name VARCHAR, age INT, old_state VARCHAR)" + " WITH (format = 'ORC', transactional = %s, partitioned_by = ARRAY['old_state'])", tableName, transactional)); testOrcColumnRenames(tableName); @@ -1057,12 +1030,11 @@ public void testColumnRenamesOrcPartitioned(boolean transactional) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = 
"transactionModeProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "transactionModeProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testColumnRenamesOrcNotPartitioned(boolean transactional) { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_orc_column_renames_not_partitioned", transactional, false, NONE, tableName -> { + withTemporaryTable("test_orc_column_renames_not_partitioned", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id BIGINT, old_name VARCHAR, age INT, old_state VARCHAR)" + " WITH (format = 'ORC', transactional = %s)", tableName, transactional)); testOrcColumnRenames(tableName); @@ -1091,12 +1063,11 @@ private void testOrcColumnRenames(String tableName) verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA"), row(333, "Joan", 23, "OR")); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "transactionModeProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "transactionModeProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testOrcColumnSwap(boolean transactional) { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_orc_column_renames", transactional, false, NONE, tableName -> { + withTemporaryTable("test_orc_column_renames", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (name VARCHAR, state VARCHAR) WITH (format = 'ORC', transactional = %s)", tableName, transactional)); onTrino().executeQuery(format("INSERT INTO %s VALUES ('Katy', 'CA'), ('Joe', 'WA')", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row("Katy", "CA"), row("Joe", "WA")); @@ -1109,12 +1080,11 @@ public void testOrcColumnSwap(boolean transactional) }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testBehaviorOnParquetColumnRenames() { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_parquet_column_renames", false, false, NONE, tableName -> { + withTemporaryTable("test_parquet_column_renames", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id BIGINT, old_name VARCHAR, age INT, old_state VARCHAR) WITH (format = 'PARQUET', transactional = false)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (111, 'Katy', 57, 'CA'), (222, 'Joe', 72, 'WA')", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); @@ -1133,12 +1103,11 @@ public void testBehaviorOnParquetColumnRenames() }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "transactionModeProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "transactionModeProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testOrcColumnDropAdd(boolean transactional) { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_orc_add_drop", transactional, false, NONE, tableName -> { + withTemporaryTable("test_orc_add_drop", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id BIGINT, old_name VARCHAR, age INT, old_state VARCHAR) WITH (transactional = %s)", tableName, 
transactional)); onTrino().executeQuery(format("INSERT INTO %s VALUES (111, 'Katy', 57, 'CA'), (222, 'Joe', 72, 'WA')", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); @@ -1156,12 +1125,11 @@ public void testOrcColumnDropAdd(boolean transactional) }); } - @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "transactionModeProvider") + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, dataProvider = "transactionModeProvider") @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testOrcColumnTypeChange(boolean transactional) { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_orc_column_type_change", transactional, false, NONE, tableName -> { + withTemporaryTable("test_orc_column_type_change", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id INT, old_name VARCHAR, age TINYINT, old_state VARCHAR) WITH (transactional = %s)", tableName, transactional)); onTrino().executeQuery(format("INSERT INTO %s VALUES (111, 'Katy', 57, 'CA'), (222, 'Joe', 72, 'WA')", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); @@ -1176,12 +1144,11 @@ public void testOrcColumnTypeChange(boolean transactional) }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testParquetColumnDropAdd() { - ensureSchemaEvolutionSupported(); - withTemporaryTable("test_parquet_add_drop", false, false, NONE, tableName -> { + withTemporaryTable("test_parquet_add_drop", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (id BIGINT, old_name VARCHAR, age INT, state VARCHAR) WITH (format = 'PARQUET')", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (111, 'Katy', 57, 'CA'), (222, 'Joe', 72, 'WA')", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); @@ -1213,11 +1180,11 @@ public Object[][] transactionModeProvider() }; } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateFailNonTransactional() { - withTemporaryTable("update_fail_nontransactional", true, true, NONE, tableName -> { + withTemporaryTable("update_fail_nontransactional", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR)", tableName)); log.info("About to insert"); @@ -1229,11 +1196,11 @@ public void testAcidUpdateFailNonTransactional() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateFailInsertOnlyTable() { - withTemporaryTable("update_fail_insert_only", true, false, NONE, tableName -> { + withTemporaryTable("update_fail_insert_only", false, NONE, tableName -> { onHive().executeQuery("CREATE TABLE " + tableName + " (customer STRING, purchase STRING) " + "STORED AS ORC " + hiveTableProperties(INSERT_ONLY, NONE)); @@ -1247,11 +1214,11 @@ public void 
testAcidUpdateFailInsertOnlyTable() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidDeleteFailNonTransactional() { - withTemporaryTable("delete_fail_nontransactional", true, true, NONE, tableName -> { + withTemporaryTable("delete_fail_nontransactional", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR)", tableName)); log.info("About to insert"); @@ -1263,11 +1230,11 @@ public void testAcidDeleteFailNonTransactional() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidDeleteFailInsertOnlyTable() { - withTemporaryTable("delete_fail_insert_only", true, false, NONE, tableName -> { + withTemporaryTable("delete_fail_insert_only", false, NONE, tableName -> { onHive().executeQuery("CREATE TABLE " + tableName + " (customer STRING, purchase STRING) " + "STORED AS ORC " + hiveTableProperties(INSERT_ONLY, NONE)); @@ -1281,11 +1248,11 @@ public void testAcidDeleteFailInsertOnlyTable() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSucceedUpdatingPartitionKey() { - withTemporaryTable("fail_update_partition_key", true, true, NONE, tableName -> { + withTemporaryTable("fail_update_partition_key", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 INT, col2 VARCHAR, col3 BIGINT) WITH (transactional = true, partitioned_by = ARRAY['col3'])", tableName)); log.info("About to insert"); @@ -1299,11 +1266,11 @@ public void testAcidUpdateSucceedUpdatingPartitionKey() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSucceedUpdatingBucketColumn() { - withTemporaryTable("fail_update_bucket_column", true, true, NONE, tableName -> { + withTemporaryTable("fail_update_bucket_column", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (customer STRING, purchase STRING) CLUSTERED BY (purchase) INTO 3 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true')", tableName)); log.info("About to insert"); @@ -1317,11 +1284,11 @@ public void testAcidUpdateSucceedUpdatingBucketColumn() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateFailOnIllegalCast() { - withTemporaryTable("fail_update_on_illegal_cast", true, true, NONE, tableName -> { + withTemporaryTable("fail_update_on_illegal_cast", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 INT, col2 VARCHAR, col3 BIGINT) WITH (transactional = true)", tableName)); log.info("About to insert"); @@ -1333,11 +1300,11 @@ public void 
testAcidUpdateFailOnIllegalCast() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSimple() { - withTemporaryTable("acid_update_simple", true, true, NONE, tableName -> { + withTemporaryTable("acid_update_simple", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (7, 'ONE', 1000, true, 101), (13, 'TWO', 2000, false, 202)", tableName)); @@ -1348,11 +1315,11 @@ public void testAcidUpdateSimple() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSelectedValues() { - withTemporaryTable("acid_update_simple_selected", true, true, NONE, tableName -> { + withTemporaryTable("acid_update_simple_selected", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (7, 'ONE', 1000, true, 101), (13, 'TWO', 2000, false, 202)", tableName)); @@ -1363,11 +1330,11 @@ public void testAcidUpdateSelectedValues() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateCopyColumn() { - withTemporaryTable("acid_update_copy_column", true, true, NONE, tableName -> { + withTemporaryTable("acid_update_copy_column", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 int, col2 int, col3 VARCHAR) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3) VALUES (7, 15, 'ONE'), (13, 17, 'DEUX')", tableName)); @@ -1378,11 +1345,11 @@ public void testAcidUpdateCopyColumn() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSomeLiteralNullColumnValues() { - withTemporaryTable("update_some_literal_null_columns", true, true, NONE, tableName -> { + withTemporaryTable("update_some_literal_null_columns", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1397,11 +1364,11 @@ public void testAcidUpdateSomeLiteralNullColumnValues() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, 
PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateSomeComputedNullColumnValues() { - withTemporaryTable("update_some_computed_null_columns", true, true, NONE, tableName -> { + withTemporaryTable("update_some_computed_null_columns", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1417,11 +1384,11 @@ public void testAcidUpdateSomeComputedNullColumnValues() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateAllLiteralNullColumnValues() { - withTemporaryTable("update_all_literal_null_columns", true, true, NONE, tableName -> { + withTemporaryTable("update_all_literal_null_columns", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1432,11 +1399,11 @@ public void testAcidUpdateAllLiteralNullColumnValues() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateAllComputedNullColumnValues() { - withTemporaryTable("update_all_computed_null_columns", true, true, NONE, tableName -> { + withTemporaryTable("update_all_computed_null_columns", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1448,11 +1415,11 @@ public void testAcidUpdateAllComputedNullColumnValues() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateReversed() { - withTemporaryTable("update_reversed", true, true, NONE, tableName -> { + withTemporaryTable("update_reversed", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1463,11 +1430,11 @@ public void testAcidUpdateReversed() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, 
PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdatePermuted() { - withTemporaryTable("update_permuted", true, true, NONE, tableName -> { + withTemporaryTable("update_permuted", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 VARCHAR, col3 BIGINT, col4 BOOLEAN, col5 INT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 'ONE', 1000, true, 101), (2, 'TWO', 2000, false, 202)", tableName)); @@ -1478,11 +1445,11 @@ public void testAcidUpdatePermuted() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateAllColumnsSetAndDependencies() { - withTemporaryTable("update_all_columns_set", true, true, NONE, tableName -> { + withTemporaryTable("update_all_columns_set", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 INT, col3 BIGINT, col4 INT, col5 TINYINT) WITH (transactional = true)", tableName)); log.info("About to insert"); onTrino().executeQuery(format("INSERT INTO %s (col1, col2, col3, col4, col5) VALUES (1, 2, 3, 4, 5), (21, 22, 23, 24, 25)", tableName)); @@ -1493,11 +1460,11 @@ public void testAcidUpdateAllColumnsSetAndDependencies() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdatePartitioned() { - withTemporaryTable("update_partitioned", true, true, NONE, tableName -> { + withTemporaryTable("update_partitioned", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 INT, col2 VARCHAR, col3 BIGINT) WITH (transactional = true, partitioned_by = ARRAY['col3'])", tableName)); log.info("About to insert"); @@ -1510,11 +1477,11 @@ public void testAcidUpdatePartitioned() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateBucketed() { - withTemporaryTable("update_bucketed", true, true, NONE, tableName -> { + withTemporaryTable("update_bucketed", true, NONE, tableName -> { onHive().executeQuery(format("CREATE TABLE %s (customer STRING, purchase STRING) CLUSTERED BY (customer) INTO 3 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true')", tableName)); log.info("About to insert"); @@ -1527,11 +1494,11 @@ public void testAcidUpdateBucketed() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateMajorCompaction() { - withTemporaryTable("schema_evolution_column_addition", true, false, NONE, tableName -> { + withTemporaryTable("schema_evolution_column_addition", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 BIGINT) WITH 
(transactional = true)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (11, 100)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (22, 200)", tableName)); @@ -1549,11 +1516,11 @@ public void testAcidUpdateMajorCompaction() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateWithSubqueryPredicate() { - withTemporaryTable("test_update_subquery", true, false, NONE, tableName -> { + withTemporaryTable("test_update_subquery", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 varchar) WITH (transactional = true)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (1, 'x')", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (2, 'y')", tableName)); @@ -1562,7 +1529,7 @@ public void testAcidUpdateWithSubqueryPredicate() onTrino().executeQuery(format("UPDATE %s SET column2 = 'row updated' WHERE column1 = (SELECT min(regionkey) + 1 FROM tpch.tiny.region)", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(1, "row updated"), row(2, "y")); - withTemporaryTable("second_table", true, false, NONE, secondTable -> { + withTemporaryTable("second_table", false, NONE, secondTable -> { onTrino().executeQuery(format("CREATE TABLE %s (regionkey bigint, name varchar(25), comment varchar(152)) WITH (transactional = true)", secondTable)); onTrino().executeQuery(format("INSERT INTO %s SELECT * FROM tpch.tiny.region", secondTable)); @@ -1580,11 +1547,11 @@ public void testAcidUpdateWithSubqueryPredicate() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateWithSubqueryAssignment() { - withTemporaryTable("test_update_subquery", true, false, NONE, tableName -> { + withTemporaryTable("test_update_subquery", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (column1 INT, column2 varchar) WITH (transactional = true)", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (1, 'x')", tableName)); onTrino().executeQuery(format("INSERT INTO %s VALUES (2, 'y')", tableName)); @@ -1593,7 +1560,7 @@ public void testAcidUpdateWithSubqueryAssignment() onTrino().executeQuery(format("UPDATE %s SET column2 = (SELECT max(name) FROM tpch.tiny.region)", tableName)); verifySelectForTrinoAndHive("SELECT * FROM " + tableName, row(1, "MIDDLE EAST"), row(2, "MIDDLE EAST")); - withTemporaryTable("second_table", true, false, NONE, secondTable -> { + withTemporaryTable("second_table", false, NONE, secondTable -> { onTrino().executeQuery(format("CREATE TABLE %s (regionkey bigint, name varchar(25), comment varchar(152)) WITH (transactional = true)", secondTable)); onTrino().executeQuery(format("INSERT INTO %s SELECT * FROM tpch.tiny.region", secondTable)); @@ -1611,11 +1578,11 @@ public void testAcidUpdateWithSubqueryAssignment() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateDuplicateUpdateValue() { - 
withTemporaryTable("test_update_bug", true, false, NONE, tableName -> { + withTemporaryTable("test_update_bug", false, NONE, tableName -> { onTrino().executeQuery( format("CREATE TABLE %s (", tableName) + " yyyy integer," + @@ -1647,11 +1614,11 @@ public void testAcidUpdateDuplicateUpdateValue() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testAcidUpdateMultipleDuplicateValues() { - withTemporaryTable("test_update_multiple", true, false, NONE, tableName -> { + withTemporaryTable("test_update_multiple", false, NONE, tableName -> { onTrino().executeQuery( format("CREATE TABLE %s (c1 int, c2 int, c3 int, c4 int, c5 int, c6 int) WITH (transactional = true)", tableName)); @@ -1675,10 +1642,10 @@ public void testAcidUpdateMultipleDuplicateValues() } @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) public void testInsertDeleteUpdateWithTrinoAndHive() { - withTemporaryTable("update_insert_delete_trino_hive", true, true, NONE, tableName -> { + withTemporaryTable("update_insert_delete_trino_hive", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s (col1 TINYINT, col2 INT, col3 BIGINT, col4 INT, col5 TINYINT) WITH (transactional = true)", tableName)); log.info("Performing first insert on Trino"); @@ -1711,11 +1678,11 @@ public void testInsertDeleteUpdateWithTrinoAndHive() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteFromOriginalFiles() { - withTemporaryTable("delete_original_files", true, true, NONE, tableName -> { + withTemporaryTable("delete_original_files", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true, partitioned_by = ARRAY['regionkey'])" + " AS SELECT nationkey, name, regionkey FROM tpch.tiny.nation", tableName)); verifyOriginalFiles(tableName, "WHERE regionkey = 4"); @@ -1727,14 +1694,14 @@ public void testDeleteFromOriginalFiles() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteWholePartition() { testDeleteWholePartition(false); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteWholePartitionWithOriginalFiles() { @@ -1743,7 +1710,7 @@ public void testDeleteWholePartitionWithOriginalFiles() private void testDeleteWholePartition(boolean withOriginalFiles) { - withTemporaryTable("delete_partitioned", true, true, NONE, tableName -> { + withTemporaryTable("delete_partitioned", true, NONE, tableName -> { if (withOriginalFiles) { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true, partitioned_by = ARRAY['regionkey'])" + " AS SELECT nationkey, name, 
regionkey FROM tpch.tiny.nation", tableName)); @@ -1774,11 +1741,11 @@ private void testDeleteWholePartition(boolean withOriginalFiles) }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUpdateOriginalFilesPartitioned() { - withTemporaryTable("update_original_files", true, true, NONE, tableName -> { + withTemporaryTable("update_original_files", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true, partitioned_by = ARRAY['regionkey'])" + " AS SELECT nationkey, name, regionkey FROM tpch.tiny.nation", tableName)); verifyOriginalFiles(tableName, "WHERE regionkey = 4"); @@ -1788,11 +1755,11 @@ public void testUpdateOriginalFilesPartitioned() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUpdateOriginalFilesUnpartitioned() { - withTemporaryTable("update_original_files", true, true, NONE, tableName -> { + withTemporaryTable("update_original_files", true, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true)" + " AS SELECT nationkey, name, regionkey FROM tpch.tiny.nation", tableName)); verifyOriginalFiles(tableName, "WHERE regionkey = 4"); @@ -1802,11 +1769,11 @@ public void testUpdateOriginalFilesUnpartitioned() }); } - @Test(groups = HIVE_TRANSACTIONAL, timeOut = TEST_TIMEOUT) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}, timeOut = TEST_TIMEOUT) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testInsertRowIdCorrectness() { - withTemporaryTable("test_insert_row_id_correctness", true, false, NONE, tableName -> { + withTemporaryTable("test_insert_row_id_correctness", false, NONE, tableName -> { // We use tpch.tiny.supplier because it is the smallest table that // is written as multiple pages by the ORC writer. 
If it stops // being split into pages, this test won't detect issues arising @@ -1875,25 +1842,18 @@ public Object[][] inserterAndDeleterProvider() }; } - void withTemporaryTable(String rootName, boolean transactional, boolean isPartitioned, BucketingType bucketingType, Consumer testRunner) + void withTemporaryTable(String rootName, boolean isPartitioned, BucketingType bucketingType, Consumer testRunner) { - if (transactional) { - ensureTransactionalHive(); - } try (TemporaryHiveTable table = TemporaryHiveTable.temporaryHiveTable(tableName(rootName, isPartitioned, bucketingType) + randomNameSuffix())) { testRunner.accept(table.getName()); } } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = "https://github.com/trinodb/trino/issues/5463", match = "Expected row count to be <4>, but was <6>") public void testFilesForAbortedTransactionsIgnored() throws Exception { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - String tableName = "test_aborted_transaction_table"; onHive().executeQuery("" + "CREATE TABLE " + tableName + " (col INT) " + @@ -1945,11 +1905,11 @@ public void testFilesForAbortedTransactionsIgnored() } } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDoubleUpdateAndThenReadFromHive() { - withTemporaryTable("test_double_update", true, false, NONE, tableName -> { + withTemporaryTable("test_double_update", false, NONE, tableName -> { onTrino().executeQuery( "CREATE TABLE test_double_update ( " + "column1 INT, " + @@ -1966,11 +1926,11 @@ public void testDoubleUpdateAndThenReadFromHive() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteWithOriginalFiles() { - withTemporaryTable("test_delete_with_original_files", true, false, NONE, tableName -> { + withTemporaryTable("test_delete_with_original_files", false, NONE, tableName -> { // these 3 properties are necessary to make sure there is more than 1 original file created onTrino().executeQuery("SET SESSION scale_writers = true"); onTrino().executeQuery("SET SESSION writer_scaling_min_data_processed = '4kB'"); @@ -1988,11 +1948,11 @@ public void testDeleteWithOriginalFiles() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteWithOriginalFilesWithWhereClause() { - withTemporaryTable("test_delete_with_original_files_with_where_clause", true, false, NONE, tableName -> { + withTemporaryTable("test_delete_with_original_files_with_where_clause", false, NONE, tableName -> { // these 3 properties are necessary to make sure there is more than 1 original file created onTrino().executeQuery("SET SESSION scale_writers = true"); onTrino().executeQuery("SET SESSION writer_scaling_min_data_processed = '4kB'"); @@ -2026,11 +1986,11 @@ private void validateFileIsDirectlyUnderTableLocation(String tableName) "files in %s are not directly under table location", path)); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = 
ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testDeleteAfterMajorCompaction() { - withTemporaryTable("test_delete_after_major_compaction", true, false, NONE, tableName -> { + withTemporaryTable("test_delete_after_major_compaction", false, NONE, tableName -> { onTrino().executeQuery(format("CREATE TABLE %s WITH (transactional = true) AS SELECT * FROM tpch.tiny.nation", tableName)); compactTableAndWait(MAJOR, tableName, "", new Duration(3, MINUTES)); onTrino().executeQuery(format("DELETE FROM %s", tableName)); @@ -2038,14 +1998,14 @@ public void testDeleteAfterMajorCompaction() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUnbucketedPartitionedTransactionalTableWithTaskWriterCountGreaterThanOne() { unbucketedTransactionalTableWithTaskWriterCountGreaterThanOne(true); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testUnbucketedTransactionalTableWithTaskWriterCountGreaterThanOne() { @@ -2054,7 +2014,7 @@ public void testUnbucketedTransactionalTableWithTaskWriterCountGreaterThanOne() private void unbucketedTransactionalTableWithTaskWriterCountGreaterThanOne(boolean isPartitioned) { - withTemporaryTable(format("test_unbucketed%s_transactional_table_with_task_writer_count_greater_than_one", isPartitioned ? "_partitioned" : ""), true, isPartitioned, NONE, tableName -> { + withTemporaryTable(format("test_unbucketed%s_transactional_table_with_task_writer_count_greater_than_one", isPartitioned ? 
"_partitioned" : ""), isPartitioned, NONE, tableName -> { onTrino().executeQuery(format( "CREATE TABLE %s " + "WITH (" + @@ -2094,18 +2054,15 @@ private void unbucketedTransactionalTableWithTaskWriterCountGreaterThanOne(boole }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testLargePartitionedDelete() { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - withTemporaryTable("large_delete_" + "stage1", false, false, NONE, tableStage1 -> { + withTemporaryTable("large_delete_" + "stage1", false, NONE, tableStage1 -> { onTrino().executeQuery("CREATE TABLE %s AS SELECT a, b, 20220101 AS d FROM UNNEST(SEQUENCE(1, 9001), SEQUENCE(1, 9001)) AS t(a, b)".formatted(tableStage1)); - withTemporaryTable("large_delete_" + "stage2", false, false, NONE, tableStage2 -> { + withTemporaryTable("large_delete_" + "stage2", false, NONE, tableStage2 -> { onTrino().executeQuery("CREATE TABLE %s AS SELECT a, b, 20220101 AS d FROM UNNEST(SEQUENCE(1, 100), SEQUENCE(1, 100)) AS t(a, b)".formatted(tableStage2)); - withTemporaryTable("large_delete_" + "new", true, true, NONE, tableNew -> { + withTemporaryTable("large_delete_" + "new", true, NONE, tableNew -> { onTrino().executeQuery(""" CREATE TABLE %s WITH (transactional=true, partitioned_by=ARRAY['d']) AS (SELECT stage1.a as a, stage1.b as b, stage1.d AS d FROM %s stage1, %s stage2 WHERE stage1.d = stage2.d) @@ -2128,18 +2085,15 @@ public void testLargePartitionedDelete() }); } - @Test(groups = HIVE_TRANSACTIONAL) + @Test(groups = {HIVE_TRANSACTIONAL, PROFILE_SPECIFIC_TESTS}) @Flaky(issue = ACID_CORRUPTION_DIRECTORY_ISSUE, match = ACID_CORRUPTION_DIRECTORY_RETRY_PATTERN) public void testLargePartitionedUpdate() { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - withTemporaryTable("large_update_" + "stage1", false, false, NONE, tableStage1 -> { + withTemporaryTable("large_update_" + "stage1", false, NONE, tableStage1 -> { onTrino().executeQuery("CREATE TABLE %s AS SELECT a, b, 20220101 AS d FROM UNNEST(SEQUENCE(1, 9001), SEQUENCE(1, 9001)) AS t(a, b)".formatted(tableStage1)); - withTemporaryTable("large_update_" + "stage2", false, false, NONE, tableStage2 -> { + withTemporaryTable("large_update_" + "stage2", false, NONE, tableStage2 -> { onTrino().executeQuery("CREATE TABLE %s AS SELECT a, b, 20220101 AS d FROM UNNEST(SEQUENCE(1, 100), SEQUENCE(1, 100)) AS t(a, b)".formatted(tableStage2)); - withTemporaryTable("large_update_" + "new", true, true, NONE, tableNew -> { + withTemporaryTable("large_update_" + "new", true, NONE, tableNew -> { onTrino().executeQuery(""" CREATE TABLE %s WITH (transactional=true, partitioned_by=ARRAY['d']) AS (SELECT stage1.a as a, stage1.b as b, stage1.d AS d FROM %s stage1, %s stage2 WHERE stage1.d = stage2.d) @@ -2350,20 +2304,6 @@ private String makeInsertValues(int col1Value, int col2First, int col2Last) return IntStream.rangeClosed(col2First, col2Last).mapToObj(i -> format("(%s, %s)", col1Value, i)).collect(Collectors.joining(", ")); } - private void ensureTransactionalHive() - { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - } - - private void ensureSchemaEvolutionSupported() - { - if (getHiveVersionMajor() 
< 3) { - throw new SkipException("Hive schema evolution requires Hive version 3 or above"); - } - } - public static void verifySelectForTrinoAndHive(String select, Row... rows) { verifySelect("onTrino", onTrino(), select, rows); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTableInsert.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTableInsert.java index 07cfa1f9dc4b6..3d81a206265c5 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTableInsert.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTableInsert.java @@ -13,7 +13,6 @@ */ package io.trino.tests.product.hive; -import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -31,10 +30,6 @@ public class TestHiveTransactionalTableInsert @Test(dataProvider = "transactionalTableType", groups = HIVE_TRANSACTIONAL) public void testInsertIntoTransactionalTable(TransactionalTableType type) { - if (getHiveVersionMajor() < 3) { - throw new SkipException("Hive transactional tables are supported with Hive version 3 or above"); - } - String tableName = "test_insert_into_transactional_table_" + type.name().toLowerCase(ENGLISH); onHive().executeQuery("" + "CREATE TABLE " + tableName + "(a bigint)" + diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java index 9a6815e8b5d1f..68f6c8ebcbbef 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java @@ -72,14 +72,8 @@ private void testFailingHiveViewsWithInformationSchema() // is used, so Trino's information_schema.views table does not include translated Hive views. String withSchemaFilter = "SELECT table_name FROM information_schema.views WHERE table_schema = 'test_list_failing_views'"; String withNoFilter = "SELECT table_name FROM information_schema.views"; - if (getHiveVersionMajor() == 3) { - assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view")); - assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view")); - } - else { - assertThat(onTrino().executeQuery(withSchemaFilter)).hasNoRows(); - assertThat(onTrino().executeQuery(withNoFilter).rows()).doesNotContain(ImmutableList.of("correct_view")); - } + assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view")); + assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view")); // Queries with filters on table_schema and table_name are optimized to only fetch the specified table and uses // a different API. so the Hive version does not matter here. 
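The optimization referenced in the comment above means that a lookup constrained on both table_schema and table_name is served by a single-table metastore call rather than a bulk listing of all views. A minimal, purely illustrative sketch of such an assertion, reusing the helpers and the test_list_failing_views schema from this hunk (it is not part of the patch), would look roughly like:

    // Hypothetical: a fully qualified lookup bypasses the bulk view listing,
    // so a failing view elsewhere in the schema cannot affect the result.
    assertThat(onTrino().executeQuery(
            "SELECT table_name FROM information_schema.views " +
            "WHERE table_schema = 'test_list_failing_views' AND table_name = 'correct_view'"))
            .containsOnly(row("correct_view"));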
@@ -114,14 +108,8 @@ private void testFailingHiveViewsWithSystemJdbc() "table_schem = 'test_list_failing_views' AND " + "table_type = 'VIEW'"; String withNoFilter = "SELECT table_name FROM system.jdbc.tables WHERE table_cat = 'hive' AND table_type = 'VIEW'"; - if (getHiveVersionMajor() == 3) { - assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view"), row("failing_view")); - assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view"), row("failing_view")); - } - else { - assertThat(onTrino().executeQuery(withSchemaFilter)).hasNoRows(); - assertThat(onTrino().executeQuery(withNoFilter).rows()).doesNotContain(ImmutableList.of("correct_view")); - } + assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view"), row("failing_view")); + assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view"), row("failing_view")); // Queries with filters on table_schema and table_name are optimized to only fetch the specified table and uses // a different API. so the Hive version does not matter here. @@ -341,57 +329,28 @@ public void testFromUtcTimestamp() "1970-01-29 16:00:00.000")); // check result on Hive - if (isObsoleteFromUtcTimestampSemantics()) { - // For older hive version we expect different results on Hive side; as from_utc_timestamp semantics changed over time. - // Currently view transformation logic always follows new semantics. - // Leaving Hive assertions as documentation. - assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view")) - .containsOnly(row( - "1969-12-31 21:30:00.123", - "1969-12-31 21:30:00.123", - "1969-12-31 21:30:10.123", - "1969-12-31 21:30:10.123", - "1970-01-03 21:30:00.123", - "1970-01-03 21:30:00.123", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00", - "1970-01-30 21:30:00", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00.123", - "1970-01-30 21:30:00", - "1970-01-30 21:30:00", - "1970-01-30 08:00:00", - "1970-01-30 08:00:00", - "1970-01-29 16:00:00", - "1970-01-29 16:00:00")); - } - else { - assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view")) - .containsOnly(row( - "1969-12-31 16:00:00.123", - "1969-12-31 16:00:00.123", - "1969-12-31 16:00:10.123", - "1969-12-31 16:00:10.123", - "1970-01-03 16:00:00.123", - "1970-01-03 16:00:00.123", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00", - "1970-01-30 16:00:00", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00.123", - "1970-01-30 16:00:00", - "1970-01-30 16:00:00", - "1970-01-30 08:00:00", - "1970-01-30 08:00:00", - "1970-01-29 16:00:00", - "1970-01-29 16:00:00")); - } + assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view")) + .containsOnly(row( + "1969-12-31 16:00:00.123", + "1969-12-31 16:00:00.123", + "1969-12-31 16:00:10.123", + "1969-12-31 16:00:10.123", + "1970-01-03 16:00:00.123", + "1970-01-03 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00", + "1970-01-30 16:00:00", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00", + "1970-01-30 16:00:00", + "1970-01-30 08:00:00", + "1970-01-30 08:00:00", + "1970-01-29 16:00:00", + "1970-01-29 16:00:00")); } @Test(groups = HIVE_VIEWS) @@ -485,34 +444,13 @@ public void 
testFromUtcTimestampCornerCases() row("2128-06-11 01:53:20.001")); // check result on Hive - if (isObsoleteFromUtcTimestampSemantics()) { - // For older hive version we expect different results on Hive side; as from_utc_timestamp semantics changed over time. - // Currently view transformation logic always follows new semantics. - // Leaving Hive assertions as documentation. - assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view")) - .containsOnly( - row("1811-07-23 12:51:39.999"), // ??? - row("1938-04-24 19:43:19.999"), - row("1969-12-31 21:29:59.999"), - row("1969-12-31 21:30:00.001"), - row("2128-06-11 07:38:20.001")); - } - else { - assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view")) - .containsOnly( - row("1811-07-23 07:13:41.999"), - row("1938-04-24 14:13:19.999"), - row("1969-12-31 15:59:59.999"), - row("1969-12-31 16:00:00.001"), - row("2128-06-11 01:53:20.001")); - } - } - - private boolean isObsoleteFromUtcTimestampSemantics() - { - // It appears from_utc_timestamp semantics in Hive changes some time on the way. The guess is that it happened - // together with change of timestamp semantics at version 3.1. - return getHiveVersionMajor() < 3 || (getHiveVersionMajor() == 3 && getHiveVersionMinor() < 1); + assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view")) + .containsOnly( + row("1811-07-23 07:13:41.999"), + row("1938-04-24 14:13:19.999"), + row("1969-12-31 15:59:59.999"), + row("1969-12-31 16:00:00.001"), + row("2128-06-11 01:53:20.001")); } @Test(groups = HIVE_VIEWS) @@ -526,17 +464,7 @@ public void testCastTimestampAsDecimal() onHive().executeQuery("CREATE VIEW cast_timestamp_as_decimal_view AS SELECT CAST(a_timestamp as DECIMAL(10,0)) a_cast_timestamp FROM cast_timestamp_as_decimal"); String testQuery = "SELECT * FROM cast_timestamp_as_decimal_view"; - if (getHiveVersionMajor() > 3 || (getHiveVersionMajor() == 3 && getHiveVersionMinor() >= 1)) { - assertViewQuery( - testQuery, - queryAssert -> queryAssert.containsOnly(row(new BigDecimal("631282394")))); - } - else { - // For Hive versions older than 3.1 semantics of cast timestamp to decimal is different and it takes into account timezone Hive VM uses. - // We cannot replicate the behaviour in Trino, hence test only documents different expected results. 
- assertThat(onTrino().executeQuery(testQuery)).containsOnly(row(new BigDecimal("631282394"))); - assertThat(onHive().executeQuery(testQuery)).containsOnly(row(new BigDecimal("631261694"))); - } + assertViewQuery(testQuery, queryAssert -> queryAssert.containsOnly(row(new BigDecimal("631282394")))); onHive().executeQuery("DROP VIEW cast_timestamp_as_decimal_view"); onHive().executeQuery("DROP TABLE cast_timestamp_as_decimal"); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViewsLegacy.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViewsLegacy.java index 5aa21f74550b4..4ca69c9e76ed9 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViewsLegacy.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViewsLegacy.java @@ -72,12 +72,10 @@ public void testHiveViewInInformationSchema() onTrino().executeQuery("CREATE TABLE test_schema.trino_table(a int)"); onTrino().executeQuery("CREATE VIEW test_schema.trino_test_view AS SELECT * FROM nation"); - boolean hiveWithTableNamesByType = getHiveVersionMajor() >= 3 || - (getHiveVersionMajor() == 2 && getHiveVersionMinor() >= 3); assertThat(onTrino().executeQuery("SELECT * FROM information_schema.tables WHERE table_schema = 'test_schema'")).containsOnly( row("hive", "test_schema", "trino_table", "BASE TABLE"), row("hive", "test_schema", "hive_table", "BASE TABLE"), - row("hive", "test_schema", "hive_test_view", hiveWithTableNamesByType ? "VIEW" : "BASE TABLE"), + row("hive", "test_schema", "hive_test_view", "VIEW"), row("hive", "test_schema", "trino_test_view", "VIEW")); assertThat(onTrino().executeQuery("SELECT view_definition FROM information_schema.views WHERE table_schema = 'test_schema' and table_name = 'hive_test_view'")).containsOnly( diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestReadUniontype.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestReadUniontype.java deleted file mode 100644 index c061c57ca798e..0000000000000 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestReadUniontype.java +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.tests.product.hive; - -import io.trino.jdbc.Row; -import io.trino.tempto.AfterMethodWithContext; -import io.trino.tempto.BeforeMethodWithContext; -import io.trino.tempto.query.QueryResult; -import io.trino.testng.services.Flaky; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.List; - -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.tests.product.TestGroups.AVRO; -import static io.trino.tests.product.TestGroups.SMOKE; -import static io.trino.tests.product.utils.HadoopTestUtils.RETRYABLE_FAILURES_ISSUES; -import static io.trino.tests.product.utils.HadoopTestUtils.RETRYABLE_FAILURES_MATCH; -import static io.trino.tests.product.utils.QueryExecutors.onHive; -import static io.trino.tests.product.utils.QueryExecutors.onTrino; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertEquals; - -public class TestReadUniontype - extends HiveProductTest -{ - private static final String TABLE_NAME = "test_read_uniontype"; - private static final String TABLE_NAME_SCHEMA_EVOLUTION = "test_read_uniontype_with_schema_evolution"; - - @BeforeMethodWithContext - @AfterMethodWithContext - public void cleanup() - { - onHive().executeQuery(format("DROP TABLE IF EXISTS %s", TABLE_NAME)); - onHive().executeQuery(format("DROP TABLE IF EXISTS %s", TABLE_NAME_SCHEMA_EVOLUTION)); - } - - @DataProvider(name = "storage_formats") - public static Object[][] storageFormats() - { - return new String[][] {{"ORC"}, {"AVRO"}}; - } - - @DataProvider(name = "union_dereference_test_cases") - public static Object[][] unionDereferenceTestCases() - { - String tableUnionDereference = "test_union_dereference" + randomNameSuffix(); - // Hive insertion for union type in AVRO format has bugs, so we test on different table schemas for AVRO than ORC. 
- return new Object[][] {{ - format( - "CREATE TABLE %s (unionLevel0 UNIONTYPE<" + - "INT, STRING>)" + - "STORED AS %s", - tableUnionDereference, - "AVRO"), - format( - "INSERT INTO TABLE %s " + - "SELECT create_union(0, 321, 'row1') " + - "UNION ALL " + - "SELECT create_union(1, 55, 'row2') ", - tableUnionDereference), - format("SELECT unionLevel0.field0 FROM %s WHERE unionLevel0.field0 IS NOT NULL", tableUnionDereference), - Arrays.asList(321), - format("SELECT unionLevel0.tag FROM %s", tableUnionDereference), - Arrays.asList((byte) 0, (byte) 1), - "DROP TABLE IF EXISTS " + tableUnionDereference}, - // there is an internal issue in Hive 1.2: - // unionLevel1 is declared as unionType, but has to be inserted by create_union(tagId, Int, String) - { - format( - "CREATE TABLE %s (unionLevel0 UNIONTYPE>>, intLevel0 INT )" + - "STORED AS %s", - tableUnionDereference, - "AVRO"), - format( - "INSERT INTO TABLE %s " + - "SELECT create_union(2, 321, 'row1', named_struct('intLevel1', 1, 'stringLevel1', 'structval', 'unionLevel1', create_union(0, 5, 'testString'))), 8 " + - "UNION ALL " + - "SELECT create_union(2, 321, 'row1', named_struct('intLevel1', 1, 'stringLevel1', 'structval', 'unionLevel1', create_union(1, 5, 'testString'))), 8 ", - tableUnionDereference), - format("SELECT unionLevel0.field2.unionLevel1.field1 FROM %s WHERE unionLevel0.field2.unionLevel1.field1 IS NOT NULL", tableUnionDereference), - Arrays.asList(5), - format("SELECT unionLevel0.field2.unionLevel1.tag FROM %s", tableUnionDereference), - Arrays.asList((byte) 0, (byte) 1), - "DROP TABLE IF EXISTS " + tableUnionDereference}, - { - format( - "CREATE TABLE %s (unionLevel0 UNIONTYPE<" + - "STRUCT>>)" + - "STORED AS %s", - tableUnionDereference, - "ORC"), - format( - "INSERT INTO TABLE %s " + - "SELECT create_union(0, named_struct('unionLevel1', create_union(0, 'testString1', 23))) " + - "UNION ALL " + - "SELECT create_union(0, named_struct('unionLevel1', create_union(1, 'testString2', 45))) ", - tableUnionDereference), - format("SELECT unionLevel0.field0.unionLevel1.field0 FROM %s WHERE unionLevel0.field0.unionLevel1.field0 IS NOT NULL", tableUnionDereference), - Arrays.asList("testString1"), - format("SELECT unionLevel0.field0.unionLevel1.tag FROM %s", tableUnionDereference), - Arrays.asList((byte) 0, (byte) 1), - "DROP TABLE IF EXISTS " + tableUnionDereference}, - { - format( - "CREATE TABLE %s (unionLevel0 UNIONTYPE>>, intLevel0 INT )" + - "STORED AS %s", - tableUnionDereference, - "ORC"), - format( - "INSERT INTO TABLE %s " + - "SELECT create_union(2, 321, 'row1', named_struct('intLevel1', 1, 'stringLevel1', 'structval', 'unionLevel1', create_union(0, 'testString', 5))), 8 " + - "UNION ALL " + - "SELECT create_union(2, 321, 'row1', named_struct('intLevel1', 1, 'stringLevel1', 'structval', 'unionLevel1', create_union(1, 'testString', 5))), 8 ", - tableUnionDereference), - format("SELECT unionLevel0.field2.unionLevel1.field0 FROM %s WHERE unionLevel0.field2.unionLevel1.field0 IS NOT NULL", tableUnionDereference), - Arrays.asList("testString"), - format("SELECT unionLevel0.field2.unionLevel1.tag FROM %s", tableUnionDereference), - Arrays.asList((byte) 0, (byte) 1), - "DROP TABLE IF EXISTS " + tableUnionDereference}}; - } - - @Test(dataProvider = "storage_formats", groups = {SMOKE, AVRO}) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testReadUniontype(String storageFormat) - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if 
(getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - - onHive().executeQuery(format( - "CREATE TABLE %s (id INT,foo UNIONTYPE<" + - "INT," + - "DOUBLE," + - "ARRAY>)" + - "STORED AS %s", - TABLE_NAME, - storageFormat)); - - // Generate a file with rows: - // 0, {0: 36} - // 1, {1: 7.2} - // 2, {2: ['foo', 'bar']} - // 3, {1: 10.8} - // 4, {0: 144} - // 5, {2: ['hello'] - onHive().executeQuery(format( - "INSERT INTO TABLE %s " + - "SELECT 0, create_union(0, CAST(36 AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 1, create_union(1, CAST(NULL AS INT), CAST(7.2 AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 2, create_union(2, CAST(NULL AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 3, create_union(1, CAST(NULL AS INT), CAST(10.8 AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 4, create_union(0, CAST(144 AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 5, create_union(2, CAST(NULL AS INT), CAST(NULL AS DOUBLE), ARRAY('hello', 'world'))", - TABLE_NAME)); - // Generate a file with rows: - // 6, {0: 180} - // 7, {1: 21.6} - // 8, {0: 252} - onHive().executeQuery(format( - "INSERT INTO TABLE %s " + - "SELECT 6, create_union(0, CAST(180 AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 7, create_union(1, CAST(NULL AS INT), CAST(21.6 AS DOUBLE), ARRAY('foo','bar')) " + - "UNION ALL " + - "SELECT 8, create_union(0, CAST(252 AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar'))", - TABLE_NAME)); - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT * FROM %s", TABLE_NAME)); - assertEquals(selectAllResult.rows().size(), 9); - for (List row : selectAllResult.rows()) { - int id = (Integer) row.get(0); - switch (id) { - case 0: - assertStructEquals(row.get(1), new Object[] {(byte) 0, 36, null, null}); - break; - case 1: - assertStructEquals(row.get(1), new Object[] {(byte) 1, null, 7.2D, null}); - break; - case 2: - assertStructEquals(row.get(1), new Object[] {(byte) 2, null, null, Arrays.asList("foo", "bar")}); - break; - case 3: - assertStructEquals(row.get(1), new Object[] {(byte) 1, null, 10.8D, null}); - break; - case 4: - assertStructEquals(row.get(1), new Object[] {(byte) 0, 144, null, null}); - break; - case 5: - assertStructEquals(row.get(1), new Object[] {(byte) 2, null, null, Arrays.asList("hello", "world")}); - break; - case 6: - assertStructEquals(row.get(1), new Object[] {(byte) 0, 180, null, null}); - break; - case 7: - assertStructEquals(row.get(1), new Object[] {(byte) 1, null, 21.6, null}); - break; - case 8: - assertStructEquals(row.get(1), new Object[] {(byte) 0, 252, null, null}); - break; - } - } - } - - @Test(dataProvider = "union_dereference_test_cases", groups = {SMOKE, AVRO}) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testReadUniontypeWithDereference(String createTableSql, String insertSql, String selectSql, List expectedResult, String selectTagSql, List expectedTagResult, String dropTableSql) - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if (getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - - onHive().executeQuery(createTableSql); - onHive().executeQuery(insertSql); - - QueryResult result = 
onTrino().executeQuery(selectSql); - assertThat(result.column(1)).containsExactlyInAnyOrderElementsOf(expectedResult); - result = onTrino().executeQuery(selectTagSql); - assertThat(result.column(1)).containsExactlyInAnyOrderElementsOf(expectedTagResult); - - onTrino().executeQuery(dropTableSql); - } - - @Test(dataProvider = "storage_formats", groups = {SMOKE, AVRO}) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testUnionTypeSchemaEvolution(String storageFormat) - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if (getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - - onHive().executeQuery(format( - "CREATE TABLE %s (" - + "c0 INT," - + "c1 UNIONTYPE<" - + " STRUCT, " - + " STRUCT>) " - + "PARTITIONED BY (c2 INT) " - + "STORED AS %s", - TABLE_NAME_SCHEMA_EVOLUTION, - storageFormat)); - switch (storageFormat) { - case "AVRO": - testAvroSchemaEvolution(); - break; - case "ORC": - testORCSchemaEvolution(); - break; - default: - throw new UnsupportedOperationException("Unsupported table format."); - } - } - - /** - * When reading AVRO file, Trino needs the schema information from Hive metastore to deserialize Avro files. - * Therefore, when an ALTER table was issued in which the hive metastore changed the schema into an incompatible format, - * from Union to Struct or from Struct to Union in this case, Trino could not read those Avro files using the modified Hive metastore schema. - * However, when reading ORC files, Trino does not need schema information from Hive metastore to deserialize ORC files. - * Therefore, it can read ORC files even after changing the schema. 
- */ - @Test(groups = SMOKE) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testORCUnionToStructSchemaEvolution() - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if (getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - String tableReadUnionAsStruct = "test_read_union_as_struct_" + randomNameSuffix(); - - onHive().executeQuery("SET hive.exec.dynamic.partition.mode = nonstrict"); - onHive().executeQuery("SET hive.exec.dynamic.partition=true"); - - onHive().executeQuery(format( - "CREATE TABLE %s(" + - "c1 UNIONTYPE, STRUCT>) " + - "PARTITIONED BY (p INT) STORED AS %s", - tableReadUnionAsStruct, - "ORC")); - - onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION(p) " + - "SELECT CREATE_UNION(1, NAMED_STRUCT('a', 'a1', 'b', 'b1'), NAMED_STRUCT('c', 'ignores', 'd', 'ignore')), 999 FROM (SELECT 1) t", - tableReadUnionAsStruct)); - - onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN c1 c1 " + - " STRUCT, field1:STRUCT>", - tableReadUnionAsStruct)); - - onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION(p) " + - "SELECT NAMED_STRUCT('tag', 0, 'field0', NAMED_STRUCT('a', 'a11', 'b', 'b1b'), 'field1', NAMED_STRUCT('c', 'ignores', 'd', 'ignores')), 100 FROM (SELECT 1) t", - tableReadUnionAsStruct)); - // using dereference - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT c1.field0 FROM hive.default.%s", tableReadUnionAsStruct)); - // the first insert didn't add value to field0, since the tag is 1 during inserting - assertThat(selectAllResult.column(1)).containsExactlyInAnyOrder(null, Row.builder().addField("a", "a11").addField("b", "b1b").build()); - } - - /** - * When reading AVRO file, Trino needs the schema information from Hive metastore to deserialize Avro files. - * Therefore, when an ALTER table was issued in which the hive metastore changed the schema into an incompatible format, - * from Union to Struct or from Struct to Union in this case, Trino could not read those Avro files using the modified Hive metastore schema. - * However, when reading ORC files, Trino does not need schema information from Hive metastore to deserialize ORC files. - * Therefore, it can read ORC files even after changing the schema. 
- */ - @Test(groups = SMOKE) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testORCStructToUnionSchemaEvolution() - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if (getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - String tableReadStructAsUnion = "test_read_struct_as_union_" + randomNameSuffix(); - - onHive().executeQuery("SET hive.exec.dynamic.partition.mode = nonstrict"); - onHive().executeQuery("SET hive.exec.dynamic.partition=true"); - - onHive().executeQuery(format( - "CREATE TABLE %s(" + - "c1 STRUCT, field1:STRUCT>) " + - "PARTITIONED BY (p INT) STORED AS %s", - tableReadStructAsUnion, - "ORC")); - - onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION(p) " + - "SELECT NAMED_STRUCT('tag', 0Y, 'field0', NAMED_STRUCT('a', 'a11', 'b', 'b1b'), 'field1', NAMED_STRUCT('c', 'ignores', 'd', 'ignores')), 100 FROM (SELECT 1) t", - tableReadStructAsUnion)); - - onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN c1 c1 " + - " UNIONTYPE, STRUCT>", - tableReadStructAsUnion)); - - onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION(p) " + - "SELECT CREATE_UNION(1, NAMED_STRUCT('a', 'a1', 'b', 'b1'), NAMED_STRUCT('c', 'ignores', 'd', 'ignore')), 999 from (SELECT 1) t", - tableReadStructAsUnion)); - - // using dereference - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT c1.field0 FROM hive.default.%s", tableReadStructAsUnion)); - // the second insert didn't add value to field0, since the tag is 1 during inserting - assertThat(selectAllResult.column(1)).containsExactlyInAnyOrder(null, Row.builder().addField("a", "a11").addField("b", "b1b").build()); - } - - @Test(groups = SMOKE) - @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH) - public void testReadOrcUniontypeWithCheckpoint() - { - // According to testing results, the Hive INSERT queries here only work in Hive 1.2 - if (getHiveVersionMajor() != 1 || getHiveVersionMinor() != 2) { - throw new SkipException("This test can only be run with Hive 1.2 (default config)"); - } - - // Set the row group size to 1000 (the minimum value). - onHive().executeQuery(format( - "CREATE TABLE %s (id INT,foo UNIONTYPE<" + - "INT," + - "DOUBLE," + - "ARRAY>)" + - "STORED AS ORC TBLPROPERTIES (\"orc.row.index.stride\"=\"1000\")", - TABLE_NAME)); - - // Generate a file with 1100 rows, as the default row group size is 1000, reading 1100 rows will involve - // streaming checkpoint. 
- StringBuilder builder = new StringBuilder(); - for (int i = 0; i < 1100; i++) { - builder.append("SELECT 0, create_union(0, CAST(36 AS INT), CAST(NULL AS DOUBLE), ARRAY('foo','bar')) "); - if (i < 1099) { - builder.append("UNION ALL "); - } - } - onHive().executeQuery(format( - "INSERT INTO TABLE %s " + builder.toString(), TABLE_NAME)); - - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT * FROM %s", TABLE_NAME)); - assertEquals(selectAllResult.rows().size(), 1100); - } - - private void testORCSchemaEvolution() - { - // Generate a file with rows: - // 0, {0: } - // 1, {1: } - onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION (c2 = 5) " - + "SELECT 0, create_union(0, named_struct('a', 'a1', 'b', 'b1'), named_struct('c', 'ignore')) " - + "UNION ALL " - + "SELECT 1, create_union(1, named_struct('a', 'ignore', 'b', 'ignore'), named_struct('c', 'c1'))", - TABLE_NAME_SCHEMA_EVOLUTION)); - - // Add a coercible change inside union type column. - onHive().executeQuery(format("ALTER TABLE %S CHANGE COLUMN c1 c1 UNIONTYPE, STRUCT>", - TABLE_NAME_SCHEMA_EVOLUTION)); - - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT c0, c1 FROM %s", TABLE_NAME_SCHEMA_EVOLUTION)); - assertEquals(selectAllResult.rows().size(), 2); - for (List row : selectAllResult.rows()) { - int id = (Integer) row.get(0); - switch (id) { - case 0: - Row rowValueFirst = rowBuilder().addField("a", "a1").addField("b", "b1").build(); - assertStructEquals(row.get(1), new Object[]{(byte) 0, rowValueFirst, null}); - break; - case 1: - Row rowValueSecond = rowBuilder().addField("c", "c1").addField("d", null).build(); - assertStructEquals(row.get(1), new Object[]{(byte) 1, null, rowValueSecond}); - break; - } - } - } - - private void testAvroSchemaEvolution() - { - /** - * The following insertion fails on avro. - * - * hive (default)> INSERT INTO TABLE u_username.test_ut_avro partition (c2 = 5) - * > SELECT 1, create_union(1, named_struct('a', 'ignore', 'b', 'ignore'), named_struct('c', 'c1')); - * - * Error: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable (null) - * at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:179) - * at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) - * at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459) - * at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) - * at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:177) - * at java.security.AccessController.doPrivileged(Native Method) - * at javax.security.auth.Subject.doAs(Subject.java:422) - * at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) - * at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:171) - * Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable (null) - * at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:505) - * at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:170) - * ... 
8 more - * Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 - * at org.apache.avro.generic.GenericData$Record.get(GenericData.java:135) - * at org.apache.avro.generic.GenericData.getField(GenericData.java:580) - * at org.apache.avro.generic.GenericData.validate(GenericData.java:373) - * at org.apache.avro.generic.GenericData.validate(GenericData.java:395) - * at org.apache.avro.generic.GenericData.validate(GenericData.java:373) - * at org.apache.hadoop.hive.serde2.avro.AvroSerializer.serialize(AvroSerializer.java:96) - * - * So we try coercion logic on the first struct field inside the union (i.e. only for struct) only. - * - */ - // Generate a file with rows: - // 0, {0: } - // 1, {0: } - onHive().executeQuery(format( - "INSERT INTO TABLE %s PARTITION (c2 = 5) " - + "SELECT 0, create_union(0, named_struct('a', 'a1', 'b', 'b1'), named_struct('c', 'ignore')) " - + "UNION ALL " - + "SELECT 1, create_union(0, named_struct('a', 'a2', 'b', 'b2'), named_struct('c', 'ignore'))", - TABLE_NAME_SCHEMA_EVOLUTION)); - - // Add a coercible change inside union type column. - onHive().executeQuery(format("ALTER TABLE %S CHANGE COLUMN c1 c1 UNIONTYPE, STRUCT>", TABLE_NAME_SCHEMA_EVOLUTION)); - - QueryResult selectAllResult = onTrino().executeQuery(format("SELECT c0, c1 FROM %s", TABLE_NAME_SCHEMA_EVOLUTION)); - assertEquals(selectAllResult.rows().size(), 2); - for (List row : selectAllResult.rows()) { - int id = (Integer) row.get(0); - switch (id) { - case 0: - Row rowValueFirst = rowBuilder() - .addField("a", "a1") - .addField("b", "b1") - .addField("d", null) - .build(); - assertStructEquals(row.get(1), new Object[] {(byte) 0, rowValueFirst, null}); - break; - case 1: - Row rowValueSecond = rowBuilder() - .addField("a", "a2") - .addField("b", "b2") - .addField("d", null) - .build(); - assertStructEquals(row.get(1), new Object[] {(byte) 0, rowValueSecond, null}); - break; - } - } - } - - // TODO use Row as expected too, and use tempto QueryAssert - private static void assertStructEquals(Object actual, Object[] expected) - { - assertThat(actual).isInstanceOf(Row.class); - Row actualRow = (Row) actual; - assertEquals(actualRow.getFields().size(), expected.length); - for (int i = 0; i < actualRow.getFields().size(); i++) { - assertEquals(actualRow.getFields().get(i).getValue(), expected[i]); - } - } - - private static io.trino.jdbc.Row.Builder rowBuilder() - { - return io.trino.jdbc.Row.builder(); - } -} diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestRoles.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestRoles.java index 439bd97f61a1f..b092cb6481f85 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestRoles.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestRoles.java @@ -22,7 +22,6 @@ import io.trino.tempto.BeforeMethodWithContext; import io.trino.tempto.query.QueryExecutor; import io.trino.tempto.query.QueryResult; -import org.testng.SkipException; import org.testng.annotations.Test; import java.util.List; @@ -110,35 +109,6 @@ public void testListRoles() assertThat(actual.rows()).containsOnly(expected.rows().toArray(new List[] {})); } - @Test(groups = {ROLES, AUTHORIZATION, PROFILE_SPECIFIC_TESTS}) - public void testListGrants() - { - if (getHiveVersionMajor() >= 3) { - throw new SkipException(""); // TODO (https://github.com/trinodb/trino/issues/1218) this currently fails on HDP 3 - } - - onTrino().executeQuery("SHOW GRANTS"); // must not fail - 
onTrino().executeQuery("SELECT * FROM information_schema.table_privileges"); // must not fail - - onTrino().executeQuery("CREATE TABLE test_list_grants(c int)"); - - assertThat(onTrino().executeQuery("SHOW GRANTS")) - .contains( - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "SELECT", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "INSERT", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "UPDATE", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "DELETE", "YES", null)); - - assertThat(onTrino().executeQuery("SELECT * FROM information_schema.table_privileges")) - .contains( - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "SELECT", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "INSERT", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "UPDATE", "YES", null), - row(userName, "USER", userName, "USER", "hive", "default", "test_list_grants", "DELETE", "YES", null)); - - onTrino().executeQuery("DROP TABLE test_list_grants"); - } - @Test(groups = {ROLES, AUTHORIZATION, PROFILE_SPECIFIC_TESTS}) public void testCreateDuplicateRole() { diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java index 43c77e287ed74..921bb4daf4204 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java @@ -590,7 +590,7 @@ public void testSparkReadingCompositeTrinoData(StorageFormat storageFormat) onTrino().executeQuery("DROP TABLE " + trinoTableName); } - @Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS, ICEBERG_REST, ICEBERG_JDBC, ICEBERG_NESSIE}, dataProvider = "storageFormatsWithSpecVersion") + @Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS, ICEBERG_REST, ICEBERG_JDBC}, dataProvider = "storageFormatsWithSpecVersion") public void testTrinoReadingSparkIcebergTablePropertiesData(StorageFormat storageFormat, int specVersion) { String baseTableName = toLowerCase("test_trino_reading_spark_iceberg_table_properties_" + storageFormat); @@ -986,6 +986,24 @@ public void testCreateAndDropTableWithSameLocationFailsOnTrino(int specVersion) onTrino().executeQuery(format("DROP TABLE %s", trinoTableName(tableSameLocation2))); } + @Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS, ICEBERG_REST, ICEBERG_JDBC, ICEBERG_NESSIE}) + public void testTrinoWritingDataAfterSpark() + { + String baseTableName = toLowerCase("test_trino_write_after_spark"); + String sparkTableName = sparkTableName(baseTableName); + String trinoTableName = trinoTableName(baseTableName); + + onSpark().executeQuery("CREATE TABLE " + sparkTableName + " (a INT) USING ICEBERG"); + onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES 1"); + + onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES 2"); + + List expected = ImmutableList.of(row(1), row(2)); + assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(expected); + assertThat(onSpark().executeQuery("SELECT * FROM " + sparkTableName)).containsOnly(expected); + onSpark().executeQuery("DROP TABLE " + sparkTableName); + } + 
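The test added above exercises Trino appending to an Iceberg table whose latest snapshot was written by Spark. The mirrored direction (Spark appending after Trino has created and written the table) would follow the same pattern with the engines swapped; the sketch below is hypothetical, not part of the patch, and assumes trinoTableName and sparkTableName are derived exactly as in the test above:

    // Hypothetical mirror case: Trino creates and writes first, Spark appends,
    // and both engines must see both rows afterwards.
    onTrino().executeQuery("CREATE TABLE " + trinoTableName + " (a INT)");
    onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES 1");
    onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES 2");
    assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(1), row(2));
    assertThat(onSpark().executeQuery("SELECT * FROM " + sparkTableName)).containsOnly(row(1), row(2));
    onTrino().executeQuery("DROP TABLE " + trinoTableName);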
@Test(groups = {ICEBERG, ICEBERG_JDBC, PROFILE_SPECIFIC_TESTS, ICEBERG_NESSIE}, dataProvider = "storageFormatsWithSpecVersion") public void testTrinoWritingDataWithObjectStorageLocationProvider(StorageFormat storageFormat, int specVersion) { diff --git a/testing/trino-server-dev/etc/config.properties b/testing/trino-server-dev/etc/config.properties index b786657e8d2b7..67c8b8bcce95a 100644 --- a/testing/trino-server-dev/etc/config.properties +++ b/testing/trino-server-dev/etc/config.properties @@ -37,7 +37,7 @@ plugin.bundles=\ ../../plugin/trino-memory/pom.xml,\ ../../plugin/trino-jmx/pom.xml,\ ../../plugin/trino-raptor-legacy/pom.xml,\ - ../../plugin/trino-hive-hadoop2/pom.xml,\ + ../../plugin/trino-hive/pom.xml,\ ../../plugin/trino-hudi/pom.xml,\ ../../plugin/trino-example-http/pom.xml,\ ../../plugin/trino-kafka/pom.xml, \ diff --git a/testing/trino-server-dev/pom.xml b/testing/trino-server-dev/pom.xml index ac80013894a44..a85cd990c12fa 100644 --- a/testing/trino-server-dev/pom.xml +++ b/testing/trino-server-dev/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-test-jdbc-compatibility-old-driver/pom.xml b/testing/trino-test-jdbc-compatibility-old-driver/pom.xml index 999d56505a128..9a81500a37ff5 100644 --- a/testing/trino-test-jdbc-compatibility-old-driver/pom.xml +++ b/testing/trino-test-jdbc-compatibility-old-driver/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -15,7 +15,7 @@ ${project.parent.basedir} - 435-SNAPSHOT + 436-SNAPSHOT diff --git a/testing/trino-test-jdbc-compatibility-old-server/pom.xml b/testing/trino-test-jdbc-compatibility-old-server/pom.xml index c8c837e69e739..49c596b8f2858 100644 --- a/testing/trino-test-jdbc-compatibility-old-server/pom.xml +++ b/testing/trino-test-jdbc-compatibility-old-server/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-test-jdbc-compatibility-old-server/src/test/java/io/trino/TestJdbcResultSetCompatibilityOldServer.java b/testing/trino-test-jdbc-compatibility-old-server/src/test/java/io/trino/TestJdbcResultSetCompatibilityOldServer.java index 3adc54e4ced0b..c4c5047550de0 100644 --- a/testing/trino-test-jdbc-compatibility-old-server/src/test/java/io/trino/TestJdbcResultSetCompatibilityOldServer.java +++ b/testing/trino-test-jdbc-compatibility-old-server/src/test/java/io/trino/TestJdbcResultSetCompatibilityOldServer.java @@ -139,12 +139,6 @@ protected Connection createConnection() return DriverManager.getConnection(trinoContainer.getJdbcUrl(), "test", null); } - @Override - protected int getTestedServerVersion() - { - return parseInt(getTestedTrinoVersion()); - } - @Override public String toString() { diff --git a/testing/trino-testing-containers/pom.xml b/testing/trino-testing-containers/pom.xml index ff83b97339de9..76607d3769197 100644 --- a/testing/trino-testing-containers/pom.xml +++ b/testing/trino-testing-containers/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -61,6 +61,12 @@ trino-testing-services + + org.junit.platform + junit-platform-launcher + true + + org.rnorth.duct-tape duct-tape diff --git a/testing/trino-testing-containers/src/main/java/io/trino/testing/containers/junit/ReportLeakedContainers.java b/testing/trino-testing-containers/src/main/java/io/trino/testing/containers/junit/ReportLeakedContainers.java new file mode 100644 index 0000000000000..e41a54bea4ecc --- /dev/null +++ 
b/testing/trino-testing-containers/src/main/java/io/trino/testing/containers/junit/ReportLeakedContainers.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.testing.containers.junit; + +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.model.Container; +import io.airlift.log.Logger; +import org.junit.platform.launcher.TestExecutionListener; +import org.junit.platform.launcher.TestPlan; +import org.testcontainers.DockerClientFactory; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.lang.Boolean.getBoolean; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.joining; + +public final class ReportLeakedContainers +{ + private ReportLeakedContainers() {} + + private static final Logger log = Logger.get(ReportLeakedContainers.class); + private static final boolean DISABLED = getBoolean("ReportLeakedContainers.disabled"); + + private static final Set ignoredIds = Collections.synchronizedSet(new HashSet<>()); + + public static void ignoreContainerId(String containerId) + { + ignoredIds.add(requireNonNull(containerId, "containerId is null")); + } + + // Separate class so that ReportLeakedContainers.ignoreContainerId can be called without pulling junit platform onto classpath + public static class Listener + implements TestExecutionListener + { + @Override + public void testPlanExecutionFinished(TestPlan testPlan) + { + if (DISABLED) { + log.info("ReportLeakedContainers disabled"); + return; + } + log.info("Checking for leaked containers"); + + @SuppressWarnings("resource") // Throws when close is attempted, as this is a global instance. + DockerClient dockerClient = DockerClientFactory.lazyClient(); + + List containers = dockerClient.listContainersCmd() + .withLabelFilter(Map.of(DockerClientFactory.TESTCONTAINERS_SESSION_ID_LABEL, DockerClientFactory.SESSION_ID)) + .exec() + .stream() + .filter(container -> !ignoredIds.contains(container.getId())) + .collect(toImmutableList()); + + if (!containers.isEmpty()) { + log.error("Leaked containers: %s", containers.stream() + .map(container -> toStringHelper("container") + .add("id", container.getId()) + .add("image", container.getImage()) + .add("imageId", container.getImageId()) + .toString()) + .collect(joining(", ", "[", "]"))); + + // JUnit does not fail on a listener exception. 
+ System.err.println("JVM will be terminated"); + System.exit(1); + } + } + } +} diff --git a/testing/trino-testing-containers/src/main/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener b/testing/trino-testing-containers/src/main/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener new file mode 100644 index 0000000000000..c80b71364750e --- /dev/null +++ b/testing/trino-testing-containers/src/main/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener @@ -0,0 +1 @@ +io.trino.testing.containers.junit.ReportLeakedContainers$Listener diff --git a/testing/trino-testing-kafka/pom.xml b/testing/trino-testing-kafka/pom.xml index ccc9d5c586208..0699206118605 100644 --- a/testing/trino-testing-kafka/pom.xml +++ b/testing/trino-testing-kafka/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-testing-resources/pom.xml b/testing/trino-testing-resources/pom.xml index 307463e255df4..1c338f235e44a 100644 --- a/testing/trino-testing-resources/pom.xml +++ b/testing/trino-testing-resources/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-testing-services/pom.xml b/testing/trino-testing-services/pom.xml index 0a29fa6069c80..de395577465e0 100644 --- a/testing/trino-testing-services/pom.xml +++ b/testing/trino-testing-services/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml diff --git a/testing/trino-testing/pom.xml b/testing/trino-testing/pom.xml index 0e3bba2867006..42567f7441ff3 100644 --- a/testing/trino-testing/pom.xml +++ b/testing/trino-testing/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -179,11 +179,6 @@ junit-jupiter-api - - org.testng - testng - - org.jetbrains annotations @@ -220,26 +215,4 @@ test - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.surefire - surefire-junit-platform - ${dep.plugin.surefire.version} - - - org.apache.maven.surefire - surefire-testng - ${dep.plugin.surefire.version} - - - - - diff --git a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestAggregations.java b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestAggregations.java index c1ca47508ad10..6f5c7667d97b7 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestAggregations.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestAggregations.java @@ -14,21 +14,35 @@ package io.trino.testing; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import io.trino.Session; import io.trino.spi.type.TimeZoneKey; +import io.trino.tpch.TpchTable; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import java.util.List; +import java.util.Set; import static io.trino.SystemSessionProperties.MARK_DISTINCT_STRATEGY; import static io.trino.testing.MaterializedResult.resultBuilder; import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; +import static io.trino.tpch.TpchTable.LINE_ITEM; +import static io.trino.tpch.TpchTable.NATION; +import static io.trino.tpch.TpchTable.ORDERS; +import static io.trino.tpch.TpchTable.REGION; import static org.assertj.core.api.Assertions.assertThat; public abstract class AbstractTestAggregations extends AbstractTestQueryFramework { + protected static final Set> REQUIRED_TPCH_TABLES = ImmutableSet.>builder() + .add(LINE_ITEM) 
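Note on the leak detector above: ReportLeakedContainers.Listener is picked up through the JUnit Platform ServiceLoader mechanism, i.e. the META-INF/services/org.junit.platform.launcher.TestExecutionListener entry added in this patch, and its testPlanExecutionFinished callback runs once after the whole test plan. Because the launcher does not fail a run when a listener throws, the listener calls System.exit(1) instead. A minimal sketch of the same registration pattern, with an illustrative class name that is not part of the patch:

    package com.example.testing;

    import org.junit.platform.launcher.TestExecutionListener;
    import org.junit.platform.launcher.TestPlan;

    // Registered by listing the fully qualified class name in
    // META-INF/services/org.junit.platform.launcher.TestExecutionListener
    public class ResourceCheckListener
            implements TestExecutionListener
    {
        @Override
        public void testPlanExecutionFinished(TestPlan testPlan)
        {
            // Runs once after all tests; exceptions thrown here are not
            // reported as failures, so a non-zero exit is the only way to
            // make a leak visible to CI.
            if (leakedResourcesDetected()) {
                System.err.println("Leaked resources detected, terminating JVM");
                System.exit(1);
            }
        }

        private boolean leakedResourcesDetected()
        {
            return false; // placeholder for a real check, e.g. listing Docker containers by session label
        }
    }
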
+ .add(NATION) + .add(ORDERS) + .add(REGION) + .build(); + @Test public void testCountBoolean() { diff --git a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java index 7fdd84268ac78..734cd082242f2 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestEngineOnlyQueries.java @@ -6642,6 +6642,15 @@ SELECT my_pow(2, 8) """)) .matches("VALUES 256"); + // function with dereference + assertThat(query(""" + WITH FUNCTION get(input row(varchar)) + RETURNS varchar + RETURN input[1] + SELECT get(ROW('abc')) + """)) + .matches("VALUES VARCHAR 'abc'"); + // validations for inline functions assertQueryFails("WITH FUNCTION a.b() RETURNS int RETURN 42 SELECT a.b()", "line 1:6: Inline function names cannot be qualified: a.b"); @@ -6694,6 +6703,22 @@ SELECT a(10) .hasMessage("line 3:8: Recursive language functions are not supported: a(integer):integer"); } + // ensure that JSON_TABLE runs properly in distributed mode (i.e., serialization of handles works correctly, etc) + @Test + public void testJsonTable() + { + assertThat(query(""" + SELECT first, last + FROM (SELECT '{"a" : [1, 2, 3], "b" : [4, 5, 6]}') t(json_col), JSON_TABLE( + json_col, + 'lax $.a' + COLUMNS( + first bigint PATH 'lax $[0]', + last bigint PATH 'lax $[last]')) + """)) + .matches("VALUES (BIGINT '1', BIGINT '3')"); + } + private static ZonedDateTime zonedDateTime(String value) { return ZONED_DATE_TIME_FORMAT.parse(value, ZonedDateTime::from); diff --git a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestQueryFramework.java b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestQueryFramework.java index 00c67b1dca148..e4fb18e592aaf 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestQueryFramework.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestQueryFramework.java @@ -53,7 +53,6 @@ import io.trino.sql.tree.ExplainType; import io.trino.testing.TestingAccessControlManager.TestingPrivilege; import io.trino.testng.services.ReportBadTestAnnotations; -import io.trino.transaction.TransactionBuilder; import io.trino.util.AutoCloseableCloser; import org.assertj.core.api.AssertProvider; import org.intellij.lang.annotations.Language; @@ -61,8 +60,6 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.parallel.Execution; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; import java.util.List; import java.util.Map; @@ -82,8 +79,8 @@ import static io.trino.execution.querystats.PlanOptimizersStatsCollector.createPlanOptimizersStatsCollector; import static io.trino.sql.SqlFormatter.formatSql; import static io.trino.sql.planner.OptimizerConfig.JoinReorderingStrategy; +import static io.trino.testing.TransactionBuilder.transaction; import static io.trino.testing.assertions.Assert.assertEventually; -import static io.trino.transaction.TransactionBuilder.transaction; import static java.lang.String.format; import static java.util.Collections.emptyList; import static java.util.concurrent.TimeUnit.SECONDS; @@ -104,7 +101,6 @@ public abstract class AbstractTestQueryFramework private H2QueryRunner h2QueryRunner; private io.trino.sql.query.QueryAssertions queryAssertions; - @BeforeClass @BeforeAll public void init() throws Exception @@ -122,7 
+118,6 @@ protected abstract QueryRunner createQueryRunner() throws Exception; @AfterAll - @AfterClass(alwaysRun = true) public final void close() throws Exception { diff --git a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestingTrinoClient.java b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestingTrinoClient.java index 806ba333ecb0c..f2a9cb46267a6 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestingTrinoClient.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/AbstractTestingTrinoClient.java @@ -48,7 +48,7 @@ import static io.trino.spi.session.ResourceEstimates.EXECUTION_TIME; import static io.trino.spi.session.ResourceEstimates.PEAK_MEMORY; import static io.trino.testing.TestingStatementClientFactory.DEFAULT_STATEMENT_FACTORY; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.util.Objects.requireNonNull; public abstract class AbstractTestingTrinoClient diff --git a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java index 214130af3770d..dd00a554460a7 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java @@ -1052,15 +1052,15 @@ public void testMaterializedView() .containsAll("VALUES '" + view.getObjectName() + "'"); // information_schema.tables without table_name filter so that ConnectorMetadata.listViews is exercised assertThat(query( - "SELECT table_name, table_type, trino_relation_type FROM information_schema.tables " + + "SELECT table_name, table_type FROM information_schema.tables " + "WHERE table_schema = '" + view.getSchemaName() + "'")) .skippingTypesCheck() - .containsAll("VALUES ('" + view.getObjectName() + "', 'BASE TABLE', 'MATERIALIZED VIEW')"); + .containsAll("VALUES ('" + view.getObjectName() + "', 'BASE TABLE')"); // information_schema.tables with table_name filter assertQuery( - "SELECT table_name, table_type, trino_relation_type FROM information_schema.tables " + + "SELECT table_name, table_type FROM information_schema.tables " + "WHERE table_schema = '" + view.getSchemaName() + "' and table_name = '" + view.getObjectName() + "'", - "VALUES ('" + view.getObjectName() + "', 'BASE TABLE', 'MATERIALIZED VIEW')"); + "VALUES ('" + view.getObjectName() + "', 'BASE TABLE')"); // system.jdbc.tables without filter assertThat(query("SELECT table_schem, table_name, table_type FROM system.jdbc.tables")) diff --git a/testing/trino-testing/src/main/java/io/trino/testing/DistributedQueryRunner.java b/testing/trino-testing/src/main/java/io/trino/testing/DistributedQueryRunner.java index e5a8b1b3084bd..2ca6f3a1fde93 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/DistributedQueryRunner.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/DistributedQueryRunner.java @@ -22,6 +22,10 @@ import io.airlift.discovery.server.testing.TestingDiscoveryServer; import io.airlift.log.Logger; import io.airlift.log.Logging; +import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; +import io.opentelemetry.sdk.trace.SpanProcessor; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; import io.trino.Session; import io.trino.Session.SessionBuilder; import io.trino.connector.CoordinatorDynamicCatalogManager; @@ -55,7 
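With the TestNG @BeforeClass/@AfterClass annotations removed above, AbstractTestQueryFramework relies purely on JUnit 5 lifecycle callbacks. Since init() and close() are instance methods, @BeforeAll/@AfterAll only work with the per-class test instance lifecycle. A hedged sketch of the shape subclasses end up with (class and helper names are illustrative):

    import org.junit.jupiter.api.AfterAll;
    import org.junit.jupiter.api.BeforeAll;
    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.TestInstance;

    import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;

    // PER_CLASS allows non-static @BeforeAll/@AfterAll methods, matching the
    // instance-level init()/close() pair in AbstractTestQueryFramework.
    @TestInstance(PER_CLASS)
    class ExampleQueryTest
    {
        private AutoCloseable queryRunner;

        @BeforeAll
        void init()
        {
            queryRunner = createRunner();
        }

        @Test
        void smokeTest()
        {
            // query assertions would go here
        }

        @AfterAll
        void close()
                throws Exception
        {
            queryRunner.close();
        }

        private AutoCloseable createRunner()
        {
            return () -> {}; // stand-in for a real query runner
        }
    }
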
+59,6 @@ import io.trino.sql.parser.SqlParser; import io.trino.sql.planner.NodePartitioningManager; import io.trino.sql.planner.Plan; -import io.trino.sql.tree.Statement; import io.trino.testing.containers.OpenTracingCollector; import io.trino.transaction.TransactionManager; import org.intellij.lang.annotations.Language; @@ -69,6 +72,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -77,6 +81,7 @@ import static com.google.common.base.MoreObjects.firstNonNull; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Throwables.throwIfUnchecked; import static com.google.common.base.Verify.verify; import static com.google.inject.util.Modules.EMPTY_MODULE; @@ -95,11 +100,13 @@ public class DistributedQueryRunner { private static final Logger log = Logger.get(DistributedQueryRunner.class); private static final String ENVIRONMENT = "testing"; + private static final AtomicInteger unclosedInstances = new AtomicInteger(); private TestingDiscoveryServer discoveryServer; private TestingTrinoServer coordinator; private Optional backupCoordinator; private Runnable registerNewWorker; + private final InMemorySpanExporter spanExporter = InMemorySpanExporter.create(); private final List servers = new CopyOnWriteArrayList<>(); private final List functionBundles = new CopyOnWriteArrayList<>(ImmutableList.of(AbstractTestQueries.CUSTOM_FUNCTIONS)); private final List plugins = new CopyOnWriteArrayList<>(); @@ -145,7 +152,15 @@ private DistributedQueryRunner( closer.register(() -> extraCloseables.forEach(DistributedQueryRunner::closeUnchecked)); log.info("Created TestingDiscoveryServer in %s", nanosSince(discoveryStart)); - registerNewWorker = () -> createServer(false, extraProperties, environment, additionalModule, baseDataDir, Optional.empty(), Optional.of(ImmutableList.of()), ImmutableList.of()); + registerNewWorker = () -> createServer( + false, + extraProperties, + environment, + additionalModule, + baseDataDir, + Optional.empty(), + Optional.of(ImmutableList.of()), + ImmutableList.of()); int coordinatorCount = backupCoordinatorProperties.isEmpty() ? 
1 : 2; checkArgument(nodeCount >= coordinatorCount, "nodeCount includes coordinator(s) count, so must be at least %s, got: %s", coordinatorCount, nodeCount); @@ -164,24 +179,28 @@ private DistributedQueryRunner( extraCoordinatorProperties.put("web-ui.user", "admin"); } - coordinator = createServer(true, extraCoordinatorProperties, environment, additionalModule, baseDataDir, systemAccessControlConfiguration, systemAccessControls, eventListeners); - if (backupCoordinatorProperties.isPresent()) { - Map extraBackupCoordinatorProperties = new HashMap<>(); - extraBackupCoordinatorProperties.putAll(extraProperties); - extraBackupCoordinatorProperties.putAll(backupCoordinatorProperties.get()); - backupCoordinator = Optional.of(createServer( - true, - extraBackupCoordinatorProperties, - environment, - additionalModule, - baseDataDir, - systemAccessControlConfiguration, - systemAccessControls, - eventListeners)); - } - else { - backupCoordinator = Optional.empty(); - } + coordinator = createServer( + true, + extraCoordinatorProperties, + environment, + additionalModule, + baseDataDir, + systemAccessControlConfiguration, + systemAccessControls, + eventListeners); + + backupCoordinator = backupCoordinatorProperties.map(properties -> createServer( + true, + ImmutableMap.builder() + .putAll(extraProperties) + .putAll(properties) + .buildOrThrow(), + environment, + additionalModule, + baseDataDir, + systemAccessControlConfiguration, + systemAccessControls, + eventListeners)); } catch (Exception e) { try { @@ -193,11 +212,15 @@ private DistributedQueryRunner( } // copy session using property manager in coordinator - defaultSession = defaultSession.toSessionRepresentation().toSession(coordinator.getSessionPropertyManager(), defaultSession.getIdentity().getExtraCredentials(), defaultSession.getExchangeEncryptionKey()); + defaultSession = defaultSession.toSessionRepresentation().toSession( + coordinator.getSessionPropertyManager(), + defaultSession.getIdentity().getExtraCredentials(), + defaultSession.getExchangeEncryptionKey()); + this.trinoClient = closer.register(testingTrinoClientFactory.create(coordinator, defaultSession)); ensureNodesGloballyVisible(); - log.info("Created DistributedQueryRunner in %s", nanosSince(start)); + log.info("Created DistributedQueryRunner in %s (unclosed instances = %s)", nanosSince(start), unclosedInstances.incrementAndGet()); } private TestingTrinoServer createServer( @@ -217,6 +240,7 @@ private TestingTrinoServer createServer( environment, additionalModule, baseDataDir, + Optional.of(SimpleSpanProcessor.create(spanExporter)), systemAccessControlConfiguration, systemAccessControls, eventListeners)); @@ -245,6 +269,7 @@ private static TestingTrinoServer createTestingTrinoServer( String environment, Module additionalModule, Optional baseDataDir, + Optional spanProcessor, Optional systemAccessControlConfiguration, Optional> systemAccessControls, List eventListeners) @@ -279,6 +304,7 @@ private static TestingTrinoServer createTestingTrinoServer( .setDiscoveryUri(discoveryUri) .setAdditionalModule(additionalModule) .setBaseDataDir(baseDataDir) + .setSpanProcessor(spanProcessor) .setSystemAccessControlConfiguration(systemAccessControlConfiguration) .setSystemAccessControls(systemAccessControls) .setEventListeners(eventListeners) @@ -312,6 +338,11 @@ public TestingTrinoClient getClient() return trinoClient; } + public List getSpans() + { + return spanExporter.getFinishedSpanItems(); + } + @Override public int getNodeCount() { @@ -483,13 +514,7 @@ public boolean tableExists(Session 
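The spanExporter wiring above means every TestingTrinoServer in the cluster reports to a single InMemorySpanExporter via a SimpleSpanProcessor, executeWithQueryId() clears it before running a statement, and getSpans() exposes the finished spans. A sketch of how a test might use this; the query text and assertions are illustrative:

    import io.opentelemetry.sdk.trace.data.SpanData;
    import io.trino.testing.DistributedQueryRunner;

    import java.util.List;

    import static org.assertj.core.api.Assertions.assertThat;

    class SpanAssertionExample
    {
        static void assertQueryIsTraced(DistributedQueryRunner queryRunner)
        {
            // executeWithQueryId() resets previously recorded spans first,
            // so getSpans() is scoped to this single statement.
            queryRunner.executeWithQueryId(queryRunner.getDefaultSession(), "SELECT count(*) FROM tpch.tiny.nation");

            List<SpanData> spans = queryRunner.getSpans();
            assertThat(spans).isNotEmpty();
            assertThat(spans).allSatisfy(span -> assertThat(span.getName()).isNotBlank());
        }
    }
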
session, String table) @Override public MaterializedResult execute(@Language("SQL") String sql) { - lock.readLock().lock(); - try { - return trinoClient.execute(sql).getResult(); - } - finally { - lock.readLock().unlock(); - } + return execute(getDefaultSession(), sql); } @Override @@ -502,6 +527,7 @@ public MaterializedResultWithQueryId executeWithQueryId(Session session, @Langua { lock.readLock().lock(); try { + spanExporter.reset(); ResultWithQueryId result = trinoClient.execute(session, sql); return new MaterializedResultWithQueryId(result.getQueryId(), result.getResult()); } @@ -527,9 +553,12 @@ public Plan createPlan(Session session, String sql) // session must be in a transaction registered with the transaction manager in this query runner getTransactionManager().getTransactionInfo(session.getRequiredTransactionId()); - SqlParser sqlParser = coordinator.getInstance(Key.get(SqlParser.class)); - Statement statement = sqlParser.createStatement(sql); - return coordinator.getQueryExplainer().getLogicalPlan(session, statement, ImmutableList.of(), WarningCollector.NOOP, createPlanOptimizersStatsCollector()); + return coordinator.getQueryExplainer().getLogicalPlan( + session, + coordinator.getInstance(Key.get(SqlParser.class)).createStatement(sql), + ImmutableList.of(), + WarningCollector.NOOP, + createPlanOptimizersStatsCollector()); } public Plan getQueryPlan(QueryId queryId) @@ -593,6 +622,7 @@ public final void close() servers.clear(); functionBundles.clear(); plugins.clear(); + unclosedInstances.decrementAndGet(); trinoClient = null; closed = true; } @@ -627,6 +657,7 @@ private static void closeUnchecked(AutoCloseable closeable) public static class Builder> { private Session defaultSession; + private boolean withTracing; private int nodeCount = 3; private Map extraProperties = ImmutableMap.of(); private Map coordinatorProperties = ImmutableMap.of(); @@ -644,6 +675,8 @@ public static class Builder> protected Builder(Session defaultSession) { this.defaultSession = requireNonNull(defaultSession, "defaultSession is null"); + String tracingEnabled = firstNonNull(getenv("TESTS_TRACING_ENABLED"), "false"); + this.withTracing = parseBoolean(tracingEnabled) || tracingEnabled.equals("1"); } @CanIgnoreReturnValue @@ -799,19 +832,7 @@ public SELF enableBackupCoordinator() public SELF withTracing() { - OpenTracingCollector collector = new OpenTracingCollector(); - collector.start(); - extraCloseables = ImmutableList.of(collector); - this.addExtraProperties(Map.of("tracing.enabled", "true", "tracing.exporter.endpoint", collector.getExporterEndpoint().toString())); - this.setEventListener(new EventListener() - { - @Override - public void queryCompleted(QueryCompletedEvent queryCompletedEvent) - { - String queryId = queryCompletedEvent.getMetadata().getQueryId(); - log.info("TRACING: %s :: %s", queryId, collector.searchForQueryId(queryId)); - } - }); + this.withTracing = true; return self(); } @@ -824,9 +845,22 @@ protected SELF self() public DistributedQueryRunner build() throws Exception { - String tracingEnabled = firstNonNull(getenv("TESTS_TRACING_ENABLED"), "false"); - if (parseBoolean(tracingEnabled) || tracingEnabled.equals("1")) { - withTracing(); + if (withTracing) { + checkState(extraCloseables.isEmpty(), "extraCloseables already set"); + OpenTracingCollector collector = new OpenTracingCollector(); + collector.start(); + extraCloseables = ImmutableList.of(collector); + addExtraProperties(Map.of("tracing.enabled", "true", "tracing.exporter.endpoint", 
collector.getExporterEndpoint().toString())); + checkState(eventListeners.isEmpty(), "eventListeners already set"); + setEventListener(new EventListener() + { + @Override + public void queryCompleted(QueryCompletedEvent queryCompletedEvent) + { + String queryId = queryCompletedEvent.getMetadata().getQueryId(); + log.info("TRACING: %s :: %s", queryId, collector.searchForQueryId(queryId)); + } + }); } Optional systemAccessControlConfiguration = this.systemAccessControlConfiguration; diff --git a/testing/trino-testing/src/main/java/io/trino/testing/statistics/MetricComparator.java b/testing/trino-testing/src/main/java/io/trino/testing/statistics/MetricComparator.java index 40c4e59d63692..0cd60854a5f9d 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/statistics/MetricComparator.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/statistics/MetricComparator.java @@ -28,7 +28,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.transaction.TransactionBuilder.transaction; +import static io.trino.testing.TransactionBuilder.transaction; import static java.lang.String.format; import static java.util.stream.Collectors.joining; diff --git a/testing/trino-testing/src/main/java/io/trino/testing/statistics/StatsContext.java b/testing/trino-testing/src/main/java/io/trino/testing/statistics/StatsContext.java index bb8b3bb8c7e94..f86ed2fe9599b 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/statistics/StatsContext.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/statistics/StatsContext.java @@ -36,7 +36,7 @@ public StatsContext(Map columnSymbols, TypeProvider types) public Symbol getSymbolForColumn(String columnName) { - checkArgument(columnSymbols.containsKey(columnName), "no symbol found for column '" + columnName + "'"); + checkArgument(columnSymbols.containsKey(columnName), "no symbol found for column '%s'", columnName); return columnSymbols.get(columnName); } diff --git a/testing/trino-tests/pom.xml b/testing/trino-tests/pom.xml index 3d58172bd50e0..98a777e62b10c 100644 --- a/testing/trino-tests/pom.xml +++ b/testing/trino-tests/pom.xml @@ -5,7 +5,7 @@ io.trino trino-root - 435-SNAPSHOT + 436-SNAPSHOT ../../pom.xml @@ -13,16 +13,6 @@ ${project.parent.basedir} - - - instances @@ -302,43 +292,21 @@ - org.openjdk.jmh - jmh-core + org.junit.jupiter + junit-jupiter-engine test org.openjdk.jmh - jmh-generator-annprocess + jmh-core test - org.testng - testng + org.openjdk.jmh + jmh-generator-annprocess test - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - - - - - - org.apache.maven.surefire - surefire-testng - ${dep.plugin.surefire.version} - - - - - diff --git a/testing/trino-tests/src/test/java/io/trino/connector/informationschema/TestInformationSchemaConnector.java b/testing/trino-tests/src/test/java/io/trino/connector/informationschema/TestInformationSchemaConnector.java index 18e23395d0c04..88236fbdf4786 100644 --- a/testing/trino-tests/src/test/java/io/trino/connector/informationschema/TestInformationSchemaConnector.java +++ b/testing/trino-tests/src/test/java/io/trino/connector/informationschema/TestInformationSchemaConnector.java @@ -28,6 +28,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -36,8 +37,10 @@ import static 
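After this refactoring, setting TESTS_TRACING_ENABLED=true (or "1") in the environment and calling withTracing() on the builder both flip the same flag, and build() does the actual wiring: it starts an OpenTracingCollector, points tracing.exporter.endpoint at it, and registers an event listener that logs where to find each query's trace. The new checkState guards mean tracing cannot be combined with previously supplied extraCloseables or event listeners. A sketch of opting in from a test, assuming the usual DistributedQueryRunner.builder(Session) entry point:

    import io.trino.Session;
    import io.trino.testing.DistributedQueryRunner;

    import static io.trino.testing.TestingSession.testSessionBuilder;

    class TracingOptInExample
    {
        static DistributedQueryRunner createTracedRunner()
                throws Exception
        {
            Session session = testSessionBuilder().build();
            // Equivalent to exporting TESTS_TRACING_ENABLED=true before the run.
            return DistributedQueryRunner.builder(session)
                    .withTracing()
                    .build();
        }
    }
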
io.trino.testing.TestingSession.testSessionBuilder; import static java.util.stream.Collectors.joining; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @TestInstance(PER_CLASS) +@Execution(SAME_THREAD) // countingMockConnector is shared mutable state public class TestInformationSchemaConnector extends AbstractTestQueryFramework { diff --git a/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestKillQuery.java b/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestKillQuery.java index 096a98f95f834..dae215d396f83 100644 --- a/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestKillQuery.java +++ b/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestKillQuery.java @@ -25,6 +25,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.Optional; import java.util.concurrent.ExecutionException; @@ -45,8 +46,10 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @TestInstance(PER_CLASS) +@Execution(SAME_THREAD) // e.g. some tests methods modify AC configuration public class TestKillQuery extends AbstractTestQueryFramework { diff --git a/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestSystemRuntimeConnector.java b/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestSystemRuntimeConnector.java index a536672a36177..b8464b6100e17 100644 --- a/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestSystemRuntimeConnector.java +++ b/testing/trino-tests/src/test/java/io/trino/connector/system/runtime/TestSystemRuntimeConnector.java @@ -29,9 +29,10 @@ import io.trino.testing.MaterializedResult; import io.trino.testing.MaterializedRow; import io.trino.testing.QueryRunner; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.time.ZonedDateTime; import java.util.List; @@ -52,15 +53,16 @@ import static java.lang.String.format; import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@Execution(SAME_THREAD) public class TestSystemRuntimeConnector extends AbstractTestQueryFramework { private static final Function> DEFAULT_GET_COLUMNS = table -> ImmutableList.of(new ColumnMetadata("c", VARCHAR)); private static final AtomicLong counter = new AtomicLong(); - private static Function> getColumns = DEFAULT_GET_COLUMNS; + private static Function> getColumns; private final ExecutorService executor = Executors.newSingleThreadScheduledExecutor(threadsNamed(TestSystemRuntimeConnector.class.getSimpleName())); @@ -94,13 +96,7 @@ public Iterable getConnectorFactories() return queryRunner; } - @BeforeMethod - public void cleanup() - { - getColumns = DEFAULT_GET_COLUMNS; - } - - @AfterClass(alwaysRun = true) + @AfterAll public void tearDown() { 
executor.shutdownNow(); @@ -117,81 +113,88 @@ public void testRuntimeNodes() "('testversion', false, 'active')"); } - // Test is run multiple times because it is vulnerable to OS clock adjustment. See https://github.com/trinodb/trino/issues/5608 - @Test(invocationCount = 10, successPercentage = 80) + @Test public void testRuntimeQueriesTimestamps() { - ZonedDateTime timeBefore = ZonedDateTime.now(); - computeActual("SELECT 1"); - MaterializedResult result = computeActual("" + - "SELECT max(created), max(started), max(last_heartbeat), max(\"end\") " + - "FROM system.runtime.queries"); - ZonedDateTime timeAfter = ZonedDateTime.now(); + // Test is run multiple times because it is vulnerable to OS clock adjustment. See https://github.com/trinodb/trino/issues/5608 + run(10, 0.8, () -> { + ZonedDateTime timeBefore = ZonedDateTime.now(); + computeActual("SELECT 1"); + MaterializedResult result = computeActual("" + + "SELECT max(created), max(started), max(last_heartbeat), max(\"end\") " + + "FROM system.runtime.queries"); + ZonedDateTime timeAfter = ZonedDateTime.now(); - MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); - List fields = row.getFields(); - assertThat(fields).hasSize(4); - for (int i = 0; i < fields.size(); i++) { - Object value = fields.get(i); - assertThat((ZonedDateTime) value) - .as("value for field " + i) - .isNotNull() - .isAfterOrEqualTo(timeBefore) - .isBeforeOrEqualTo(timeAfter); - } + MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); + List fields = row.getFields(); + assertThat(fields).hasSize(4); + for (int i = 0; i < fields.size(); i++) { + Object value = fields.get(i); + assertThat((ZonedDateTime) value) + .as("value for field " + i) + .isNotNull() + .isAfterOrEqualTo(timeBefore) + .isBeforeOrEqualTo(timeAfter); + } + }); } - // Test is run multiple times because it is vulnerable to OS clock adjustment. See https://github.com/trinodb/trino/issues/5608 - @Test(invocationCount = 10, successPercentage = 80) + @Test public void testRuntimeTasksTimestamps() { - ZonedDateTime timeBefore = ZonedDateTime.now(); - computeActual("SELECT 1"); - MaterializedResult result = computeActual("" + - "SELECT max(created), max(start), max(last_heartbeat), max(\"end\") " + - "FROM system.runtime.tasks"); - ZonedDateTime timeAfter = ZonedDateTime.now(); + // Test is run multiple times because it is vulnerable to OS clock adjustment. 
See https://github.com/trinodb/trino/issues/5608 + run(10, 0.8, () -> { + ZonedDateTime timeBefore = ZonedDateTime.now(); + computeActual("SELECT 1"); + MaterializedResult result = computeActual("" + + "SELECT max(created), max(start), max(last_heartbeat), max(\"end\") " + + "FROM system.runtime.tasks"); + ZonedDateTime timeAfter = ZonedDateTime.now(); - MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); - List fields = row.getFields(); - assertThat(fields).hasSize(4); - for (int i = 0; i < fields.size(); i++) { - Object value = fields.get(i); - assertThat((ZonedDateTime) value) - .as("value for field " + i) - .isNotNull() - .isAfterOrEqualTo(timeBefore) - .isBeforeOrEqualTo(timeAfter); - } + MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); + List fields = row.getFields(); + assertThat(fields).hasSize(4); + for (int i = 0; i < fields.size(); i++) { + Object value = fields.get(i); + assertThat((ZonedDateTime) value) + .as("value for field " + i) + .isNotNull() + .isAfterOrEqualTo(timeBefore) + .isBeforeOrEqualTo(timeAfter); + } + }); } // Test is run multiple times because it is vulnerable to OS clock adjustment. See https://github.com/trinodb/trino/issues/5608 - @Test(invocationCount = 10, successPercentage = 80) + @Test public void testRuntimeTransactionsTimestamps() { - ZonedDateTime timeBefore = ZonedDateTime.now(); - computeActual("START TRANSACTION"); - MaterializedResult result = computeActual("" + - "SELECT max(create_time) " + - "FROM system.runtime.transactions"); - ZonedDateTime timeAfter = ZonedDateTime.now(); + run(10, 0.8, () -> { + ZonedDateTime timeBefore = ZonedDateTime.now(); + computeActual("START TRANSACTION"); + MaterializedResult result = computeActual("" + + "SELECT max(create_time) " + + "FROM system.runtime.transactions"); + ZonedDateTime timeAfter = ZonedDateTime.now(); - MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); - List fields = row.getFields(); - assertThat(fields).hasSize(1); - for (int i = 0; i < fields.size(); i++) { - Object value = fields.get(i); - assertThat((ZonedDateTime) value) - .as("value for field " + i) - .isNotNull() - .isAfterOrEqualTo(timeBefore) - .isBeforeOrEqualTo(timeAfter); - } + MaterializedRow row = Iterables.getOnlyElement(result.toTestTypes().getMaterializedRows()); + List fields = row.getFields(); + assertThat(fields).hasSize(1); + for (int i = 0; i < fields.size(); i++) { + Object value = fields.get(i); + assertThat((ZonedDateTime) value) + .as("value for field " + i) + .isNotNull() + .isAfterOrEqualTo(timeBefore) + .isBeforeOrEqualTo(timeAfter); + } + }); } @Test public void testFinishedQueryIsCaptured() { + getColumns = DEFAULT_GET_COLUMNS; String testQueryId = "test_query_id_" + counter.incrementAndGet(); getQueryRunner().execute(format("EXPLAIN SELECT 1 AS %s FROM test_table", testQueryId)); @@ -200,7 +203,8 @@ public void testFinishedQueryIsCaptured() "VALUES 'FINISHED'"); } - @Test(timeOut = 60_000) + @Test + @Timeout(60) public void testQueryDuringAnalysisIsCaptured() { SettableFuture> metadataFuture = SettableFuture.create(); @@ -236,7 +240,8 @@ public void testQueryDuringAnalysisIsCaptured() assertEventually(new Duration(5, SECONDS), () -> assertThat(queryFuture.isDone()).isTrue()); } - @Test(timeOut = 60_000) + @Test + @Timeout(60) public void testQueryKillingDuringAnalysis() { SettableFuture> metadataFuture = SettableFuture.create(); @@ -285,4 +290,23 @@ public void testTasksTable() 
getQueryRunner().execute("SELECT 1"); getQueryRunner().execute("SELECT * FROM system.runtime.tasks"); } + + private static void run(int repetitions, double successRate, Runnable test) + { + AssertionError lastError = null; + int failures = 0; + for (int iteration = 0; iteration < repetitions; iteration++) { + try { + test.run(); + } + catch (AssertionError e) { + failures++; + lastError = e; + } + } + + if (lastError != null && 1 - (failures * 1.0) / repetitions < successRate) { + throw lastError; + } + } } diff --git a/testing/trino-tests/src/test/java/io/trino/execution/AbstractTestCoordinatorDynamicFiltering.java b/testing/trino-tests/src/test/java/io/trino/execution/AbstractTestCoordinatorDynamicFiltering.java index 3ceec0e49c327..0c441452f3e5d 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/AbstractTestCoordinatorDynamicFiltering.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/AbstractTestCoordinatorDynamicFiltering.java @@ -48,9 +48,11 @@ import io.trino.testing.TestingPageSinkProvider; import io.trino.testing.TestingTransactionHandle; import org.intellij.lang.annotations.Language; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Map; @@ -84,7 +86,11 @@ import static java.util.Objects.requireNonNull; import static java.util.concurrent.CompletableFuture.completedFuture; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +@TestInstance(PER_CLASS) +@Execution(CONCURRENT) public abstract class AbstractTestCoordinatorDynamicFiltering extends AbstractTestQueryFramework { @@ -97,7 +103,7 @@ public abstract class AbstractTestCoordinatorDynamicFiltering private volatile Consumer> expectedCoordinatorDynamicFilterAssertion; private volatile Consumer> expectedTableScanDynamicFilterAssertion; - @BeforeClass + @BeforeAll public void setup() { // create lineitem table in test connector @@ -119,8 +125,16 @@ public void setup() protected abstract RetryPolicy getRetryPolicy(); - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithEmptyBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testJoinWithEmptyBuildSide() + { + testJoinWithEmptyBuildSide(BROADCAST, true); + testJoinWithEmptyBuildSide(PARTITIONED, true); + testJoinWithEmptyBuildSide(PARTITIONED, false); + } + + private void testJoinWithEmptyBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -129,8 +143,16 @@ public void testJoinWithEmptyBuildSide(JoinDistributionType joinDistributionType TupleDomain.none()); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithLargeBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testJoinWithLargeBuildSide() + { + testJoinWithLargeBuildSide(BROADCAST, true); + testJoinWithLargeBuildSide(PARTITIONED, 
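The run(repetitions, successRate, test) helper above replaces TestNG's @Test(invocationCount = 10, successPercentage = 80): it always executes the body the full number of times, remembers the last AssertionError, and rethrows it only when 1 - failures/repetitions falls below successRate. With 10 repetitions and a 0.8 threshold, up to two failing iterations are tolerated (1 - 2/10 = 0.8, which is not below the threshold), matching the previous 80% success requirement. Note also that the @BeforeMethod reset of getColumns is gone, so testFinishedQueryIsCaptured now assigns getColumns = DEFAULT_GET_COLUMNS itself before running.
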
true); + testJoinWithLargeBuildSide(PARTITIONED, false); + } + + private void testJoinWithLargeBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -139,8 +161,16 @@ public void testJoinWithLargeBuildSide(JoinDistributionType joinDistributionType TupleDomain.all()); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testMultiColumnJoinWithDifferentCardinalitiesInBuildSide() + { + testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(BROADCAST, true); + testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(PARTITIONED, true); + testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(PARTITIONED, false); + } + + private void testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { // orderkey has high cardinality, suppkey has low cardinality due to filter assertQueryDynamicFilters( @@ -154,8 +184,16 @@ public void testMultiColumnJoinWithDifferentCardinalitiesInBuildSide(JoinDistrib multipleValues(BIGINT, LongStream.rangeClosed(1L, 10L).boxed().collect(toImmutableList()))))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testJoinWithSelectiveBuildSide() + { + testJoinWithSelectiveBuildSide(BROADCAST, true); + testJoinWithSelectiveBuildSide(PARTITIONED, true); + testJoinWithSelectiveBuildSide(PARTITIONED, false); + } + + private void testJoinWithSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -166,7 +204,8 @@ public void testJoinWithSelectiveBuildSide(JoinDistributionType joinDistribution singleValue(BIGINT, 1L)))); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testInequalityJoinWithSelectiveBuildSide() { assertQueryDynamicFilters( @@ -195,7 +234,8 @@ public void testInequalityJoinWithSelectiveBuildSide() Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 1L)), false)))); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testIsNotDistinctFromJoinWithSelectiveBuildSide() { assertQueryDynamicFilters( @@ -218,7 +258,8 @@ public void testIsNotDistinctFromJoinWithSelectiveBuildSide() Domain.onlyNull(BIGINT)))); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testJoinWithImplicitCoercion() { // setup fact table with integer suppkey @@ -246,8 +287,16 @@ public void testJoinWithImplicitCoercion() multipleValues(createVarcharType(40), values)))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testJoinWithNonSelectiveBuildSide() + { + testJoinWithNonSelectiveBuildSide(BROADCAST, true); + testJoinWithNonSelectiveBuildSide(PARTITIONED, true); + testJoinWithNonSelectiveBuildSide(PARTITIONED, false); + } + + protected void 
testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -258,8 +307,16 @@ public void testJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistribut Domain.create(ValueSet.ofRanges(range(BIGINT, 1L, true, 100L, true)), false)))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testJoinWithMultipleDynamicFiltersOnProbe(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testJoinWithMultipleDynamicFiltersOnProbe() + { + testJoinWithMultipleDynamicFiltersOnProbe(BROADCAST, true); + testJoinWithMultipleDynamicFiltersOnProbe(PARTITIONED, true); + testJoinWithMultipleDynamicFiltersOnProbe(PARTITIONED, false); + } + + private void testJoinWithMultipleDynamicFiltersOnProbe(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { // supplier names Supplier#000000001 and Supplier#000000002 match suppkey 1 and 2 assertQueryDynamicFilters( @@ -274,7 +331,8 @@ public void testJoinWithMultipleDynamicFiltersOnProbe(JoinDistributionType joinD singleValue(BIGINT, 2L)))); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testRightJoinWithEmptyBuildSide() { assertQueryDynamicFilters( @@ -283,7 +341,8 @@ public void testRightJoinWithEmptyBuildSide() TupleDomain.none()); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testRightJoinWithNonSelectiveBuildSide() { assertQueryDynamicFilters( @@ -294,7 +353,8 @@ public void testRightJoinWithNonSelectiveBuildSide() Domain.create(ValueSet.ofRanges(range(BIGINT, 1L, true, 100L, true)), false)))); } - @Test(timeOut = 30_000) + @Test + @Timeout(30) public void testRightJoinWithSelectiveBuildSide() { assertQueryDynamicFilters( @@ -305,8 +365,16 @@ public void testRightJoinWithSelectiveBuildSide() singleValue(BIGINT, 1L)))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithEmptyBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testSemiJoinWithEmptyBuildSide() + { + testSemiJoinWithEmptyBuildSide(BROADCAST, true); + testSemiJoinWithEmptyBuildSide(PARTITIONED, true); + testSemiJoinWithEmptyBuildSide(PARTITIONED, false); + } + + private void testSemiJoinWithEmptyBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -315,8 +383,16 @@ public void testSemiJoinWithEmptyBuildSide(JoinDistributionType joinDistribution TupleDomain.none()); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithLargeBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testSemiJoinWithLargeBuildSide() + { + testSemiJoinWithLargeBuildSide(BROADCAST, true); + testSemiJoinWithLargeBuildSide(PARTITIONED, true); + testSemiJoinWithLargeBuildSide(PARTITIONED, false); + } + + private void testSemiJoinWithLargeBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -325,8 +401,16 @@ 
public void testSemiJoinWithLargeBuildSide(JoinDistributionType joinDistribution TupleDomain.all()); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testSemiJoinWithSelectiveBuildSide() + { + testSemiJoinWithSelectiveBuildSide(BROADCAST, true); + testSemiJoinWithSelectiveBuildSide(PARTITIONED, true); + testSemiJoinWithSelectiveBuildSide(PARTITIONED, false); + } + + private void testSemiJoinWithSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -337,8 +421,16 @@ public void testSemiJoinWithSelectiveBuildSide(JoinDistributionType joinDistribu singleValue(BIGINT, 1L)))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testSemiJoinWithNonSelectiveBuildSide() + { + testSemiJoinWithNonSelectiveBuildSide(BROADCAST, true); + testSemiJoinWithNonSelectiveBuildSide(PARTITIONED, true); + testSemiJoinWithNonSelectiveBuildSide(PARTITIONED, false); + } + + protected void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { assertQueryDynamicFilters( noJoinReordering(joinDistributionType, coordinatorDynamicFiltersDistribution), @@ -349,8 +441,16 @@ public void testSemiJoinWithNonSelectiveBuildSide(JoinDistributionType joinDistr Domain.create(ValueSet.ofRanges(range(BIGINT, 1L, true, 100L, true)), false)))); } - @Test(timeOut = 30_000, dataProvider = "testJoinDistributionType") - public void testSemiJoinWithMultipleDynamicFiltersOnProbe(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) + @Test + @Timeout(30) + public void testSemiJoinWithMultipleDynamicFiltersOnProbe() + { + testSemiJoinWithMultipleDynamicFiltersOnProbe(BROADCAST, true); + testSemiJoinWithMultipleDynamicFiltersOnProbe(PARTITIONED, true); + testSemiJoinWithMultipleDynamicFiltersOnProbe(PARTITIONED, false); + } + + private void testSemiJoinWithMultipleDynamicFiltersOnProbe(JoinDistributionType joinDistributionType, boolean coordinatorDynamicFiltersDistribution) { // supplier names Supplier#000000001 and Supplier#000000002 match suppkey 1 and 2 assertQueryDynamicFilters( @@ -378,15 +478,6 @@ protected Session getDefaultSession() .build(); } - @DataProvider - public Object[][] testJoinDistributionType() - { - return new Object[][] { - {BROADCAST, true}, - {PARTITIONED, true}, - {PARTITIONED, false}}; - } - protected Session noJoinReordering(JoinDistributionType distributionType, boolean coordinatorDynamicFiltersDistribution) { return Session.builder(noJoinReordering(distributionType)) diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestBeginQuery.java b/testing/trino-tests/src/test/java/io/trino/execution/TestBeginQuery.java index 05d982ce03bdd..fae3329fcdf5b 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestBeginQuery.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestBeginQuery.java @@ -42,6 +42,7 @@ import io.trino.testing.TestingSplitManager; import io.trino.testing.TestingTransactionHandle; import 
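The pattern used throughout AbstractTestCoordinatorDynamicFiltering above is the JUnit 5 replacement for the removed @DataProvider: each public @Test method invokes a private overload once per former data-provider row (BROADCAST/true, PARTITIONED/true, PARTITIONED/false), and @Test(timeOut = 30_000) becomes @Timeout(30), which counts seconds rather than milliseconds. The same migration in miniature, with illustrative names:

    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.Timeout;

    class DataProviderMigrationExample
    {
        enum Mode { BROADCAST, PARTITIONED }

        // Formerly: @Test(timeOut = 30_000, dataProvider = "modes")
        @Test
        @Timeout(30) // seconds, unlike TestNG's milliseconds
        void testAllModes()
        {
            testWithMode(Mode.BROADCAST, true);
            testWithMode(Mode.PARTITIONED, true);
            testWithMode(Mode.PARTITIONED, false);
        }

        private void testWithMode(Mode mode, boolean coordinatorDistribution)
        {
            // the real assertions for one parameter combination go here
        }
    }
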
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Map; @@ -50,7 +51,9 @@ import static io.trino.testing.TestingSession.testSessionBuilder; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; +@Execution(SAME_THREAD) // TestMetadata is shared mutable state public class TestBeginQuery extends AbstractTestQueryFramework { diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestCompletedEventWarnings.java b/testing/trino-tests/src/test/java/io/trino/execution/TestCompletedEventWarnings.java index 003675efacbbd..f0586b7248421 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestCompletedEventWarnings.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestCompletedEventWarnings.java @@ -39,10 +39,10 @@ import static io.trino.SessionTestUtils.TEST_SESSION; import static org.assertj.core.api.Fail.fail; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @TestInstance(PER_CLASS) -@Execution(CONCURRENT) +@Execution(SAME_THREAD) // EventsAwaitingQueries is shared mutable state public class TestCompletedEventWarnings { private static final int TEST_WARNINGS = 5; diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestCoordinatorDynamicFiltering.java b/testing/trino-tests/src/test/java/io/trino/execution/TestCoordinatorDynamicFiltering.java index 1a52a4fe8fc9a..d85e6430513dd 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestCoordinatorDynamicFiltering.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestCoordinatorDynamicFiltering.java @@ -17,11 +17,12 @@ import io.trino.operator.RetryPolicy; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.testng.annotations.Test; +import org.junit.jupiter.api.parallel.Execution; import static io.trino.operator.RetryPolicy.NONE; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -@Test(singleThreaded = true) +@Execution(SAME_THREAD) public class TestCoordinatorDynamicFiltering extends AbstractTestCoordinatorDynamicFiltering { diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerBasic.java b/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerBasic.java index ae77214a462fd..a583cfc40ec05 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerBasic.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerBasic.java @@ -180,8 +180,7 @@ public Iterable getConnectorFactories() Optional.of(Duration.ZERO), Optional.empty(), Optional.of("alice"), - ImmutableList.of(), - ImmutableMap.of()); + ImmutableList.of()); SchemaTableName materializedViewName = new SchemaTableName("default", "test_materialized_view"); return ImmutableMap.of(materializedViewName, definition); }) diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerWithSplits.java b/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerWithSplits.java index 956aa7f794af1..5eaabe2e1716b 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerWithSplits.java +++ 
b/testing/trino-tests/src/test/java/io/trino/execution/TestEventListenerWithSplits.java @@ -36,6 +36,7 @@ import io.trino.testing.QueryRunner; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; import java.util.List; import java.util.Optional; @@ -48,7 +49,9 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.stream.Collectors.toSet; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; +@Execution(SAME_THREAD) // EventsAwaitingQueries is shared mutable state public class TestEventListenerWithSplits extends AbstractTestQueryFramework { diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestExecutionJmxMetrics.java b/testing/trino-tests/src/test/java/io/trino/execution/TestExecutionJmxMetrics.java index 7a9cf391c1f7c..d4943f4340c70 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestExecutionJmxMetrics.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestExecutionJmxMetrics.java @@ -17,7 +17,6 @@ import io.trino.Session; import io.trino.execution.resourcegroups.InternalResourceGroupManager; import io.trino.plugin.resourcegroups.ResourceGroupManagerPlugin; -import io.trino.server.PrefixObjectNameGeneratorModule; import io.trino.spi.QueryId; import io.trino.testing.DistributedQueryRunner; import io.trino.tests.tpch.TpchQueryRunnerBuilder; @@ -45,9 +44,7 @@ public class TestExecutionJmxMetrics public void testQueryStats() throws Exception { - try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder() - .setAdditionalModule(new PrefixObjectNameGeneratorModule("io.trino")) - .build()) { + try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder().build()) { queryRunner.installPlugin(new ResourceGroupManagerPlugin()); InternalResourceGroupManager resourceGroupManager = queryRunner.getCoordinator().getResourceGroupManager() .orElseThrow(() -> new IllegalStateException("Resource manager not configured")); diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestQueryTracker.java b/testing/trino-tests/src/test/java/io/trino/execution/TestQueryTracker.java index 6e1d3bfe17e4e..e65d5ed38bbc4 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestQueryTracker.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestQueryTracker.java @@ -26,6 +26,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.concurrent.CountDownLatch; @@ -34,10 +35,12 @@ import static io.trino.testing.TestingSession.testSessionBuilder; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; // Tests need to finish before strict timeouts. 
Any background work // may make them flaky @TestInstance(PER_CLASS) +@Execution(SAME_THREAD) // CountDownLatches are shared mutable state public class TestQueryTracker extends AbstractTestQueryFramework { diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestQueues.java b/testing/trino-tests/src/test/java/io/trino/execution/TestQueues.java index 9f343c2693a93..906d7dab8382a 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestQueues.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestQueues.java @@ -28,6 +28,7 @@ import io.trino.tests.tpch.TpchQueryRunnerBuilder; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.util.Optional; import java.util.Set; @@ -49,8 +50,9 @@ import static java.util.Arrays.asList; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; -// run single threaded to avoid creating multiple query runners at once +@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once public class TestQueues { private static final String LONG_LASTING_QUERY = "SELECT COUNT(*) FROM lineitem"; diff --git a/testing/trino-tests/src/test/java/io/trino/execution/TestRefreshMaterializedView.java b/testing/trino-tests/src/test/java/io/trino/execution/TestRefreshMaterializedView.java index 3d26bf9bf5af5..316f87b8b5134 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/TestRefreshMaterializedView.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/TestRefreshMaterializedView.java @@ -37,6 +37,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import java.time.Duration; import java.util.Optional; @@ -54,8 +55,10 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @TestInstance(PER_CLASS) +@Execution(SAME_THREAD) public class TestRefreshMaterializedView extends AbstractTestQueryFramework { @@ -106,8 +109,7 @@ protected QueryRunner createQueryRunner() Optional.of(Duration.ZERO), Optional.empty(), Optional.of("alice"), - ImmutableList.of(), - ImmutableMap.of()))) + ImmutableList.of()))) .withDelegateMaterializedViewRefreshToConnector((connectorSession, schemaTableName) -> true) .withRefreshMaterializedView((connectorSession, schemaTableName) -> { startRefreshMaterializedView.set(null); diff --git a/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestEnvironments.java b/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestEnvironments.java index e826087d2ed82..d94ad84f5ad6b 100644 --- a/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestEnvironments.java +++ b/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestEnvironments.java @@ -18,6 +18,7 @@ import io.trino.testing.DistributedQueryRunner; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.parallel.Execution; import static io.trino.execution.QueryRunnerUtil.createQuery; import static io.trino.execution.QueryRunnerUtil.waitForQueryState; @@ -29,7 +30,9 @@ 
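A recurring change in the remaining test classes is switching the JUnit 5 execution mode to SAME_THREAD (or making previously implicit single-threading explicit), because these classes share mutable state across methods: a counting mock connector, EventsAwaitingQueries, the static getColumns function, or simply the cost of several query runners existing at once. The annotation is applied at class level; a minimal sketch:

    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.TestInstance;
    import org.junit.jupiter.api.parallel.Execution;

    import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
    import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

    @TestInstance(PER_CLASS)
    @Execution(SAME_THREAD) // the test methods share the mutable counter below
    class SharedStateTest
    {
        private int counter;

        @Test
        void first()
        {
            counter++;
        }

        @Test
        void second()
        {
            counter++;
        }
    }
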
 import static io.trino.execution.resourcegroups.db.H2TestUtil.createQueryRunner;
 import static io.trino.execution.resourcegroups.db.H2TestUtil.getDao;
 import static io.trino.execution.resourcegroups.db.H2TestUtil.getDbConfigUrl;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

+@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once
 public class TestEnvironments
 {
     private static final String LONG_LASTING_QUERY = "SELECT COUNT(*) FROM lineitem";
diff --git a/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestQueuesDb.java b/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestQueuesDb.java
index d61984362bbe2..3c857f8426dc6 100644
--- a/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestQueuesDb.java
+++ b/testing/trino-tests/src/test/java/io/trino/execution/resourcegroups/db/TestQueuesDb.java
@@ -32,8 +32,8 @@
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
 import org.junit.jupiter.api.Timeout;
+import org.junit.jupiter.api.parallel.Execution;

 import java.util.Optional;
 import java.util.Set;
@@ -69,10 +69,9 @@
 import static java.lang.String.format;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_METHOD;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

-// run single threaded to avoid creating multiple query runners at once
-@TestInstance(PER_METHOD)
+@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once
 public class TestQueuesDb
 {
     // Copy of TestQueues with tests for db reconfiguration of resource groups
diff --git a/testing/trino-tests/src/test/java/io/trino/memory/TestMemoryManager.java b/testing/trino-tests/src/test/java/io/trino/memory/TestMemoryManager.java
index c9b9d54f4adeb..dfe071fc43dd6 100644
--- a/testing/trino-tests/src/test/java/io/trino/memory/TestMemoryManager.java
+++ b/testing/trino-tests/src/test/java/io/trino/memory/TestMemoryManager.java
@@ -57,10 +57,10 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

 @TestInstance(PER_CLASS)
-@Execution(CONCURRENT)
+@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once
 public class TestMemoryManager
 {
     private static final Session SESSION = testSessionBuilder()
diff --git a/testing/trino-tests/src/test/java/io/trino/security/TestAccessControl.java b/testing/trino-tests/src/test/java/io/trino/security/TestAccessControl.java
index 8daed713e7599..33961bf0cd525 100644
--- a/testing/trino-tests/src/test/java/io/trino/security/TestAccessControl.java
+++ b/testing/trino-tests/src/test/java/io/trino/security/TestAccessControl.java
@@ -65,6 +65,7 @@
 import io.trino.testing.TestingGroupProvider;
 import io.trino.testing.TestingSession;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.parallel.Execution;

 import java.lang.invoke.MethodHandles;
 import java.time.Duration;
@@ -118,7 +119,9 @@
 import static java.util.stream.Collectors.toList;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

+@Execution(SAME_THREAD)
 public class TestAccessControl
         extends AbstractTestQueryFramework
 {
@@ -214,8 +217,7 @@ public Map apply(Connector
                         Optional.of(Duration.ZERO),
                         Optional.of("comment"),
                         Optional.of("owner"),
-                        ImmutableList.of(),
-                        ImmutableMap.of());
+                        ImmutableList.of());
             return ImmutableMap.of(
                     new SchemaTableName("default", "test_materialized_view"), materializedViewDefinition);
         }
diff --git a/testing/trino-tests/src/test/java/io/trino/security/TestSystemSecurityMetadata.java b/testing/trino-tests/src/test/java/io/trino/security/TestSystemSecurityMetadata.java
index 0136d6bf0f1e8..5891d68c8f520 100644
--- a/testing/trino-tests/src/test/java/io/trino/security/TestSystemSecurityMetadata.java
+++ b/testing/trino-tests/src/test/java/io/trino/security/TestSystemSecurityMetadata.java
@@ -21,10 +21,13 @@
 import io.trino.testing.DistributedQueryRunner;
 import io.trino.testing.QueryRunner;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.parallel.Execution;

 import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder;
 import static io.trino.testing.TestingSession.testSessionBuilder;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

+@Execution(SAME_THREAD) // TestingSystemSecurityMetadata is shared mutable state
 public class TestSystemSecurityMetadata
         extends AbstractTestQueryFramework
 {
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestGetTableStatisticsOperations.java b/testing/trino-tests/src/test/java/io/trino/tests/TestGetTableStatisticsOperations.java
index e50a7c4307f9c..73591178989e8 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestGetTableStatisticsOperations.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestGetTableStatisticsOperations.java
@@ -14,50 +14,45 @@
 package io.trino.tests;

 import com.google.common.collect.ImmutableMap;
-import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter;
-import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension;
 import io.opentelemetry.sdk.trace.data.SpanData;
-import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor;
 import io.trino.execution.warnings.WarningCollector;
 import io.trino.plugin.tpch.TpchPlugin;
 import io.trino.testing.AbstractTestQueryFramework;
 import io.trino.testing.LocalQueryRunner;
 import io.trino.testing.QueryRunner;
-import io.trino.testng.services.ManageTestResources;
 import io.trino.tracing.TracingMetadata;
 import org.intellij.lang.annotations.Language;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.extension.RegisterExtension;
+import org.junit.jupiter.api.parallel.Execution;

 import static io.trino.execution.querystats.PlanOptimizersStatsCollector.createPlanOptimizersStatsCollector;
 import static io.trino.sql.planner.LogicalPlanner.Stage.OPTIMIZED_AND_VALIDATED;
 import static io.trino.testing.TestingSession.testSessionBuilder;
-import static io.trino.transaction.TransactionBuilder.transaction;
+import static io.trino.testing.TransactionBuilder.transaction;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

 @TestInstance(PER_CLASS)
+@Execution(SAME_THREAD)
 public class TestGetTableStatisticsOperations
         extends AbstractTestQueryFramework
 {
-    @ManageTestResources.Suppress(because = "Not a TestNG test class")
+    @RegisterExtension
+    static final OpenTelemetryExtension TELEMETRY = OpenTelemetryExtension.create();
+
     private LocalQueryRunner localQueryRunner;
-    @ManageTestResources.Suppress(because = "Not a TestNG test class")
-    private InMemorySpanExporter spanExporter;

     @Override
     protected QueryRunner createQueryRunner()
             throws Exception
     {
-        spanExporter = closeAfterClass(InMemorySpanExporter.create());
-
-        SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
-                .addSpanProcessor(SimpleSpanProcessor.create(spanExporter))
-                .build();
-
         localQueryRunner = LocalQueryRunner.builder(testSessionBuilder().build())
-                .withMetadataDecorator(metadata -> new TracingMetadata(tracerProvider.get("test"), metadata))
+                .withMetadataDecorator(metadata -> new TracingMetadata(TELEMETRY.getOpenTelemetry().getTracer("test"), metadata))
                 .build();
         localQueryRunner.installPlugin(new TpchPlugin());
         localQueryRunner.createCatalog("tpch", "tpch", ImmutableMap.of());
@@ -69,19 +64,11 @@ public void tearDown()
     {
         localQueryRunner.close();
         localQueryRunner = null;
-        spanExporter = null;
-    }
-
-    private void resetCounters()
-    {
-        spanExporter.reset();
     }

     @Test
     public void testTwoWayJoin()
     {
-        resetCounters();
-
         planDistributedQuery("SELECT * " +
                 "FROM tpch.tiny.orders o, tpch.tiny.lineitem l " +
                 "WHERE o.orderkey = l.orderkey");
@@ -91,8 +78,6 @@ public void testTwoWayJoin()
     @Test
     public void testThreeWayJoin()
     {
-        resetCounters();
-
         planDistributedQuery("SELECT * " +
                 "FROM tpch.tiny.customer c, tpch.tiny.orders o, tpch.tiny.lineitem l " +
                 "WHERE o.orderkey = l.orderkey AND c.custkey = o.custkey");
@@ -107,9 +92,9 @@ private void planDistributedQuery(@Language("SQL") String sql)
         });
     }

-    private long getTableStatisticsMethodInvocations()
+    private static long getTableStatisticsMethodInvocations()
     {
-        return spanExporter.getFinishedSpanItems().stream()
+        return TELEMETRY.getSpans().stream()
                 .map(SpanData::getName)
                 .filter(name -> name.equals("Metadata.getTableStatistics"))
                 .count();
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestGracefulShutdown.java b/testing/trino-tests/src/test/java/io/trino/tests/TestGracefulShutdown.java
index 9fbf1065fc931..ecb52a979e8c5 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestGracefulShutdown.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestGracefulShutdown.java
@@ -45,10 +45,10 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

 @TestInstance(PER_CLASS)
-@Execution(CONCURRENT)
+@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once
 public class TestGracefulShutdown
 {
     private static final long SHUTDOWN_TIMEOUT_MILLIS = 240_000;
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestMetadataManager.java b/testing/trino-tests/src/test/java/io/trino/tests/TestMetadataManager.java
index c40dec07c4c8a..edbca15904bf4 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestMetadataManager.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestMetadataManager.java
@@ -30,9 +30,9 @@
 import io.trino.spi.connector.SchemaTableName;
 import io.trino.testing.DistributedQueryRunner;
 import io.trino.testing.TestingSessionContext;
+import io.trino.testing.TransactionBuilder;
 import io.trino.tests.tpch.TpchQueryRunnerBuilder;
 import io.trino.tracing.TracingMetadata;
-import io.trino.transaction.TransactionBuilder;
 import org.intellij.lang.annotations.Language;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
@@ -50,7 +50,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

 /**
  * This is integration / unit test suite.
@@ -59,7 +59,7 @@
  * This mapping has to be manually cleaned when query finishes execution (Metadata#cleanupQuery method).
  */
 @TestInstance(PER_CLASS)
-@Execution(CONCURRENT)
+@Execution(SAME_THREAD) // metadataManager.getActiveQueryIds() is shared mutable state
 public class TestMetadataManager
 {
     private DistributedQueryRunner queryRunner;
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestMockConnector.java b/testing/trino-tests/src/test/java/io/trino/tests/TestMockConnector.java
index 62e927c58b656..bc7f700696013 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestMockConnector.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestMockConnector.java
@@ -106,8 +106,7 @@ protected QueryRunner createQueryRunner()
                         Optional.of(Duration.ZERO),
                         Optional.empty(),
                         Optional.of("alice"),
-                        ImmutableList.of(),
-                        ImmutableMap.of())))
+                        ImmutableList.of())))
                 .withData(schemaTableName -> {
                     if (schemaTableName.equals(new SchemaTableName("default", "nation"))) {
                         return TPCH_NATION_DATA;
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestProcedureCall.java b/testing/trino-tests/src/test/java/io/trino/tests/TestProcedureCall.java
index 8854489ed17c7..a95ad64b1e594 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestProcedureCall.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestProcedureCall.java
@@ -27,6 +27,7 @@
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.parallel.Execution;

 import java.util.List;
@@ -36,8 +37,10 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

 @TestInstance(PER_CLASS)
+@Execution(SAME_THREAD) // ProcedureTester is shared mutable state
 public class TestProcedureCall
         extends AbstractTestQueryFramework
 {
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestQueryManager.java b/testing/trino-tests/src/test/java/io/trino/tests/TestQueryManager.java
index 091a4a951b0fe..f7a7a91a4dbb1 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestQueryManager.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestQueryManager.java
@@ -27,10 +27,7 @@
 import io.trino.testing.DistributedQueryRunner;
 import io.trino.testing.TestingSessionContext;
 import io.trino.tests.tpch.TpchQueryRunnerBuilder;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.api.parallel.Execution;
@@ -48,64 +45,48 @@
 import static java.util.Arrays.stream;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Fail.fail;
-import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

-@TestInstance(PER_CLASS)
-@Execution(CONCURRENT)
+@Execution(SAME_THREAD) // run single threaded to avoid creating multiple query runners at once
 public class TestQueryManager
 {
-    private DistributedQueryRunner queryRunner;
-
-    @BeforeAll
-    public void setUp()
-            throws Exception
-    {
-        queryRunner = TpchQueryRunnerBuilder.builder().build();
-    }
-
-    @AfterAll
-    public void tearDown()
-    {
-        queryRunner.close();
-        queryRunner = null;
-    }
-
     @Test
     @Timeout(60)
     public void testFailQuery()
             throws Exception
     {
-        DispatchManager dispatchManager = queryRunner.getCoordinator().getDispatchManager();
-        QueryId queryId = dispatchManager.createQueryId();
-        dispatchManager.createQuery(
-                queryId,
-                Span.getInvalid(),
-                Slug.createNew(),
-                TestingSessionContext.fromSession(TEST_SESSION),
-                "SELECT * FROM lineitem")
-                .get();
+        try (DistributedQueryRunner queryRunner = TpchQueryRunnerBuilder.builder().build()) {
+            DispatchManager dispatchManager = queryRunner.getCoordinator().getDispatchManager();
+            QueryId queryId = dispatchManager.createQueryId();
+            dispatchManager.createQuery(
+                    queryId,
+                    Span.getInvalid(),
+                    Slug.createNew(),
+                    TestingSessionContext.fromSession(TEST_SESSION),
+                    "SELECT * FROM lineitem")
+                    .get();

-        // wait until query starts running
-        while (true) {
-            QueryState state = dispatchManager.getQueryInfo(queryId).getState();
-            if (state.isDone()) {
-                fail("unexpected query state: " + state);
+            // wait until query starts running
+            while (true) {
+                QueryState state = dispatchManager.getQueryInfo(queryId).getState();
+                if (state.isDone()) {
+                    fail("unexpected query state: " + state);
+                }
+                if (state == RUNNING) {
+                    break;
+                }
+                Thread.sleep(100);
             }
-            if (state == RUNNING) {
-                break;
-            }
-            Thread.sleep(100);
-        }
-        // cancel query
-        QueryManager queryManager = queryRunner.getCoordinator().getQueryManager();
-        queryManager.failQuery(queryId, new TrinoException(GENERIC_INTERNAL_ERROR, "mock exception"));
-        QueryInfo queryInfo = queryManager.getFullQueryInfo(queryId);
-        assertThat(queryInfo.getState()).isEqualTo(FAILED);
-        assertThat(queryInfo.getErrorCode()).isEqualTo(GENERIC_INTERNAL_ERROR.toErrorCode());
-        assertThat(queryInfo.getFailureInfo()).isNotNull();
-        assertThat(queryInfo.getFailureInfo().getMessage()).isEqualTo("mock exception");
+            // cancel query
+            QueryManager queryManager = queryRunner.getCoordinator().getQueryManager();
+            queryManager.failQuery(queryId, new TrinoException(GENERIC_INTERNAL_ERROR, "mock exception"));
+            QueryInfo queryInfo = queryManager.getFullQueryInfo(queryId);
+            assertThat(queryInfo.getState()).isEqualTo(FAILED);
+            assertThat(queryInfo.getErrorCode()).isEqualTo(GENERIC_INTERNAL_ERROR.toErrorCode());
+            assertThat(queryInfo.getFailureInfo()).isNotNull();
+            assertThat(queryInfo.getFailureInfo().getMessage()).isEqualTo("mock exception");
+        }
     }

     @Test
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestQuerySpillLimits.java b/testing/trino-tests/src/test/java/io/trino/tests/TestQuerySpillLimits.java
index 9fb634752fa44..f13db591fc164 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestQuerySpillLimits.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestQuerySpillLimits.java
@@ -73,7 +73,7 @@ public void testMaxSpillPerNodeLimit()
             }
         })
                 .isInstanceOf(RuntimeException.class)
-                .hasMessage(".*Query exceeded local spill limit of 10B");
+                .hasMessage("Query exceeded local spill limit of 10B");
     }

     @Test
@@ -86,7 +86,7 @@ public void testQueryMaxSpillPerNodeLimit()
             }
         })
                 .isInstanceOf(RuntimeException.class)
-                .hasMessageMatching(".*Query exceeded per-query local spill limit of 10B");
+                .hasMessageMatching("Query exceeded per-query local spill limit of 10B");
     }

     private LocalQueryRunner createLocalQueryRunner(NodeSpillConfig nodeSpillConfig)
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestSequenceFunction.java b/testing/trino-tests/src/test/java/io/trino/tests/TestSequenceFunction.java
index 1be55965f6745..b1688f3e3a23c 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestSequenceFunction.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestSequenceFunction.java
@@ -18,7 +18,7 @@
 import io.trino.testing.QueryRunner;
 import org.junit.jupiter.api.Test;

-import static io.trino.operator.table.Sequence.SequenceFunctionSplit.DEFAULT_SPLIT_SIZE;
+import static io.trino.operator.table.SequenceFunction.SequenceFunctionSplit.DEFAULT_SPLIT_SIZE;
 import static io.trino.testing.TestingSession.testSessionBuilder;
 import static java.lang.String.format;
 import static org.assertj.core.api.Assertions.assertThat;
diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestServer.java b/testing/trino-tests/src/test/java/io/trino/tests/TestServer.java
index e0dbbb86d8be1..526bd17c261e3 100644
--- a/testing/trino-tests/src/test/java/io/trino/tests/TestServer.java
+++ b/testing/trino-tests/src/test/java/io/trino/tests/TestServer.java
@@ -37,6 +37,8 @@
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.parallel.Execution;

 import java.net.URI;
 import java.util.Collections;
@@ -82,7 +84,11 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.assertj.core.api.Fail.fail;
+import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
+import static org.junit.jupiter.api.parallel.ExecutionMode.CONCURRENT;

+@TestInstance(PER_CLASS)
+@Execution(CONCURRENT)
 public class TestServer
 {
     private static final JsonCodec<QueryResults> QUERY_RESULTS_CODEC = jsonCodec(QueryResults.class);
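Aside (not part of the patch): the recurring change in the hunks above is marking query-runner-based test classes with JUnit 5's @Execution(SAME_THREAD) instead of CONCURRENT or the removed TestNG-era lifecycle settings, and TestGetTableStatisticsOperations additionally replaces a hand-wired InMemorySpanExporter/SdkTracerProvider with the OpenTelemetryExtension JUnit extension. The following is a minimal, self-contained sketch of that combination under stated assumptions: the class name, test method, and traced span are illustrative only, and junit-jupiter, assertj-core, and opentelemetry-sdk-testing are assumed to be on the test classpath.

// Illustrative sketch only; not part of the Trino patch above.
import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension;
import io.opentelemetry.sdk.trace.data.SpanData;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.junit.jupiter.api.parallel.Execution;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

@TestInstance(PER_CLASS)   // one test instance shared by all methods of the class
@Execution(SAME_THREAD)    // the class holds shared mutable state, so keep its methods on a single thread
class ExampleTracingTest
{
    // The extension owns the OpenTelemetry SDK lifecycle and clears collected spans before each test
    @RegisterExtension
    static final OpenTelemetryExtension TELEMETRY = OpenTelemetryExtension.create();

    @Test
    void countsFinishedSpans()
    {
        // Hypothetical traced operation; real tests would drive it through production code
        TELEMETRY.getOpenTelemetry()
                .getTracer("test")
                .spanBuilder("Metadata.getTableStatistics")
                .startSpan()
                .end();

        long invocations = TELEMETRY.getSpans().stream()
                .map(SpanData::getName)
                .filter("Metadata.getTableStatistics"::equals)
                .count();
        assertThat(invocations).isEqualTo(1);
    }
}

SAME_THREAD keeps every test method of a class on one thread even when parallel execution is enabled globally, which is why the patch applies it wherever a class shares mutable state across methods, such as a single query runner, CountDownLatches, or a span collector; TestServer, by contrast, stays CONCURRENT because its methods are independent.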