diff --git a/.github/config/labeler-config.yml b/.github/config/labeler-config.yml
index ae685830c4a43..e7ff2840486e0 100644
--- a/.github/config/labeler-config.yml
+++ b/.github/config/labeler-config.yml
@@ -3,7 +3,6 @@
- lib/trino-orc/**
- lib/trino-parquet/**
- lib/trino-hive-formats/**
- - plugin/trino-hive-hadoop2/**
- plugin/trino-hive/**
- testing/trino-product-tests/**
- lib/trino-filesystem/**
@@ -20,7 +19,6 @@ delta-lake:
hive:
- plugin/trino-hive/**
- - plugin/trino-hive-hadoop2/**
hudi:
- plugin/trino-hudi/**
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bc0df89d98b44..71fcf9249f7e3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,6 @@ jobs:
fail-fast: false
matrix:
java-version:
- - 17 # Keep testing on JDK 17 to ensure basic backward compatibility
- 21
timeout-minutes: 45
steps:
@@ -301,27 +300,7 @@ jobs:
- name: Install Hive Module
run: |
export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}"
- $MAVEN clean install ${MAVEN_FAST_INSTALL} ${MAVEN_GIB} -Dgib.logImpactedTo=gib-impacted.log -am -pl :trino-hive-hadoop2
- - name: Run Hive Tests
- run: |
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_tests.sh
- - name: Run Hive S3 Tests
- env:
- AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.TRINO_AWS_SECRET_ACCESS_KEY }}
- AWS_REGION: ${{ vars.TRINO_AWS_REGION }}
- S3_BUCKET: ${{ vars.TRINO_S3_BUCKET }}
- S3_BUCKET_ENDPOINT: "https://s3.${{ vars.TRINO_AWS_REGION }}.amazonaws.com"
- run: |
- if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
- if [ matrix.config == 'config-hdp3' ]; then
- # JsonSerde class needed for the S3 Select JSON tests is only available on hdp3.
- plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh
- fi
- fi
+ $MAVEN clean install ${MAVEN_FAST_INSTALL} ${MAVEN_GIB} -Dgib.logImpactedTo=gib-impacted.log -am -pl :trino-hive
- name: Run Hive AWS Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
@@ -333,53 +312,6 @@ jobs:
if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
$MAVEN test ${MAVEN_TEST} -pl :trino-hive -P aws-tests
fi
- - name: Run Hive Azure ABFS Access Key Tests
- if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
- env:
- ABFS_CONTAINER: ${{ secrets.AZURE_ABFS_CONTAINER }}
- ABFS_ACCOUNT: ${{ secrets.AZURE_ABFS_ACCOUNT }}
- ABFS_ACCESS_KEY: ${{ secrets.AZURE_ABFS_ACCESSKEY }}
- run: |
- if [ "${ABFS_CONTAINER}" != "" ]; then
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh
- fi
- - name: Run Hive Azure ABFS OAuth Tests
- if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
- env:
- ABFS_CONTAINER: ${{ secrets.AZURE_ABFS_CONTAINER }}
- ABFS_ACCOUNT: ${{ secrets.AZURE_ABFS_ACCOUNT }}
- ABFS_OAUTH_ENDPOINT: ${{ secrets.AZURE_ABFS_OAUTH_ENDPOINT }}
- ABFS_OAUTH_CLIENTID: ${{ secrets.AZURE_ABFS_OAUTH_CLIENTID }}
- ABFS_OAUTH_SECRET: ${{ secrets.AZURE_ABFS_OAUTH_SECRET }}
- run: |
- if [ -n "$ABFS_CONTAINER" ]; then
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh
- fi
- - name: Run Hive Azure WASB Tests
- if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
- env:
- WASB_CONTAINER: ${{ secrets.AZURE_WASB_CONTAINER }}
- WASB_ACCOUNT: ${{ secrets.AZURE_WASB_ACCOUNT }}
- WASB_ACCESS_KEY: ${{ secrets.AZURE_WASB_ACCESSKEY }}
- run: |
- if [ "${WASB_CONTAINER}" != "" ]; then
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_wasb_tests.sh
- fi
- - name: Run Hive Azure ADL Tests
- if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
- env:
- ADL_NAME: ${{ secrets.AZURE_ADL_NAME }}
- ADL_CLIENT_ID: ${{ secrets.AZURE_ADL_CLIENTID }}
- ADL_CREDENTIAL: ${{ secrets.AZURE_ADL_CREDENTIAL }}
- ADL_REFRESH_URL: ${{ secrets.AZURE_ADL_REFRESHURL }}
- run: |
- if [ "${ADL_NAME}" != "" ]; then
- source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
- plugin/trino-hive-hadoop2/bin/run_hive_adl_tests.sh
- fi
- name: Upload test results
uses: actions/upload-artifact@v3
# Upload all test reports only on failure, because the artifacts are large
@@ -475,6 +407,7 @@ jobs:
!:trino-memory,
!:trino-mongodb,
!:trino-mysql,
+ !:trino-opensearch,
!:trino-oracle,
!:trino-orc,
!:trino-parquet,
@@ -579,7 +512,7 @@ jobs:
- { modules: lib/trino-filesystem-gcs, profile: cloud-tests }
- { modules: plugin/trino-accumulo }
- { modules: plugin/trino-bigquery }
- - { modules: plugin/trino-bigquery, profile: cloud-tests-arrow-and-fte }
+ - { modules: plugin/trino-bigquery, profile: cloud-tests-2 }
- { modules: plugin/trino-cassandra }
- { modules: plugin/trino-clickhouse }
- { modules: plugin/trino-delta-lake }
@@ -602,6 +535,7 @@ jobs:
- { modules: plugin/trino-mariadb }
- { modules: plugin/trino-mongodb }
- { modules: plugin/trino-mysql }
+ - { modules: plugin/trino-opensearch }
- { modules: plugin/trino-oracle }
- { modules: plugin/trino-phoenix5 }
- { modules: plugin/trino-pinot }
@@ -658,7 +592,7 @@ jobs:
matrix.modules != 'plugin/trino-singlestore'
&& ! (contains(matrix.modules, 'trino-delta-lake') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-iceberg') && contains(matrix.profile, 'cloud-tests'))
- && ! (contains(matrix.modules, 'trino-bigquery') && contains(matrix.profile, 'cloud-tests-arrow-and-fte'))
+ && ! (contains(matrix.modules, 'trino-bigquery') && contains(matrix.profile, 'cloud-tests-2'))
&& ! (contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'fte-tests'))
&& ! (contains(matrix.modules, 'trino-filesystem-s3') && contains(matrix.profile, 'cloud-tests'))
@@ -761,25 +695,25 @@ jobs:
env:
BIGQUERY_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CREDENTIALS_KEY }}
GCP_STORAGE_BUCKET: ${{ vars.GCP_STORAGE_BUCKET }}
- if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
+ if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
run: |
- $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests \
+ $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-1 \
-Dbigquery.credentials-key="${BIGQUERY_CREDENTIALS_KEY}" \
- -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}" \
- -Dtesting.alternate-bq-project-id=bigquery-cicd-alternate
- - name: Cloud BigQuery Arrow and FTE Tests
+ -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}"
+ - name: Cloud BigQuery Smoke Tests
env:
BIGQUERY_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CREDENTIALS_KEY }}
GCP_STORAGE_BUCKET: ${{ vars.GCP_STORAGE_BUCKET }}
- if: matrix.modules == 'plugin/trino-bigquery' && contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
+ if: matrix.modules == 'plugin/trino-bigquery' && contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CREDENTIALS_KEY != '')
run: |
- $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-arrow-and-fte \
+ $MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-2 \
-Dbigquery.credentials-key="${BIGQUERY_CREDENTIALS_KEY}" \
- -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}"
+ -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}" \
+ -Dtesting.alternate-bq-project-id=bigquery-cicd-alternate
- name: Cloud BigQuery Case Insensitive Mapping Tests
env:
BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY: ${{ secrets.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY }}
- if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-arrow-and-fte') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY != '')
+ if: matrix.modules == 'plugin/trino-bigquery' && !contains(matrix.profile, 'cloud-tests-2') && (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY != '')
run: |
$MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-case-insensitive-mapping -Dbigquery.credentials-key="${BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY}"
- name: Cloud Snowflake Tests
@@ -971,7 +905,7 @@ jobs:
- suite-5
- suite-6-non-generic
- suite-7-non-generic
- - suite-8-non-generic
+ - suite-hive-transactional
- suite-azure
- suite-delta-lake-databricks91
- suite-delta-lake-databricks104
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 82cb8ccfa07b9..a137d9ee2ba21 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository == 'trinodb/trino'
steps:
- - uses: actions/stale@v8.0.0
+ - uses: actions/stale@v9.0.0
with:
stale-pr-message: 'This pull request has gone a while without any activity. Tagging the Trino developer relations team: @bitsondatadev @colebow @mosabua'
days-before-pr-stale: 21
@@ -21,3 +21,7 @@ jobs:
stale-pr-label: 'stale'
start-date: '2023-01-01T00:00:00Z'
exempt-draft-pr: true
+ operations-per-run: 200
+ # Avoid processing issues completely, see https://github.com/actions/stale/issues/1112
+ days-before-issue-stale: -1
+ days-before-issue-close: -1
diff --git a/.mvn/jvm.config b/.mvn/jvm.config
index 65c0bc4d12016..6a0272bcee2a2 100644
--- a/.mvn/jvm.config
+++ b/.mvn/jvm.config
@@ -9,3 +9,5 @@
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED
--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED
+-XX:+UnlockDiagnosticVMOptions
+-XX:GCLockerRetryAllocationCount=100
diff --git a/README.md b/README.md
index a95e04aac1fd0..e93d0c57c9e7a 100644
--- a/README.md
+++ b/README.md
@@ -34,8 +34,10 @@ information about reporting vulnerabilities.
## Build requirements
* Mac OS X or Linux
-* Java 17.0.4+, 64-bit
+* Java 21.0.1+, 64-bit
* Docker
+ * Turn off SELinux or other systems disabling write access to the local
+   checkout, to allow containers to mount parts of the Trino source tree
## Building Trino
@@ -68,8 +70,8 @@ After opening the project in IntelliJ, double check that the Java SDK is
properly configured for the project:
* Open the File menu and select Project Structure
-* In the SDKs section, ensure that JDK 17 is selected (create one if none exist)
-* In the Project section, ensure the Project language level is set to 17
+* In the SDKs section, ensure that JDK 21 is selected (create one if none exist)
+* In the Project section, ensure the Project language level is set to 21
### Running a testing server
diff --git a/client/trino-cli/pom.xml b/client/trino-cli/pom.xml
index 7720045b66874..d64a685f4b199 100644
--- a/client/trino-cli/pom.xml
+++ b/client/trino-cli/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>io.trino</groupId>
         <artifactId>trino-root</artifactId>
-        <version>435-SNAPSHOT</version>
+        <version>436-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
@@ -15,7 +15,7 @@
         <air.main.basedir>${project.parent.basedir}</air.main.basedir>
        <project.build.targetJdk>8</project.build.targetJdk>
         <main-class>io.trino.cli.Trino</main-class>
-        <dep.jline.version>3.24.1</dep.jline.version>
+        <dep.jline.version>3.25.0</dep.jline.version>
     </properties>
@@ -147,12 +147,6 @@
             <artifactId>junit-jupiter-engine</artifactId>
             <scope>test</scope>
         </dependency>
-
-        <dependency>
-            <groupId>org.testng</groupId>
-            <artifactId>testng</artifactId>
-            <scope>test</scope>
-        </dependency>
     </dependencies>
 </project>
diff --git a/client/trino-client/pom.xml b/client/trino-client/pom.xml
index 27587ca34d0af..875d625530ff7 100644
--- a/client/trino-client/pom.xml
+++ b/client/trino-client/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>io.trino</groupId>
         <artifactId>trino-root</artifactId>
-        <version>435-SNAPSHOT</version>
+        <version>436-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
@@ -122,11 +122,5 @@
             <artifactId>junit-jupiter-api</artifactId>
             <scope>test</scope>
         </dependency>
-
-        <dependency>
-            <groupId>org.testng</groupId>
-            <artifactId>testng</artifactId>
-            <scope>test</scope>
-        </dependency>
     </dependencies>
 </project>
diff --git a/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java b/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java
index b408006b9949a..3e964f279b47a 100644
--- a/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java
+++ b/client/trino-client/src/main/java/io/trino/client/ClientTypeSignature.java
@@ -29,6 +29,8 @@
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.client.ClientStandardTypes.ROW;
+import static io.trino.client.ClientStandardTypes.TIMESTAMP_WITH_TIME_ZONE;
+import static io.trino.client.ClientStandardTypes.TIME_WITH_TIME_ZONE;
import static io.trino.client.ClientStandardTypes.VARCHAR;
import static java.util.Collections.unmodifiableList;
import static java.util.Objects.requireNonNull;
@@ -94,6 +96,15 @@ public String toString()
if (arguments.isEmpty()) {
return rawType;
}
+
+ if (rawType.equals(TIME_WITH_TIME_ZONE)) {
+ return "time(" + arguments.get(0) + ") with time zone";
+ }
+
+ if (rawType.equals(TIMESTAMP_WITH_TIME_ZONE)) {
+ return "timestamp(" + arguments.get(0) + ") with time zone";
+ }
+
return rawType + arguments.stream()
.map(ClientTypeSignatureParameter::toString)
.collect(joining(",", "(", ")"));
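The hunk above resolves the precision-placement issue tracked in trinodb/trino#5317: parametric "with time zone" types now render as time(p) with time zone instead of time with time zone(p). A standalone sketch of the new rendering rule (illustrative, not a verbatim extract of the class; the raw type strings match the ClientStandardTypes constants):

import java.util.List;

// Sketch of the rendering rule introduced above.
class TypeSignatureRenderingSketch
{
    static String render(String rawType, List<String> arguments)
    {
        if (arguments.isEmpty()) {
            return rawType;
        }
        // precision now moves directly after the base keyword
        if (rawType.equals("time with time zone")) {
            return "time(" + arguments.get(0) + ") with time zone";
        }
        if (rawType.equals("timestamp with time zone")) {
            return "timestamp(" + arguments.get(0) + ") with time zone";
        }
        return rawType + "(" + String.join(",", arguments) + ")";
    }

    public static void main(String[] args)
    {
        // prints "timestamp(3) with time zone"; the old code printed
        // "timestamp with time zone(3)"
        System.out.println(render("timestamp with time zone", List.of("3")));
    }
}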
diff --git a/client/trino-jdbc/pom.xml b/client/trino-jdbc/pom.xml
index 9312577b7979f..c5472b875de37 100644
--- a/client/trino-jdbc/pom.xml
+++ b/client/trino-jdbc/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>io.trino</groupId>
         <artifactId>trino-root</artifactId>
-        <version>435-SNAPSHOT</version>
+        <version>436-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
@@ -193,7 +193,7 @@
         <dependency>
             <groupId>io.trino</groupId>
-            <artifactId>trino-hive-hadoop2</artifactId>
+            <artifactId>trino-hive</artifactId>
             <scope>test</scope>
         </dependency>
@@ -207,6 +207,12 @@
         <dependency>
             <groupId>io.trino</groupId>
             <artifactId>trino-main</artifactId>
             <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>io.trino</groupId>
+                    <artifactId>re2j</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
@@ -237,6 +243,12 @@
         <dependency>
             <groupId>io.trino</groupId>
             <artifactId>trino-testing</artifactId>
             <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>io.trino</groupId>
+                    <artifactId>re2j</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
diff --git a/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java b/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java
index 6bae338a8cbb8..705b50a8e15d9 100644
--- a/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java
+++ b/client/trino-jdbc/src/test/java/io/trino/jdbc/BaseTestJdbcResultSet.java
@@ -58,8 +58,6 @@ public abstract class BaseTestJdbcResultSet
protected abstract Connection createConnection()
throws SQLException;
- protected abstract int getTestedServerVersion();
-
@Test
public void testDuplicateColumnLabels()
throws Exception
@@ -441,10 +439,9 @@ public void testTimeWithTimeZone()
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 09:39:07+01:00");
assertThat(rs.getTime(column)).isEqualTo(Time.valueOf(LocalTime.of(1, 39, 7))); // TODO this should fail, or represent TIME '09:39:07'
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTimestamp(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a timestamp type but is time with time zone(0)");
+ .hasMessage("Expected column to be a timestamp type but is time(0) with time zone");
});
checkRepresentation(connectedStatement.getStatement(), "TIME '01:39:07 +01:00'", Types.TIME_WITH_TIMEZONE, (rs, column) -> {
@@ -458,10 +455,9 @@ public void testTimeWithTimeZone()
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 01:39:07+01:00");
assertThat(rs.getTime(column)).isEqualTo(someBogusValue); // TODO this should fail, or represent TIME '01:39:07'
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTimestamp(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a timestamp type but is time with time zone(0)");
+ .hasMessage("Expected column to be a timestamp type but is time(0) with time zone");
});
checkRepresentation(connectedStatement.getStatement(), "TIME '00:39:07 +01:00'", Types.TIME_WITH_TIMEZONE, (rs, column) -> {
@@ -475,10 +471,9 @@ public void testTimeWithTimeZone()
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 00:39:07+01:00");
assertThat(rs.getTime(column)).isEqualTo(someBogusValue); // TODO this should fail, as there no java.sql.Time representation for TIME '00:39:07' in America/Bahia_Banderas
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTimestamp(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a timestamp type but is time with time zone(0)");
+ .hasMessage("Expected column to be a timestamp type but is time(0) with time zone");
});
}
}
@@ -642,10 +637,9 @@ public void testTimestampWithTimeZone()
assertThatThrownBy(() -> rs.getDate(column))
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 1970-01-01 00:00:00.000 UTC");
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(3)");
+ .hasMessage("Expected column to be a time type but is timestamp(3) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -657,10 +651,9 @@ public void testTimestampWithTimeZone()
assertThatThrownBy(() -> rs.getDate(column))
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 2018-02-13 13:14:15.227 Europe/Warsaw");
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(3)");
+ .hasMessage("Expected column to be a time type but is timestamp(3) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -675,7 +668,7 @@ public void testTimestampWithTimeZone()
.hasMessage("Expected value to be a date but is: 2019-12-31 23:59:59.999999999999 Europe/Warsaw");
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
+ .hasMessage("Expected column to be a time type but is timestamp(12) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -694,7 +687,7 @@ public void testTimestampWithTimeZone()
.hasMessage("Expected value to be a date but is: 2019-12-31 23:59:59.999999999999 America/Bahia_Banderas");
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
+ .hasMessage("Expected column to be a time type but is timestamp(12) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -708,7 +701,7 @@ public void testTimestampWithTimeZone()
.hasMessage("Expected value to be a date but is: 1957-12-31 23:59:59.999999999999 Europe/Warsaw");
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(12)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
+ .hasMessage("Expected column to be a time type but is timestamp(12) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -720,10 +713,9 @@ public void testTimestampWithTimeZone()
assertThatThrownBy(() -> rs.getDate(column))
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 1970-01-01 09:14:15.227 Europe/Warsaw");
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(3)");
+ .hasMessage("Expected column to be a time type but is timestamp(3) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -735,10 +727,9 @@ public void testTimestampWithTimeZone()
assertThatThrownBy(() -> rs.getDate(column))
.isInstanceOf(SQLException.class)
.hasMessage("Expected value to be a date but is: 1970-01-01 00:14:15.227 Europe/Warsaw");
- // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(3)");
+ .hasMessage("Expected column to be a time type but is timestamp(3) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
@@ -757,7 +748,7 @@ public void testTimestampWithTimeZone()
.hasMessage("Expected value to be a date but is: +12345-01-23 01:23:45.123456789 Europe/Warsaw");
assertThatThrownBy(() -> rs.getTime(column))
.isInstanceOf(IllegalArgumentException.class) // TODO (https://github.com/trinodb/trino/issues/5315) SQLException
- .hasMessage("Expected column to be a time type but is timestamp with time zone(9)"); // TODO (https://github.com/trinodb/trino/issues/5317) placement of precision parameter
+ .hasMessage("Expected column to be a time type but is timestamp(9) with time zone");
assertThat(rs.getTimestamp(column)).isEqualTo(timestampForPointInTime);
});
}
@@ -810,8 +801,8 @@ public void testArray()
checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123'", Types.TIMESTAMP, "timestamp(3)");
checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123456789'", Types.TIMESTAMP, "timestamp(9)");
- checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp with time zone(3)");
- checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123456789 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp with time zone(9)");
+ checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp(3) with time zone");
+ checkArrayRepresentation(connectedStatement.getStatement(), "TIMESTAMP '2017-01-02 09:00:00.123456789 Europe/Warsaw'", Types.TIMESTAMP_WITH_TIMEZONE, "timestamp(9) with time zone");
// array or array
checkRepresentation(connectedStatement.getStatement(), "ARRAY[NULL, ARRAY[NULL, BIGINT '1', 2]]", Types.ARRAY, (rs, column) -> {
diff --git a/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java b/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java
index 95c3378ed18cb..1a159b45ed55a 100644
--- a/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java
+++ b/client/trino-jdbc/src/test/java/io/trino/jdbc/TestJdbcResultSet.java
@@ -59,11 +59,4 @@ protected Connection createConnection()
String url = format("jdbc:trino://%s", server.getAddress());
return DriverManager.getConnection(url, "test", null);
}
-
- @Override
- protected int getTestedServerVersion()
- {
- // Latest version
- return Integer.MAX_VALUE;
- }
}
diff --git a/core/trino-grammar/pom.xml b/core/trino-grammar/pom.xml
index 75cf06246cc92..a77a6d86389c8 100644
--- a/core/trino-grammar/pom.xml
+++ b/core/trino-grammar/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>io.trino</groupId>
         <artifactId>trino-root</artifactId>
-        <version>435-SNAPSHOT</version>
+        <version>436-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
diff --git a/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4 b/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4
index c13530d62a80f..cea6360647f34 100644
--- a/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4
+++ b/core/trino-grammar/src/main/antlr4/io/trino/grammar/sql/SqlBase.g4
@@ -160,7 +160,7 @@ statement
(LIKE pattern=string (ESCAPE escape=string)?)? #showSchemas
| SHOW CATALOGS
(LIKE pattern=string (ESCAPE escape=string)?)? #showCatalogs
- | SHOW COLUMNS (FROM | IN) qualifiedName?
+ | SHOW COLUMNS (FROM | IN) qualifiedName
(LIKE pattern=string (ESCAPE escape=string)?)? #showColumns
| SHOW STATS FOR qualifiedName #showStats
| SHOW STATS FOR '(' rootQuery ')' #showStatsForQuery
diff --git a/core/trino-main/pom.xml b/core/trino-main/pom.xml
index 1363a9fa9a7c6..2722379dfe03f 100644
--- a/core/trino-main/pom.xml
+++ b/core/trino-main/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>io.trino</groupId>
         <artifactId>trino-root</artifactId>
-        <version>435-SNAPSHOT</version>
+        <version>436-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
@@ -13,15 +13,6 @@
     <properties>
         <air.main.basedir>${project.parent.basedir}</air.main.basedir>
-
-        <air.test.parallel>instances</air.test.parallel>
-
@@ -247,6 +238,11 @@
             <artifactId>opentelemetry-context</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>io.opentelemetry</groupId>
+            <artifactId>opentelemetry-sdk-trace</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>io.trino</groupId>
             <artifactId>re2j</artifactId>
@@ -339,8 +335,8 @@
         <dependency>
             <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-analyzers-common</artifactId>
-            <version>8.11.2</version>
+            <artifactId>lucene-analysis-common</artifactId>
+            <version>9.7.0</version>
         </dependency>
@@ -390,12 +386,6 @@
             <scope>provided</scope>
         </dependency>
-
-        <dependency>
-            <groupId>org.testng</groupId>
-            <artifactId>testng</artifactId>
-            <scope>provided</scope>
-        </dependency>
-
         <dependency>
             <groupId>com.squareup.okhttp3</groupId>
             <artifactId>okhttp</artifactId>
@@ -533,25 +523,35 @@
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-surefire-plugin</artifactId>
-
-                <dependencies>
-                    <dependency>
-                        <groupId>org.apache.maven.surefire</groupId>
-                        <artifactId>surefire-junit-platform</artifactId>
-                        <version>${dep.plugin.surefire.version}</version>
-                    </dependency>
-                    <dependency>
-                        <groupId>org.apache.maven.surefire</groupId>
-                        <artifactId>surefire-testng</artifactId>
-                        <version>${dep.plugin.surefire.version}</version>
-                    </dependency>
-                </dependencies>
-            </plugin>
-        </plugins>
-    </build>
+
+    <profiles>
+        <profile>
+            <id>benchmarks</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>exec-maven-plugin</artifactId>
+                        <configuration>
+                            <executable>${java.home}/bin/java</executable>
+                            <arguments>
+                                <argument>-DoutputDirectory=benchmark_outputs</argument>
+                                <argument>-classpath</argument>
+                                <classpath />
+                                <argument>io.trino.benchmark.BenchmarkSuite</argument>
+                            </arguments>
+                            <classpathScope>test</classpathScope>
+                        </configuration>
+                        <executions>
+                            <execution>
+                                <id>benchmarks</id>
+                                <goals>
+                                    <goal>exec</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
diff --git a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java
index 2f56a09680cc6..44d2805976aa8 100644
--- a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java
+++ b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java
@@ -53,6 +53,7 @@
import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey;
import static java.lang.Math.min;
import static java.lang.String.format;
+import static java.util.concurrent.TimeUnit.SECONDS;
public final class SystemSessionProperties
implements SystemSessionPropertiesProvider
@@ -207,6 +208,8 @@ public final class SystemSessionProperties
public static final String USE_COST_BASED_PARTITIONING = "use_cost_based_partitioning";
public static final String FORCE_SPILLING_JOIN = "force_spilling_join";
public static final String PAGE_PARTITIONING_BUFFER_POOL_SIZE = "page_partitioning_buffer_pool_size";
+ public static final String IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD = "idle_writer_min_data_size_threshold";
+ public static final String CLOSE_IDLE_WRITERS_TRIGGER_DURATION = "close_idle_writers_trigger_duration";
private final List<PropertyMetadata<?>> sessionProperties;
@@ -712,6 +715,7 @@ public SystemSessionProperties(
COST_ESTIMATION_WORKER_COUNT,
"Set the estimate count of workers while planning",
null,
+ value -> validateIntegerValue(value, COST_ESTIMATION_WORKER_COUNT, 1, true),
true),
booleanProperty(
OMIT_DATETIME_TYPE_PRECISION,
@@ -1058,6 +1062,14 @@ public SystemSessionProperties(
integerProperty(PAGE_PARTITIONING_BUFFER_POOL_SIZE,
"Maximum number of free buffers in the per task partitioned page buffer pool. Setting this to zero effectively disables the pool",
taskManagerConfig.getPagePartitioningBufferPoolSize(),
+ true),
+ dataSizeProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD,
+ "Minimum amount of data written by a writer operator on average before it tries to close the idle writers",
+ DataSize.of(256, MEGABYTE),
+ true),
+ durationProperty(CLOSE_IDLE_WRITERS_TRIGGER_DURATION,
+ "The duration after which the writer operator tries to close the idle writers",
+ new Duration(5, SECONDS),
true));
}
@@ -1896,4 +1908,14 @@ public static int getPagePartitioningBufferPoolSize(Session session)
{
return session.getSystemProperty(PAGE_PARTITIONING_BUFFER_POOL_SIZE, Integer.class);
}
+
+ public static DataSize getIdleWriterMinDataSizeThreshold(Session session)
+ {
+ return session.getSystemProperty(IDLE_WRITER_MIN_DATA_SIZE_THRESHOLD, DataSize.class);
+ }
+
+ public static Duration getCloseIdleWritersTriggerDuration(Session session)
+ {
+ return session.getSystemProperty(CLOSE_IDLE_WRITERS_TRIGGER_DURATION, Duration.class);
+ }
}
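The two new session properties come with static accessors, following the established pattern in this class. A hedged sketch of a consumer reading them at operator-creation time (the surrounding class is illustrative; only the two getters come from the patch):

import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import io.trino.Session;

import static io.trino.SystemSessionProperties.getCloseIdleWritersTriggerDuration;
import static io.trino.SystemSessionProperties.getIdleWriterMinDataSizeThreshold;

// Illustrative consumer: captures the idle-writer tuning knobs from the session.
class IdleWriterSettings
{
    private final long minDataSizeBytes;       // defaults to 256MB per the property above
    private final long triggerIntervalMillis;  // defaults to 5s per the property above

    IdleWriterSettings(Session session)
    {
        DataSize threshold = getIdleWriterMinDataSizeThreshold(session);
        Duration trigger = getCloseIdleWritersTriggerDuration(session);
        this.minDataSizeBytes = threshold.toBytes();
        this.triggerIntervalMillis = trigger.toMillis();
    }
}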
diff --git a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java
index b7a7f6ee41862..143cc6493c63d 100644
--- a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java
+++ b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaPageSource.java
@@ -277,43 +277,34 @@ private void addColumnsRecords(QualifiedTablePrefix prefix)
private void addTablesRecords(QualifiedTablePrefix prefix)
{
- boolean needsTableType = requiredColumns.contains("table_type") || requiredColumns.contains("trino_relation_type");
+ boolean needsTableType = requiredColumns.contains("table_type");
Set<SchemaTableName> relations;
- Map<SchemaTableName, RelationType> relationTypes;
+ Set<SchemaTableName> views;
if (needsTableType) {
- relationTypes = getRelationTypes(session, metadata, accessControl, prefix);
+ Map<SchemaTableName, RelationType> relationTypes = getRelationTypes(session, metadata, accessControl, prefix);
relations = relationTypes.keySet();
+ views = relationTypes.entrySet().stream()
+ .filter(entry -> entry.getValue() == RelationType.VIEW)
+ .map(Entry::getKey)
+ .collect(toImmutableSet());
}
else {
relations = listTables(session, metadata, accessControl, prefix);
- relationTypes = null;
+ views = Set.of();
}
+ // TODO (https://github.com/trinodb/trino/issues/8207) define a type for materialized views
for (SchemaTableName name : relations) {
String type = null;
- String trinoRelationType = null;
if (needsTableType) {
- switch (relationTypes.get(name)) {
- case TABLE -> {
- type = "BASE TABLE";
- trinoRelationType = type;
- }
- case VIEW -> {
- type = "VIEW";
- trinoRelationType = type;
- }
- case MATERIALIZED_VIEW -> {
- type = "BASE TABLE";
- trinoRelationType = "MATERIALIZED VIEW";
- }
- }
+ // if table and view names overlap, the view wins
+ type = views.contains(name) ? "VIEW" : "BASE TABLE";
}
addRecord(
prefix.getCatalogName(),
name.getSchemaName(),
name.getTableName(),
type,
- trinoRelationType,
null);
if (isLimitExhausted()) {
return;
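The rewrite above collapses the three-way relation-type switch into a single membership test, which also reverts materialized views to reporting BASE TABLE. The resulting rule, restated compactly (names illustrative):

import java.util.Set;

// New rule: only plain views report "VIEW"; tables and materialized views both
// report "BASE TABLE", and when a table and a view share a name, the view wins.
final class TableTypeRule
{
    static String tableType(String name, Set<String> viewNames)
    {
        return viewNames.contains(name) ? "VIEW" : "BASE TABLE";
    }

    private TableTypeRule() {}
}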
diff --git a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java
index e45bb8594d742..605076aa38419 100644
--- a/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java
+++ b/core/trino-main/src/main/java/io/trino/connector/informationschema/InformationSchemaTable.java
@@ -47,7 +47,6 @@ public enum InformationSchemaTable
.column("table_schema", createUnboundedVarcharType())
.column("table_name", createUnboundedVarcharType())
.column("table_type", createUnboundedVarcharType())
- .hiddenColumn("trino_relation_type", createUnboundedVarcharType())
.hiddenColumn("table_comment", createUnboundedVarcharType()) // MySQL compatible
.build()),
VIEWS(table("views")
diff --git a/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java b/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java
index 3912bbe262c98..6fb93e229023e 100644
--- a/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java
+++ b/core/trino-main/src/main/java/io/trino/connector/system/GlobalSystemConnector.java
@@ -15,7 +15,7 @@
import com.google.common.collect.ImmutableSet;
import com.google.inject.Inject;
-import io.trino.operator.table.Sequence.SequenceFunctionHandle;
+import io.trino.operator.table.SequenceFunction.SequenceFunctionHandle;
import io.trino.spi.connector.CatalogHandle;
import io.trino.spi.connector.CatalogHandle.CatalogVersion;
import io.trino.spi.connector.ConnectorMetadata;
@@ -33,7 +33,7 @@
import java.util.Set;
-import static io.trino.operator.table.Sequence.getSequenceFunctionSplitSource;
+import static io.trino.operator.table.SequenceFunction.getSequenceFunctionSplitSource;
import static io.trino.spi.connector.CatalogHandle.createRootCatalogHandle;
import static java.util.Objects.requireNonNull;
diff --git a/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java b/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java
index 306fb4347c39d..7d6bb634309bc 100644
--- a/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java
+++ b/core/trino-main/src/main/java/io/trino/connector/system/SystemConnectorModule.java
@@ -30,8 +30,8 @@
import io.trino.connector.system.jdbc.TableTypeJdbcTable;
import io.trino.connector.system.jdbc.TypesJdbcTable;
import io.trino.connector.system.jdbc.UdtJdbcTable;
-import io.trino.operator.table.ExcludeColumns;
-import io.trino.operator.table.Sequence;
+import io.trino.operator.table.ExcludeColumnsFunction;
+import io.trino.operator.table.SequenceFunction;
import io.trino.spi.connector.SystemTable;
import io.trino.spi.function.table.ConnectorTableFunction;
import io.trino.spi.procedure.Procedure;
@@ -79,7 +79,7 @@ public void configure(Binder binder)
binder.bind(GlobalSystemConnector.class).in(Scopes.SINGLETON);
Multibinder<ConnectorTableFunction> tableFunctions = Multibinder.newSetBinder(binder, ConnectorTableFunction.class);
- tableFunctions.addBinding().toProvider(ExcludeColumns.class).in(Scopes.SINGLETON);
- tableFunctions.addBinding().toProvider(Sequence.class).in(Scopes.SINGLETON);
+ tableFunctions.addBinding().to(ExcludeColumnsFunction.class).in(Scopes.SINGLETON);
+ tableFunctions.addBinding().to(SequenceFunction.class).in(Scopes.SINGLETON);
}
}
diff --git a/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java b/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java
index d536ca27c102c..cfc5ee9fb1ce7 100644
--- a/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java
+++ b/core/trino-main/src/main/java/io/trino/cost/CostCalculatorWithEstimatedExchanges.java
@@ -32,6 +32,7 @@
import java.util.Objects;
import java.util.Optional;
+import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.cost.LocalCostEstimate.addPartialComponents;
import static java.util.Objects.requireNonNull;
@@ -206,6 +207,7 @@ public static LocalCostEstimate calculateJoinCostWithoutOutput(
boolean replicated,
int estimatedSourceDistributedTaskCount)
{
+ checkArgument(estimatedSourceDistributedTaskCount > 0, "estimatedSourceDistributedTaskCount must be positive: %s", estimatedSourceDistributedTaskCount);
LocalCostEstimate exchangesCost = calculateJoinExchangeCost(
probe,
build,
diff --git a/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java b/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java
index 62a575ed3057d..cbeaad3913540 100644
--- a/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java
+++ b/core/trino-main/src/main/java/io/trino/cost/LocalCostEstimate.java
@@ -21,7 +21,9 @@
import java.util.stream.Stream;
import static com.google.common.base.MoreObjects.toStringHelper;
+import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Double.NaN;
+import static java.lang.Double.isNaN;
/**
* Represents inherent cost of some plan node, not including cost of its sources.
@@ -63,6 +65,9 @@ public LocalCostEstimate(
@JsonProperty("maxMemory") double maxMemory,
@JsonProperty("networkCost") double networkCost)
{
+ checkArgument(isNaN(cpuCost) || cpuCost >= 0, "cpuCost cannot be negative: %s", cpuCost);
+ checkArgument(isNaN(maxMemory) || maxMemory >= 0, "maxMemory cannot be negative: %s", maxMemory);
+ checkArgument(isNaN(networkCost) || networkCost >= 0, "networkCost cannot be negative: %s", networkCost);
this.cpuCost = cpuCost;
this.maxMemory = maxMemory;
this.networkCost = networkCost;
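The added checks tolerate NaN, since an unknown estimate is legal, but reject negative values. The same pattern as a self-contained helper (illustrative, not a class from the patch):

import static java.lang.Double.isNaN;

// Validation pattern used by the cost classes above: unknown (NaN) is allowed,
// a known value must be non-negative.
final class CostChecks
{
    static double checkNonNegativeOrUnknown(double value, String name)
    {
        if (!isNaN(value) && value < 0) {
            throw new IllegalArgumentException(name + " cannot be negative: " + value);
        }
        return value;
    }

    public static void main(String[] args)
    {
        checkNonNegativeOrUnknown(Double.NaN, "cpuCost"); // ok: unknown estimate
        checkNonNegativeOrUnknown(0, "cpuCost");          // ok
        checkNonNegativeOrUnknown(-1, "cpuCost");         // throws
    }

    private CostChecks() {}
}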
diff --git a/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java b/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java
index 9d151c0162e41..97b1f93855018 100644
--- a/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java
+++ b/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsAndCostSummary.java
@@ -16,6 +16,9 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.lang.Double.isNaN;
+
public class PlanNodeStatsAndCostSummary
{
private final double outputRowCount;
@@ -32,6 +35,11 @@ public PlanNodeStatsAndCostSummary(
@JsonProperty("memoryCost") double memoryCost,
@JsonProperty("networkCost") double networkCost)
{
+ checkArgument(isNaN(outputRowCount) || outputRowCount >= 0, "outputRowCount cannot be negative: %s", outputRowCount);
+ checkArgument(isNaN(outputSizeInBytes) || outputSizeInBytes >= 0, "outputSizeInBytes cannot be negative: %s", outputSizeInBytes);
+ checkArgument(isNaN(cpuCost) || cpuCost >= 0, "cpuCost cannot be negative: %s", cpuCost);
+ checkArgument(isNaN(memoryCost) || memoryCost >= 0, "memoryCost cannot be negative: %s", memoryCost);
+ checkArgument(isNaN(networkCost) || networkCost >= 0, "networkCost cannot be negative: %s", networkCost);
this.outputRowCount = outputRowCount;
this.outputSizeInBytes = outputSizeInBytes;
this.cpuCost = cpuCost;
diff --git a/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java b/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java
index 21a460d5a8b3b..c78a11de5bb3a 100644
--- a/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java
+++ b/core/trino-main/src/main/java/io/trino/cost/TaskCountEstimator.java
@@ -23,6 +23,7 @@
import java.util.Set;
import java.util.function.IntSupplier;
+import static com.google.common.base.Preconditions.checkState;
import static io.trino.SystemSessionProperties.getCostEstimationWorkerCount;
import static io.trino.SystemSessionProperties.getFaultTolerantExecutionMaxPartitionCount;
import static io.trino.SystemSessionProperties.getMaxHashPartitionCount;
@@ -42,12 +43,17 @@ public TaskCountEstimator(NodeSchedulerConfig nodeSchedulerConfig, InternalNodeM
requireNonNull(nodeManager, "nodeManager is null");
this.numberOfNodes = () -> {
Set<InternalNode> activeNodes = nodeManager.getAllNodes().getActiveNodes();
+ int count;
if (schedulerIncludeCoordinator) {
- return activeNodes.size();
+ count = activeNodes.size();
}
- return toIntExact(activeNodes.stream()
- .filter(node -> !node.isCoordinator())
- .count());
+ else {
+ count = toIntExact(activeNodes.stream()
+ .filter(node -> !node.isCoordinator())
+ .count());
+ }
+ // At least 1 even if no worker nodes currently registered. This is to prevent underflow or other mis-estimations.
+ return Math.max(count, 1);
};
}
@@ -60,9 +66,12 @@ public int estimateSourceDistributedTaskCount(Session session)
{
Integer costEstimationWorkerCount = getCostEstimationWorkerCount(session);
if (costEstimationWorkerCount != null) {
+ // validated to be at least 1
return costEstimationWorkerCount;
}
- return numberOfNodes.getAsInt();
+ int count = numberOfNodes.getAsInt();
+ checkState(count > 0, "%s should return positive number of nodes: %s", numberOfNodes, count);
+ return count;
}
public int estimateHashedTaskCount(Session session)
diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java
index 7acd03cc6812b..54b979e503fc0 100644
--- a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java
+++ b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchExecutor.java
@@ -25,13 +25,13 @@
import org.weakref.jmx.Managed;
import org.weakref.jmx.Nested;
-import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator;
import static io.airlift.concurrent.Threads.daemonThreadsNamed;
+import static io.trino.util.Executors.decorateWithVersion;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newCachedThreadPool;
import static java.util.concurrent.Executors.newScheduledThreadPool;
@@ -51,22 +51,7 @@ public DispatchExecutor(QueryManagerConfig config, VersionEmbedder versionEmbedd
{
ExecutorService coreExecutor = newCachedThreadPool(daemonThreadsNamed("dispatcher-query-%s"));
closer.register(coreExecutor::shutdownNow);
- executor = new DecoratingListeningExecutorService(
- listeningDecorator(coreExecutor),
- new DecoratingListeningExecutorService.TaskDecorator()
- {
- @Override
- public Runnable decorate(Runnable command)
- {
- return versionEmbedder.embedVersion(command);
- }
-
- @Override
- public <T> Callable<T> decorate(Callable<T> task)
- {
- return versionEmbedder.embedVersion(task);
- }
- });
+ executor = decorateWithVersion(coreExecutor, versionEmbedder);
ScheduledExecutorService coreScheduledExecutor = newScheduledThreadPool(config.getQueryManagerExecutorPoolSize(), daemonThreadsNamed("dispatch-executor-%s"));
closer.register(coreScheduledExecutor::shutdownNow);
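The anonymous TaskDecorator moves behind a shared io.trino.util.Executors.decorateWithVersion helper whose body is not part of this diff. Assuming it simply packages the decoration deleted above, it would look roughly like this (a sketch, not the actual Executors source; the DecoratingListeningExecutorService package is assumed):

import com.google.common.util.concurrent.ListeningExecutorService;
import io.trino.spi.VersionEmbedder;
import io.trino.util.DecoratingListeningExecutorService; // package assumed

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;

import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator;

final class ExecutorsSketch
{
    // Same behavior as the removed inline code: every submitted task runs
    // wrapped by the VersionEmbedder, so failures carry the server version.
    static ListeningExecutorService decorateWithVersion(ExecutorService executor, VersionEmbedder versionEmbedder)
    {
        return new DecoratingListeningExecutorService(
                listeningDecorator(executor),
                new DecoratingListeningExecutorService.TaskDecorator()
                {
                    @Override
                    public Runnable decorate(Runnable command)
                    {
                        return versionEmbedder.embedVersion(command);
                    }

                    @Override
                    public <T> Callable<T> decorate(Callable<T> task)
                    {
                        return versionEmbedder.embedVersion(task);
                    }
                });
    }

    private ExecutorsSketch() {}
}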
diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java
index fd3e9b3563c65..39b7d1795a0d3 100644
--- a/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java
+++ b/core/trino-main/src/main/java/io/trino/dispatcher/DispatchManager.java
@@ -22,6 +22,7 @@
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.context.Context;
import io.trino.Session;
+import io.trino.event.QueryMonitor;
import io.trino.execution.QueryIdGenerator;
import io.trino.execution.QueryInfo;
import io.trino.execution.QueryManagerConfig;
@@ -56,6 +57,7 @@
import static io.trino.execution.QueryState.RUNNING;
import static io.trino.spi.StandardErrorCode.QUERY_TEXT_TOO_LARGE;
import static io.trino.tracing.ScopedSpan.scopedSpan;
+import static io.trino.util.Failures.toFailure;
import static io.trino.util.StatementUtils.getQueryType;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
@@ -80,6 +82,7 @@ public class DispatchManager
private final QueryTracker queryTracker;
private final QueryManagerStats stats = new QueryManagerStats();
+ private final QueryMonitor queryMonitor;
@Inject
public DispatchManager(
@@ -94,7 +97,8 @@ public DispatchManager(
SessionPropertyManager sessionPropertyManager,
Tracer tracer,
QueryManagerConfig queryManagerConfig,
- DispatchExecutor dispatchExecutor)
+ DispatchExecutor dispatchExecutor,
+ QueryMonitor queryMonitor)
{
this.queryIdGenerator = requireNonNull(queryIdGenerator, "queryIdGenerator is null");
this.queryPreparer = requireNonNull(queryPreparer, "queryPreparer is null");
@@ -112,6 +116,7 @@ public DispatchManager(
this.dispatchExecutor = dispatchExecutor.getExecutor();
this.queryTracker = new QueryTracker<>(queryManagerConfig, dispatchExecutor.getScheduledExecutor());
+ this.queryMonitor = requireNonNull(queryMonitor, "queryMonitor is null");
}
@PostConstruct
@@ -236,6 +241,11 @@ private void createQueryInternal(QueryId queryId, Span querySpan, Slug slug,
Optional<String> preparedSql = Optional.ofNullable(preparedQuery).flatMap(PreparedQuery::getPrepareSql);
DispatchQuery failedDispatchQuery = failedDispatchQueryFactory.createFailedDispatchQuery(session, query, preparedSql, Optional.empty(), throwable);
queryCreated(failedDispatchQuery);
+ // maintain proper order of calls such that EventListener has access to QueryInfo
+ // - add query to tracker
+ // - fire query created event
+ // - fire query completed event
+ queryMonitor.queryImmediateFailureEvent(failedDispatchQuery.getBasicQueryInfo(), toFailure(throwable));
querySpan.setStatus(StatusCode.ERROR, throwable.getMessage())
.recordException(throwable)
.end();
diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java b/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java
index 3eeab6198ef39..65c75d35780e3 100644
--- a/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java
+++ b/core/trino-main/src/main/java/io/trino/dispatcher/FailedDispatchQueryFactory.java
@@ -24,7 +24,6 @@
import java.util.Optional;
import java.util.concurrent.ExecutorService;
-import static io.trino.util.Failures.toFailure;
import static java.util.Objects.requireNonNull;
public class FailedDispatchQueryFactory
@@ -58,7 +57,6 @@ public FailedDispatchQuery createFailedDispatchQuery(Session session, String que
BasicQueryInfo queryInfo = failedDispatchQuery.getBasicQueryInfo();
queryMonitor.queryCreatedEvent(queryInfo);
- queryMonitor.queryImmediateFailureEvent(queryInfo, toFailure(throwable));
return failedDispatchQuery;
}
diff --git a/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java b/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java
index b4cd5713344cd..f2feb98f73574 100644
--- a/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java
+++ b/core/trino-main/src/main/java/io/trino/dispatcher/QueuedStatementResource.java
@@ -32,7 +32,6 @@
import io.trino.execution.QueryManagerConfig;
import io.trino.execution.QueryState;
import io.trino.server.HttpRequestSessionContextFactory;
-import io.trino.server.ProtocolConfig;
import io.trino.server.ServerConfig;
import io.trino.server.SessionContext;
import io.trino.server.protocol.QueryInfoUrlFactory;
@@ -85,7 +84,8 @@
import static io.airlift.jaxrs.AsyncResponseHandler.bindAsyncResponse;
import static io.trino.execution.QueryState.FAILED;
import static io.trino.execution.QueryState.QUEUED;
-import static io.trino.server.HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY;
+import static io.trino.server.ServletSecurityUtils.authenticatedIdentity;
+import static io.trino.server.ServletSecurityUtils.clearAuthenticatedIdentity;
import static io.trino.server.protocol.QueryInfoUrlFactory.getQueryInfoUri;
import static io.trino.server.protocol.Slug.Context.EXECUTING_QUERY;
import static io.trino.server.protocol.Slug.Context.QUEUED_QUERY;
@@ -120,7 +120,6 @@ public class QueuedStatementResource
private final ScheduledExecutorService timeoutExecutor;
private final boolean compressionEnabled;
- private final Optional<String> alternateHeaderName;
private final QueryManager queryManager;
@Inject
@@ -131,7 +130,6 @@ public QueuedStatementResource(
DispatchExecutor executor,
QueryInfoUrlFactory queryInfoUrlTemplate,
ServerConfig serverConfig,
- ProtocolConfig protocolConfig,
QueryManagerConfig queryManagerConfig)
{
this.sessionContextFactory = requireNonNull(sessionContextFactory, "sessionContextFactory is null");
@@ -141,7 +139,6 @@ public QueuedStatementResource(
this.timeoutExecutor = executor.getScheduledExecutor();
this.queryInfoUrlFactory = requireNonNull(queryInfoUrlTemplate, "queryInfoUrlTemplate is null");
this.compressionEnabled = serverConfig.isQueryResultsCompressionEnabled();
- this.alternateHeaderName = protocolConfig.getAlternateHeaderName();
queryManager = new QueryManager(queryManagerConfig.getClientTimeout());
}
@@ -178,19 +175,19 @@ public Response postStatement(
private Query registerQuery(String statement, HttpServletRequest servletRequest, HttpHeaders httpHeaders)
{
Optional<String> remoteAddress = Optional.ofNullable(servletRequest.getRemoteAddr());
- Optional<Identity> identity = Optional.ofNullable((Identity) servletRequest.getAttribute(AUTHENTICATED_IDENTITY));
+ Optional<Identity> identity = authenticatedIdentity(servletRequest);
if (identity.flatMap(Identity::getPrincipal).map(InternalPrincipal.class::isInstance).orElse(false)) {
throw badRequest(FORBIDDEN, "Internal communication can not be used to start a query");
}
MultivaluedMap<String, String> headers = httpHeaders.getRequestHeaders();
- SessionContext sessionContext = sessionContextFactory.createSessionContext(headers, alternateHeaderName, remoteAddress, identity);
+ SessionContext sessionContext = sessionContextFactory.createSessionContext(headers, remoteAddress, identity);
Query query = new Query(statement, sessionContext, dispatchManager, queryInfoUrlFactory, tracer);
queryManager.registerQuery(query);
// let authentication filter know that identity lifecycle has been handed off
- servletRequest.setAttribute(AUTHENTICATED_IDENTITY, null);
+ clearAuthenticatedIdentity(servletRequest);
return query;
}
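ServletSecurityUtils itself is not shown in this diff; judging from the call sites, the two helpers wrap the request-attribute handling that this resource previously did by hand. A sketch under that assumption (the attribute key string here is hypothetical):

import io.trino.spi.security.Identity;
import jakarta.servlet.http.HttpServletRequest;

import java.util.Optional;

final class ServletSecurityUtilsSketch
{
    // Hypothetical attribute key; the real constant lived in
    // HttpRequestSessionContextFactory.AUTHENTICATED_IDENTITY before this change.
    private static final String AUTHENTICATED_IDENTITY = "trino.authenticated-identity";

    static Optional<Identity> authenticatedIdentity(HttpServletRequest request)
    {
        return Optional.ofNullable((Identity) request.getAttribute(AUTHENTICATED_IDENTITY));
    }

    static void clearAuthenticatedIdentity(HttpServletRequest request)
    {
        // signals to the authentication filter that identity lifecycle was handed off
        request.setAttribute(AUTHENTICATED_IDENTITY, null);
    }

    private ServletSecurityUtilsSketch() {}
}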
diff --git a/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java b/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java
index 021ed3a89760c..ff4b109d209b0 100644
--- a/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java
+++ b/core/trino-main/src/main/java/io/trino/execution/CreateMaterializedViewTask.java
@@ -160,8 +160,7 @@ public ListenableFuture<Void> execute(
// system path elements are not stored
.filter(element -> !element.getCatalogName().equals(GlobalSystemConnector.NAME))
.collect(toImmutableList()),
- Optional.empty(),
- properties);
+ Optional.empty());
Set<String> specifiedPropertyKeys = statement.getProperties().stream()
// property names are case-insensitive and normalized to lower case
@@ -172,7 +171,7 @@ public ListenableFuture execute(
.filter(specifiedPropertyKeys::contains)
.collect(toImmutableMap(Function.identity(), properties::get));
accessControl.checkCanCreateMaterializedView(session.toSecurityContext(), name, explicitlySetProperties);
- plannerContext.getMetadata().createMaterializedView(session, name, definition, statement.isReplace(), statement.isNotExists());
+ plannerContext.getMetadata().createMaterializedView(session, name, definition, properties, statement.isReplace(), statement.isNotExists());
stateMachine.setOutput(analysis.getTarget());
stateMachine.setReferencedTables(analysis.getReferencedTables());
diff --git a/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java b/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java
index 2ee459b524842..b8d0a5b36aab7 100644
--- a/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java
+++ b/core/trino-main/src/main/java/io/trino/execution/FailureInjectionConfig.java
@@ -15,6 +15,7 @@
import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
+import io.airlift.configuration.ConfigHidden;
import io.airlift.units.Duration;
import jakarta.validation.constraints.NotNull;
@@ -31,6 +32,7 @@ public Duration getExpirationPeriod()
return expirationPeriod;
}
+ @ConfigHidden // not supposed to be used outside of tests
@Config("failure-injection.expiration-period")
@ConfigDescription("Period after which an injected failure is considered expired and will no longer be triggering a failure")
public FailureInjectionConfig setExpirationPeriod(Duration expirationPeriod)
@@ -45,6 +47,7 @@ public Duration getRequestTimeout()
return requestTimeout;
}
+ @ConfigHidden // not supposed to be used outside of tests
@Config("failure-injection.request-timeout")
@ConfigDescription("Period after which requests blocked to emulate a timeout are released")
public FailureInjectionConfig setRequestTimeout(Duration requestTimeout)
diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java b/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java
index cebb0948a5032..ccb8e18e4854d 100644
--- a/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java
+++ b/core/trino-main/src/main/java/io/trino/execution/QueryExecutionMBean.java
@@ -26,7 +26,7 @@ public class QueryExecutionMBean
private final ThreadPoolExecutorMBean executorMBean;
@Inject
- public QueryExecutionMBean(@ForQueryExecution ExecutorService executor)
+ public QueryExecutionMBean(@QueryExecutorInternal ExecutorService executor)
{
this.executorMBean = new ThreadPoolExecutorMBean((ThreadPoolExecutor) executor);
}
diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java b/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java
new file mode 100644
index 0000000000000..39d798b567bac
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/execution/QueryExecutorInternal.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.execution;
+
+import com.google.inject.BindingAnnotation;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@BindingAnnotation
+public @interface QueryExecutorInternal {}
diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java b/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java
index d83d74f074dc0..e5b6052855a72 100644
--- a/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java
+++ b/core/trino-main/src/main/java/io/trino/execution/QueryStateMachine.java
@@ -54,7 +54,6 @@
import io.trino.spi.type.Type;
import io.trino.sql.analyzer.Output;
import io.trino.sql.planner.PlanFragment;
-import io.trino.sql.planner.plan.TableScanNode;
import io.trino.tracing.TrinoAttributes;
import io.trino.transaction.TransactionId;
import io.trino.transaction.TransactionInfo;
@@ -669,7 +668,7 @@ private QueryStats getQueryStats(Optional<StageInfo> rootStage, List<StageInfo>
failedInternalNetworkInputPositions += stageStats.getFailedInternalNetworkInputPositions();
PlanFragment plan = stageInfo.getPlan();
- if (plan != null && plan.getPartitionedSourceNodes().stream().anyMatch(TableScanNode.class::isInstance)) {
+ if (plan != null && plan.containsTableScanNode()) {
rawInputDataSize += stageStats.getRawInputDataSize().toBytes();
failedRawInputDataSize += stageStats.getFailedRawInputDataSize().toBytes();
rawInputPositions += stageStats.getRawInputPositions();
diff --git a/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java b/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java
index fa3ee37b94545..acce256889772 100644
--- a/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java
+++ b/core/trino-main/src/main/java/io/trino/execution/SqlQueryManager.java
@@ -22,6 +22,8 @@
import io.airlift.log.Logger;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.context.Context;
import io.trino.ExceededCpuLimitException;
import io.trino.ExceededScanLimitException;
import io.trino.Session;
@@ -55,6 +57,7 @@
import static io.trino.SystemSessionProperties.getQueryMaxScanPhysicalBytes;
import static io.trino.execution.QueryState.RUNNING;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
+import static io.trino.tracing.ScopedSpan.scopedSpan;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newCachedThreadPool;
@@ -67,6 +70,7 @@ public class SqlQueryManager
private static final Logger log = Logger.get(SqlQueryManager.class);
private final ClusterMemoryManager memoryManager;
+ private final Tracer tracer;
private final QueryTracker queryTracker;
private final Duration maxQueryCpuTime;
@@ -79,9 +83,10 @@ public class SqlQueryManager
private final ThreadPoolExecutorMBean queryManagementExecutorMBean;
@Inject
- public SqlQueryManager(ClusterMemoryManager memoryManager, QueryManagerConfig queryManagerConfig)
+ public SqlQueryManager(ClusterMemoryManager memoryManager, Tracer tracer, QueryManagerConfig queryManagerConfig)
{
this.memoryManager = requireNonNull(memoryManager, "memoryManager is null");
+ this.tracer = requireNonNull(tracer, "tracer is null");
this.maxQueryCpuTime = queryManagerConfig.getQueryMaxCpuTime();
this.maxQueryScanPhysicalBytes = queryManagerConfig.getQueryMaxScanPhysicalBytes();
@@ -253,7 +258,11 @@ public void createQuery(QueryExecution queryExecution)
});
try (SetThreadName ignored = new SetThreadName("Query-%s", queryExecution.getQueryId())) {
- queryExecution.start();
+ try (var ignoredStartScope = scopedSpan(tracer.spanBuilder("query-start")
+ .setParent(Context.current().with(queryExecution.getSession().getQuerySpan()))
+ .startSpan())) {
+ queryExecution.start();
+ }
}
}
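
Note: the hunk above wraps `queryExecution.start()` in a child span parented to the session's query span. A minimal self-contained sketch of that pattern using only the OpenTelemetry API (a no-op tracer stands in for the injected `Tracer`, and a plain try/finally approximates Trino's `scopedSpan` helper):

```java
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.context.Context;

public class QueryStartSpanSketch
{
    public static void main(String[] args)
    {
        // No-op tracer stands in for the injected Tracer; a real deployment
        // would wire an SDK TracerProvider instead.
        Tracer tracer = OpenTelemetry.noop().getTracer("sketch");
        Span querySpan = tracer.spanBuilder("query").startSpan();

        // Equivalent of the scopedSpan(...) helper: start a child span parented
        // to the query span and guarantee it ends when the block exits.
        Span startSpan = tracer.spanBuilder("query-start")
                .setParent(Context.current().with(querySpan))
                .startSpan();
        try {
            // queryExecution.start() would run here
        }
        finally {
            startSpan.end();
            querySpan.end();
        }
    }
}
```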
diff --git a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java
index f4c8fe43ab19e..12d3a4a2a01e6 100644
--- a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java
+++ b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java
@@ -123,6 +123,7 @@ public class SqlTaskManager
private final ScheduledExecutorService taskManagementExecutor;
private final ScheduledExecutorService driverYieldExecutor;
+ private final ScheduledExecutorService driverTimeoutExecutor;
private final Duration infoCacheTime;
private final Duration clientTimeout;
@@ -216,6 +217,7 @@ public SqlTaskManager(
this.taskManagementExecutor = taskManagementExecutor.getExecutor();
this.driverYieldExecutor = newScheduledThreadPool(config.getTaskYieldThreads(), threadsNamed("task-yield-%s"));
+ this.driverTimeoutExecutor = newScheduledThreadPool(config.getDriverTimeoutThreads(), threadsNamed("task-driver-timeout-%s"));
SqlTaskExecutionFactory sqlTaskExecutionFactory = new SqlTaskExecutionFactory(taskNotificationExecutor, taskExecutor, planner, splitMonitor, tracer, config);
@@ -269,6 +271,7 @@ private QueryContext createQueryContext(
gcMonitor,
taskNotificationExecutor,
driverYieldExecutor,
+ driverTimeoutExecutor,
maxQuerySpillPerNode,
localSpillManager.getSpillSpaceTracker());
}
diff --git a/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java b/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java
index 2ddc4a6c3a9e0..5205c42943428 100644
--- a/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java
+++ b/core/trino-main/src/main/java/io/trino/execution/StageStateMachine.java
@@ -33,7 +33,6 @@
import io.trino.spi.eventlistener.StageGcStatistics;
import io.trino.sql.planner.PlanFragment;
import io.trino.sql.planner.plan.PlanNodeId;
-import io.trino.sql.planner.plan.TableScanNode;
import io.trino.tracing.TrinoAttributes;
import io.trino.util.Failures;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
@@ -338,7 +337,7 @@ public BasicStageStats getBasicStageStats(Supplier> taskInfos
internalNetworkInputDataSize += taskStats.getInternalNetworkInputDataSize().toBytes();
internalNetworkInputPositions += taskStats.getInternalNetworkInputPositions();
- if (fragment.getPartitionedSourceNodes().stream().anyMatch(TableScanNode.class::isInstance)) {
+ if (fragment.containsTableScanNode()) {
rawInputDataSize += taskStats.getRawInputDataSize().toBytes();
rawInputPositions += taskStats.getRawInputPositions();
}
diff --git a/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java b/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java
index 4d584c36034ab..709e2d03c861a 100644
--- a/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java
+++ b/core/trino-main/src/main/java/io/trino/execution/TaskManagerConfig.java
@@ -100,6 +100,7 @@ public class TaskManagerConfig
private int taskNotificationThreads = 5;
private int taskYieldThreads = 3;
+ private int driverTimeoutThreads = 5;
private BigDecimal levelTimeMultiplier = new BigDecimal(2.0);
@@ -569,6 +570,20 @@ public TaskManagerConfig setTaskYieldThreads(int taskYieldThreads)
return this;
}
+ @Min(1)
+ public int getDriverTimeoutThreads()
+ {
+ return driverTimeoutThreads;
+ }
+
+ @Config("task.driver-timeout-threads")
+ @ConfigDescription("Number of threads used for timing out blocked drivers if the timeout is set")
+ public TaskManagerConfig setDriverTimeoutThreads(int driverTimeoutThreads)
+ {
+ this.driverTimeoutThreads = driverTimeoutThreads;
+ return this;
+ }
+
public boolean isInterruptStuckSplitTasksEnabled()
{
return interruptStuckSplitTasksEnabled;
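
The new `task.driver-timeout-threads` property follows airlift's standard config-bean pattern. For reference, a self-contained sketch of such a bean (hypothetical class name `DriverTimeoutConfig`); airlift binds `task.driver-timeout-threads=8` from `etc/config.properties` to the setter and enforces `@Min(1)` at startup:

```java
import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
import jakarta.validation.constraints.Min;

// Hypothetical stand-alone config bean mirroring the new property.
public class DriverTimeoutConfig
{
    private int driverTimeoutThreads = 5;

    @Min(1)
    public int getDriverTimeoutThreads()
    {
        return driverTimeoutThreads;
    }

    @Config("task.driver-timeout-threads")
    @ConfigDescription("Number of threads used for timing out blocked drivers if the timeout is set")
    public DriverTimeoutConfig setDriverTimeoutThreads(int driverTimeoutThreads)
    {
        this.driverTimeoutThreads = driverTimeoutThreads;
        return this;
    }
}
```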
diff --git a/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java b/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java
index 4536d9437af13..8a49ca436cd61 100644
--- a/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java
+++ b/core/trino-main/src/main/java/io/trino/execution/executor/timesharing/TimeSharingTaskExecutor.java
@@ -73,7 +73,7 @@
import static io.airlift.concurrent.Threads.threadsNamed;
import static io.airlift.tracing.Tracing.noopTracer;
import static io.trino.execution.executor.timesharing.MultilevelSplitQueue.computeLevel;
-import static io.trino.version.EmbedVersion.testingVersionEmbedder;
+import static io.trino.util.EmbedVersion.testingVersionEmbedder;
import static java.lang.Math.min;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java
index c370e7bca4da6..d5169126d8ee5 100644
--- a/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java
+++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/PipelinedQueryScheduler.java
@@ -585,7 +585,7 @@ private static Map createOutputBuf
private static PipelinedOutputBufferManager createSingleStreamOutputBuffer(SqlStage stage)
{
PartitioningHandle partitioningHandle = stage.getFragment().getOutputPartitioningScheme().getPartitioning().getHandle();
- checkArgument(partitioningHandle.isSingleNode(), "partitioning is expected to be single node: " + partitioningHandle);
+ checkArgument(partitioningHandle.isSingleNode(), "partitioning is expected to be single node: %s", partitioningHandle);
return new PartitionedPipelinedOutputBufferManager(partitioningHandle, 1);
}
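
The `checkArgument` change above (repeated in several files below) swaps eager string concatenation for Guava's lazy `%s` template: the message is only formatted when the check actually fails, so the happy path allocates nothing. A tiny contrast sketch:

```java
import static com.google.common.base.Preconditions.checkArgument;

public class CheckArgumentSketch
{
    public static void main(String[] args)
    {
        Object handle = "single";

        // Eager: the message string is concatenated even when the check passes.
        checkArgument(true, "partitioning is expected to be single node: " + handle);

        // Lazy: %s is only substituted if the condition is false, so the
        // passing case pays no formatting cost.
        checkArgument(true, "partitioning is expected to be single node: %s", handle);
    }
}
```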
diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java
index d6757fbce2d2f..2b8b874d488ba 100644
--- a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java
+++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/BinPackingNodeAllocatorService.java
@@ -34,6 +34,7 @@
import io.trino.metadata.InternalNode;
import io.trino.metadata.InternalNodeManager;
import io.trino.metadata.InternalNodeManager.NodesSnapshot;
+import io.trino.spi.HostAddress;
import io.trino.spi.TrinoException;
import io.trino.spi.memory.MemoryPoolInfo;
import jakarta.annotation.PostConstruct;
@@ -41,6 +42,7 @@
import org.assertj.core.util.VisibleForTesting;
import java.time.Duration;
+import java.util.ArrayList;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
@@ -573,24 +575,25 @@ public BinPackingSimulation(
}
}
+ private List<InternalNode> dropCoordinatorsIfNecessary(List<InternalNode> candidates)
+ {
+ return scheduleOnCoordinator ? candidates : candidates.stream().filter(node -> !node.isCoordinator()).collect(toImmutableList());
+ }
+
public ReserveResult tryReserve(PendingAcquire acquire)
{
NodeRequirements requirements = acquire.getNodeRequirements();
Optional<Set<InternalNode>> catalogNodes = requirements.getCatalogHandle().map(nodesSnapshot::getConnectorNodes);
- List<InternalNode> candidates = allNodesSorted.stream()
- .filter(node -> catalogNodes.isEmpty() || catalogNodes.get().contains(node))
- .filter(node -> {
- // Allow using coordinator if explicitly requested
- if (requirements.getAddresses().contains(node.getHostAndPort())) {
- return true;
- }
- if (requirements.getAddresses().isEmpty()) {
- return scheduleOnCoordinator || !node.isCoordinator();
- }
- return false;
- })
- .collect(toImmutableList());
+ List<InternalNode> candidates = new ArrayList<>(allNodesSorted);
+ catalogNodes.ifPresent(candidates::retainAll); // Drop non-catalog nodes, if any.
+ Set<HostAddress> addresses = requirements.getAddresses();
+ if (!addresses.isEmpty()) {
+ candidates = candidates.stream().filter(node -> addresses.contains(node.getHostAndPort())).collect(toImmutableList());
+ }
+ else {
+ candidates = dropCoordinatorsIfNecessary(candidates);
+ }
if (candidates.isEmpty()) {
return ReserveResult.NONE_MATCHING;
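
The rewritten candidate selection above is behaviorally equivalent to the removed stream pipeline: restrict to the catalog's nodes, then either pin to the explicitly requested addresses (coordinators allowed) or drop coordinators unless scheduling on the coordinator is enabled. A standalone sketch under assumed minimal stand-in types (`Node` replaces `InternalNode`, a `String` replaces `HostAddress`):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import static java.util.stream.Collectors.toList;

public class CandidateSelectionSketch
{
    // Minimal stand-in for InternalNode.
    record Node(String hostAndPort, boolean coordinator) {}

    static List<Node> selectCandidates(
            List<Node> allNodesSorted,
            Optional<Set<Node>> catalogNodes,
            Set<String> requiredAddresses,
            boolean scheduleOnCoordinator)
    {
        List<Node> candidates = new ArrayList<>(allNodesSorted);
        // Drop nodes the catalog does not run on, if the catalog restricts nodes.
        catalogNodes.ifPresent(candidates::retainAll);
        if (!requiredAddresses.isEmpty()) {
            // Address requirements win: a coordinator is allowed when explicitly requested.
            return candidates.stream()
                    .filter(node -> requiredAddresses.contains(node.hostAndPort()))
                    .collect(toList());
        }
        // No address requirement: keep coordinators only when permitted.
        return scheduleOnCoordinator
                ? candidates
                : candidates.stream().filter(node -> !node.coordinator()).collect(toList());
    }

    public static void main(String[] args)
    {
        Node coordinator = new Node("a:8080", true);
        Node worker = new Node("b:8080", false);
        // Prints only the worker node.
        System.out.println(selectCandidates(List.of(coordinator, worker), Optional.empty(), Set.of(), false));
    }
}
```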
diff --git a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java
index 1c8f611b7d236..2132a0b95c345 100644
--- a/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java
+++ b/core/trino-main/src/main/java/io/trino/execution/scheduler/faulttolerant/EventDrivenFaultTolerantQueryScheduler.java
@@ -651,9 +651,11 @@ private static class Scheduler
private static final int EVENT_BUFFER_CAPACITY = 100;
private static final long EVENT_PROCESSING_ENFORCED_FREQUENCY_MILLIS = MINUTES.toMillis(1);
// If scheduler is stalled for SCHEDULER_STALLED_DURATION_THRESHOLD debug log will be emitted.
- // This value must be larger than EVENT_PROCESSING_ENFORCED_FREQUENCY as prerequiste for processing is
+ // If the situation persists, debug logs will be emitted at most once per SCHEDULER_MAX_DEBUG_INFO_FREQUENCY.
+ // SCHEDULER_STALLED_DURATION_THRESHOLD must be larger than EVENT_PROCESSING_ENFORCED_FREQUENCY, as a prerequisite for processing is
// that there are no events in the event queue.
- private static final long SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS = MINUTES.toMillis(5);
+ private static final long SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS = MINUTES.toMillis(10);
+ private static final long SCHEDULER_MAX_DEBUG_INFO_FREQUENCY_MILLIS = MINUTES.toMillis(10);
private static final long SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS = SECONDS.toMillis(30);
private static final int EVENTS_DEBUG_INFOS_PER_BUCKET = 10;
@@ -688,7 +690,8 @@ private static class Scheduler
private final BlockingQueue<Event> eventQueue = new LinkedBlockingQueue<>();
private final List<Event> eventBuffer = new ArrayList<>(EVENT_BUFFER_CAPACITY);
- private final Stopwatch eventDebugInfoStopwatch = Stopwatch.createUnstarted();
+ private final Stopwatch noEventsStopwatch = Stopwatch.createUnstarted();
+ private final Stopwatch debugInfoStopwatch = Stopwatch.createUnstarted();
private final Optional eventDebugInfos;
private boolean started;
@@ -781,7 +784,7 @@ public Scheduler(
}
planInTopologicalOrder = sortPlanInTopologicalOrder(plan);
- eventDebugInfoStopwatch.start();
+ noEventsStopwatch.start();
}
public void run()
@@ -801,8 +804,8 @@ public void run()
}
if (queryInfo.getState() == QueryState.FAILED
&& queryInfo.getErrorCode() == EXCEEDED_TIME_LIMIT.toErrorCode()
- && eventDebugInfoStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS) {
- logDebugInfoSafe(format("Scheduler stalled for %s on EXCEEDED_TIME_LIMIT", eventDebugInfoStopwatch.elapsed()));
+ && noEventsStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_ON_TIME_EXCEEDED_THRESHOLD_MILLIS) {
+ logDebugInfoSafe(format("Scheduler stalled for %s on EXCEEDED_TIME_LIMIT", noEventsStopwatch.elapsed()));
}
});
@@ -896,13 +899,16 @@ private boolean processEvents()
if (eventDebugInfoRecorded) {
// mark that we processed some events; we filter out some no-op events.
// If only no-op events appear in event queue we still treat scheduler as stuck
- eventDebugInfoStopwatch.reset().start();
+ noEventsStopwatch.reset().start();
+ debugInfoStopwatch.reset();
}
else {
// if no events were recorded there is a chance scheduler is stalled
- if (log.isDebugEnabled() && eventDebugInfoStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS) {
- logDebugInfoSafe("Scheduler stalled for %s".formatted(eventDebugInfoStopwatch.elapsed()));
- eventDebugInfoStopwatch.reset().start(); // reset to prevent extensive logging
+ if (log.isDebugEnabled()
+ && (!debugInfoStopwatch.isRunning() || debugInfoStopwatch.elapsed().toMillis() > SCHEDULER_MAX_DEBUG_INFO_FREQUENCY_MILLIS)
+ && noEventsStopwatch.elapsed().toMillis() > SCHEDULER_STALLED_DURATION_THRESHOLD_MILLIS) {
+ logDebugInfoSafe("Scheduler stalled for %s".formatted(noEventsStopwatch.elapsed()));
+ debugInfoStopwatch.reset().start(); // reset to prevent extensive logging
}
}
@@ -936,6 +942,7 @@ private void logDebugInfo(String reason)
log.debug("Scheduler debug info for %s START; reason=%s", queryStateMachine.getQueryId(), reason);
log.debug("General state: %s", toStringHelper(this)
.add("queryState", queryStateMachine.getQueryState())
+ .add("finalQueryInfo", queryStateMachine.getFinalQueryInfo())
.add("maxTaskExecutionAttempts", maxTaskExecutionAttempts)
.add("maxTasksWaitingForNode", maxTasksWaitingForNode)
.add("maxTasksWaitingForExecution", maxTasksWaitingForExecution)
diff --git a/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java b/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java
index 24fee50d8072b..365bff87b28a9 100644
--- a/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java
+++ b/core/trino-main/src/main/java/io/trino/likematcher/FjsMatcher.java
@@ -146,7 +146,7 @@ else if (matchEnd >= inputLimit - 1) {
j = kmpShifts[j];
// Continue to match the whole pattern using KMP
- while (j > 0) {
+ while (j >= 0) {
int size = findLongestMatch(input, i, pattern, j, Math.min(inputLimit - i, pattern.length - j));
i += size;
j += size;
diff --git a/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java b/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java
index b330931af945d..191179a8ae168 100644
--- a/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java
+++ b/core/trino-main/src/main/java/io/trino/memory/MemoryPool.java
@@ -31,6 +31,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import static com.google.common.base.MoreObjects.toStringHelper;
@@ -53,9 +54,9 @@ public class MemoryPool
@GuardedBy("this")
private NonCancellableMemoryFuture future;
- @GuardedBy("this")
// TODO: It would be better if we just tracked QueryContexts, but their lifecycle is managed by a weak reference, so we can't do that
- private final Map<QueryId, Long> queryMemoryReservations = new HashMap<>();
+ // It is guarded for updates by this, but can be read without holding a lock
+ private final Map<QueryId, Long> queryMemoryReservations = new ConcurrentHashMap<>();
// This map keeps track of all the tagged allocations, e.g., query-1 -> ['TableScanOperator': 10MB, 'LazyOutputBuffer': 5MB, ...]
@GuardedBy("this")
@@ -347,7 +348,7 @@ public synchronized long getReservedRevocableBytes()
return reservedRevocableBytes;
}
- synchronized long getQueryMemoryReservation(QueryId queryId)
+ long getQueryMemoryReservation(QueryId queryId)
{
return queryMemoryReservations.getOrDefault(queryId, 0L);
}
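
The `MemoryPool` change above keeps all writes under the pool's monitor while letting the hot read path skip the lock entirely; this is safe because `ConcurrentHashMap` reads are thread-safe and the getter only needs a consistent view of a single key. A minimal sketch of the pattern (hypothetical class, `String` stands in for `QueryId`):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class GuardedWritesSketch
{
    // Writes are serialized by synchronized methods; reads go lock-free.
    private final Map<String, Long> reservations = new ConcurrentHashMap<>();

    public synchronized void reserve(String queryId, long bytes)
    {
        reservations.merge(queryId, bytes, Long::sum);
    }

    public synchronized void free(String queryId, long bytes)
    {
        // Returning null from the remapping function removes the entry.
        reservations.computeIfPresent(queryId, (id, current) -> current == bytes ? null : current - bytes);
    }

    // No lock needed: ConcurrentHashMap guarantees a consistent read of one key.
    public long getReservation(String queryId)
    {
        return reservations.getOrDefault(queryId, 0L);
    }
}
```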
diff --git a/core/trino-main/src/main/java/io/trino/memory/QueryContext.java b/core/trino-main/src/main/java/io/trino/memory/QueryContext.java
index c30e3ad282f71..3331e60d539d1 100644
--- a/core/trino-main/src/main/java/io/trino/memory/QueryContext.java
+++ b/core/trino-main/src/main/java/io/trino/memory/QueryContext.java
@@ -63,6 +63,7 @@ public class QueryContext
private final GcMonitor gcMonitor;
private final Executor notificationExecutor;
private final ScheduledExecutorService yieldExecutor;
+ private final ScheduledExecutorService timeoutExecutor;
private final long maxSpill;
private final SpillSpaceTracker spillSpaceTracker;
private final Map<TaskId, TaskContext> taskContexts = new ConcurrentHashMap<>();
@@ -86,6 +87,7 @@ public QueryContext(
GcMonitor gcMonitor,
Executor notificationExecutor,
ScheduledExecutorService yieldExecutor,
+ ScheduledExecutorService timeoutExecutor,
DataSize maxSpill,
SpillSpaceTracker spillSpaceTracker)
{
@@ -97,6 +99,7 @@ public QueryContext(
gcMonitor,
notificationExecutor,
yieldExecutor,
+ timeoutExecutor,
maxSpill,
spillSpaceTracker);
}
@@ -109,6 +112,7 @@ public QueryContext(
GcMonitor gcMonitor,
Executor notificationExecutor,
ScheduledExecutorService yieldExecutor,
+ ScheduledExecutorService timeoutExecutor,
DataSize maxSpill,
SpillSpaceTracker spillSpaceTracker)
{
@@ -118,6 +122,7 @@ public QueryContext(
this.gcMonitor = requireNonNull(gcMonitor, "gcMonitor is null");
this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null");
+ this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null");
this.maxSpill = maxSpill.toBytes();
this.spillSpaceTracker = requireNonNull(spillSpaceTracker, "spillSpaceTracker is null");
this.guaranteedMemory = guaranteedMemory;
@@ -220,12 +225,12 @@ public synchronized void freeSpill(long bytes)
spillSpaceTracker.free(bytes);
}
- public synchronized MemoryPool getMemoryPool()
+ public MemoryPool getMemoryPool()
{
return memoryPool;
}
- public synchronized long getUserMemoryReservation()
+ public long getUserMemoryReservation()
{
return memoryPool.getQueryMemoryReservation(queryId);
}
@@ -257,6 +262,7 @@ public TaskContext addTaskContext(
gcMonitor,
notificationExecutor,
yieldExecutor,
+ timeoutExecutor,
session,
taskMemoryContext,
notifyStatusChanged,
diff --git a/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java b/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java
index 23e071d2ecc2c..9a7c208380ea8 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/FunctionManager.java
@@ -324,7 +324,10 @@ private record FunctionKey(ResolvedFunction resolvedFunction, InvocationConventi
public static FunctionManager createTestingFunctionManager()
{
TypeOperators typeOperators = new TypeOperators();
- GlobalFunctionCatalog functionCatalog = new GlobalFunctionCatalog();
+ GlobalFunctionCatalog functionCatalog = new GlobalFunctionCatalog(
+ () -> { throw new UnsupportedOperationException(); },
+ () -> { throw new UnsupportedOperationException(); },
+ () -> { throw new UnsupportedOperationException(); });
functionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), UNKNOWN));
functionCatalog.addFunctions(new InternalFunctionBundle(new LiteralFunction(new InternalBlockEncodingSerde(new BlockEncodingManager(), TESTING_TYPE_MANAGER))));
return new FunctionManager(CatalogServiceProvider.fail(), functionCatalog, LanguageFunctionProvider.DISABLED);
diff --git a/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java b/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java
index 0b8e28702a7cd..6cdd80f483000 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/GlobalFunctionCatalog.java
@@ -18,9 +18,12 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multimap;
import com.google.errorprone.annotations.ThreadSafe;
+import com.google.inject.Inject;
+import com.google.inject.Provider;
import io.trino.connector.system.GlobalSystemConnector;
-import io.trino.operator.table.ExcludeColumns.ExcludeColumnsFunctionHandle;
-import io.trino.operator.table.Sequence.SequenceFunctionHandle;
+import io.trino.operator.table.ExcludeColumnsFunction.ExcludeColumnsFunctionHandle;
+import io.trino.operator.table.SequenceFunction.SequenceFunctionHandle;
+import io.trino.operator.table.json.JsonTable.JsonTableFunctionHandle;
import io.trino.spi.function.AggregationFunctionMetadata;
import io.trino.spi.function.AggregationImplementation;
import io.trino.spi.function.BoundSignature;
@@ -37,6 +40,7 @@
import io.trino.spi.function.WindowFunctionSupplier;
import io.trino.spi.function.table.ConnectorTableFunctionHandle;
import io.trino.spi.function.table.TableFunctionProcessorProvider;
+import io.trino.spi.type.TypeManager;
import io.trino.spi.type.TypeSignature;
import java.util.Collection;
@@ -51,21 +55,35 @@
import static io.trino.metadata.OperatorNameUtil.isOperatorName;
import static io.trino.metadata.OperatorNameUtil.mangleOperatorName;
import static io.trino.metadata.OperatorNameUtil.unmangleOperator;
-import static io.trino.operator.table.ExcludeColumns.getExcludeColumnsFunctionProcessorProvider;
-import static io.trino.operator.table.Sequence.getSequenceFunctionProcessorProvider;
+import static io.trino.operator.table.ExcludeColumnsFunction.getExcludeColumnsFunctionProcessorProvider;
+import static io.trino.operator.table.SequenceFunction.getSequenceFunctionProcessorProvider;
+import static io.trino.operator.table.json.JsonTable.getJsonTableFunctionProcessorProvider;
import static io.trino.spi.function.FunctionKind.AGGREGATE;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.IntegerType.INTEGER;
import static java.util.Locale.ENGLISH;
+import static java.util.Objects.requireNonNull;
@ThreadSafe
public class GlobalFunctionCatalog
implements FunctionProvider
{
public static final String BUILTIN_SCHEMA = "builtin";
+
+ private final Provider<Metadata> metadata;
+ private final Provider<TypeManager> typeManager;
+ private final Provider<FunctionManager> functionManager;
private volatile FunctionMap functions = new FunctionMap();
+ @Inject
+ public GlobalFunctionCatalog(Provider<Metadata> metadata, Provider<TypeManager> typeManager, Provider<FunctionManager> functionManager)
+ {
+ this.metadata = requireNonNull(metadata, "metadata is null");
+ this.typeManager = requireNonNull(typeManager, "typeManager is null");
+ this.functionManager = requireNonNull(functionManager, "functionManager is null");
+ }
+
public final synchronized void addFunctions(FunctionBundle functionBundle)
{
for (FunctionMetadata functionMetadata : functionBundle.getFunctions()) {
@@ -187,6 +205,9 @@ public TableFunctionProcessorProvider getTableFunctionProcessorProvider(Connecto
if (functionHandle instanceof SequenceFunctionHandle) {
return getSequenceFunctionProcessorProvider();
}
+ if (functionHandle instanceof JsonTableFunctionHandle) {
+ return getJsonTableFunctionProcessorProvider(metadata.get(), typeManager.get(), functionManager.get());
+ }
return null;
}
@@ -272,14 +293,14 @@ public Collection get(String functionName)
public FunctionMetadata get(FunctionId functionId)
{
FunctionMetadata functionMetadata = functionsById.get(functionId);
- checkArgument(functionMetadata != null, "Unknown function implementation: " + functionId);
+ checkArgument(functionMetadata != null, "Unknown function implementation: %s", functionId);
return functionMetadata;
}
public FunctionBundle getFunctionBundle(FunctionId functionId)
{
FunctionBundle functionBundle = functionBundlesById.get(functionId);
- checkArgument(functionBundle != null, "Unknown function implementation: " + functionId);
+ checkArgument(functionBundle != null, "Unknown function implementation: %s", functionId);
return functionBundle;
}
}
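
Injecting `Provider<...>` here instead of the services themselves defers resolution until a `json_table` function is actually invoked, which is the standard Guice way to break a dependency cycle (the function manager depends on the catalog and vice versa). A minimal Guice sketch with hypothetical `Catalog`/`Manager` services:

```java
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Provider;
import com.google.inject.Singleton;

public class ProviderCycleSketch
{
    @Singleton
    static class Catalog
    {
        private final Provider<Manager> manager;

        @Inject
        Catalog(Provider<Manager> manager)
        {
            // Only the Provider is captured; Manager is not constructed yet,
            // so the Catalog <-> Manager cycle never materializes at injection time.
            this.manager = manager;
        }

        String describe()
        {
            // First call to get() lazily builds the Manager.
            return "resolved " + manager.get().getClass().getSimpleName();
        }
    }

    @Singleton
    static class Manager
    {
        @Inject
        Manager(Catalog catalog)
        {
            // Depends back on Catalog, closing the cycle.
        }
    }

    public static void main(String[] args)
    {
        Injector injector = Guice.createInjector();
        System.out.println(injector.getInstance(Catalog.class).describe());
    }
}
```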
diff --git a/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java b/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java
index 20a94c3362fcf..54a8b6c65f502 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/InternalFunctionBundle.java
@@ -184,7 +184,7 @@ private WindowFunctionSupplier specializeWindow(FunctionId functionId, BoundSign
private SqlFunction getSqlFunction(FunctionId functionId)
{
SqlFunction function = functions.get(functionId);
- checkArgument(function != null, "Unknown function implementation: " + functionId);
+ checkArgument(function != null, "Unknown function implementation: %s", functionId);
return function;
}
diff --git a/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java b/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java
index e4ae4ecfd77da..a415c339a0b9c 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/LanguageFunctionManager.java
@@ -269,7 +269,7 @@ public synchronized List getFunctions(CatalogHandle catalogHan
public FunctionDependencyDeclaration getDependencies(FunctionId functionId, AccessControl accessControl)
{
LanguageFunctionImplementation function = implementationsById.get(functionId);
- checkArgument(function != null, "Unknown function implementation: " + functionId);
+ checkArgument(function != null, "Unknown function implementation: %s", functionId);
return function.getFunctionDependencies(accessControl);
}
@@ -285,7 +285,7 @@ public Optional specialize(ResolvedFunction resolv
public FunctionMetadata getFunctionMetadata(FunctionId functionId)
{
LanguageFunctionImplementation function = implementationsById.get(functionId);
- checkArgument(function != null, "Unknown function implementation: " + functionId);
+ checkArgument(function != null, "Unknown function implementation: %s", functionId);
return function.getFunctionMetadata();
}
@@ -293,7 +293,7 @@ public void registerResolvedFunction(ResolvedFunction resolvedFunction)
{
FunctionId functionId = resolvedFunction.getFunctionId();
LanguageFunctionImplementation function = implementationsById.get(functionId);
- checkArgument(function != null, "Unknown function implementation: " + functionId);
+ checkArgument(function != null, "Unknown function implementation: %s", functionId);
implementationsByResolvedFunction.put(resolvedFunction, function);
}
diff --git a/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java b/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java
index caff709a19b55..6dc49947b0308 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/MaterializedViewDefinition.java
@@ -13,7 +13,6 @@
*/
package io.trino.metadata;
-import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.CatalogSchemaName;
import io.trino.spi.connector.CatalogSchemaTableName;
import io.trino.spi.connector.ConnectorMaterializedViewDefinition;
@@ -21,7 +20,6 @@
import java.time.Duration;
import java.util.List;
-import java.util.Map;
import java.util.Optional;
import static com.google.common.base.MoreObjects.toStringHelper;
@@ -34,7 +32,6 @@ public class MaterializedViewDefinition
{
private final Optional<Duration> gracePeriod;
private final Optional<CatalogSchemaTableName> storageTable;
- private final Map<String, Object> properties;
public MaterializedViewDefinition(
String originalSql,
@@ -45,14 +42,12 @@ public MaterializedViewDefinition(
Optional<String> comment,
Identity owner,
List<CatalogSchemaName> path,
- Optional<CatalogSchemaTableName> storageTable,
- Map<String, Object> properties)
+ Optional<CatalogSchemaTableName> storageTable)
{
super(originalSql, catalog, schema, columns, comment, Optional.of(owner), path);
checkArgument(gracePeriod.isEmpty() || !gracePeriod.get().isNegative(), "gracePeriod cannot be negative: %s", gracePeriod);
this.gracePeriod = gracePeriod;
this.storageTable = requireNonNull(storageTable, "storageTable is null");
- this.properties = ImmutableMap.copyOf(requireNonNull(properties, "properties is null"));
}
public Optional getGracePeriod()
@@ -65,11 +60,6 @@ public Optional getStorageTable()
return storageTable;
}
- public Map<String, Object> getProperties()
- {
- return properties;
- }
-
public ConnectorMaterializedViewDefinition toConnectorMaterializedViewDefinition()
{
return new ConnectorMaterializedViewDefinition(
@@ -83,8 +73,7 @@ public ConnectorMaterializedViewDefinition toConnectorMaterializedViewDefinition
getGracePeriod(),
getComment(),
getRunAsIdentity().map(Identity::getUser),
- getPath(),
- properties);
+ getPath());
}
@Override
@@ -100,7 +89,6 @@ public String toString()
.add("runAsIdentity", getRunAsIdentity())
.add("path", getPath())
.add("storageTable", storageTable.orElse(null))
- .add("properties", properties)
.toString();
}
}
diff --git a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java
index bb2cad83e056f..1b7ffa8e706c4 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java
@@ -710,7 +710,13 @@ default ResolvedFunction getCoercion(Type fromType, Type toType)
/**
* Creates the specified materialized view with the specified view definition.
*/
- void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting);
+ void createMaterializedView(
+ Session session,
+ QualifiedObjectName viewName,
+ MaterializedViewDefinition definition,
+ Map<String, Object> properties,
+ boolean replace,
+ boolean ignoreExisting);
/**
* Drops the specified materialized view.
@@ -740,6 +746,8 @@ default boolean isMaterializedView(Session session, QualifiedObjectName viewName
*/
Optional<MaterializedViewDefinition> getMaterializedView(Session session, QualifiedObjectName viewName);
+ Map<String, Object> getMaterializedViewProperties(Session session, QualifiedObjectName objectName, MaterializedViewDefinition materializedViewDefinition);
+
/**
* Method to get difference between the states of table at two different points in time/or as of given token-ids.
* The method is used by the engine to determine if a materialized view is current with respect to the tables it depends on.
diff --git a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java
index 1adeb8d71a4a5..67e5666663a3b 100644
--- a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java
+++ b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java
@@ -112,7 +112,6 @@
import io.trino.spi.type.TypeOperators;
import io.trino.sql.analyzer.TypeSignatureProvider;
import io.trino.sql.parser.SqlParser;
-import io.trino.sql.planner.ConnectorExpressions;
import io.trino.sql.planner.PartitioningHandle;
import io.trino.sql.tree.QualifiedName;
import io.trino.transaction.TransactionManager;
@@ -158,6 +157,7 @@
import static io.trino.metadata.RedirectionAwareTableHandle.noRedirection;
import static io.trino.metadata.RedirectionAwareTableHandle.withRedirectionTo;
import static io.trino.metadata.SignatureBinder.applyBoundVariables;
+import static io.trino.plugin.base.expression.ConnectorExpressions.extractVariables;
import static io.trino.spi.ErrorType.EXTERNAL;
import static io.trino.spi.StandardErrorCode.FUNCTION_IMPLEMENTATION_ERROR;
import static io.trino.spi.StandardErrorCode.FUNCTION_IMPLEMENTATION_MISSING;
@@ -1530,7 +1530,13 @@ public void dropView(Session session, QualifiedObjectName viewName)
}
@Override
- public void createMaterializedView(Session session, QualifiedObjectName viewName, MaterializedViewDefinition definition, boolean replace, boolean ignoreExisting)
+ public void createMaterializedView(
+ Session session,
+ QualifiedObjectName viewName,
+ MaterializedViewDefinition definition,
+ Map<String, Object> properties,
+ boolean replace,
+ boolean ignoreExisting)
{
CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, viewName.getCatalogName());
CatalogHandle catalogHandle = catalogMetadata.getCatalogHandle();
@@ -1540,6 +1546,7 @@ public void createMaterializedView(Session session, QualifiedObjectName viewName
session.toConnectorSession(catalogHandle),
viewName.asSchemaTableName(),
definition.toConnectorMaterializedViewDefinition(),
+ properties,
replace,
ignoreExisting);
if (catalogMetadata.getSecurityManagement() == SYSTEM) {
@@ -1673,8 +1680,7 @@ private static MaterializedViewDefinition createMaterializedViewDefinition(Conne
view.getComment(),
runAsIdentity,
view.getPath(),
- view.getStorageTable(),
- view.getProperties());
+ view.getStorageTable());
}
private Optional<ConnectorMaterializedViewDefinition> getMaterializedViewInternal(Session session, QualifiedObjectName viewName)
@@ -1695,6 +1701,24 @@ private Optional getMaterializedViewInterna
return Optional.empty();
}
+ @Override
+ public Map<String, Object> getMaterializedViewProperties(Session session, QualifiedObjectName viewName, MaterializedViewDefinition materializedViewDefinition)
+ {
+ Optional<CatalogMetadata> catalog = getOptionalCatalogMetadata(session, viewName.getCatalogName());
+ if (catalog.isPresent()) {
+ CatalogMetadata catalogMetadata = catalog.get();
+ CatalogHandle catalogHandle = catalogMetadata.getCatalogHandle(session, viewName);
+ ConnectorMetadata metadata = catalogMetadata.getMetadataFor(session, catalogHandle);
+
+ ConnectorSession connectorSession = session.toConnectorSession(catalogHandle);
+ return ImmutableMap.copyOf(metadata.getMaterializedViewProperties(
+ connectorSession,
+ viewName.asSchemaTableName(),
+ materializedViewDefinition.toConnectorMaterializedViewDefinition()));
+ }
+ return ImmutableMap.of();
+ }
+
@Override
public MaterializedViewFreshness getMaterializedViewFreshness(Session session, QualifiedObjectName viewName)
{
@@ -2016,7 +2040,7 @@ private void verifyProjection(TableHandle table, List proje
.map(Assignment::getVariable)
.collect(toImmutableSet());
projections.stream()
- .flatMap(connectorExpression -> ConnectorExpressions.extractVariables(connectorExpression).stream())
+ .flatMap(connectorExpression -> extractVariables(connectorExpression).stream())
.map(Variable::getName)
.filter(variableName -> !assignedVariables.contains(variableName))
.findAny()
@@ -2737,7 +2761,10 @@ public MetadataManager build()
GlobalFunctionCatalog globalFunctionCatalog = this.globalFunctionCatalog;
if (globalFunctionCatalog == null) {
- globalFunctionCatalog = new GlobalFunctionCatalog();
+ globalFunctionCatalog = new GlobalFunctionCatalog(
+ () -> { throw new UnsupportedOperationException(); },
+ () -> { throw new UnsupportedOperationException(); },
+ () -> { throw new UnsupportedOperationException(); });
TypeOperators typeOperators = new TypeOperators();
globalFunctionCatalog.addFunctions(SystemFunctionBundle.create(new FeaturesConfig(), typeOperators, new BlockTypeOperators(typeOperators), UNKNOWN));
globalFunctionCatalog.addFunctions(new InternalFunctionBundle(new LiteralFunction(new InternalBlockEncodingSerde(new BlockEncodingManager(), typeManager))));
diff --git a/core/trino-main/src/main/java/io/trino/operator/Driver.java b/core/trino-main/src/main/java/io/trino/operator/Driver.java
index f6949c4bb204a..0033344d7fe57 100644
--- a/core/trino-main/src/main/java/io/trino/operator/Driver.java
+++ b/core/trino-main/src/main/java/io/trino/operator/Driver.java
@@ -46,12 +46,15 @@
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Throwables.throwIfUnchecked;
import static com.google.common.base.Verify.verify;
+import static com.google.common.util.concurrent.Futures.nonCancellationPropagating;
+import static com.google.common.util.concurrent.Futures.withTimeout;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.trino.operator.Operator.NOT_BLOCKED;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static java.lang.Boolean.TRUE;
import static java.util.Objects.requireNonNull;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
//
@@ -455,6 +458,13 @@ private ListenableFuture processInternal(OperationTimer operationTimer)
// unblock when the first future is complete
ListenableFuture<Void> blocked = firstFinishedFuture(blockedFutures);
+ if (driverContext.getBlockedTimeout().isPresent()) {
+ blocked = withTimeout(
+ nonCancellationPropagating(blocked),
+ driverContext.getBlockedTimeout().get().toMillis(),
+ MILLISECONDS,
+ driverContext.getTimeoutExecutor());
+ }
// driver records serial blocked time
driverContext.recordBlocked(blocked);
// each blocked operator is responsible for blocking the execution
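
The timeout added to `Driver.processInternal` above relies on two Guava combinators: `withTimeout` completes the blocked future exceptionally after the configured delay, and `nonCancellationPropagating` ensures the timeout's cancellation hits the wrapper rather than the operators' underlying futures. A runnable sketch of that interaction:

```java
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;

import static com.google.common.util.concurrent.Futures.nonCancellationPropagating;
import static com.google.common.util.concurrent.Futures.withTimeout;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

public class BlockedTimeoutSketch
{
    public static void main(String[] args) throws Exception
    {
        ScheduledExecutorService timeoutExecutor = Executors.newSingleThreadScheduledExecutor();
        SettableFuture<Void> operatorBlocked = SettableFuture.create();

        // Wake the waiter after 100ms even if the operator future never fires.
        // nonCancellationPropagating keeps the timeout's cancellation from
        // reaching the operator's underlying future.
        ListenableFuture<Void> blocked = withTimeout(
                nonCancellationPropagating(operatorBlocked),
                100,
                MILLISECONDS,
                timeoutExecutor);

        // Prints "woke up; operator future cancelled? false" after ~100ms.
        blocked.addListener(
                () -> System.out.println("woke up; operator future cancelled? " + operatorBlocked.isCancelled()),
                Runnable::run);
        Thread.sleep(300);
        timeoutExecutor.shutdown();
    }
}
```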
diff --git a/core/trino-main/src/main/java/io/trino/operator/DriverContext.java b/core/trino-main/src/main/java/io/trino/operator/DriverContext.java
index 0ea5d3a5d70e7..67998f0a5e45a 100644
--- a/core/trino-main/src/main/java/io/trino/operator/DriverContext.java
+++ b/core/trino-main/src/main/java/io/trino/operator/DriverContext.java
@@ -28,6 +28,7 @@
import org.joda.time.DateTime;
import java.util.List;
+import java.util.Optional;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
@@ -54,6 +55,7 @@ public class DriverContext
private final PipelineContext pipelineContext;
private final Executor notificationExecutor;
private final ScheduledExecutorService yieldExecutor;
+ private final ScheduledExecutorService timeoutExecutor;
private final AtomicBoolean finished = new AtomicBoolean();
@@ -70,6 +72,7 @@ public class DriverContext
private final AtomicReference<DateTime> executionStartTime = new AtomicReference<>();
private final AtomicReference<DateTime> executionEndTime = new AtomicReference<>();
+ private final AtomicReference<Optional<Duration>> blockedTimeout = new AtomicReference<>(Optional.empty());
private final MemoryTrackingContext driverMemoryContext;
@@ -82,12 +85,14 @@ public DriverContext(
PipelineContext pipelineContext,
Executor notificationExecutor,
ScheduledExecutorService yieldExecutor,
+ ScheduledExecutorService timeoutExecutor,
MemoryTrackingContext driverMemoryContext,
long splitWeight)
{
this.pipelineContext = requireNonNull(pipelineContext, "pipelineContext is null");
this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null");
+ this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null");
this.driverMemoryContext = requireNonNull(driverMemoryContext, "driverMemoryContext is null");
this.yieldSignal = new DriverYieldSignal();
this.splitWeight = splitWeight;
@@ -447,6 +452,21 @@ public ScheduledExecutorService getYieldExecutor()
return yieldExecutor;
}
+ public ScheduledExecutorService getTimeoutExecutor()
+ {
+ return timeoutExecutor;
+ }
+
+ public void setBlockedTimeout(Duration duration)
+ {
+ this.blockedTimeout.set(Optional.of(duration));
+ }
+
+ public Optional<Duration> getBlockedTimeout()
+ {
+ return blockedTimeout.get();
+ }
+
private static long nanosBetween(long start, long end)
{
return max(0, end - start);
diff --git a/core/trino-main/src/main/java/io/trino/operator/FlatHash.java b/core/trino-main/src/main/java/io/trino/operator/FlatHash.java
index 3086d95940322..36119075a6823 100644
--- a/core/trino-main/src/main/java/io/trino/operator/FlatHash.java
+++ b/core/trino-main/src/main/java/io/trino/operator/FlatHash.java
@@ -342,7 +342,7 @@ private void rehash(int minimumRequiredCapacity)
// we incrementally allocate the record groups to smooth out memory allocation
if (capacity <= RECORDS_PER_GROUP) {
- recordGroups = new byte[][]{new byte[multiplyExact(capacity, recordSize)]};
+ recordGroups = new byte[][] {new byte[multiplyExact(capacity, recordSize)]};
}
else {
recordGroups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][];
diff --git a/core/trino-main/src/main/java/io/trino/operator/FlatSet.java b/core/trino-main/src/main/java/io/trino/operator/FlatSet.java
index 5b5c298fdd284..fc6b968f4481f 100644
--- a/core/trino-main/src/main/java/io/trino/operator/FlatSet.java
+++ b/core/trino-main/src/main/java/io/trino/operator/FlatSet.java
@@ -99,7 +99,7 @@ public FlatSet(
private static byte[][] createRecordGroups(int capacity, int recordSize)
{
if (capacity < RECORDS_PER_GROUP) {
- return new byte[][]{new byte[multiplyExact(capacity, recordSize)]};
+ return new byte[][] {new byte[multiplyExact(capacity, recordSize)]};
}
byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][];
diff --git a/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java b/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java
index 605d970564e1f..1f64fd5fb76dc 100644
--- a/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java
+++ b/core/trino-main/src/main/java/io/trino/operator/PipelineContext.java
@@ -55,6 +55,7 @@ public class PipelineContext
private final TaskContext taskContext;
private final Executor notificationExecutor;
private final ScheduledExecutorService yieldExecutor;
+ private final ScheduledExecutorService timeoutExecutor;
private final int pipelineId;
private final boolean inputPipeline;
@@ -105,7 +106,7 @@ public class PipelineContext
private final MemoryTrackingContext pipelineMemoryContext;
- public PipelineContext(int pipelineId, TaskContext taskContext, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, MemoryTrackingContext pipelineMemoryContext, boolean inputPipeline, boolean outputPipeline, boolean partitioned)
+ public PipelineContext(int pipelineId, TaskContext taskContext, Executor notificationExecutor, ScheduledExecutorService yieldExecutor, ScheduledExecutorService timeoutExecutor, MemoryTrackingContext pipelineMemoryContext, boolean inputPipeline, boolean outputPipeline, boolean partitioned)
{
this.pipelineId = pipelineId;
this.inputPipeline = inputPipeline;
@@ -114,6 +115,7 @@ public PipelineContext(int pipelineId, TaskContext taskContext, Executor notific
this.taskContext = requireNonNull(taskContext, "taskContext is null");
this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null");
+ this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null");
this.pipelineMemoryContext = requireNonNull(pipelineMemoryContext, "pipelineMemoryContext is null");
// Initialize the local memory contexts with the ExchangeOperator tag as ExchangeOperator will do the local memory allocations
pipelineMemoryContext.initializeLocalMemoryContexts(ExchangeOperator.class.getSimpleName());
@@ -156,6 +158,7 @@ public DriverContext addDriverContext(long splitWeight)
this,
notificationExecutor,
yieldExecutor,
+ timeoutExecutor,
pipelineMemoryContext.newMemoryTrackingContext(),
splitWeight);
drivers.add(driverContext);
diff --git a/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java b/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java
index f309a6d145c50..7b41bde101eda 100644
--- a/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java
+++ b/core/trino-main/src/main/java/io/trino/operator/TableFunctionOperator.java
@@ -128,7 +128,6 @@ public TableFunctionOperatorFactory(
{
requireNonNull(planNodeId, "planNodeId is null");
requireNonNull(tableFunctionProvider, "tableFunctionProvider is null");
- requireNonNull(catalogHandle, "catalogHandle is null");
requireNonNull(functionHandle, "functionHandle is null");
requireNonNull(requiredChannels, "requiredChannels is null");
requireNonNull(markerChannels, "markerChannels is null");
@@ -272,6 +271,7 @@ public TableFunctionOperator(
this.operatorContext = operatorContext;
this.session = operatorContext.getSession().toConnectorSession(catalogHandle);
+
this.processEmptyInput = !pruneWhenEmpty;
PagesIndex pagesIndex = pagesIndexFactory.newPagesIndex(sourceTypes, expectedPositions);
diff --git a/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java b/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java
index 37619858e1e45..ac4efe5b63074 100644
--- a/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java
+++ b/core/trino-main/src/main/java/io/trino/operator/TableWriterOperator.java
@@ -20,7 +20,9 @@
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
+import io.airlift.log.Logger;
import io.airlift.slice.Slice;
+import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import io.trino.Session;
import io.trino.memory.context.LocalMemoryContext;
@@ -42,6 +44,7 @@
import java.util.Collection;
import java.util.List;
+import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
@@ -53,6 +56,8 @@
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.airlift.concurrent.MoreFutures.toListenableFuture;
+import static io.trino.SystemSessionProperties.getCloseIdleWritersTriggerDuration;
+import static io.trino.SystemSessionProperties.getIdleWriterMinDataSizeThreshold;
import static io.trino.SystemSessionProperties.isStatisticsCpuTimerEnabled;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.VarbinaryType.VARBINARY;
@@ -64,6 +69,7 @@
public class TableWriterOperator
implements Operator
{
+ private static final Logger LOG = Logger.get(TableWriterOperator.class);
public static final int ROW_COUNT_CHANNEL = 0;
public static final int FRAGMENT_CHANNEL = 1;
public static final int STATS_START_CHANNEL = 2;
@@ -111,10 +117,22 @@ public TableWriterOperatorFactory(
public Operator createOperator(DriverContext driverContext)
{
checkState(!closed, "Factory is already closed");
+ // The Driver should call getOutput() periodically on TableWriterOperator to close idle writers, which
+ // frees their memory even when no new pages have been added to those writers.
+ if (getCloseIdleWritersTriggerDuration(session).toMillis() > 0) {
+ driverContext.setBlockedTimeout(getCloseIdleWritersTriggerDuration(session));
+ }
OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, TableWriterOperator.class.getSimpleName());
Operator statisticsAggregationOperator = statisticsAggregationOperatorFactory.createOperator(driverContext);
boolean statisticsCpuTimerEnabled = !(statisticsAggregationOperator instanceof DevNullOperator) && isStatisticsCpuTimerEnabled(session);
- return new TableWriterOperator(context, createPageSink(driverContext), columnChannels, statisticsAggregationOperator, types, statisticsCpuTimerEnabled);
+ return new TableWriterOperator(
+ context,
+ createPageSink(driverContext),
+ columnChannels,
+ statisticsAggregationOperator,
+ types,
+ statisticsCpuTimerEnabled,
+ getIdleWriterMinDataSizeThreshold(session));
}
private ConnectorPageSink createPageSink(DriverContext driverContext)
@@ -159,6 +177,7 @@ private enum State
private final AtomicLong pageSinkPeakMemoryUsage = new AtomicLong();
private final Operator statisticAggregationOperator;
private final List<Type> types;
+ private final DataSize idleWriterMinDataSizeThreshold;
private ListenableFuture<Void> blocked = NOT_BLOCKED;
private CompletableFuture<Collection<Slice>> finishFuture;
@@ -170,8 +189,10 @@ private enum State
private final OperationTiming statisticsTiming = new OperationTiming();
private final boolean statisticsCpuTimerEnabled;
-
private final Supplier<TableWriterInfo> tableWriterInfoSupplier;
+ // Records the physical written data size as of the last time closeIdleWriters was triggered on the page sink.
+ private long lastPhysicalWrittenDataSize;
+ private boolean newPagesAdded;
public TableWriterOperator(
OperatorContext operatorContext,
@@ -179,7 +200,8 @@ public TableWriterOperator(
List<Integer> columnChannels,
Operator statisticAggregationOperator,
List<Type> types,
- boolean statisticsCpuTimerEnabled)
+ boolean statisticsCpuTimerEnabled,
+ DataSize idleWriterMinDataSizeThreshold)
{
this.operatorContext = requireNonNull(operatorContext, "operatorContext is null");
this.pageSinkMemoryContext = operatorContext.newLocalUserMemoryContext(TableWriterOperator.class.getSimpleName());
@@ -188,6 +210,7 @@ public TableWriterOperator(
this.statisticAggregationOperator = requireNonNull(statisticAggregationOperator, "statisticAggregationOperator is null");
this.types = ImmutableList.copyOf(requireNonNull(types, "types is null"));
this.statisticsCpuTimerEnabled = statisticsCpuTimerEnabled;
+ this.idleWriterMinDataSizeThreshold = requireNonNull(idleWriterMinDataSizeThreshold, "idleWriterMinDataSizeThreshold is null");
this.tableWriterInfoSupplier = createTableWriterInfoSupplier(pageSinkPeakMemoryUsage, statisticsTiming, pageSink);
this.operatorContext.setInfoSupplier(tableWriterInfoSupplier);
}
@@ -259,14 +282,20 @@ public void addInput(Page page)
rowCount += page.getPositionCount();
updateWrittenBytes();
operatorContext.recordWriterInputDataSize(page.getSizeInBytes());
+ newPagesAdded = true;
}
@Override
public Page getOutput()
{
- if (!blocked.isDone()) {
+ tryClosingIdleWriters();
+ // This method could be called even when new pages have not been added. In that case, we don't have to
+ // try to get the output from the aggregation operator. It could be expensive since getOutput() is
+ // called quite frequently.
+ if (!(blocked.isDone() && (newPagesAdded || state != State.RUNNING))) {
return null;
}
+ newPagesAdded = false;
if (!statisticAggregationOperator.isFinished()) {
OperationTimer timer = new OperationTimer(statisticsCpuTimerEnabled);
@@ -365,6 +394,24 @@ private void updateWrittenBytes()
writtenBytes = current;
}
+ private void tryClosingIdleWriters()
+ {
+ long physicalWrittenDataSize = getTaskContext().getPhysicalWrittenDataSize();
+ Optional<Integer> writerCount = getTaskContext().getMaxWriterCount();
+ if (writerCount.isEmpty() || physicalWrittenDataSize - lastPhysicalWrittenDataSize <= idleWriterMinDataSizeThreshold.toBytes() * writerCount.get()) {
+ return;
+ }
+ pageSink.closeIdleWriters();
+ updateMemoryUsage();
+ updateWrittenBytes();
+ lastPhysicalWrittenDataSize = physicalWrittenDataSize;
+ }
+
+ private TaskContext getTaskContext()
+ {
+ return operatorContext.getDriverContext().getPipelineContext().getTaskContext();
+ }
+
private void updateMemoryUsage()
{
long pageSinkMemoryUsage = pageSink.getMemoryUsage();
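
The idle-writer gate added in `tryClosingIdleWriters` above compares how much physical data has been written since the last sweep against a per-writer threshold, and only then pays for `closeIdleWriters()`. A condensed sketch of the gate; the stat and sink accessors are passed in as parameters since they come from the task context and connector page sink in the real code:

```java
import io.airlift.units.DataSize;

import java.util.Optional;

public class IdleWriterSketch
{
    private final DataSize idleWriterMinDataSizeThreshold = DataSize.of(256, DataSize.Unit.MEGABYTE);
    private long lastPhysicalWrittenDataSize;

    // physicalWrittenDataSize and writerCount would come from the task context;
    // closeIdleWriters from the connector page sink.
    void maybeCloseIdleWriters(long physicalWrittenDataSize, Optional<Integer> writerCount, Runnable closeIdleWriters)
    {
        if (writerCount.isEmpty()
                || physicalWrittenDataSize - lastPhysicalWrittenDataSize <= idleWriterMinDataSizeThreshold.toBytes() * writerCount.get()) {
            return; // not enough new data since the last sweep to justify another one
        }
        closeIdleWriters.run();
        lastPhysicalWrittenDataSize = physicalWrittenDataSize;
    }
}
```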
diff --git a/core/trino-main/src/main/java/io/trino/operator/TaskContext.java b/core/trino-main/src/main/java/io/trino/operator/TaskContext.java
index f0cfa007d5e1a..e72670b2f4c69 100644
--- a/core/trino-main/src/main/java/io/trino/operator/TaskContext.java
+++ b/core/trino-main/src/main/java/io/trino/operator/TaskContext.java
@@ -69,6 +69,7 @@ public class TaskContext
private final GcMonitor gcMonitor;
private final Executor notificationExecutor;
private final ScheduledExecutorService yieldExecutor;
+ private final ScheduledExecutorService timeoutExecutor;
private final Session session;
private final long createNanos = System.nanoTime();
@@ -117,6 +118,7 @@ public static TaskContext createTaskContext(
GcMonitor gcMonitor,
Executor notificationExecutor,
ScheduledExecutorService yieldExecutor,
+ ScheduledExecutorService timeoutExecutor,
Session session,
MemoryTrackingContext taskMemoryContext,
Runnable notifyStatusChanged,
@@ -129,6 +131,7 @@ public static TaskContext createTaskContext(
gcMonitor,
notificationExecutor,
yieldExecutor,
+ timeoutExecutor,
session,
taskMemoryContext,
notifyStatusChanged,
@@ -144,6 +147,7 @@ private TaskContext(
GcMonitor gcMonitor,
Executor notificationExecutor,
ScheduledExecutorService yieldExecutor,
+ ScheduledExecutorService timeoutExecutor,
Session session,
MemoryTrackingContext taskMemoryContext,
Runnable notifyStatusChanged,
@@ -155,6 +159,7 @@ private TaskContext(
this.queryContext = requireNonNull(queryContext, "queryContext is null");
this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
this.yieldExecutor = requireNonNull(yieldExecutor, "yieldExecutor is null");
+ this.timeoutExecutor = requireNonNull(timeoutExecutor, "timeoutExecutor is null");
this.session = session;
this.taskMemoryContext = requireNonNull(taskMemoryContext, "taskMemoryContext is null");
@@ -186,6 +191,7 @@ public PipelineContext addPipelineContext(int pipelineId, boolean inputPipeline,
this,
notificationExecutor,
yieldExecutor,
+ timeoutExecutor,
taskMemoryContext.newMemoryTrackingContext(),
inputPipeline,
outputPipeline,
@@ -379,7 +385,7 @@ public void setMaxWriterCount(int maxWriterCount)
checkArgument(maxWriterCount > 0, "maxWriterCount must be > 0");
int oldMaxWriterCount = this.maxWriterCount.getAndSet(maxWriterCount);
- checkArgument(oldMaxWriterCount == -1 || oldMaxWriterCount == maxWriterCount, "maxWriterCount already set to " + oldMaxWriterCount);
+ checkArgument(oldMaxWriterCount == -1 || oldMaxWriterCount == maxWriterCount, "maxWriterCount already set to %s", oldMaxWriterCount);
}
public Optional<Integer> getMaxWriterCount()
diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java
index 4d4bfc76934fe..a18380534ad5f 100644
--- a/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java
+++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/AbstractMapAggregationState.java
@@ -186,7 +186,7 @@ public AbstractMapAggregationState(AbstractMapAggregationState state)
private static byte[][] createRecordGroups(int capacity, int recordSize)
{
if (capacity < RECORDS_PER_GROUP) {
- return new byte[][]{new byte[multiplyExact(capacity, recordSize)]};
+ return new byte[][] {new byte[multiplyExact(capacity, recordSize)]};
}
byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][];
diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java
index e40f503047a0f..f4763b6c0037b 100644
--- a/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java
+++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/histogram/TypedHistogram.java
@@ -139,7 +139,7 @@ public TypedHistogram(
private static byte[][] createRecordGroups(int capacity, int recordSize)
{
if (capacity < RECORDS_PER_GROUP) {
- return new byte[][]{new byte[multiplyExact(capacity, recordSize)]};
+ return new byte[][] {new byte[multiplyExact(capacity, recordSize)]};
}
byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][];
diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java
index 5a69677e9168b..0d2d57fffd9a1 100644
--- a/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java
+++ b/core/trino-main/src/main/java/io/trino/operator/aggregation/multimapagg/AbstractMultimapAggregationState.java
@@ -191,7 +191,7 @@ public AbstractMultimapAggregationState(AbstractMultimapAggregationState state)
private static byte[][] createRecordGroups(int capacity, int recordSize)
{
if (capacity < RECORDS_PER_GROUP) {
- return new byte[][]{new byte[multiplyExact(capacity, recordSize)]};
+ return new byte[][] {new byte[multiplyExact(capacity, recordSize)]};
}
byte[][] groups = new byte[(capacity + 1) >> RECORDS_PER_GROUP_SHIFT][];
diff --git a/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java b/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java
index ea5486a637509..cda37c84c4785 100644
--- a/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java
+++ b/core/trino-main/src/main/java/io/trino/operator/exchange/LocalExchange.java
@@ -53,7 +53,6 @@
import static io.trino.SystemSessionProperties.getSkewedPartitionMinDataProcessedRebalanceThreshold;
import static io.trino.operator.InterpretedHashGenerator.createChannelsHashGenerator;
import static io.trino.operator.exchange.LocalExchangeSink.finishedLocalExchangeSink;
-import static io.trino.operator.output.SkewedPartitionRebalancer.getScaleWritersMaxSkewedPartitions;
import static io.trino.sql.planner.PartitioningHandle.isScaledWriterHashDistribution;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION;
@@ -61,7 +60,6 @@
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION;
-import static java.lang.Math.max;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
@@ -147,12 +145,7 @@ else if (isScaledWriterHashDistribution(partitioning)) {
bufferCount,
1,
writerScalingMinDataProcessed.toBytes(),
- getSkewedPartitionMinDataProcessedRebalanceThreshold(session).toBytes(),
- // Keep the maxPartitionsToRebalance to atleast writer count such that single partition writes do
- // not suffer from skewness and can scale uniformly across all writers. Additionally, note that
- // maxWriterCount is calculated considering memory into account. So, it is safe to set the
- // maxPartitionsToRebalance to maximum number of writers.
- max(getScaleWritersMaxSkewedPartitions(session), bufferCount));
+ getSkewedPartitionMinDataProcessedRebalanceThreshold(session).toBytes());
LocalExchangeMemoryManager memoryManager = new LocalExchangeMemoryManager(maxBufferedBytes.toBytes());
sources = IntStream.range(0, bufferCount)
.mapToObj(i -> new LocalExchangeSource(memoryManager, source -> checkAllSourcesFinished()))
diff --git a/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java b/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java
index c4f7e56fcc469..8bc67b4d0b5c9 100644
--- a/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java
+++ b/core/trino-main/src/main/java/io/trino/operator/exchange/ScaleWriterPartitioningExchanger.java
@@ -31,6 +31,7 @@
public class ScaleWriterPartitioningExchanger
implements LocalExchanger
{
+ private static final double SCALE_WRITER_MEMORY_PERCENTAGE = 0.7;
private final List<Consumer<Page>> buffers;
private final LocalExchangeMemoryManager memoryManager;
private final long maxBufferedBytes;
@@ -83,11 +84,18 @@ public ScaleWriterPartitioningExchanger(
@Override
public void accept(Page page)
{
+ // Reset the partition row counts, writer ids, and data processed for this page
+ long dataProcessed = 0;
+ for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) {
+ partitionRowCounts[partitionId] = 0;
+ partitionWriterIds[partitionId] = -1;
+ }
+
// Scale up writers when current buffer memory utilization is more than 50% of the maximum.
- // Do not scale up if total memory used is greater than 50% of max memory per node.
+ // Do not scale up if total memory used is greater than 70% of max memory per node.
// We have to be conservative here otherwise scaling of writers will happen first
// before we hit this limit, and then we won't be able to do anything to stop OOM error.
- if (memoryManager.getBufferedBytes() > maxBufferedBytes * 0.5 && totalMemoryUsed.get() < maxMemoryPerNode * 0.5) {
+ if (memoryManager.getBufferedBytes() > maxBufferedBytes * 0.5 && totalMemoryUsed.get() < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE) {
partitionRebalancer.rebalance();
}
@@ -111,13 +119,6 @@ public void accept(Page page)
writerAssignments[writerId].add(position);
}
- for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) {
- partitionRebalancer.addPartitionRowCount(partitionId, partitionRowCounts[partitionId]);
- // Reset the value of partition row count
- partitionRowCounts[partitionId] = 0;
- partitionWriterIds[partitionId] = -1;
- }
-
// build a page for each writer
for (int bucket = 0; bucket < writerAssignments.length; bucket++) {
IntArrayList positionsList = writerAssignments[bucket];
@@ -135,12 +136,22 @@ public void accept(Page page)
// whole input page will go to this partition, compact the input page to avoid over-retaining memory and to
// match the behavior of sub-partitioned pages that copy positions out
page.compact();
- sendPageToPartition(buffers.get(bucket), page);
- return;
+ dataProcessed += sendPageToPartition(buffers.get(bucket), page);
+ break;
}
Page pageSplit = page.copyPositions(positions, 0, bucketSize);
- sendPageToPartition(buffers.get(bucket), pageSplit);
+ dataProcessed += sendPageToPartition(buffers.get(bucket), pageSplit);
+ }
+
+ // Only update the scaling state if the memory used is below the SCALE_WRITER_MEMORY_PERCENTAGE limit. Otherwise, if we keep updating
+ // the scaling state while the memory used fluctuates around the limit, we could do massive scaling
+ // in a single rebalancing cycle, which could cause an OOM error.
+ if (totalMemoryUsed.get() < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE) {
+ for (int partitionId = 0; partitionId < partitionRowCounts.length; partitionId++) {
+ partitionRebalancer.addPartitionRowCount(partitionId, partitionRowCounts[partitionId]);
+ }
+ partitionRebalancer.addDataProcessed(dataProcessed);
}
}
@@ -155,11 +166,11 @@ private int getNextWriterId(int partitionId)
return partitionRebalancer.getTaskId(partitionId, partitionWriterIndexes[partitionId]++);
}
- private void sendPageToPartition(Consumer<Page> buffer, Page pageSplit)
+ private long sendPageToPartition(Consumer<Page> buffer, Page pageSplit)
{
long retainedSizeInBytes = pageSplit.getRetainedSizeInBytes();
- partitionRebalancer.addDataProcessed(retainedSizeInBytes);
memoryManager.updateMemoryUsage(retainedSizeInBytes);
buffer.accept(pageSplit);
+ return retainedSizeInBytes;
}
}
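The net effect of this change is that writer scaling is now gated twice on node memory: a rebalance triggers only while total memory used is below 70% of the per-node limit, and the rebalancer's inputs (partition row counts and data processed) are also only updated below that same limit, so a node hovering at the threshold cannot accumulate stats and then scale massively in one cycle. A condensed model of the two gates, with the fields standing in for the exchanger's state (illustrative, not the engine code):

```java
// Condensed model of the two memory gates in accept(); names mirror the fields above.
final class ScalingGate
{
    private static final double SCALE_WRITER_MEMORY_PERCENTAGE = 0.7;

    private final long maxBufferedBytes;
    private final long maxMemoryPerNode;

    ScalingGate(long maxBufferedBytes, long maxMemoryPerNode)
    {
        this.maxBufferedBytes = maxBufferedBytes;
        this.maxMemoryPerNode = maxMemoryPerNode;
    }

    // gate 1: rebalance only under buffer pressure, and never when the node
    // is already close to its memory limit
    boolean shouldRebalance(long bufferedBytes, long totalMemoryUsed)
    {
        return bufferedBytes > maxBufferedBytes * 0.5
                && totalMemoryUsed < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE;
    }

    // gate 2: feed new statistics to the rebalancer only below the same limit, so
    // fluctuating around the threshold cannot queue up a massive scaling step
    boolean shouldUpdateScalingState(long totalMemoryUsed)
    {
        return totalMemoryUsed < maxMemoryPerNode * SCALE_WRITER_MEMORY_PERCENTAGE;
    }
}
```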
diff --git a/core/trino-main/src/main/java/io/trino/index/IndexManager.java b/core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java
similarity index 98%
rename from core/trino-main/src/main/java/io/trino/index/IndexManager.java
rename to core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java
index c204fbe804498..cd8b74ff7714b 100644
--- a/core/trino-main/src/main/java/io/trino/index/IndexManager.java
+++ b/core/trino-main/src/main/java/io/trino/operator/index/IndexManager.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.index;
+package io.trino.operator.index;
import com.google.inject.Inject;
import io.trino.Session;
diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java b/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java
index 4a58724a729a4..a9446d3aba047 100644
--- a/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java
+++ b/core/trino-main/src/main/java/io/trino/operator/output/PagePartitioner.java
@@ -69,6 +69,9 @@ public class PagePartitioner
private final int nullChannel; // when >= 0, send the position to every partition if this channel is null
private boolean hasAnyRowBeenReplicated;
+ // output size in bytes that has already been reported to the operator stats during release; it is subtracted
+ // from future stats reporting to avoid double counting
+ private long outputSizeReportedBeforeRelease;
public PagePartitioner(
PartitionFunction partitionFunction,
@@ -135,7 +138,6 @@ public void partitionPage(Page page, OperatorContext operatorContext)
}
int outputPositionCount = replicatesAnyRow && !hasAnyRowBeenReplicated ? page.getPositionCount() + positionsAppenders.length - 1 : page.getPositionCount();
- long positionsAppendersSizeBefore = getPositionsAppendersSizeInBytes();
if (page.getPositionCount() < partitionFunction.partitionCount() * COLUMNAR_STRATEGY_COEFFICIENT) {
// Partition will have on average less than COLUMNAR_STRATEGY_COEFFICIENT rows.
// Doing it column-wise would degrade performance, so we fall back to row-wise approach.
@@ -146,11 +148,73 @@ public void partitionPage(Page page, OperatorContext operatorContext)
else {
partitionPageByColumn(page);
}
- long positionsAppendersSizeAfter = getPositionsAppendersSizeInBytes();
- flushPositionsAppenders(false);
+ long outputSizeInBytes = flushPositionsAppenders(false);
updateMemoryUsage();
+ operatorContext.recordOutput(outputSizeInBytes, outputPositionCount);
+ }
+
+ private long adjustFlushedOutputSizeWithEagerlyReportedBytes(long flushedOutputSize)
+ {
+ // Reduce the flushed output size by the previously eagerly reported amount to avoid double counting
+ if (outputSizeReportedBeforeRelease > 0) {
+ long adjustmentAmount = min(flushedOutputSize, outputSizeReportedBeforeRelease);
+ outputSizeReportedBeforeRelease -= adjustmentAmount;
+ flushedOutputSize -= adjustmentAmount;
+ }
+ return flushedOutputSize;
+ }
- operatorContext.recordOutput(positionsAppendersSizeAfter - positionsAppendersSizeBefore, outputPositionCount);
+ private long adjustEagerlyReportedBytesWithBufferedBytesOnRelease(long bufferedBytesOnRelease)
+ {
+ // adjust the amount to eagerly report as output by the amount already eagerly reported if the new value
+ // is larger, since this indicates that no data was flushed and only the delta between the two values should
+ // be reported eagerly
+ if (bufferedBytesOnRelease > outputSizeReportedBeforeRelease) {
+ long additionalBufferedBytes = bufferedBytesOnRelease - outputSizeReportedBeforeRelease;
+ outputSizeReportedBeforeRelease = bufferedBytesOnRelease;
+ return additionalBufferedBytes;
+ }
+ else {
+ // buffered size is unchanged or reduced (as a result of flushing) since last release, so
+ // do not report any additional bytes as output eagerly
+ return 0;
+ }
+ }
+
+ /**
+ * Prepares this {@link PagePartitioner} for release to the pool by checking for dictionary mode appenders and either flattening
+ * them into direct appenders or forcing their current pages to flush to preserve a valuable dictionary encoded representation. This
+ * is done before release because we know that after reuse, the appenders will not observe any more inputs using the same dictionary.
+ *
+ * When a {@link PagePartitioner} is released back to the {@link PagePartitionerPool} we don't know if it will ever be reused. If it is not
+ * reused, then we have no {@link OperatorContext} we can use to report the output size of the final flushed page, so instead we report the
+ * buffered bytes still in the partitioner after {@link PagePartitioner#prepareForRelease(OperatorContext)} as output bytes eagerly and record
+ * that amount in {@link #outputSizeReportedBeforeRelease}. If the {@link PagePartitioner} is reused after having reported buffered bytes eagerly,
+ * we then have to subtract that same amount from the subsequent output bytes to avoid double counting them.
+ */
+ public void prepareForRelease(OperatorContext operatorContext)
+ {
+ long bufferedSizeInBytes = 0;
+ long outputSizeInBytes = 0;
+ for (int partition = 0; partition < positionsAppenders.length; partition++) {
+ PositionsAppenderPageBuilder positionsAppender = positionsAppenders[partition];
+ Optional<Page> flushedPage = positionsAppender.flushOrFlattenBeforeRelease();
+ if (flushedPage.isPresent()) {
+ Page page = flushedPage.get();
+ outputSizeInBytes += page.getSizeInBytes();
+ enqueuePage(page, partition);
+ }
+ else {
+ // Dictionaries have now been flattened, so the new reported size is trustworthy to report
+ // eagerly
+ bufferedSizeInBytes += positionsAppender.getSizeInBytes();
+ }
+ }
+ updateMemoryUsage();
+ // Adjust flushed and buffered values against the previously eagerly reported sizes
+ outputSizeInBytes = adjustFlushedOutputSizeWithEagerlyReportedBytes(outputSizeInBytes);
+ bufferedSizeInBytes = adjustEagerlyReportedBytesWithBufferedBytesOnRelease(bufferedSizeInBytes);
+ operatorContext.recordOutput(outputSizeInBytes + bufferedSizeInBytes, 0 /* no new positions */);
}
public void partitionPageByRow(Page page)
@@ -210,15 +274,6 @@ public void partitionPageByColumn(Page page)
}
}
- private long getPositionsAppendersSizeInBytes()
- {
- long sizeInBytes = 0;
- for (PositionsAppenderPageBuilder pageBuilder : positionsAppenders) {
- sizeInBytes += pageBuilder.getSizeInBytes();
- }
- return sizeInBytes;
- }
-
private IntArrayList[] partitionPositions(Page page)
{
verify(page.getPositionCount() > 0, "position count is 0");
@@ -424,6 +479,7 @@ public void close()
{
try {
flushPositionsAppenders(true);
+ outputSizeReportedBeforeRelease = 0;
}
finally {
// clear buffers before memory release
@@ -432,16 +488,19 @@ public void close()
}
}
- private void flushPositionsAppenders(boolean force)
+ private long flushPositionsAppenders(boolean force)
{
+ long outputSizeInBytes = 0;
// add all full pages to output buffer
for (int partition = 0; partition < positionsAppenders.length; partition++) {
PositionsAppenderPageBuilder partitionPageBuilder = positionsAppenders[partition];
if (!partitionPageBuilder.isEmpty() && (force || partitionPageBuilder.isFull())) {
Page pagePartition = partitionPageBuilder.build();
+ outputSizeInBytes += pagePartition.getSizeInBytes();
enqueuePage(pagePartition, partition);
}
}
+ return adjustFlushedOutputSizeWithEagerlyReportedBytes(outputSizeInBytes);
}
private void enqueuePage(Page pagePartition, int partition)
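The interplay of the two adjustment helpers is easiest to see with concrete numbers. The model below reproduces just the bookkeeping: bytes still buffered at release are reported eagerly once, and any later flush is reduced by the amount already reported. This is an illustration of the accounting, not the engine class:

```java
// Self-contained model of the double-counting guard; method names loosely mirror
// adjustFlushedOutputSizeWithEagerlyReportedBytes and its counterpart above.
public final class EagerOutputAccounting
{
    private long outputSizeReportedBeforeRelease;

    // on flush: subtract whatever was already reported eagerly
    long onFlush(long flushedOutputSize)
    {
        long adjustment = Math.min(flushedOutputSize, outputSizeReportedBeforeRelease);
        outputSizeReportedBeforeRelease -= adjustment;
        return flushedOutputSize - adjustment;
    }

    // on release: report only the growth in buffered bytes since the last release
    long onRelease(long bufferedBytesOnRelease)
    {
        if (bufferedBytesOnRelease > outputSizeReportedBeforeRelease) {
            long additional = bufferedBytesOnRelease - outputSizeReportedBeforeRelease;
            outputSizeReportedBeforeRelease = bufferedBytesOnRelease;
            return additional;
        }
        return 0; // buffered size unchanged or reduced by a flush: nothing new to report
    }

    public static void main(String[] args)
    {
        EagerOutputAccounting accounting = new EagerOutputAccounting();
        System.out.println(accounting.onRelease(100)); // 100: all buffered bytes reported eagerly
        System.out.println(accounting.onFlush(150));   // 50: the 100 already reported is subtracted
        System.out.println(accounting.onRelease(80));  // 80: state was back at 0 after the flush
    }
}
```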
diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java b/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java
index 0bc28fee83302..fd683e126352e 100644
--- a/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java
+++ b/core/trino-main/src/main/java/io/trino/operator/output/PartitionedOutputOperator.java
@@ -284,6 +284,7 @@ public OperatorContext getOperatorContext()
public void finish()
{
if (!finished) {
+ pagePartitioner.prepareForRelease(operatorContext);
pagePartitionerPool.release(pagePartitioner);
finished = true;
}
diff --git a/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java b/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java
index 4ba6fd3361dfb..91948beec7611 100644
--- a/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java
+++ b/core/trino-main/src/main/java/io/trino/operator/output/PositionsAppenderPageBuilder.java
@@ -21,6 +21,7 @@
import it.unimi.dsi.fastutil.ints.IntArrayList;
import java.util.List;
+import java.util.Optional;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
@@ -144,6 +145,32 @@ public boolean isEmpty()
return declaredPositions == 0;
}
+ public Optional<Page> flushOrFlattenBeforeRelease()
+ {
+ if (declaredPositions == 0) {
+ return Optional.empty();
+ }
+
+ for (UnnestingPositionsAppender positionsAppender : channelAppenders) {
+ if (positionsAppender.shouldForceFlushBeforeRelease()) {
+ // dictionary encoding will be preserved, so force the current page to be flushed
+ return Optional.of(build());
+ }
+ }
+
+ // transition from dictionary to direct mode if necessary, since we won't be able to reuse the
+ // same dictionary from the new operator
+ for (UnnestingPositionsAppender positionsAppender : channelAppenders) {
+ positionsAppender.flattenPendingDictionary();
+ }
+
+ // flush the current page if the builder is now full as a result of transitioning dictionaries to direct mode
+ if (isFull()) {
+ return Optional.of(build());
+ }
+ return Optional.empty();
+ }
+
public Page build()
{
Block[] blocks = new Block[channelAppenders.length];
diff --git a/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java b/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java
index 458b4ccd17d37..97a839b74b27c 100644
--- a/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java
+++ b/core/trino-main/src/main/java/io/trino/operator/output/SkewedPartitionRebalancer.java
@@ -32,7 +32,6 @@
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicLongArray;
import java.util.stream.IntStream;
@@ -86,12 +85,10 @@ public class SkewedPartitionRebalancer
private final int taskBucketCount;
private final long minPartitionDataProcessedRebalanceThreshold;
private final long minDataProcessedRebalanceThreshold;
- private final int maxPartitionsToRebalance;
private final AtomicLongArray partitionRowCount;
private final AtomicLong dataProcessed;
private final AtomicLong dataProcessedAtLastRebalance;
- private final AtomicInteger numOfRebalancedPartitions;
@GuardedBy("this")
private final long[] partitionDataSize;
@@ -158,12 +155,6 @@ public static int getMaxWritersBasedOnMemory(Session session)
return (int) ceil((double) getQueryMaxMemoryPerNode(session).toBytes() / getMaxMemoryPerPartitionWriter(session).toBytes());
}
- public static int getScaleWritersMaxSkewedPartitions(Session session)
- {
- // Set the value of maxSkewedPartitions to scale to 60% of maximum number of writers possible per node.
- return (int) (getMaxWritersBasedOnMemory(session) * 0.60);
- }
-
public static int getTaskCount(PartitioningScheme partitioningScheme)
{
// Todo: Handle skewness if there are more nodes/tasks than the buckets coming from connector
@@ -179,20 +170,17 @@ public SkewedPartitionRebalancer(
int taskCount,
int taskBucketCount,
long minPartitionDataProcessedRebalanceThreshold,
- long maxDataProcessedRebalanceThreshold,
- int maxPartitionsToRebalance)
+ long maxDataProcessedRebalanceThreshold)
{
this.partitionCount = partitionCount;
this.taskCount = taskCount;
this.taskBucketCount = taskBucketCount;
this.minPartitionDataProcessedRebalanceThreshold = minPartitionDataProcessedRebalanceThreshold;
this.minDataProcessedRebalanceThreshold = max(minPartitionDataProcessedRebalanceThreshold, maxDataProcessedRebalanceThreshold);
- this.maxPartitionsToRebalance = maxPartitionsToRebalance;
this.partitionRowCount = new AtomicLongArray(partitionCount);
this.dataProcessed = new AtomicLong();
this.dataProcessedAtLastRebalance = new AtomicLong();
- this.numOfRebalancedPartitions = new AtomicInteger();
this.partitionDataSize = new long[partitionCount];
this.partitionDataSizeAtLastRebalance = new long[partitionCount];
@@ -254,9 +242,7 @@ public void rebalance()
private boolean shouldRebalance(long dataProcessed)
{
// Rebalance only when total bytes processed since last rebalance is greater than rebalance threshold.
- // Check if the number of rebalanced partitions is less than maxPartitionsToRebalance.
- return (dataProcessed - dataProcessedAtLastRebalance.get()) >= minDataProcessedRebalanceThreshold
- && numOfRebalancedPartitions.get() < maxPartitionsToRebalance;
+ return (dataProcessed - dataProcessedAtLastRebalance.get()) >= minDataProcessedRebalanceThreshold;
}
private synchronized void rebalancePartitions(long dataProcessed)
@@ -317,7 +303,15 @@ private void calculatePartitionDataSize(long dataProcessed)
}
for (int partition = 0; partition < partitionCount; partition++) {
- partitionDataSize[partition] = (partitionRowCount.get(partition) * dataProcessed) / totalPartitionRowCount;
+ // We estimate partitionDataSize from partitionRowCount and the total data processed, so the new
+ // estimate can come out slightly lower than the one from the last rebalance cycle: a partition's
+ // row count may not have increased even though the overall data processed has. Therefore, clamp
+ // the estimate to at least its previous value; otherwise the ordering of the minTaskBuckets
+ // priority queue would be affected.
+ partitionDataSize[partition] = max(
+ (partitionRowCount.get(partition) * dataProcessed) / totalPartitionRowCount,
+ partitionDataSize[partition]);
}
}
@@ -412,12 +406,6 @@ private boolean rebalancePartition(
return false;
}
- // If the number of rebalanced partitions is less than maxPartitionsToRebalance then assign
- // the partition to the task.
- if (numOfRebalancedPartitions.get() >= maxPartitionsToRebalance) {
- return false;
- }
-
assignments.add(toTaskBucket);
int newTaskCount = assignments.size();
@@ -438,8 +426,6 @@ private boolean rebalancePartition(
minTasks.addOrUpdate(taskBucket, Long.MAX_VALUE - estimatedTaskBucketDataSizeSinceLastRebalance[taskBucket.id]);
}
- // Increment the number of rebalanced partitions.
- numOfRebalancedPartitions.incrementAndGet();
log.debug("Rebalanced partition %s to task %s with taskCount %s", partitionId, toTaskBucket.taskId, assignments.size());
return true;
}
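A quick numeric check of the clamping added in calculatePartitionDataSize, with made-up values: suppose a partition contributed 10 of 100 rows when 1,000 bytes had been processed, and by the next cycle it still has 10 rows out of 300 while 2,400 bytes have been processed. The raw estimate would shrink even though the partition lost no data, so the max() keeps it monotonic:

```java
// Hypothetical numbers, not taken from the engine:
long firstEstimate = (10L * 1000) / 100;                          // 100 bytes attributed to the partition
long rawSecondEstimate = (10L * 2400) / 300;                      // 80 bytes: row share shrank faster than data grew
long secondEstimate = Math.max(rawSecondEstimate, firstEstimate); // clamped back to 100
```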
diff --git a/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java b/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java
index 23d2c11478615..258aeb54bd5e8 100644
--- a/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java
+++ b/core/trino-main/src/main/java/io/trino/operator/output/UnnestingPositionsAppender.java
@@ -20,6 +20,7 @@
import io.trino.type.BlockTypeOperators.BlockPositionIsDistinctFrom;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
+import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import jakarta.annotation.Nullable;
import java.util.Optional;
@@ -52,6 +53,7 @@ private enum State
private State state = State.UNINITIALIZED;
+ @Nullable
private ValueBlock dictionary;
private DictionaryIdsBuilder dictionaryIdsBuilder;
@@ -219,6 +221,28 @@ void addSizesToAccumulator(PositionsAppenderSizeAccumulator accumulator)
accumulator.accumulate(sizeInBytes, directSizeInBytes);
}
+ public void flattenPendingDictionary()
+ {
+ if (state == State.DICTIONARY && dictionary != null) {
+ transitionToDirect();
+ }
+ }
+
+ public boolean shouldForceFlushBeforeRelease()
+ {
+ if (state == State.DICTIONARY && dictionary != null) {
+ IntOpenHashSet uniqueIdsSet = new IntOpenHashSet();
+ int[] dictionaryIds = dictionaryIdsBuilder.getDictionaryIds();
+ for (int i = 0; i < dictionaryIdsBuilder.size(); i++) {
+ // At least one position is referenced multiple times, preserve the dictionary encoding and force the current page to flush
+ if (!uniqueIdsSet.add(dictionaryIds[i])) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
private static class DictionaryIdsBuilder
{
private static final int INSTANCE_SIZE = instanceSize(DictionaryIdsBuilder.class);
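shouldForceFlushBeforeRelease boils down to one question: does any dictionary id occur twice? If so, the dictionary encoding is actually deduplicating values and is worth preserving by flushing the current page now; if every id is unique, flattening to direct mode loses nothing. The check in isolation, as a standalone sketch (the trimmed-length handling is an assumption):

```java
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

final class DictionaryChecks
{
    private DictionaryChecks() {}

    // returns true when some dictionary position is referenced more than once,
    // i.e. the dictionary encoding is paying for itself
    static boolean hasRepeatedIds(int[] dictionaryIds, int length)
    {
        IntOpenHashSet seen = new IntOpenHashSet();
        for (int i = 0; i < length; i++) {
            if (!seen.add(dictionaryIds[i])) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args)
    {
        System.out.println(hasRepeatedIds(new int[] {3, 1, 3}, 3)); // true: id 3 repeats, keep the dictionary
        System.out.println(hasRepeatedIds(new int[] {0, 1, 2}, 3)); // false: flattening is safe
    }
}
```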
diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java
index 34708653f848b..37df8394fb58c 100644
--- a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java
+++ b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java
@@ -127,7 +127,12 @@ public static Slice currentTimeZone(ConnectorSession session)
public static long fromUnixTime(ConnectorSession session, @SqlType(StandardTypes.DOUBLE) double unixTime)
{
// TODO (https://github.com/trinodb/trino/issues/5781)
- return packDateTimeWithZone(Math.round(unixTime * 1000), session.getTimeZoneKey());
+ try {
+ return packDateTimeWithZone(Math.round(unixTime * 1000), session.getTimeZoneKey());
+ }
+ catch (IllegalArgumentException e) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
+ }
}
@ScalarFunction("from_unixtime")
@@ -137,11 +142,11 @@ public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime,
TimeZoneKey timeZoneKey;
try {
timeZoneKey = getTimeZoneKeyForOffset(toIntExact(hoursOffset * 60 + minutesOffset));
+ return packDateTimeWithZone(Math.round(unixTime * 1000), timeZoneKey);
}
catch (IllegalArgumentException e) {
throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
}
- return packDateTimeWithZone(Math.round(unixTime * 1000), timeZoneKey);
}
@ScalarFunction("from_unixtime")
@@ -149,7 +154,12 @@ public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime,
@SqlType("timestamp(3) with time zone")
public static long fromUnixTime(@SqlType(StandardTypes.DOUBLE) double unixTime, @SqlType("varchar(x)") Slice zoneId)
{
- return packDateTimeWithZone(Math.round(unixTime * 1000), zoneId.toStringUtf8());
+ try {
+ return packDateTimeWithZone(Math.round(unixTime * 1000), zoneId.toStringUtf8());
+ }
+ catch (IllegalArgumentException e) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
+ }
}
@ScalarFunction("from_unixtime_nanos")
@@ -172,7 +182,12 @@ public static LongTimestampWithTimeZone fromLong(@LiteralParameter("s") long sca
epochSeconds -= 1;
picosOfSecond += PICOSECONDS_PER_SECOND;
}
- return DateTimes.longTimestampWithTimeZone(epochSeconds, picosOfSecond, session.getTimeZoneKey().getZoneId());
+ try {
+ return DateTimes.longTimestampWithTimeZone(epochSeconds, picosOfSecond, session.getTimeZoneKey().getZoneId());
+ }
+ catch (ArithmeticException e) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
+ }
}
@LiteralParameters({"p", "s"})
@@ -216,7 +231,12 @@ public static long fromISO8601Timestamp(ConnectorSession session, @SqlType("varc
DateTimeFormatter formatter = ISODateTimeFormat.dateTimeParser()
.withChronology(getChronology(session.getTimeZoneKey()))
.withOffsetParsed();
- return packDateTimeWithZone(parseDateTimeHelper(formatter, iso8601DateTime.toStringUtf8()));
+ try {
+ return packDateTimeWithZone(parseDateTimeHelper(formatter, iso8601DateTime.toStringUtf8()));
+ }
+ catch (IllegalArgumentException e) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e);
+ }
}
@ScalarFunction("from_iso8601_timestamp_nanos")
diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java b/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java
index e258d38f60efe..7ca6ebf76dd77 100644
--- a/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java
+++ b/core/trino-main/src/main/java/io/trino/operator/scalar/WordStemFunction.java
@@ -20,7 +20,7 @@
import io.trino.spi.function.LiteralParameters;
import io.trino.spi.function.ScalarFunction;
import io.trino.spi.function.SqlType;
-import org.tartarus.snowball.SnowballProgram;
+import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.ArmenianStemmer;
import org.tartarus.snowball.ext.BasqueStemmer;
import org.tartarus.snowball.ext.CatalanStemmer;
@@ -52,7 +52,7 @@ public final class WordStemFunction
{
private WordStemFunction() {}
- private static final Map<Slice, Supplier<SnowballProgram>> STEMMERS = ImmutableMap.<Slice, Supplier<SnowballProgram>>builder()
+ private static final Map<Slice, Supplier<SnowballStemmer>> STEMMERS = ImmutableMap.<Slice, Supplier<SnowballStemmer>>builder()
.put(utf8Slice("ca"), CatalanStemmer::new)
.put(utf8Slice("da"), DanishStemmer::new)
.put(utf8Slice("de"), German2Stemmer::new)
@@ -90,14 +90,14 @@ public static Slice wordStem(@SqlType("varchar(x)") Slice slice)
@SqlType("varchar(x)")
public static Slice wordStem(@SqlType("varchar(x)") Slice slice, @SqlType("varchar(2)") Slice language)
{
- Supplier<SnowballProgram> stemmer = STEMMERS.get(language);
+ Supplier<SnowballStemmer> stemmer = STEMMERS.get(language);
if (stemmer == null) {
throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Unknown stemmer language: " + language.toStringUtf8());
}
return wordStem(slice, stemmer.get());
}
- private static Slice wordStem(Slice slice, SnowballProgram stemmer)
+ private static Slice wordStem(Slice slice, SnowballStemmer stemmer)
{
stemmer.setCurrent(slice.toStringUtf8());
return stemmer.stem() ? utf8Slice(stemmer.getCurrent()) : slice;
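This file tracks Lucene's rename of org.tartarus.snowball.SnowballProgram to SnowballStemmer; the stemming calls themselves (setCurrent/stem/getCurrent) are unchanged. A hypothetical standalone usage against the renamed type, assuming Lucene's snowball classes are on the classpath and that EnglishStemmer follows the naming of the stemmers imported above:

```java
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.EnglishStemmer;

public final class StemExample
{
    public static void main(String[] args)
    {
        SnowballStemmer stemmer = new EnglishStemmer();
        stemmer.setCurrent("running");
        // stem() reports success; on success the stemmed form is available via getCurrent()
        System.out.println(stemmer.stem() ? stemmer.getCurrent() : "running"); // prints "run"
    }
}
```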
diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java b/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java
index b778ef6135ac5..17f6a39568030 100644
--- a/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java
+++ b/core/trino-main/src/main/java/io/trino/operator/scalar/timetz/VarcharToTimeWithTimeZoneCast.java
@@ -51,11 +51,11 @@ private VarcharToTimeWithTimeZoneCast() {}
@SqlType("time(p) with time zone")
public static long castToShort(@LiteralParameter("p") long precision, ConnectorSession session, @SqlType("varchar(x)") Slice value)
{
- checkArgument((int) precision <= MAX_SHORT_PRECISION, "precision must be less than max short timestamp precision");
+ checkArgument((int) precision <= MAX_SHORT_PRECISION, "precision must be less than max short time with time zone precision");
Matcher matcher = DateTimes.TIME_PATTERN.matcher(trim(value).toStringUtf8());
if (!matcher.matches()) {
- throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8());
+ throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8());
}
try {
@@ -67,7 +67,7 @@ public static long castToShort(@LiteralParameter("p") long precision, ConnectorS
return packTimeWithTimeZone(nanos, offsetMinutes);
}
catch (IllegalArgumentException e) {
- throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e);
+ throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8(), e);
}
}
@@ -79,7 +79,7 @@ public static LongTimeWithTimeZone castToLong(@LiteralParameter("p") long precis
Matcher matcher = DateTimes.TIME_PATTERN.matcher(trim(value).toStringUtf8());
if (!matcher.matches()) {
- throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8());
+ throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8());
}
try {
@@ -91,7 +91,7 @@ public static LongTimeWithTimeZone castToLong(@LiteralParameter("p") long precis
return new LongTimeWithTimeZone(picos, offsetMinutes);
}
catch (IllegalArgumentException e) {
- throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e);
+ throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time with time zone: " + value.toStringUtf8(), e);
}
}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java
deleted file mode 100644
index d650c97b2b987..0000000000000
--- a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumns.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.operator.table;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Sets;
-import com.google.inject.Provider;
-import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorTableFunction;
-import io.trino.spi.TrinoException;
-import io.trino.spi.connector.ConnectorAccessControl;
-import io.trino.spi.connector.ConnectorSession;
-import io.trino.spi.connector.ConnectorTransactionHandle;
-import io.trino.spi.function.table.AbstractConnectorTableFunction;
-import io.trino.spi.function.table.Argument;
-import io.trino.spi.function.table.ConnectorTableFunction;
-import io.trino.spi.function.table.ConnectorTableFunctionHandle;
-import io.trino.spi.function.table.Descriptor;
-import io.trino.spi.function.table.DescriptorArgument;
-import io.trino.spi.function.table.DescriptorArgumentSpecification;
-import io.trino.spi.function.table.TableArgument;
-import io.trino.spi.function.table.TableArgumentSpecification;
-import io.trino.spi.function.table.TableFunctionAnalysis;
-import io.trino.spi.function.table.TableFunctionDataProcessor;
-import io.trino.spi.function.table.TableFunctionProcessorProvider;
-import io.trino.spi.type.RowType;
-
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
-
-import static com.google.common.collect.ImmutableSet.toImmutableSet;
-import static com.google.common.collect.Iterables.getOnlyElement;
-import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA;
-import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
-import static io.trino.spi.function.table.DescriptorArgument.NULL_DESCRIPTOR;
-import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE;
-import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED;
-import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInputAndProduced;
-import static java.lang.String.format;
-import static java.util.Locale.ENGLISH;
-import static java.util.stream.Collectors.joining;
-
-public class ExcludeColumns
- implements Provider<ConnectorTableFunction>
-{
- public static final String NAME = "exclude_columns";
-
- @Override
- public ConnectorTableFunction get()
- {
- return new ClassLoaderSafeConnectorTableFunction(new ExcludeColumnsFunction(), getClass().getClassLoader());
- }
-
- public static class ExcludeColumnsFunction
- extends AbstractConnectorTableFunction
- {
- private static final String TABLE_ARGUMENT_NAME = "INPUT";
- private static final String DESCRIPTOR_ARGUMENT_NAME = "COLUMNS";
-
- public ExcludeColumnsFunction()
- {
- super(
- BUILTIN_SCHEMA,
- NAME,
- ImmutableList.of(
- TableArgumentSpecification.builder()
- .name(TABLE_ARGUMENT_NAME)
- .rowSemantics()
- .build(),
- DescriptorArgumentSpecification.builder()
- .name(DESCRIPTOR_ARGUMENT_NAME)
- .build()),
- GENERIC_TABLE);
- }
-
- @Override
- public TableFunctionAnalysis analyze(
- ConnectorSession session,
- ConnectorTransactionHandle transaction,
- Map<String, Argument> arguments,
- ConnectorAccessControl accessControl)
- {
- DescriptorArgument excludedColumns = (DescriptorArgument) arguments.get(DESCRIPTOR_ARGUMENT_NAME);
- if (excludedColumns.equals(NULL_DESCRIPTOR)) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor is null");
- }
- Descriptor excludedColumnsDescriptor = excludedColumns.getDescriptor().orElseThrow();
- if (excludedColumnsDescriptor.getFields().stream().anyMatch(field -> field.getType().isPresent())) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor contains types");
- }
-
- // column names in DescriptorArgument are canonical wrt SQL identifier semantics.
- // column names in TableArgument are not canonical wrt SQL identifier semantics, as they are taken from the corresponding RelationType.
- // because of that, we match the excluded columns names case-insensitive
- // TODO apply proper identifier semantics
- Set<String> excludedNames = excludedColumnsDescriptor.getFields().stream()
- .map(Descriptor.Field::getName)
- .map(name -> name.orElseThrow().toLowerCase(ENGLISH))
- .collect(toImmutableSet());
-
- List<RowType.Field> inputSchema = ((TableArgument) arguments.get(TABLE_ARGUMENT_NAME)).getRowType().getFields();
- Set<String> inputNames = inputSchema.stream()
- .map(RowType.Field::getName)
- .filter(Optional::isPresent)
- .map(Optional::get)
- .map(name -> name.toLowerCase(ENGLISH))
- .collect(toImmutableSet());
-
- if (!inputNames.containsAll(excludedNames)) {
- String missingColumns = Sets.difference(excludedNames, inputNames).stream()
- .collect(joining(", ", "[", "]"));
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Excluded columns: %s not present in the table", missingColumns));
- }
-
- ImmutableList.Builder<Integer> requiredColumns = ImmutableList.builder();
- ImmutableList.Builder<Descriptor.Field> returnedColumns = ImmutableList.builder();
-
- for (int i = 0; i < inputSchema.size(); i++) {
- Optional<String> name = inputSchema.get(i).getName();
- if (name.isEmpty() || !excludedNames.contains(name.orElseThrow().toLowerCase(ENGLISH))) {
- requiredColumns.add(i);
- // per SQL standard, all columns produced by a table function must be named. We allow anonymous columns.
- returnedColumns.add(new Descriptor.Field(name, Optional.of(inputSchema.get(i).getType())));
- }
- }
-
- List<Descriptor.Field> returnedType = returnedColumns.build();
- if (returnedType.isEmpty()) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "All columns are excluded");
- }
-
- return TableFunctionAnalysis.builder()
- .requiredColumns(TABLE_ARGUMENT_NAME, requiredColumns.build())
- .returnedType(new Descriptor(returnedType))
- .handle(new ExcludeColumnsFunctionHandle())
- .build();
- }
- }
-
- public static TableFunctionProcessorProvider getExcludeColumnsFunctionProcessorProvider()
- {
- return new TableFunctionProcessorProvider()
- {
- @Override
- public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle)
- {
- return input -> {
- if (input == null) {
- return FINISHED;
- }
- return usedInputAndProduced(getOnlyElement(input).orElseThrow());
- };
- }
- };
- }
-
- public record ExcludeColumnsFunctionHandle()
- implements ConnectorTableFunctionHandle
- {
- // there's no information to remember. All logic is effectively delegated to the engine via `requiredColumns`.
- }
-}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java
new file mode 100644
index 0000000000000..0417b91ffa64f
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/ExcludeColumnsFunction.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import io.trino.spi.TrinoException;
+import io.trino.spi.connector.ConnectorAccessControl;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.connector.ConnectorTransactionHandle;
+import io.trino.spi.function.table.AbstractConnectorTableFunction;
+import io.trino.spi.function.table.Argument;
+import io.trino.spi.function.table.ConnectorTableFunctionHandle;
+import io.trino.spi.function.table.Descriptor;
+import io.trino.spi.function.table.DescriptorArgument;
+import io.trino.spi.function.table.DescriptorArgumentSpecification;
+import io.trino.spi.function.table.TableArgument;
+import io.trino.spi.function.table.TableArgumentSpecification;
+import io.trino.spi.function.table.TableFunctionAnalysis;
+import io.trino.spi.function.table.TableFunctionDataProcessor;
+import io.trino.spi.function.table.TableFunctionProcessorProvider;
+import io.trino.spi.type.RowType;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
+import static com.google.common.collect.Iterables.getOnlyElement;
+import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA;
+import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
+import static io.trino.spi.function.table.DescriptorArgument.NULL_DESCRIPTOR;
+import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE;
+import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED;
+import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInputAndProduced;
+import static java.lang.String.format;
+import static java.util.Locale.ENGLISH;
+import static java.util.stream.Collectors.joining;
+
+public class ExcludeColumnsFunction
+ extends AbstractConnectorTableFunction
+{
+ public static final String NAME = "exclude_columns";
+
+ private static final String TABLE_ARGUMENT_NAME = "INPUT";
+ private static final String DESCRIPTOR_ARGUMENT_NAME = "COLUMNS";
+
+ public ExcludeColumnsFunction()
+ {
+ super(
+ BUILTIN_SCHEMA,
+ NAME,
+ ImmutableList.of(
+ TableArgumentSpecification.builder()
+ .name(TABLE_ARGUMENT_NAME)
+ .rowSemantics()
+ .build(),
+ DescriptorArgumentSpecification.builder()
+ .name(DESCRIPTOR_ARGUMENT_NAME)
+ .build()),
+ GENERIC_TABLE);
+ }
+
+ @Override
+ public TableFunctionAnalysis analyze(
+ ConnectorSession session,
+ ConnectorTransactionHandle transaction,
+ Map<String, Argument> arguments,
+ ConnectorAccessControl accessControl)
+ {
+ DescriptorArgument excludedColumns = (DescriptorArgument) arguments.get(DESCRIPTOR_ARGUMENT_NAME);
+ if (excludedColumns.equals(NULL_DESCRIPTOR)) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor is null");
+ }
+ Descriptor excludedColumnsDescriptor = excludedColumns.getDescriptor().orElseThrow();
+ if (excludedColumnsDescriptor.getFields().stream().anyMatch(field -> field.getType().isPresent())) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "COLUMNS descriptor contains types");
+ }
+
+ // column names in DescriptorArgument are canonical wrt SQL identifier semantics.
+ // column names in TableArgument are not canonical wrt SQL identifier semantics, as they are taken from the corresponding RelationType.
+ // because of that, we match the excluded columns names case-insensitive
+ // TODO apply proper identifier semantics
+ Set<String> excludedNames = excludedColumnsDescriptor.getFields().stream()
+ .map(Descriptor.Field::getName)
+ .map(name -> name.orElseThrow().toLowerCase(ENGLISH))
+ .collect(toImmutableSet());
+
+ List<RowType.Field> inputSchema = ((TableArgument) arguments.get(TABLE_ARGUMENT_NAME)).getRowType().getFields();
+ Set<String> inputNames = inputSchema.stream()
+ .map(RowType.Field::getName)
+ .filter(Optional::isPresent)
+ .map(Optional::get)
+ .map(name -> name.toLowerCase(ENGLISH))
+ .collect(toImmutableSet());
+
+ if (!inputNames.containsAll(excludedNames)) {
+ String missingColumns = Sets.difference(excludedNames, inputNames).stream()
+ .collect(joining(", ", "[", "]"));
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Excluded columns: %s not present in the table", missingColumns));
+ }
+
+ ImmutableList.Builder<Integer> requiredColumns = ImmutableList.builder();
+ ImmutableList.Builder<Descriptor.Field> returnedColumns = ImmutableList.builder();
+
+ for (int i = 0; i < inputSchema.size(); i++) {
+ Optional<String> name = inputSchema.get(i).getName();
+ if (name.isEmpty() || !excludedNames.contains(name.orElseThrow().toLowerCase(ENGLISH))) {
+ requiredColumns.add(i);
+ // per SQL standard, all columns produced by a table function must be named. We allow anonymous columns.
+ returnedColumns.add(new Descriptor.Field(name, Optional.of(inputSchema.get(i).getType())));
+ }
+ }
+
+ List<Descriptor.Field> returnedType = returnedColumns.build();
+ if (returnedType.isEmpty()) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "All columns are excluded");
+ }
+
+ return TableFunctionAnalysis.builder()
+ .requiredColumns(TABLE_ARGUMENT_NAME, requiredColumns.build())
+ .returnedType(new Descriptor(returnedType))
+ .handle(new ExcludeColumnsFunctionHandle())
+ .build();
+ }
+
+ public static TableFunctionProcessorProvider getExcludeColumnsFunctionProcessorProvider()
+ {
+ return new TableFunctionProcessorProvider()
+ {
+ @Override
+ public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle)
+ {
+ return input -> {
+ if (input == null) {
+ return FINISHED;
+ }
+ return usedInputAndProduced(getOnlyElement(input).orElseThrow());
+ };
+ }
+ };
+ }
+
+ public record ExcludeColumnsFunctionHandle()
+ implements ConnectorTableFunctionHandle
+ {
+ // there's no information to remember. All logic is effectively delegated to the engine via `requiredColumns`.
+ }
+}
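The analyze logic above is mostly case-insensitive set arithmetic over column names. Reduced to plain collections, the core looks like the sketch below (illustrative only; the real code additionally validates the descriptor, tracks required column indexes, and preserves anonymous columns):

```java
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;

final class ExcludeColumnsSketch
{
    private ExcludeColumnsSketch() {}

    // keep every input column whose lower-cased name is not in the excluded set
    static List<String> remainingColumns(List<String> inputNames, Set<String> excludedNames)
    {
        Set<String> excludedLowerCase = excludedNames.stream()
                .map(name -> name.toLowerCase(Locale.ENGLISH))
                .collect(Collectors.toSet());
        return inputNames.stream()
                .filter(name -> !excludedLowerCase.contains(name.toLowerCase(Locale.ENGLISH)))
                .collect(Collectors.toList());
    }

    public static void main(String[] args)
    {
        // case-insensitive matching: "Comment" is excluded by "comment"
        System.out.println(remainingColumns(List.of("orderkey", "Comment", "clerk"), Set.of("comment")));
        // prints [orderkey, clerk]
    }
}
```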
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/Sequence.java b/core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java
similarity index 68%
rename from core/trino-main/src/main/java/io/trino/operator/table/Sequence.java
rename to core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java
index c111f86c51401..2a1b176c95928 100644
--- a/core/trino-main/src/main/java/io/trino/operator/table/Sequence.java
+++ b/core/trino-main/src/main/java/io/trino/operator/table/SequenceFunction.java
@@ -17,8 +17,6 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
-import com.google.inject.Provider;
-import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorTableFunction;
import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import io.trino.spi.TrinoException;
@@ -31,7 +29,6 @@
import io.trino.spi.connector.FixedSplitSource;
import io.trino.spi.function.table.AbstractConnectorTableFunction;
import io.trino.spi.function.table.Argument;
-import io.trino.spi.function.table.ConnectorTableFunction;
import io.trino.spi.function.table.ConnectorTableFunctionHandle;
import io.trino.spi.function.table.ReturnTypeSpecification.DescribedTable;
import io.trino.spi.function.table.ScalarArgument;
@@ -48,7 +45,7 @@
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.trino.metadata.GlobalFunctionCatalog.BUILTIN_SCHEMA;
-import static io.trino.operator.table.Sequence.SequenceFunctionSplit.MAX_SPLIT_SIZE;
+import static io.trino.operator.table.SequenceFunction.SequenceFunctionSplit.MAX_SPLIT_SIZE;
import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static io.trino.spi.function.table.Descriptor.descriptor;
import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED;
@@ -56,85 +53,75 @@
import static io.trino.spi.type.BigintType.BIGINT;
import static java.lang.String.format;
-public class Sequence
- implements Provider
+public class SequenceFunction
+ extends AbstractConnectorTableFunction
{
public static final String NAME = "sequence";
- @Override
- public ConnectorTableFunction get()
+ private static final String START_ARGUMENT_NAME = "START";
+ private static final String STOP_ARGUMENT_NAME = "STOP";
+ private static final String STEP_ARGUMENT_NAME = "STEP";
+
+ public SequenceFunction()
{
- return new ClassLoaderSafeConnectorTableFunction(new SequenceFunction(), getClass().getClassLoader());
+ super(
+ BUILTIN_SCHEMA,
+ NAME,
+ ImmutableList.of(
+ ScalarArgumentSpecification.builder()
+ .name(START_ARGUMENT_NAME)
+ .type(BIGINT)
+ .defaultValue(0L)
+ .build(),
+ ScalarArgumentSpecification.builder()
+ .name(STOP_ARGUMENT_NAME)
+ .type(BIGINT)
+ .build(),
+ ScalarArgumentSpecification.builder()
+ .name(STEP_ARGUMENT_NAME)
+ .type(BIGINT)
+ .defaultValue(1L)
+ .build()),
+ new DescribedTable(descriptor(ImmutableList.of("sequential_number"), ImmutableList.of(BIGINT))));
}
- public static class SequenceFunction
- extends AbstractConnectorTableFunction
+ @Override
+ public TableFunctionAnalysis analyze(
+ ConnectorSession session,
+ ConnectorTransactionHandle transaction,
+ Map<String, Argument> arguments,
+ ConnectorAccessControl accessControl)
{
- private static final String START_ARGUMENT_NAME = "START";
- private static final String STOP_ARGUMENT_NAME = "STOP";
- private static final String STEP_ARGUMENT_NAME = "STEP";
-
- public SequenceFunction()
- {
- super(
- BUILTIN_SCHEMA,
- NAME,
- ImmutableList.of(
- ScalarArgumentSpecification.builder()
- .name(START_ARGUMENT_NAME)
- .type(BIGINT)
- .defaultValue(0L)
- .build(),
- ScalarArgumentSpecification.builder()
- .name(STOP_ARGUMENT_NAME)
- .type(BIGINT)
- .build(),
- ScalarArgumentSpecification.builder()
- .name(STEP_ARGUMENT_NAME)
- .type(BIGINT)
- .defaultValue(1L)
- .build()),
- new DescribedTable(descriptor(ImmutableList.of("sequential_number"), ImmutableList.of(BIGINT))));
+ Object startValue = ((ScalarArgument) arguments.get(START_ARGUMENT_NAME)).getValue();
+ if (startValue == null) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Start is null");
}
- @Override
- public TableFunctionAnalysis analyze(
- ConnectorSession session,
- ConnectorTransactionHandle transaction,
- Map<String, Argument> arguments,
- ConnectorAccessControl accessControl)
- {
- Object startValue = ((ScalarArgument) arguments.get(START_ARGUMENT_NAME)).getValue();
- if (startValue == null) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Start is null");
- }
-
- Object stopValue = ((ScalarArgument) arguments.get(STOP_ARGUMENT_NAME)).getValue();
- if (stopValue == null) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Stop is null");
- }
+ Object stopValue = ((ScalarArgument) arguments.get(STOP_ARGUMENT_NAME)).getValue();
+ if (stopValue == null) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Stop is null");
+ }
- Object stepValue = ((ScalarArgument) arguments.get(STEP_ARGUMENT_NAME)).getValue();
- if (stepValue == null) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Step is null");
- }
+ Object stepValue = ((ScalarArgument) arguments.get(STEP_ARGUMENT_NAME)).getValue();
+ if (stepValue == null) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Step is null");
+ }
- long start = (long) startValue;
- long stop = (long) stopValue;
- long step = (long) stepValue;
+ long start = (long) startValue;
+ long stop = (long) stopValue;
+ long step = (long) stepValue;
- if (start < stop && step <= 0) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be positive for sequence [%s, %s]", start, stop));
- }
-
- if (start > stop && step >= 0) {
- throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be negative for sequence [%s, %s]", start, stop));
- }
+ if (start < stop && step <= 0) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be positive for sequence [%s, %s]", start, stop));
+ }
- return TableFunctionAnalysis.builder()
- .handle(new SequenceFunctionHandle(start, stop, start == stop ? 0 : step))
- .build();
+ if (start > stop && step >= 0) {
+ throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Step must be negative for sequence [%s, %s]", start, stop));
}
+
+ return TableFunctionAnalysis.builder()
+ .handle(new SequenceFunctionHandle(start, stop, start == stop ? 0 : step))
+ .build();
}
public record SequenceFunctionHandle(long start, long stop, long step)
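The argument validation in analyze() enforces that the step moves the sequence toward stop, and normalizes step to 0 when start == stop so exactly one value is produced. Those invariants in isolation, as an illustrative sketch:

```java
final class SequenceValidation
{
    private SequenceValidation() {}

    // returns the effective step for [start, stop], rejecting steps that cannot make progress
    static long effectiveStep(long start, long stop, long step)
    {
        if (start < stop && step <= 0) {
            throw new IllegalArgumentException("Step must be positive for sequence [" + start + ", " + stop + "]");
        }
        if (start > stop && step >= 0) {
            throw new IllegalArgumentException("Step must be negative for sequence [" + start + ", " + stop + "]");
        }
        return start == stop ? 0 : step;
    }

    public static void main(String[] args)
    {
        System.out.println(effectiveStep(0, 10, 2));  // 2
        System.out.println(effectiveStep(5, 5, 7));   // 0: single-value sequence
        System.out.println(effectiveStep(10, 0, -3)); // -3
    }
}
```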
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java
new file mode 100644
index 0000000000000..1e1aff0faf19d
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTable.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.metadata.FunctionManager;
+import io.trino.metadata.Metadata;
+import io.trino.operator.table.json.execution.JsonTableProcessingFragment;
+import io.trino.spi.Page;
+import io.trino.spi.PageBuilder;
+import io.trino.spi.block.SqlRow;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.function.table.ConnectorTableFunctionHandle;
+import io.trino.spi.function.table.TableFunctionDataProcessor;
+import io.trino.spi.function.table.TableFunctionProcessorProvider;
+import io.trino.spi.function.table.TableFunctionProcessorState;
+import io.trino.spi.type.RowType;
+import io.trino.spi.type.Type;
+import io.trino.spi.type.TypeManager;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.collect.Iterables.getOnlyElement;
+import static io.trino.operator.scalar.json.ParameterUtil.getParametersArray;
+import static io.trino.operator.table.json.execution.ExecutionPlanner.getExecutionPlan;
+import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED;
+import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.produced;
+import static io.trino.spi.function.table.TableFunctionProcessorState.Processed.usedInput;
+import static io.trino.spi.type.BigintType.BIGINT;
+import static io.trino.spi.type.TypeUtils.readNativeValue;
+import static io.trino.spi.type.TypeUtils.writeNativeValue;
+import static io.trino.type.Json2016Type.JSON_2016;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Implements feature ISO/IEC 9075-2:2023(E) 7.11 'JSON table'
+ * including features T824, T827, T838
+ */
+public class JsonTable
+{
+ private JsonTable() {}
+
+ /**
+ * This class comprises all information necessary to execute the json_table function:
+ *
+ * @param processingPlan the root of the processing plan tree
+ * @param outer the parent-child relationship between the input relation and the processingPlan result
+ * @param errorOnError the error behavior: true for ERROR ON ERROR, false for EMPTY ON ERROR
+ * @param parametersType type of the row containing JSON path parameters for the root JSON path. The function expects the parameters row in channel 1.
+ * The other channels in the input page carry the JSON context item (channel 0) and the default values for the value columns. Each value column in the processingPlan
+ * knows the indexes of its default channels.
+ * @param outputTypes types of the proper columns produced by the function
+ */
+ public record JsonTableFunctionHandle(JsonTablePlanNode processingPlan, boolean outer, boolean errorOnError, Type parametersType, Type[] outputTypes)
+ implements ConnectorTableFunctionHandle
+ {
+ public JsonTableFunctionHandle
+ {
+ requireNonNull(processingPlan, "processingPlan is null");
+ requireNonNull(parametersType, "parametersType is null");
+ requireNonNull(outputTypes, "outputTypes is null");
+
+ // We can't use RowType in the public interface because it's not directly deserializable from JSON. See TypeDeserializerModule.
+ checkArgument(parametersType instanceof RowType, "parametersType is not a row type");
+ }
+ }
+
+ public static TableFunctionProcessorProvider getJsonTableFunctionProcessorProvider(Metadata metadata, TypeManager typeManager, FunctionManager functionManager)
+ {
+ return new TableFunctionProcessorProvider()
+ {
+ @Override
+ public TableFunctionDataProcessor getDataProcessor(ConnectorSession session, ConnectorTableFunctionHandle handle)
+ {
+ JsonTableFunctionHandle jsonTableFunctionHandle = (JsonTableFunctionHandle) handle;
+ Object[] newRow = new Object[jsonTableFunctionHandle.outputTypes().length];
+ JsonTableProcessingFragment executionPlan = getExecutionPlan(
+ jsonTableFunctionHandle.processingPlan(),
+ newRow,
+ jsonTableFunctionHandle.errorOnError(),
+ jsonTableFunctionHandle.outputTypes(),
+ session,
+ metadata,
+ typeManager,
+ functionManager);
+ return new JsonTableFunctionProcessor(executionPlan, newRow, jsonTableFunctionHandle.outputTypes(), (RowType) jsonTableFunctionHandle.parametersType(), jsonTableFunctionHandle.outer());
+ }
+ };
+ }
+
+ public static class JsonTableFunctionProcessor
+ implements TableFunctionDataProcessor
+ {
+ private final PageBuilder pageBuilder;
+ private final int properColumnsCount;
+ private final JsonTableProcessingFragment executionPlan;
+ private final Object[] newRow;
+ private final RowType parametersType;
+ private final boolean outer;
+
+ private long totalPositionsProcessed;
+ private int currentPosition = -1;
+ private boolean currentPositionAlreadyProduced;
+
+ public JsonTableFunctionProcessor(JsonTableProcessingFragment executionPlan, Object[] newRow, Type[] outputTypes, RowType parametersType, boolean outer)
+ {
+ this.pageBuilder = new PageBuilder(ImmutableList.<Type>builder()
+ .add(outputTypes)
+ .add(BIGINT) // add additional position for pass-through index
+ .build());
+ this.properColumnsCount = outputTypes.length;
+ this.executionPlan = requireNonNull(executionPlan, "executionPlan is null");
+ this.newRow = requireNonNull(newRow, "newRow is null");
+ this.parametersType = requireNonNull(parametersType, "parametersType is null");
+ this.outer = outer;
+ }
+
+ @Override
+ public TableFunctionProcessorState process(List<Optional<Page>> input)
+ {
+ // no more input pages
+ if (input == null) {
+ if (pageBuilder.isEmpty()) {
+ return FINISHED;
+ }
+ return flushPageBuilder();
+ }
+
+ Page inputPage = getOnlyElement(input).orElseThrow();
+ while (!pageBuilder.isFull()) {
+ // new input page
+ if (currentPosition == -1) {
+ if (inputPage.getPositionCount() == 0) {
+ return usedInput();
+ }
+ else {
+ currentPosition = 0;
+ currentPositionAlreadyProduced = false;
+ totalPositionsProcessed++;
+ SqlRow parametersRow = (SqlRow) readNativeValue(parametersType, inputPage.getBlock(1), currentPosition);
+ executionPlan.resetRoot(
+ (JsonNode) readNativeValue(JSON_2016, inputPage.getBlock(0), currentPosition),
+ inputPage,
+ currentPosition,
+ getParametersArray(parametersType, parametersRow));
+ }
+ }
+
+ // try to get output row for the current position (one position can produce multiple rows)
+ boolean gotNewRow = executionPlan.getRow();
+ if (gotNewRow) {
+ currentPositionAlreadyProduced = true;
+ addOutputRow();
+ }
+ else {
+ if (outer && !currentPositionAlreadyProduced) {
+ addNullPaddedRow();
+ }
+ // go to next position in the input page
+ currentPosition++;
+ if (currentPosition < inputPage.getPositionCount()) {
+ currentPositionAlreadyProduced = false;
+ totalPositionsProcessed++;
+ SqlRow parametersRow = (SqlRow) readNativeValue(parametersType, inputPage.getBlock(1), currentPosition);
+ executionPlan.resetRoot(
+ (JsonNode) readNativeValue(JSON_2016, inputPage.getBlock(0), currentPosition),
+ inputPage,
+ currentPosition,
+ getParametersArray(parametersType, parametersRow));
+ }
+ else {
+ currentPosition = -1;
+ return usedInput();
+ }
+ }
+ }
+
+ return flushPageBuilder();
+ }
+
+ private TableFunctionProcessorState flushPageBuilder()
+ {
+ TableFunctionProcessorState result = produced(pageBuilder.build());
+ pageBuilder.reset();
+ return result;
+ }
+
+ private void addOutputRow()
+ {
+ pageBuilder.declarePosition();
+ for (int channel = 0; channel < properColumnsCount; channel++) {
+ writeNativeValue(pageBuilder.getType(channel), pageBuilder.getBlockBuilder(channel), newRow[channel]);
+ }
+ // pass-through index from partition start
+ BIGINT.writeLong(pageBuilder.getBlockBuilder(properColumnsCount), totalPositionsProcessed - 1);
+ }
+
+ private void addNullPaddedRow()
+ {
+ Arrays.fill(newRow, null);
+ addOutputRow();
+ }
+ }
+}
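
A note on the `process()` loop above: rows accumulate in the `PageBuilder` until it reports full, a page is then flushed, and processing resumes exactly where it stopped; when the input is exhausted (`input == null`), any non-empty remainder is flushed. The following standalone sketch shows the same batching discipline with plain lists standing in for Trino pages (all names here are hypothetical, not part of this change):

```java
import java.util.ArrayList;
import java.util.List;

public class BatchedDrainSketch
{
    private static final int MAX_BATCH = 3; // stand-in for PageBuilder.isFull()

    private final List<List<String>> batches = new ArrayList<>();
    private List<String> builder = new ArrayList<>();

    void add(String row)
    {
        builder.add(row);
        if (builder.size() >= MAX_BATCH) {
            flush(); // page is full: emit it and resume from the same input position
        }
    }

    void flush()
    {
        if (!builder.isEmpty()) {
            batches.add(builder);
            builder = new ArrayList<>();
        }
    }

    public static void main(String[] args)
    {
        BatchedDrainSketch sketch = new BatchedDrainSketch();
        // one input position may produce several rows; batch boundaries fall wherever they fall
        for (String position : List.of("a", "b")) {
            for (int row = 1; row <= 2; row++) {
                sketch.add(position + "#" + row);
            }
        }
        sketch.flush(); // end of input: flush the remainder, like process(null) above
        System.out.println(sketch.batches); // [[a#1, a#2, b#1], [b#2]]
    }
}
```
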
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java
new file mode 100644
index 0000000000000..8727e4254c67f
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableColumn.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+@JsonTypeInfo(
+ use = JsonTypeInfo.Id.NAME,
+ property = "@type")
+@JsonSubTypes({
+ @JsonSubTypes.Type(value = JsonTableOrdinalityColumn.class, name = "ordinality"),
+ @JsonSubTypes.Type(value = JsonTableQueryColumn.class, name = "query"),
+ @JsonSubTypes.Type(value = JsonTableValueColumn.class, name = "value"),
+})
+
+public sealed interface JsonTableColumn
+ permits JsonTableOrdinalityColumn, JsonTableQueryColumn, JsonTableValueColumn
+{
+}
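
The `@JsonTypeInfo`/`@JsonSubTypes` pair above is what lets these column descriptions round-trip through JSON with a `@type` discriminator (the same pattern reappears on `JsonTablePlanNode` below). A minimal self-contained sketch of the mechanism, using a hypothetical `Shape` hierarchy and assuming Jackson 2.15+ (which supports records) on the classpath:

```java
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public class PolymorphicJsonSketch
{
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "@type")
    @JsonSubTypes({
            @JsonSubTypes.Type(value = Circle.class, name = "circle"),
            @JsonSubTypes.Type(value = Square.class, name = "square"),
    })
    sealed interface Shape
            permits Circle, Square
    {
    }

    record Circle(double radius)
            implements Shape
    {
    }

    record Square(double side)
            implements Shape
    {
    }

    public static void main(String[] args)
            throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();
        String json = mapper.writeValueAsString(new Circle(1.5));
        System.out.println(json); // {"@type":"circle","radius":1.5}

        // the "@type" discriminator picks the concrete record on the way back in
        Shape restored = mapper.readValue(json, Shape.class);
        System.out.println(restored); // Circle[radius=1.5]
    }
}
```
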
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java
new file mode 100644
index 0000000000000..904bb385e4429
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableOrdinalityColumn.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+public record JsonTableOrdinalityColumn(int outputIndex)
+ implements JsonTableColumn
+{
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java
new file mode 100644
index 0000000000000..f61c13f920c9b
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanCross.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+ public record JsonTablePlanCross(List<JsonTablePlanNode> siblings)
+ implements JsonTablePlanNode
+{
+ public JsonTablePlanCross(List<JsonTablePlanNode> siblings)
+ {
+ this.siblings = ImmutableList.copyOf(siblings);
+ checkArgument(siblings.size() >= 2, "less than 2 siblings in Cross node");
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java
new file mode 100644
index 0000000000000..f1cbafbe86cef
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanLeaf.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.json.ir.IrJsonPath;
+
+import java.util.List;
+
+import static java.util.Objects.requireNonNull;
+
+ public record JsonTablePlanLeaf(IrJsonPath path, List<JsonTableColumn> columns)
+ implements JsonTablePlanNode
+{
+ public JsonTablePlanLeaf(IrJsonPath path, List<JsonTableColumn> columns)
+ {
+ this.path = requireNonNull(path, "path is null");
+ this.columns = ImmutableList.copyOf(columns);
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java
new file mode 100644
index 0000000000000..73b56a75fb17f
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanNode.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+@JsonTypeInfo(
+ use = JsonTypeInfo.Id.NAME,
+ property = "@type")
+@JsonSubTypes({
+ @JsonSubTypes.Type(value = JsonTablePlanCross.class, name = "cross"),
+ @JsonSubTypes.Type(value = JsonTablePlanLeaf.class, name = "leaf"),
+ @JsonSubTypes.Type(value = JsonTablePlanSingle.class, name = "single"),
+ @JsonSubTypes.Type(value = JsonTablePlanUnion.class, name = "union"),
+})
+
+public sealed interface JsonTablePlanNode
+ permits JsonTablePlanCross, JsonTablePlanLeaf, JsonTablePlanSingle, JsonTablePlanUnion
+{
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java
new file mode 100644
index 0000000000000..49423e2c4bd2b
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanSingle.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.json.ir.IrJsonPath;
+
+import java.util.List;
+
+import static java.util.Objects.requireNonNull;
+
+ public record JsonTablePlanSingle(IrJsonPath path, List<JsonTableColumn> columns, boolean outer, JsonTablePlanNode child)
+ implements JsonTablePlanNode
+{
+ public JsonTablePlanSingle(IrJsonPath path, List<JsonTableColumn> columns, boolean outer, JsonTablePlanNode child)
+ {
+ this.path = requireNonNull(path, "path is null");
+ this.columns = ImmutableList.copyOf(columns);
+ this.outer = outer;
+ this.child = requireNonNull(child, "child is null");
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java
new file mode 100644
index 0000000000000..e8a1f1caeaf4a
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTablePlanUnion.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+ public record JsonTablePlanUnion(List<JsonTablePlanNode> siblings)
+ implements JsonTablePlanNode
+{
+ public JsonTablePlanUnion(List<JsonTablePlanNode> siblings)
+ {
+ this.siblings = ImmutableList.copyOf(siblings);
+ checkArgument(siblings.size() >= 2, "less than 2 siblings in Union node");
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java
new file mode 100644
index 0000000000000..117df03c2c25f
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableQueryColumn.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import io.trino.json.ir.IrJsonPath;
+import io.trino.metadata.ResolvedFunction;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * This representation does not contain all properties of the column as specified in json_table invocation.
+ * Certain properties are handled by the output function which is applied later.
+ * These are: output format and quotes behavior.
+ */
+public record JsonTableQueryColumn(
+ int outputIndex,
+ ResolvedFunction function,
+ IrJsonPath path,
+ long wrapperBehavior,
+ long emptyBehavior,
+ long errorBehavior)
+ implements JsonTableColumn
+{
+ public JsonTableQueryColumn
+ {
+ requireNonNull(function, "function is null");
+ requireNonNull(path, "path is null");
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java
new file mode 100644
index 0000000000000..6d87bc4a5ffd8
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/JsonTableValueColumn.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json;
+
+import io.trino.json.ir.IrJsonPath;
+import io.trino.metadata.ResolvedFunction;
+
+import static java.util.Objects.requireNonNull;
+
+public record JsonTableValueColumn(
+ int outputIndex,
+ ResolvedFunction function,
+ IrJsonPath path,
+ long emptyBehavior,
+ int emptyDefaultInput, // channel number or -1 when default not specified
+ long errorBehavior,
+ int errorDefaultInput) // channel number or -1 when default not specified
+ implements JsonTableColumn
+{
+ public JsonTableValueColumn
+ {
+ requireNonNull(function, "function is null");
+ requireNonNull(path, "path is null");
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java
new file mode 100644
index 0000000000000..15eab03d10d33
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/Column.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.trino.spi.Page;
+
+public interface Column
+{
+ Object evaluate(long sequentialNumber, JsonNode item, Page input, int position);
+
+ int getOutputIndex();
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java
new file mode 100644
index 0000000000000..e6c4879db057a
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/ExecutionPlanner.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.json.JsonPathInvocationContext;
+import io.trino.metadata.FunctionManager;
+import io.trino.metadata.Metadata;
+import io.trino.operator.table.json.JsonTableColumn;
+import io.trino.operator.table.json.JsonTableOrdinalityColumn;
+import io.trino.operator.table.json.JsonTablePlanCross;
+import io.trino.operator.table.json.JsonTablePlanLeaf;
+import io.trino.operator.table.json.JsonTablePlanNode;
+import io.trino.operator.table.json.JsonTablePlanSingle;
+import io.trino.operator.table.json.JsonTablePlanUnion;
+import io.trino.operator.table.json.JsonTableQueryColumn;
+import io.trino.operator.table.json.JsonTableValueColumn;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.function.InvocationConvention;
+import io.trino.spi.function.ScalarFunctionImplementation;
+import io.trino.spi.type.Type;
+import io.trino.spi.type.TypeManager;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Throwables.throwIfUnchecked;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.BOXED_NULLABLE;
+import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.NEVER_NULL;
+import static io.trino.spi.function.InvocationConvention.InvocationReturnConvention.NULLABLE_RETURN;
+
+public class ExecutionPlanner
+{
+ private ExecutionPlanner()
+ {
+ }
+
+ public static JsonTableProcessingFragment getExecutionPlan(
+ JsonTablePlanNode plan,
+ Object[] newRow,
+ boolean errorOnError,
+ Type[] outputTypes,
+ ConnectorSession session,
+ Metadata metadata,
+ TypeManager typeManager,
+ FunctionManager functionManager)
+ {
+ if (plan instanceof JsonTablePlanLeaf planLeaf) {
+ return new FragmentLeaf(
+ planLeaf.path(),
+ planLeaf.columns().stream()
+ .map(column -> getColumn(column, outputTypes, session, functionManager))
+ .collect(toImmutableList()),
+ errorOnError,
+ newRow,
+ session,
+ metadata,
+ typeManager,
+ functionManager);
+ }
+ if (plan instanceof JsonTablePlanSingle planSingle) {
+ return new FragmentSingle(
+ planSingle.path(),
+ planSingle.columns().stream()
+ .map(column -> getColumn(column, outputTypes, session, functionManager))
+ .collect(toImmutableList()),
+ errorOnError,
+ planSingle.outer(),
+ getExecutionPlan(planSingle.child(), newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager),
+ newRow,
+ session,
+ metadata,
+ typeManager,
+ functionManager);
+ }
+ if (plan instanceof JsonTablePlanCross planCross) {
+ return new FragmentCross(planCross.siblings().stream()
+ .map(sibling -> getExecutionPlan(sibling, newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager))
+ .collect(toImmutableList()));
+ }
+ JsonTablePlanUnion planUnion = (JsonTablePlanUnion) plan;
+ return new FragmentUnion(
+ planUnion.siblings().stream()
+ .map(sibling -> getExecutionPlan(sibling, newRow, errorOnError, outputTypes, session, metadata, typeManager, functionManager))
+ .collect(toImmutableList()),
+ newRow);
+ }
+
+ private static Column getColumn(JsonTableColumn column, Type[] outputTypes, ConnectorSession session, FunctionManager functionManager)
+ {
+ if (column instanceof JsonTableValueColumn valueColumn) {
+ ScalarFunctionImplementation implementation = functionManager.getScalarFunctionImplementation(
+ valueColumn.function(),
+ new InvocationConvention(
+ ImmutableList.of(BOXED_NULLABLE, BOXED_NULLABLE, BOXED_NULLABLE, NEVER_NULL, BOXED_NULLABLE, NEVER_NULL, BOXED_NULLABLE),
+ NULLABLE_RETURN,
+ true,
+ true));
+ JsonPathInvocationContext context;
+ checkArgument(implementation.getInstanceFactory().isPresent(), "instance factory is missing");
+ try {
+ context = (JsonPathInvocationContext) implementation.getInstanceFactory().get().invoke();
+ }
+ catch (Throwable throwable) {
+ throwIfUnchecked(throwable);
+ throw new RuntimeException(throwable);
+ }
+ return new ValueColumn(
+ valueColumn.outputIndex(),
+ implementation.getMethodHandle()
+ .bindTo(context)
+ .bindTo(session),
+ valueColumn.path(),
+ valueColumn.emptyBehavior(),
+ valueColumn.emptyDefaultInput(),
+ valueColumn.errorBehavior(),
+ valueColumn.errorDefaultInput(),
+ outputTypes[valueColumn.outputIndex()]);
+ }
+ if (column instanceof JsonTableQueryColumn queryColumn) {
+ ScalarFunctionImplementation implementation = functionManager.getScalarFunctionImplementation(
+ queryColumn.function(),
+ new InvocationConvention(
+ ImmutableList.of(BOXED_NULLABLE, BOXED_NULLABLE, BOXED_NULLABLE, NEVER_NULL, NEVER_NULL, NEVER_NULL),
+ NULLABLE_RETURN,
+ true,
+ true));
+ JsonPathInvocationContext context;
+ checkArgument(implementation.getInstanceFactory().isPresent(), "instance factory is missing");
+ try {
+ context = (JsonPathInvocationContext) implementation.getInstanceFactory().get().invoke();
+ }
+ catch (Throwable throwable) {
+ throwIfUnchecked(throwable);
+ throw new RuntimeException(throwable);
+ }
+ return new QueryColumn(
+ queryColumn.outputIndex(),
+ implementation.getMethodHandle()
+ .bindTo(context)
+ .bindTo(session),
+ queryColumn.path(),
+ queryColumn.wrapperBehavior(),
+ queryColumn.emptyBehavior(),
+ queryColumn.errorBehavior());
+ }
+ return new OrdinalityColumn(((JsonTableOrdinalityColumn) column).outputIndex());
+ }
+}
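
Note how `getColumn()` pre-binds the per-query invocation context and the session into each column's `MethodHandle`, so per-row evaluation only supplies the remaining arguments. A self-contained sketch of that currying pattern with plain `java.lang.invoke` (the `greet` method is hypothetical, not the Trino scalar function ABI):

```java
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

public class BindToSketch
{
    // Stand-in for a scalar implementation taking (context, session, argument).
    static String greet(String context, String session, String argument)
    {
        return context + "/" + session + ": " + argument;
    }

    public static void main(String[] args)
            throws Throwable
    {
        MethodHandle handle = MethodHandles.lookup().findStatic(
                BindToSketch.class,
                "greet",
                MethodType.methodType(String.class, String.class, String.class, String.class));

        // bindTo() fixes the leading arguments once, like ExecutionPlanner binding context and session
        MethodHandle bound = handle.bindTo("ctx").bindTo("session-1");

        // per-row invocation now only supplies the row-dependent argument
        System.out.println((String) bound.invoke("row-42")); // ctx/session-1: row-42
    }
}
```
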
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java
new file mode 100644
index 0000000000000..56cbdbe724be0
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentCross.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.spi.Page;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Objects.requireNonNull;
+
+public class FragmentCross
+ implements JsonTableProcessingFragment
+{
+ private final List<JsonTableProcessingFragment> siblings;
+ private final int[] outputLayout;
+
+ private Page input;
+ private int position;
+ private JsonNode currentItem;
+ private int currentSiblingIndex;
+
+ public FragmentCross(List<JsonTableProcessingFragment> siblings)
+ {
+ this.siblings = ImmutableList.copyOf(siblings);
+ checkArgument(siblings.size() >= 2, "less than 2 siblings in Cross node");
+ this.outputLayout = siblings.stream()
+ .map(JsonTableProcessingFragment::getOutputLayout)
+ .flatMapToInt(Arrays::stream)
+ .toArray();
+ }
+
+ @Override
+ public void reset(JsonNode item, Page input, int position)
+ {
+ this.currentItem = requireNonNull(item, "item is null");
+ this.input = requireNonNull(input, "input is null");
+ this.position = position;
+ siblings.get(0).reset(item, input, position);
+ this.currentSiblingIndex = 0;
+ }
+
+ /**
+ * All values produced by the siblings are stored on corresponding positions in `newRow`. It is a temporary representation of the result row, and is shared by all Fragments.
+ * The values in `newRow` are not cleared between subsequent calls to getRow(), so that the parts which do not change are automatically reused.
+ */
+ @Override
+ public boolean getRow()
+ {
+ while (currentSiblingIndex >= 0) {
+ boolean currentSiblingProducedRow = siblings.get(currentSiblingIndex).getRow();
+ if (currentSiblingProducedRow) {
+ for (int i = currentSiblingIndex + 1; i < siblings.size(); i++) {
+ JsonTableProcessingFragment sibling = siblings.get(i);
+ sibling.reset(currentItem, input, position);
+ boolean siblingProducedRow = sibling.getRow();
+ if (!siblingProducedRow) {
+ // if any sibling is empty, the whole CROSS fragment is empty
+ return false;
+ }
+ }
+ currentSiblingIndex = siblings.size() - 1;
+ return true;
+ }
+
+ // current sibling is finished
+ currentSiblingIndex--;
+ }
+
+ // fragment is finished
+ return false;
+ }
+
+ @Override
+ public int[] getOutputLayout()
+ {
+ return outputLayout;
+ }
+}
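
`FragmentCross.getRow()` enumerates the cross product lazily, odometer-style: the right-most sibling advances first, and when a sibling is exhausted the loop backtracks left and re-resets everything to its right. A standalone sketch of the same iteration order over plain lists, including the empty-sibling short-circuit (a hypothetical helper, not Trino code):

```java
import java.util.ArrayList;
import java.util.List;

public class CrossOdometerSketch
{
    static List<List<String>> crossProduct(List<List<String>> siblings)
    {
        List<List<String>> result = new ArrayList<>();
        // any empty sibling empties the whole product, as in FragmentCross.getRow()
        for (List<String> sibling : siblings) {
            if (sibling.isEmpty()) {
                return result;
            }
        }
        int[] indexes = new int[siblings.size()];
        int current = siblings.size() - 1;
        while (true) {
            List<String> row = new ArrayList<>();
            for (int i = 0; i < siblings.size(); i++) {
                row.add(siblings.get(i).get(indexes[i]));
            }
            result.add(row);
            // advance like an odometer: right-most first, carry to the left when exhausted
            while (current >= 0 && indexes[current] == siblings.get(current).size() - 1) {
                indexes[current] = 0;
                current--;
            }
            if (current < 0) {
                return result;
            }
            indexes[current]++;
            current = siblings.size() - 1;
        }
    }

    public static void main(String[] args)
    {
        System.out.println(crossProduct(List.of(List.of("a", "b"), List.of("1", "2"))));
        // [[a, 1], [a, 2], [b, 1], [b, 2]]
    }
}
```
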
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java
new file mode 100644
index 0000000000000..9a11e63067d41
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentLeaf.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.json.JsonPathEvaluator;
+import io.trino.json.ir.IrJsonPath;
+import io.trino.metadata.FunctionManager;
+import io.trino.metadata.Metadata;
+import io.trino.spi.Page;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.type.TypeManager;
+
+import java.util.List;
+
+import static io.trino.operator.table.json.execution.SequenceEvaluator.getSequence;
+import static java.util.Objects.requireNonNull;
+
+public class FragmentLeaf
+ implements JsonTableProcessingFragment
+{
+ private static final Object[] NO_PARAMETERS = new Object[0];
+
+ private final JsonPathEvaluator pathEvaluator;
+ private final List<Column> columns;
+ private final boolean errorOnError;
+ private final int[] outputLayout;
+
+ // the place where the computed values (or nulls) are stored while computing an output row
+ private final Object[] newRow;
+
+ private Page input;
+ private int position;
+ private List<JsonNode> sequence;
+ private int nextItemIndex;
+
+ public FragmentLeaf(
+ IrJsonPath path,
+ List<Column> columns,
+ boolean errorOnError,
+ Object[] newRow,
+ ConnectorSession session,
+ Metadata metadata,
+ TypeManager typeManager,
+ FunctionManager functionManager)
+ {
+ requireNonNull(path, "path is null");
+ this.pathEvaluator = new JsonPathEvaluator(path, session, metadata, typeManager, functionManager);
+ this.columns = ImmutableList.copyOf(columns);
+ this.errorOnError = errorOnError;
+ this.outputLayout = columns.stream()
+ .mapToInt(Column::getOutputIndex)
+ .toArray();
+ this.newRow = requireNonNull(newRow, "newRow is null");
+ }
+
+ @Override
+ public void reset(JsonNode item, Page input, int position)
+ {
+ resetRoot(item, input, position, NO_PARAMETERS);
+ }
+
+ /**
+ * FragmentLeaf can be the root Fragment. The root fragment is the only fragment that may have path parameters.
+ * Prepares the root Fragment to produce rows for the new JSON item and a set of path parameters.
+ */
+ @Override
+ public void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters)
+ {
+ requireNonNull(pathParameters, "pathParameters is null");
+ this.input = requireNonNull(input, "input is null");
+ this.position = position;
+ this.nextItemIndex = 0;
+ this.sequence = getSequence(item, pathParameters, pathEvaluator, errorOnError);
+ }
+
+ @Override
+ public boolean getRow()
+ {
+ if (nextItemIndex >= sequence.size()) {
+ // fragment is finished
+ return false;
+ }
+ JsonNode currentItem = sequence.get(nextItemIndex);
+ nextItemIndex++; // it is correct to pass the updated value to `column.evaluate()` because ordinality numbers are 1-based according to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules.
+ for (Column column : columns) {
+ newRow[column.getOutputIndex()] = column.evaluate(nextItemIndex, currentItem, input, position);
+ }
+ return true;
+ }
+
+ @Override
+ public int[] getOutputLayout()
+ {
+ return outputLayout;
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java
new file mode 100644
index 0000000000000..d3d285f0658e3
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentSingle.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.json.JsonPathEvaluator;
+import io.trino.json.ir.IrJsonPath;
+import io.trino.metadata.FunctionManager;
+import io.trino.metadata.Metadata;
+import io.trino.spi.Page;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.type.TypeManager;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.IntStream;
+
+import static io.trino.operator.table.json.execution.SequenceEvaluator.getSequence;
+import static java.util.Objects.requireNonNull;
+
+public class FragmentSingle
+ implements JsonTableProcessingFragment
+{
+ private static final Object[] NO_PARAMETERS = new Object[] {};
+
+ private final JsonPathEvaluator pathEvaluator;
+ private final List<Column> columns;
+ private final boolean errorOnError;
+ private final boolean outer;
+ private final JsonTableProcessingFragment child;
+ private final int[] outputLayout;
+
+ // the place where the computed values (or nulls) are stored while computing an output row
+ private final Object[] newRow;
+
+ private Page input;
+ private int position;
+ private List<JsonNode> sequence;
+ private int nextItemIndex;
+
+ // start processing next item from the sequence
+ private boolean processNextItem;
+
+ // indicates whether we need to produce a null-padded row for OUTER
+ private boolean childAlreadyProduced;
+
+ public FragmentSingle(
+ IrJsonPath path,
+ List<Column> columns,
+ boolean errorOnError,
+ boolean outer,
+ JsonTableProcessingFragment child,
+ Object[] newRow,
+ ConnectorSession session,
+ Metadata metadata,
+ TypeManager typeManager,
+ FunctionManager functionManager)
+ {
+ requireNonNull(path, "path is null");
+ this.pathEvaluator = new JsonPathEvaluator(path, session, metadata, typeManager, functionManager);
+ this.columns = ImmutableList.copyOf(columns);
+ this.errorOnError = errorOnError;
+ this.outer = outer;
+ this.child = requireNonNull(child, "child is null");
+ this.outputLayout = IntStream.concat(
+ columns.stream()
+ .mapToInt(Column::getOutputIndex),
+ Arrays.stream(child.getOutputLayout()))
+ .toArray();
+ this.newRow = requireNonNull(newRow, "newRow is null");
+ }
+
+ @Override
+ public void reset(JsonNode item, Page input, int position)
+ {
+ resetRoot(item, input, position, NO_PARAMETERS);
+ }
+
+ /**
+ * FragmentSingle can be the root Fragment. The root fragment is the only fragment that may have path parameters.
+ * Prepares the root Fragment to produce rows for the new JSON item and a set of path parameters.
+ */
+ @Override
+ public void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters)
+ {
+ requireNonNull(pathParameters, "pathParameters is null");
+ this.input = requireNonNull(input, "input is null");
+ this.position = position;
+ this.nextItemIndex = 0;
+ this.processNextItem = true;
+ this.sequence = getSequence(item, pathParameters, pathEvaluator, errorOnError);
+ }
+
+ /**
+ * All values produced by the columns are stored on corresponding positions in `newRow`.
+ * The values in `newRow` are not cleared between subsequent calls to `getRow()`, so the values for columns are automatically reused during iterating over child.
+ */
+ @Override
+ public boolean getRow()
+ {
+ while (true) {
+ if (processNextItem) {
+ if (nextItemIndex >= sequence.size()) {
+ // fragment is finished
+ return false;
+ }
+ JsonNode currentItem = sequence.get(nextItemIndex);
+ nextItemIndex++; // it is correct to pass the updated value to `column.evaluate()` because ordinality numbers are 1-based according to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules.
+ for (Column column : columns) {
+ newRow[column.getOutputIndex()] = column.evaluate(nextItemIndex, currentItem, input, position);
+ }
+ child.reset(currentItem, input, position);
+ childAlreadyProduced = false;
+ processNextItem = false;
+ }
+
+ boolean childProducedRow = child.getRow();
+ if (childProducedRow) {
+ childAlreadyProduced = true;
+ return true;
+ }
+
+ // child is finished
+ processNextItem = true;
+ if (outer && !childAlreadyProduced) {
+ appendNulls(child);
+ return true;
+ }
+ }
+ }
+
+ private void appendNulls(JsonTableProcessingFragment fragment)
+ {
+ for (int column : fragment.getOutputLayout()) {
+ newRow[column] = null;
+ }
+ }
+
+ @Override
+ public int[] getOutputLayout()
+ {
+ return outputLayout;
+ }
+}
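
`FragmentSingle` is where the parent/child OUTER semantics live: the parent's column values stay in place while the child's slots are null-padded exactly once if the child produced nothing for the current item. A standalone sketch of the resulting row shape (hypothetical parent-to-children input, plain strings instead of the shared `newRow`):

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class OuterNestingSketch
{
    // Parent rows joined with their nested child rows; with OUTER, a childless parent
    // still yields one row whose child part is null-padded (mirrors FragmentSingle.getRow()).
    static List<String> join(Map<String, List<String>> parentToChildren, boolean outer)
    {
        List<String> rows = new ArrayList<>();
        parentToChildren.forEach((parent, children) -> {
            boolean childProduced = false;
            for (String child : children) {
                childProduced = true;
                rows.add(parent + " | " + child);
            }
            if (outer && !childProduced) {
                rows.add(parent + " | null"); // parent values kept, child columns nulled once
            }
        });
        return rows;
    }

    public static void main(String[] args)
    {
        Map<String, List<String>> input = new LinkedHashMap<>();
        input.put("p1", List.of("c1", "c2"));
        input.put("p2", List.of());
        System.out.println(join(input, true));  // [p1 | c1, p1 | c2, p2 | null]
        System.out.println(join(input, false)); // [p1 | c1, p1 | c2]
    }
}
```
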
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java
new file mode 100644
index 0000000000000..30ae142f9dfad
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/FragmentUnion.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.spi.Page;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Objects.requireNonNull;
+
+public class FragmentUnion
+ implements JsonTableProcessingFragment
+{
+ private final List<JsonTableProcessingFragment> siblings;
+ private final int[] outputLayout;
+
+ // the place where the computed values (or nulls) are stored while computing an output row
+ private final Object[] newRow;
+
+ private int currentSiblingIndex;
+
+ public FragmentUnion(List<JsonTableProcessingFragment> siblings, Object[] newRow)
+ {
+ this.siblings = ImmutableList.copyOf(siblings);
+ checkArgument(siblings.size() >= 2, "less than 2 siblings in Union node");
+ this.outputLayout = siblings.stream()
+ .map(JsonTableProcessingFragment::getOutputLayout)
+ .flatMapToInt(Arrays::stream)
+ .toArray();
+ this.newRow = requireNonNull(newRow, "newRow is null");
+ }
+
+ @Override
+ public void reset(JsonNode item, Page input, int position)
+ {
+ requireNonNull(item, "item is null");
+ requireNonNull(input, "input is null");
+ siblings.forEach(sibling -> sibling.reset(item, input, position));
+ this.currentSiblingIndex = 0;
+ appendNulls(this);
+ }
+
+ /**
+ * The values produced by the current sibling are stored on corresponding positions in `newRow`, and for other siblings `newRow` is filled with nulls.
+ * The values in `newRow` are not cleared between subsequent calls to getRow(), so that the parts which do not change are automatically reused.
+ */
+ @Override
+ public boolean getRow()
+ {
+ while (true) {
+ if (currentSiblingIndex >= siblings.size()) {
+ // fragment is finished
+ return false;
+ }
+
+ JsonTableProcessingFragment currentSibling = siblings.get(currentSiblingIndex);
+ boolean currentSiblingProducedRow = currentSibling.getRow();
+ if (currentSiblingProducedRow) {
+ return true;
+ }
+
+ // current sibling is finished
+ appendNulls(currentSibling);
+ currentSiblingIndex++;
+ }
+ }
+
+ private void appendNulls(JsonTableProcessingFragment fragment)
+ {
+ for (int column : fragment.getOutputLayout()) {
+ newRow[column] = null;
+ }
+ }
+
+ @Override
+ public int[] getOutputLayout()
+ {
+ return outputLayout;
+ }
+}
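
`FragmentUnion` concatenates its siblings' rows: only the current sibling writes its slots of the shared row, and a finished sibling's slots are nulled before moving on (plus the up-front `appendNulls(this)` in `reset()`). A standalone sketch of that layout discipline with plain arrays (hypothetical layouts and values):

```java
import java.util.Arrays;
import java.util.List;

public class UnionPaddingSketch
{
    public static void main(String[] args)
    {
        // Two siblings writing into a shared 4-slot row: sibling 0 owns slots {0, 1}, sibling 1 owns {2, 3}.
        int[][] layouts = {{0, 1}, {2, 3}};
        String[][][] siblingRows = {
                {{"a", "b"}},                // sibling 0 produces one row
                {{"x", "y"}, {"z", "w"}},    // sibling 1 produces two rows
        };

        Object[] newRow = new Object[4];
        Arrays.fill(newRow, null); // reset() nulls the whole union layout up front
        for (int sibling = 0; sibling < layouts.length; sibling++) {
            for (String[] values : siblingRows[sibling]) {
                for (int i = 0; i < layouts[sibling].length; i++) {
                    newRow[layouts[sibling][i]] = values[i];
                }
                System.out.println(Arrays.toString(newRow));
            }
            // sibling finished: null-pad its columns before moving on, like appendNulls(currentSibling)
            for (int column : layouts[sibling]) {
                newRow[column] = null;
            }
        }
        // [a, b, null, null]
        // [null, null, x, y]
        // [null, null, z, w]
    }
}
```
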
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java
new file mode 100644
index 0000000000000..bfe518b41036c
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/JsonTableProcessingFragment.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.trino.spi.Page;
+
+public interface JsonTableProcessingFragment
+{
+ /**
+ * Prepares the Fragment to produce rows for the new JSON item.
+ * Note: This method must be called for each new JSON item. Due to nesting, there might be multiple JSON items to process for a single position in the input page.
+ * Therefore, input and position may not change for subsequent calls.
+ *
+ * @param item the new JSON item
+ * @param input the input Page currently processed by json_table function
+ * @param position the currently processed position in the input page
+ */
+ void reset(JsonNode item, Page input, int position);
+
+ /**
+ * Prepares the root Fragment to produce rows for the new JSON item and new set of path parameters.
+ * Note: at the root level, there is one JSON item and one set of path parameters to process for each position in the input page.
+ *
+ * @param item the new JSON item
+ * @param input the input Page currently processed by json_table function
+ * @param position the currently processed position in the input page
+ * @param pathParameters JSON path parameters for the top-level JSON path
+ */
+ default void resetRoot(JsonNode item, Page input, int position, Object[] pathParameters)
+ {
+ throw new IllegalStateException("not the root fragment");
+ }
+
+ /**
+ * Tries to produce output values for all columns included in the Fragment,
+ * and stores them in corresponding positions in `newRow`.
+ * Note: According to OUTER or UNION semantics, some values might be null-padded instead of computed.
+ * Note: a single JSON item might result in multiple output rows. To fully process a JSON item, the caller must:
+ * - reset the Fragment with the JSON item
+ * - call getRow() and collect output rows as long as `true` is returned
+ * If `false` is returned, there is no output row available, and the JSON item is fully processed
+ *
+ * @return true if a row was produced, false if no row was produced (the Fragment is finished)
+ */
+ boolean getRow();
+
+ /**
+ * Returns an array containing indexes of columns produced by the fragment within all columns produced by json_table.
+ */
+ int[] getOutputLayout();
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java
new file mode 100644
index 0000000000000..d26479ecf9e41
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/OrdinalityColumn.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.trino.spi.Page;
+
+public class OrdinalityColumn
+ implements Column
+{
+ private final int outputIndex;
+
+ public OrdinalityColumn(int outputIndex)
+ {
+ this.outputIndex = outputIndex;
+ }
+
+ @Override
+ public Object evaluate(long sequentialNumber, JsonNode item, Page input, int position)
+ {
+ return sequentialNumber;
+ }
+
+ @Override
+ public int getOutputIndex()
+ {
+ return outputIndex;
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java
new file mode 100644
index 0000000000000..613ec5c41db39
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/QueryColumn.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.trino.json.ir.IrJsonPath;
+import io.trino.spi.Page;
+
+import java.lang.invoke.MethodHandle;
+
+import static com.google.common.base.Throwables.throwIfUnchecked;
+import static java.util.Objects.requireNonNull;
+
+public class QueryColumn
+ implements Column
+{
+ private final int outputIndex;
+ private final MethodHandle methodHandle;
+ private final IrJsonPath path;
+ private final long wrapperBehavior;
+ private final long emptyBehavior;
+ private final long errorBehavior;
+
+ public QueryColumn(int outputIndex, MethodHandle methodHandle, IrJsonPath path, long wrapperBehavior, long emptyBehavior, long errorBehavior)
+ {
+ this.outputIndex = outputIndex;
+ this.methodHandle = requireNonNull(methodHandle, "methodHandle is null");
+ this.path = requireNonNull(path, "path is null");
+ this.wrapperBehavior = wrapperBehavior;
+ this.emptyBehavior = emptyBehavior;
+ this.errorBehavior = errorBehavior;
+ }
+
+ @Override
+ public Object evaluate(long sequentialNumber, JsonNode item, Page input, int position)
+ {
+ try {
+ return methodHandle.invoke(item, path, null, wrapperBehavior, emptyBehavior, errorBehavior);
+ }
+ catch (Throwable throwable) {
+ // According to ISO/IEC 9075-2:2016(E) 7.11 p.462 General rules 1) e) ii) 3) D) any exception thrown by column evaluation should be propagated.
+ throwIfUnchecked(throwable);
+ throw new RuntimeException(throwable);
+ }
+ }
+
+ @Override
+ public int getOutputIndex()
+ {
+ return outputIndex;
+ }
+}
diff --git a/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java
new file mode 100644
index 0000000000000..32b4fe0b9389c
--- /dev/null
+++ b/core/trino-main/src/main/java/io/trino/operator/table/json/execution/SequenceEvaluator.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.operator.table.json.execution;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import io.trino.json.JsonPathEvaluator;
+import io.trino.json.PathEvaluationException;
+import io.trino.json.ir.TypedValue;
+import io.trino.operator.scalar.json.JsonOutputConversionException;
+
+import java.util.List;
+import java.util.Optional;
+
+import static com.google.common.base.Preconditions.checkState;
+import static io.trino.json.JsonInputErrorNode.JSON_ERROR;
+import static io.trino.json.ir.SqlJsonLiteralConverter.getJsonNode;
+import static java.lang.String.format;
+
+public class SequenceEvaluator
+{
+ private SequenceEvaluator()
+ {
+ }
+
+ // creates a sequence of JSON items, and applies error handling
+ public static List<JsonNode> getSequence(JsonNode item, Object[] pathParameters, JsonPathEvaluator pathEvaluator, boolean errorOnError)
+ {
+ if (item == null) {
+ // According to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules 1) a) empty table should be returned for null input. Empty sequence will result in an empty table.
+ return ImmutableList.of();
+ }
+ // According to ISO/IEC 9075-2:2016(E) 7.11 p.461 General rules 1) e) exception thrown by path evaluation should be handled accordingly to json_table error behavior (ERROR or EMPTY).
+ // handle input conversion error for the context item
+ if (item.equals(JSON_ERROR)) {
+ checkState(!errorOnError, "input conversion error should have been thrown in the input function");
+ // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table.
+ return ImmutableList.of();
+ }
+ // handle input conversion error for the path parameters
+ for (Object parameter : pathParameters) {
+ if (parameter.equals(JSON_ERROR)) {
+ checkState(!errorOnError, "input conversion error should have been thrown in the input function");
+ // the error behavior is EMPTY ON ERROR. Empty sequence will result in an empty table.
+ return ImmutableList.of();
+ }
+ }
+ // evaluate path into a sequence
+ List