Skip to content

Commit

Permalink
Introduce JDBC2 version store type, deprecate JDBC version store …
Browse files Browse the repository at this point in the history
…type

The current `JDBC` version store type uses a lot of columns in the `objs` table, which causes considerable storage overhead. The new `JDBC2` version store type works like `JDBC`, but uses a single column for all object values and also serializes object-ids as binary values.
  • Loading branch information
snazy committed Aug 17, 2024
1 parent 142929d commit 3c452b4
Show file tree
Hide file tree
Showing 87 changed files with 3,654 additions and 59 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,17 @@ as necessary. Empty sections will not end in the release notes.
- Nessie commit author(s) and "signed off by" can now be configured for both Nessie clients and Iceberg
REST clients. More info on
[projectnessie.org](https://projectnessie.org/guides/iceberg-rest/#customizing-nessie-commit-author-et-al).
- Introduce new `JDBC2` version store type, which has the same functionality as the `JDBC` version
store type, but uses far fewer columns, which considerably reduces storage overhead, for example in PostgreSQL.

### Changes

### Deprecations

- The current version store type `JDBC` is deprecated, please migrate to the new `JDBC2` version store
type. Please use the [Nessie Server Admin Tool](https://projectnessie.org/nessie-latest/export_import)
to migrate from the `JDBC` version store type to `JDBC2`.

### Fixes

### Commits
Expand Down
2 changes: 2 additions & 0 deletions bom/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ dependencies {
api(project(":nessie-versioned-storage-inmemory-tests"))
api(project(":nessie-versioned-storage-jdbc"))
api(project(":nessie-versioned-storage-jdbc-tests"))
api(project(":nessie-versioned-storage-jdbc2"))
api(project(":nessie-versioned-storage-jdbc2-tests"))
api(project(":nessie-versioned-storage-mongodb"))
api(project(":nessie-versioned-storage-mongodb-tests"))
api(project(":nessie-versioned-storage-rocksdb"))
Expand Down
2 changes: 2 additions & 0 deletions gradle/projects.main.properties
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ nessie-versioned-storage-inmemory=versioned/storage/inmemory
nessie-versioned-storage-inmemory-tests=versioned/storage/inmemory-tests
nessie-versioned-storage-jdbc=versioned/storage/jdbc
nessie-versioned-storage-jdbc-tests=versioned/storage/jdbc-tests
nessie-versioned-storage-jdbc2=versioned/storage/jdbc2
nessie-versioned-storage-jdbc2-tests=versioned/storage/jdbc2-tests
nessie-versioned-storage-mongodb=versioned/storage/mongodb
nessie-versioned-storage-mongodb-tests=versioned/storage/mongodb-tests
nessie-versioned-storage-rocksdb=versioned/storage/rocksdb
Expand Down
7 changes: 2 additions & 5 deletions helm/nessie/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,8 @@ imagePullSecrets: []
# `quarkus.log.category."io.smallrye.config".level: DEBUG`
logLevel: INFO

# -- Which type of version store to use: IN_MEMORY, ROCKSDB, DYNAMODB, MONGODB, CASSANDRA, JDBC, BIGTABLE.
# (Legacy version store types are: INMEMORY, ROCKS, DYNAMO, MONGO, TRANSACTIONAL. If you are using
# one of these legacy version store types, migrate your existing repositories to the new version
# store types using the nessie-server-admin-tool's export/import functionality; support for these
# legacy version store types has been removed in Nessie 0.75.0.)
# -- Which type of version store to use: IN_MEMORY, ROCKSDB, DYNAMODB, MONGODB, CASSANDRA, JDBC2, BIGTABLE.
# Note: the version store type JDBC is deprecated, please use the Nessie Server Admin Tool to migrate to JDBC2.
versionStoreType: IN_MEMORY

# Cassandra settings. Only required when using CASSANDRA version store type; ignored otherwise.
Expand Down
2 changes: 1 addition & 1 deletion servers/jax-rs-tests/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies {
compileOnly("com.fasterxml.jackson.core:jackson-annotations")

testImplementation(project(":nessie-versioned-storage-inmemory-tests"))
testImplementation(project(":nessie-versioned-storage-jdbc-tests"))
testImplementation(project(":nessie-versioned-storage-jdbc2-tests"))
testRuntimeOnly(libs.agroal.pool)

testImplementation(project(":nessie-jaxrs-testextension"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package org.projectnessie.jaxrs.tests;

import org.projectnessie.versioned.storage.jdbctests.H2BackendTestFactory;
import org.projectnessie.versioned.storage.jdbc2tests.H2BackendTestFactory;
import org.projectnessie.versioned.storage.testextension.NessieBackend;

@NessieBackend(H2BackendTestFactory.class)
Expand Down
1 change: 1 addition & 0 deletions servers/quarkus-common/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies {
implementation(project(":nessie-versioned-storage-dynamodb"))
implementation(project(":nessie-versioned-storage-inmemory"))
implementation(project(":nessie-versioned-storage-jdbc"))
implementation(project(":nessie-versioned-storage-jdbc2"))
implementation(project(":nessie-versioned-storage-mongodb"))
implementation(project(":nessie-versioned-storage-rocksdb"))
implementation(project(":nessie-versioned-storage-store"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import io.smallrye.config.ConfigMapping;
import io.smallrye.config.WithName;
import java.util.Optional;
import org.projectnessie.versioned.storage.jdbc.JdbcBackendBaseConfig;
import org.projectnessie.versioned.storage.jdbc2.Jdbc2BackendBaseConfig;

/**
* Setting {@code nessie.version.store.type=JDBC} enables transactional/RDBMS as the version store
* Setting {@code nessie.version.store.type=JDBC2} enables transactional/RDBMS as the version store
* used by the Nessie server.
*
* <p>Configuration of the datastore will be done by Quarkus and depends on many factors, such as
Expand All @@ -34,7 +34,7 @@
* <p>For example, to configure a PostgresQL connection, the following configuration should be used:
*
* <ul>
* <li>{@code nessie.version.store.type=JDBC}
* <li>{@code nessie.version.store.type=JDBC2}
* <li>{@code nessie.version.store.persist.jdbc.datasource=postgresql}
* <li>{@code quarkus.datasource.postgresql.jdbc.url=jdbc:postgresql://localhost:5432/my_database}
* <li>{@code quarkus.datasource.postgresql.username=<your username>}
Expand All @@ -46,7 +46,7 @@
* <p>To connect to a MariaDB database instead, the following configuration should be used:
*
* <ul>
* <li>{@code nessie.version.store.type=JDBC}
* <li>{@code nessie.version.store.type=JDBC2}
* <li>{@code nessie.version.store.persist.jdbc.datasource=mariadb}
* <li>{@code quarkus.datasource.mariadb.jdbc.url=jdbc:mariadb://localhost:3306/my_database}
* <li>{@code quarkus.datasource.mariadb.username=<your username>}
Expand All @@ -57,7 +57,7 @@
* <p>To connect to a MySQL database instead, the following configuration should be used:
*
* <ul>
* <li>{@code nessie.version.store.type=JDBC}
* <li>{@code nessie.version.store.type=JDBC2}
* <li>{@code nessie.version.store.persist.jdbc.datasource=mysql}
* <li>{@code quarkus.datasource.mysql.jdbc.url=jdbc:mysql://localhost:3306/my_database}
* <li>{@code quarkus.datasource.mysql.username=<your username>}
Expand All @@ -69,7 +69,7 @@
* H2 is not recommended for production):
*
* <ul>
* <li>{@code nessie.version.store.type=JDBC}
* <li>{@code nessie.version.store.type=JDBC2}
* <li>{@code nessie.version.store.persist.jdbc.datasource=h2}
* </ul>
*
Expand All @@ -81,7 +81,7 @@
*/
@StaticInitSafe
@ConfigMapping(prefix = "nessie.version.store.persist.jdbc")
public interface QuarkusJdbcConfig extends JdbcBackendBaseConfig {
public interface QuarkusJdbcConfig extends Jdbc2BackendBaseConfig {

/**
* The name of the datasource to use. Must correspond to a configured datasource under {@code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ public ConfigValue getValue(ConfigSourceInterceptorContext context, String name)
}

static boolean isDataSourceActive(ConfigSourceInterceptorContext context, String name) {
return versionStoreType(context) == VersionStoreType.JDBC
VersionStoreType type = versionStoreType(context);
return (type == VersionStoreType.JDBC || type == VersionStoreType.JDBC2)
&& dataSourceName(name).equals(activeDataSourceName(context));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.quarkus.providers.storage;

import static org.projectnessie.quarkus.config.VersionStoreConfig.VersionStoreType.JDBC2;

import io.quarkus.agroal.runtime.UnconfiguredDataSource;
import io.quarkus.arc.All;
import io.quarkus.arc.InstanceHandle;
import io.quarkus.datasource.common.runtime.DatabaseKind;
import io.quarkus.datasource.runtime.DataSourceBuildTimeConfig;
import io.quarkus.datasource.runtime.DataSourcesBuildTimeConfig;
import jakarta.annotation.PostConstruct;
import jakarta.enterprise.context.Dependent;
import jakarta.inject.Inject;
import java.util.List;
import javax.sql.DataSource;
import org.projectnessie.quarkus.config.QuarkusJdbcConfig;
import org.projectnessie.quarkus.providers.versionstore.StoreType;
import org.projectnessie.versioned.storage.common.persist.Backend;
import org.projectnessie.versioned.storage.jdbc2.Jdbc2BackendConfig;
import org.projectnessie.versioned.storage.jdbc2.Jdbc2BackendFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link BackendBuilder} for the {@code JDBC2} version store type.
 *
 * <p>Picks one of the injected {@link DataSource}s by the name configured in {@link
 * QuarkusJdbcConfig#datasourceName()} (falling back to {@link #DEFAULT_DATA_SOURCE_NAME}) and
 * hands it to {@link Jdbc2BackendFactory} to build the {@link Backend}.
 */
@StoreType(JDBC2)
@Dependent
public class Jdbc2BackendBuilder implements BackendBuilder {

  /**
   * The name of the default datasource. Corresponds to the default map key in {@link
   * DataSourcesBuildTimeConfig#dataSources()}.
   */
  public static final String DEFAULT_DATA_SOURCE_NAME = "<default>";

  private static final Logger LOGGER = LoggerFactory.getLogger(Jdbc2BackendBuilder.class);

  @Inject DataSourcesBuildTimeConfig dataSourcesConfig;

  @Inject
  @All
  @SuppressWarnings("CdiInjectionPointsInspection")
  List<InstanceHandle<DataSource>> dataSources;

  @Inject QuarkusJdbcConfig config;

  /**
   * Validates the database kind of every datasource listed in the build-time configuration.
   *
   * @throws IllegalArgumentException if a datasource has no database kind configured, or its kind
   *     is not one of the kinds accepted by this builder
   */
  @PostConstruct
  public void checkDataSourcesConfiguration() {
    dataSourcesConfig.dataSources().forEach(this::checkDatabaseKind);
  }

  /**
   * Builds the backend on top of the selected datasource, taking all other settings from the
   * injected {@link QuarkusJdbcConfig}.
   *
   * @return the backend produced by {@link Jdbc2BackendFactory}
   * @throws IllegalStateException if no datasource with the configured name exists
   */
  @Override
  public Backend buildBackend() {
    DataSource dataSource = selectDataSource();
    Jdbc2BackendConfig backendConfig =
        Jdbc2BackendConfig.builder().from(config).dataSource(dataSource).build();
    return new Jdbc2BackendFactory().buildBackend(backendConfig);
  }

  /**
   * Removes one pair of enclosing double quotes from the given datasource name, if present.
   *
   * @param dataSourceName the datasource name, possibly wrapped in double quotes
   * @return the name without the enclosing quotes, or the unchanged input if it is not quoted
   */
  public static String unquoteDataSourceName(String dataSourceName) {
    // The length check matters: for the one-character string "\"" both startsWith and endsWith
    // match the same character, and substring(1, 0) would throw StringIndexOutOfBoundsException.
    boolean quoted =
        dataSourceName.length() >= 2
            && dataSourceName.startsWith("\"")
            && dataSourceName.endsWith("\"");
    return quoted ? dataSourceName.substring(1, dataSourceName.length() - 1) : dataSourceName;
  }

  /**
   * Ensures the given datasource declares a database kind that this builder accepts (PostgreSQL,
   * H2 or MariaDB).
   *
   * @param dataSourceName name of the datasource, used in error messages only
   * @param dataSourceConfig build-time configuration of that datasource
   * @throws IllegalArgumentException if the kind is missing or not accepted
   */
  private void checkDatabaseKind(String dataSourceName, DataSourceBuildTimeConfig dataSourceConfig) {
    String databaseKind =
        dataSourceConfig
            .dbKind()
            .orElseThrow(
                () ->
                    new IllegalArgumentException(
                        "Database kind not configured for datasource " + dataSourceName));
    boolean supported =
        DatabaseKind.isPostgreSQL(databaseKind)
            || DatabaseKind.isH2(databaseKind)
            || DatabaseKind.isMariaDB(databaseKind);
    if (!supported) {
      throw new IllegalArgumentException(
          "Database kind for datasource "
              + dataSourceName
              + " is configured to '"
              + databaseKind
              + "', which Nessie does not support yet; "
              + "currently PostgreSQL, H2, MariaDB (and MySQL via MariaDB driver) are supported. "
              + "Feel free to raise a pull request to support your database of choice.");
    }
  }

  /**
   * Resolves the datasource to use: the one named in the Nessie JDBC configuration, or the default
   * datasource when no name is configured (which logs a warning about the legacy configuration).
   *
   * @return the selected datasource
   * @throws IllegalStateException if no datasource with the resolved name exists
   */
  private DataSource selectDataSource() {
    String dataSourceName =
        config
            .datasourceName()
            .map(Jdbc2BackendBuilder::unquoteDataSourceName)
            .orElse(DEFAULT_DATA_SOURCE_NAME);
    DataSource dataSource = findDataSourceByName(dataSourceName);
    if (dataSource instanceof UnconfiguredDataSource unconfigured) {
      // Let the placeholder datasource raise its own error instead of handing it to the backend.
      unconfigured.throwException();
    }
    if (DEFAULT_DATA_SOURCE_NAME.equals(dataSourceName)) {
      LOGGER.warn(
          "Using legacy datasource configuration under quarkus.datasource.*: "
              + "please migrate to quarkus.datasource.postgresql.* and "
              + "set nessie.version.store.persist.jdbc.datasource=postgresql");
    } else {
      LOGGER.info("Selected datasource: {}", dataSourceName);
    }
    return dataSource;
  }

  /**
   * Looks up an injected datasource by its (unquoted) bean name; a bean without a name is treated
   * as the default datasource.
   *
   * @param dataSourceName the unquoted datasource name to look for
   * @return the matching datasource instance
   * @throws IllegalStateException if none of the injected datasources has that name
   */
  private DataSource findDataSourceByName(String dataSourceName) {
    for (InstanceHandle<DataSource> handle : dataSources) {
      String beanName = handle.getBean().getName();
      String candidate =
          beanName == null ? DEFAULT_DATA_SOURCE_NAME : unquoteDataSourceName(beanName);
      if (candidate.equals(dataSourceName)) {
        return handle.get();
      }
    }
    throw new IllegalStateException("No datasource configured with name: " + dataSourceName);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ public void checkDataSourcesConfiguration() {
@Override
public Backend buildBackend() {
DataSource dataSource = selectDataSource();
JdbcBackendConfig c = JdbcBackendConfig.builder().from(config).dataSource(dataSource).build();
JdbcBackendConfig c =
JdbcBackendConfig.builder()
.datasourceName(config.datasourceName())
.dataSource(dataSource)
.build();
return new JdbcBackendFactory().buildBackend(c);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ enum VersionStoreType {
DYNAMODB,
MONGODB,
CASSANDRA,
/** JDBC variant using many distinct columns. */
JDBC,
BIGTABLE
/** JDBC variant using few columns, saves storage overhead for example in PostgreSQL. */
JDBC2,
BIGTABLE,
}

/** Sets which type of version store to use by Nessie. */
Expand Down
1 change: 1 addition & 0 deletions servers/quarkus-server/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies {
implementation(project(":nessie-versioned-spi"))
implementation(project(":nessie-notice"))
implementation(project(":nessie-versioned-storage-jdbc"))
implementation(project(":nessie-versioned-storage-jdbc2"))
implementation(libs.nessie.ui)

// Nessie internal Quarkus extension, currently only disables "non-indexed classes" (Jandex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,15 @@ public class ConfigChecks {
@Inject QuarkusJdbcConfig jdbcConfig;

public void configCheck(@Observes StartupEvent event) {
if (versionStoreConfig.getVersionStoreType() == VersionStoreConfig.VersionStoreType.IN_MEMORY) {
VersionStoreConfig.VersionStoreType versionStoreType = versionStoreConfig.getVersionStoreType();
if (versionStoreType == VersionStoreConfig.VersionStoreType.IN_MEMORY) {
LOGGER.warn(
"Configured version store type IN_MEMORY is only for testing purposes and experimentation, not for production use. "
+ "Data will be lost when the process is shut down. "
+ "Recommended action: Use a supported database, see https://projectnessie.org/nessie-latest/configuration/");
}
if (versionStoreConfig.getVersionStoreType() == VersionStoreConfig.VersionStoreType.JDBC) {
if (versionStoreType == VersionStoreConfig.VersionStoreType.JDBC
|| versionStoreType == VersionStoreConfig.VersionStoreType.JDBC2) {
if (jdbcConfig.datasourceName().isPresent()
&& jdbcConfig.datasourceName().get().equalsIgnoreCase("h2")) {
LOGGER.warn(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,8 @@ nessie.server.send-stacktrace-to-client=false
# nessie.server.authorization.rules.allow_listing_reflog=\
# op=='VIEW_REFLOG' && role=='admin_user'

### which type of version store to use: IN_MEMORY, ROCKSDB, DYNAMODB, MONGODB, CASSANDRA, JDBC, BIGTABLE.
# Note: legacy configuration in `nessie.version.store.advanced` is _not_ applied to the version
# store types above. Use the config options starting with `nessie.version.store.persist`.
#
# Legacy version store types: INMEMORY, ROCKS, DYNAMO, MONGO, TRANSACTIONAL. If you are using
# one of these legacy version store types, migrate your existing repositories to the new version
# store types using the nessie-server-admin-tool's export/import functionality.
### which type of version store to use: IN_MEMORY, ROCKSDB, DYNAMODB, MONGODB, CASSANDRA, JDBC2, BIGTABLE.
# Note: the version store type JDBC is deprecated, please use the Nessie Server Admin Tool to migrate to JDBC2.
nessie.version.store.type=IN_MEMORY

# Object cache size as a value relative to the JVM's max heap size. The `cache-capacity-fraction-adjust-mb`
Expand Down
1 change: 1 addition & 0 deletions servers/quarkus-tests/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies {
implementation(project(":nessie-versioned-storage-cassandra-tests"))
implementation(project(":nessie-versioned-storage-dynamodb-tests"))
implementation(project(":nessie-versioned-storage-jdbc-tests"))
implementation(project(":nessie-versioned-storage-jdbc2-tests"))
implementation(project(":nessie-versioned-storage-mongodb-tests"))
implementation(project(":nessie-versioned-storage-rocksdb-tests"))
implementation(project(":nessie-versioned-storage-testextension"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import io.quarkus.test.common.QuarkusTestResourceLifecycleManager;
import java.util.Map;
import java.util.Optional;
import org.projectnessie.versioned.storage.jdbctests.MariaDBBackendTestFactory;
import org.projectnessie.versioned.storage.jdbc2tests.MariaDBBackendTestFactory;

public class MariaDBTestResourceLifecycleManager
implements QuarkusTestResourceLifecycleManager, DevServicesContext.ContextAware {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.projectnessie.versioned.storage.jdbctests.MySQLBackendTestFactory;
import org.projectnessie.versioned.storage.jdbc2tests.MySQLBackendTestFactory;

public class MySQLTestResourceLifecycleManager
implements QuarkusTestResourceLifecycleManager, DevServicesContext.ContextAware {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import io.quarkus.test.common.QuarkusTestResourceLifecycleManager;
import java.util.Map;
import java.util.Optional;
import org.projectnessie.versioned.storage.jdbctests.PostgreSQLBackendTestFactory;
import org.projectnessie.versioned.storage.jdbc2tests.PostgreSQLBackendTestFactory;

public class PostgresTestResourceLifecycleManager
implements QuarkusTestResourceLifecycleManager, DevServicesContext.ContextAware {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package org.projectnessie.quarkus.tests.profiles;

import static org.projectnessie.quarkus.config.VersionStoreConfig.VersionStoreType.JDBC;
import static org.projectnessie.quarkus.config.VersionStoreConfig.VersionStoreType.JDBC2;

import com.google.common.collect.ImmutableMap;
import java.util.Map;
Expand All @@ -26,7 +26,7 @@ public class QuarkusTestProfilePersistH2 extends BaseConfigProfile {
public Map<String, String> getConfigOverrides() {
return ImmutableMap.<String, String>builder()
.putAll(super.getConfigOverrides())
.put("nessie.version.store.type", JDBC.name())
.put("nessie.version.store.type", JDBC2.name())
.put("nessie.version.store.persist.jdbc.datasource", "h2")
.build();
}
Expand Down
Loading

0 comments on commit 3c452b4

Please sign in to comment.