From dfc9eb909e374cf369318138135e43c872c4dd40 Mon Sep 17 00:00:00 2001 From: ddebowczyk92 Date: Fri, 13 Sep 2024 15:33:46 +0200 Subject: [PATCH] Dataplex transport documentation (#3083) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dominik Dębowczyk --- .../client/java/partials/java_transport.md | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/website/docs/client/java/partials/java_transport.md b/website/docs/client/java/partials/java_transport.md index 105293eb27..e9552921d7 100644 --- a/website/docs/client/java/partials/java_transport.md +++ b/website/docs/client/java/partials/java_transport.md @@ -700,3 +700,93 @@ OpenLineageClient client = OpenLineageClient.builder() + +### [Dataplex](https://github.com/OpenLineage/OpenLineage/blob/main/client/transports-dataplex/src/main/java/io/openlineage/client/transports/dataplex/DataplexTransport.java) + +To use this transport in your project, you need to include `io.openlineage:transports-dataplex` artifact in +your build configuration. This is particularly important for environments like `Spark`, where this transport must be on +the classpath for lineage events to be emitted correctly. + +#### Configuration + +- `type` - string, must be `"dataplex"`. Required. +- `endpoint` - string, specifies the endpoint to which events are sent, default value is + `datalineage.googleapis.com:443`. Optional. +- `projectId` - string, the project quota identifier. If not provided, it is determined based on user credentials. + Optional. +- `location` - string, [Dataplex location](https://cloud.google.com/dataplex/docs/locations). Optional, default: + `"us"`. +- `credentialsFile` - string, path + to + the [Service Account credentials JSON file](https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account). 
+ Optional, if not + provided [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials) + are used. +- `mode` - enum that specifies the type of client used for publishing OpenLineage events to Dataplex. Possible values: + `sync` (synchronous) or `async` (asynchronous). Optional, default: `async`. + +#### Behavior + +- Events are serialized to JSON, included as part of a `gRPC` request, and then dispatched to the `Dataplex` endpoint. +- Depending on the `mode` chosen, requests are sent using either a synchronous or asynchronous client. + +#### Examples + + + + +```yaml +transport: + type: dataplex + projectId: your_gcp_project_id + location: us + mode: sync + credentialsFile: path/to/credentials.json +``` + + + + +```ini +spark.openlineage.transport.type=dataplex +spark.openlineage.transport.projectId=your_gcp_project_id +spark.openlineage.transport.location=us +spark.openlineage.transport.mode=sync +spark.openlineage.transport.credentialsFile=path/to/credentials.json +``` + + + + +```ini +openlineage.transport.type=dataplex +openlineage.transport.projectId=your_gcp_project_id +openlineage.transport.location=us +openlineage.transport.mode=sync +openlineage.transport.credentialsFile=path/to/credentials.json +``` + + + + +```java +import io.openlineage.client.OpenLineageClient; +import io.openlineage.client.transports.dataplex.DataplexConfig; +import io.openlineage.client.transports.dataplex.DataplexTransport; + + +DataplexConfig dataplexConfig = new DataplexConfig(); + +dataplexConfig.setProjectId("your_gcp_project_id"); +dataplexConfig.setLocation("us"); +dataplexConfig.setMode("sync"); +dataplexConfig.setCredentialsFile("path/to/credentials.json"); + +OpenLineageClient client = OpenLineageClient.builder() + .transport( + new DataplexTransport(dataplexConfig)) + .build(); +``` + + + \ No newline at end of file