From 2c16fabd7fd1ed626afcd812d04be1aaced1be04 Mon Sep 17 00:00:00 2001 From: anmunoz Date: Mon, 7 Mar 2022 02:01:54 +0100 Subject: [PATCH] Update dockerfile and documentation --- docs/processors_catalogue/ngsi_ckan_sink.md | 128 +++++++++--------- .../upadate_ckan_metadata.md | 22 +-- docs/quick_start_guide.md | 4 +- docs/use_cases/ckan_publication.md | 32 ++--- nifi-ngsi-resources/docker-compose.yml | 14 +- nifi-ngsi-resources/docker/Dockerfile | 15 +- 6 files changed, 109 insertions(+), 106 deletions(-) diff --git a/docs/processors_catalogue/ngsi_ckan_sink.md b/docs/processors_catalogue/ngsi_ckan_sink.md index 40a6a1b..02fc7ad 100644 --- a/docs/processors_catalogue/ngsi_ckan_sink.md +++ b/docs/processors_catalogue/ngsi_ckan_sink.md @@ -1,25 +1,25 @@ # NGSIToCKAN Content: -- [Functionality](#section1) - - [Mapping NGSI-LD events to `NGSI-LDEvent` objects](#section1.1) - - [Mapping `NGSI-LDEvents` to CKAN data structures](#section1.2) - - [Organizations naming conventions](#section1.2.1) - - [Package/dataset naming conventions](#section1.2.2) - - [Resource naming conventions](#section1.2.3) - - [Column-like storing](#section1.2.4) - - [Example](#section1.3) - - [NGSI-LDEvent](#section1.3.1) - - [Organization, dataset and resource names](#section1.3.2) - - [Column-like storing](#section1.3.3) -- [Administration guide](#section2) - - [Configuration](#section2.1) - - [Use cases](#section2.2) - - [Important notes](#section2.3) - - [About the persistence mode](#section2.3.1) - - [About the encoding](#section2.3.3) -- [Programmers guide](#section3) - - [`NGSICKANSink` class](#section3.1) +- [Functionality](#section1) + - [Mapping NGSI-LD events to `NGSI-LDEvent` objects](#section1.1) + - [Mapping `NGSI-LDEvents` to CKAN data structures](#section1.2) + - [Organizations naming conventions](#section1.2.1) + - [Package/dataset naming conventions](#section1.2.2) + - [Resource naming conventions](#section1.2.3) + - [Column-like storing](#section1.2.4) + - 
[Example](#section1.3) + - [NGSI-LDEvent](#section1.3.1) + - [Organization, dataset and resource names](#section1.3.2) + - [Column-like storing](#section1.3.3) +- [Administration guide](#section2) + - [Configuration](#section2.1) + - [Use cases](#section2.2) + - [Important notes](#section2.3) + - [About the persistence mode](#section2.3.1) + - [About the encoding](#section2.3.3) +- [Programmers guide](#section3) + - [`NGSICKANSink` class](#section3.1) ## Functionality `NGSIToCKAN`, is a processor designed to persist NGSI-LD-like context data events within a [CKAN](http://ckan.org/) server.Usually, such a context data is notified by a @@ -38,7 +38,7 @@ This is done at the Draco-ngsi Http listeners (in NiFi, processors) thanks to NG [Top](#top) ### Mapping `NGSI-LDEvent`s to CKAN data structures -[CKAN ](http://docs.ckan.org/en/latest/user-guide.html) organizes the data in organizations containing packages or datasets; each one of these packages/datasets contains several resources whose data is finally stored in a PostgreSQL database (CKAN Datastore) or plain files (CKAN Filestore). Such organization is exploited by `NGSICKANSink` each time a `NGSI-LDEvent` is going to be persisted. +[CKAN](http://docs.ckan.org/en/latest/user-guide.html) organizes the data in organizations containing packages or datasets; each one of these packages/datasets contains several resources whose data is finally stored in a PostgreSQL database (CKAN Datastore) or plain files (CKAN Filestore). Such organization is exploited by `NGSICKANSink` each time a `NGSI-LDEvent` is going to be persisted. [Top](#top) @@ -49,7 +49,7 @@ https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTA Nevertheless, different than PostgreSQL, [organization lengths](http://docs.ckan.org/en/latest/api/#ckan.logic.action.create.organization_create) may be up to 100 characters (minimum, 2 characters). -* Data model by entity id (`data_model=dm-by-entity-id`). 
The organization name will take the value of the notified header `fiware-service`. Note that in this case, encoding is never applied. +- Data model by entity id (`data_model=dm-by-entity-id`). The organization name will take the value of the notified header `fiware-service`. Note that in this case, encoding is never applied. The following table summarizes the organization name composition: @@ -60,12 +60,12 @@ The following table summarizes the organization name composition: [Top](#top) #### Packages/datasets naming conventions -* Data model by entity (`data_model=dm-by-entity`). A package/dataset named as the notified `fiware-service` header value (or, in absence of such header, the defaulted value for the FIWARE service ) is created (if not existing yet) in the above organization. +- Data model by entity (`data_model=dm-by-entity`). A package/dataset named as the notified `fiware-service` header value (or, in absence of such header, the defaulted value for the FIWARE service ) is created (if not existing yet) in the above organization. Since based in [PostgreSQL only accepts](https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS), it must be said only alphanumeric characters and the underscore (`_`) are accepted. The hyphen ('-') is also accepted. This leads to certain [encoding](#section2.3.3) is applied depending on the `enable_encoding` configuration parameter Nevertheless, different than PostgreSQL, [dataset lengths](http://docs.ckan.org/en/latest/api/#ckan.logic.action.create.package_create) may be up to 100 characters (minimum, 2 characters). -* Data model by entity id (`data_model=dm-by-entity-id`). A package/dataset name always take the entity ID. Such a name is already given in the NGSI-LDEvent values, see the [Configuration](#section2.1) section for more details) within the the `NGSI-LDEvent`. Note that in this case, encoding is never applied. +- Data model by entity id (`data_model=dm-by-entity-id`). 
A package/dataset name always take the entity ID. Such a name is already given in the NGSI-LDEvent values, see the [Configuration](#section2.1) section for more details) within the the `NGSI-LDEvent`. Note that in this case, encoding is never applied. The following table summarizes the package name composition: @@ -78,10 +78,10 @@ The following table summarizes the package name composition: #### Resources naming conventions The resource name depends on the configured data model (see the [Configuration](#section2.1) section for more details): -* Data model by entity (`data_model=dm-by-entity`). A resource name always take the concatenation of the entity ID and type. Such a name is already given in the `notified_entities`/`grouped_entities` header values (depending on using or not the grouping rules, see the [Configuration](#section2.1) section for more details) within the `NGSI-LDEvent`. +- Data model by entity (`data_model=dm-by-entity`). A resource name always take the concatenation of the entity ID and type. Such a name is already given in the `notified_entities`/`grouped_entities` header values (depending on using or not the grouping rules, see the [Configuration](#section2.1) section for more details) within the `NGSI-LDEvent`. -* Data model by entity id (`data_model=dm-by-entity-id`). A resource name always take the entity ID. Such a name is already given in the NGSI-LDEvent values, see the [Configuration](#section2.1) section for more details) within the the `NGSI-LDEvent`. Note that in this case, encoding is never applied. +- Data model by entity id (`data_model=dm-by-entity-id`). A resource name always take the entity ID. Such a name is already given in the NGSI-LDEvent values, see the [Configuration](#section2.1) section for more details) within the the `NGSI-LDEvent`. Note that in this case, encoding is never applied. It must be noticed a CKAN Datastore (and a viewer) is also created and associated to the resource above. 
This datastore, which in the end is a PostgreSQL table, will hold the persisted data. @@ -101,10 +101,10 @@ The following table summarizes the resource name composition: #### Column-like storing Regarding the specific data stored within the datastore associated to the resource, if `attr_persistence` parameter is set to `column` then a single line is composed for the whole notified entity, containing the following fields: -* `recvTime`: UTC timestamp in human-redable format ([ISO 8601](http://en.wikipedia.org/wiki/ISO_8601)). -* `entityId`: Notified entity identifier. -* `entityType`: Notified entity type. -* For each notified property/relationship, a field named as the property/relationship is considered. This field will store the property/relationship values along the time, if no unique value is presented, the values will be stored like a JSON string. +- `recvTimeTs`: UTC timestamp in human-readable format ([ISO 8601](http://en.wikipedia.org/wiki/ISO_8601)). +- `entityId`: Notified entity identifier. +- `entityType`: Notified entity type. +- For each notified property/relationship, a field named as the property/relationship is considered. This field will store the property/relationship values along the time, if no unique value is presented, the values will be stored like a JSON string. [Top](#top) @@ -258,30 +258,30 @@ NOTE: `curl` is a Unix command allowing for interacting with REST APIs such as t ### Configuration `NGSIToCKAN` is configured through the following parameters: -| Parameter | Mandatory | Default value | Comments | -|---|---|---|---| -| CKAN Host | no | localhost | FQDN/IP address where the CKAN server runs. || -| CKAN Port | no | 80 || -| CKAN Viewer | no | recline\_grid\_view | Please check the [available](http://docs.ckan.org/en/latest/maintaining/data-viewer.html) viewers at CKAN documentation. | -| CKAN API Key | yes | N/A || -| ORION URL | yes | http://localhost:1026 | To be put as the filestore URL. 
| -| SSL | no | false || -| NGSI Version | yes | ld | The NGSI version of the incoming notification could (currently only ngsi-ld available)| -| Data Model | no | dm-by-entity | dm-by-entity-id, dm-by-entity | -| Attribute Persistence | no | column | column.| -| Default Service | no | test | The default Fiware service value for being used instead of the fiware-service header received for build the organization name | -| Default Service Path| no | /path | The default Fiware service path value for being used instead of the fiware-service.path header received for build the package name (currently not used) | -| Create DataStore | no | true | IF it is tru the DataStore is create and the data is stored in CKAN, otherwise teh Data store is not created and, in this way the Organization, package and dataset with the metadata is created associated with a link with the external resource | -| batch\_size | no | 1 | Number of events accumulated before persistence. | -| Enable Encoding | no | false | true or false, true applies the new encoding, false applies the old encoding. || -| Enable Lowercase | no | false | true or false. for applying lowercase to the name of organization, package dataset and resource|| -| Batch Size | no | 1 | Number of events accumulated before persistence. | -| batch\_timeout | no | 30 | Number of seconds the batch will be building before it is persisted as it is. | -| batch\_ttl | no | 10 | Number of retries when a batch cannot be persisted. Use `0` for no retries, `-1` for infinite retries. Please, consider an infinite TTL (even a very large one) may consume all the sink's channel capacity very quickly. | -| batch\_retry\_intervals | no | 5000 | Comma-separated list of intervals (in miliseconds) at which the retries regarding not persisted batches will be done. First retry will be done as many miliseconds after as the first value, then the second retry will be done as many miliseconds after as second value, and so on. 
If the batch\_ttl is greater than the number of intervals, the last interval is repeated. | -| Max Connections | no | 500 | Maximum number of connections allowed for a Http-based HDFS backend. | -| Max Connections per route | no | 100 | Maximum number of connections per route allowed for a Http-based HDFS backend. | -| Rollback on failure| false | false | Do a rollback in case of failure | +| Parameter | Mandatory | Default value | Comments | +|---|---|---------------------------|---| +| CKAN Host | no | localhost | FQDN/IP address where the CKAN server runs. || +| CKAN Port | no | 80 || +| CKAN Viewer | no | recline\_grid\_view | Please check the [available](http://docs.ckan.org/en/latest/maintaining/data-viewer.html) viewers at CKAN documentation. | +| CKAN API Key | yes | N/A || +| ORION URL | yes | [http://localhost:1026](http://localhost:1026) | To be put as the filestore URL. | +| SSL | no | false || +| NGSI Version | yes | ld | The NGSI version of the incoming notification could (currently only ngsi-ld available)| +| Data Model | no | dm-by-entity | dm-by-entity-id, dm-by-entity | +| Attribute Persistence | no | column | column.| +| Default Service | no | test | The default Fiware service value for being used instead of the fiware-service header received for build the organization name | +| Default Service Path| no | /path | The default Fiware service path value for being used instead of the fiware-service.path header received for build the package name (currently not used) | +| Create DataStore | no | true | IF it is tru the DataStore is create and the data is stored in CKAN, otherwise teh Data store is not created and, in this way the Organization, package and dataset with the metadata is created associated with a link with the external resource | +| batch\_size | no | 1 | Number of events accumulated before persistence. | +| Enable Encoding | no | false | true or false, true applies the new encoding, false applies the old encoding. 
|| +| Enable Lowercase | no | false | true or false. for applying lowercase to the name of organization, package dataset and resource|| +| Batch Size | no | 1 | Number of events accumulated before persistence. | +| batch\_timeout | no | 30 | Number of seconds the batch will be building before it is persisted as it is. | +| batch\_ttl | no | 10 | Number of retries when a batch cannot be persisted. Use `0` for no retries, `-1` for infinite retries. Please, consider an infinite TTL (even a very large one) may consume all the sink's channel capacity very quickly. | +| batch\_retry\_intervals | no | 5000 | Comma-separated list of intervals (in miliseconds) at which the retries regarding not persisted batches will be done. First retry will be done as many miliseconds after as the first value, then the second retry will be done as many miliseconds after as second value, and so on. If the batch\_ttl is greater than the number of intervals, the last interval is repeated. | +| Max Connections | no | 500 | Maximum number of connections allowed for a Http-based HDFS backend. | +| Max Connections per route | no | 100 | Maximum number of connections per route allowed for a Http-based HDFS backend. | +| Rollback on failure| false | false | Do a rollback in case of failure | A configuration example could be: ![NGSIToCKAN configuration example](../images/processor-ckan.png) @@ -310,21 +310,21 @@ By default, `NGSIToCKAN` has a configured batch size and batch accumulation time #### About the encoding Until version 1.2.0 (included), Draco applied a very simple encoding: -* All non alphanumeric characters were replaced by underscore, `_`. -* The underscore was used as concatenator character as well. +- All non alphanumeric characters were replaced by underscore, `_`. +- The underscore was used as concatenator character as well. From version 1.3.0 (included), Draco applies this specific encoding tailored to CKAN data structures: -* Lowercase alphanumeric characters are not encoded. 
-* Upercase alphanumeric characters are encoded. -* Numeric characters are not encoded. -* Underscore character, `_`, is not encoded. -* Hyphen character, `-`, is not encoded. -* Equals character, `=`, is encoded as `xffff`. -* All other characters, including the slash in the FIWARE service paths, are encoded as a `x` character followed by the [Unicode](http://unicode-table.com) of the character. -* User defined strings composed of a `x` character and a Unicode are encoded as `xx` followed by the Unicode. -* `xffff` is used as concatenator character. +- Lowercase alphanumeric characters are not encoded. +- Upercase alphanumeric characters are encoded. +- Numeric characters are not encoded. +- Underscore character, `_`, is not encoded. +- Hyphen character, `-`, is not encoded. +- Equals character, `=`, is encoded as `xffff`. +- All other characters, including the slash in the FIWARE service paths, are encoded as a `x` character followed by the [Unicode](http://unicode-table.com) of the character. +- User defined strings composed of a `x` character and a Unicode are encoded as `xx` followed by the Unicode. +- `xffff` is used as concatenator character. Despite the old encoding will be deprecated in the future, it is possible to switch the encoding type through the `enable_encoding` parameter as explained in the [configuration](#section2.1) section. 
diff --git a/docs/processors_catalogue/upadate_ckan_metadata.md b/docs/processors_catalogue/upadate_ckan_metadata.md index b6fd322..8711dea 100644 --- a/docs/processors_catalogue/upadate_ckan_metadata.md +++ b/docs/processors_catalogue/upadate_ckan_metadata.md @@ -1,17 +1,17 @@ # UpdateCKANMetadata Content: -- [Functionality](#section1) -- [Mapping NGSI-LD events to `NGSI-LDEvent` objects](#section1.1) -- [Mapping `NGSI-LDEvents` to DCAT-AP Metadata into CKAN data structures](#section1.2) - - [DCAT-AP Metadata tags for Organizations](#section1.2.1) - - [DCAT-AP Metadata tags for Packages/Datasets](#section1.2.2) - - [DCAT-AP Metadata tags for Resources](#section1.2.3) -- [Administration guide](#section2) - - [Configuration](#section2.1) -- [Use cases](#section2.2) -- [Programmers guide](#section3) - - [`UpdateCKANMetadata` class](#section3.1) +- [Functionality](#section1) +- [Mapping NGSI-LD events to `NGSI-LDEvent` objects](#section1.1) +- [Mapping `NGSI-LDEvents` to DCAT-AP Metadata into CKAN data structures](#section1.2) + - [DCAT-AP Metadata tags for Organizations](#section1.2.1) + - [DCAT-AP Metadata tags for Packages/Datasets](#section1.2.2) + - [DCAT-AP Metadata tags for Resources](#section1.2.3) +- [Administration guide](#section2) + - [Configuration](#section2.1) +- [Use cases](#section2.2) +- [Programmers guide](#section3) + - [`UpdateCKANMetadata` class](#section3.1) ## Functionality `UpdateCKANMetadata`, is a processor designed to add additional metadata to an incoming flowfile or NGSI-LD event. 
This processor was designed to include all the metadata fields needed for complying with [DCAT-AP v2.0.1](https://joinup.ec.europa.eu/collection/semantic-interoperability-community-semic/news/dcat-ap-release-201) diff --git a/docs/quick_start_guide.md b/docs/quick_start_guide.md index af63dc4..4049661 100644 --- a/docs/quick_start_guide.md +++ b/docs/quick_start_guide.md @@ -62,8 +62,8 @@ mysql latest 273a1eca2d3a 2 weeks ago (2) Once you have your containers up and running, you can add the template provided for persisting data to MySQL. First, go to your browser and open Draco using this URL `https://localhost:9090/nifi/` using the following credentials in the login page: -- `user: admin` -- `password: pass1234567890` +- `user: admin` +- `password: pass1234567890` The next image provides you the location of many components of Draco. Please put special attention to the template button, play button and processor component, you will use them later. diff --git a/docs/use_cases/ckan_publication.md b/docs/use_cases/ckan_publication.md index d366a7f..27b2112 100755 --- a/docs/use_cases/ckan_publication.md +++ b/docs/use_cases/ckan_publication.md @@ -11,8 +11,8 @@ basic functionality focused on providing the needed procedure for configuring an ## Scenarios This use case is composed of two scenarios: -* Publish data and metadata into CKAN coming from NGSI-LD data sources (NGSI-DRACO-CKAN connector). -* Publish data and metadata into CKAN coming from REST data sources (REST-DRACO-CKAN connector). +- Publish data and metadata into CKAN coming from NGSI-LD data sources (NGSI-DRACO-CKAN connector). +- Publish data and metadata into CKAN coming from REST data sources (REST-DRACO-CKAN connector). ## Publish data and metadata into CKAN coming from NGSI-LD data sources (NGSI-DRACO-CKAN connector). 
This scenario covers the configuration and deployment of the needed components for being able to capture transform and process data and metadata coming from @@ -49,17 +49,17 @@ A graphical view of this template is presented in the next figure: ![connector-ngsi-draco-ckan](../images/connector-ngsi-draco-ckan.png) -* [ListenHTTP](../processors_catalogue/ngsi_rest_handler.md): it receives notifications from orion subscriptions. -* [UpdateCKANMetadataAttributes](../processors_catalogue/upadate_ckan_metadata.md): configures metadata for organizations, packages, resources and datastores. -* [NGSIToCKAN](../processors_catalogue/ngsi_ckan_sink.md): creates organizations, packages, resources, datastores and views in CKAN. +- [ListenHTTP](../processors_catalogue/ngsi_rest_handler.md): it receives notifications from orion subscriptions. +- [UpdateCKANMetadataAttributes](../processors_catalogue/upadate_ckan_metadata.md): configures metadata for organizations, packages, resources and datastores. +- [NGSIToCKAN](../processors_catalogue/ngsi_ckan_sink.md): creates organizations, packages, resources, datastores and views in CKAN. For more details about the configuration of processors listed above, please go to the de documentation link provided. The first scenario presented in this guide is composed of the following services: - - * Orion - * Draco - * CKAN + +- Orion +- Draco +- CKAN @@ -74,10 +74,10 @@ All the scenario is running using docker containers. Thus, it is needed to have This use case provides an example of how the user can publish data and metadata using the NGSI-DRACO-CKAN connector and other Generic enablers of the FIWARE ecosystem. It emulates the creation of a dataset with the context data about a Building entity containing information about buildings, their coordinates, and the city that the building belongs to. 
-* Building Entities that want to be published in a CKAN portal -* Each Building is stored as a new package with a resource pointing to a Context Broker URL -* Each Building belongs to a city -* Each city has its organization in the CKAN portal +- Building Entities that want to be published in a CKAN portal +- Each Building is stored as a new package with a resource pointing to a Context Broker URL +- Each Building belongs to a city +- Each city has its organization in the CKAN portal Example: ![use-case-example-entity](../images/use-case-example-entity.png) @@ -163,11 +163,11 @@ chmod a+x unican.sh ``` You can check if the Organization and package and dataset have been created. First, enter CKAN GUI via browser to [http://localhost:5000]. -* The organization Santander will be created +- The organization Santander will be created ![santander-organization](../images/santander-org.png) -* The package urn_ngsi-ld_building_building001 will be created -> NGSIToCKAN processors generates the package name taken from the notification as it was not defined in the UpdateCKANMetadataAttributes +- The package urn_ngsi-ld_building_building001 will be created -> the NGSIToCKAN processor generates the package name from the notification because it was not defined in UpdateCKANMetadataAttributes ![unican-package](../images/unican-build.png) -* The resource UniCAN will be created +- The resource UniCAN will be created ![unican-resource](../images/unican-res.png) 5. Download the catalog from CKAN. 
Go to the browser and put the following url: diff --git a/nifi-ngsi-resources/docker-compose.yml b/nifi-ngsi-resources/docker-compose.yml index 959f62d..c4cce14 100644 --- a/nifi-ngsi-resources/docker-compose.yml +++ b/nifi-ngsi-resources/docker-compose.yml @@ -19,13 +19,13 @@ services: # MYSQL_ROOT_PASSWORD: example # ports: # - "3306:3306" - db-postgres: - image: postgres - restart: always - environment: - POSTGRES_PASSWORD: example - ports: - - "5432:5432" + # db-postgres: + # image: postgres + # restart: always + # environment: + # POSTGRES_PASSWORD: example + # ports: + # - "5432:5432" # mongo-db: # image: mongo:3.4 # restart: always diff --git a/nifi-ngsi-resources/docker/Dockerfile b/nifi-ngsi-resources/docker/Dockerfile index a7e5e7c..1c8e345 100644 --- a/nifi-ngsi-resources/docker/Dockerfile +++ b/nifi-ngsi-resources/docker/Dockerfile @@ -1,5 +1,7 @@ -FROM openjdk:8u191-jre-alpine3.9 +ARG IMAGE_NAME=openjdk +ARG IMAGE_TAG=8-jre +FROM ${IMAGE_NAME}:${IMAGE_TAG} LABEL maintainer="Andrés Muñoz" LABEL site="https://github.com/ging/fiware-draco" @@ -22,17 +24,18 @@ ENV DRACO_RELEASE=2.1.0 ENV NIFI_NGSI_NAR_VERSION=2.1.0 ADD sh/ ${NIFI_BASE_DIR}/scripts/ +RUN chmod -R +x ${NIFI_BASE_DIR}/scripts/*.sh # Setup NiFi user and create necessary directories # change uid and gid for elasticsearch user -RUN apk --no-cache add shadow -RUN addgroup -g ${GID} -S nifi || groupmod -n nifi `getent group ${GID} | cut -d: -f1` \ - && adduser -u ${UID} -S nifi -g ${GID} \ +# Uses groupadd/useradd and apt-get in place of the Alpine addgroup/adduser and apk +RUN groupadd -g ${GID} nifi || groupmod -n nifi `getent group ${GID} | cut -d: -f1` \ + && useradd --shell /bin/bash -u ${UID} -g ${GID} -m nifi \ && mkdir -p ${NIFI_BASE_DIR} \ && chown -R nifi:nifi ${NIFI_BASE_DIR} \ - && apk update \ - && apk add jq xmlstarlet procps curl + && apt-get update \ + && apt-get install -y jq xmlstarlet procps USER nifi