From c479c1e4b57f4ce87cfe00f6e31e37f03570e896 Mon Sep 17 00:00:00 2001
From: Jason Hwee <1216418+hweej@users.noreply.github.com>
Date: Fri, 10 May 2024 10:26:49 -0400
Subject: [PATCH] Docker image improvements (#279)

* Add ignore rules for sdkman and vim config files.

* Add console logger config

* Clean up Dockerfile, modify ENTRYPOINT to CMD directive.

* Cleanup gitignore file

* Cleanup text and formatting a bit

* Modify docker image reference

* Add back readme alerts

* Fix alert typos

* Update README.md

* Remove log4j appender config during build

* Move away from openjdk base image

* Remove file appender logging instructions in favor of stdout logging.
---
 .gitignore                                    |  20 +++-
 Dockerfile                                    |  39 ++++---
 README.md                                     | 102 +++++++++---------
 .../log4j.properties.console.EXAMPLE          |  29 +++++
 4 files changed, 122 insertions(+), 68 deletions(-)
 create mode 100644 annotationPipeline/src/main/resources/log4j.properties.console.EXAMPLE

diff --git a/.gitignore b/.gitignore
index 8f22496a..1883fc8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,23 @@
-annotationPipeline/target/*
+# Core
 annotator/target/*
+annotationPipeline/target/*
 annotationPipeline/src/main/resources/application.properties
+annotationPipeline/src/main/resources/log4j.properties
+annotationPipeline/data/output.txt
+
 databaseAnnotator/target/*
 databaseAnnotator/src/main/resources/application.properties
-.DS_Store
-annotationPipeline/src/main/resources/log4j.properties
 databaseAnnotator/src/main/resources/log4j.properties
-*.iml
+
+# Utilities Ignore
 swagger-codegen-cli.jar
+*.iml
 *repository.sqlite
-annotationPipeline/data/output.txt
+
+# OS Ignore
+.DS_Store
+
+# Editor Ignore
 /.idea/
+.vim/
+.sdkmanrc
diff --git a/Dockerfile b/Dockerfile
index a7cb1cbb..9235ed07 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,26 +1,39 @@
-FROM openjdk:21-jdk-slim
+# Stage 0 (build): compile the project with Maven on the Temurin 21 image
+FROM maven:3-eclipse-temurin-21 AS build
 
+# Build arg: flags passed to the mvn invocation below (tests skipped by default)
+ARG MAVEN_OPTS=-DskipTests
+
+# Paths reused by the steps in this stage
 ENV GN_HOME=/genome-nexus-annotation-pipeline
+ENV GN_RESOURCES=$GN_HOME/annotationPipeline/src/main/resources
+
+# Add the full build context as the source tree
 COPY . $GN_HOME
 WORKDIR $GN_HOME
 
-COPY annotationPipeline/src/main/resources/log4j.properties.EXAMPLE $GN_HOME/annotationPipeline/src/main/resources/log4j.properties
-RUN apt-get update && apt-get install -y maven && apt-get clean;
-# set log4j file in properties
-RUN sed -i "s|log4j\.appender\.a\.File.*|log4j.appender.a.File = $GN_HOME/logs/genome-nexus-annotation-pipeline.log|" $GN_HOME/annotationPipeline/src/main/resources/log4j.properties
-
-ARG mvnprofiles=''
-RUN mvn -DskipTests clean install $mvnprofiles
+# Use the console (stdout) example as the log4j.properties that gets built in
+RUN cp $GN_RESOURCES/log4j.properties.console.EXAMPLE $GN_RESOURCES/log4j.properties
 
+# Maven build (quiet output)
+RUN mvn ${MAVEN_OPTS} clean install -q
 
-FROM openjdk:21-jdk-slim
+# Stage 1 (runtime): final image, receives only the artifacts copied below
+FROM eclipse-temurin:21
 
 ENV GN_HOME=/genome-nexus-annotation-pipeline
+ENV GN_RESOURCES=$GN_HOME/annotationPipeline/src/main/resources
+ENV GN_TARGET=$GN_HOME/annotationPipeline/target
 
-COPY --from=0 $GN_HOME/annotationPipeline/target/annotationPipeline-*.jar $GN_HOME/annotationPipeline/target/annotationPipeline.jar
+# Install procps; remove the apt package lists in the same layer to keep it small
+RUN apt-get update && apt-get -y install --no-install-recommends \
+    procps \
+    && rm -rf /var/lib/apt/lists/*
 
-COPY annotationPipeline/src/main/resources/application.properties.EXAMPLE $GN_HOME/annotationPipeline/src/main/resources/application.properties
+# Copy the built jar and the example application.properties from the build stage
+COPY --from=build $GN_TARGET/annotationPipeline-*.jar $GN_TARGET/annotationPipeline.jar
+COPY --from=build $GN_RESOURCES/application.properties.EXAMPLE $GN_RESOURCES/application.properties
 
-RUN apt-get update && apt-get install procps -y
+WORKDIR $GN_TARGET
 
-ENTRYPOINT ["java", "-jar", "/genome-nexus-annotation-pipeline/annotationPipeline/target/annotationPipeline.jar"]
+CMD ["java", "-jar", "annotationPipeline.jar"]
diff --git a/README.md b/README.md
index bccd858c..93e487de 100644
--- a/README.md
+++ b/README.md
@@ -1,62 +1,58 @@
 # Genome Nexus Annotation Pipeline
-These tools allow for annotation of genomic variants from a MAF for import into
+Annotation of genomic variants from a MAF for import into
 the cBioPortal using [Genome Nexus](http://genomenexus.org).
 
 ## MAF Annotation
 The `annotationPipeline` module is a command line tool to annotate a maf using genome nexus. 
 
-**Pre-build steps**
+## Build From Source
 
-Create your `application.properties`:
-
-```
+1. **Create `application.properties` file:**
+```sh
 cp annotationPipeline/src/main/resources/application.properties.EXAMPLE annotationPipeline/src/main/resources/application.properties
 ```
+> [!TIP]
+> If you have your own installation of Genome Nexus, you can point to it by 
+> modifying the `genomenexus.base=<URL>` in `application.properties` 
 
-If you have your own
-installation of Genome Nexus, you can point to it by modifying the
-`application.properties` file located in
-`annotationPipeline/src/main/resources`.
+2. **Create `log4j.properties`:**
 
-Create your `log4j.properties`:
+```sh
+cp annotationPipeline/src/main/resources/log4j.properties.console.EXAMPLE annotationPipeline/src/main/resources/log4j.properties
-
+```
+## Build
+```sh
+mvn clean install
 ```
-cp annotationPipeline/src/main/resources/log4j.properties.EXAMPLE annotationPipeline/src/main/resources/log4j.properties
+## Run Annotate
+To use it, build the project using maven and run it like so:
+```sh
+java -jar annotationPipeline/target/annotationPipeline-*.jar \
+    --filename <INPUT_MAF> \
+    --output-filename <OUTPUT DESTINATION> \
+    --isoform-override <mskcc or uniprot>
 ```
 
-Modify the property `log4j.appender.a.File` in your `log4j.properties` file to the desired log file path.
-
-To use it, build the project using maven and run it like so:
-    
-    mvn clean install
-    $JAVA_HOME/bin/java -jar annotationPipeline/target/annotationPipeline-*.jar \
-        --filename <INPUT_MAF> \
-        --output-filename <OUTPUT DESTINATION> \
-        --isoform-override <mskcc or uniprot>
-    
 To output error reporting to a file, supply the `-e` option a location for the file to be saved. By running the jar without any arguments or by providing the optional parameter `-h` you can view the full usage statement. 
 
-## Annotate data with Docker
-Genome Nexus Annotation Pipeline is available on Docker: https://hub.docker.com/r/genomenexus/gn-annotation-pipeline.
+## Annotate with Docker
+Genome Nexus Annotation Pipeline is available on DockerHub: https://hub.docker.com/r/genomenexus/gn-annotation-pipeline.
 
 #### Usage instruction
 ```
-docker run -v ${PWD}:/wd genomenexus/gn-annotation-pipeline:master --filename /wd/input.txt  --output-filename /wd/output.txt
+docker pull genomenexus/gn-annotation-pipeline:master 
+```
+```
+docker run -v ${PWD}:/wd genomenexus/gn-annotation-pipeline:master java -jar annotationPipeline.jar --filename /wd/input.txt  --output-filename /wd/output.txt
 ```
 - `-v ${PWD}:/wd`: This option maps the current working directory to a volume within the Docker container at the path `/wd`. This makes it possible for files in the host directory to be accessed and modified between container and host.
 - `--filename /wd/input.txt`: This option specifies the input file location at `/wd/input.txt`, which should be under the same directory as output file.
-
 - `--output-filename /wd/output.txt`: This option specifies the output file where the annotated results will be saved. The file will be created at `/wd/output.txt`, which should be under the same directory as input file.
 
-#### View logging file
-To enable logging in the Genome Nexus Annotation Pipeline and access the log file, you need to mount your local path to view the log file locally. By default, the log file is stored at `/genome-nexus-annotation-pipeline/logs/genome-nexus-annotation-pipeline.log`. Use the following command as an example:
-```
-docker run  -v ${PWD}:/wd -v ${PWD}/logs:/genome-nexus-annotation-pipeline/logs genomenexus/gn-annotation-pipeline:master --filename  /wd/input.txt  --output-filename /wd/output.txt
-```
-- `-v ${PWD}/logs:/genome-nexus-annotation-pipeline/logs` flag mounts the logs directory inside the current working directory to the corresponding directory inside the Docker container.
-- Other flags are the same as above
+### Logging
 
-Make sure to adjust the file paths according to your specific requirements. Once the command is executed, the log file will be generated and stored in the logs directory within your local directory. 
+> [!IMPORTANT]
+> Logging via docker has been changed to `stdout` by default since v1.0.4
 
 ### Optional parameters
 | Short | Long | Description | 
@@ -74,14 +70,18 @@ Make sure to adjust the file paths according to your specific requirements. Once
 | `-d` | `--ignore-original-location` | Genome-nexus-annotation-pipeline reads original genomic location info as input by default, if not existing, reading from normal genomic location info columns. Adding `-d` ignores original genomic location info columns (columns with prefix 'IGNORE_Genome_Nexus_Original_') and only use whatever in normal genomic location info columns. This would be helpful if you'd like to stick with current genomic location info columns.|
 
 ### Reference Genome
-The Genome Nexus Annotation Pipeline supports two versions of the human genome reference assembly: **GRCh37** and **GRCh38**.
-By default, the pipeline uses **GRCh37**. 
-#### Using GRCh38
+The Genome Nexus Annotation Pipeline supports two versions of the human genome reference assembly: 
+1. **GRCh37** (default)
+2. **GRCh38**
+
+> [!NOTE]
+> By default, the pipeline uses **GRCh37**. 
 
+#### Using GRCh38
 If you want to annotate with **GRCh38**, please set the `GENOMENEXUS_BASE` environment variable to `https://grch38.genomenexus.org`. Here's an example of how to do this:
 
 ```
-docker run -e GENOMENEXUS_BASE=https://grch38.genomenexus.org -v ${PWD}:/wd genomenexus/gn-annotation-pipeline:latest --filename /wd/input.txt  --output-filename /wd/output.txt --isoform-override uniprot
+docker run -e GENOMENEXUS_BASE=https://grch38.genomenexus.org -v ${PWD}:/wd genomenexus/gn-annotation-pipeline:master java -jar annotationPipeline.jar --filename /wd/input.txt --output-filename /wd/output.txt --isoform-override uniprot
 ```
 
 ### Annotation fields
@@ -165,15 +165,17 @@ docker run -e GENOMENEXUS_BASE=https://grch38.genomenexus.org -v ${PWD}:/wd geno
 
 ### Add additional annotation columns
 Genome Nexus supports additional annotation columns with the setting of "enrichment_fields". The configuration for these enrichment fields is managed through the `application.properties` file, please refer to `Pre-build steps` section.
+
 To configure the enrichment fields, you need to include the desired field names from the provided list in the `-Dgenomenexus.enrichment_fields=` parameter of the command line, or directly add field names in `genomenexus.enrichment_fields=` in `application.properties` file. Multiple field names can be specified by separating them with commas. `annotation_summary` is highly recommended to add as default since it's crucial for lots of annotation fields.
+
 **Example**:
 ```
-java 
--Dgenomenexus.enrichment_fields=annotation_summary,my_variant_info 
--jar annotationPipeline/target/annotationPipeline-*.jar \ -r \ 
---filename test/data/minimal_example.in.txt \ 
---output-filename test/data/minimal_example.out.uniprot.txt \ 
---isoform-override uniprot
+java -Dgenomenexus.enrichment_fields=annotation_summary,my_variant_info \
+    -jar annotationPipeline/target/annotationPipeline-*.jar \
+    -r \
+    --filename test/data/minimal_example.in.txt \
+    --output-filename test/data/minimal_example.out.uniprot.txt \
+    --isoform-override uniprot
 ```
 ##### Available enrichment fields:
 - annotation_summary:
@@ -195,13 +197,13 @@ For an example minimal input file see
 corresponding output
 [test/data/minimal_example.out.uniprot.txt](test/data/minimal_example.out.uniprot.txt).
 The output file was generated with:
-
-    $JAVA_HOME/bin/java -jar annotationPipeline/target/annotationPipeline-*.jar \
-        -r \
-        --filename test/data/minimal_example.in.txt  \
-        --output-filename test/data/minimal_example.out.uniprot.txt \
-        --isoform-override uniprot
-
+```
+$JAVA_HOME/bin/java -jar annotationPipeline/target/annotationPipeline-*.jar \
+    -r \
+    --filename test/data/minimal_example.in.txt  \
+    --output-filename test/data/minimal_example.out.uniprot.txt \
+    --isoform-override uniprot
+```
 
 ## Direct Database Annotation
 There used to be a utility/module called databaseAnnotator which could be
diff --git a/annotationPipeline/src/main/resources/log4j.properties.console.EXAMPLE b/annotationPipeline/src/main/resources/log4j.properties.console.EXAMPLE
new file mode 100644
index 00000000..2af41d43
--- /dev/null
+++ b/annotationPipeline/src/main/resources/log4j.properties.console.EXAMPLE
@@ -0,0 +1,29 @@
+# Change INFO to DEBUG, if you want to see debugging info on underlying libraries we use.
+log4j.rootLogger=INFO, stdout
+
+# Change INFO to DEBUG, if you want to see debugging info on our packages only.
+log4j.category.org.mskcc=INFO
+#log4j.category.org.springframework=ALL
+
+#log4j.logger.org.springframework.security=DEBUG
+#log4j.logger.org.springframework.integration=DEBUG
+
+
+# Use the JVM option, e.g.: "java -DPORTAL_HOME=/pathto/portal_homedir",
+# or - "java -DPORTAL_HOME=$PORTAL_HOME", where PORTAL_HOME is shell (environment) variable.
+
+## IMPORTANT - THRESHOLD SHOULD NOT BE DEBUG FOR PRODUCTION, CREDENTIALS CAN BE DISPLAYED!
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} [%t] %-5p %c - %m%n
+
+#log4j.logger.org.hibernate=INFO, a
+#log4j.logger.org.hibernate.SQL=DEBUG
+#log4j.logger.org.hibernate.type=TRACE
+#log4j.logger.org.hibernate.hql.ast.AST=info
+#log4j.logger.org.hibernate.tool.hbm2ddl=warn
+#log4j.logger.org.hibernate.hql=debug
+#log4j.logger.org.hibernate.cache=info
+#log4j.logger.org.hibernate.jdbc=debug