Merge pull request #244 from clara-genomics/dev-v0.4.0

Merge dev-v0.4.0 into master
NVIDIA-Genomics-Research · Nov 28, 2019 · 70b8294 · 70b8294
2 parents f491258 + 49ff9c9
commit 70b8294
Show file tree

Hide file tree

Showing 105 changed files with 4,612 additions and 3,951 deletions.
diff --git a/.clang-format b/.clang-format
@@ -49,7 +49,7 @@ BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: true
 ColumnLimit:     0
 CommentPragmas:  '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true

diff --git a/.gitmodules b/.gitmodules
@@ -23,3 +23,6 @@
 [submodule "3rdparty/spoa"]
 	path = 3rdparty/spoa
 	url = https://github.com/rvaser/spoa.git
+[submodule "3rdparty/cub"]
+	path = 3rdparty/cub
+	url = https://github.com/NVlabs/cub.git
diff --git a/3rdparty/cub b/3rdparty/cub
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -10,7 +10,7 @@
 
 cmake_minimum_required(VERSION 3.10.2)
 set(CGA_PROJECT_NAME ClaraGenomicsAnalysis)
-set(CGA_VERSION 0.3.0)
+set(CGA_VERSION 0.4.0)
 project(${CGA_PROJECT_NAME})
 
 # Process options.
@@ -27,17 +27,25 @@ option(cga_device_synchronize_kernels "Run cudaDeviceSynchronize() in CGA_CU_CHE
 # overall consensus generated, and hence makes it harder to validate and debug.
 option(spoa_accurate "Run cudapoa code in mode that matches spoa" OFF)
 option(cga_enable_cudapoa_nw_print "Enable verbose prints within cudapoa NW kernel" OFF)
+option(cga_profiling "Compile a binary for profiling with NVTX markers." OFF)
+option(cga_generate_docs "Generate Doxygen documentation" ON)
 
+# Must be included before others for options value validation
+include(cmake/Utils.cmake)
+
+validate_boolean(cga_enable_tests)
 if (cga_enable_tests)
     message(STATUS "Enabling ClaraGenomicsAnalysis unit tests")
     set_property(GLOBAL PROPERTY enable_tests ON)
 endif()
 
+validate_boolean(cga_enable_benchmarks)
 if (cga_enable_benchmarks)
     message(STATUS "Enabling ClaraGenomicsAnalysis benchmarks")
     set_property(GLOBAL PROPERTY enable_benchmarks ON)
 endif()
 
+validate_boolean(cga_build_shared)
 if (cga_build_shared)
     message(STATUS "Building ClaraGenomicsAnalysis libraries as shared objects")
     set_property(GLOBAL PROPERTY cga_library_type SHARED)
@@ -64,8 +72,14 @@ add_subdirectory(cudamapper)
 add_subdirectory(cudaaligner)
 
 # Add documentation generation.
-set_doxygen_mainpage(${CMAKE_CURRENT_SOURCE_DIR}/README.md)
-add_docs_target("ClaraGenomicsAnalysis" "${CGA_VERSION}")
+validate_boolean(cga_generate_docs)
+if (cga_generate_docs)
+    message(STATUS "Enabling Doxygen documentation generation")
+    set_doxygen_mainpage(${CMAKE_CURRENT_SOURCE_DIR}/README.md)
+    add_docs_target("ClaraGenomicsAnalysis" "${CGA_VERSION}")
+else()
+    message(STATUS "Disabling Doxygen documentation generation")
+endif()
 
 # Add auto formatting.
 cga_enable_formatting_targets()

diff --git a/README.md b/README.md
@@ -37,6 +37,19 @@ A query fasta can be mapped to a reference as follows:
 To access more information about running cudamapper, run `cudamapper --help`.
 
 ## Clone Clara Genomics Analysis
+
+### Latest released version
+This will clone the repo to the `master` branch, which contains the code for the latest released version
+and hot-fixes.
+
+```
+git clone --recursive -b master git@github.com:clara-genomics/ClaraGenomicsAnalysis.git
+```
+
+### Latest development version
+This will clone the repo to the default branch, which is set to be the latest development branch.
+This branch is subject to change frequently as features and bug fixes are pushed.
+
 ```bash
 git clone --recursive git@github.com:clara-genomics/ClaraGenomicsAnalysis.git
 ```
@@ -111,15 +124,17 @@ e.g.
 A description of each of the benchmarks is present in a README under the module's benchmark folder.
 
 ## Enable Doc Generation
-To enable document generation for Clara Genomics Analysis, please install `Doxygen` on your system. Once
-`Doxygen` has been installed, run the following to build documents.
+To enable document generation for Clara Genomics Analysis, please install `Doxygen` on your system.
+Once `Doxygen` has been installed, run the following to build documents.
 
 ```bash
 make docs
 ```
 
 Docs are also generated as part of the default `all` target when `Doxygen` is available on the system.
 
+To disable documentation generation add `-Dcga_generate_docs=OFF` to the `cmake` command in the [build step](#build).
+
 ## Code Formatting
 
 ### C++ / CUDA
@@ -152,3 +167,24 @@ To run style check manually, simply run the following from the top level folder.
 ```
 flake8 pyclaragenomics/
 ```
+
+## Running CI Tests Locally
+Please note that your git repository will be mounted into the container and any untracked files will be removed from it.
+Before executing the CI locally, stash any untracked files or add them to the index.
+
+Requirements:
+1. docker (https://docs.docker.com/install/linux/docker-ce/ubuntu/)
+2. nvidia-docker (https://github.com/NVIDIA/nvidia-docker)
+3. nvidia-container-runtime (https://github.com/NVIDIA/nvidia-container-runtime)
+
+Run the following command to execute the CI build steps inside a container locally:
+```bash
+bash ci/local/build.sh -r <ClaraGenomicsAnalysis repo path>
+```
+The `ci/local/build.sh` script was adapted from [rapidsai/cudf](https://github.com/rapidsai/cudf/tree/branch-0.11/ci/local).
+
+The default docker image is **gpuci/clara-genomics-base:cuda10.0-ubuntu16.04-gcc5-py3.7**.
+Other images from the [gpuci/clara-genomics-base](https://hub.docker.com/r/gpuci/clara-genomics-base/tags) repository can be used instead by passing the `-i` argument:
+```bash
+bash ci/local/build.sh -r <ClaraGenomicsAnalysis repo path> -i gpuci/clara-genomics-base:cuda10.0-ubuntu18.04-gcc7-py3.6
+```
diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh
@@ -60,10 +60,13 @@ if [ "$GPU_TEST" == '1' ]; then
   nvidia-smi
 
   logger "Running ClaraGenomicsAnalysis unit tests..."
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR find ${LOCAL_BUILD_DIR}/install/tests -type f -exec {} \;
+  # Avoid using 'find', which returns 0 even if the -exec command fails
+  for binary_test in "${LOCAL_BUILD_DIR}"/install/tests/*; do
+    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR "${binary_test}";
+  done
 
   logger "Running ClaraGenomicsAnalysis benchmarks..."
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa_singlebatch
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment
+  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest"
+  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleAlignment"
 fi
 
diff --git a/ci/common/prep-init-env.sh b/ci/common/prep-init-env.sh
@@ -56,6 +56,6 @@ if [ "${CUDA:0:2}" == '10' ]; then
 fi
 
 # Cleanup local git
-cd $1
+cd "$1"
 git clean -xdf
 
diff --git a/ci/common/test-pyclaragenomics.sh b/ci/common/test-pyclaragenomics.sh
@@ -14,17 +14,27 @@
 ######################################
 set -e
 
+run_tests() {
+  # Pick the pytest marker for the target hardware (TEST_ON_GPU == '1' selects the
+  # gpu-marked tests, anything else the cpu-marked ones), then run pytest from test/.
+  local pytest_marker="cpu"
+  if [ "${TEST_ON_GPU}" == '1' ]; then
+      pytest_marker="gpu"
+  fi
+  cd test/
+  python -m pytest -m "${pytest_marker}" -s
+}
+
 PYCLARAGENOMICS_DIR=$1
 cd $PYCLARAGENOMICS_DIR
 
-#Install external dependencies.
+# Install external dependencies.
 python -m pip install -r requirements.txt
-python setup.py install
+python setup_pyclaragenomics.py --build_output_folder cga_build
+run_tests
 
-# Run tests.
-cd test/
-if [ "${TEST_ON_GPU}" == '1' ]; then
-    python -m pytest -m gpu -s
-else
-    python -m pytest -m cpu -s
-fi
+cd $PYCLARAGENOMICS_DIR
+# Uninstall pyclaragenomics
+pip uninstall -y pyclaragenomics
+# Test wheel package creation
+python setup_pyclaragenomics.py --build_output_folder cga_build_wheel --create_wheel_only
+yes | pip install $PYCLARAGENOMICS_DIR/pyclaragenomics_wheel/pyclaragenomics-*.whl
+run_tests
diff --git a/ci/local/build.sh b/ci/local/build.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+#
+
+DOCKER_IMAGE="gpuci/clara-genomics-base:cuda10.0-ubuntu16.04-gcc5-py3.7"
+REPO_PATH=${PWD}
+RAPIDS_DIR_IN_CONTAINER="/rapids"
+CPP_BUILD_DIR="cpp"
+CONTAINER_SHELL_ONLY=0
+
+SHORTHELP="$(basename "$0") [-h] [-H] [-s] [-r <repo_dir>] [-i <image_name>]"
+LONGHELP="${SHORTHELP}
+Build and test your local repository using a base gpuCI Docker image
+where:
+    -H   Show this help text
+    -r   Path to repository (defaults to working directory)
+    -i   Use Docker image (default is ${DOCKER_IMAGE})
+    -s   Skip building and testing and start an interactive shell in a container of the Docker image
+"
+
+# Limit GPUs available to container based on CUDA_VISIBLE_DEVICES
+if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then
+    NVIDIA_VISIBLE_DEVICES="all"
+else
+    NVIDIA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
+fi
+
+while getopts ":hHr:i:s" option; do
+    case ${option} in
+        r)
+            REPO_PATH=${OPTARG}
+            ;;
+        i)
+            DOCKER_IMAGE=${OPTARG}
+            ;;
+        s)
+            CONTAINER_SHELL_ONLY=1
+            ;;
+        h)
+            echo "${SHORTHELP}"
+            exit 0
+            ;;
+        H)
+            echo "${LONGHELP}"
+            exit 0
+            ;;
+        *)
+            echo "ERROR: Invalid flag"
+            echo "${SHORTHELP}"
+            exit 1
+            ;;
+    esac
+done
+IMAGE_FOLDER_NAME="build_$(echo $(basename "${DOCKER_IMAGE}")|sed -e 's/:/_/g')"
+REPO_PATH_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")"
+CPP_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${IMAGE_FOLDER_NAME}/${CPP_BUILD_DIR}"
+
+
+# BASE_CONTAINER_BUILD_DIR is named after the image name, allowing for
+# multiple image builds to coexist on the local filesystem. This will
+# be mapped to the typical BUILD_DIR inside of the container. Builds
+# running in the container generate build artifacts just as they would
+# in a bare-metal environment, and the host filesystem is able to
+# maintain the host build in BUILD_DIR as well.
+# shellcheck disable=SC2001,SC2005,SC2046
+BASE_CONTAINER_BUILD_DIR=${REPO_PATH}/${IMAGE_FOLDER_NAME}
+CPP_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/cpp
+
+
+# Text of the script that is executed inside the container. Unescaped ${...} are
+# expanded now, on the host; \${...} are left for the generated script to expand.
+# All paths are double-quoted in the generated text so that a repository directory
+# name containing spaces does not split into several words.
+BUILD_SCRIPT="#!/bin/bash
+set -e
+WORKSPACE=\"${REPO_PATH_IN_CONTAINER}\"
+PREBUILD_SCRIPT=\"${REPO_PATH_IN_CONTAINER}/ci/gpu/prebuild.sh\"
+BUILD_SCRIPT=\"${REPO_PATH_IN_CONTAINER}/ci/gpu/build.sh\"
+if [ -f \"\${PREBUILD_SCRIPT}\" ]; then
+    source \"\${PREBUILD_SCRIPT}\"
+fi
+yes | source \"\${BUILD_SCRIPT}\"
+"
+
+# Command handed to 'bash -c' in the container: run the generated build script and
+# fall back to an interactive shell if it fails, or just open a shell when -s was given.
+if (( CONTAINER_SHELL_ONLY == 0 )); then
+    COMMAND="\"${CPP_BUILD_DIR_IN_CONTAINER}/build.sh\" || bash"
+else
+    COMMAND="bash"
+fi
+
+# Create the build directories for the container to mount. Creating them here ensures
+# the correct owner: if the directories don't exist, docker volume mounting has the
+# side effect of creating them owned by root (volume mount point(s)).
+mkdir -p "${BASE_CONTAINER_BUILD_DIR}" "${CPP_CONTAINER_BUILD_DIR}"
+
+# Generate the build script inside the build directory and make it executable.
+GENERATED_BUILD_SCRIPT="${CPP_CONTAINER_BUILD_DIR}/build.sh"
+printf '%s\n' "${BUILD_SCRIPT}" > "${GENERATED_BUILD_SCRIPT}"
+chmod a+x "${GENERATED_BUILD_SCRIPT}"
+
+# Mount passwd and group files to docker. This allows docker to resolve username and group
+# avoiding these nags:
+#   * groups: cannot find name for group ID ID
+#   * I have no name!@id:/$
+# For ldap user user information is not present in system /etc/passwd and /etc/group files.
+# Hence we generate dummy files for ldap users which docker uses to resolve username and group
+
+PASSWD_FILE="/etc/passwd"
+GROUP_FILE="/etc/group"
+
+USER_FOUND=$(grep -wc "$(whoami)" < "$PASSWD_FILE")
+if [ "$USER_FOUND" == 0 ]; then
+  echo "Local User not found, LDAP WAR for docker mounts activated. Creating dummy passwd and group"
+  echo "files to allow docker resolve username and group"
+  cp "$PASSWD_FILE" /tmp/passwd
+  PASSWD_FILE="/tmp/passwd"
+  cp "$GROUP_FILE" /tmp/group
+  GROUP_FILE="/tmp/group"
+  echo "$(whoami):x:$(id -u):$(id -g):$(whoami),,,:$HOME:$SHELL" >> "$PASSWD_FILE"
+  echo "$(whoami):x:$(id -g):" >> "$GROUP_FILE"
+fi
+
+# Run the generated build script in a container
+sudo docker pull "${DOCKER_IMAGE}"
+sudo docker run --runtime=nvidia --rm -it -e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES}" \
+       -u "$(id -u)":"$(id -g)" \
+       -v "${REPO_PATH}":"${REPO_PATH_IN_CONTAINER}" \
+       -v "$PASSWD_FILE":/etc/passwd:ro \
+       -v "$GROUP_FILE":/etc/group:ro \
+       --cap-add=SYS_PTRACE \
+       "${DOCKER_IMAGE}" bash -c "${COMMAND}"
diff --git a/cmake/3rdparty.cmake b/cmake/3rdparty.cmake
@@ -33,4 +33,10 @@ endif()
 
 if (NOT TARGET spoa)
     add_subdirectory(3rdparty/spoa EXCLUDE_FROM_ALL)
+# Don't show warnings when compiling the 3rd party library
+    target_compile_options(spoa PRIVATE -w)
 endif()
+
+set(CUB_DIR ${PROJECT_SOURCE_DIR}/3rdparty/cub CACHE STRING
+	  "Path to cub repo")
+
diff --git a/cmake/Packaging.cmake b/cmake/Packaging.cmake
@@ -8,6 +8,8 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 #
 
+set(CGA_ENABLE_PACKAGING TRUE)
+
 # Find Linux Distribution
 EXECUTE_PROCESS(
     COMMAND "awk" "-F=" "/^NAME/{print $2}" "/etc/os-release"
@@ -21,11 +23,14 @@ elseif(${LINUX_OS_NAME} MATCHES "CentOS")
     MESSAGE(STATUS "Package generator - RPM")
     SET(CPACK_GENERATOR "RPM")
 else()
-    MESSAGE(FATAL_ERROR "Unrecognized Linux distribution - ${LINUX_OS_NAME}")
+    MESSAGE(STATUS "Unrecognized Linux distribution - ${LINUX_OS_NAME}. Disabling packaging.")
+    set(CGA_ENABLE_PACKAGING FALSE)
 endif()
 
-SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "NVIDIA Corporation")
-SET(CPACK_PACKAGE_VERSION "${CGA_VERSION}")
-SET(CPACK_PACKAGING_INSTALL_PREFIX "/usr/local/${CGA_PROJECT_NAME}-${CGA_VERSION}")
+if (CGA_ENABLE_PACKAGING)
+    SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "NVIDIA Corporation")
+    SET(CPACK_PACKAGE_VERSION "${CGA_VERSION}")
+    SET(CPACK_PACKAGING_INSTALL_PREFIX "/usr/local/${CGA_PROJECT_NAME}-${CGA_VERSION}")
 
-include(CPack)
+    include(CPack)
+endif()
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+#
+
+# validate_boolean(<option-name>)
+#
+# Stops configuration with a FATAL_ERROR unless the variable named by <option-name>
+# holds exactly "ON" or "OFF".
+#
+# Example:
+#   validate_boolean(cga_enable_tests)
+function(validate_boolean CMAKE_OPTION)
+    # CMAKE_OPTION carries the option's name; expand twice to read its value.
+    set(option_value "${${CMAKE_OPTION}}")
+    if (option_value STREQUAL "ON" OR option_value STREQUAL "OFF")
+        return()
+    endif()
+    message(FATAL_ERROR "${CMAKE_OPTION}  can only be set to ON/OFF")
+endfunction()
diff --git a/common/io/include/claragenomics/io/fasta_parser.hpp b/common/io/include/claragenomics/io/fasta_parser.hpp
@@ -62,4 +62,4 @@ class FastaParser
 std::unique_ptr<FastaParser> create_fasta_parser(const std::string& fasta_file);
 
 } // namespace io
-} // namespace claragenomicsi
+} // namespace claragenomics
-Original file line number
+Diff line change
@@ Expand Up / @@ -56,6 +56,6 @@ if [ "${CUDA:0:2}" == '10' ]; then @@
     fi
     # Cleanup local git
-    cd $1
+    cd "$1"
     git clean -xdf