diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..c588b7b --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,5 @@ +{ + "build": { + "dockerfile": "../Dockerfile" + } +} diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml new file mode 100644 index 0000000..7f7d009 --- /dev/null +++ b/.github/workflows/cmake-multi-platform.yml @@ -0,0 +1,64 @@ +# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml +name: CMake on multiple platforms + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + runs-on: ${{ matrix.os }} + + strategy: + # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable. + fail-fast: false + + # Set up a matrix to run the following 2 configurations: + # 1. ubuntu-latest, Release, GCC toolchain (gcc / g++) + # 2. ubuntu-latest, Release, Clang toolchain (clang / clang++) + # + # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list. + matrix: + os: [ubuntu-latest] + build_type: [Release] + c_compiler: [gcc, clang] + include: + - os: ubuntu-latest + c_compiler: gcc + cpp_compiler: g++ + - os: ubuntu-latest + c_compiler: clang + cpp_compiler: clang++ + + steps: + - uses: actions/checkout@v3 + + - name: Set reusable strings + # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. + id: strings + shell: bash + run: | + echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + + - name: Configure CMake + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
+ # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: > + cmake -B ${{ steps.strings.outputs.build-output-dir }} + -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} + -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -S ${{ github.workspace }} + + - name: Build + # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + + - name: Test + working-directory: ${{ steps.strings.outputs.build-output-dir }} + # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). + # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest --build-config ${{ matrix.build_type }} diff --git a/.gitignore b/.gitignore index 27f16ed..bda7b3d 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ tn93 validate_fasta tn93-cluster fasta_diff +seqdiff diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4510bcb --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +# Docker image for a tn93 development environment +FROM oraclelinux:8 + +# Set up environment and install dependencies +RUN yum -y update && \ + yum install -y cmake gcc-c++ gcc-toolset-10 git make oracle-epel-release-el8 && \ + echo 'source /opt/rh/gcc-toolset-10/enable' > ~/.bashrc && \ + source ~/.bashrc + +# To compile tn93 within the development environment: +# cmake . 
+# make diff --git a/VERSION.txt b/VERSION.txt index 0a50760..eca15ab 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -v1.0.14 +v1.0.14 \ No newline at end of file diff --git a/src/TN93.cpp b/src/TN93.cpp index 677c7c4..f2a6144 100644 --- a/src/TN93.cpp +++ b/src/TN93.cpp @@ -421,7 +421,7 @@ int main(int argc, const char *argv[]) { HISTOGRAM_SLICE, HISTOGRAM_BINS, weighted_count); - if (thisD >= -1.e-10 && thisD <= args.distance) { + if (thisD >= args.min_distance && thisD <= args.distance) { local_links_found += weighted_count; // char *s2 = stringText(sequences, seqLengths, seq1); if (!args.do_count) { diff --git a/src/argparse.cpp b/src/argparse.cpp index 154a16c..36df12c 100644 --- a/src/argparse.cpp +++ b/src/argparse.cpp @@ -20,6 +20,7 @@ namespace argparse "[-v] " "[-o OUTPUT] " "[-t THRESHOLD] " + "[-w MIN_THRESHOLD] " "[-a AMBIGS] " "[-g FRACTION] " "[-l OVERLAP] " @@ -43,6 +44,7 @@ namespace argparse " -v, --version show tn93 version \n" " -o OUTPUT direct the output to a file named OUTPUT (default=stdout)\n" " -t THRESHOLD only report (count) distances below this threshold (>=0, default=" TO_STR (DEFAULT_DISTANCE)")\n" + " -w MIN_THRESHOLD only report (count) distances at or above this threshold (in [0,1])\n" " -a AMBIGS handle ambigous nucleotides using one of the following strategies (default=" TO_STR( DEFAULT_AMBIG ) ")\n" " resolve: resolve ambiguities to minimize distance (e.g.R matches A);\n" " average: average ambiguities (e.g.R-A is 0.5 A-A and 0.5 G-A);\n" @@ -118,6 +120,7 @@ namespace argparse input1( stdin ), input2( NULL ), distance( DEFAULT_DISTANCE ), + min_distance( DEFAULT_MIN_DISTANCE ), ambig( DEFAULT_AMBIG ), format ( DEFAULT_FORMAT ), overlap ( DEFAULT_OVERLAP ), @@ -149,6 +152,7 @@ namespace argparse else if ( arg[1] == 'v' ) version(); else if ( arg[1] == 'o' ) parse_output( next_arg (i, argc, argv) ); else if ( arg[1] == 't' ) parse_distance ( next_arg (i, argc, argv) ); + else if ( arg[1] == 'w' ) parse_min_distance ( next_arg (i, argc, argv) ); 
else if ( arg[1] == 'l') parse_overlap( next_arg (i, argc, argv) ); else if ( arg[1] == 'f') parse_format( next_arg (i, argc, argv) ); else if ( arg[1] == 'a') parse_ambig( next_arg (i, argc, argv) ); @@ -234,6 +238,15 @@ namespace argparse ERROR( "genetic distance threshold must be in [0,1], had: %s", str ); } + void args_t::parse_min_distance ( const char * str ) + { + min_distance = atof( str ); + // negated range test: NaN fails every comparison, so this form also rejects "-w nan" + if ( !( min_distance >= 0.0 && min_distance <= 1.0 ) ) + ERROR( "genetic minimum distance threshold must be in [0,1], had: %s", str ); + } + + + void args_t::parse_fraction ( const char * str ) { resolve_fraction = atof( str ); diff --git a/src/argparse.hpp b/src/argparse.hpp index a649ac9..f1df0d8 100644 --- a/src/argparse.hpp +++ b/src/argparse.hpp @@ -9,6 +9,7 @@ #define DEFAULT_FORMAT csv #define DEFAULT_FRACTION 1.0 #define DEFAULT_DISTANCE 0.015 +#define DEFAULT_MIN_DISTANCE -1.e-10 #define DEFAULT_COUNTS_IN_NAME ':' #define DEFAULT_OVERLAP 100 #define DEFAULT_INCLUDE_PROB 1.0 @@ -44,6 +45,7 @@ namespace argparse * input2; double distance; + double min_distance; ambig_t ambig; format_t format; unsigned long overlap; @@ -69,6 +71,7 @@ namespace argparse void parse_second_in( const char * ); void parse_output ( const char * ); void parse_distance ( const char * ); + void parse_min_distance ( const char * ); void parse_overlap ( const char * ); void parse_format ( const char * ); void parse_ambig ( const char * ); diff --git a/src/tn93_shared.cc b/src/tn93_shared.cc index 6201af0..8e665cf 100644 --- a/src/tn93_shared.cc +++ b/src/tn93_shared.cc @@ -129,7 +129,6 @@ const double resolutionsCount[] = { 1.f, 1./2.f, // R 1./2.f, // Y 1./2.f, // S - 1./2.f, // S 1./2.f, // W 1./2.f, // K 1./2.f, // M