diff --git a/.github/workflows/docker_build_test.yaml b/.github/workflows/docker_build_test.yaml index e8a964f..223d1aa 100644 --- a/.github/workflows/docker_build_test.yaml +++ b/.github/workflows/docker_build_test.yaml @@ -78,12 +78,12 @@ jobs: tar_gz_file: https://de.cyverse.org/dl/d/6EB55DDF-DC57-4BC9-BCF5-F668EF9D0B10/ci_greenness_indices_test_data.tar.gz parameter_json: .github/workflows/app_testing.json extra_docker_options: -v `pwd`/greenness_indices_files.json:/scif/apps/src/greenness-indices_files.json - test_data_tar_gz: https://de.cyverse.org/dl/d/3E5DAA36-3966-4511-A183-8813FAF76C88/ci_docker_test_data_3.tar.gz + test_data_tar_gz: https://data.cyverse.org/dav-anon/iplant/projects/aes/cct/diag/ci/ci_docker_test_data_5.tar.gz test_results: chmod +x .github/workflows/check_greenness_indices_app.sh && ./.github/workflows/check_greenness_indices_app.sh ${PWD} test_results_quality: | curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s -- -b ./csvdiff chmod +x './.github/workflows/check_csv_details.sh' - ./.github/workflows/check_csv_details.sh 'rgb_plot.csv' test_data '.' '1' '9,10,11,12,13,14,15,16,17,18' + ./.github/workflows/check_csv_details.sh 'rgb_plot.csv' test_data '.' 
#!/usr/bin/env python3
"""Fetches code from a git repository, performs a quality check, and executes it
"""

import argparse
import logging
import os
import shutil
import subprocess
import tempfile
from typing import Optional

# The SCIF recipe names the app "git_rgb_plot", so that spelling is looked up first; the
# original "git_plot_rgb" spelling is kept as a fallback for backward compatibility.
# NOTE(review): assumes SCIF exports SCIF_APPDATA_<app name> - confirm against the SCIF docs
REPO_DIR = os.environ.get('SCIF_APPDATA_git_rgb_plot',
                          os.environ.get('SCIF_APPDATA_git_plot_rgb',
                                         os.path.abspath(os.path.dirname(__file__))))
PLOT_BASE_BRANCH = os.environ.get('PLOT_BASE_BRANCH', 'v1.10')
PLOT_BASE_REPO = os.environ.get('PLOT_BASE_REPO', 'https://github.com/AgPipeline/plot-base-rgb.git')


def _check_install_requirements(requirements_file: Optional[str]) -> None:
    """Attempts to install requirements in the specified file
    Arguments:
        requirements_file: the file containing the requirements, or None when there isn't one
    Notes:
        Failures are logged and never raised; installing requirements is a best-effort step
    """
    # None must be handled before touching the file system: os.path.exists(None) raises TypeError
    if requirements_file is None:
        logging.info('No requirements file specified for repository')
        return

    if not os.path.exists(requirements_file):
        logging.warning('Specified requirements file was not found "%s"', requirements_file)
        return

    cmd = ('python3', '-m', 'pip', 'install', '--upgrade', '--no-cache-dir', '-r', requirements_file)
    # We don't want an exception thrown, so we silence pylint
    # pylint: disable=subprocess-run-check
    res = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    if res.returncode != 0:
        logging.warning('Unable to pip install requirements file "%s"', os.path.basename(requirements_file))


def _find_import_lines(source_text: str) -> tuple:
    """Finds the top-level import statements in Python source text
    Arguments:
        source_text: the contents of a Python source file
    Returns:
        A tuple of two lists: the import lines found (in source order), and the unique
        top-level module names those lines refer to (in order of first appearance)
    Notes:
        This is a simple line based scan. Only lines starting in the first column are
        considered, and an import statement continued over several lines is not reassembled
    """
    module_lines = []
    module_names = []
    for one_line in source_text.splitlines():
        if not one_line.startswith(('import ', 'from ')):
            continue
        module_lines.append(one_line)

        keyword, _, remainder = one_line.partition(' ')
        if keyword == 'from':
            # "from package.module import name" -> "package.module"
            candidates = remainder.split()[:1]
        else:
            # "import a.b as c, d  # comment" -> "a.b", "d"
            without_comment = remainder.split('#')[0]
            candidates = [chunk.split()[0] for chunk in without_comment.split(',') if chunk.strip()]

        for one_candidate in candidates:
            # Only the top-level package can be installed; relative imports yield an empty name
            top_level = one_candidate.split('.')[0]
            if top_level and top_level not in module_names:
                module_names.append(top_level)

    return module_lines, module_names


def _install_missing_modules(module_names: list, working_dir: str) -> None:
    """Tries to pip install the modules that can't currently be imported
    Arguments:
        module_names: the top-level module names to check
        working_dir: folder to probe from, so that modules located there count as available
    Notes:
        Each name is probed and installed on its own so that standard library and local
        modules are never handed to pip, and one failed install doesn't block the others
    """
    failed_names = []
    for one_name in module_names:
        # We don't want an exception thrown, so we silence pylint
        # pylint: disable=subprocess-run-check
        probe = subprocess.run(('python3', '-c', 'import ' + one_name), cwd=working_dir,
                               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if probe.returncode == 0:
            continue

        logging.debug('Trying to install module %s', one_name)
        cmd = ('python3', '-m', 'pip', 'install', '--no-cache-dir', one_name)
        res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if res.returncode != 0:
            failed_names.append(one_name)
            logging.debug(res.stdout)

    if failed_names:
        logging.warning('Unable to install all modules %s', ' '.join(failed_names))


def _check_install_packages(source_file: str, working_dir: str, requirements_file: Optional[str] = None) -> None:
    """Checks for missing Python packages in the specified file
    Arguments:
        source_file: the source file to check
        working_dir: folder where to place temporary files
        requirements_file: optional file containing required Python modules
    Notes:
        The import check file "__check_import.py" is written to the working folder and left there
    """
    # Check for a requirements file and try to install those packages
    _check_install_requirements(requirements_file)

    with open(source_file, 'r') as in_file:
        module_lines, module_names = _find_import_lines(in_file.read())

    # Perform a simple check on imports by copying them into their own file
    check_file = os.path.join(working_dir, '__check_import.py')
    with open(check_file, 'w') as out_file:
        out_file.writelines(one_line + '\n' for one_line in module_lines)

    # If there's nothing to do, just return
    if not module_lines:
        return

    # The imports are run at most twice: once as-is, and once more after an install attempt
    num_tries = 0
    while True:
        # Try to run the file to catch missing includes
        cmd = ('python3', check_file)
        # We don't want an exception thrown, so we silence pylint
        # pylint: disable=subprocess-run-check
        res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if res.returncode == 0:
            break
        num_tries += 1
        if num_tries >= 2:
            break
        logging.info('Initial module check failed for "%s"', os.path.basename(check_file))
        logging.debug(res.stdout)

        # Try installing the modules
        _install_missing_modules(module_names, working_dir)

    if num_tries == 2:
        logging.warning('Not all modules may be available for running script "%s"', os.path.basename(check_file))


def get_args() -> tuple:
    """Returns the command line arguments
    Returns:
        A tuple containing the git repo URI, the git branch or tag to use, the path of the
        requirements file (None when not specified), and the list of command line arguments
        for the code
    """
    parser = argparse.ArgumentParser('Run Plot-level-RGB image code from a git repo')

    parser.add_argument('--requires', help='the file containing required Python packages')
    parser.add_argument('git_repo', help='git repository containing the plot-level RGB algorithm')
    parser.add_argument('git_branch', help='branch or tag of the git repository to use')
    parser.add_argument('arguments', help='arguments to pass to the algorithm', nargs=argparse.REMAINDER)

    args = parser.parse_args()

    return args.git_repo, args.git_branch, args.requires, args.arguments


def run_git_code() -> None:
    """Fetches, checks, and runs code from a git repository
    Notes:
        Problems are logged and not raised. Both temporary folders are always removed
    """
    git_repo, git_branch, requirements_file, run_args = get_args()

    # Get our working path; mkdtemp needs the parent folder to exist
    os.makedirs(REPO_DIR, exist_ok=True)
    working_dir = tempfile.mkdtemp(dir=REPO_DIR)
    base_dir = None

    try:
        base_dir = tempfile.mkdtemp(dir=REPO_DIR)
        # Get the base code from the repo
        cmd = ('git', 'clone', '--depth', '1', '--quiet', '--branch', PLOT_BASE_BRANCH, PLOT_BASE_REPO, base_dir)
        _ = subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Install basic packages
        req_file = os.path.join(base_dir, 'requirements.txt')
        cmd = ('python3', '-m', 'pip', 'install', '--no-cache-dir', '-r', req_file)
        _ = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Get the code from the repo
        cmd = ('git', 'clone', '--depth', '1', '--quiet', '--branch', git_branch, git_repo, working_dir)
        _ = subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Check the repo for validity
        check_file = os.path.join(working_dir, 'algorithm_rgb.py')
        if not os.path.exists(check_file):
            msg = 'Missing required file: algorithm_rgb.py in repo %s branch %s' % (git_repo, git_branch)
            logging.warning(msg)
            raise RuntimeError(msg)

        # Install packages
        req_file = requirements_file if requirements_file else os.path.join(working_dir, 'requirements.txt')
        _check_install_packages(check_file, working_dir, req_file)

        # Copy the base python files over
        # NOTE(review): a base file replaces a same-named file from the algorithm repo - confirm intended
        for one_file in os.listdir(base_dir):
            if one_file.endswith('.py'):
                shutil.move(os.path.join(base_dir, one_file), os.path.join(working_dir, one_file))

        # Run the algorithm
        run_file = os.path.join(working_dir, 'transformer.py')
        cmd = ['python3', run_file] + run_args
        _ = subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

    except Exception as ex:  # pylint: disable=broad-except
        if logging.getLogger().level == logging.DEBUG:
            logging.exception('GIT repo %s branch/tag %s', git_repo, git_branch)
        else:
            logging.error('Exception caught for repo %s branch/tag %s', git_repo, git_branch)
            logging.error(ex)
    finally:
        # Clean up both folders here so the base code isn't left behind when a step fails
        for one_dir in (base_dir, working_dir):
            if one_dir:
                shutil.rmtree(one_dir, ignore_errors=True)


if __name__ == '__main__':
    run_git_code()
+ "rules": [ + { + "command": "${SCIF_APPROOT}/.venv/bin/python3 ${SCIF_APPS}/${SCRIPT_PATH} \"${GIT_REPO}\" \"${GIT_BRANCH}\" \"${INPUT_GEOTIFF}\" ${DOCKER_OPTIONS} --working_space \"${WORKING_FOLDER}\" ", + "environment": { + "SCRIPT_PATH": GIT_SCRIPT, + "GIT_REPO": GIT_RGB_PLOT_REPO, + "GIT_BRANCH": GIT_RGB_PLOT_BRANCH, + "INPUT_GEOTIFF": PLOT_INFO["FILE"], + "WORKING_FOLDER": PLOT_INFO["DIR"], + "DOCKER_OPTIONS": GIT_RGB_PLOT_OPTIONS, + }, + "inputs": [ + PLOT_INFO["FILE"] + ], + "outputs": [ + PLOT_INFO["DIR"] + "/rgb_plot.csv", + PLOT_INFO["DIR"] + "/result.json" + ] + } for PLOT_INFO in FILE_LIST + ] +} \ No newline at end of file diff --git a/merge_csv.py b/merge_csv.py index 9d4bcf2..0590b9c 100755 --- a/merge_csv.py +++ b/merge_csv.py @@ -102,12 +102,13 @@ def dir_type(dir_path: str) -> str: parser = argparse.ArgumentParser('CSV File discovery and merging') - parser.add_argument('--no_header', '-n', action='store_const', default=False, const=True, + parser.add_argument('--no-header', '-n', action='store_const', default=False, const=True, help='source CSV files do not have a header') - parser.add_argument('--header_count', '-c', type=int, default=1, help='number of header lines in files') + parser.add_argument('--header-count', '-c', type=int, default=1, help='number of header lines in files') parser.add_argument('--filter', '-f', help='comma separated list of files to filter in') parser.add_argument('--ignore', '-i', help='comma separated list of files to ignore') parser.add_argument('--ignore-dirs', help='comma separated list of directory names to ignore (eg: bad_folder, path/ignore)') + parser.add_argument('--output-file', help='merge all CSV files into this one file') parser.add_argument('source_folder', type=dir_type, help='the folder to search in') parser.add_argument('target_folder', type=dir_type, help='folder for combined CSV files') @@ -144,7 +145,8 @@ def merge(): continue # Get the target path and see if the source and destination are the same 
%appinstall git_rgb_plot
    # Create the app's virtual environment and install its Python dependencies
    python3.8 -m venv --system-site-packages .venv
    .venv/bin/python3 -m pip install --upgrade --no-cache-dir pip
    .venv/bin/python3 -m pip install --upgrade --no-cache-dir opencv-contrib-python-headless agpypeline

    # Add example Docker command to SCIF app help section.
    # The variable is quoted because an unquoted empty value leaves "[ -n ]", which is always true.
    # printf is used so the newlines are written the same way by every shell.
    if [ -n "$DOCKER_IMAGE" ]; then
        printf '\n\nExample Docker command: docker run %s run git_rgb_plot\n\n' "$DOCKER_IMAGE" >> "${PWD}/scif/runscript.help"
    fi

    # Generate remainder of SCIF app help section by running main script
    .venv/bin/python3 /scif/apps/src/git_algo_rgb_plot.py --help >> "${PWD}/scif/runscript.help"

%apprun git_rgb_plot
    # ${1} is left unquoted so that it disappears when no extra Makeflow option is given
    /cctools/bin/makeflow \
        --jx \
        --jx-args="/scif/apps/src/jx-args.json" \
        --jx-args="/scif/apps/src/git_rgb_plot_files.json" \
        --log-verbose \
        --retry-count=1 \
        --change-directory="${SCIF_APPDATA}" \
        --makeflow-log="${SCIF_APPDATA}/workflow.jx.makeflowlog" \
        --batch-log="${SCIF_APPDATA}/workflow.jx.batchlog" \
        ${1} \
        "/scif/apps/src/git_rgb_plot_workflow.jx"

%apphelp git_rgb_plot
    This app provides an entrypoint to the git tool
- wget -O greenness_indices.tar.gz https://github.com/AgPipeline/transformer-rgb-indices/archive/v1.3.tar.gz + wget -O greenness_indices.tar.gz https://github.com/AgPipeline/transformer-rgb-indices/archive/v1.4.tar.gz tar xvf greenness_indices.tar.gz - cp -r transformer-rgb-indices-1.3/* src/ - rm -r transformer-rgb-indices-1.3 + cp -r transformer-rgb-indices-1.4/* src/ + rm -r transformer-rgb-indices-1.4 .venv/bin/python3 -m pip install --upgrade --no-cache-dir -r "${PWD}/src/requirements.txt" echo "Removing unneeded files" rm -rf --verbose src/test_data src/tests src/.github plot_base_rgb.tar.gz greenness_indices.tar.gz diff --git a/scif_app_recipes/soilmask_ratio_v0.0.1_ubuntu20.04.scif b/scif_app_recipes/soilmask_ratio_v0.0.1_ubuntu20.04.scif index 22a04dd..1563703 100644 --- a/scif_app_recipes/soilmask_ratio_v0.0.1_ubuntu20.04.scif +++ b/scif_app_recipes/soilmask_ratio_v0.0.1_ubuntu20.04.scif @@ -1,8 +1,8 @@ %appinstall soilmask_ratio # Download soilmask ratio code. In the future use pip/conda install. - wget -O soilmask_ratio.tar.gz https://github.com/AgPipeline/transformer-soilmask-by-ratio/archive/v1.1.tar.gz + wget -O soilmask_ratio.tar.gz https://github.com/AgPipeline/transformer-soilmask-by-ratio/archive/v1.2.tar.gz tar xvzf soilmask_ratio.tar.gz - mv transformer-soilmask-by-ratio-1.1 src + mv transformer-soilmask-by-ratio-1.2 src echo "Removing unneeded files" rm -rf --verbose src/test_data src/tests src/.github src/figures soilmask_ratio.tar.gz diff --git a/scif_app_recipes/soilmask_v0.0.1_ubuntu20.04.scif b/scif_app_recipes/soilmask_v0.0.1_ubuntu20.04.scif index 1f31c39..21af76a 100644 --- a/scif_app_recipes/soilmask_v0.0.1_ubuntu20.04.scif +++ b/scif_app_recipes/soilmask_v0.0.1_ubuntu20.04.scif @@ -1,8 +1,8 @@ %appinstall soilmask # Download soilmask code. In the future use pip/conda install. 
- wget -O soilmask.tar.gz https://github.com/AgPipeline/transformer-soilmask/archive/v2.3.tar.gz + wget -O soilmask.tar.gz https://github.com/AgPipeline/transformer-soilmask/archive/v2.4.tar.gz tar xvzf soilmask.tar.gz - mv transformer-soilmask-2.3 src + mv transformer-soilmask-2.4 src echo "Removing unneeded files" rm -rf --verbose src/test_data src/tests src/.github src/figures soilmask.tar.gz