Skip to content

Commit

Permalink
Merge pull request #50 from AgPipeline/add_git_rgb_algo
Browse files Browse the repository at this point in the history
Add git rgb algo - no review
  • Loading branch information
Chris-Schnaufer authored Aug 4, 2021
2 parents 24c0030 + 691336f commit 18ee269
Show file tree
Hide file tree
Showing 10 changed files with 250 additions and 16 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker_build_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ jobs:
tar_gz_file: https://de.cyverse.org/dl/d/6EB55DDF-DC57-4BC9-BCF5-F668EF9D0B10/ci_greenness_indices_test_data.tar.gz
parameter_json: .github/workflows/app_testing.json
extra_docker_options: -v `pwd`/greenness_indices_files.json:/scif/apps/src/greenness-indices_files.json
test_data_tar_gz: https://de.cyverse.org/dl/d/3E5DAA36-3966-4511-A183-8813FAF76C88/ci_docker_test_data_3.tar.gz
test_data_tar_gz: https://data.cyverse.org/dav-anon/iplant/projects/aes/cct/diag/ci/ci_docker_test_data_5.tar.gz
test_results: chmod +x .github/workflows/check_greenness_indices_app.sh && ./.github/workflows/check_greenness_indices_app.sh ${PWD}
test_results_quality: |
curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s -- -b ./csvdiff
chmod +x './.github/workflows/check_csv_details.sh'
./.github/workflows/check_csv_details.sh 'rgb_plot.csv' test_data '.' '1' '9,10,11,12,13,14,15,16,17,18'
./.github/workflows/check_csv_details.sh 'rgb_plot.csv' test_data '.' '1' '7,8,9,10,11,12,13,14,15,16'
- app: merge_csv
tar_gz_file: https://de.cyverse.org/dl/d/7EB4E7EF-9635-483C-8C61-3CE9806B8906/ci_merge_csv_test_data_2.tar.gz
parameter_json: .github/workflows/app_testing.json
Expand Down
6 changes: 6 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ WORKDIR /
# Install Python
RUN apt-get update -y \
&& apt-get install --no-install-recommends -y \
git \
python3.8 \
python3-pip \
&& ln -s /usr/bin/python3 /usr/bin/python \
Expand Down Expand Up @@ -139,5 +140,10 @@ COPY *.jx *.py *.sh jx-args.json /scif/apps/src/
RUN chmod a+x /scif/apps/src/*.sh
RUN chmod a+x /scif/apps/src/*.py

COPY ./scif_app_recipes/git_v0.0.1_ubuntu20.04.scif /opt/
RUN scif install /opt/git_v0.0.1_ubuntu20.04.scif
# Silence a git warning
RUN git config --global advice.detachedHead false

COPY . /home/extractor/drone-makeflow
RUN chmod a+x /home/extractor/drone-makeflow
172 changes: 172 additions & 0 deletions git_algo_rgb_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""Fetches code from a git repository, performs a quality check, and executes it
"""

import argparse
import logging
import os
import shutil
import subprocess
import tempfile

# Folder in which the temporary clone folders are created. Taken from the
# SCIF_APPDATA_git_plot_rgb environment variable when set, otherwise the folder
# containing this script is used.
# NOTE(review): the SCIF app appears to be named "git_rgb_plot"; confirm that
# "git_plot_rgb" in the variable name is intended
REPO_DIR = os.environ.get('SCIF_APPDATA_git_plot_rgb', os.path.abspath(os.path.dirname(__file__)))
# Branch or tag of the base repository to clone (override with PLOT_BASE_BRANCH)
PLOT_BASE_BRANCH = os.environ.get('PLOT_BASE_BRANCH', 'v1.10')
# URL of the repository providing the base code (override with PLOT_BASE_REPO)
PLOT_BASE_REPO = os.environ.get('PLOT_BASE_REPO', 'https://github.com/AgPipeline/plot-base-rgb.git')


def _check_install_requirements(requirements_file: str) -> None:
    """Attempts to install requirements in the specified file
    Arguments:
        requirements_file: the file containing the requirements; may be None when
                           no requirements file was specified
    Notes:
        Problems are only logged: a missing file or a failed pip install does not
        raise an exception
    """
    # Handle the "nothing specified" case first since os.path.exists() raises a
    # TypeError when it is handed None
    if requirements_file is None:
        logging.info('No requirements file specified for repository')
        return

    if not os.path.exists(requirements_file):
        logging.warning('Specified requirements file was not found "%s"', requirements_file)
        return

    cmd = ('python3', '-m', 'pip', 'install', '--upgrade', '--no-cache-dir', '-r', requirements_file)
    # We don't want an exception thrown, so we silence pylint
    # pylint: disable=subprocess-run-check
    res = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    if res.returncode != 0:
        logging.warning('Unable to pip install requirements file "%s"', os.path.basename(requirements_file))


def _import_line_modules(import_line: str) -> list:
    """Returns the top-level module names referenced by a single-line import statement
    Arguments:
        import_line: a line starting with 'import ' or 'from '
    Returns:
        The list of top-level module names found; relative imports are skipped
    """
    # Drop any trailing comment so that its contents aren't mistaken for modules
    chunks = import_line.split('#', 1)[0].split(None, 1)
    if len(chunks) < 2:
        return []

    keyword, remainder = chunks
    if keyword == 'from':
        # from <module> import ...
        candidates = remainder.split()[:1]
    else:
        # import <module> [as name][, <module> [as name]]...
        candidates = [part.split()[0] for part in remainder.split(',') if part.split()]

    names = []
    for one_candidate in candidates:
        # Only the top-level package can be installed; a leading '.' (relative
        # import) results in an empty name which is skipped
        top_level = one_candidate.split('.')[0]
        if top_level and top_level not in names:
            names.append(top_level)
    return names


def _check_install_packages(source_file: str, working_dir: str, requirements_file: str = None) -> None:
    """Checks for missing Python packages in the specified file
    Arguments:
        source_file: the source file to check
        working_dir: folder where to place temporary files
        requirements_file: optional file containing required Python modules
    Notes:
        Only single-line import statements that start in the first column are
        checked. Failures are logged and never raised
    """
    # Check for a requirements file and try to install those packages
    _check_install_requirements(requirements_file)

    # Split into lines; iterating over the raw string would yield single characters
    with open(source_file, 'r') as in_file:
        all_lines = in_file.read().splitlines()

    # Perform a simple check on imports
    check_file = os.path.join(working_dir, '__check_import.py')
    module_lines = []
    module_names = []
    with open(check_file, 'w') as out_file:
        for one_line in all_lines:
            if one_line.startswith(('import ', 'from ')):
                out_file.write(one_line + '\n')
                module_lines.append(one_line)
                for one_name in _import_line_modules(one_line):
                    if one_name not in module_names:
                        module_names.append(one_name)

    # If there's nothing to do, just return
    if not module_lines:
        return

    # Try to run the file to catch missing includes
    check_cmd = ('python3', check_file)
    # We don't want an exception thrown, so we silence pylint
    # pylint: disable=subprocess-run-check
    res = subprocess.run(check_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if res.returncode == 0:
        return

    logging.info('Initial module check failed for "%s"', os.path.basename(check_file))
    logging.debug(res.stdout)

    # Try installing the modules
    if module_names:
        logging.debug('Trying to install modules %s', str(module_names))
        # Each module must be its own argument; a single space-joined string is
        # treated by pip as one (invalid) requirement
        cmd = ('python3', '-m', 'pip', 'install', '--no-cache-dir', *module_names)
        # pylint: disable=subprocess-run-check
        res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if res.returncode != 0:
            logging.warning('Unable to install all modules %s', ' '.join(module_names))
            logging.debug(res.stdout)

    # Check again now that an install was attempted
    # pylint: disable=subprocess-run-check
    res = subprocess.run(check_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if res.returncode != 0:
        logging.warning('Not all modules may be available for running script "%s"', os.path.basename(check_file))


def get_args() -> tuple:
    """Returns the command line arguments
    Returns:
        A tuple containing the git repo URI, the git branch or tag to use, the
        requirements file (None when --requires isn't specified), and the list of
        command line arguments for the code
    """
    # The text is the description; passing it positionally would set the program
    # name shown in the usage line instead
    parser = argparse.ArgumentParser(description='Run Plot-level-RGB image code from a git repo')

    parser.add_argument('--requires', help='the file containing required Python packages')
    parser.add_argument('git_repo', help='git repository containing the plot-level RGB algorithm')
    parser.add_argument('git_branch', help='branch or tag of the git repository to use')
    # Everything after the branch is collected untouched for the algorithm
    parser.add_argument('arguments', help='arguments to pass to the algorithm', nargs=argparse.REMAINDER)

    args = parser.parse_args()

    return args.git_repo, args.git_branch, args.requires, args.arguments


def run_git_code() -> None:
    """Fetches, checks, and runs code from a git repository
    Notes:
        The base code and the requested repository are cloned into temporary
        folders under REPO_DIR; both folders are removed before returning.
        Exceptions raised while fetching, checking, or running the code are
        logged and not re-raised
    """
    git_repo, git_branch, requirements_file, run_args = get_args()

    # Get our working path (mkdtemp creates the folder)
    working_dir = tempfile.mkdtemp(dir=REPO_DIR)
    # Tracked outside of the try block so that it can always be cleaned up
    base_dir = None

    try:
        base_dir = tempfile.mkdtemp(dir=REPO_DIR)
        # Get the base code from the repo
        cmd = ('git', 'clone', '--depth', '1', '--quiet', '--branch', PLOT_BASE_BRANCH, PLOT_BASE_REPO, base_dir)
        subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Install basic packages
        req_file = os.path.join(base_dir, 'requirements.txt')
        cmd = ('python3', '-m', 'pip', 'install', '--no-cache-dir', '-r', req_file)
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Get the code from the repo
        cmd = ('git', 'clone', '--depth', '1', '--quiet', '--branch', git_branch, git_repo, working_dir)
        subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

        # Check the repo for validity
        check_file = os.path.join(working_dir, 'algorithm_rgb.py')
        if not os.path.exists(check_file):
            msg = 'Missing required file: algorithm_rgb.py in repo %s branch %s' % (git_repo, git_branch)
            logging.warning(msg)
            raise RuntimeError(msg)

        # Install packages
        req_file = requirements_file if requirements_file else os.path.join(working_dir, 'requirements.txt')
        _check_install_packages(check_file, working_dir, req_file)

        # Copy the base python files over
        for one_file in os.listdir(base_dir):
            if one_file.endswith('.py'):
                shutil.move(os.path.join(base_dir, one_file), os.path.join(working_dir, one_file))
        shutil.rmtree(base_dir)
        # Nothing left for the cleanup code to remove
        base_dir = None

        # Run the algorithm
        run_file = os.path.join(working_dir, 'transformer.py')
        cmd = ['python3', run_file] + run_args
        subprocess.run(cmd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)

    except Exception as ex:
        if logging.getLogger().level == logging.DEBUG:
            logging.exception('GIT repo %s branch/tag %s', git_repo, git_branch)
        else:
            logging.error('Exception caught for repo %s branch/tag %s', git_repo, git_branch)
            logging.error(ex)
    finally:
        # The base folder is still around when a step before its removal failed
        if base_dir is not None:
            shutil.rmtree(base_dir, ignore_errors=True)
        shutil.rmtree(working_dir)


# Only fetch and run the code when executed as a script, not when imported
if __name__ == '__main__':
    run_git_code()
25 changes: 25 additions & 0 deletions git_rgb_plot_workflow.jx
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"define": {
"GIT_SCRIPT": "/src/git_algo_rgb_plot.py",
},
"rules": [
{
"command": "${SCIF_APPROOT}/.venv/bin/python3 ${SCIF_APPS}/${SCRIPT_PATH} \"${GIT_REPO}\" \"${GIT_BRANCH}\" \"${INPUT_GEOTIFF}\" ${DOCKER_OPTIONS} --working_space \"${WORKING_FOLDER}\" ",
"environment": {
"SCRIPT_PATH": GIT_SCRIPT,
"GIT_REPO": GIT_RGB_PLOT_REPO,
"GIT_BRANCH": GIT_RGB_PLOT_BRANCH,
"INPUT_GEOTIFF": PLOT_INFO["FILE"],
"WORKING_FOLDER": PLOT_INFO["DIR"],
"DOCKER_OPTIONS": GIT_RGB_PLOT_OPTIONS,
},
"inputs": [
PLOT_INFO["FILE"]
],
"outputs": [
PLOT_INFO["DIR"] + "/rgb_plot.csv",
PLOT_INFO["DIR"] + "/result.json"
]
} for PLOT_INFO in FILE_LIST
]
}
8 changes: 5 additions & 3 deletions merge_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,13 @@ def dir_type(dir_path: str) -> str:

parser = argparse.ArgumentParser('CSV File discovery and merging')

parser.add_argument('--no_header', '-n', action='store_const', default=False, const=True,
parser.add_argument('--no-header', '-n', action='store_const', default=False, const=True,
help='source CSV files do not have a header')
parser.add_argument('--header_count', '-c', type=int, default=1, help='number of header lines in files')
parser.add_argument('--header-count', '-c', type=int, default=1, help='number of header lines in files')
parser.add_argument('--filter', '-f', help='comma separated list of files to filter in')
parser.add_argument('--ignore', '-i', help='comma separated list of files to ignore')
parser.add_argument('--ignore-dirs', help='comma separated list of directory names to ignore (eg: bad_folder, path/ignore)')
parser.add_argument('--output-file', help='merge all CSV files into this one file')
parser.add_argument('source_folder', type=dir_type, help='the folder to search in')
parser.add_argument('target_folder', type=dir_type, help='folder for combined CSV files')

Expand Down Expand Up @@ -144,7 +145,8 @@ def merge():
continue

# Get the target path and see if the source and destination are the same
dest_path = os.path.join(args.target_folder, os.path.basename(one_file))
dest_file = os.path.basename(one_file) if not args.output_file else args.output_file
dest_path = os.path.join(args.target_folder, dest_file)
if dest_path.lower() == source_path.lower():
continue

Expand Down
4 changes: 2 additions & 2 deletions scif_app_recipes/canopycover_v0.0.1_ubuntu20.04.scif
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
%appinstall canopycover
# Download canopycover code. In the future use pip/conda install.
wget -O canopycover.tar.gz https://github.com/AgPipeline/transformer-canopycover/archive/v1.7.tar.gz
wget -O canopycover.tar.gz https://github.com/AgPipeline/transformer-canopycover/archive/v1.8.tar.gz
tar xvf canopycover.tar.gz
mv transformer-canopycover-1.7 src
mv transformer-canopycover-1.8 src
echo "Removing unneeded files"
rm -rf --verbose src/test_data src/tests src/.github canopycover.tar.gz

Expand Down
29 changes: 29 additions & 0 deletions scif_app_recipes/git_v0.0.1_ubuntu20.04.scif
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
%appinstall git_rgb_plot
# Create the app's virtual environment and install the Python packages it needs
python3.8 -m venv --system-site-packages .venv
.venv/bin/python3 -m pip install --upgrade --no-cache-dir pip
.venv/bin/python3 -m pip install --upgrade --no-cache-dir opencv-contrib-python-headless agpypeline

# Add example Docker command to SCIF app help section
# The variable is quoted so that the test fails when DOCKER_IMAGE is unset or empty
if [ -n "$DOCKER_IMAGE" ]; then
    echo "\n\nExample Docker command: docker run $DOCKER_IMAGE run git_rgb_plot\n" >> "${PWD}/scif/runscript.help"
fi

# Generate remainder of SCIF app help section by running main script
.venv/bin/python3 /scif/apps/src/git_algo_rgb_plot.py --help >> "${PWD}/scif/runscript.help"

%apprun git_rgb_plot
# Run the git RGB plot JX workflow with Makeflow. The two --jx-args files supply
# the workflow variables, the Makeflow and batch logs are written under
# ${SCIF_APPDATA}, and the first runscript argument is passed through to makeflow
/cctools/bin/makeflow \
--jx \
--jx-args="/scif/apps/src/jx-args.json" \
--jx-args="/scif/apps/src/git_rgb_plot_files.json" \
--log-verbose \
--retry-count=1 \
--change-directory="${SCIF_APPDATA}" \
--makeflow-log="${SCIF_APPDATA}/workflow.jx.makeflowlog" \
--batch-log="${SCIF_APPDATA}/workflow.jx.batchlog" \
${1} \
"/scif/apps/src/git_rgb_plot_workflow.jx"

%apphelp git_rgb_plot
This app provides an entrypoint to the git tool
10 changes: 5 additions & 5 deletions scif_app_recipes/greenness_v0.0.1_ubuntu20.04.scif
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
.venv/bin/python3 -m pip install --upgrade --no-cache-dir pip

# Download plot-base-rgb code
wget -O plot_base_rgb.tar.gz https://github.com/AgPipeline/plot-base-rgb/archive/v1.8.tar.gz
wget -O plot_base_rgb.tar.gz https://github.com/AgPipeline/plot-base-rgb/archive/v1.10.tar.gz
tar xvf plot_base_rgb.tar.gz
mv plot-base-rgb-1.8 src
mv plot-base-rgb-1.10 src
.venv/bin/python3 -m pip install --upgrade --no-cache-dir -r "${PWD}/src/requirements.txt"

# Download greenness-indices code. In the future use pip/conda install.
wget -O greenness_indices.tar.gz https://github.com/AgPipeline/transformer-rgb-indices/archive/v1.3.tar.gz
wget -O greenness_indices.tar.gz https://github.com/AgPipeline/transformer-rgb-indices/archive/v1.4.tar.gz
tar xvf greenness_indices.tar.gz
cp -r transformer-rgb-indices-1.3/* src/
rm -r transformer-rgb-indices-1.3
cp -r transformer-rgb-indices-1.4/* src/
rm -r transformer-rgb-indices-1.4
.venv/bin/python3 -m pip install --upgrade --no-cache-dir -r "${PWD}/src/requirements.txt"
echo "Removing unneeded files"
rm -rf --verbose src/test_data src/tests src/.github plot_base_rgb.tar.gz greenness_indices.tar.gz
Expand Down
4 changes: 2 additions & 2 deletions scif_app_recipes/soilmask_ratio_v0.0.1_ubuntu20.04.scif
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
%appinstall soilmask_ratio
# Download soilmask ratio code. In the future use pip/conda install.
wget -O soilmask_ratio.tar.gz https://github.com/AgPipeline/transformer-soilmask-by-ratio/archive/v1.1.tar.gz
wget -O soilmask_ratio.tar.gz https://github.com/AgPipeline/transformer-soilmask-by-ratio/archive/v1.2.tar.gz
tar xvzf soilmask_ratio.tar.gz
mv transformer-soilmask-by-ratio-1.1 src
mv transformer-soilmask-by-ratio-1.2 src
echo "Removing unneeded files"
rm -rf --verbose src/test_data src/tests src/.github src/figures soilmask_ratio.tar.gz

Expand Down
4 changes: 2 additions & 2 deletions scif_app_recipes/soilmask_v0.0.1_ubuntu20.04.scif
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
%appinstall soilmask
# Download soilmask code. In the future use pip/conda install.
wget -O soilmask.tar.gz https://github.com/AgPipeline/transformer-soilmask/archive/v2.3.tar.gz
wget -O soilmask.tar.gz https://github.com/AgPipeline/transformer-soilmask/archive/v2.4.tar.gz
tar xvzf soilmask.tar.gz
mv transformer-soilmask-2.3 src
mv transformer-soilmask-2.4 src
echo "Removing unneeded files"
rm -rf --verbose src/test_data src/tests src/.github src/figures soilmask.tar.gz

Expand Down

0 comments on commit 18ee269

Please sign in to comment.