From ff8ff034e8d3524ec0daeb224ccb46c49a293cd2 Mon Sep 17 00:00:00 2001
From: Valentin Volkl <valentin.volkl@cern.ch>
Date: Thu, 26 May 2022 12:48:35 +0200
Subject: [PATCH] add 'fccanalysis run' command using subparsers (#180)

* add 'fccanalysis run' command using subparsers

* make sure tests find fccanalysis

* Update bin/fccanalysis

Co-authored-by: Clement Helsens <clement.helsens@cern.ch>

* update runLocal

* try to make ctest find fccanalysis

* update readme

* try to make ctest find fccanalysis

Co-authored-by: Clement Helsens <clement.helsens@cern.ch>
---
 CMakeLists.txt           |  7 +++++
 README.md                | 10 +++----
 bin/fccanalysis          | 14 +++++++++
 config/FCCAnalysisRun.py | 64 ++++++++++++++++++++++++----------------
 setup.sh                 |  2 ++
 tests/CMakeLists.txt     | 17 +++++++++++
 6 files changed, 84 insertions(+), 30 deletions(-)
 create mode 100755 bin/fccanalysis

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 78c2cc1593..90e095ce76 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,13 @@ set(INSTALL_INCLUDE_DIR include CACHE PATH
 include(cmake/FCCAnalysesCreateConfig.cmake)
 
 
+# Expose the fccanalysis launcher in both the build tree and the install tree.
+file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/bin/fccanalysis"
+  DESTINATION "${CMAKE_BINARY_DIR}"
+  FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
+                   GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+install(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/bin/fccanalysis" DESTINATION bin)
+
 file(GLOB _run_python_files config/*.py)
 install(FILES ${_run_python_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config)
 install(FILES config/doPlots.py PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config)
diff --git a/README.md b/README.md
index 6594705c1f..9b11007a66 100644
--- a/README.md
+++ b/README.md
@@ -125,7 +125,7 @@ file (please note that then the sample will not be matched in the database for
 To run the pre-selection stage of the example analysis run:
 
 ```shell
-python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
+fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
 ```
 
 This will create the output files in the `ZH_mumu_recoil/stage1` subdirectory
@@ -135,7 +135,7 @@ You also have the possibility to bypass the samples specified in the
 `processList` variable by using command line parameter `--output`, like so:
 
 ```shell
-python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \
+fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \
        --output <myoutput.root> \
        --files-list <file.root or file1.root file2.root or file*.root>
 ```
@@ -144,7 +144,7 @@ The example analysis consists of two pre-selection stages, to run the second one
 slightly alter the previous command:
 
 ```shell
-python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py
+fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py
 ```
 
 
@@ -170,7 +170,7 @@ variables needs extra fields like `title`, number of bins and range for the
 histogram creation. In the example analysis it can be run like this:
 
 ```shell
-python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \
+fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \
        --final
 ```
 
@@ -187,7 +187,7 @@ the rendering of the plots but also ways of combining samples for plotting.
 In the example analysis it can be run in the following manner:
 
 ```shell
-python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \
+fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \
        --plots
 ```
 
diff --git a/bin/fccanalysis b/bin/fccanalysis
new file mode 100755
index 0000000000..2759dfac74
--- /dev/null
+++ b/bin/fccanalysis
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+"""Command line entry point of FCCAnalyses; currently provides the `run` subcommand."""
+
+if __name__ == "__main__":
+  import argparse
+  import sys
+  parser = argparse.ArgumentParser()
+  subparsers = parser.add_subparsers()
+  parser_run = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis")
+  if len(sys.argv)<2:  # called without any argument: point to --help and exit with code 3
+     print("for usage run fccanalysis --help")
+     sys.exit(3)
+  from config.FCCAnalysisRun import * #such that the dictionary is loaded only if the configuration is ok
+  run(parser, parser_run)
diff --git a/config/FCCAnalysisRun.py b/config/FCCAnalysisRun.py
index 7b1de23d2e..b27a9ffdf4 100644
--- a/config/FCCAnalysisRun.py
+++ b/config/FCCAnalysisRun.py
@@ -311,7 +311,7 @@ def runPreprocess(df):
     sys.exit(3)
     return df
 #__________________________________________________________
-def runRDF(rdfModule, inputlist, outFile, nevt):
+def runRDF(rdfModule, inputlist, outFile, nevt, args):
     # for convenience and compatibility with user code
     ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
 
@@ -440,7 +440,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):
 
 
 #__________________________________________________________
-def runLocal(rdfModule, fileList, output, batch):
+def runLocal(rdfModule, fileList, args):
     #Create list of files to be Processed
     print ("----> Create dataframe object from files: ", )
     fileListRoot = ROOT.vector('string')()
@@ -465,13 +465,13 @@ def runLocal(rdfModule, fileList, output, batch):
     outFile = getElement(rdfModule,"outputDir")
     if outFile!="" and outFile[-1]!="/": outFile+="/"
 
-    if batch==False:
-        outFile+=output
+    if args.batch == False:
+        outFile+=args.output
     else:
-        outFile=output
+        outFile=args.output
     start_time = time.time()
     #run RDF
-    runRDF(rdfModule, fileListRoot, outFile, nevents_local)
+    runRDF(rdfModule, fileListRoot, outFile, nevents_local, args)
 
     outf = ROOT.TFile( outFile, "update" )
     outt = outf.Get("events")
@@ -498,7 +498,7 @@ def runLocal(rdfModule, fileList, output, batch):
     if args.bench:
         import json
 
-        analysis_path = sys.argv[1].rsplit('/', 1)[0]
+        analysis_path = args.pathToAnalysisScript.rsplit('/', 1)[0]
         analysis_name = getElement(rdfModule, 'analysisName')
         if not analysis_name:
             analysis_name = analysis_path
@@ -540,7 +540,7 @@ def runStages(args, rdfModule, preprocess):
         path, filename = os.path.split(args.output)
         if path!='': os.system("mkdir -p {}".format(path))
         testFile = getElement(rdfModule,"testFile")
-        runLocal(rdfModule, [testFile], args.output, True)
+        runLocal(rdfModule, [testFile], args)
         sys.exit(0)
 
     #check if files are specified, and if so run the analysis on it/them (this will exit after)
@@ -548,7 +548,7 @@ def runStages(args, rdfModule, preprocess):
         print("----> Running with user defined list of files (either locally or from batch)")
         path, filename = os.path.split(args.output)
         if path!='': os.system("mkdir -p {}".format(path))
-        runLocal(rdfModule, args.files_list, args.output, True)
+        runLocal(rdfModule, args.files_list, args)
         sys.exit(0)
 
     #check if batch mode and set start and end file from original list
@@ -596,7 +596,8 @@ def runStages(args, rdfModule, preprocess):
             #run locally
             if runBatch == False:
                 print ('----> Running Locally')
-                runLocal(rdfModule, chunkList[ch], outputchunk, args.batch)
+                args.output = outputchunk  # runLocal() now takes the output name from args
+                runLocal(rdfModule, chunkList[ch], args)
 
             #run on batch
         if runBatch == True:
@@ -828,18 +828,11 @@ def runValidate(jobdir):
                 lastLine = line
             print(line)
 
-#__________________________________________________________
-if __name__ == "__main__":
-    #check the arguments
-    if len(sys.argv)<2:
-        print ("usage:")
-        print ("python ",sys.argv[0]," PATHTO/analysis.py <options>")
-        print ("python ",sys.argv[0]," --help for help")
-        sys.exit(3)
 
-    import argparse
-    parser = argparse.ArgumentParser()
+#__________________________________________________________
+def setup_run_parser(parser):
     publicOptions = parser.add_argument_group('User options')
+    publicOptions.add_argument("pathToAnalysisScript", help="path to analysis script")
     publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+')
     publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root")
     publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1)
@@ -856,13 +849,23 @@ def runValidate(jobdir):
     internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m')
     internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False)
 
-    args, _ = parser.parse_known_args()
+
+#__________________________________________________________
+def run(mainparser, subparser):
+    """
+    Set things in motion.
+    The two parser arguments are a hack to allow running this 
+    both as `fccanalysis run` and `python config/FCCAnalysisRun.py`
+    For the latter case, both are the same (see below).
+    """
+    setup_run_parser(subparser)
+    args, _ = mainparser.parse_known_args()
+
     #check that the analysis file exists
-    analysisFile = sys.argv[1]
+    analysisFile = args.pathToAnalysisScript
     if not os.path.isfile(analysisFile):
-        print(sys.argv[1], " does not exist")
-        print("syntax should be: ")
-        print("python config/FCCAnalysisRun.py analysis.py <options>")
+        print("Script ", analysisFile, " does not exist")
+        print("specify a valid analysis script in the command line arguments")
         sys.exit(3)
 
     #set the RDF ELogLevel
@@ -905,3 +908,14 @@ def runValidate(jobdir):
                 print ('----> Can not have --final with --preprocess, exit')
                 sys.exit(3)
         runStages(args, rdfModule, args.preprocess)
+
+    
+#__________________________________________________________
+if __name__ == "__main__":
+    print("Running this script directly is deprecated, use `fccanalysis run` instead.")
+    # Legacy behavior: allow running this script directly with
+    # `python config/FCCAnalysisRun.py`, behaving the same as `fccanalysis run`.
+    # The same parser is passed as both the main parser and the subparser of run().
+    import argparse
+    parser = argparse.ArgumentParser()
+    run(parser, parser)
diff --git a/setup.sh b/setup.sh
index 176d3591dd..6d94c47d44 100644
--- a/setup.sh
+++ b/setup.sh
@@ -5,6 +5,8 @@ if [ "${0}" != "${BASH_SOURCE}" ]; then
     echo "INFO: Key4hep stack already set up."
   fi
   export PYTHONPATH=$PWD:$PYTHONPATH
+  export PYTHONPATH=$PWD/python:$PYTHONPATH
+  export PATH=$PWD/bin:$PATH
   export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
   export CMAKE_PREFIX_PATH=$PWD/install:$CMAKE_PREFIX_PATH
   export ROOT_INCLUDE_PATH=$PWD/install/include:$ROOT_INCLUDE_PATH
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91cfb2315d..f33ee6d9b0 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -16,6 +16,23 @@ function(add_integration_test _testname)
     )
 endfunction()
 
+function(add_integration_test_2 _testname)
+  # Register a ctest that runs the analysis script `_testname` through `fccanalysis run`.
+  add_test(NAME fccanalysisrun_${_testname}
+          # todo: figure out how to make ctest pick fccanalysis up from PATH
+          COMMAND ${CMAKE_SOURCE_DIR}/bin/fccanalysis run ${_testname} --test --nevents 100 --bench
+          WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+          )
+  set_property(TEST fccanalysisrun_${_testname} APPEND PROPERTY ENVIRONMENT
+    LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/analyzers/dataframe:$ENV{LD_LIBRARY_PATH}
+    PYTHONPATH=${CMAKE_SOURCE_DIR}:$ENV{PYTHONPATH}
+    PATH=${CMAKE_SOURCE_DIR}/bin:${CMAKE_BINARY_DIR}:$ENV{PATH}
+    ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/analyzers/dataframe:$ENV{ROOT_INCLUDE_PATH}
+    )
+endfunction()
+
+
+add_integration_test_2("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")
 
 add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")
 add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py")