From ff8ff034e8d3524ec0daeb224ccb46c49a293cd2 Mon Sep 17 00:00:00 2001 From: Valentin Volkl Date: Thu, 26 May 2022 12:48:35 +0200 Subject: [PATCH] add 'fccanalysis run' command using subparsers (#180) * add 'fccanalysis run' command using subparsers * make sure tests find fccanalysis * Update bin/fccanalysis Co-authored-by: Clement Helsens * update runLocal * try to make ctest find fccanalysis * update readme * try to make ctest find fccanalysis Co-authored-by: Clement Helsens --- CMakeLists.txt | 7 +++++ README.md | 10 +++---- bin/fccanalysis | 14 +++++++++ config/FCCAnalysisRun.py | 64 ++++++++++++++++++++++++---------------- setup.sh | 2 ++ tests/CMakeLists.txt | 17 +++++++++++ 6 files changed, 84 insertions(+), 30 deletions(-) create mode 100755 bin/fccanalysis diff --git a/CMakeLists.txt b/CMakeLists.txt index 78c2cc1593..90e095ce76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,13 @@ set(INSTALL_INCLUDE_DIR include CACHE PATH include(cmake/FCCAnalysesCreateConfig.cmake) +file(COPY bin/fccanalysis + DESTINATION ${CMAKE_BINARY_DIR} + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ + GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) +install(PROGRAMS bin/fccanalysis DESTINATION bin) + + file(GLOB _run_python_files config/*.py) install(FILES ${_run_python_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config) install(FILES config/doPlots.py PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config) diff --git a/README.md b/README.md index 6594705c1f..9b11007a66 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ file (please note that then the sample will not be matched in the database for To run the pre-selection stage of the example analysis run: ```shell -python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py +fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py ``` This will create the output files in the `ZH_mumu_recoil/stage1` 
subdirectory @@ -135,7 +135,7 @@ You also have the possibility to bypass the samples specified in the `processList` variable by using command line parameter `--output`, like so: ```shell -python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \ +fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \ --output \ --files-list ``` @@ -144,7 +144,7 @@ The example analysis consists of two pre-selection stages, to run the second one slightly alter the previous command: ```shell -python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py +fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py ``` @@ -170,7 +170,7 @@ variables needs extra fields like `title`, number of bins and range for the histogram creation. In the example analysis it can be run like this: ```shell -python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \ +fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \ --final ``` @@ -187,7 +187,7 @@ the rendering of the plots but also ways of combining samples for plotting. 
In the example analysis it can be run in the following manner: ```shell -python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \ +fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \ --plots ``` diff --git a/bin/fccanalysis b/bin/fccanalysis new file mode 100755 index 0000000000..2759dfac74 --- /dev/null +++ b/bin/fccanalysis @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + parser_run = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis") + if len(sys.argv)<2: + print("for usage run fccanalysis --help") + sys.exit(3) + from config.FCCAnalysisRun import run #such that the dictionary is loaded only if the configuration is ok + run(parser, parser_run) diff --git a/config/FCCAnalysisRun.py b/config/FCCAnalysisRun.py index 7b1de23d2e..b27a9ffdf4 100644 --- a/config/FCCAnalysisRun.py +++ b/config/FCCAnalysisRun.py @@ -311,7 +311,7 @@ def runPreprocess(df): sys.exit(3) return df #__________________________________________________________ -def runRDF(rdfModule, inputlist, outFile, nevt): +def runRDF(rdfModule, inputlist, outFile, nevt, args): # for convenience and compatibility with user code ROOT.gInterpreter.Declare("using namespace FCCAnalyses;") @@ -440,7 +440,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile): #__________________________________________________________ -def runLocal(rdfModule, fileList, output, batch): +def runLocal(rdfModule, fileList, args): #Create list of files to be Processed print ("----> Create dataframe object from files: ", ) fileListRoot = ROOT.vector('string')() @@ -465,13 +465,13 @@ def runLocal(rdfModule, fileList, output, batch): outFile = getElement(rdfModule,"outputDir") if outFile!="" and outFile[-1]!="/": outFile+="/" - if batch==False: - outFile+=output + if args.batch == False: + outFile+=args.output else: -
outFile=output start_time = time.time() #run RDF - runRDF(rdfModule, fileListRoot, outFile, nevents_local) + runRDF(rdfModule, fileListRoot, outFile, nevents_local, args) outf = ROOT.TFile( outFile, "update" ) outt = outf.Get("events") @@ -498,7 +498,7 @@ def runLocal(rdfModule, fileList, output, batch): if args.bench: import json - analysis_path = sys.argv[1].rsplit('/', 1)[0] + analysis_path = args.pathToAnalysisScript.rsplit('/', 1)[0] analysis_name = getElement(rdfModule, 'analysisName') if not analysis_name: analysis_name = analysis_path @@ -540,7 +540,7 @@ def runStages(args, rdfModule, preprocess): path, filename = os.path.split(args.output) if path!='': os.system("mkdir -p {}".format(path)) testFile = getElement(rdfModule,"testFile") - runLocal(rdfModule, [testFile], args.output, True) + runLocal(rdfModule, [testFile], args) sys.exit(0) #check if files are specified, and if so run the analysis on it/them (this will exit after) @@ -548,7 +548,7 @@ def runStages(args, rdfModule, preprocess): print("----> Running with user defined list of files (either locally or from batch)") path, filename = os.path.split(args.output) if path!='': os.system("mkdir -p {}".format(path)) - runLocal(rdfModule, args.files_list, args.output, True) + runLocal(rdfModule, args.files_list, args) sys.exit(0) #check if batch mode and set start and end file from original list @@ -596,7 +596,8 @@ def runStages(args, rdfModule, preprocess): #run locally if runBatch == False: print ('----> Running Locally') - runLocal(rdfModule, chunkList[ch], outputchunk, args.batch) + args.output = outputchunk #runLocal takes the output name from args, so hand it the per-chunk name + runLocal(rdfModule, chunkList[ch], args) #run on batch if runBatch == True: @@ -828,18 +828,11 @@ def runValidate(jobdir): lastLine = line print(line) -#__________________________________________________________ -if __name__ == "__main__": - #check the arguments - if len(sys.argv)<2: - print ("usage:") - print ("python ",sys.argv[0]," PATHTO/analysis.py ") - print ("python ",sys.argv[0]," 
--help for help") - sys.exit(3) - import argparse - parser = argparse.ArgumentParser() +#__________________________________________________________ +def setup_run_parser(parser): publicOptions = parser.add_argument_group('User options') + publicOptions.add_argument("pathToAnalysisScript", help="path to analysis script") publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+') publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root") publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1) @@ -856,13 +849,23 @@ def runValidate(jobdir): internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m') internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False) - args, _ = parser.parse_known_args() + +#__________________________________________________________ +def run(mainparser, subparser): + """ + Set things in motion. + The two parser arguments are a hack to allow running this + both as `fccanalysis run` and `python config/FCCAnalysisRun.py` + For the latter case, both are the same (see below). 
+ """ + setup_run_parser(subparser) + args, _ = mainparser.parse_known_args() + #check that the analysis file exists - analysisFile = sys.argv[1] + analysisFile = args.pathToAnalysisScript if not os.path.isfile(analysisFile): - print(sys.argv[1], " does not exist") - print("syntax should be: ") - print("python config/FCCAnalysisRun.py analysis.py ") + print("Script ", analysisFile, " does not exist") + print("specify a valid analysis script in the command line arguments") sys.exit(3) #set the RDF ELogLevel @@ -905,3 +908,14 @@ def runValidate(jobdir): print ('----> Can not have --final with --preprocess, exit') sys.exit(3) runStages(args, rdfModule, args.preprocess) + + +#__________________________________________________________ +if __name__ == "__main__": + print("Running this script directly is deprecated, use `fccanalysis run` instead.") + # legacy behavior: allow running this script directly + # with python config/FCCAnalysisRun.py + # and the same behavior as `fccanalysis run` + import argparse + parser = argparse.ArgumentParser() + run(parser, parser) diff --git a/setup.sh b/setup.sh index 176d3591dd..6d94c47d44 100644 --- a/setup.sh +++ b/setup.sh @@ -5,6 +5,8 @@ if [ "${0}" != "${BASH_SOURCE}" ]; then echo "INFO: Key4hep stack already set up." 
fi export PYTHONPATH=$PWD:$PYTHONPATH + export PYTHONPATH=$PWD/python:$PYTHONPATH + export PATH=$PWD/bin:$PATH export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH export CMAKE_PREFIX_PATH=$PWD/install:$CMAKE_PREFIX_PATH export ROOT_INCLUDE_PATH=$PWD/install/include:$ROOT_INCLUDE_PATH diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 91cfb2315d..f33ee6d9b0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,6 +16,23 @@ function(add_integration_test _testname) ) endfunction() +function(add_integration_test_2 _testname) + + add_test(NAME fccanalysisrun_${_testname} + # todo: figure out how to make ctest pick fccanalysis up from PATH + COMMAND ${CMAKE_SOURCE_DIR}/bin/fccanalysis run ${_testname} --test --nevents 100 --bench + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + ) + set_property(TEST fccanalysisrun_${_testname} APPEND PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/analyzers/dataframe:$ENV{LD_LIBRARY_PATH} + PYTHONPATH=${CMAKE_SOURCE_DIR}:$ENV{PYTHONPATH} + PATH=${CMAKE_SOURCE_DIR}/bin:${CMAKE_BINARY_DIR}:$ENV{PATH} + ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/analyzers/dataframe:$ENV{ROOT_INCLUDE_PATH} + ) +endfunction() + + +add_integration_test_2("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py") add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py") add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py")