Skip to content

Commit

Permalink
add 'fccanalysis run' command using subparsers (#180)
Browse files Browse the repository at this point in the history
* add 'fccanalysis run' command using subparsers

* make sure tests find fccanalysis

* Update bin/fccanalysis

Co-authored-by: Clement Helsens <clement.helsens@cern.ch>

* update runLocal

* try to make ctest find fccanalysis

* update readme

* try to make ctest find fccanalysis

Co-authored-by: Clement Helsens <clement.helsens@cern.ch>
  • Loading branch information
vvolkl and clementhelsens authored May 26, 2022
1 parent 82a87ff commit ff8ff03
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 30 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ set(INSTALL_INCLUDE_DIR include CACHE PATH
include(cmake/FCCAnalysesCreateConfig.cmake)


# Stage the `fccanalysis` launcher script in the top-level build directory at
# configure time: readable and executable by everyone, writable by the owner.
file(
  COPY bin/fccanalysis
  DESTINATION ${CMAKE_BINARY_DIR}
  FILE_PERMISSIONS
    OWNER_READ OWNER_WRITE OWNER_EXECUTE
    GROUP_READ GROUP_EXECUTE
    WORLD_READ WORLD_EXECUTE
)

# Install the launcher as an executable program into the `bin` destination.
install(
  PROGRAMS bin/fccanalysis
  DESTINATION bin
)


file(GLOB _run_python_files config/*.py)
install(FILES ${_run_python_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config)
install(FILES config/doPlots.py PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ DESTINATION ${CMAKE_INSTALL_PREFIX}/python/config)
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ file (please note that then the sample will not be matched in the database for
To run the pre-selection stage of the example analysis run:

```shell
python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
```

This will create the output files in the `ZH_mumu_recoil/stage1` subdirectory
Expand All @@ -135,7 +135,7 @@ You also have the possibility to bypass the samples specified in the
`processList` variable by using command line parameter `--output`, like so:

```shell
python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \
fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py \
--output <myoutput.root> \
--files-list <file.root or file1.root file2.root or file*.root>
```
Expand All @@ -144,7 +144,7 @@ The example analysis consists of two pre-selection stages, to run the second one
slightly alter the previous command:

```shell
python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py
fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_stage2.py
```


Expand All @@ -170,7 +170,7 @@ variables needs extra fields like `title`, number of bins and range for the
histogram creation. In the example analysis it can be run like this:

```shell
python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \
fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py \
--final
```

Expand All @@ -187,7 +187,7 @@ the rendering of the plots but also ways of combining samples for plotting.
In the example analysis it can be run in the following manner:

```shell
python config/FCCAnalysisRun.py examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \
fccanalysis run examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py \
--plots
```

Expand Down
14 changes: 14 additions & 0 deletions bin/fccanalysis
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python3

# Command-line launcher: defines the `run` sub-command and hands both the
# top-level parser and the `run` sub-parser over to config.FCCAnalysisRun.run().

if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()
    parser_run = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis")

    # Nothing at all on the command line: point the user at --help and exit
    # with status 3.
    if len(sys.argv)<2:
        print("for usage run fccanalysis --help")
        sys.exit(3)

    # Deliberately imported late, such that the dictionary is loaded only if
    # the configuration is ok.
    from config.FCCAnalysisRun import *
    run(parser, parser_run)
64 changes: 39 additions & 25 deletions config/FCCAnalysisRun.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def runPreprocess(df):
sys.exit(3)
return df
#__________________________________________________________
def runRDF(rdfModule, inputlist, outFile, nevt):
def runRDF(rdfModule, inputlist, outFile, nevt, args):
# for convenience and compatibility with user code
ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")

Expand Down Expand Up @@ -440,7 +440,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):


#__________________________________________________________
def runLocal(rdfModule, fileList, output, batch):
def runLocal(rdfModule, fileList, args):
#Create list of files to be Processed
print ("----> Create dataframe object from files: ", )
fileListRoot = ROOT.vector('string')()
Expand All @@ -465,13 +465,13 @@ def runLocal(rdfModule, fileList, output, batch):
outFile = getElement(rdfModule,"outputDir")
if outFile!="" and outFile[-1]!="/": outFile+="/"

if batch==False:
outFile+=output
if args.batch == False:
outFile+=args.output
else:
outFile=output
outFile=args.output
start_time = time.time()
#run RDF
runRDF(rdfModule, fileListRoot, outFile, nevents_local)
runRDF(rdfModule, fileListRoot, outFile, nevents_local, args)

outf = ROOT.TFile( outFile, "update" )
outt = outf.Get("events")
Expand All @@ -498,7 +498,7 @@ def runLocal(rdfModule, fileList, output, batch):
if args.bench:
import json

analysis_path = sys.argv[1].rsplit('/', 1)[0]
analysis_path = args.pathToAnalysisScript.rsplit('/', 1)[0]
analysis_name = getElement(rdfModule, 'analysisName')
if not analysis_name:
analysis_name = analysis_path
Expand Down Expand Up @@ -540,15 +540,15 @@ def runStages(args, rdfModule, preprocess):
path, filename = os.path.split(args.output)
if path!='': os.system("mkdir -p {}".format(path))
testFile = getElement(rdfModule,"testFile")
runLocal(rdfModule, [testFile], args.output, True)
runLocal(rdfModule, [testFile], args)
sys.exit(0)

#check if files are specified, and if so run the analysis on it/them (this will exit after)
if len(args.files_list)>0:
print("----> Running with user defined list of files (either locally or from batch)")
path, filename = os.path.split(args.output)
if path!='': os.system("mkdir -p {}".format(path))
runLocal(rdfModule, args.files_list, args.output, True)
runLocal(rdfModule, args.files_list, args)
sys.exit(0)

#check if batch mode and set start and end file from original list
Expand Down Expand Up @@ -596,7 +596,7 @@ def runStages(args, rdfModule, preprocess):
#run locally
if runBatch == False:
print ('----> Running Locally')
runLocal(rdfModule, chunkList[ch], outputchunk, args.batch)
runLocal(rdfModule, chunkList[ch], outputchunk, args)

#run on batch
if runBatch == True:
Expand Down Expand Up @@ -828,18 +828,11 @@ def runValidate(jobdir):
lastLine = line
print(line)

#__________________________________________________________
if __name__ == "__main__":
#check the arguments
if len(sys.argv)<2:
print ("usage:")
print ("python ",sys.argv[0]," PATHTO/analysis.py <options>")
print ("python ",sys.argv[0]," --help for help")
sys.exit(3)

import argparse
parser = argparse.ArgumentParser()
#__________________________________________________________
def setup_run_parser(parser):
publicOptions = parser.add_argument_group('User options')
publicOptions.add_argument("pathToAnalysisScript", help="path to analysis script")
publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+')
publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root")
publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1)
Expand All @@ -856,13 +849,23 @@ def runValidate(jobdir):
internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m')
internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False)

args, _ = parser.parse_known_args()

#__________________________________________________________
def run(mainparser, subparser):
"""
Set things in motion.
The two parser arguments are a hack to allow running this
both as `fccanalysis run` and `python config/FCCAnalysisRun.py`
For the latter case, both are the same (see below).
"""
setup_run_parser(subparser)
args, _ = mainparser.parse_known_args()

#check that the analysis file exists
analysisFile = sys.argv[1]
analysisFile = args.pathToAnalysisScript
if not os.path.isfile(analysisFile):
print(sys.argv[1], " does not exist")
print("syntax should be: ")
print("python config/FCCAnalysisRun.py analysis.py <options>")
print("Script ", analysisFile, " does not exist")
print("specify a valid analysis script in the command line arguments")
sys.exit(3)

#set the RDF ELogLevel
Expand Down Expand Up @@ -905,3 +908,14 @@ def runValidate(jobdir):
print ('----> Can not have --final with --preprocess, exit')
sys.exit(3)
runStages(args, rdfModule, args.preprocess)


#__________________________________________________________
if __name__ == "__main__":
print("Running this script directly is deprecated, use `fccanalysis run` instead.")
# legacy behavior: allow running this script directly
# with `python config/FCCAnalysisRun.py`
# and the same behavior as `fccanalysis run`;
# the single parser is passed as both the main parser and the sub-parser
import argparse
parser = argparse.ArgumentParser()
run(parser, parser)
2 changes: 2 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ if [ "${0}" != "${BASH_SOURCE}" ]; then
echo "INFO: Key4hep stack already set up."
fi
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=$PWD/python:$PYTHONPATH
export PATH=$PWD/bin:$PATH
export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
export CMAKE_PREFIX_PATH=$PWD/install:$CMAKE_PREFIX_PATH
export ROOT_INCLUDE_PATH=$PWD/install/include:$ROOT_INCLUDE_PATH
Expand Down
17 changes: 17 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,23 @@ function(add_integration_test _testname)
)
endfunction()

# Register a CTest test that runs an analysis script through the
# `fccanalysis run` command-line entry point.
#
# Arguments:
#   _testname - path of the analysis script; it is passed verbatim to
#               `fccanalysis run` (executed from the top-level source
#               directory) and is also used as the suffix of the test name.
#
# Example:
#   add_integration_test_2("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")
function(add_integration_test_2 _testname)

  # todo: figure out how to make ctest pick fccanalysis up from PATH
  add_test(NAME fccanalysisrun_${_testname}
    COMMAND ${CMAKE_SOURCE_DIR}/bin/fccanalysis run ${_testname} --test --nevents 100 --bench
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  )

  # Environment for the test process. The $ENV{...} values are read when CMake
  # configures the project, not when ctest runs.
  set_property(TEST fccanalysisrun_${_testname} APPEND PROPERTY ENVIRONMENT
    LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/analyzers/dataframe:$ENV{LD_LIBRARY_PATH}
    PYTHONPATH=${CMAKE_SOURCE_DIR}:$ENV{PYTHONPATH}
    # Variable references need braces: a bare `$CMAKE_BINARY_DIR` is not
    # expanded and would be placed in PATH as literal text.
    PATH=${CMAKE_SOURCE_DIR}/bin:${CMAKE_BINARY_DIR}:$ENV{PATH}
    ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/analyzers/dataframe:$ENV{ROOT_INCLUDE_PATH}
  )
endfunction()


add_integration_test_2("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")

add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")
add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py")
Expand Down

0 comments on commit ff8ff03

Please sign in to comment.