From d61d158e43e355fab9e4ee376c6de3b13d066e26 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 6 Aug 2024 08:19:35 +0000 Subject: [PATCH 01/15] started removal MC --- .../semi_automatic_scripts/dl1_production.py | 368 +++--------------- .../semi_automatic_scripts/merging_runs.py | 199 ++-------- .../semi_automatic_scripts/stereo_events.py | 156 ++------ 3 files changed, 114 insertions(+), 609 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 38edec0b..8e39e38b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -32,10 +32,8 @@ __all__ = [ "config_file_gen", - "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", "directories_generator_real", - "directories_generator_MC", ] logger = logging.getLogger(__name__) @@ -43,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen): +def config_file_gen(target_dir, noise_value, source_name, config_gen): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -54,8 +52,6 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) Directory to store the results noise_value : list List of the noise correction values for LST - NSB_match : bool - If real data are matched to pre-processed MCs or not source_name : str Name of the target source config_gen : dict @@ -71,147 +67,18 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) LST_config = config_dict["LST"] MAGIC_config = config_dict["MAGIC"] - if not NSB_match: - LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] - LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] - LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] + conf = { "mc_tel_ids": config_gen["mc_tel_ids"], "LST": LST_config, "MAGIC": MAGIC_config, } - if source_name == "MC": - file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) -def lists_and_bash_generator( - particle_type, - target_dir, - MC_path, - focal_length, - env_name, - cluster, -): - - """ - This function creates the lists list_nodes_*_complete.txt and list_folder_*.txt with the MC file paths. - After that, it generates a few bash scripts to link the MC paths to each subdirectory and to process them from DL0 to DL1. - These bash scripts will be called later in the main() function below. 
This step will be skipped in case the MC path has not been provided (MC_path='') - - Parameters - ---------- - particle_type : str - Particle type (e.g., protons) - target_dir : str - Directory to store the results - MC_path : str - Path to the MCs DL0s - focal_length : str - Focal length to be used to process MCs (e.g., 'nominal') - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - if MC_path == "": - return - print(f"running {particle_type} from {MC_path}") - process_name = "MC" - - list_of_nodes = glob.glob(f"{MC_path}/node*") - dir1 = f"{target_dir}/v{__version__}/MC" - with open( - f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" - ) as f: # creating list_nodes_gammas_complete.txt - for i in list_of_nodes: - out_list = glob.glob(f"{i}/output*") - if len(out_list) == 0: - logger.error( - f"No output file for node {i}, or the directory structure is not the usual one. Skipping..." - ) - continue - elif len(out_list) == 1: - f.write(f"{out_list[0]}\n") - else: - output_index = input( - f"The available outputs are {out_list}, please provide the array index of the desired one:" - ) - f.write(f"{out_list[output_index]}\n") - - with open( - f"{dir1}/logs/list_folder_{particle_type}.txt", "w" - ) as f: # creating list_folder_gammas.txt - for i in list_of_nodes: - f.write(f'{i.split("/")[-1]}\n') - - #################################################################################### - # bash scripts that link the MC paths to each subdirectory. - #################################################################################### - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: - slurm = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-linkMC-%x.%j", - ) - lines_of_config_file = slurm + [ - "while read -r -u 3 lineA && read -r -u 4 lineB\n", - "do\n", - f" cd {dir1}/DL1/{particle_type}\n", - " mkdir $lineB\n", - " cd $lineA\n", - " ls -lR *.gz |wc -l\n", - f" mkdir -p {dir1}/DL1/{particle_type}/$lineB/logs/\n", - f" ls *.gz > {dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n", - ' string=$lineA"/"\n', - f" export file={dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n\n", - " cat $file | while read line; do echo $string${line}" - + f" >>{dir1}/DL1/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", - ' echo "folder $lineB and node $lineA"\n', - f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', - "", - ] - f.writelines(lines_of_config_file) - - ################################################################################################################ - # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
- ################################################################################################################ - - number_of_nodes = glob.glob(f"{MC_path}/node*") - number_of_nodes = len(number_of_nodes) - 1 - with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=process_name, - array=number_of_nodes, - mem="10g", - out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-%x.%A_%a", - ) - lines_of_config_file = slurm + [ - f"cd {dir1}/DL1/{particle_type}\n\n", - f"export INF={dir1}/logs\n", - f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", - "cat logs/list_dl0_ok.txt | while read line\n", - "do\n", - f" cd {dir1}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", - "done\n", - "", - ] - f.writelines(lines_of_config_file) - def lists_and_bash_gen_MAGIC( target_dir, telescope_ids, MAGIC_runs, source, env_name, cluster @@ -300,7 +167,7 @@ def lists_and_bash_gen_MAGIC( def directories_generator_real( - target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name + target_dir, telescope_ids, MAGIC_runs, source_name ): """ Here we create all subdirectories for a given workspace and target name. @@ -313,38 +180,14 @@ def directories_generator_real( List of the telescope IDs (set by the user) MAGIC_runs : array MAGIC dates and runs to be processed - NSB_match : bool - If real data are matched to pre-processed MCs or not source_name : str Name of the target source """ - if NSB_match: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) - dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - else: - - dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1", - exist_ok=True, - ) - - else: - overwrite = input( - f'data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' - ) - if overwrite == "y": - os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1", - exist_ok=True, - ) - - else: - print("Directory not modified.") - + + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") + ########################################### # MAGIC ########################################### @@ -354,48 +197,6 @@ def directories_generator_real( os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) -def directories_generator_MC(target_dir, telescope_ids): - - """ - Here we create all subdirectories for a given workspace and target name. 
- - Parameters - ---------- - target_dir : str - Directory to store the results - telescope_ids : list - List of the telescope IDs (set by the user) - """ - - dir_list = [ - "gammas", - "gammadiffuse", - "electrons", - "protons", - "helium", - ] - if not os.path.exists(f"{target_dir}/v{__version__}/MC"): - os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) - os.makedirs(f"{target_dir}/v{__version__}/MC/DL1", exist_ok=True) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", - exist_ok=True, - ) - else: - overwrite = input( - 'MC directory already exists. Would you like to overwrite it? [only "y" or "n"]: ' - ) - if overwrite == "y": - os.system(f"rm -r {target_dir}/v{__version__}/MC") - os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", - exist_ok=True, - ) - else: - print("Directory not modified.") def main(): @@ -407,15 +208,7 @@ def main(): # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file parser = argparse.ArgumentParser() - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC data, respectively.", - ) + parser.add_argument( "--config-file", @@ -434,14 +227,8 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) env_name = config["general"]["env_name"] - NSB_match = config["general"]["NSB_matching"] - - # LST_runs_and_dates = config["general"]["LST_runs"] - MC_gammas = config["directories"]["MC_gammas"] - MC_electrons = config["directories"]["MC_electrons"] - MC_helium = config["directories"]["MC_helium"] - MC_protons = config["directories"]["MC_protons"] - MC_gammadiff = config["directories"]["MC_gammadiff"] + + focal_length = config["general"]["focal_length"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -454,97 +241,54 @@ def main(): else: source_list.append(source) noise_value = [0, 0, 0] - if not NSB_match: - nsb = config["general"]["NSB_MC"] - - noisebright = 1.15 * pow(nsb, 1.115) - biasdim = 0.358 * pow(nsb, 0.805) - noise_value = [nsb, noisebright, biasdim] - - if not NSB_match: - # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - directories_generator_MC( - str(target_dir), telescope_ids - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, noise_value, NSB_match, "MC", config - ) # TODO: fix here - to_process = { - "gammas": MC_gammas, - "electrons": MC_electrons, - "helium": MC_helium, - "protons": MC_protons, - "gammadiffuse": MC_gammadiff, - } - for particle in to_process.keys(): - lists_and_bash_generator( - particle, - target_dir, - to_process[particle], - focal_length, - env_name, - cluster, - ) - list_of_MC = glob.glob(f"linking_MC_{particle}_*.sh") - if len(list_of_MC) < 2: - logger.warning( - f"No bash script has been produced for processing {particle}" - ) - else: - launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)" - os.system(launch_jobs_MC) - # Here we do the MC DL0 to DL1 
conversion: + + for source_name in source_list: - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - - MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" - MAGIC_runs = np.genfromtxt( - MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 - ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - - # TODO: fix here above - print("*** Converting Calibrated into DL1 data ***") - print(f"Process name: {source_name}") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" - ) - - directories_generator_real( - str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, noise_value, NSB_match, source_name, config - ) # TODO: fix here - - # Below we run the analysis on the MAGIC data - - lists_and_bash_gen_MAGIC( - target_dir, - telescope_ids, - MAGIC_runs, - source_name, - env_name, - cluster, - ) # MAGIC real data - if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): - list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") - if len(list_of_MAGIC_runs) < 1: - logger.warning( - "No bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" - ) - continue + + + MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" + MAGIC_runs = np.genfromtxt( + MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 + ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" + + # TODO: fix here above + print("*** Converting Calibrated into DL1 data ***") + print(f"Process name: {source_name}") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" + ) + + directories_generator_real( + str(target_dir), telescope_ids, MAGIC_runs, source_name + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + config_file_gen( + target_dir, noise_value, source_name, config + ) # TODO: fix here + + # Below we run the analysis on the MAGIC data + + lists_and_bash_gen_MAGIC( + target_dir, + telescope_ids, + MAGIC_runs, + source_name, + env_name, + cluster, + ) # MAGIC real data + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") + if len(list_of_MAGIC_runs) < 1: + logger.warning( + "No bash script has been produced. 
Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" + ) + continue - launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)" - for n, run in enumerate(list_of_MAGIC_runs): - launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)" + for n, run in enumerate(list_of_MAGIC_runs): + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - os.system(launch_jobs) + os.system(launch_jobs) if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index e4426453..c499e488 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -43,56 +43,13 @@ slurm_lines, ) -__all__ = ["split_train_test", "merge", "mergeMC"] +__all__ = ["merge"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -def split_train_test(target_dir, train_fraction): - - """ - This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples, in case you want to make performance studies on MC. For regular analyses, you can/should use the whole MC sample for training. - It generates 2 subdirectories in the directory .../DL1/MC/protons named "test" and "train" and creates sub-sub-directories with the names of all nodes. - For each node sub-sub-directory we move `train_fraction` of the .h5 files to the "train" subdirectory and `1-train_fraction` of the .h5 files to the "test" subdirectory. - - Parameters - ---------- - target_dir : str - Path to the working directory - train_fraction : float - Fraction of proton MC files to be used in the training of RFs - """ - - proton_dir = f"{target_dir}/v{__version__}/MC/DL1/protons" - - list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}")) - - for directory in tqdm( - range(len(list_of_dir)) - ): # tqdm allows us to print a progessbar in the terminal - node = list_of_dir[directory].split("/")[-2] - os.makedirs(f"{proton_dir}/train/{node}", exist_ok=True) - os.makedirs( - f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', - exist_ok=True, - ) - list_of_runs = np.sort( - glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') - ) - number_train_runs = int(len(list_of_runs) * train_fraction) - for j in list_of_runs[0:number_train_runs]: - os.system(f"mv {j} {proton_dir}/train/{node}") - - os.system(f"cp {list_of_dir[directory]}logs/*.txt {proton_dir}/train/{node}") - os.system( - f"mv {list_of_dir[directory]}logs/*.txt {proton_dir}/../protons_test/{node}" - ) - os.system( - f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{node}" - ) - os.system(f"rm -r {list_of_dir[directory]}") def merge(target_dir, MAGIC_runs, env_name, source, cluster): @@ -154,60 +111,6 @@ def merge(target_dir, MAGIC_runs, env_name, source, cluster): logger.error(f"{indir} does not exist") -def mergeMC(target_dir, identification, env_name, cluster): - - """ - This function creates the bash scripts to run merge_hdf_files.py in all MC nodes. 
- - Parameters - ---------- - target_dir : str - Path to the working directory - identification : str - Tells which sample to process - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - process_name = "merging_MC" - - MC_DL1_dir = f"{target_dir}/v{__version__}/MC/DL1" - os.makedirs(f"{MC_DL1_dir}/{identification}/Merged/logs", exist_ok=True) - - if identification == "protons": - list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) - else: - list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/node*")) - - np.savetxt( - f"{MC_DL1_dir}/{identification}/list_of_nodes.txt", list_of_nodes, fmt="%s" - ) - - process_size = len(list_of_nodes) - 1 - - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"Merge_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="short", - array=process_size, - mem="7g", - job_name=process_name, - out_name=f"{MC_DL1_dir}/{identification}/Merged/logs/slurm-%x.%A_%a", - ) - lines_bash_file = slurm + [ - f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - f"export LOG={MC_DL1_dir}/{identification}/Merged/logs" - + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", - ] - f.writelines(lines_bash_file) def main(): @@ -226,16 +129,7 @@ def main(): help="Path to a configuration file", ) - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC data, respectively.", - ) - + args = parser.parse_args() with open( args.config_file, "rb" @@ -258,42 +152,7 @@ def main(): else: source_list.append(source) - if not NSB_match: - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): - if not os.path.exists(f"{target_dir}/v{__version__}/MC/DL1/protons_test"): - print("***** Splitting protons into 'train' and 'test' datasets...") - split_train_test(target_dir, train_fraction) - - print("***** Generating merge_MC bashscripts...") - mergeMC( - target_dir, "protons", env_name, cluster - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammadiffuse", env_name, cluster - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammas", env_name, cluster - ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cluster) - mergeMC(target_dir, "helium", env_name, cluster) - mergeMC(target_dir, "electrons", env_name, cluster) - - print("***** Running merge_hdf_files.py on the MC data files...") - - # Below we run the bash scripts to merge the MC files - list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh")) - if len(list_of_merging_scripts) < 1: - logger.warning("No bash script has been produced for MC") - # TODO: check - - else: - launch_jobs = "" - for n, run in enumerate(list_of_merging_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) - + for source_name in source_list: # Below we run the analysis on the MC data MAGIC_runs_and_dates = 
f"{source_name}_MAGIC_runs.txt" @@ -302,34 +161,30 @@ def main(): ) # Below we run the analysis on the MAGIC data - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - print("***** Generating merge_MAGIC bashscripts...") - merge( - target_dir, - MAGIC_runs, - env_name, - source_name, - cluster, - ) # generating the bash script to merge the subruns - - print("***** Running merge_hdf_files.py on the MAGIC data files...") - - # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort( - glob.glob(f"{source_name}_Merge_MAGIC*.sh") - ) - if len(list_of_merging_scripts) < 1: - logger.warning("No bash scripts for real data") - continue - launch_jobs = "" - for n, run in enumerate(list_of_merging_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) + + print("***** Generating merge_MAGIC bashscripts...") + merge( + target_dir, + MAGIC_runs, + env_name, + source_name, + cluster, + ) # generating the bash script to merge the subruns + + print("***** Running merge_hdf_files.py on the MAGIC data files...") + + # Below we run the bash scripts to merge the MAGIC files + list_of_merging_scripts = np.sort( + glob.glob(f"{source_name}_Merge_MAGIC*.sh") + ) + if len(list_of_merging_scripts) < 1: + logger.warning("No bash scripts for real data") + continue + launch_jobs = "" + for n, run in enumerate(list_of_merging_scripts): + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" + + os.system(launch_jobs) print(f"Process name: merging_{source_name}") print( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4ffd2530..752b20a2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -33,7 +33,7 @@ slurm_lines, ) -__all__ = ["configfile_stereo", "bash_stereo", "bash_stereoMC"] +__all__ = ["configfile_stereo", "bash_stereo"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) @@ -67,10 +67,7 @@ def configfile_stereo(target_dir, source_name, config_gen): "mc_tel_ids": config_gen["mc_tel_ids"], "stereo_reco": config_dict["stereo_reco"], } - if source_name == "MC": - file_name = f"{target_dir}/v{__version__}/MC/config_stereo.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" + file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) @@ -148,55 +145,6 @@ def bash_stereo(target_dir, source, env_name, cluster): f.writelines(lines) -def bash_stereoMC(target_dir, identification, env_name, cluster): - - """ - This function generates the bashscripts for running the stereo analysis. - - Parameters - ---------- - target_dir : str - Path to the working directory - identification : str - Particle name. Options: protons, gammadiffuse, gammas, protons_test - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - process_name = "stereo_MC" - - inputdir = f"{target_dir}/v{__version__}/MC/DL1/{identification}/Merged" - os.makedirs(f"{inputdir}/StereoMerged/logs", exist_ok=True) - - os.system( - f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" - ) # generating a list with the DL1 coincident data files. 
- with open(f"{inputdir}/list_coin.txt", "r") as f: - process_size = len(f.readlines()) - 1 - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"StereoEvents_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=f"{process_name}_stereo", - array=process_size, - mem="7g", - out_name=f"{inputdir}/StereoMerged/logs/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export INPUTDIR={inputdir}\n", - f"export OUTPUTDIR={inputdir}/StereoMerged\n", - "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/MC/config_stereo.yaml >$LOG 2>&1", - ] - f.writelines(lines) def main(): @@ -215,16 +163,7 @@ def main(): help="Path to a configuration file", ) - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on real or MC data, respectively.", - ) - + args = parser.parse_args() with open( args.config_file, "rb" @@ -245,68 +184,35 @@ def main(): source_list = joblib.load("list_sources.dat") else: source_list = [source] - if not NSB_match: - if ( - (args.analysis_type == "onlyMC") - or (args.analysis_type == "doEverything") - and not NSB_match - ): - configfile_stereo(target_dir, "MC", config) - print("***** Generating the bashscript for MCs...") - for part in [ - "gammadiffuse", - "gammas", - "protons", - "protons_test", - "helium", - "electrons", - ]: - bash_stereoMC(target_dir, part, env_name, cluster) - - list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - if len(list_of_stereo_scripts) < 1: - logger.warning("No bash script has been produced for processing MCs") - else: - launch_jobs = "" - # TODO: check on N. 
bash scripts - - for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) - + for source_name in source_list: - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) - - # Below we run the analysis on the real data - - print("***** Generating the bashscript...") - bash_stereo(target_dir, source_name, env_name, cluster) - - print("***** Submitting processess to the cluster...") - print(f"Process name: {source_name}_stereo") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" - ) - - # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort( - glob.glob(f"{source_name}_StereoEvents*.sh") - ) - if len(list_of_stereo_scripts) < 1: - logger.warning("No bash scripts for real data") - continue - launch_jobs = "" - for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) + + print("***** Generating file config_stereo.yaml...") + configfile_stereo(target_dir, source_name, config) + + # Below we run the analysis on the real data + + print("***** Generating the bashscript...") + bash_stereo(target_dir, source_name, env_name, cluster) + + print("***** Submitting processess to the cluster...") + print(f"Process name: {source_name}_stereo") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" + ) + + # Below we run the bash scripts to find the stereo events + list_of_stereo_scripts = np.sort( + glob.glob(f"{source_name}_StereoEvents*.sh") + ) + if len(list_of_stereo_scripts) < 1: + logger.warning("No bash scripts for real data") + continue + launch_jobs = "" + for n, run in enumerate(list_of_stereo_scripts): + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" + + os.system(launch_jobs) if __name__ == "__main__": From 02c4ca13e96122808bb33a85b05706a270f261f7 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 6 Aug 2024 08:52:43 +0000 Subject: [PATCH 02/15] no MC --- .../config_auto_MCP.yaml | 14 +++--------- .../semi_automatic_scripts/dl1_production.py | 4 ++-- .../semi_automatic_scripts/merging_runs.py | 22 ++++--------------- .../semi_automatic_scripts/stereo_events.py | 14 +++--------- 4 files changed, 12 insertions(+), 42 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml index 00380cfc..df381828 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml @@ -8,12 +8,7 @@ mc_tel_ids: directories: workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. - # MC paths below are ignored if you set NSB_matching = true. - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. 
- MC_electrons : "" - MC_helium : "" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. @@ -29,12 +24,9 @@ general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" - focal_length : "effective" simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). - NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. - NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. + diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8e39e38b..48faf96b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -11,7 +11,7 @@ No LST data is used here. Standard usage: -$ dl1_production (-t analysis_type) (-c config_file.yaml) +$ dl1_production (-c config_file.yaml) """ import argparse import glob @@ -229,7 +229,7 @@ def main(): env_name = config["general"]["env_name"] - focal_length = config["general"]["focal_length"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index c499e488..5afe02c1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -1,29 +1,16 @@ """ -This script splits the proton MC data sample into "train" -and "test", deletes possible MC failed runs (only those files -that end up with a size < 1 kB), and generates the bash -scripts to merge MC and real data files by calling the script "merge_hdf_files.py" -in the following order: +This script generates the bash +scripts to merge real data files by calling the script "merge_hdf_files.py": MAGIC: Merge the subruns into runs for M1 and M2 individually. 
-MC: - -Merges all MC runs in a node Usage: -$ merging_runs (-c config.yaml) (-t analysis_type) - -If you want to merge only the MAGIC or only the MC data, -you can do as follows: +$ merging_runs (-c config.yaml) -Only MAGIC: -$ merging_runs -t onlyMAGIC (-c config.yaml) -Only MC: -$ merging_runs -t onlyMC (-c config.yaml) """ import argparse @@ -138,7 +125,7 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - NSB_match = config["general"]["NSB_matching"] + train_fraction = float(config["general"]["proton_train_fraction"]) env_name = config["general"]["env_name"] @@ -154,7 +141,6 @@ def main(): source_list.append(source) for source_name in source_list: - # Below we run the analysis on the MC data MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 752b20a2..bb7c5dff 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -1,19 +1,11 @@ """ This scripts generates and runs the bashscripts -to compute the stereo parameters of DL1 MC and +to compute the stereo parameters of DL1 Coincident MAGIC+LST data files. Usage: -$ stereo_events (-c config.yaml) (-t analysis_type) +$ stereo_events (-c config.yaml) -If you want to compute the stereo parameters only the real data or only the MC data, -you can do as follows: - -Only real data: -$ stereo_events -t onlyMAGIC (-c config.yaml) - -Only MC: -$ stereo_events -t onlyMC (-c config.yaml) """ import argparse @@ -174,7 +166,7 @@ def main(): env_name = config["general"]["env_name"] - NSB_match = config["general"]["NSB_matching"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] From 0cd7b7a342ce9e22a025367c3d511f9a4f9616e8 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 7 Aug 2024 06:40:44 +0000 Subject: [PATCH 03/15] remove MC --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 48faf96b..58aaf1eb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -41,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, source_name, config_gen): +def config_file_gen(target_dir, source_name, config_gen): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -50,8 +50,6 @@ def config_file_gen(target_dir, noise_value, source_name, config_gen): ---------- target_dir : path Directory to store the results - noise_value : list - List of the noise correction values for LST source_name : str Name of the target source config_gen : dict @@ -240,7 +238,6 @@ def main(): else: source_list.append(source) - noise_value = [0, 0, 0] @@ -263,7 +260,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, source_name, config + target_dir, source_name, 
config ) # TODO: fix here # Below we run the analysis on the MAGIC data From 26a63ef9456fefcbc77ad1e71b334c723e00a127 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 07:10:28 +0000 Subject: [PATCH 04/15] minor fixes --- .../semi_automatic_scripts/dl1_production.py | 23 ++++--------------- .../semi_automatic_scripts/merging_runs.py | 17 +++----------- .../semi_automatic_scripts/stereo_events.py | 17 ++++---------- 3 files changed, 12 insertions(+), 45 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 58aaf1eb..49a07fd4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -65,19 +65,17 @@ def config_file_gen(target_dir, source_name, config_gen): LST_config = config_dict["LST"] MAGIC_config = config_dict["MAGIC"] - conf = { "mc_tel_ids": config_gen["mc_tel_ids"], "LST": LST_config, "MAGIC": MAGIC_config, } - + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) - def lists_and_bash_gen_MAGIC( target_dir, telescope_ids, MAGIC_runs, source, env_name, cluster ): @@ -164,9 +162,7 @@ def lists_and_bash_gen_MAGIC( f.writelines(lines) -def directories_generator_real( - target_dir, telescope_ids, MAGIC_runs, source_name -): +def directories_generator_real(target_dir, telescope_ids, MAGIC_runs, source_name): """ Here we create all subdirectories for a given workspace and target name. @@ -182,10 +178,9 @@ def directories_generator_real( Name of the target source """ - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - + ########################################### # MAGIC ########################################### @@ -195,8 +190,6 @@ def directories_generator_real( os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) - - def main(): """ @@ -206,7 +199,6 @@ def main(): # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file parser = argparse.ArgumentParser() - parser.add_argument( "--config-file", @@ -226,8 +218,6 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) env_name = config["general"]["env_name"] - - source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] @@ -238,11 +228,8 @@ def main(): else: source_list.append(source) - - for source_name in source_list: - MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -259,9 +246,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, source_name, config - ) # TODO: fix here + config_file_gen(target_dir, source_name, config) # TODO: fix here # Below we run the analysis on the MAGIC data diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 2627ee30..83e1a22c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -19,7 +19,6 @@ import joblib import numpy as np import yaml -from tqdm import tqdm from magicctapipe import __version__ from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( @@ -34,8 +33,6 @@ logger.setLevel(logging.INFO) - - def merge(target_dir, MAGIC_runs, env_name, source, cluster): """ @@ -95,8 +92,6 @@ def merge(target_dir, MAGIC_runs, env_name, source, cluster): logger.error(f"{indir} does not exist") - - def main(): """ @@ -113,7 +108,6 @@ def main(): help="Path to a configuration file", ) - args = parser.parse_args() with open( args.config_file, "rb" @@ -122,9 +116,6 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - - train_fraction = float(config["general"]["proton_train_fraction"]) - env_name = config["general"]["env_name"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -136,7 +127,7 @@ def main(): else: source_list.append(source) - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -144,7 +135,7 @@ def main(): ) # Below we run the analysis on the MAGIC data - + print("***** Generating merge_MAGIC bashscripts...") merge( target_dir, @@ -157,9 +148,7 @@ def main(): print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort( - glob.glob(f"{source_name}_Merge_MAGIC*.sh") - ) + list_of_merging_scripts = np.sort(glob.glob(f"{source_name}_Merge_MAGIC*.sh")) if len(list_of_merging_scripts) < 1: logger.warning("No bash scripts for real data") continue diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index bb7c5dff..2547316d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -1,11 +1,10 @@ """ This scripts generates and runs the bashscripts -to compute the stereo parameters of DL1 +to compute the stereo parameters of DL1 Coincident MAGIC+LST data files. 
Usage: -$ stereo_events (-c config.yaml) - +$ stereo_events (-c config.yaml) """ import argparse @@ -137,8 +136,6 @@ def bash_stereo(target_dir, source, env_name, cluster): f.writelines(lines) - - def main(): """ @@ -155,7 +152,6 @@ def main(): help="Path to a configuration file", ) - args = parser.parse_args() with open( args.config_file, "rb" @@ -166,7 +162,6 @@ def main(): env_name = config["general"]["env_name"] - source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -176,9 +171,9 @@ def main(): source_list = joblib.load("list_sources.dat") else: source_list = [source] - + for source_name in source_list: - + print("***** Generating file config_stereo.yaml...") configfile_stereo(target_dir, source_name, config) @@ -194,9 +189,7 @@ def main(): ) # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort( - glob.glob(f"{source_name}_StereoEvents*.sh") - ) + list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh")) if len(list_of_stereo_scripts) < 1: logger.warning("No bash scripts for real data") continue From 04e6d6f9553380ecde303f52408c7658bb48de73 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 13:26:16 +0000 Subject: [PATCH 05/15] remove MC --- .../lst1_magic/semi_automatic_scripts/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 2cc82cf9..27768690 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -2,29 +2,22 @@ from .coincident_events import configfile_coincidence, linking_bash_lst from .dl1_production import ( config_file_gen, - directories_generator_MC, directories_generator_real, lists_and_bash_gen_MAGIC, - lists_and_bash_generator, ) from .merge_stereo import MergeStereo -from .merging_runs import merge, mergeMC, split_train_test -from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo +from .merging_runs import merge +from .stereo_events import bash_stereo, configfile_stereo __all__ = [ - "split_train_test", "merge", - "mergeMC", "config_file_gen", - "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", "directories_generator_real", - "directories_generator_MC", "configfile_coincidence", "linking_bash_lst", "configfile_stereo", "bash_stereo", - "bash_stereoMC", "slurm_lines", "MergeStereo", ] From 45fc5b5eb137315c38f2bccbab18cbfc1067d839 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:19:18 +0200 Subject: [PATCH 06/15] Update dl1_production.py --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 4cd685a9..1613ef78 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -230,8 +230,7 @@ def main(): if source is None: source = source_in source_list = [source] - - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" From cc75273b685b5ef97dc763310bcf84e4d44d2ff3 Mon Sep 17 00:00:00 2001 From: 
Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:20:20 +0200 Subject: [PATCH 07/15] Update merging_runs.py --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index c53caf59..548ac4c4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -128,8 +128,7 @@ def main(): if source is None: source = source_in source_list = [source] - - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( From 04c247012e3bf5647fa2df73f7be9daa4a052093 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:24:21 +0200 Subject: [PATCH 08/15] Update dl1_production.py --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 1613ef78..b08153bd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -230,7 +230,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" From 4acd21c5eda211154fe1beee6b124f7463eef5de Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:24:44 +0200 Subject: [PATCH 09/15] Update merging_runs.py --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 548ac4c4..ca80d7d5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -128,7 +128,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( From 7f91349ca5aff56b0b50beb775a1e49da2634cc4 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:21:29 +0000 Subject: [PATCH 10/15] fix config --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index b08153bd..9baf7269 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -62,13 +62,11 @@ def config_file_gen(target_dir, source_name, config_gen): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - LST_config = config_dict["LST"] - MAGIC_config = config_dict["MAGIC"] conf = { "mc_tel_ids": 
config_gen["mc_tel_ids"], - "LST": LST_config, - "MAGIC": MAGIC_config, + "LST": config_dict["LST"], + "MAGIC": config_dict["MAGIC"], } file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" From fccd9508a04b67bffddd443607e0b5005ac9b943 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:28:49 +0000 Subject: [PATCH 11/15] swap lines --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 9baf7269..a8633727 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -176,8 +176,8 @@ def directories_generator_real(target_dir, telescope_ids, MAGIC_runs, source_nam Name of the target source """ - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") + os.makedirs(dl1_dir, exist_ok=True) ########################################### # MAGIC From 71101c652cee548a33696191161946fd7b72c694 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:35:18 +0000 Subject: [PATCH 12/15] remove fixed TODO --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index a8633727..1cbcf78c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -236,7 +236,6 @@ def main(): MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - # TODO: fix here above print("*** Converting Calibrated into DL1 data ***") print(f"Process name: {source_name}") print( @@ -246,7 +245,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(target_dir, source_name, config) # TODO: fix here + config_file_gen(target_dir, source_name, config) # Below we run the analysis on the MAGIC data From c3c087319a31f4584a466e21994d7438d9ad9072 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:42:34 +0000 Subject: [PATCH 13/15] fix readme --- magicctapipe/scripts/lst1_magic/README.md | 27 +++++------------------ 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 9da8b49e..d24d53ab 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -29,30 +29,17 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) ### DL0 to DL1 -In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3) and MC DL0 to DL1. +In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3). In your working IT Container directory (e.g. 
/fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), take care that the `resources/config.yaml` file gets installed when you install the pipeline, so you will have to copy it, e.g. in your workspace, modify it and put the path to this new file in the `config_auto_MCP.yaml` (this way you don't need to install again the pipeline). -The file `config_auto_MCP.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: +The file `config_auto_MCP.yaml` must contain parameters for data selection and some information on the night sky background (NSB) level and software versions: ``` - mc_tel_ids: - LST-1: 1 - LST-2: 0 - LST-3: 0 - LST-4: 0 - MAGIC-I: 2 - MAGIC-II: 3 - directories: workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. - # MC paths below are ignored if you set NSB_matching = true. - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. - MC_electrons : "" - MC_helium : "" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. @@ -68,17 +55,13 @@ general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" - focal_length : "effective" simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). - NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. - NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. - + ``` WARNING: Only the runs for which the `LST_version` parameter matches the `processed_lstchain_file` version in the LST database (i.e., the version used to evaluate the NSB level; generally the last available and processable version of a run) will be processed. 
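For reference, the per-source driver pattern that these patches converge on can be sketched as follows. This is a minimal, illustrative reconstruction based on the `main()` functions shown in the diffs above, not part of the patch itself; it assumes `config_auto_MCP.yaml` (and, when no database source is set, `list_sources.dat`, produced by an earlier pipeline step) is present in the working directory.

```
# Illustrative sketch only: reconstructed from the main() functions above.
import glob

import joblib
import yaml

with open("config_auto_MCP.yaml", "rb") as f:  # "rb" mode, as in the scripts
    config = yaml.safe_load(f)

source_in = config["data_selection"]["source_name_database"]
source_out = config["data_selection"]["source_name_output"]

# source_name_database set to null means "process every source in the given
# time range"; the source list is then read back from list_sources.dat.
if source_in is None:
    source_list = joblib.load("list_sources.dat")
else:
    source_list = [source_out]

for source_name in source_list:
    # One linking job per source, then one job per MAGIC run that starts only
    # after the linking job has terminated.
    list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh")
    launch_jobs = (
        f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)"
    )
    for n, run in enumerate(list_of_MAGIC_runs):
        launch_jobs += (
            f" && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})"
        )
    print(launch_jobs)  # the real scripts hand this string to os.system()
```

Here `sbatch --parsable` makes each submission print only its job ID, which is what lets the generated `--dependency=afterany:$linking` clause reference the linking job; `afterany` starts the dependent job once the linking job terminates, regardless of its exit status.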
From 34cc7d79e17e1e30d6a5b337c31ef1723f8dcc4c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 13:08:11 +0000 Subject: [PATCH 14/15] fix merge problems --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 8 ++++---- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index e0125c55..bfd17707 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -41,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, source_name, config_gen): +def config_file_gen(target_dir, source_name, config_file): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -64,7 +64,7 @@ def config_file_gen(target_dir, source_name, config_gen): config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_gen["mc_tel_ids"], + "mc_tel_ids": config_dict["mc_tel_ids"], "LST": config_dict["LST"], "MAGIC": config_dict["MAGIC"], } @@ -228,7 +228,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -244,7 +244,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(target_dir, source_name, config) + config_file_gen(target_dir, source_name, config_file) # Below we run the analysis on the MAGIC data diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 2a97990d..a2f8a4ee 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -177,7 +177,7 @@ def main(): for source_name in source_list: print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) + configfile_stereo(target_dir, source_name, config_file) # Below we run the analysis on the real data @@ -202,6 +202,5 @@ def main(): os.system(launch_jobs) - if __name__ == "__main__": main() From 827239c3e7e2e8d5fef918ead7d7b05edb063943 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 13:16:15 +0000 Subject: [PATCH 15/15] fixed workspace path (and spaces in the config) --- magicctapipe/scripts/lst1_magic/README.md | 10 ++++---- .../config_auto_MCP.yaml | 24 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index d24d53ab..c494d0d7 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -31,7 +31,7 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3). -In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. 
/fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), take care that the `resources/config.yaml` file gets installed when you install the pipeline, so you will have to copy it, e.g. in your workspace, modify it and put the path to this new file in the `config_auto_MCP.yaml` (this way you don't need to install again the pipeline).
+In your working IT Container directory (i.e., `workspace_dir`), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), note that the `resources/config.yaml` file is installed together with the pipeline, so you will have to copy it (e.g., into your workspace), modify the copy, and set the path to this new file in `config_auto_MCP.yaml` (this way you don't need to reinstall the pipeline).
 
 The file `config_auto_MCP.yaml` must contain parameters for data selection and some information on the night sky background (NSB) level and software versions:
 
@@ -96,9 +96,9 @@ The command `dl1_production` does a series of things:
 
 - Creates a directory with the target name within the directory `yourprojectname/{MCP_version}` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are:
 ```
-/fefs/aswg/workspace/yourname/yourprojectname/VERSION/
-/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1
-/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1/[subdirectories]
+workspace_dir/VERSION/
+workspace_dir/VERSION/{source}/DL1
+workspace_dir/VERSION/{source}/DL1/[subdirectories]
 ```
 where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format.
 - Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/VERSION/{source}/` created in the previous step.
@@ -114,7 +114,7 @@ or
 
 > $ squeue -u your_user_name
 
-Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun.
+Once it is done, all of the subdirectories in `workspace_dir/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun.
 
 WARNING: some of these jobs could fail due to 'broken' input files: before moving to the next step, check for failed jobs (through `job_accounting` and/or log files) and remove the output files produced by these failed jobs (these output files will generally have a very small size, lower than few kB, and cannot be read in the following steps)
 
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml
index 7ad02b50..1ce9c418 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml
@@ -1,24 +1,24 @@
 directories:
-    workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved.
+    workspace_dir: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved.
data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. - time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). - min : "2023_11_17" - max : "2024_03_03" - date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. + time_range: True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). + min: "2023_11_17" + max: "2024_03_03" + date_list: ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used - LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! - LST_tailcut : "tailcut84" - simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : magic-lst # name of the conda environment to be used to process data. - cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). + LST_version: "v0.10" # check the `processed_lstchain_file` version in the LST database! + LST_tailcut: "tailcut84" + simtel_nsb: "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB + lstchain_modified_config: true # use_flatfield_heuristic = True to evaluate NSB + nsb: [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] + env_name: magic-lst # name of the conda environment to be used to process data. + cluster: "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF).
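As a closing illustration of how the `general` block above might be consumed, the sketch below loads the config and enforces the SLURM-only constraint stated in the `cluster` comment; the helper name and the error message are assumptions for illustration, not code from this PR.

```
# Minimal sketch: validating the general block of config_auto_MCP.yaml.
# check_general is a hypothetical helper; key names follow the config above.
import yaml


def check_general(config_file="config_auto_MCP.yaml"):
    with open(config_file, "rb") as f:
        config = yaml.safe_load(f)

    general = config["general"]
    # At the moment only SLURM is available as cluster management system.
    if general["cluster"] != "SLURM":
        raise NotImplementedError(f"Unsupported cluster: {general['cluster']}")
    # env_name is the conda environment used to process the data.
    return general["env_name"], general["LST_version"]


env_name, lst_version = check_general()
print(env_name, lst_version)
```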