Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

G4 fuse input #147

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions jobs/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,30 @@ max_num_submit = 200
t_sleep = 1

[job]
container = xenonnt-montecarlo-development.simg
runids = 51938,51914,51907,51905,51908,51904,51906,51909,51911,51913,51939,51910
output_folder = /scratch/midway2/yuanlq/download/ambe
container = xenonnt-montecarlo-2024.04.1.simg
runids = 51389
output_folder = /scratch/midway3/jjakob/salt/ambe_update
package = fuse
saltax_mode = salt
simu_config_version = sr1_dev
generator_name = flat
corrections_version=global_v15
generator_name = ambe
recoil = 0
simu_mode = all
rate = 100
en_range = 0.2,45
rate = 20
en_range = 0.2,50
process_data = False
process_simu = True
skip_records = False
delete_records = False
storage_to_patch = /project/lgrandi/yuanlq/salt/raw_records,/scratch/midway2/yuanlq/salt/raw_records,/scratch/midway3/yuanlq/salt/raw_records
storage_to_patch = /scratch/midway2/jjakob/ambe_rawrecords2

[slurm]
username = yuanlq
username = jjakob
account = pi-lgrandi
job_title = sprinkling
partition = broadwl
qos = broadwl
mem_per_cpu = 45000
mem_per_cpu = 55000
cpus_per_task = 1
log_dir = /home/yuanlq/.tmp_job_submission/saltax/ambe
log_dir = /home/jjakob/tmp_job_submission
2 changes: 2 additions & 0 deletions saltax/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ def xenonnt_salted_fuse(
instr_file_name = saltax.instr_file_name(
runid=runid, recoil=recoil, generator_name=generator_name, mode=simu_mode, **kwargs
)

# If runid is not None, then we need to either load instruction or generate it
if runid is not None:
# Try to load instruction from file and generate if not found
Expand All @@ -254,6 +255,7 @@ def xenonnt_salted_fuse(
st.set_config(
{
"input_file": instr_file_name,
"input_type": saltax.generator.AMBE_INSTRUCTIONS_TYPE if generator_name=="ambe" else "wfsim",
"raw_records_file_size_target": MAX_RAW_RECORDS_FILE_SIZE_MB,
}
)
Expand Down
92 changes: 75 additions & 17 deletions saltax/instructions/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,28 @@
method="RegularGridInterpolator",
)
SE_INSTRUCTIONS_DIR = "/project/lgrandi/yuanlq/salt/se_instructions/"
AMBE_INSTRUCTIONS_FILE = "/project/lgrandi/yuanlq/salt/ambe_instructions/minghao_aptinput.csv"
YBE_INSTRUCTIONS_FILE = "/project2/lgrandi/ghusheng/ybe_instrutions/ybe_wfsim_instructions_6806_events_time_modified.csv"
# BASE_DIR = "/project2/lgrandi/yuanlq/shared/saltax_instr/"
#AMBE_INSTRUCTIONS_FILE = "/project/lgrandi/yuanlq/salt/ambe_instructions/minghao_aptinput.csv"
AMBE_INSTRUCTIONS_FILE = "/project2/lgrandi/jjakob/AmBeSprinkling/AmBe_fuse_input.csv"
#AMBE_INSTRUCTIONS_TYPE = "wfsim"
AMBE_INSTRUCTIONS_TYPE = "fuse"
BASE_DIR = os.path.abspath(__file__)[:-12] + "../../generated/"

FUSE_DTYPE = [
(("x position of the cluster [cm]", "x"), np.float32),
(("y position of the cluster [cm]", "y"), np.float32),
(("z position of the cluster [cm]", "z"), np.float32),
(("Number of photons at interaction position", "photons"), np.int32),
(("Number of electrons at interaction position", "electrons"), np.int32),
(("Number of excitons at interaction position", "excitons"), np.int32),
(("Electric field value at the cluster position [V/cm]", "e_field"), np.float32),
(("Energy of the cluster [keV]", "ed"), np.float32),
(("NEST interaction type", "nestid"), np.int8),
(("ID of the cluster", "cluster_id"), np.int32),
(("Time of the interaction", "t"), np.int64),
(("Geant4 event ID", "eventid"), np.int32),
]


def generate_vertex(r_range=R_RANGE, z_range=Z_RANGE, size=1):
"""Generate a random vertex in the TPC volume.
Expand Down Expand Up @@ -190,6 +207,8 @@ def instr_file_name(
default: BASE_DIR
:return: instruction file name
"""
if generator_name=="ambe":
generator_name += ("_" + AMBE_INSTRUCTIONS_TYPE)
if en_range is not None:
en_range = str(en_range[0]) + "_" + str(en_range[1])
else:
Expand Down Expand Up @@ -221,6 +240,43 @@ def instr_file_name(

return filename

def fill_fuse_instruction_i(i, cluster_i, selected_ambe,times_offset):
instr_i = np.zeros(len(selected_ambe), dtype=FUSE_DTYPE)
instr_i["t"] = times_offset[i] + selected_ambe["t"]
instr_i["eventid"] = selected_ambe["eventid"]
instr_i["x"] = selected_ambe["x"]
instr_i["y"] = selected_ambe["y"]
instr_i["z"] = selected_ambe["z"]
instr_i["photons"] = selected_ambe["photons"]
instr_i["electrons"] = selected_ambe["electrons"]
instr_i["excitons"] = selected_ambe["excitons"]
instr_i["e_field"] = selected_ambe["e_field"]
instr_i["ed"] = selected_ambe["ed"]
instr_i["nestid"] = selected_ambe["nestid"]
instr_i["cluster_id"] = cluster_i + np.arange(1, 1 + len(selected_ambe))

# Filter out 0 amplitudes
instr_i = instr_i[(instr_i["photons"] > 0) | (instr_i["electrons"] > 0)]

return instr_i

def fill_wfsim_instruction_i(i, selected_ambe,times_offset):
instr_i = np.zeros(len(selected_ambe), dtype=wfsim.instruction_dtype)
instr_i["time"] = times_offset[i] + selected_ambe["time"]
instr_i["event_number"] = i + 1
instr_i["type"] = selected_ambe["type"]
instr_i["x"] = selected_ambe["x"]
instr_i["y"] = selected_ambe["y"]
instr_i["z"] = selected_ambe["z"]
instr_i["recoil"] = selected_ambe["recoil"]
instr_i["e_dep"] = selected_ambe["e_dep"]
instr_i["amp"] = selected_ambe["amp"]
instr_i["n_excitons"] = selected_ambe["n_excitons"]

# Filter out 0 amplitudes
instr_i = instr_i[instr_i["amp"] > 0] # should also work if all amp's are zero

return instr_i

def generator_se(
runid,
Expand Down Expand Up @@ -325,6 +381,7 @@ def generator_ambe(
rate=1e9 / SALT_TIME_INTERVAL,
time_mode="uniform",
ambe_instructions_file=AMBE_INSTRUCTIONS_FILE,
instructions_type=AMBE_INSTRUCTIONS_TYPE,
**kwargs
):
"""Generate instructions for a run with AmBe source.
Expand Down Expand Up @@ -354,31 +411,32 @@ def generator_ambe(
)

# assign instructions
instr = np.zeros(0, dtype=wfsim.instruction_dtype)
if instructions_type == 'fuse':
instr = np.zeros(0, dtype=FUSE_DTYPE)
# Sort here just to be sure. Should be irrelevant as long as cluster_id is strictly increasing with time in fill_fuse_instruction_i
ambe_event_numbers = np.sort(ambe_event_numbers)
cluster_id = 0
if instructions_type == 'wfsim':
instr = np.zeros(0, dtype=wfsim.instruction_dtype)

for i in tqdm(range(n_tot)):
# bootstrapped ambe instruction
selected_ambe = ambe_instructions[
ambe_instructions["event_number"] == ambe_event_numbers[i]
]

# instruction for i-th event
instr_i = np.zeros(len(selected_ambe), dtype=wfsim.instruction_dtype)
instr_i["time"] = times_offset[i] + selected_ambe["time"]
instr_i["event_number"] = i + 1
instr_i["type"] = selected_ambe["type"]
instr_i["x"] = selected_ambe["x"]
instr_i["y"] = selected_ambe["y"]
instr_i["z"] = selected_ambe["z"]
instr_i["recoil"] = selected_ambe["recoil"]
instr_i["e_dep"] = selected_ambe["e_dep"]
instr_i["amp"] = selected_ambe["amp"]
instr_i["n_excitons"] = selected_ambe["n_excitons"]
if instructions_type == 'fuse':
instr_i = fill_fuse_instruction_i(i, cluster_id, selected_ambe, times_offset)
cluster_id += len(selected_ambe)
elif instructions_type == 'wfsim':
instr_i = fill_wfsim_instruction_i(i, selected_ambe, times_offset)
else:
raise ValueError(f'Instruction type {instructions_type} is not known. Must be either "fuse" or "wfsim".')

# concatenate instr
instr = np.concatenate((instr, instr_i))

# Filter out 0 amplitudes
instr = instr[instr["amp"] > 0]

return instr


Expand Down
45 changes: 29 additions & 16 deletions saltax/plugins/f_raw_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ class SChunkCsvInput(FuseBasePlugin):
input_file = straxen.URLConfig(
track=False,
infer_type=False,
help="CSV file (in wfsim format) to read.",
help="CSV file (format as in input_type) to read.",
)
input_type = straxen.URLConfig(
track=False,
infer_type=False,
help="Input type of CSV file ('fuse' or 'wfsim').",
)
ns_no_instruction_after_chunk_start = straxen.URLConfig(
default=5e7,
Expand All @@ -81,6 +86,7 @@ def setup(self):
super().setup()
self.csv_file_reader = SCsvFileLoader(
input_file=self.input_file,
input_type=self.input_type,
random_number_generator=self.rng,
ns_no_instruction_before_chunk_end=self.ns_no_instruction_before_chunk_end,
ns_no_instruction_after_chunk_start=self.ns_no_instruction_after_chunk_start,
Expand All @@ -95,11 +101,10 @@ def compute(self, raw_records, start, end):
chunk_data = chunk_data.to_records(index=False)
data = np.zeros(len(chunk_data), dtype=self.dtype)
strax.copy_to_buffer(chunk_data, data, "_bring_data_into_correct_format")

self.source_done = source_done

# Stick rigorously with raw_records time range
return self.chunk(start=start, end=end, data=data, data_type="geant4_interactions")
return self.chunk(start=start, end=end, data=data)#, data_type="geant4_interactions")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is where things are suspicious. How does the output records chunk look like? Are the chunks crazily huge? @jjakob03

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if you can direct me to where your records/raw_records_simu is

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this because fuse did drop it as well in this PR. Do you think we need it here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly I am not sure if it is necessary and removing it will break things. I am just a bit nervous since this is chunk related, and chunk size does matter hugely for memory consumption. I see that your memory requested is already 55GB. I would say 70GB is the maximum we can take. Maybe give it a last try with 70GB?
Also I would want to see the raw_records_simu and records files. Is it somewhere on midway?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No sorry I keep them in my scratch. records chunk size is ~1.2GB each with 72 chunks in total and, raw_records_simu chunk size is ~300MB and also 72 chunks in total. Is that helpful?

Yes I would have already gone for more memory but broadwl won't give me more than 58GB and xenon1t partition can't see project and midway3/scratch.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No sorry I keep them in my scratch. records chunk size is ~1.2GB each with 72 chunks in total and, raw_records_simu chunk size is ~300MB and also 72 chunks in total. Is that helpful?

This is helpful. Maybe try lgrandi on midway3. Note that you will have to reinstall things... which gonna be a painful 5 minutes

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the info you gave I don't think this is where things break. Maybe it is just the AmBe super heavy nature itself + some memory leakage. We never figured out why peak building is so expensive in memory.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, it worked with 65GB on lgrandi. Also, I wasn't aware that the one run I tested with is a 20min AmBe run from topCW5d9m position, while the topCW7d8m ones are only 10 minutes. So probably this is really just super heavy AmBe 😐

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks I think it makes a lot of sense to me! Yeah this is how difficult things are for AmBe computing..


except StopIteration:
raise RuntimeError("Bug in chunk building!")
Expand All @@ -120,18 +125,20 @@ def is_ready(self, chunk_i):


class SCsvFileLoader:
"""Class to load a CSV file (in wfsim format) with detector simulation
"""Class to load a CSV file (in wfsim or fuse format) with detector simulation
instructions."""

def __init__(
self,
input_file,
input_type,
random_number_generator,
ns_no_instruction_before_chunk_end=NS_NO_INSTRUCTION_BEFORE_CHUNK_END,
ns_no_instruction_after_chunk_start=NS_NO_INSTRUCTION_AFTER_CHUNK_START,
debug=False,
):
self.input_file = input_file
self.input_type = input_type
self.rng = random_number_generator
self.ns_no_instruction_before_chunk_end = ns_no_instruction_before_chunk_end
self.ns_no_instruction_after_chunk_start = ns_no_instruction_after_chunk_start
Expand Down Expand Up @@ -170,25 +177,31 @@ def __init__(
]

# Translator to translate the wfsim instructions to the fuse format
self.translator = InstrTranslator(input_format="wfsim", output_format="fuse")
if self.input_type == "wfsim":
self.translator = InstrTranslator(input_format="wfsim", output_format="fuse")
elif self.input_type == "fuse":
print(f"No translator defined as input is already {self.input_type} type")
else:
raise ValueError(f"Input type {self.input_type} is not defined and should not be possible")

def output_chunk(self, chunk_start, chunk_end):
"""Load the simulation instructions from the csv file in wfsim format
and then translate them to the fuse format.
"""Load the simulation instructions from the csv file
and then translate them to the fuse format if neccessary.

Truncate the instructions to the chunk time range.
"""
# Load the csv file in wfsim format
log.debug("Loaded detector simulation instructions from a csv file in wfsim format!")
# Load the csv file
log.warning(f"Loaded detector simulation instructions from a csv file in {self.input_type} format!")
instructions = self._load_csv_file()

# Translate the wfsim instructions to the fuse format
log.debug("Translating the wfsim instructions to the fuse format!")
instructions = self.translator.translate(instructions)
log.debug("Instructions translated to the fuse format!")

if self.input_type == "wfsim":
# Translate the wfsim instructions to the fuse format
log.warning("Translating the wfsim instructions to the fuse format!")
instructions = self.translator.translate(instructions)
log.warning("Instructions translated to the fuse format!")

# truncate instructions to the chunk time range
log.debug("Truncating instructions to the chunk time range!")
log.warning("Truncating instructions to the chunk time range!")
log.debug(
"We will further truncate the instructions to the range [%d, %d]",
chunk_start + self.ns_no_instruction_after_chunk_start,
Expand All @@ -211,7 +224,7 @@ def output_chunk(self, chunk_start, chunk_end):

def _load_csv_file(self):
"""Load the simulation instructions from a csv file in wfsim format."""
log.debug("Loading detector simulation instructions from a csv file in wfsim format!")
log.warning(f"Loading detector simulation instructions from {self.input_file} in {self.input_type} format!")
df = pd.read_csv(self.input_file)

return df
Expand Down
Loading