Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_retrieved_files option for the PP plugin. #1029

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/aiida_quantumespresso/calculations/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def define(cls, spec):
spec.input('metadata.options.output_filename', valid_type=str, default=cls._DEFAULT_OUTPUT_FILE)
spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp')
spec.input('metadata.options.withmpi', valid_type=bool, default=True)
spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False)
spec.input('metadata.options.keep_data_files', valid_type=bool, default=True)
spec.input('metadata.options.parse_data_files', valid_type=bool, default=True)

spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_data', valid_type=orm.ArrayData)
Expand Down Expand Up @@ -218,10 +219,10 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t
# distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout`
# value as a suffix.
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)]

if self.inputs.metadata.options.keep_plot_file:
if self.inputs.metadata.options.keep_data_files:
calcinfo.retrieve_list.extend(retrieve_tuples)
else:
# If we do not want to parse the retrieved files, temporary retrieval is meaningless
elif self.inputs.metadata.options.parse_data_files:
calcinfo.retrieve_temporary_list.extend(retrieve_tuples)

return calcinfo
58 changes: 29 additions & 29 deletions src/aiida_quantumespresso/parsers/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,35 +117,35 @@ def get_key_from_filename(filename):
matches = re.search(pattern, filename)
return matches.group(1)

for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

# Create output nodes
if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))
if self.node.base.attributes.get('parse_data_files', True):
for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since we often don't know in advance how many
# files should be retrieved, there is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))

return self.exit(logs=logs)

Expand Down
4 changes: 2 additions & 2 deletions tests/calculations/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def test_pp_default(fixture_sandbox, generate_calc_job, generate_inputs, file_re
file_regression.check(input_written, encoding='utf-8', extension='.in')


def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
def test_pp_keep_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
"""Test a `PpCalculation` where we want to keep the retrieved data files."""
entry_point_name = 'quantumespresso.pp'
inputs = generate_inputs()
inputs.metadata.options.keep_plot_file = True
inputs.metadata.options.keep_data_files = True

calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
retrieve_list = ['aiida.out', 'aiida.fileout', ('aiida.filplot_*aiida.fileout', '.', 0)]
Expand Down
6 changes: 3 additions & 3 deletions tests/parsers/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,12 +297,12 @@ def test_pp_default_3d(
})


def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_plot_file=False` meaning files will be parsed from temporary directory."""
def test_pp_default_3d_keep_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_data_files=False` meaning files will be parsed from temporary directory."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

attributes = {'options': {'keep_plot_file': False}, 'retrieve_temporary_list': ['aiida.fileout']}
attributes = {'options': {'keep_data_files': False}, 'retrieve_temporary_list': ['aiida.fileout']}
node = generate_calc_job_node(
entry_point_calc_job,
test_name='default_3d',
Expand Down
Loading