-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Clean up build of input index file #234
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ | |
from .lightcurve import LightCurve | ||
from .dataset import DataSet | ||
from .info import gammacat_info, GammaCatStr | ||
from .input import InputData | ||
from .input import InputData, InputInfoCollection | ||
from .utils import write_json, load_json, log_list_difference, load_yaml | ||
|
||
__all__ = [ | ||
|
@@ -199,6 +199,45 @@ def validate_list_of_files(self): | |
expected_files = expected_files_extra + expected_files_sed | ||
log_list_difference(actual, expected_files) | ||
|
||
class InputCollection: | ||
|
||
def __init__ (self, config): | ||
self.config = config | ||
self.data = InputData.read() | ||
self.info_files = InputInfoCollection.read() | ||
|
||
def run(self): | ||
self._validate_info_files() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, all info files which are present in the input folder are validated (step 2 in comment). |
||
self._make_index_file_for_input() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line creates the input index file and stores it (step 3 in comment). |
||
|
||
def _validate_info_files(self): | ||
self.info_files.validate() | ||
|
||
def _make_index_file_for_input(self): | ||
resources = [] | ||
for info_filename in self.info_files.to_list(): | ||
info_data = load_yaml(info_filename) | ||
if info_data['data_entry']['status'] == 'missing': | ||
continue | ||
# TODO: Decide which datasets are copied to output collection by the keywords in | ||
# 'status' and 'reviewed' in info.yaml | ||
# e.g if info_data['data_entry']['status'] == 'complete': | ||
for dataset in info_data['datasets']: | ||
resource = GammaCatResource(0, 'empty') | ||
if dataset.endswith('yaml'): | ||
resource = DataSet.read(info_filename.parent / dataset).resource | ||
elif dataset.endswith('ecsv'): | ||
if 'lc' in dataset: | ||
resource = LightCurve.read(info_filename.parent / dataset).resource | ||
elif 'sed' in dataset: | ||
resource = SED.read(info_filename.parent / dataset).resource | ||
resource.location = str(info_filename.parent.relative_to(self.config.in_path) / dataset) | ||
resources.append(resource) | ||
|
||
self.index = GammaCatResourceIndex(resources).sort() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure whether this definition of the class instance variable `self.index` is needed here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If it's not used from elsewhere at the moment, better to keep it as a local variable only. |
||
|
||
path = self.config.index_input_json | ||
write_json(self.index.to_list(), path) | ||
|
||
class CollectionMaker: | ||
"""Make gamma-cat data collection (from the input files).""" | ||
|
@@ -212,8 +251,6 @@ def run(self): | |
step = self.config.step | ||
if step == 'all': | ||
self.process_all() | ||
elif step == 'input-index': | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not working anymore because it is not handled in this class. |
||
self.make_index_file_for_input() | ||
elif step == 'source-info': | ||
self.process_src_info() | ||
elif step == 'dataset': | ||
|
@@ -233,7 +270,6 @@ def input_data(self): | |
return InputData.read() | ||
|
||
def process_all(self): | ||
self.make_index_file_for_input() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above. Not part of this class anymore. |
||
|
||
self.process_src_info() | ||
self.process_datasets() | ||
|
@@ -290,32 +326,6 @@ def process_datasets(self): | |
path.parent.mkdir(parents=True, exist_ok=True) | ||
dataset.write(path) | ||
|
||
def make_index_file_for_input(self): | ||
resources = [] | ||
for info_filename in self.input_data.info_yaml_list: | ||
info_data = load_yaml(info_filename) | ||
if info_data['data_entry']['status'] == 'missing': | ||
continue | ||
# TODO: Decide which datasets are copied to output collection by the keywords in | ||
# 'status' and 'reviewed' in info.yaml | ||
# e.g if info_data['data_entry']['status'] == 'complete': | ||
for dataset in info_data['datasets']: | ||
resource = GammaCatResource(0, 'empty') | ||
if dataset.endswith('yaml'): | ||
resource = DataSet.read(info_filename.parent / dataset).resource | ||
elif dataset.endswith('ecsv'): | ||
if 'lc' in dataset: | ||
resource = LightCurve.read(info_filename.parent / dataset).resource | ||
elif 'sed' in dataset: | ||
resource = SED.read(info_filename.parent / dataset).resource | ||
resource.location = str(info_filename.parent.relative_to(self.config.in_path) / dataset) | ||
resources.append(resource) | ||
|
||
ri = GammaCatResourceIndex(resources).sort() | ||
|
||
path = self.config.index_input_json | ||
write_json(ri.to_list(), path) | ||
|
||
def make_index_file_for_output(self): | ||
|
||
# input and output should be consistent, modulo known differences | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
import subprocess | ||
import click | ||
from gammacat.info import gammacat_info | ||
from gammacat.collection import CollectionConfig, CollectionMaker | ||
from gammacat.collection import CollectionConfig, CollectionMaker, InputCollection | ||
from gammacat.catalog import CatalogConfig, CatalogMaker | ||
from gammacat.webpage import WebpageConfig, WebpageMaker | ||
from gammacat.checks import CheckerConfig, Checker | ||
|
@@ -45,7 +45,10 @@ def cli_collection(step): | |
out_path=gammacat_info.out_path, | ||
step=step, | ||
) | ||
CollectionMaker(config).run() | ||
if (step == 'input-index'): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know, this looks ugly and needs a follow-up change! But it works; hence, I would leave it as it is for now and change it later. |
||
InputCollection(config).run() | ||
else: | ||
CollectionMaker(config).run() | ||
|
||
|
||
@cli.command(name='catalog') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line creates a list of all info files which are present in the input folder (step 1 in comment).