Clean up build of input index file #234

Open · wants to merge 3 commits into master
17 changes: 0 additions & 17 deletions gammacat/checks.py
@@ -17,20 +17,6 @@

log = logging.getLogger(__name__)


# TODO: put this in a better place?
def check_info_yaml():
"""Check the info.yaml files in input/data"""
from gammacat.utils import load_yaml, validate_schema

schema = load_yaml('input/schemas/dataset_info.schema.yaml')

for path in Path('input/data').glob('*/*/info.yaml'):
print(f'Checking: {path}')
data = load_yaml(path)
validate_schema(path=path, data=data, schema=schema)


class CheckerConfig:
"""Config for Checker"""

@@ -86,9 +72,6 @@ def check_all(self):
def check_input(self):
log.info('Run checks: input')
self.input_data.validate()
check_info_yaml()
print()
print(self.input_data)

def check_collection(self):
log.info('Run checks: collection')
70 changes: 40 additions & 30 deletions gammacat/collection.py
@@ -12,7 +12,7 @@
from .lightcurve import LightCurve
from .dataset import DataSet
from .info import gammacat_info, GammaCatStr
from .input import InputData
from .input import InputData, InputInfoCollection
from .utils import write_json, load_json, log_list_difference, load_yaml

__all__ = [
@@ -199,6 +199,45 @@ def validate_list_of_files(self):
expected_files = expected_files_extra + expected_files_sed
log_list_difference(actual, expected_files)

class InputCollection:

def __init__ (self, config):
self.config = config
self.data = InputData.read()
self.info_files = InputInfoCollection.read()
Collaborator Author: This line creates a list of all info files present in the input folder (step 1 in the comment).

def run(self):
self._validate_info_files()
Collaborator Author: Here, all info files present in the input folder are validated (step 2 in the comment).

self._make_index_file_for_input()
Collaborator Author: This line creates the input index file and stores it (step 3 in the comment).


def _validate_info_files(self):
self.info_files.validate()

def _make_index_file_for_input(self):
resources = []
for info_filename in self.info_files.to_list():
info_data = load_yaml(info_filename)
if info_data['data_entry']['status'] == 'missing':
continue
# TODO: Decide which datasets are copied to output collection by the keywords in
# 'status' and 'reviewed' in info.yaml
# e.g if info_data['data_entry']['status'] == 'complete':
for dataset in info_data['datasets']:
resource = GammaCatResource(0, 'empty')
if dataset.endswith('yaml'):
resource = DataSet.read(info_filename.parent / dataset).resource
elif dataset.endswith('ecsv'):
if 'lc' in dataset:
resource = LightCurve.read(info_filename.parent / dataset).resource
elif 'sed' in dataset:
resource = SED.read(info_filename.parent / dataset).resource
resource.location = str(info_filename.parent.relative_to(self.config.in_path) / dataset)
resources.append(resource)

self.index = GammaCatResourceIndex(resources).sort()
Collaborator Author: I am not sure whether defining self.index as a class instance variable here is Pythonic. Currently self.index is not used anywhere else, but I think we will handle a lot of things via this index in the future, so it is nice to have it as an instance variable.

Contributor: If it's not used from elsewhere at the moment, it's better to keep it as a local variable only.


path = self.config.index_input_json
write_json(self.index.to_list(), path)
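Picking up the review thread just above, here is a minimal sketch of the suggested variant, with the sorted index kept as a local variable and nothing stored on the instance until another caller actually needs it (names follow this diff; the resource-building loop is elided because it is identical to the one shown above):

    def _make_index_file_for_input(self):
        resources = []
        for info_filename in self.info_files.to_list():
            # ... append GammaCatResource objects exactly as in the loop above ...
            pass
        # Local variable instead of self.index, as suggested in the review.
        index = GammaCatResourceIndex(resources).sort()
        write_json(index.to_list(), self.config.index_input_json)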

class CollectionMaker:
"""Make gamma-cat data collection (from the input files)."""
@@ -212,8 +251,6 @@ def run(self):
step = self.config.step
if step == 'all':
self.process_all()
elif step == 'input-index':
Collaborator Author: This branch no longer works because the 'input-index' step is not handled in this class anymore.

self.make_index_file_for_input()
elif step == 'source-info':
self.process_src_info()
elif step == 'dataset':
@@ -233,7 +270,6 @@ def input_data(self):
return InputData.read()

def process_all(self):
self.make_index_file_for_input()
Collaborator Author: Same as above: this is not part of this class anymore.


self.process_src_info()
self.process_datasets()
@@ -290,32 +326,6 @@ def process_datasets(self):
path.parent.mkdir(parents=True, exist_ok=True)
dataset.write(path)

def make_index_file_for_input(self):
resources = []
for info_filename in self.input_data.info_yaml_list:
info_data = load_yaml(info_filename)
if info_data['data_entry']['status'] == 'missing':
continue
# TODO: Decide which datasets are copied to output collection by the keywords in
# 'status' and 'reviewed' in info.yaml
# e.g if info_data['data_entry']['status'] == 'complete':
for dataset in info_data['datasets']:
resource = GammaCatResource(0, 'empty')
if dataset.endswith('yaml'):
resource = DataSet.read(info_filename.parent / dataset).resource
elif dataset.endswith('ecsv'):
if 'lc' in dataset:
resource = LightCurve.read(info_filename.parent / dataset).resource
elif 'sed' in dataset:
resource = SED.read(info_filename.parent / dataset).resource
resource.location = str(info_filename.parent.relative_to(self.config.in_path) / dataset)
resources.append(resource)

ri = GammaCatResourceIndex(resources).sort()

path = self.config.index_input_json
write_json(ri.to_list(), path)

def make_index_file_for_output(self):

# input and output should be consistent, modulo known differences
69 changes: 61 additions & 8 deletions gammacat/input.py
@@ -216,6 +216,53 @@ def validate(self):
log.info('Validating YAML files in `input/sources`')
[_.validate() for _ in self.data]

class InputInfo:
"""All basic info about the data of a single publication.
"""

schema = load_yaml(gammacat_info.base_dir / 'input/schemas/dataset_info.schema.yaml')

def __init__(self, data, path):
self.data = data
self.path = path

@classmethod
def read(cls, path):
path = Path(path)
data = load_yaml(path)
return cls(data=data, path=path)

def validate(self):
validate_schema(path=self.path, data=self.data, schema=self.schema)

class InputInfoCollection:
"""List of InputInfo objects.
"""

def __init__(self, data):
self.data = data

@classmethod
def read(cls):
path = gammacat_info.base_dir / 'input/data'
paths = path.glob('*/*/info.yaml')

data = []
for path in paths:
info = InputInfo.read(path)
data.append(info)

return cls(data=data)

def to_list(self):
data = []
for infodata in self.data:
data.append(infodata.path)
return data

def validate(self):
log.info('Validating info.yaml files in `input/data`')
[_.validate() for _ in self.data]
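For reference, a short usage sketch of the two new classes, put together only from what this diff shows (the info.yaml path below is a placeholder that follows the input/data/*/*/info.yaml layout):

from gammacat.input import InputInfo, InputInfoCollection

# Read and validate a single info.yaml against the dataset_info schema.
info = InputInfo.read('input/data/2018/some-paper-id/info.yaml')  # placeholder path
info.validate()

# Or read every info.yaml under input/data and validate them all at once.
infos = InputInfoCollection.read()
infos.validate()
print(len(infos.to_list()), 'info.yaml files found')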

class InputDatasetCollection:
"""
@@ -306,6 +353,16 @@ class InputData:
Expose it as Python objects that can be validated and used.
"""

def __init__(self, schemas=None, sources=None, datasets=None,
ref_infos=None, gammacat_dataset_config=None):
self.path = gammacat_info.base_dir / 'input'
self.schemas = schemas
self.sources = sources
self.datasets = datasets
self.ref_infos = ref_infos
# TODO: self.lightcurves, self.seds
self.gammacat_dataset_config = gammacat_dataset_config

@property
def src_info_list(self):
"""List of all basic source info files in input/sources"""
@@ -339,14 +396,6 @@ def dataset_file_list(self):
paths = path.glob('*/*/tev*.yaml')
return sorted(paths)

def __init__(self, schemas=None, sources=None, datasets=None,
gammacat_dataset_config=None):
self.path = gammacat_info.base_dir / 'input'
self.schemas = schemas
self.sources = sources
self.datasets = datasets
self.gammacat_dataset_config = gammacat_dataset_config

@classmethod
def read(cls):
"""Read all data from disk."""
@@ -355,11 +404,14 @@
schemas = Schemas.read()
sources = BasicSourceList.read()
datasets = InputDatasetCollection.read()
ref_infos = InputInfoCollection.read()
#TODO: lightcurves, seds
gammacat_dataset_config = DatasetConfig.read()
return cls(
schemas=schemas,
sources=sources,
datasets=datasets,
ref_infos=ref_infos,
gammacat_dataset_config=gammacat_dataset_config,
)

@@ -385,6 +437,7 @@ def validate(self):
self.schemas.validate()
self.sources.validate()
self.datasets.validate()
self.ref_infos.validate()

for filename in self.sed_file_list:
SED.read(filename=filename).process()
7 changes: 5 additions & 2 deletions make.py
@@ -8,7 +8,7 @@
import subprocess
import click
from gammacat.info import gammacat_info
from gammacat.collection import CollectionConfig, CollectionMaker
from gammacat.collection import CollectionConfig, CollectionMaker, InputCollection
from gammacat.catalog import CatalogConfig, CatalogMaker
from gammacat.webpage import WebpageConfig, WebpageMaker
from gammacat.checks import CheckerConfig, Checker
@@ -45,7 +45,10 @@ def cli_collection(step):
out_path=gammacat_info.out_path,
step=step,
)
CollectionMaker(config).run()
if (step == 'input-index'):
Collaborator Author: I know this looks ugly and needs a follow-up change! But it works, so I would leave it as it is for now and change it later.

InputCollection(config).run()
else:
CollectionMaker(config).run()
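Regarding the if/else flagged in the comment above, one possible follow-up (purely a sketch, not part of this PR; the run_collection_step helper is hypothetical) is to dispatch on the step through a small mapping so that cli_collection no longer special-cases 'input-index':

# Hypothetical follow-up: map steps to maker classes (names as used in this PR).
MAKERS = {
    'input-index': InputCollection,
    # all other steps keep using the existing maker
}

def run_collection_step(config, step):
    maker_cls = MAKERS.get(step, CollectionMaker)
    maker_cls(config).run()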


@cli.command(name='catalog')