Utility operation for adding new columns

ROVI-org · Nov 22, 2024 · f88609c · f88609c
1 parent 9a11355
commit f88609c
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 9 deletions.
diff --git a/battdat/schemas/column.py b/battdat/schemas/column.py
@@ -61,7 +61,7 @@ class ColumnInfo(BaseModel):
 
 
 class ColumnSchema(BaseModel, frozen=True):
-    """Base class for schemas that describe the columns of a tabular dataset
+    """Base class for schemas that describe the columns of a table
 
     Implement a schema to be re-used across multiple datasets by creating a subclass and
     adding attributes for each expected column. The type of each attribute must be a :class:`ColumnInfo`
@@ -110,6 +110,31 @@ def _check_attributes(cls, d: Any):
                 raise ValueError('The subclass is incorrect. All fields must have a default value')
         return d
 
+    def add_column(self,
+                   name: str,
+                   description: str,
+                   data_type: DataType = DataType.OTHER,
+                   required: bool = False,
+                   units: Optional[str] = None,
+                   monotonic: bool = False) -> ColumnInfo:
+        """Add a new column to the :attr:`extra_columns` as a :class:`ColumnInfo` object
+
+        Args:
+            name: Name of new column
+            description: Human-readable description of the data
+            data_type: Type of data
+            required: Whether the data must be included in a table
+            units: Units used for all rows in column
+            monotonic: Whether values must always remain constant or increase
+        Returns:
+            The new column object
+        """
+        new_col = ColumnInfo(
+            description=description, required=required, units=units, monotonic=monotonic, type=data_type
+        )
+        self.extra_columns[name] = new_col
+        return new_col
+
     def validate_dataframe(self, data: DataFrame, allow_extra_columns: bool = True):
         """Check whether a dataframe matches this schema
 

diff --git a/docs/user-guide/dataset.rst b/docs/user-guide/dataset.rst
@@ -84,6 +84,18 @@ nor passing the tables as part of a dictionary.
 
     dataset = CellDataset(raw_data=df)
 
+Each table will be associated with a default schema.
+Describe columns not yet present in the schema by adding them after assembly:
+
+.. code-block:: python
+
+    from battdat.schemas.column import ColumnInfo
+    dataset.schemas['raw_data'].add_column(
+        name='new_col',
+        description='Information not already included in RawData',
+        units='ohm',
+    )
+
 The current template classes are:
 
 .. _type-table:
@@ -98,7 +110,7 @@ The current template classes are:
        or averaged over entire cycles. Tables (and their schemas) include:
 
        - ``raw_data`` (`RawData <schemas/column-schema.html#rawdata>`_): Measurements of system state at specific points in time.
-       - ``cycle_stats`` (`CycleStats <schemas/column-schema.html#cyclestats>`_): Descriptive statistics about state over entire cycles.
+       - ``cycle_stats`` (`CycleLevelData <schemas/column-schema.html#cycleleveldata>`_): Descriptive statistics about state over entire cycles.
        - ``eis_data`` (`EISData <schemas/column-schema.html#eisdata>`_): EIS measurements at different frequencies, over time.
 
 Loading and Saving

diff --git a/docs/user-guide/formats.rst b/docs/user-guide/formats.rst
@@ -111,28 +111,28 @@ Add multiple batteries into an HDF5 file by providing a "prefix" to name each ce
 
 .. code-block:: python
 
-    test_a.to_battdat_hdf('test.h5', prefix='a')
-    test_b.to_battdat_hdf('test.h5', prefix='b', overwrite=False)  # Overwrite is mandatory
+    test_a.to_hdf('test.h5', prefix='a')
+    test_b.to_hdf('test.h5', prefix='b', overwrite=False)  # overwrite=False is mandatory so the existing file (with cell 'a') is kept
 
 
 Load a specific cell by providing its prefix
 
 .. code-block:: python
 
-    test_a = BatteryDataset.from_battdat_hdf('test.h5', prefix='a')
+    test_a = BatteryDataset.from_hdf('test.h5', prefix='a')
 
 
 or load any of the included cells by providing an index
 
 .. code-block:: python
 
-    test_a = BatteryDataset.from_battdat_hdf('test.h5', prefix=0)
+    test_a = BatteryDataset.from_hdf('test.h5', prefix=0)
 
 Load all cells by iterating over them:
 
 .. code-block:: python
 
-    for name, cell in BatteryDataset.all_cells_from_battdat_hdf('test.h5'):
+    for name, cell in BatteryDataset.all_cells_from_hdf('test.h5'):
         do_some_processing(cell)
 
 Parquet

diff --git a/tests/schemas/test_cycling.py b/tests/schemas/test_cycling.py
@@ -43,13 +43,19 @@ def test_extra_cols(example_df):
     example_df['extra'] = [1, 1]
 
     # Passes with extra columns by default
-    RawData().validate_dataframe(example_df)
+    schema = RawData()
+    schema.validate_dataframe(example_df)
 
     # Fails when desired
     with raises(ValueError) as exc:
-        RawData().validate_dataframe(example_df, allow_extra_columns=False)
+        schema.validate_dataframe(example_df, allow_extra_columns=False)
     assert 'extra columns' in str(exc)
 
+    # Passes when new column is defined
+    schema.add_column('extra', 'An extra column')
+    assert 'extra' in schema.extra_columns
+    schema.validate_dataframe(example_df, allow_extra_columns=False)
+
 
 def test_get_item():
     schema = RawData()