From ccae4d6e01bd61545a6e8f831288507c09d0bee3 Mon Sep 17 00:00:00 2001 From: David Kleiven Date: Tue, 15 Oct 2024 08:24:13 +0200 Subject: [PATCH] feat!: simplify schema for coordinate dataframe. Use regular data types (str instead of CategoricalDtype) and give coordinates as floats --- src/cimsparql/data_models.py | 9 ++++----- tests/test_micro_t1_nl.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cimsparql/data_models.py b/src/cimsparql/data_models.py index 0ac915bd..65029e9f 100644 --- a/src/cimsparql/data_models.py +++ b/src/cimsparql/data_models.py @@ -1,7 +1,6 @@ import datetime as dt from typing import Self -import pandas as pd import pandera as pa from pandera.typing import DataFrame, Index, Series @@ -189,10 +188,10 @@ class TransfConToConverterSchema(NamedResourceSchema): class CoordinatesSchema(JsonSchemaOut): mrid: Series[str] = pa.Field() - x: Series[str] = pa.Field() - y: Series[str] = pa.Field() - epsg: Series[pd.CategoricalDtype] = pa.Field() - rdf_type: Series[pd.CategoricalDtype] = pa.Field() + x: Series[float] = pa.Field() + y: Series[float] = pa.Field() + epsg: Series[str] = pa.Field() + rdf_type: Series[str] = pa.Field() CoordinatesDataFrame = DataFrame[CoordinatesSchema] diff --git a/tests/test_micro_t1_nl.py b/tests/test_micro_t1_nl.py index 6cbbfb99..05923982 100644 --- a/tests/test_micro_t1_nl.py +++ b/tests/test_micro_t1_nl.py @@ -268,11 +268,11 @@ def test_coordinates(test_model: t_common.ModelTest): assert test_model.model model = test_model.model crd = model.coordinates() - pd.testing.assert_index_equal(crd["epsg"].cat.categories, pd.Index(["4326"], dtype=str)) + pd.testing.assert_series_equal(crd["epsg"], pd.Series(["4326"])) cim = model.client.prefixes["cim"] categories = {f"{cim}ACLineSegment", f"{cim}Substation"} - assert crd["rdf_type"].cat.categories.difference(categories).empty + assert set(crd["rdf_type"]).difference(categories) == set() assert len(crd) == 49 coordinates = crd.astype({"x": float, "y": float})