From a62aa7faea1302fad1a1af8c7237dedbb0c86e03 Mon Sep 17 00:00:00 2001 From: Hannes Riebl Date: Sat, 19 Oct 2024 22:47:27 +0200 Subject: [PATCH] Add a few docstrings --- src/staticat/lib.py | 66 ++++++++++++++++++++++++++++++++++ src/staticat/vocab/__init__.py | 3 ++ 2 files changed, 69 insertions(+) diff --git a/src/staticat/lib.py b/src/staticat/lib.py index 4e0fc34..8eb1a85 100644 --- a/src/staticat/lib.py +++ b/src/staticat/lib.py @@ -17,10 +17,12 @@ def urlname(value): + """Extracts the last component of a URL path.""" return PurePosixPath(unquote(urlparse(value).path)).name def jinja_env(loader): + """Returns a Jinja environment with autoescape and the urlname filter.""" autoescape = jinja2.select_autoescape(("html", "htm", "xml", "rdf")) env = jinja2.Environment(loader=loader, autoescape=autoescape) env.filters["urlname"] = urlname @@ -28,31 +30,40 @@ def jinja_env(loader): def default_template(name): + """Returns a default Jinja template from the Staticat package.""" env = jinja_env(jinja2.PackageLoader("staticat", encoding="utf-8")) return env.get_template(name) def custom_template(path): + """Returns a custom, user-defined Jinja template from the file system.""" env = jinja_env(jinja2.FileSystemLoader(path.parent, encoding="utf-8")) return env.get_template(path.name) def write(path, data): + """Writes a file in text mode with UTF-8 encoding.""" with open(path, mode="w", encoding="utf-8") as file: file.write(data) class ContactTOML(pydantic.BaseModel): + """TOML configuration of a contact point (DCAT property).""" + name: str email: str class PublisherTOML(pydantic.BaseModel): + """TOML configuration of a publisher (DCT property).""" + name: str uri: str class DistributionTOML(pydantic.BaseModel): + """TOML configuration of a distribution (DCAT class).""" + uri: str title: str modified: datetime | None = None @@ -63,10 +74,14 @@ class DistributionTOML(pydantic.BaseModel): class DatasetConfigTOML(pydantic.BaseModel): + """TOML configuration defining Staticat-specific options for a dataset.""" + convert_excel: bool | None = None class DatasetTOML(pydantic.BaseModel): + """TOML configuration of a dataset (DCAT class).""" + title: str description: str keywords: list[str] @@ -86,6 +101,8 @@ class DatasetTOML(pydantic.BaseModel): class CatalogTOML(pydantic.BaseModel): + """TOML configuration of a catalog (DCAT class).""" + uri: str title: str description: str @@ -94,7 +111,10 @@ class CatalogTOML(pydantic.BaseModel): class Dataset(DatasetTOML): + """A dataset with RDF properties, Staticat properties and processing methods.""" + def __init__(self, directory, catalog): + """Initializes the dataset, parsing and validating the file dataset.toml.""" staticat_config = catalog.staticat_config log_directory = directory.relative_to(staticat_config.directory.parent) logger.info(f"{log_directory}: Parsing dataset.toml") @@ -114,22 +134,30 @@ def __init__(self, directory, catalog): @property def catalog_uri(self): + """The URI of the catalog.""" return self._catalog_uri @property def directory(self): + """The directory of the dataset.""" return self._directory @property def html_description(self): + """The description of the dataset, rendered from Markdown to HTML.""" return MarkdownIt("js-default").render(self.description) @property def log_directory(self): + """The directory of the dataset, formatted for logging.""" return self.directory.relative_to(self.staticat_config.directory.parent) @property def political_geocoding_level(self): + """The political geocoding level (DCAT-AP.de property). + + Inferred from the political geocoding given in the TOML configuration. + """ base = "dcat-ap.de/def/politicalGeocoding" mapping = { @@ -149,15 +177,18 @@ def political_geocoding_level(self): @property def relative_catalog(self): + """The directory of the catalog relative to the dataset.""" path = Path(*(".." for parent in self.relative_directory.parents)) return quote(path.as_posix()) @property def relative_directory(self): + """The directory of the dataset relative to the catalog.""" return self.directory.relative_to(self.staticat_config.directory) @property def should_convert_excel(self): + """Whether Excel files should be converted to CSV.""" if self.config.convert_excel is None: return self.staticat_config.convert_excel @@ -165,13 +196,19 @@ def should_convert_excel(self): @property def staticat_config(self): + """The global Staticat configuration.""" return self._staticat_config @property def uri(self): + """The URI of the dataset.""" return f"{self.catalog_uri}/{quote(self.relative_directory.as_posix())}" def add_distributions(self): + """Adds local files to the dataset as distributions. + + Unsupported file types are skipped. + """ for file in self.directory.glob("*"): if not file.is_file(): continue @@ -205,6 +242,7 @@ def add_distributions(self): ) def convert_excel(self): + """Converts Excel files to CSV.""" for file in self.directory.glob("*"): if not file.is_file(): continue @@ -228,6 +266,7 @@ def convert_excel(self): ) def render_html(self): + """Renders the website of the dataset.""" if self.staticat_config.dataset_template: template = custom_template(self.staticat_config.dataset_template) else: @@ -236,6 +275,7 @@ def render_html(self): return template.render(dataset=self) def write_html(self): + """Writes the website of the dataset to the file index.html.""" logger.info(f"{self.log_directory}: Writing index.html") try: @@ -244,6 +284,7 @@ def write_html(self): raise Exception("Could not write index.html") from error def process(self): + """Processes the dataset.""" if self.should_convert_excel: self.convert_excel() @@ -252,7 +293,10 @@ def process(self): class Catalog(CatalogTOML): + """A catalog with RDF properties, Staticat properties and processing methods.""" + def __init__(self, config): + """Initializes the catalog, parsing and validating the file catalog.toml.""" logger.info(f"{config.directory.name}: Parsing catalog.toml") try: @@ -266,26 +310,36 @@ def __init__(self, config): @property def datasets(self): + """The datasets belonging to the catalog (DCAT property).""" return self._datasets @property def directory(self): + """The directory of the catalog.""" return self.staticat_config.directory @property def html_description(self): + """The description of the catalog, rendered from Markdown to HTML.""" return MarkdownIt("js-default").render(self.description) @property def log_directory(self): + """The directory of the catalog, formatted for logging.""" return self.staticat_config.directory.name @property def staticat_config(self): + """The global Staticat configuration.""" return self._staticat_config @property def tree(self): + """The file tree of the catalog. + + Returns an iterable of dictionaries meant to be processed in the Jinja template + for the website of the catalog. + """ datasets = {dataset.relative_directory for dataset in self.datasets} parents = {parent for dataset in datasets for parent in dataset.parents} items = sorted((datasets | parents) - {Path(".")}) @@ -299,6 +353,11 @@ def tree(self): } def add_datasets(self): + """Adds subdirectories to the catalog as datasets. + + Only subdirectories containing a file dataset.toml are processed and added to + the catalog. + """ for file in self.directory.glob("*/**/dataset.toml"): if not file.is_file(): continue @@ -318,9 +377,11 @@ def add_datasets(self): ) def render_css(self): + """Renders the Staticat stylesheet.""" return default_template("default.css").render() def render_html(self): + """Renders the website of the catalog.""" if self.staticat_config.catalog_template: template = custom_template(self.staticat_config.catalog_template) else: @@ -329,9 +390,11 @@ def render_html(self): return template.render(catalog=self) def render_rdf(self): + """Renders a RDF/XML representation of the catalog.""" return default_template("catalog.rdf").render(catalog=self) def write_css(self): + """Writes the Staticat stylesheet to the file default.css.""" logger.info(f"{self.directory.name}: Writing default.css") try: @@ -340,6 +403,7 @@ def write_css(self): raise Exception("Could not write default.css") from error def write_html(self): + """Writes the website of the catalog to the file index.html.""" logger.info(f"{self.directory.name}: Writing index.html") try: @@ -348,6 +412,7 @@ def write_html(self): raise Exception("Could not write index.html") from error def write_ttl(self): + """Writes a Turtle representation of the catalog to the file catalog.ttl.""" logger.info(f"{self.directory.name}: Writing catalog.ttl") try: @@ -358,6 +423,7 @@ def write_ttl(self): raise Exception("Could not write catalog.ttl") from error def process(self): + """Processes the catalog.""" logger.info(f"{self.directory.name}: Processing catalog...") self.add_datasets() diff --git a/src/staticat/vocab/__init__.py b/src/staticat/vocab/__init__.py index fa227b1..4b305a1 100644 --- a/src/staticat/vocab/__init__.py +++ b/src/staticat/vocab/__init__.py @@ -8,6 +8,7 @@ def read_rdf(name, file): + """Returns an enumeration of the identifiers of the concepts in the given file.""" path = importlib.resources.files() / file graph = Graph() @@ -27,6 +28,7 @@ def read_rdf(name, file): def read_file_type(): + """Returns a data frame with information on the file types in the EU vocabulary.""" path = importlib.resources.files() / "file-type.xml" tree = ET.parse(path) @@ -57,6 +59,7 @@ def read_file_type(): def file_type_df_to_enum(): + """Returns an enumeration of the codes of the file types in the EU vocabulary.""" members = FileTypeDF["code"].drop_duplicates() return StrEnum("FileType", zip(members, members))