Skip to content

Commit

Permalink
Add a few docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
hriebl committed Oct 19, 2024
1 parent b693dc6 commit a62aa7f
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
66 changes: 66 additions & 0 deletions src/staticat/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,53 @@


def urlname(value):
    """Extracts the last component of a URL path.

    Only the path part of the URL is considered; query string and fragment are
    ignored. Percent-escapes are decoded before the path is split, so an
    encoded slash (``%2F``) acts as a path separator.
    """
    decoded_path = unquote(urlparse(value).path)
    return PurePosixPath(decoded_path).name


def jinja_env(loader):
    """Builds the Jinja environment shared by all Staticat templates.

    Autoescaping is selected for templates with the extensions html, htm, xml
    and rdf, and the ``urlname`` function is registered as a filter of the same
    name.
    """
    escaped_extensions = ("html", "htm", "xml", "rdf")
    environment = jinja2.Environment(
        loader=loader,
        autoescape=jinja2.select_autoescape(escaped_extensions),
    )
    environment.filters["urlname"] = urlname
    return environment


def default_template(name):
    """Loads the template called ``name`` from the ``staticat`` package.

    The template is looked up through a ``PackageLoader`` reading UTF-8 files
    and is bound to the environment produced by ``jinja_env``.
    """
    package_loader = jinja2.PackageLoader("staticat", encoding="utf-8")
    return jinja_env(package_loader).get_template(name)


def custom_template(path):
    """Loads a user-defined Jinja template from the file system.

    ``path`` must provide ``parent`` and ``name`` attributes: the parent
    directory becomes the template search path and the file name selects the
    template within it. Files are read as UTF-8.
    """
    file_loader = jinja2.FileSystemLoader(path.parent, encoding="utf-8")
    environment = jinja_env(file_loader)
    return environment.get_template(path.name)


def write(path, data):
    """Writes the string ``data`` to ``path`` as UTF-8 text.

    The file is opened in ``"w"`` mode, so an existing file is truncated and
    replaced.
    """
    with open(path, "w", encoding="utf-8") as stream:
        stream.write(data)


class ContactTOML(pydantic.BaseModel):
    """TOML configuration of a contact point (DCAT property).

    Both fields are declared without defaults, so both must be present in the
    configuration.
    """

    # Name of the contact point.
    name: str
    # Email address of the contact point; typed as a plain string here.
    email: str


class PublisherTOML(pydantic.BaseModel):
    """TOML configuration of a publisher (DCT property).

    Both fields are declared without defaults, so both must be present in the
    configuration.
    """

    # Name of the publisher.
    name: str
    # URI identifying the publisher; typed as a plain string here.
    uri: str


class DistributionTOML(pydantic.BaseModel):
"""TOML configuration of a distribution (DCAT class)."""

uri: str
title: str
modified: datetime | None = None
Expand All @@ -63,10 +74,14 @@ class DistributionTOML(pydantic.BaseModel):


class DatasetConfigTOML(pydantic.BaseModel):
    """TOML configuration defining Staticat-specific options for a dataset."""

    # Per-dataset switch for converting Excel files. The default None means
    # "not set": Dataset.should_convert_excel then falls back to the global
    # Staticat configuration.
    convert_excel: bool | None = None


class DatasetTOML(pydantic.BaseModel):
"""TOML configuration of a dataset (DCAT class)."""

title: str
description: str
keywords: list[str]
Expand All @@ -86,6 +101,8 @@ class DatasetTOML(pydantic.BaseModel):


class CatalogTOML(pydantic.BaseModel):
"""TOML configuration of a catalog (DCAT class)."""

uri: str
title: str
description: str
Expand All @@ -94,7 +111,10 @@ class CatalogTOML(pydantic.BaseModel):


class Dataset(DatasetTOML):
"""A dataset with RDF properties, Staticat properties and processing methods."""

def __init__(self, directory, catalog):
"""Initializes the dataset, parsing and validating the file dataset.toml."""
staticat_config = catalog.staticat_config
log_directory = directory.relative_to(staticat_config.directory.parent)
logger.info(f"{log_directory}: Parsing dataset.toml")
Expand All @@ -114,22 +134,30 @@ def __init__(self, directory, catalog):

@property
def catalog_uri(self):
    """The URI of the catalog.

    Accessor for the private ``_catalog_uri`` attribute.
    """
    return self._catalog_uri

@property
def directory(self):
    """The directory of the dataset.

    Accessor for the private ``_directory`` attribute.
    """
    return self._directory

@property
def html_description(self):
    """The dataset description converted from Markdown to HTML.

    A new ``MarkdownIt`` parser with the ``"js-default"`` preset is created on
    every access.
    """
    markdown = MarkdownIt("js-default")
    return markdown.render(self.description)

@property
def log_directory(self):
    """The dataset directory as shown in log messages.

    The path is expressed relative to the parent of the catalog directory, so
    it starts with the name of the catalog directory itself.
    """
    catalog_parent = self.staticat_config.directory.parent
    return self.directory.relative_to(catalog_parent)

@property
def political_geocoding_level(self):
"""The political geocoding level (DCAT-AP.de property).
Inferred from the political geocoding given in the TOML configuration.
"""
base = "dcat-ap.de/def/politicalGeocoding"

mapping = {
Expand All @@ -149,29 +177,38 @@ def political_geocoding_level(self):

@property
def relative_catalog(self):
    """The directory of the catalog relative to the dataset.

    Returns a URL-quoted POSIX path made of one ``..`` segment per ancestor of
    ``relative_directory`` (for example ``"../.."`` for a dataset in ``a/b``).
    When ``relative_directory`` has no ancestors the result is ``"."``.
    """
    # Only the number of ancestors matters, not the ancestors themselves.
    depth = len(self.relative_directory.parents)
    levels = [".."] * depth
    return quote(Path(*levels).as_posix())

@property
def relative_directory(self):
    """The dataset directory expressed relative to the catalog directory.

    The catalog directory is read from the global Staticat configuration.
    """
    catalog_directory = self.staticat_config.directory
    return self.directory.relative_to(catalog_directory)

@property
def should_convert_excel(self):
    """Whether Excel files should be converted to CSV.

    The dataset-level ``config.convert_excel`` wins whenever it is set; a
    value of ``None`` defers to ``convert_excel`` of the global Staticat
    configuration.
    """
    dataset_setting = self.config.convert_excel
    if dataset_setting is not None:
        return dataset_setting

    return self.staticat_config.convert_excel

@property
def staticat_config(self):
    """The global Staticat configuration.

    Accessor for the private ``_staticat_config`` attribute.
    """
    return self._staticat_config

@property
def uri(self):
    """The URI of the dataset.

    Built from the catalog URI, a ``/`` and the URL-quoted POSIX form of the
    dataset directory relative to the catalog.
    """
    quoted_directory = quote(self.relative_directory.as_posix())
    return f"{self.catalog_uri}/{quoted_directory}"

def add_distributions(self):
"""Adds local files to the dataset as distributions.
Unsupported file types are skipped.
"""
for file in self.directory.glob("*"):
if not file.is_file():
continue
Expand Down Expand Up @@ -205,6 +242,7 @@ def add_distributions(self):
)

def convert_excel(self):
"""Converts Excel files to CSV."""
for file in self.directory.glob("*"):
if not file.is_file():
continue
Expand All @@ -228,6 +266,7 @@ def convert_excel(self):
)

def render_html(self):
"""Renders the website of the dataset."""
if self.staticat_config.dataset_template:
template = custom_template(self.staticat_config.dataset_template)
else:
Expand All @@ -236,6 +275,7 @@ def render_html(self):
return template.render(dataset=self)

def write_html(self):
"""Writes the website of the dataset to the file index.html."""
logger.info(f"{self.log_directory}: Writing index.html")

try:
Expand All @@ -244,6 +284,7 @@ def write_html(self):
raise Exception("Could not write index.html") from error

def process(self):
"""Processes the dataset."""
if self.should_convert_excel:
self.convert_excel()

Expand All @@ -252,7 +293,10 @@ def process(self):


class Catalog(CatalogTOML):
"""A catalog with RDF properties, Staticat properties and processing methods."""

def __init__(self, config):
"""Initializes the catalog, parsing and validating the file catalog.toml."""
logger.info(f"{config.directory.name}: Parsing catalog.toml")

try:
Expand All @@ -266,26 +310,36 @@ def __init__(self, config):

@property
def datasets(self):
    """The datasets belonging to the catalog (DCAT property).

    Accessor for the private ``_datasets`` attribute.
    """
    return self._datasets

@property
def directory(self):
    """The directory of the catalog.

    Taken from the ``directory`` attribute of the global Staticat
    configuration.
    """
    return self.staticat_config.directory

@property
def html_description(self):
    """The catalog description converted from Markdown to HTML.

    A new ``MarkdownIt`` parser with the ``"js-default"`` preset is created on
    every access.
    """
    markdown = MarkdownIt("js-default")
    return markdown.render(self.description)

@property
def log_directory(self):
    """The catalog directory as shown in log messages.

    This is the final path component (``name``) of the directory stored in the
    global Staticat configuration.
    """
    catalog_directory = self.staticat_config.directory
    return catalog_directory.name

@property
def staticat_config(self):
    """The global Staticat configuration.

    Accessor for the private ``_staticat_config`` attribute.
    """
    return self._staticat_config

@property
def tree(self):
"""The file tree of the catalog.
Returns an iterable of dictionaries meant to be processed in the Jinja template
for the website of the catalog.
"""
datasets = {dataset.relative_directory for dataset in self.datasets}
parents = {parent for dataset in datasets for parent in dataset.parents}
items = sorted((datasets | parents) - {Path(".")})
Expand All @@ -299,6 +353,11 @@ def tree(self):
}

def add_datasets(self):
"""Adds subdirectories to the catalog as datasets.
Only subdirectories containing a file dataset.toml are processed and added to
the catalog.
"""
for file in self.directory.glob("*/**/dataset.toml"):
if not file.is_file():
continue
Expand All @@ -318,9 +377,11 @@ def add_datasets(self):
)

def render_css(self):
    """Renders the Staticat stylesheet.

    Uses the packaged ``default.css`` template and renders it without any
    template variables.
    """
    stylesheet = default_template("default.css")
    return stylesheet.render()

def render_html(self):
"""Renders the website of the catalog."""
if self.staticat_config.catalog_template:
template = custom_template(self.staticat_config.catalog_template)
else:
Expand All @@ -329,9 +390,11 @@ def render_html(self):
return template.render(catalog=self)

def render_rdf(self):
    """Renders a RDF/XML representation of the catalog.

    Uses the packaged ``catalog.rdf`` template, which receives this catalog as
    the template variable ``catalog``.
    """
    rdf_template = default_template("catalog.rdf")
    return rdf_template.render(catalog=self)

def write_css(self):
"""Writes the Staticat stylesheet to the file default.css."""
logger.info(f"{self.directory.name}: Writing default.css")

try:
Expand All @@ -340,6 +403,7 @@ def write_css(self):
raise Exception("Could not write default.css") from error

def write_html(self):
"""Writes the website of the catalog to the file index.html."""
logger.info(f"{self.directory.name}: Writing index.html")

try:
Expand All @@ -348,6 +412,7 @@ def write_html(self):
raise Exception("Could not write index.html") from error

def write_ttl(self):
"""Writes a Turtle representation of the catalog to the file catalog.ttl."""
logger.info(f"{self.directory.name}: Writing catalog.ttl")

try:
Expand All @@ -358,6 +423,7 @@ def write_ttl(self):
raise Exception("Could not write catalog.ttl") from error

def process(self):
"""Processes the catalog."""
logger.info(f"{self.directory.name}: Processing catalog...")
self.add_datasets()

Expand Down
3 changes: 3 additions & 0 deletions src/staticat/vocab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


def read_rdf(name, file):
"""Returns an enumeration of the identifiers of the concepts in the given file."""
path = importlib.resources.files() / file

graph = Graph()
Expand All @@ -27,6 +28,7 @@ def read_rdf(name, file):


def read_file_type():
"""Returns a data frame with information on the file types in the EU vocabulary."""
path = importlib.resources.files() / "file-type.xml"

tree = ET.parse(path)
Expand Down Expand Up @@ -57,6 +59,7 @@ def read_file_type():


def file_type_df_to_enum():
    """Returns an enumeration of the codes of the file types in the EU vocabulary.

    The distinct values of the ``code`` column of ``FileTypeDF`` become the
    members of a ``StrEnum`` called ``FileType``; each code serves as both the
    member name and the member value.
    """
    codes = FileTypeDF["code"].drop_duplicates()
    return StrEnum("FileType", [(code, code) for code in codes])

Expand Down

0 comments on commit a62aa7f

Please sign in to comment.