Skip to content

Commit

Permalink
Merge pull request #696 from emmo-repo/directory_layout
Browse files Browse the repository at this point in the history
Added directory_layout() function.

The function suggests a directory layout from the IRIs of all the imported ontologies.

The generation of catalog files has been updated and some bugs in write_catalog() have been fixed, including:
- irimap dict is now not modified as a side-effect when relative_paths is true
- relative paths in irimap are not modified
- absolute paths written to the catalog file are now relative to the directory of the catalog file when relative_paths=True

read_catalog() has gotten a new argument, relative_to, which is needed when asking for relative paths. Using this argument ensures that the catalog test no longer depends on the current working directory from which the test is invoked.
  • Loading branch information
jesper-friis authored Jan 8, 2024
2 parents 04f145f + 5b10822 commit 77c3bdc
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 69 deletions.
104 changes: 57 additions & 47 deletions ontopy/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
write_catalog,
infer_version,
convert_imported,
directory_layout,
FMAP,
IncompatibleVersion,
isinteractive,
Expand Down Expand Up @@ -934,23 +935,46 @@ def save(
raise ValueError(
"`recursive` and `squash` should not both be true"
)
base = self.base_iri.rstrip("#/")
for onto in self.imported_ontologies:
obase = onto.base_iri.rstrip("#/")
newdir = Path(dir) / os.path.relpath(obase, base)
layout = directory_layout(self)

for onto, path in layout.items():
fname = Path(dir) / f"{path}.{fmt}"
onto.save(
filename=None,
filename=fname,
format=format,
dir=newdir.resolve(),
dir=dir,
mkdir=mkdir,
overwrite=overwrite,
recursive=recursive,
squash=squash,
write_catalog_file=write_catalog_file,
append_catalog=append_catalog,
catalog_file=catalog_file,
recursive=False,
squash=False,
write_catalog_file=False,
)

if write_catalog_file:
catalog_files = set()
irimap = {}
for onto, path in layout.items():
irimap[
onto.get_version(as_iri=True)
] = f"{dir}/{path}.{fmt}"
catalog_files.add(Path(path).parent / catalog_file)

for catfile in catalog_files:
write_catalog(
irimap.copy(),
output=catfile,
directory=dir,
append=append_catalog,
)

elif write_catalog_file:
write_catalog(
{self.get_version(as_iri=True): filename},
output=catalog_file,
directory=dir,
append=append_catalog,
)

if squash:
from rdflib import ( # pylint:disable=import-outside-toplevel
URIRef,
Expand Down Expand Up @@ -980,35 +1004,13 @@ def save(
suffix=".owl", delete=False
) as handle:
tmpfile = handle.name
super().save(tmpfile, format="rdfxml")
super().save(tmpfile, format="ntriples")
graph = rdflib.Graph()
graph.parse(tmpfile, format="xml")
graph.parse(tmpfile, format="ntriples")
graph.serialize(destination=filename, format=format)
finally:
os.remove(tmpfile)

if write_catalog_file:
mappings = {}
base = self.base_iri.rstrip("#/")

def append(onto):
obase = onto.base_iri.rstrip("#/")
newdir = Path(dir) / os.path.relpath(obase, base)
newpath = newdir.resolve() / f"{onto.name}.{fmt}"
relpath = os.path.relpath(newpath, dir)
mappings[onto.get_version(as_iri=True)] = str(relpath)
for imported in onto.imported_ontologies:
append(imported)

if recursive:
append(self)
write_catalog(
mappings,
output=catalog_file,
directory=dir,
append=append_catalog,
)

def get_imported_ontologies(self, recursive=False):
"""Return a list with imported ontologies.
Expand Down Expand Up @@ -1939,6 +1941,14 @@ def new_annotation_property(
"""
return self.new_entity(name, parent, "annotation_property")

def difference(self, other: owlready2.Ontology) -> set:
    """Return the triples found in this ontology but not in `other`.

    Both ontologies are expanded with `get_unabbreviated_triples()`,
    using the same `"_:b"` placeholder for blank nodes, before the
    comparison is made.

    Arguments:
        other: Ontology to compare against.

    Returns:
        Set of triples yielded for this ontology that are not yielded
        for `other`.
    """
    own_triples = set(self.get_unabbreviated_triples(blank="_:b"))
    other_triples = set(other.get_unabbreviated_triples(blank="_:b"))
    return own_triples - other_triples


class BlankNode:
"""Represents a blank node.
Expand Down Expand Up @@ -2006,31 +2016,31 @@ def _unabbreviate(


def _get_unabbreviated_triples(
self, subject=None, predicate=None, obj=None, blank=None
onto, subject=None, predicate=None, obj=None, blank=None
):
"""Help function returning all matching triples unabbreviated.
If `blank` is given, it will be used to represent blank nodes.
"""
# pylint: disable=invalid-name
abb = (
None if subject is None else self._abbreviate(subject),
None if predicate is None else self._abbreviate(predicate),
None if obj is None else self._abbreviate(obj),
None if subject is None else onto._abbreviate(subject),
None if predicate is None else onto._abbreviate(predicate),
None if obj is None else onto._abbreviate(obj),
)
for s, p, o in self._get_obj_triples_spo_spo(*abb):
for s, p, o in onto._get_obj_triples_spo_spo(*abb):
yield (
_unabbreviate(self, s, blank=blank),
_unabbreviate(self, p, blank=blank),
_unabbreviate(self, o, blank=blank),
_unabbreviate(onto, s, blank=blank),
_unabbreviate(onto, p, blank=blank),
_unabbreviate(onto, o, blank=blank),
)
for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None):
for s, p, o, d in onto._get_data_triples_spod_spod(*abb, d=None):
yield (
_unabbreviate(self, s, blank=blank),
_unabbreviate(self, p, blank=blank),
_unabbreviate(onto, s, blank=blank),
_unabbreviate(onto, p, blank=blank),
f'"{o}"{d}'
if isinstance(d, str)
else f'"{o}"^^{_unabbreviate(self, d)}'
else f'"{o}"^^{_unabbreviate(onto, d)}'
if d
else o,
)
99 changes: 86 additions & 13 deletions ontopy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man
catalog_file="catalog-v001.xml",
baseuri=None,
recursive=False,
relative_to=None,
return_paths=False,
visited_iris=None,
visited_paths=None,
Expand All @@ -327,6 +328,9 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man
If `recursive` is true, catalog files in sub-folders are also read.
If `relative_to` is given, the paths in the returned dict will be
relative to this path.
If `return_paths` is true, a set of directory paths to source
files is returned in addition to the default dict.
Expand All @@ -335,6 +339,8 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man
A ReadCatalogError is raised if the catalog file cannot be found.
"""
# pylint: disable=too-many-branches

# Protocols supported by urllib.request
web_protocols = "http://", "https://", "ftp://"
uri = str(uri) # in case uri is a pathlib.Path object
Expand Down Expand Up @@ -448,13 +454,18 @@ def load_uri(uri, dirname):
load_catalog(catalog)

load_catalog(filepath)

if relative_to:
for iri, path in iris.items():
iris[iri] = os.path.relpath(path, relative_to)

if return_paths:
return iris, dirs
return iris


def write_catalog(
mappings: dict,
irimap: dict,
output: "Union[str, Path]" = "catalog-v001.xml",
directory: "Union[str, Path]" = ".",
relative_paths: bool = True,
Expand All @@ -463,27 +474,29 @@ def write_catalog(
"""Write catalog file do disk.
Args:
mappings: dict mapping ontology IRIs (name) to actual locations
irimap: dict mapping ontology IRIs (name) to actual locations
(URIs). It has the same format as the dict returned by
read_catalog().
output: name of catalog file.
directory: directory path to the catalog file. Only used if `output`
is a relative path.
relative_paths: whether to write absolute or relative paths to
for file paths inside the catalog file.
relative_paths: whether to write file paths inside the catalog as
relative paths (instead of absolute paths).
append: whether to append to a possible existing catalog file.
If false, an existing file will be overwritten.
"""
web_protocol = "http://", "https://", "ftp://"
filename = Path(directory) / output

if relative_paths:
for key, item in mappings.items():
if not item.startswith(web_protocol):
mappings[key] = os.path.relpath(item, Path(directory).resolve())
filename = (Path(directory) / output).resolve()
irimap = irimap.copy() # don't modify provided irimap
for iri, path in irimap.items():
if os.path.isabs(path):
irimap[iri] = os.path.relpath(path, filename.parent)

if filename.exists() and append:
iris = read_catalog(filename)
iris.update(mappings)
mappings = iris
iris.update(irimap)
irimap = iris

res = [
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
Expand All @@ -492,8 +505,8 @@ def write_catalog(
' <group id="Folder Repository, directory=, recursive=true, '
'Auto-Update=false, version=2" prefer="public" xml:base="">',
]
for key, value in dict(mappings).items():
res.append(f' <uri name="{key}" uri="{value}"/>')
for iri, path in irimap.items():
res.append(f' <uri name="{iri}" uri="{path}"/>')
res.append(" </group>")
res.append("</catalog>")
with open(filename, "wt") as handle:
Expand Down Expand Up @@ -745,3 +758,63 @@ def get_format(outfile: str, default: str, fmt: str = None):
if not fmt:
fmt = default
return fmt.lstrip(".")


def directory_layout(onto):
    """Analyse IRIs of imported ontologies and suggest a directory
    layout for saving recursively.

    Arguments:
        onto: Ontology to analyse.

    Returns:
        layout: A dict mapping ontology objects to relative path names
            derived from the ontology IRIs.  No file name extensions are
            added.

    Example:
        Assume that our ontology `onto` has IRI `ex:onto`. If it directly
        or indirectly imports ontologies with IRIs `ex:A/ontoA`,
        `ex:B/ontoB` and `ex:A/C/ontoC`, this function will return the
        following dict:

            {
                onto: "onto",
                ontoA: "A/ontoA",
                ontoB: "B/ontoB",
                ontoC: "A/C/ontoC",
            }

        where `ontoA`, `ontoB` and `ontoC` are imported Ontology objects.
    """
    layout = {}
    visited = set()

    def recur(o):
        # Mark the ontology as visited *before* descending, so that
        # cyclic imports (A imports B, B imports A) terminate.
        visited.add(o)
        for imported in o.imported_ontologies:
            if imported not in visited:
                recur(imported)
        baseiri = o.base_iri.rstrip("/#")

        # Some heuristics here to reproduce the EMMO layout.
        # It might not apply to all ontologies, so maybe it should be
        # made optional?  Alternatively, change EMMO ontology IRIs to
        # match the directory layout.
        subdir = baseiri + "/"
        emmolayout = (
            any(
                oo.base_iri.startswith(subdir)
                for oo in o.imported_ontologies
            )
            or o.base_iri == "http://emmo.info/emmo/mereocausality#"
        )

        layout[o] = (
            subdir + os.path.basename(baseiri) if emmolayout else baseiri
        )

    recur(onto)

    # Strip off the initial common prefix from all paths.
    #
    # `os.path.commonprefix()` compares character by character, so the
    # raw prefix may end in the middle of a name (e.g. ".../onto" for
    # ".../onto" and ".../ontoA") or be the whole path when there is
    # only one ontology.  Cut it back to just after the last "/" or ":"
    # so that every ontology keeps its complete, non-empty name.
    prefix = os.path.commonprefix(list(layout.values()))
    cut = max(prefix.rfind("/"), prefix.rfind(":")) + 1
    return {o: path[cut:].lstrip("/") for o, path in layout.items()}
23 changes: 19 additions & 4 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@


def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None:
# if True:
# from pathlib import Path
# repo_dir = Path(__file__).resolve().parent.parent
# tmpdir = repo_dir / "tests" / "xxx"
# tmpdir.mkdir()

ontodir = repo_dir / "tests" / "catalogs_for_testing"
catalog_expected = {
"http://emmo.info/testonto/0.1.0": str(ontodir / "testonto.ttl"),
Expand Down Expand Up @@ -71,14 +77,23 @@ def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None:
def test_write_catalog_choosing_relative_paths(
repo_dir: "Path", tmpdir: "Path"
) -> None:
# if True:
# from pathlib import Path
# import shutil
# repo_dir = Path(__file__).resolve().parent.parent
# tmpdir = repo_dir / "tests" / "xxx"
# if tmpdir.exists():
# shutil.rmtree(tmpdir)
# tmpdir.mkdir()

ontodir = repo_dir / "tests" / "catalogs_for_testing"
catalog1 = read_catalog(str(ontodir))
catalog1 = read_catalog(ontodir, relative_to=ontodir)
write_catalog(
catalog1,
output=(tmpdir / "cat-relative-paths.xml"),
relative_paths=True,
)
catalog2 = read_catalog(str(ontodir))
catalog2 = read_catalog(ontodir)
write_catalog(
catalog2,
output=(tmpdir / "cat-absolute-paths.xml"),
Expand All @@ -101,8 +116,8 @@ def test_write_catalog_choosing_relative_paths(
ontodir = repo_dir / "tests" / "catalogs_for_testing"

catalog_expected_relative_paths = {
str("tests/catalogs_for_testing/testonto.ttl"),
str("tests/catalogs_for_testing/models.ttl"),
"testonto.ttl",
"models.ttl",
}

catalog_expected_absolute_paths = {
Expand Down
Binary file modified tests/test_excelparser/onto_only_classes.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_excelparser/result_ontology/fromexcelonto.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"Jesper Friis"@en,
"Sylvain Gouttebroze"@en ;
dcterms:title "A test domain ontology"@en ;
owl:imports <http://emmo.info/emmo-inferred>,
owl:imports <http://emmo.info/emmo>,
<http://ontology.info/ontology> ;
owl:versionInfo "0.01"@en .

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"Jesper Friis"@en,
"Sylvain Gouttebroze"@en ;
dcterms:title "A test domain ontology"@en ;
owl:imports <http://emmo.info/emmo-inferred>,
owl:imports <http://emmo.info/emmo>,
<http://ontology.info/ontology> ;
owl:versionInfo "0.01"@en .

Expand Down
Loading

0 comments on commit 77c3bdc

Please sign in to comment.