diff --git a/ontopy/ontology.py b/ontopy/ontology.py index fde54d2aa..9940add0f 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -35,6 +35,7 @@ write_catalog, infer_version, convert_imported, + directory_layout, FMAP, IncompatibleVersion, isinteractive, @@ -934,23 +935,46 @@ def save( raise ValueError( "`recursive` and `squash` should not both be true" ) - base = self.base_iri.rstrip("#/") - for onto in self.imported_ontologies: - obase = onto.base_iri.rstrip("#/") - newdir = Path(dir) / os.path.relpath(obase, base) + layout = directory_layout(self) + + for onto, path in layout.items(): + fname = Path(dir) / f"{path}.{fmt}" onto.save( - filename=None, + filename=fname, format=format, - dir=newdir.resolve(), + dir=dir, mkdir=mkdir, overwrite=overwrite, - recursive=recursive, - squash=squash, - write_catalog_file=write_catalog_file, - append_catalog=append_catalog, - catalog_file=catalog_file, + recursive=False, + squash=False, + write_catalog_file=False, ) + if write_catalog_file: + catalog_files = set() + irimap = {} + for onto, path in layout.items(): + irimap[ + onto.get_version(as_iri=True) + ] = f"{dir}/{path}.{fmt}" + catalog_files.add(Path(path).parent / catalog_file) + + for catfile in catalog_files: + write_catalog( + irimap.copy(), + output=catfile, + directory=dir, + append=append_catalog, + ) + + elif write_catalog_file: + write_catalog( + {self.get_version(as_iri=True): filename}, + output=catalog_file, + directory=dir, + append=append_catalog, + ) + if squash: from rdflib import ( # pylint:disable=import-outside-toplevel URIRef, @@ -980,35 +1004,13 @@ def save( suffix=".owl", delete=False ) as handle: tmpfile = handle.name - super().save(tmpfile, format="rdfxml") + super().save(tmpfile, format="ntriples") graph = rdflib.Graph() - graph.parse(tmpfile, format="xml") + graph.parse(tmpfile, format="ntriples") graph.serialize(destination=filename, format=format) finally: os.remove(tmpfile) - if write_catalog_file: - mappings = {} - base = self.base_iri.rstrip("#/") - - def append(onto): - obase = onto.base_iri.rstrip("#/") - newdir = Path(dir) / os.path.relpath(obase, base) - newpath = newdir.resolve() / f"{onto.name}.{fmt}" - relpath = os.path.relpath(newpath, dir) - mappings[onto.get_version(as_iri=True)] = str(relpath) - for imported in onto.imported_ontologies: - append(imported) - - if recursive: - append(self) - write_catalog( - mappings, - output=catalog_file, - directory=dir, - append=append_catalog, - ) - def get_imported_ontologies(self, recursive=False): """Return a list with imported ontologies. @@ -1939,6 +1941,14 @@ def new_annotation_property( """ return self.new_entity(name, parent, "annotation_property") + def difference(self, other: owlready2.Ontology) -> set: + """Return a set of triples that are in this, but not in the + `other` ontology.""" + # pylint: disable=invalid-name + s1 = set(self.get_unabbreviated_triples(blank="_:b")) + s2 = set(other.get_unabbreviated_triples(blank="_:b")) + return s1.difference(s2) + class BlankNode: """Represents a blank node. @@ -2006,7 +2016,7 @@ def _unabbreviate( def _get_unabbreviated_triples( - self, subject=None, predicate=None, obj=None, blank=None + onto, subject=None, predicate=None, obj=None, blank=None ): """Help function returning all matching triples unabbreviated. @@ -2014,23 +2024,23 @@ def _get_unabbreviated_triples( """ # pylint: disable=invalid-name abb = ( - None if subject is None else self._abbreviate(subject), - None if predicate is None else self._abbreviate(predicate), - None if obj is None else self._abbreviate(obj), + None if subject is None else onto._abbreviate(subject), + None if predicate is None else onto._abbreviate(predicate), + None if obj is None else onto._abbreviate(obj), ) - for s, p, o in self._get_obj_triples_spo_spo(*abb): + for s, p, o in onto._get_obj_triples_spo_spo(*abb): yield ( - _unabbreviate(self, s, blank=blank), - _unabbreviate(self, p, blank=blank), - _unabbreviate(self, o, blank=blank), + _unabbreviate(onto, s, blank=blank), + _unabbreviate(onto, p, blank=blank), + _unabbreviate(onto, o, blank=blank), ) - for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None): + for s, p, o, d in onto._get_data_triples_spod_spod(*abb, d=None): yield ( - _unabbreviate(self, s, blank=blank), - _unabbreviate(self, p, blank=blank), + _unabbreviate(onto, s, blank=blank), + _unabbreviate(onto, p, blank=blank), f'"{o}"{d}' if isinstance(d, str) - else f'"{o}"^^{_unabbreviate(self, d)}' + else f'"{o}"^^{_unabbreviate(onto, d)}' if d else o, ) diff --git a/ontopy/utils.py b/ontopy/utils.py index 7020b4777..d357f558b 100644 --- a/ontopy/utils.py +++ b/ontopy/utils.py @@ -304,6 +304,7 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man catalog_file="catalog-v001.xml", baseuri=None, recursive=False, + relative_to=None, return_paths=False, visited_iris=None, visited_paths=None, @@ -327,6 +328,9 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man If `recursive` is true, catalog files in sub-folders are also read. + if `relative_to` is given, the paths in the returned dict will be + relative to this path. + If `return_paths` is true, a set of directory paths to source files is returned in addition to the default dict. @@ -335,6 +339,8 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man A ReadCatalogError is raised if the catalog file cannot be found. """ + # pylint: disable=too-many-branches + # Protocols supported by urllib.request web_protocols = "http://", "https://", "ftp://" uri = str(uri) # in case uri is a pathlib.Path object @@ -448,13 +454,18 @@ def load_uri(uri, dirname): load_catalog(catalog) load_catalog(filepath) + + if relative_to: + for iri, path in iris.items(): + iris[iri] = os.path.relpath(path, relative_to) + if return_paths: return iris, dirs return iris def write_catalog( - mappings: dict, + irimap: dict, output: "Union[str, Path]" = "catalog-v001.xml", directory: "Union[str, Path]" = ".", relative_paths: bool = True, @@ -463,27 +474,29 @@ def write_catalog( """Write catalog file do disk. Args: - mappings: dict mapping ontology IRIs (name) to actual locations + irimap: dict mapping ontology IRIs (name) to actual locations (URIs). It has the same format as the dict returned by read_catalog(). output: name of catalog file. directory: directory path to the catalog file. Only used if `output` is a relative path. - relative_paths: whether to write absolute or relative paths to - for file paths inside the catalog file. + relative_paths: whether to write file paths inside the catalog as + relative paths (instead of absolute paths). append: whether to append to a possible existing catalog file. If false, an existing file will be overwritten. """ - web_protocol = "http://", "https://", "ftp://" + filename = Path(directory) / output + if relative_paths: - for key, item in mappings.items(): - if not item.startswith(web_protocol): - mappings[key] = os.path.relpath(item, Path(directory).resolve()) - filename = (Path(directory) / output).resolve() + irimap = irimap.copy() # don't modify provided irimap + for iri, path in irimap.items(): + if os.path.isabs(path): + irimap[iri] = os.path.relpath(path, filename.parent) + if filename.exists() and append: iris = read_catalog(filename) - iris.update(mappings) - mappings = iris + iris.update(irimap) + irimap = iris res = [ '', @@ -492,8 +505,8 @@ def write_catalog( ' ', ] - for key, value in dict(mappings).items(): - res.append(f' ') + for iri, path in irimap.items(): + res.append(f' ') res.append(" ") res.append("") with open(filename, "wt") as handle: @@ -745,3 +758,63 @@ def get_format(outfile: str, default: str, fmt: str = None): if not fmt: fmt = default return fmt.lstrip(".") + + +def directory_layout(onto): + """Analyse IRIs of imported ontologies and suggested a directory + layout for saving recursively. + + Arguments: + onto: Ontology to analyse. + + Returns: + layout: A dict mapping ontology objects to relative path names + derived from the ontology IRIs. No file name extension are + added. + + Example: + Assume that our ontology `onto` has IRI `ex:onto`. If it directly + or indirectly imports ontologies with IRIs `ex:A/ontoA`, `ex:B/ontoB` + and `ex:A/C/ontoC`, this function will return the following dict: + + { + onto: "onto", + ontoA: "A/ontoA", + ontoB: "B/ontoB", + ontoC: "A/C/ontoC", + } + + where `ontoA`, `ontoB` and `ontoC` are imported Ontology objects. + """ + layout = {} + + def recur(o): + for imported in o.imported_ontologies: + if imported not in layout: + recur(imported) + baseiri = o.base_iri.rstrip("/#") + + # Some heuristics here to reproduce the EMMO layout. + # It might not apply to all ontologies, so maybe it should be + # made optional? Alternatively, change EMMO ontology IRIs to + # match the directory layout. + emmolayout = ( + any( + oo.base_iri.startswith(baseiri + "/") + for oo in o.imported_ontologies + ) + or o.base_iri == "http://emmo.info/emmo/mereocausality#" + ) + + layout[o] = ( + baseiri + "/" + os.path.basename(baseiri) if emmolayout else baseiri + ) + + recur(onto) + + # Strip off initial common prefix from all paths + prefix = os.path.commonprefix(list(layout.values())) + for o, path in layout.items(): + layout[o] = path[len(prefix) :].lstrip("/") + + return layout diff --git a/tests/test_catalog.py b/tests/test_catalog.py index c5d2fc600..d8fa44006 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -7,6 +7,12 @@ def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None: + # if True: + # from pathlib import Path + # repo_dir = Path(__file__).resolve().parent.parent + # tmpdir = repo_dir / "tests" / "xxx" + # tmpdir.mkdir() + ontodir = repo_dir / "tests" / "catalogs_for_testing" catalog_expected = { "http://emmo.info/testonto/0.1.0": str(ontodir / "testonto.ttl"), @@ -71,14 +77,23 @@ def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None: def test_write_catalog_choosing_relative_paths( repo_dir: "Path", tmpdir: "Path" ) -> None: + # if True: + # from pathlib import Path + # import shutil + # repo_dir = Path(__file__).resolve().parent.parent + # tmpdir = repo_dir / "tests" / "xxx" + # if tmpdir.exists(): + # shutil.rmtree(tmpdir) + # tmpdir.mkdir() + ontodir = repo_dir / "tests" / "catalogs_for_testing" - catalog1 = read_catalog(str(ontodir)) + catalog1 = read_catalog(ontodir, relative_to=ontodir) write_catalog( catalog1, output=(tmpdir / "cat-relative-paths.xml"), relative_paths=True, ) - catalog2 = read_catalog(str(ontodir)) + catalog2 = read_catalog(ontodir) write_catalog( catalog2, output=(tmpdir / "cat-absolute-paths.xml"), @@ -101,8 +116,8 @@ def test_write_catalog_choosing_relative_paths( ontodir = repo_dir / "tests" / "catalogs_for_testing" catalog_expected_relative_paths = { - str("tests/catalogs_for_testing/testonto.ttl"), - str("tests/catalogs_for_testing/models.ttl"), + "testonto.ttl", + "models.ttl", } catalog_expected_absolute_paths = { diff --git a/tests/test_excelparser/onto_only_classes.xlsx b/tests/test_excelparser/onto_only_classes.xlsx index 2a37ccb74..8e0dba0df 100644 Binary files a/tests/test_excelparser/onto_only_classes.xlsx and b/tests/test_excelparser/onto_only_classes.xlsx differ diff --git a/tests/test_excelparser/result_ontology/fromexcelonto.ttl b/tests/test_excelparser/result_ontology/fromexcelonto.ttl index ae1a98580..56c85c7bc 100644 --- a/tests/test_excelparser/result_ontology/fromexcelonto.ttl +++ b/tests/test_excelparser/result_ontology/fromexcelonto.ttl @@ -12,7 +12,7 @@ "Jesper Friis"@en, "Sylvain Gouttebroze"@en ; dcterms:title "A test domain ontology"@en ; - owl:imports , + owl:imports , ; owl:versionInfo "0.01"@en . diff --git a/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl b/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl index cb6c2d276..0b73b8ee3 100644 --- a/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl +++ b/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl @@ -12,7 +12,7 @@ "Jesper Friis"@en, "Sylvain Gouttebroze"@en ; dcterms:title "A test domain ontology"@en ; - owl:imports , + owl:imports , ; owl:versionInfo "0.01"@en . diff --git a/tests/test_excelparser/test_excelparser.py b/tests/test_excelparser/test_excelparser.py index 0131b591d..7d823304d 100644 --- a/tests/test_excelparser/test_excelparser.py +++ b/tests/test_excelparser/test_excelparser.py @@ -91,7 +91,7 @@ def test_excelparser(repo_dir: "Path") -> None: update_xlspath, force=True, input_ontology=ontology ) assert updated_onto.ATotallyNewPattern - assert updated_onto.Pattern.iri == onto.Pattern.iri + assert updated_onto.FinitePattern.iri == onto.FinitePattern.iri assert len(list(onto.classes())) + 1 == len(list(updated_onto.classes())) @@ -99,6 +99,12 @@ def test_excelparser_only_classes(repo_dir: "Path") -> None: """This loads the excelfile used and tests that the resulting ontology prior to version 0.5.2 in which only classes where considered, but with empty sheets for properties.""" + + # Useful for debugging with ipython + # if True: + # from pathlib import Path + # repo_dir = Path(__file__).resolve().parent.parent.parent + ontopath = ( repo_dir / "tests" @@ -119,6 +125,10 @@ def test_excelparser_only_classes(repo_dir: "Path") -> None: # Used for printing new ontology when debugging # ontology.save("test_only_classes.ttl") + # Useful for debugging + # print("----- only in onto -----") + # print(onto.difference(ontology)) + assert onto == ontology assert errors["already_defined"] == {"SpecialPattern"} assert errors["in_imported_ontologies"] == {"Atom"} @@ -143,5 +153,5 @@ def test_excelparser_only_classes(repo_dir: "Path") -> None: update_xlspath, force=True, input_ontology=ontology ) assert updated_onto.ATotallyNewPattern - assert updated_onto.Pattern.iri == onto.Pattern.iri + assert updated_onto.FinitePattern.iri == onto.FinitePattern.iri assert len(list(onto.classes())) + 1 == len(list(updated_onto.classes())) diff --git a/tests/test_load.py b/tests/test_load.py index 665586254..4fc637984 100755 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -5,6 +5,12 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None: + # if True: + # from pathlib import Path + # from ontopy import get_ontology + # repo_dir = Path(__file__).resolve().parent.parent + # testonto = get_ontology(str(repo_dir / "tests" / "testonto" / "testonto.ttl")).load() + import pytest from ontopy import get_ontology @@ -21,7 +27,7 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None: assert str(emmo.Atom.prefLabel.first()) == "Atom" emmo = get_ontology( - "https://emmo-repo.github.io/latest-stable/" "emmo-inferred.owl" + "https://emmo-repo.github.io/emmo-inferred.ttl" ).load() # owl format assert str(emmo.Atom.prefLabel.first()) == "Atom" diff --git a/tests/test_ontology_difference.py b/tests/test_ontology_difference.py new file mode 100644 index 000000000..5ceae10d9 --- /dev/null +++ b/tests/test_ontology_difference.py @@ -0,0 +1,25 @@ +"""Test the Ontology.difference() methode""" + + +if True: + from pathlib import Path + from ontopy import get_ontology + + repo_dir = Path(__file__).resolve().parent.parent + onto_dir = repo_dir / "tests" / "testonto" + print(repo_dir) + + testonto = get_ontology(onto_dir / "testonto.ttl").load() + testontowi = get_ontology(onto_dir / "testonto_w_individual.ttl").load() + + diff = testonto.difference(testontowi) + diffwi = testontowi.difference(testonto) + assert not diff.intersection(diffwi) + + triple1 = ( + "http://emmo.info/testonto#testindividual", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + "http://www.w3.org/2002/07/owl#NamedIndividual", + ) + assert triple1 in diffwi + assert triple1 not in diff diff --git a/tests/test_save.py b/tests/test_save.py index ad1d3e1bf..765aefbd0 100755 --- a/tests/test_save.py +++ b/tests/test_save.py @@ -104,3 +104,20 @@ def test_save( # append_catalog # catalog_filename + + +# Simple working tests without pytest getting in the way - feel free to change to pytest + +if True: # Whether to test for EMMO + from pathlib import Path + + from emmopy import get_emmo + + emmo = get_emmo() + emmo.save( + format="turtle", + dir=Path(__file__).absolute().parent / "outdir", + recursive=True, + mkdir=True, + write_catalog_file=True, + ) diff --git a/tests/test_utils_directory_layout.py b/tests/test_utils_directory_layout.py new file mode 100644 index 000000000..a2b676654 --- /dev/null +++ b/tests/test_utils_directory_layout.py @@ -0,0 +1,81 @@ +import os +from pathlib import Path + +from ontopy import get_ontology +from ontopy.utils import directory_layout + + +# Requires that we have EMMO checked out locally +if False: + emmo = get_ontology("../EMMO/emmo.ttl").load() + layout = directory_layout(emmo) + + # Map base IRIs to ontologies for easy access to all sub-ontologies + omap = {o.base_iri: o for o in layout.keys()} + + # Base IRI of EMMO should not end with slash (/) !!! + assert layout[omap["http://emmo.info/emmo/"]] == "emmo" + + assert ( + layout[omap["http://emmo.info/emmo/perspectives#"]] + == "perspectives/perspectives" + ) + assert ( + layout[omap["http://emmo.info/emmo/perspectives/data#"]] + == "perspectives/data" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines#"]] + == "disciplines/disciplines" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines/math#"]] + == "disciplines/math" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines/units/siunits#"]] + == "disciplines/units/siunits" + ) + assert ( + layout[omap["http://emmo.info/emmo/mereocausality#"]] + == "mereocausality/mereocausality" + ) + + # Also check dir layout for the disciplines module - should be the same as for emmo + disciplines = omap["http://emmo.info/emmo/disciplines#"] + layout = directory_layout(disciplines) + assert ( + layout[omap["http://emmo.info/emmo/perspectives#"]] + == "perspectives/perspectives" + ) + assert ( + layout[omap["http://emmo.info/emmo/perspectives/data#"]] + == "perspectives/data" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines#"]] + == "disciplines/disciplines" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines/math#"]] + == "disciplines/math" + ) + assert ( + layout[omap["http://emmo.info/emmo/disciplines/units/siunits#"]] + == "disciplines/units/siunits" + ) + assert ( + layout[omap["http://emmo.info/emmo/mereocausality#"]] + == "mereocausality/mereocausality" + ) + + +if True: + thisdir = Path(__file__).resolve().parent + ontopath = thisdir / "testonto" / "testonto.ttl" + onto = get_ontology(ontopath).load() + layout = directory_layout(onto) + omap = {o.base_iri: o for o in layout.keys()} + + assert layout[omap["http://emmo.info/models#"]] == "models" + assert layout[omap["http://emmo.info/testonto#"]] == "testonto"