From cef7b42d55e2e61f337c2810e5f52597957edb33 Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Tue, 10 Dec 2024 17:01:13 +0100 Subject: [PATCH] apiharvester: cantook harvesting * Adds VS and NJ CANTOOK API harvesting. * Deletes OAI ebooks harvesting. * Closes #3718. Co-Authored-by: Peter Weber --- data/apisources.yml | 17 +- pyproject.toml | 6 +- rero_ils/config.py | 24 +- .../__init__.py | 0 rero_ils/modules/api_harvester/api.py | 159 +++++ .../cantook}/__init__.py | 4 +- rero_ils/modules/api_harvester/cantook/api.py | 181 ++++++ .../cantook}/dojson/__init__.py | 0 .../cantook/dojson/json}/__init__.py | 9 +- .../cantook/dojson/json/model.py | 267 ++++++++ rero_ils/modules/api_harvester/cli.py | 142 +++++ rero_ils/modules/api_harvester/errors.py | 44 ++ .../{apiharvester => api_harvester}/models.py | 29 +- rero_ils/modules/api_harvester/tasks.py | 74 +++ rero_ils/modules/api_harvester/utils.py | 111 ++++ rero_ils/modules/apiharvester/cli.py | 159 ----- rero_ils/modules/apiharvester/tasks.py | 58 -- rero_ils/modules/apiharvester/utils.py | 127 ---- rero_ils/modules/cli/reroils.py | 6 +- .../modules/documents/serializers/base.py | 2 +- rero_ils/modules/ebooks/cli.py | 93 --- .../modules/ebooks/dojson/contrib/__init__.py | 18 - .../ebooks/dojson/contrib/marc21/model.py | 511 --------------- rero_ils/modules/ebooks/receivers.py | 63 -- rero_ils/modules/ebooks/tasks.py | 107 ---- rero_ils/modules/ebooks/utils.py | 192 ------ rero_ils/modules/entities/api.py | 4 +- rero_ils/modules/ext.py | 4 - rero_ils/modules/files/operations.py | 2 +- rero_ils/modules/organisations/api.py | 21 +- rero_ils/modules/stats/api/indicators/base.py | 6 +- rero_ils/schedulers.py | 4 +- scripts/setup | 36 +- setup.py | 9 +- .../cantook/test_cantook_dojson.py | 496 +++++++++++++++ .../api_harvester/conftest.py | 16 +- tests/api_harvester/test_cli_api_harvester.py | 172 +++++ .../data/apisources.yml | 18 +- tests/data/mv_cantook.json | 347 ++++++++++ tests/data/mv_cantook_deleted.json | 347 
++++++++++ tests/data/xml/ebook1.xml | 89 --- tests/data/xml/ebook2.xml | 73 --- tests/fixtures/metadata.py | 16 - .../apiharvester/test_apiharvester_utils.py | 114 ---- tests/ui/documents/test_documents_api.py | 64 -- tests/ui/ebooks/test_ebooks_receivers.py | 116 ---- tests/ui/ebooks/test_ebooks_utils.py | 36 -- .../documents/test_documents_dojson_ebooks.py | 597 ------------------ 48 files changed, 2460 insertions(+), 2530 deletions(-) rename rero_ils/modules/{apiharvester => api_harvester}/__init__.py (100%) create mode 100644 rero_ils/modules/api_harvester/api.py rename rero_ils/modules/{ebooks => api_harvester/cantook}/__init__.py (92%) create mode 100644 rero_ils/modules/api_harvester/cantook/api.py rename rero_ils/modules/{ebooks => api_harvester/cantook}/dojson/__init__.py (100%) rename rero_ils/modules/{ebooks/dojson/contrib/marc21 => api_harvester/cantook/dojson/json}/__init__.py (81%) create mode 100644 rero_ils/modules/api_harvester/cantook/dojson/json/model.py create mode 100644 rero_ils/modules/api_harvester/cli.py create mode 100644 rero_ils/modules/api_harvester/errors.py rename rero_ils/modules/{apiharvester => api_harvester}/models.py (72%) create mode 100644 rero_ils/modules/api_harvester/tasks.py create mode 100644 rero_ils/modules/api_harvester/utils.py delete mode 100644 rero_ils/modules/apiharvester/cli.py delete mode 100644 rero_ils/modules/apiharvester/tasks.py delete mode 100644 rero_ils/modules/apiharvester/utils.py delete mode 100644 rero_ils/modules/ebooks/cli.py delete mode 100644 rero_ils/modules/ebooks/dojson/contrib/__init__.py delete mode 100644 rero_ils/modules/ebooks/dojson/contrib/marc21/model.py delete mode 100644 rero_ils/modules/ebooks/receivers.py delete mode 100644 rero_ils/modules/ebooks/tasks.py delete mode 100644 rero_ils/modules/ebooks/utils.py create mode 100644 tests/api_harvester/cantook/test_cantook_dojson.py rename rero_ils/modules/apiharvester/signals.py => tests/api_harvester/conftest.py (68%) create mode 100644 
tests/api_harvester/test_cli_api_harvester.py rename data/oaisources.yml => tests/data/apisources.yml (64%) create mode 100644 tests/data/mv_cantook.json create mode 100644 tests/data/mv_cantook_deleted.json delete mode 100644 tests/data/xml/ebook1.xml delete mode 100644 tests/data/xml/ebook2.xml delete mode 100644 tests/ui/apiharvester/test_apiharvester_utils.py delete mode 100644 tests/ui/ebooks/test_ebooks_receivers.py delete mode 100644 tests/ui/ebooks/test_ebooks_utils.py delete mode 100644 tests/unit/documents/test_documents_dojson_ebooks.py diff --git a/data/apisources.yml b/data/apisources.yml index d898d62cff..11cce81c0f 100644 --- a/data/apisources.yml +++ b/data/apisources.yml @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # RERO ILS -# Copyright (C) 2019 RERO +# Copyright (C) 2024 RERO # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -16,8 +16,13 @@ # along with this program. If not, see . -# OAI-PMH harvester configuration. -mef: - url: http://mef.test.rero.ch/api/mef - comment: 'mef persons' - size: 1000 +# API harvester configuration. 
+VS-CANTOOK: + url: https://mediatheque-valais.cantookstation.eu + classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook' + code: 'mv-cantook' + +NJ-CANTOOK: + url: https://bm.ebibliomedia.ch + classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook' + code: 'ebibliomedia' diff --git a/pyproject.toml b/pyproject.toml index 442976224e..240adebf6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,7 +154,6 @@ reverse = "rero_ils.dojson.cli:reverse" pjson = "rero_ils.dojson.cli:pretty_json_dump" [tool.poetry.plugins."dojson.cli.rule"] -marc21_ebooks_to_json = "rero_ils.modules.ebooks.dojson.contrib.marc21:marc21" marc21_dnb_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_dnb" marc21_kul_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_kul" marc21_loc_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_loc" @@ -223,11 +222,10 @@ users = "rero_ils.modules.users.views:blueprint" nooppid = "rero_ils.converters:NoopPIDConverter" [tool.poetry.plugins."invenio_celery.tasks"] -apiharvester = "rero_ils.modules.apiharvester.tasks" +api_harvester = "rero_ils.modules.api_harvester.tasks" collections = "rero_ils.modules.collections.tasks" documents = "rero_ils.modules.documents.tasks" remote_entities = "rero_ils.modules.entities.remote_entities.tasks" -ebooks = "rero_ils.modules.ebooks.tasks" holdings = "rero_ils.modules.holdings.tasks" items = "rero_ils.modules.items.tasks" loans = "rero_ils.modules.loans.tasks" @@ -252,7 +250,7 @@ acq_order_lines = "rero_ils.modules.acquisition.acq_order_lines.models" acq_orders = "rero_ils.modules.acquisition.acq_orders.models" acq_receipt_lines = "rero_ils.modules.acquisition.acq_receipt_lines.models" acq_receipts = "rero_ils.modules.acquisition.acq_receipts.models" -apiharvester = "rero_ils.modules.apiharvester.models" +api_harvester = "rero_ils.modules.api_harvester.models" budgets = "rero_ils.modules.acquisition.budgets.models" circ_policies = 
"rero_ils.modules.circ_policies.models" collections = "rero_ils.modules.collections.models" diff --git a/rero_ils/config.py b/rero_ils/config.py index 50bafb2dd8..1fd4b8e214 100644 --- a/rero_ils/config.py +++ b/rero_ils/config.py @@ -392,12 +392,6 @@ def _(x): "schedule": timedelta(minutes=60), "enabled": False, }, - "ebooks-harvester": { - "task": "invenio_oaiharvester.tasks.list_records_from_dates", - "schedule": crontab(minute=22, hour=22), - "kwargs": {"name": "ebooks"}, - "enabled": False, - }, "notification-creation": { "task": "rero_ils.modules.notifications.tasks.create_notifications", "schedule": crontab(minute=0, hour=3), # Every day at 05:00 UTC, @@ -526,12 +520,18 @@ def _(x): "kwargs": {"delete": True}, "enabled": False, }, - # "mef-harvester": { - # "task": "rero_ils.modules.apiharvester.tasks.harvest_records", - # "schedule": timedelta(minutes=60), - # "kwargs": {"name": "mef", "enabled": False), - # "enabled": False, - # }, + "harvest-vs-cantook": { + "task": "rero_ils.modules.api_harvester.tasks.harvest_records", + "schedule": crontab(minute=33, hour=3), # Every day at 03:33 UTC, + "kwargs": {"name": "VS-CANTOOK"}, + "enabled": False, + }, + "harvest-nj-cantook": { + "task": "rero_ils.modules.api_harvester.tasks.harvest_records", + "schedule": crontab(minute=44, hour=4), # Every day at 04:44 UTC, + "kwargs": {"name": "NJ-CANTOOK"}, + "enabled": False, + }, } CELERY_BROKER_HEARTBEAT = 0 diff --git a/rero_ils/modules/apiharvester/__init__.py b/rero_ils/modules/api_harvester/__init__.py similarity index 100% rename from rero_ils/modules/apiharvester/__init__.py rename to rero_ils/modules/api_harvester/__init__.py diff --git a/rero_ils/modules/api_harvester/api.py b/rero_ils/modules/api_harvester/api.py new file mode 100644 index 0000000000..9dcd0cb3a0 --- /dev/null +++ b/rero_ils/modules/api_harvester/api.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2024 RERO +# +# This program is free software: you can redistribute 
it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""API for cantook records.""" + +from __future__ import absolute_import, print_function + +import click + +from rero_ils.modules.api_harvester.models import ApiHarvestConfig +from rero_ils.modules.locations.api import Location +from rero_ils.modules.organisations.api import Organisation + + +class ApiHarvest: + """ApiHarvest class. + + config: saved config from ApiHarvester class + file_name: to save records to file + process: create harvested records + harvest_count: how many records to harvest + verbose: print verbose messages + """ + + def __init__( + self, name, file_name=None, process=False, harvest_count=-1, verbose=False + ): + """Class init.""" + config = self.get_config(name) + if not config: + raise NameError(f"API Config not found: {name}") + self.config = config + self.file = file_name + self.process = process + self.harvest_count = harvest_count + self.verbose = verbose + self._vendor = None + self._url = self.config.url + self._code = self.config.code + self._count = 0 + self._count_new = 0 + self._count_upd = 0 + self._count_del = 0 + info = {} + for organisation in Organisation.get_records_by_online_harvested_source( + self._code + ): + locations = {} + for location_pid in organisation.get_online_locations(): + locations[location_pid] = None + location = Location.get_record_by_pid(location_pid) + library = location.get_library() + if url := library.get_online_harvested_source_url(source=self._code): + locations[location_pid] 
= url + info[organisation.pid] = { + "item_type_pid": organisation.online_circulation_category(), + "locations": locations, + } + self._info = info + + @classmethod + def get_config(cls, name): + """Get config. + + :param name: name of config + """ + return ApiHarvestConfig.query.filter_by(name=name).first() + + def get_request_url(self, start_date="1990-01-01", page=1): + """Get request URL. + + start_date: date from where records has to be harvested + page: page from where records have to be harvested + """ + raise NotImplementedError() + + def create_update_record(self, record): + """Create new record or update record. + + :param record: record to create or update + """ + raise NotImplementedError() + + def save_record(self, record): + """Save record to file. + + :param record: record to write to file + """ + if self.file: + self.file.write(record) + + def msg_text(self, pid, msg): + """Logging message text.""" + return f"{self._count}: {self._vendor}:{self._code} {pid} = {msg}" + + def process_records(self, records): + """Process records. + + :param records: records to process + """ + for record in records: + if self.harvest_count >= 0 and self._count >= self.harvest_count: + break + self._count += 1 + self.save_record(record) + if self.process: + pid, status = self.create_update_record(record) + self.verbose_print(self.msg_text(pid=pid, msg=status.value)) + + def verbose_print(self, msg): + """Print verbose message. + + :param msg: message to print if verbose + """ + if self.verbose: + click.echo(msg) + + def harvest_records(self, from_date): + """Harvest records from servers. 
+
+        :param from_date: records changed after this date to harvest
+        """
+        records = []
+        self.process_records(records=records)
+        return self._count, len(records)
+
+    @property
+    def count(self):
+        """Get count."""
+        return self._count
+
+    @property
+    def count_new(self):
+        """Get new count."""
+        return self._count_new
+
+    @property
+    def count_upd(self):
+        """Get updated count."""
+        return self._count_upd
+
+    @property
+    def count_del(self):
+        """Get deleted count."""
+        return self._count_del
diff --git a/rero_ils/modules/ebooks/__init__.py b/rero_ils/modules/api_harvester/cantook/__init__.py
similarity index 92%
rename from rero_ils/modules/ebooks/__init__.py
rename to rero_ils/modules/api_harvester/cantook/__init__.py
index c955f3eee2..dc9e72752f 100644
--- a/rero_ils/modules/ebooks/__init__.py
+++ b/rero_ils/modules/api_harvester/cantook/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # RERO ILS
-# Copyright (C) 2019-2022 RERO
+# Copyright (C) 2024 RERO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -15,4 +15,4 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see .
 
-"""JSON schemas."""
+"""ApiCantook."""
diff --git a/rero_ils/modules/api_harvester/cantook/api.py b/rero_ils/modules/api_harvester/cantook/api.py
new file mode 100644
index 0000000000..c3502b3c70
--- /dev/null
+++ b/rero_ils/modules/api_harvester/cantook/api.py
@@ -0,0 +1,201 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+"""API for cantook records."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_db import db
+from requests import codes as requests_codes
+
+from rero_ils.modules.documents.api import Document, DocumentsSearch
+from rero_ils.modules.holdings.api import Holding, HoldingsSearch, create_holding
+from rero_ils.modules.utils import JsonWriter, requests_retry_session
+
+from ..api import ApiHarvest
+from ..models import HarvestActionType
+from .dojson.json import cantook_json
+
+
+class ApiCantook(ApiHarvest):
+    """ApiCantook class.
+
+    Class for harvesting ebooks from cantook API resources.
+    """
+
+    def __init__(
+        self, name, file_name=None, process=False, harvest_count=-1, verbose=False
+    ):
+        """Class init."""
+        super().__init__(
+            name=name,
+            process=process,
+            harvest_count=harvest_count,
+            verbose=verbose,
+        )
+        if file_name:
+            self.file = JsonWriter(file_name)
+        self._vendor = "CANTOOK"
+
+    def get_request_url(self, start_date="1990-01-01", page=1):
+        """Get request URL.
+
+        start_date: date from where records has to be harvested
+        page: page from where records have to be harvested
+        """
+        params = f"start_at={start_date}&page={page}"
+        return f"{self._url}/v1/resources.json?{params}"
+
+    def delete_holdings(self, document_pid):
+        """
+        Delete holdings.
+
+        Only holdings carrying an electronic location of this source
+        (``self._code``) are deleted.
+
+        :param document_pid: document pid
+        """
+        for hold_pid in list(Holding.get_holdings_pid_by_document_pid(document_pid)):
+            if holding := Holding.get_record_by_pid(hold_pid):
+                # Non-electronic holdings have no `electronic_location`.
+                for electronic_location in holding.get("electronic_location", []):
+                    if electronic_location["source"] == self._code:
+                        holding.delete(dbcommit=True, delindex=True)
+                        break
+
+    def create_holdings(self, document_pid, link):
+        """
+        Create holdings.
+
+        One electronic holding is created for every online location which
+        does not have one yet for this document and source.
+
+        :param document_pid: document pid
+        :param link: link to cantook document
+        """
+        if not link:
+            # No link in the harvested record: there is no URI to store.
+            return
+        holdings = []
+        for info in self._info.values():
+            item_type_pid = info["item_type_pid"]
+            for location_pid, url in info["locations"].items():
+                # Always start from the harvested link: reassigning `link`
+                # would leak the URL of one location into the next one.
+                uri = link
+                if url:
+                    uri_split = link.split("/")[3:]
+                    uri_split.insert(0, url.rstrip("/"))
+                    uri = "/".join(uri_split)
+                # See if the holding already exist
+                query = (
+                    HoldingsSearch()
+                    .filter("term", document__pid=document_pid)
+                    .filter("term", location__pid=location_pid)
+                    .filter("term", holdings_type="electronic")
+                    .filter("term", electronic_location__source=self._code)
+                )
+                if query.count() == 0:
+                    holding = create_holding(
+                        document_pid=document_pid,
+                        location_pid=location_pid,
+                        item_type_pid=item_type_pid,
+                        electronic_location={"source": self._code, "uri": uri},
+                        holdings_type="electronic",
+                    )
+                    holdings.append(holding)
+        db.session.commit()
+        for holding in holdings:
+            holding.reindex()
+
+    def create_update_record(self, data):
+        """Create, update or delete record.
+
+        :param data: data for record operation
+        :returns: harvested record id and the HarvestActionType done
+        """
+        status = HarvestActionType.NOTSET
+        record = None
+        record_data = cantook_json.do(data)
+        if record_data.pop("deleted", None):
+            status = HarvestActionType.DELETED
+        link = record_data.pop("link", None)
+        # See if we have this document already
+        harvested_id = record_data.pop("pid")
+        query = (
+            DocumentsSearch()
+            .filter("term", identifiedBy__value__raw=harvested_id)
+            .source(includes=["pid"])
+        )
+        try:
+            pid = next(query.scan()).pid
+        except StopIteration:
+            pid = None
+        if pid:
+            if doc := Document.get_record_by_pid(pid):
+                if status == HarvestActionType.DELETED:
+                    self._count_del += 1
+                    self.delete_holdings(document_pid=doc.pid)
+                    # Try to delete document (we have to delete `harvested` for this)
+                    doc.pop("harvested", None)
+                    if not doc.reasons_not_to_delete():
+                        doc.delete(dbcommit=True, delindex=True)
+                else:
+                    self._count_upd += 1
+                    status = HarvestActionType.UPDATED
+                    record_data["pid"] = doc.pid
+                    record = doc.replace(data=record_data, dbcommit=True, reindex=True)
+                    # TODO: do we have to delete and recreate holdings ?
+                    # self.delete_holdings()
+                    self.create_holdings(document_pid=record.pid, link=link)
+        elif status == HarvestActionType.NOTSET:
+            self._count_new += 1
+            status = HarvestActionType.CREATED
+            record = Document.create(data=record_data, dbcommit=True, reindex=True)
+            self.create_holdings(document_pid=record.pid, link=link)
+        return harvested_id, status
+
+    def harvest_records(self, from_date):
+        """Harvest CANTOOK records.
+
+        from_date: record changed after this date to get
+        :returns: count of processed records and total items of the API
+        """
+        self._count = 0
+        url = self.get_request_url(start_date=from_date, page=1)
+        request = requests_retry_session().get(url)
+        total_pages = int(request.headers.get("X-Total-Pages", 0))
+        total_items = int(request.headers.get("X-Total-Items", 0))
+        current_page = int(request.headers.get("X-Current-Page", 0))
+        while (
+            request.status_code == requests_codes.ok
+            and current_page <= total_pages
+            and (self.harvest_count < 0 or self._count < self.harvest_count)
+        ):
+            self.verbose_print(f"API page: {current_page} url: {url}")
+            self.process_records(request.json().get("resources", []))
+            # get next page and update current_page
+            next_page = current_page + 1
+            url = self.get_request_url(start_date=from_date, page=next_page)
+            request = requests_retry_session().get(url)
+            current_page = int(request.headers.get("X-Current-Page", 0))
+            if current_page < next_page:
+                # The server did not advance (header missing or page clamped):
+                # stop instead of requesting the same page forever.
+                break
+
+        return self._count, total_items
diff --git a/rero_ils/modules/ebooks/dojson/__init__.py b/rero_ils/modules/api_harvester/cantook/dojson/__init__.py
similarity index 100%
rename from rero_ils/modules/ebooks/dojson/__init__.py
rename to rero_ils/modules/api_harvester/cantook/dojson/__init__.py
diff --git a/rero_ils/modules/ebooks/dojson/contrib/marc21/__init__.py b/rero_ils/modules/api_harvester/cantook/dojson/json/__init__.py
similarity index 81%
rename from rero_ils/modules/ebooks/dojson/contrib/marc21/__init__.py
rename to rero_ils/modules/api_harvester/cantook/dojson/json/__init__.py
index 9547a74462..4083d38832 100644
--- a/rero_ils/modules/ebooks/dojson/contrib/marc21/__init__.py
+++ b/rero_ils/modules/api_harvester/cantook/dojson/json/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # RERO ILS
-# Copyright (C) 2019-2022 RERO
+# Copyright (C) 2024 RERO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -15,8 +15,9 @@
 # You should have received a copy of the GNU Affero General Public License
 #
along with this program. If not, see .
 
-"""MARC21 RERO to JSON."""
+"""Cantook JSON data conversion."""
 
 
-from .model import marc21
-__all__ = "marc21"
+from .model import Transformation, cantook_json
+
+__all__ = ("Transformation", "cantook_json")
diff --git a/rero_ils/modules/api_harvester/cantook/dojson/json/model.py b/rero_ils/modules/api_harvester/cantook/dojson/json/model.py
new file mode 100644
index 0000000000..1d961d46a5
--- /dev/null
+++ b/rero_ils/modules/api_harvester/cantook/dojson/json/model.py
@@ -0,0 +1,275 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+"""Cantook json record transformation."""
+
+import dateparser
+
+from rero_ils.modules.documents.models import DocumentFictionType
+from rero_ils.modules.utils import get_schema_for_resource
+
+CONTRIBUTION_NATURE = {
+    "adapted_by": "adp",
+    "afterword_by": "aft",
+    "author": "aut",
+    "by_composer": "cmp",
+    "by_photographer": "pht",
+    "cover_design_or_artwork_by": "ill",
+    "director": "drt",
+    "drawings_by": "ill",
+    "edited_by": "edt",
+    "editor_in_chief": "edt",
+    "editorial_coordination_by": "edt",
+    "epilogue_by": "aft",
+    "foreword_by": "aui",
+    "general_editor": "edt",
+    "illustrator": "ill",
+    "instrumental_soloist": "mus",
+    "interviewer": "ivr",
+    "introduction_and_notes_by": "aui",
+    "introduction_by": "aui",
+    "maps_by": "ctg",
+    "narrator": "nrt",
+    "other": "oth",
+    "other_primary_creator": "cre",
+    "photograph": "pht",
+    "preface_by": "aui",
+    "read_by": "nrt",
+    "screenplay_by": "aus",
+    "selected_by": "cur",
+    "series_edited_by": "pbl",
+    "translated_by": "trl",
+    "volume_editor": "pbl",
+}
+
+
+class Transformation(object):
+    """Transformation CANTOOK Json to RERO-ILS Json."""
+
+    def __init__(self, data=None, logger=None, verbose=False, transform=True):
+        """Constructor."""
+        self.data = data
+        self.logger = logger
+        self.verbose = verbose
+        self.json_dict = {}
+        if data and transform:
+            self._transform()
+
+    def _transform(self):
+        """Call the transformation functions."""
+        # `dir()` is sorted: `trans_constants` runs before `trans_identified_by`,
+        # which may replace the default document type for audio books.
+        for func in dir(self):
+            if func.startswith("trans"):
+                func = getattr(self, func)
+                func()
+
+    def do(self, data):
+        """Do the transformation.
+
+        :param data: json data to transform
+        :returns: rero-ils document data
+        """
+        self.data = data
+        # Start from an empty result: the module level `cantook_json` instance
+        # is reused, optional fields of a previous record must not leak.
+        self.json_dict = {}
+        self._transform()
+        return self.json_dict
+
+    @property
+    def json(self):
+        """Json data."""
+        return self.json_dict or None
+
+    def trans_constants(self):
+        """Add constants."""
+        self.json_dict["$schema"] = get_schema_for_resource("doc")
+        self.json_dict["harvested"] = True
+        self.json_dict["issuance"] = {
+            "main_type": "rdami:1001",
+            "subtype": "materialUnit",
+        }
+        self.json_dict["adminMetadata"] = {"encodingLevel": "Not applicable"}
+        self.json_dict["type"] = [
+            {"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"}
+        ]
+
+    def trans_pid(self):
+        """Transformation pid."""
+        self.json_dict["pid"] = f"cantook-{self.data['id']}"
+
+    def trans_identified_by(self):
+        """Transformation IdentifiedBy."""
+        identified_by = [
+            {
+                "source": "CANTOOK",
+                "type": "bf:Local",
+                "value": f"cantook-{self.data['id']}",
+            }
+        ]
+        for media in self.data.get("media", []):
+            nature = media.get("nature")
+            if nature in ["paper", "epub", "audio"] and media["key_type"] == "isbn13":
+                identified_by.append(
+                    {"type": "bf:Isbn", "value": media.get("key"), "note": nature}
+                )
+            if nature == "audio":
+                self.json_dict["type"] = [
+                    {
+                        "main_type": "docmaintype_audio",
+                        "subtype": "docsubtype_audio_book",
+                    }
+                ]
+        self.json_dict["identifiedBy"] = identified_by
+
+    def trans_title(self):
+        """Transformation Title."""
+        title = {"type": "bf:Title"}
+        if maintitle := self.data.get("title"):
+            title["mainTitle"] = [{"value": maintitle}]
+        if subtitle := self.data.get("subtitle"):
+            title["subtitle"] = [{"value": subtitle}]
+        self.json_dict["title"] = [title]
+
+    def trans_contribution(self):
+        """Transformation Contribution."""
+        contributions = []
+        for contribution in self.data.get("contributors", []):
+            nature = contribution["nature"]
+            role = CONTRIBUTION_NATURE.get(nature, "ctb")
+
+            names = []
+            if last_name := contribution.get("last_name"):
+                names.append(last_name)
+            if first_name := contribution.get("first_name"):
+                names.append(first_name)
+
+            entity = {
+                "role": [role],
+                "entity": {
+                    "authorized_access_point": ", ".join(names),
+                    "type": "bf:Person",
+                },
+            }
+            if entity not in contributions:
+                contributions.append(entity)
+        if contributions:
+            self.json_dict["contribution"] = contributions
+
+    def trans_provision_activity(self):
+        """Transform provisionActivity."""
+        publisher_name = self.data.get("publisher_name", "Publisher unknown")
+        created_at = self.data.get("created_at") or "1990-01-01"
+        # `dateparser.parse` returns None for unparsable dates: use the default.
+        start_date = dateparser.parse(created_at) or dateparser.parse("1990-01-01")
+        self.json_dict["provisionActivity"] = [
+            {
+                "startDate": start_date.year,
+                "statement": [
+                    {"label": [{"value": publisher_name}], "type": "bf:Agent"},
+                    {"label": [{"value": str(start_date.year)}], "type": "Date"},
+                ],
+                "type": "bf:Publication",
+            }
+        ]
+
+    def trans_electronic_locator(self):
+        """Transformation electronicLocator."""
+        electronic_locators = []
+        if cover := self.data.get("cover"):
+            electronic_locators.append(
+                {
+                    "content": "coverImage",
+                    "type": "relatedResource",
+                    "url": cover,
+                }
+            )
+        if flipbook := self.data.get("flipbook"):
+            electronic_locators.append(
+                {
+                    "content": "extract",
+                    "type": "relatedResource",
+                    "url": flipbook,
+                }
+            )
+        if electronic_locators:
+            self.json_dict["electronicLocator"] = electronic_locators
+
+    def trans_fiction(self):
+        """Transformation fiction."""
+        if self.data.get("fiction"):
+            self.json_dict["fiction_statement"] = DocumentFictionType.Fiction.value
+        else:
+            self.json_dict["fiction_statement"] = DocumentFictionType.Unspecified.value
+
+    def trans_language(self):
+        """Transformation language."""
+        if languages := [
+            {"type": "bf:Language", "value": language}
+            for language in self.data.get("languages", [])
+        ]:
+            self.json_dict["language"] = languages
+
+    def trans_orginal_language(self):
+        """Transformation original language."""
+        if language := self.data.get("translated_from"):
+            self.json_dict["originalLanguage"] = [language]
+
+    def trans_subjects(self):
+        """Transformation Subject."""
+        use_standard = ["cantook"]  # feedbooks, thema, bisac
+        subjects = []
+        for classification in self.data.get("classifications", []):
+            if classification.get("standard") in use_standard:
+                for caption in classification.get("captions", []):
+                    if subject := caption.get("fr"):
+                        subjects.append(
+                            {
+                                "entity": {
+                                    "authorized_access_point": subject,
+                                    "type": "bf:Topic",
+                                }
+                            }
+                        )
+        if subjects:
+            self.json_dict["subjects"] = subjects
+
+    def trans_summary(self):
+        """Transformation Summary."""
+        if summary := self.data.get("summary"):
+            self.json_dict["summary"] = [{"label": [{"value": summary}]}]
+
+    def trans_extent(self):
+        """Transformation extent."""
+        if page_count := self.data.get("page_count"):
+            self.json_dict["extent"] = f"{page_count} pages"
+
+    # to be used to create holdings
+    def trans_links(self):
+        """Transformation links."""
+        if link := self.data.get("link"):
+            self.json_dict["link"] = link
+
+    # to be used for deleted records
+    def trans_deleted(self):
+        """Transformation deleted."""
+        if unavailable_since := self.data.get("unavailable_since"):
+            self.json_dict["deleted"] = unavailable_since
+
+
+cantook_json = Transformation()
diff --git a/rero_ils/modules/api_harvester/cli.py b/rero_ils/modules/api_harvester/cli.py
new file mode 100644
index 0000000000..0d419253c4
--- /dev/null
+++ b/rero_ils/modules/api_harvester/cli.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+"""Click command-line interface for API harvester management."""
+
+from __future__ import absolute_import, print_function
+
+import click
+import dateparser
+import yaml
+from flask import current_app
+from flask.cli import with_appcontext
+from werkzeug.local import LocalProxy
+
+from rero_ils.modules.api_harvester.tasks import harvest_records
+
+from .models import ApiHarvestConfig
+from .utils import api_source, get_apiharvest_object
+
+datastore = LocalProxy(lambda: current_app.extensions["security"].datastore)
+
+
+@click.group()
+def api_harvester():
+    """Api harvester commands."""
+
+
+@api_harvester.command("add-source")
+@click.argument("name")
+@click.option("-U", "--url", default="", help="Url")
+@click.option("-n", "--classname", default="", help="Class name")
+@click.option("-c", "--code", default="", help="Code")
+@click.option("-u", "--update", is_flag=True, default=False, help="Update config")
+@with_appcontext
+def add_api_source_config(name, url, classname, code, update):
+    """Add or Update ApiHarvestConfig."""
+    msg = api_source(name=name, url=url, classname=classname, code=code, update=update)
+    click.echo(f"ApiHarvestConfig {name}: {msg}")
+
+
+@api_harvester.command("init-config")
+@click.argument("configfile", type=click.File("rb"))
+@click.option("-u", "--update", is_flag=True, default=False, help="Update config")
+@with_appcontext
+def init_api_harvest_config(configfile, update):
+    """Add or update ApiHarvestConfigs from a YAML file of plain mappings."""
+    if configs := yaml.safe_load(configfile):
+        for name, values in sorted(configs.items()):
+            url = values.get("url", "")
+            classname = values.get("classname", "")
+            code = values.get("code", "")
+            msg = api_source(
+                name=name, url=url, classname=classname, code=code, update=update
+            )
+            click.echo(f"ApiHarvestConfig {name}: {msg}")
+
+    else:
+        click.secho(f"ERROR: no YML config found in: {configfile.name}", fg="red")
+
+
+@api_harvester.command()
+@click.option(
+    "-n", "--name", default=None, help="Name of persistent configuration to use."
+)
+@click.option(
+    "-f",
+    "--from-date",
+    default=None,
+    help="The lower bound date for the harvesting (optional).",
+)
+@click.option(
+    "-k",
+    "--enqueue",
+    is_flag=True,
+    default=False,
+    help="Enqueue harvesting and return immediately.",
+)
+@click.option(
+    "-m",
+    "--harvest_count",
+    type=int,
+    default=-1,
+    help="maximum of records to harvest (optional).",
+)
+@click.option("-v", "--verbose", "verbose", is_flag=True, default=False)
+@with_appcontext
+def harvest(name, from_date, enqueue, harvest_count, verbose):
+    """Harvest records from an API repository."""
+    if name:
+        click.secho(f"Harvest api: {name}", fg="green")
+    if from_date:
+        from_date = dateparser.parse(from_date).isoformat()
+    if enqueue:
+        async_id = harvest_records.delay(
+            name=name, from_date=from_date, harvest_count=harvest_count, verbose=verbose
+        )
+        if verbose:
+            click.echo(f"AsyncResult {async_id}")
+    else:
+        harvest_records(
+            name=name, from_date=from_date, harvest_count=harvest_count, verbose=verbose
+        )
+
+
+@api_harvester.command("info")
+@with_appcontext
+def info():
+    """List infos for tasks."""
+    apis = ApiHarvestConfig.query.order_by(ApiHarvestConfig.name.asc()).all()
+    for api in apis:
+        click.echo(api.name)
+        click.echo(f"\tlastrun : {api.lastrun}")
+        click.echo(f"\turl : {api.url}")
+        click.echo(f"\tclassname : {api.classname}")
+        click.echo(f"\tcode : {api.code}")
+
+
+@api_harvester.command()
+@click.argument("name")
+@click.option("-d", "--date", default=None, help="Set last run (default: now).")
+@with_appcontext
+def set_last_run(name, date):
+    """Set last run."""
+    if config := get_apiharvest_object(name=name):
+        new_date = config.update_lastrun(new_date=date)
+        click.secho(f"Set last run {name}: {new_date}", fg="green")
+    else:
+        click.secho(f"No config found: {name}", fg="red")
diff
--git a/rero_ils/modules/api_harvester/errors.py b/rero_ils/modules/api_harvester/errors.py
new file mode 100644
index 0000000000..b7c3977fc5
--- /dev/null
+++ b/rero_ils/modules/api_harvester/errors.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Api harvester errors."""
+
+from __future__ import absolute_import, print_function
+
+
+class ApiHarvesterError(Exception):
+    """Base exception for API harvester."""
+
+
+class ApiRequestError(ApiHarvesterError):
+    """Error with the Api request."""
+
+
+class NameOrUrlMissing(ApiHarvesterError):
+    """Name or url for harvesting missing."""
+
+
+class WrongDateCombination(ApiHarvesterError):
+    """'Until' date is larger than 'from' date."""
+
+
+class IdentifiersOrDates(ApiHarvesterError):
+    """Identifiers cannot be used in combination with dates."""
+
+
+class ApiHarvesterConfigNotFound(ApiHarvesterError):
+    """No ApiHarvesterConfig was found."""
diff --git a/rero_ils/modules/apiharvester/models.py b/rero_ils/modules/api_harvester/models.py
similarity index 72%
rename from rero_ils/modules/apiharvester/models.py
rename to rero_ils/modules/api_harvester/models.py
index c657a127ea..dfd9f657ac 100644
--- a/rero_ils/modules/apiharvester/models.py
+++ b/rero_ils/modules/api_harvester/models.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # RERO ILS
-# Copyright (C) 2019-2022 RERO
+# Copyright (C) 2024 RERO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -20,27 +20,32 @@
 from __future__ import absolute_import
 
 from datetime import datetime, timezone
+from enum import Enum
 
-import pytz
 from invenio_db import db
-from invenio_pidstore.models import RecordIdentifier
 
 
-class ApiHarvestConfig(RecordIdentifier):
-    """Sequence generator for Document identifiers."""
+class HarvestActionType(Enum):
+    """Harvest action types."""
+
+    DELETED = "DELETED"
+    UPDATED = "UPDATED"
+    CREATED = "CREATED"
+    NOTSET = "NOTSET"
+
+
+class ApiHarvestConfig(db.Model):
+    """Represents an ApiHarvestConfig record."""
 
     __tablename__ = "apiharvester_config"
-    __mapper_args__ = {"concrete": True}
 
     id = db.Column(db.Integer, primary_key=True)
     url = db.Column(db.String(255), nullable=False, server_default="")
     name = db.Column(db.String(255), nullable=False)
-    mimetype = db.Column(db.String(255), nullable=False)
-    size = db.Column(db.Integer, nullable=False)
-    comment = db.Column(db.Text, nullable=True)
-    default_last_run = datetime.strptime("1900-1-1", "%Y-%m-%d")
+    classname = db.Column(db.String(255), nullable=False)
+    code = db.Column(db.Text, nullable=True)
     lastrun = db.Column(
-        db.DateTime, default=pytz.utc.localize(default_last_run), nullable=True
+        db.DateTime, default=datetime(year=1900, month=1, day=1), nullable=True
     )
 
     def save(self):
@@ -51,3 +56,5 @@ def save(self):
     def update_lastrun(self, new_date=None):
         """Update the 'lastrun' attribute of object to now."""
         self.lastrun = new_date or datetime.now(timezone.utc)
+        self.save()
+        return self.lastrun
diff --git a/rero_ils/modules/api_harvester/tasks.py b/rero_ils/modules/api_harvester/tasks.py
new file mode 100644
index 0000000000..1be686969f
--- /dev/null
+++ b/rero_ils/modules/api_harvester/tasks.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2019-2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""ApiHarvester tasks."""
+
+from __future__ import absolute_import, print_function
+
+import click
+from celery import shared_task
+from flask import current_app
+from invenio_records_rest.utils import obj_or_import_string
+
+from rero_ils.modules.utils import set_timestamp
+
+from .errors import ApiHarvesterConfigNotFound
+from .utils import get_apiharvest_object
+
+
+@shared_task(ignore_result=True, soft_time_limit=3600)
+def harvest_records(name, from_date=None, harvest_count=-1, verbose=False):
+    """Harvest the records of one configured API source.
+
+    :param name: name of the ApiHarvestConfig to use.
+    :param from_date: lower bound date as ISO string; defaults to the last
+        run stored in the configuration.
+    :param harvest_count: maximum of records to harvest; when negative (the
+        default) the stored last run is also moved to now.
+    :param verbose: echo the harvest summary to the console.
+    :return: tuple ``(count, total)``; ``(-1, 0)`` if no configuration exists.
+    """
+    count, total = -1, 0
+    try:
+        config = get_apiharvest_object(name=name)
+    except ApiHarvesterConfigNotFound:
+        # The lookup raises for an unknown name instead of returning None.
+        current_app.logger.error(f"No config found: {name}")
+        return count, total
+
+    if not from_date:
+        from_date = config.lastrun.isoformat()
+    if harvest_count < 0:
+        config.update_lastrun()
+    msg = f"API harvest {name} class name: {config.classname} "
+    msg += f"from date: {from_date} url: {config.url}"
+
+    current_app.logger.info(msg)
+    HarvestClass = obj_or_import_string(config.classname)
+    harvest = HarvestClass(
+        name=name, verbose=verbose, harvest_count=harvest_count, process=True
+    )
+    count, total = harvest.harvest_records(from_date=from_date)
+    msg = (
+        f"API harvest {name} items={total} |"
+        f" got={count} new={harvest.count_new}"
+        f" updated={harvest.count_upd} deleted={harvest.count_del}"
+    )
+    if verbose:
+        click.echo(msg)
+    current_app.logger.info(msg)
+    timestamp_data = {
+        name: {
+            "name": name,
+            "total": total,
+            "count": count,
+            "new": harvest.count_new,
+            "update": harvest.count_upd,
+            "delete": harvest.count_del,
+            "from": from_date,
+            "max": harvest_count,
+        }
+    }
+    set_timestamp("api_harvester", **timestamp_data)
+    return count, total
diff --git a/rero_ils/modules/api_harvester/utils.py b/rero_ils/modules/api_harvester/utils.py
new file mode 100644
index 0000000000..b54659a2a8
--- /dev/null
+++ b/rero_ils/modules/api_harvester/utils.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""ApiHarvester utils."""
+
+from __future__ import absolute_import, print_function
+
+from flask import current_app
+from invenio_db import db
+from invenio_oaiserver.models import OAISet
+from sqlalchemy.exc import OperationalError
+
+from .errors import ApiHarvesterConfigNotFound
+from .models import ApiHarvestConfig
+
+
+def add_set(spec, name, pattern, description="..."):
+    """Add OAI set.
+
+    :param spec: set identifier
+    :param name: human readable name of the set
+    :param pattern: search pattern to get records
+    :param description: human readable description
+    """
+    try:
+        oaiset = OAISet(
+            spec=spec, name=name, description=description, system_created=False
+        )
+        oaiset.search_pattern = pattern
+        db.session.add(oaiset)
+        db.session.commit()
+        msg = f"OAIset added: {name}"
+    except Exception as err:
+        db.session.rollback()
+        msg = f"OAIset exist: {name} {err}"
+    return msg
+
+
+def api_source(name, url="", classname=None, code="", update=False):
+    """Add ApiHarvestConfig to DB.
+
+    :param name: name for the configuration
+    :param url: harvesting url
+    :param classname: Class responsible for getting record_serializers
+    :param code: code added to electronic_location['nonpublic_note']
+    :param update: update configuration if exist
+    :return: message describing what was done
+    """
+    with current_app.app_context():
+        msg = "No Update"
+        source = ApiHarvestConfig.query.filter_by(name=name).first()
+        if not source:
+            source = ApiHarvestConfig(
+                name=name, url=url, classname=classname, code=code
+            )
+            source.save()
+            db.session.commit()
+            msg = "Add"
+        elif update:
+            source.name = name
+            msg = []
+            # Only overwrite the fields that were explicitly given: the CLI
+            # passes "" for omitted options and classname must not be blanked.
+            if url:
+                source.url = url
+                msg.append(f"url:{url}")
+            if classname:
+                source.classname = classname
+                msg.append(f"classname:{classname}")
+            if code:
+                source.code = code
+                msg.append(f"code:{code}")
+            db.session.commit()
+            msg = f'Update {", ".join(msg)}'
+        return msg
+
+
+def get_apiharvest_object(name):
+    """Query and returns an ApiHarvestConfig object based on its name.
+
+    :param name: The name of the ApiHarvestConfig object.
+    :return: The ApiHarvestConfig object.
+    :raises ApiHarvesterConfigNotFound: if no configuration has this name.
+    :raises OperationalError: if the query still fails after 5 attempts.
+    """
+    max_attempts = 5
+    for attempt in range(1, max_attempts + 1):
+        try:
+            obj = ApiHarvestConfig.query.filter_by(name=name).first()
+            break
+        except OperationalError:
+            current_app.logger.error(
+                f"ApiHarvestConfig OperationalError: {attempt} {name}"
+            )
+            if attempt == max_attempts:
+                # Out of retries: surface the database error instead of
+                # failing later on an unbound result variable.
+                raise
+
+    if not obj:
+        raise ApiHarvesterConfigNotFound(
+            f"Unable to find ApiHarvesterConfig obj with name {name}."
+        )
+
+    return obj
diff --git a/rero_ils/modules/apiharvester/cli.py b/rero_ils/modules/apiharvester/cli.py
deleted file mode 100644
index 81a3e4ffdf..0000000000
--- a/rero_ils/modules/apiharvester/cli.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# RERO ILS
-# Copyright (C) 2019-2022 RERO
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
- -"""Click command-line interface for mef contribution management.""" - -from __future__ import absolute_import, print_function - -import click -import yaml -from flask import current_app -from flask.cli import with_appcontext -from werkzeug.local import LocalProxy - -from rero_ils.modules.apiharvester.tasks import harvest_records - -from .models import ApiHarvestConfig -from .utils import api_source - -datastore = LocalProxy(lambda: current_app.extensions["security"].datastore) - - -@click.group() -def apiharvester(): - """Api harvester commands.""" - - -@apiharvester.command("source") -@click.argument("name") -@click.option("-U", "--url", default="", help="Url") -@click.option("-m", "--mimetype", default="", help="Mimetype") -@click.option("-s", "--size", default=-1, type=int, help="Size") -@click.option("-c", "--comment", default="", help="Comment") -@click.option("-u", "--update", is_flag=True, default=False, help="Update config") -@with_appcontext -def api_source_config(name, url, mimetype, size, comment, update): - """Add or Update ApiHarvestConfig.""" - click.echo(f"ApiHarvesterConfig: {name} ", nl=False) - msg = api_source( - name=name, url=url, mimetype=mimetype, size=size, comment=comment, update=update - ) - click.echo(msg) - - -@apiharvester.command("sources") -@click.argument("configfile", type=click.File("rb")) -@click.option("-u", "--update", is_flag=True, default=False, help="Update config") -@with_appcontext -def api_source_config_from_file(configfile, update): - """Add or update ApiHarvestConfigs from file.""" - configs = yaml.load(configfile, Loader=yaml.FullLoader) - for name, values in sorted(configs.items()): - url = values.get("url", "") - mimetype = values.get("mimetype", "") - size = values.get("size", 100) - comment = values.get("comment", "") - click.echo(f"ApiHarvesterConfig: {name} {url} ", nl=False) - msg = api_source( - name=name, - url=url, - mimetype=mimetype, - size=size, - comment=comment, - update=update, - ) - click.echo(msg) - 
- -@apiharvester.command("harvest") -@click.option( - "-n", "--name", default=None, help="Name of persistent configuration to use." -) -@click.option( - "-f", - "--from-date", - default=None, - help="The lower bound date for the harvesting (optional).", -) -@click.option( - "-u", - "--url", - default=None, - help="The upper bound date for the harvesting (optional).", -) -@click.option( - "-k", - "--enqueue", - is_flag=True, - default=False, - help="Enqueue harvesting and return immediately.", -) -@click.option( - "--signals/--no-signals", - default=True, - help="Signals sent with Api harvesting results.", -) -@click.option("-s", "--size", type=int, default=0, help="Size of chunks (optional).") -@click.option( - "-m", - "--max_results", - type=int, - default=0, - help="maximum of records to harvest (optional).", -) -@click.option("-v", "--verbose", "verbose", is_flag=True, default=False) -@with_appcontext -def harvest(name, from_date, url, enqueue, signals, size, max_results, verbose): - """Harvest api.""" - if name: - click.secho(f"Harvest api: {name}", fg="green") - elif url: - click.secho(f"Harvest api: {url}", fg="green") - if enqueue: - harvest_records.delay( - url=url, - name=name, - from_date=from_date, - signals=signals, - size=size, - max_results=max_results, - verbose=verbose, - ) - else: - harvest_records( - url=url, - name=name, - from_date=from_date, - signals=signals, - size=size, - max_results=max_results, - verbose=verbose, - ) - - -@apiharvester.command("info") -@with_appcontext -def info(): - """List infos for tasks.""" - apis = ApiHarvestConfig.query.all() - for api in apis: - click.echo(api.name) - click.echo(f"\tlastrun : {api.lastrun}") - click.echo(f"\turl : {api.url}") - click.echo(f"\tmimetype : {api.mimetype}") - click.echo(f"\tsize : {api.size}") - click.echo(f"\tcomment : {api.comment}") diff --git a/rero_ils/modules/apiharvester/tasks.py b/rero_ils/modules/apiharvester/tasks.py deleted file mode 100644 index fdc6134fba..0000000000 --- 
a/rero_ils/modules/apiharvester/tasks.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""ApiHarvester tasks.""" - -from __future__ import absolute_import, print_function - -from celery import shared_task - -from .models import ApiHarvestConfig -from .utils import get_records - - -@shared_task(ignore_result=True) -def harvest_records( - url=None, - name=None, - from_date=None, - signals=True, - size=0, - max_results=0, - verbose=False, -): - """Harvest records.""" - config = ApiHarvestConfig.query.filter_by(name=name).first() - if config: - if not url: - url = config.url - if not from_date: - from_date = config.lastrun - config.update_lastrun() - if size == 0: - size = config.size - - for next, records in get_records( - url=url, - name=name, - from_date=from_date, - size=size, - max_results=max_results, - signals=signals, - verbose=verbose, - ): - pass diff --git a/rero_ils/modules/apiharvester/utils.py b/rero_ils/modules/apiharvester/utils.py deleted file mode 100644 index 3365da4b80..0000000000 --- a/rero_ils/modules/apiharvester/utils.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, 
version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""ApiHarvester utils.""" - -from __future__ import absolute_import, print_function - -import click -import requests -from dateutil import parser -from flask import current_app -from invenio_db import db - -from .models import ApiHarvestConfig -from .signals import apiharvest_part - - -def api_source(name, url="", mimetype="", size=100, comment="", update=False): - """Add ApiHarvesterConfig.""" - with current_app.app_context(): - source = ApiHarvestConfig.query.filter_by(name=name).first() - if not source: - source = ApiHarvestConfig( - name=name, url=url, mimetype=mimetype, size=100, comment=comment - ) - source.save() - db.session.commit() - return "Added" - elif update: - source.name = name - msg = [] - if url != "": - source.url = url - msg.append(f"url:{url}") - if mimetype != "": - source.mimetype = mimetype - msg.append(f"mimetype:{mimetype}") - if size != -1: - source.size = size - msg.append(f"size:{size}") - if comment != "": - source.comment = comment - msg.append(f"comment:{comment}") - db.session.commit() - return f'Updated: {", ".join(msg)}' - return "Not Updated" - - -def extract_records(data): - """Extract a record from REST data.""" - records = [] - hits = data.get("hits", {}).get("hits", {}) - for hit in hits: - # pid = data.get('id', '') - # updated = data.get('updated', '') - # links = data.get('links', {}).get('self', '') - record = hit.get("metadata", "") - records.append(record) - return records - - -def get_records( - url=None, - name=None, - from_date=None, - max_results=0, - size=100, - signals=True, - verbose=False, - **kwargs, -): - 
"""Harvest multiple records from invenio api.""" - url += f"/?size={size}" - if from_date: - if isinstance(from_date, str): - from_date = parser.parse(from_date) - from_date = from_date.isoformat() - # we have to urlencode the : from the time with \: - from_date = from_date.replace(":", "%5C:") - url += f"&q=_updated:>{from_date}" - url += f"&size={size}" - - if verbose: - click.echo(f"Get records from {url}") - - try: - count = 0 - request = requests.get(url) - data = request.json() - - total = data["hits"]["total"]["value"] - click.echo(f"API records found: {total}") - - next_url = data.get("links", {}).get("self", True) - while next_url and (count < max_results or max_results == 0): - records = extract_records(data) - count += len(records) - - if count - max_results > 0 and max_results != 0: - records = records[:max_results] - - request = requests.get(next_url) - data = request.json() - if signals: - apiharvest_part.send( - records=records, name=name, url=next, verbose=verbose, **kwargs - ) - else: - yield next_url, records - next_url = data.get("links", {}).get("next", None) - except Exception as error: - click.secho(f"Harvesting API ConnectionRefusedError: {error}", fg="red") - yield url, [] diff --git a/rero_ils/modules/cli/reroils.py b/rero_ils/modules/cli/reroils.py index a2df4a82c2..f56900e22c 100644 --- a/rero_ils/modules/cli/reroils.py +++ b/rero_ils/modules/cli/reroils.py @@ -23,8 +23,7 @@ import click from rero_ils.modules.acquisition.cli import acquisition -from rero_ils.modules.apiharvester.cli import apiharvester -from rero_ils.modules.ebooks.cli import oaiharvester +from rero_ils.modules.api_harvester.cli import api_harvester from rero_ils.modules.entities.remote_entities.cli import entity from rero_ils.modules.migrations.cli import migrations from rero_ils.modules.monitoring.cli import monitoring @@ -43,14 +42,13 @@ def reroils(): reroils.add_command(acquisition) -reroils.add_command(apiharvester) +reroils.add_command(api_harvester) 
reroils.add_command(entity) reroils.add_command(fixtures) reroils.add_command(index) reroils.add_command(migrations) reroils.add_command(monitoring) reroils.add_command(notifications) -reroils.add_command(oaiharvester) reroils.add_command(scheduler) reroils.add_command(stats) reroils.add_command(utils) diff --git a/rero_ils/modules/documents/serializers/base.py b/rero_ils/modules/documents/serializers/base.py index 28dadc771f..b2b221fb53 100644 --- a/rero_ils/modules/documents/serializers/base.py +++ b/rero_ils/modules/documents/serializers/base.py @@ -76,7 +76,7 @@ def __init__(self, record, **kwargs): @abstractmethod def format(self): """Return formatted record.""" - raise NotImplementedError + raise NotImplementedError() def _get_document_types(self): """Return document types.""" diff --git a/rero_ils/modules/ebooks/cli.py b/rero_ils/modules/ebooks/cli.py deleted file mode 100644 index 881fe918a6..0000000000 --- a/rero_ils/modules/ebooks/cli.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -"""Click command-line interface for ebook record management.""" - -from __future__ import absolute_import, print_function - -import click -import yaml -from flask.cli import with_appcontext -from invenio_oaiharvester.cli import oaiharvester -from invenio_oaiharvester.models import OAIHarvestConfig - -from .utils import add_oai_source - - -@oaiharvester.command("addsource") -@click.argument("name") -@click.argument("baseurl") -@click.option( - "-m", "--metadataprefix", default="marc21", help="The prefix for the metadata" -) -@click.option( - "-s", "--setspecs", default="", help="The ‘set’ criteria for the harvesting" -) -@click.option("-c", "--comment", default="", help="Comment") -@click.option("-u", "--update", is_flag=True, default=False, help="Update config") -@with_appcontext -def add_oai_source_config(name, baseurl, metadataprefix, setspecs, comment, update): - """Add OAIHarvestConfig.""" - click.echo(f"Add OAIHarvestConfig: {name} ", nl=False) - msg = add_oai_source( - name=name, - baseurl=baseurl, - metadataprefix=metadataprefix, - setspecs=setspecs, - comment=comment, - update=update, - ) - click.echo(msg) - - -@oaiharvester.command("initconfig") -@click.argument("configfile", type=click.File("rb")) -@click.option("-u", "--update", is_flag=True, default=False, help="Update config") -@with_appcontext -def init_oai_harvest_config(configfile, update): - """Init OAIHarvestConfig.""" - configs = yaml.load(configfile, Loader=yaml.FullLoader) - for name, values in sorted(configs.items()): - baseurl = values["baseurl"] - metadataprefix = values.get("metadataprefix", "marc21") - setspecs = values.get("setspecs", "") - comment = values.get("comment", "") - click.echo(f"Add OAIHarvestConfig: {name} {baseurl} ", nl=False) - msg = add_oai_source( - name=name, - baseurl=baseurl, - metadataprefix=metadataprefix, - setspecs=setspecs, - comment=comment, - update=update, - ) - click.echo(msg) - - -@oaiharvester.command("info") -@with_appcontext -def info(): - """List 
infos for tasks.""" - oais = OAIHarvestConfig.query.all() - for oai in oais: - click.echo(oai.name) - click.echo("\tlastrun : ", nl=False) - click.echo(oai.lastrun) - click.echo("\tbaseurl : " + oai.baseurl) - click.echo("\tmetadataprefix: " + oai.metadataprefix) - click.echo("\tcomment : " + oai.comment) - click.echo("\tsetspecs : " + oai.setspecs) diff --git a/rero_ils/modules/ebooks/dojson/contrib/__init__.py b/rero_ils/modules/ebooks/dojson/contrib/__init__.py deleted file mode 100644 index 28b47606ef..0000000000 --- a/rero_ils/modules/ebooks/dojson/contrib/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""DOJSON contrib for rero-ils.""" diff --git a/rero_ils/modules/ebooks/dojson/contrib/marc21/model.py b/rero_ils/modules/ebooks/dojson/contrib/marc21/model.py deleted file mode 100644 index 73c4144dc7..0000000000 --- a/rero_ils/modules/ebooks/dojson/contrib/marc21/model.py +++ /dev/null @@ -1,511 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""rero-ils MARC21 model definition.""" - - -import contextlib -import re - -from dojson import utils -from isbnlib import EAN13 - -from rero_ils.dojson.utils import ( - ReroIlsMarc21Overdo, - TitlePartList, - add_note, - extract_subtitle_and_parallel_titles_from_field_245_b, - get_field_items, - get_field_link_data, - make_year, - remove_trailing_punctuation, -) -from rero_ils.modules.documents.dojson.contrib.marc21tojson.utils import do_language -from rero_ils.modules.documents.models import DocumentFictionType -from rero_ils.modules.documents.utils import create_authorized_access_point -from rero_ils.modules.entities.models import EntityType - -marc21 = ReroIlsMarc21Overdo() - - -@marc21.over("issuance", "leader") -@utils.ignore_value -def marc21_to_issuance(self, key, value): - """Set the mode of issuance.""" - self["issuance"] = dict(main_type="rdami:1001", subtype="materialUnit") - if marc21.admin_meta_data: - self["adminMetadata"] = marc21.admin_meta_data - self["fiction_statement"] = DocumentFictionType.Unspecified.value - - -@marc21.over("language", "^008") -@utils.ignore_value -def marc21_to_language_from_008(self, key, value): - """Get languages. - - languages: 008 and 041 [$a, repetitive] - """ - return do_language(self, marc21) - - -@marc21.over("language", "^041") -@utils.ignore_value -def marc21_to_language_from_041(self, key, value): - """Get languages. 
- - languages: 008 and 041 [$a, repetitive] - """ - # if we dont have languages from 008 try to set it with 041 - return do_language(self, marc21) - - -@marc21.over("identifiedBy", "^020..") -@utils.ignore_value -def marc21_to_identifier_isbn(self, key, value): - """Get identifier isbn. - - identifiers_isbn: 020 $a - """ - if isbn13 := EAN13(value.get("a")): - identifiers = self.get("identifiedBy", []) - identifier = {"type": "bf:Isbn", "value": isbn13} - identifiers.append(identifier) - return identifiers - return None - - -@marc21.over("type", "^0248.$") -def marc21_to_type(self, key, value): - """Get document type.""" - if value.get("a").find("cantook") > -1: - return [{"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"}] - return None - - -@marc21.over("identifiedBy", "^035..") -@utils.ignore_value -def marc21_to_identifier_rero_id(self, key, value): - """Get identifier reroId. - - identifiers:reroID: 035$a - """ - identifiers = self.get("identifiedBy", []) - identifier = {"type": "bf:Local", "value": value.get("a")} - identifiers.append(identifier) - return identifiers - - -@marc21.over("contribution", "(^100|^700|^710|^711)..") -@utils.for_each_value -@utils.ignore_value -def marc21_to_contribution(self, key, value): - """Get contribution.""" - if key[4] == "2" or key[:3] not in ["100", "700", "710", "711"]: - return None - agent_data = {"type": "bf:Person"} - if value.get("a"): - name = utils.force_list(value.get("a"))[0] - agent_data["preferred_name"] = remove_trailing_punctuation(name) - - # 100|700 Person - if key[:3] in ["100", "700"]: - if value.get("b"): - numeration = utils.force_list(value.get("b"))[0] - agent_data["numeration"] = remove_trailing_punctuation(numeration) - if value.get("c"): - qualifier = utils.force_list(value.get("c"))[0] - agent_data["qualifier"] = remove_trailing_punctuation(qualifier) - if value.get("d"): - date = utils.force_list(value.get("d"))[0] - date = date.rstrip(",") - dates = 
remove_trailing_punctuation(date).split("-") - with contextlib.suppress(Exception): - if date_of_birth := dates[0].strip(): - agent_data["date_of_birth"] = date_of_birth - with contextlib.suppress(Exception): - if date_of_death := dates[1].strip(): - agent_data["date_of_death"] = date_of_death - if value.get("q"): - fuller_form_of_name = utils.force_list(value.get("q"))[0] - agent_data["fuller_form_of_name"] = ( - remove_trailing_punctuation(fuller_form_of_name).lstrip("(").rstrip(")") - ) - - elif key[:3] in ["710", "711"]: - agent_data["type"] = "bf:Organisation" - agent_data["conference"] = key[:3] == "711" - if value.get("e"): - subordinate_units = [ - subordinate_unit.rstrip(".") - for subordinate_unit in utils.force_list(value.get("e")) - ] - - agent_data["subordinate_unit"] = subordinate_units - if value.get("n"): - numbering = utils.force_list(value.get("n"))[0] - agent_data["numbering"] = ( - remove_trailing_punctuation(numbering).lstrip("(").rstrip(")") - ) - if value.get("d"): - conference_date = utils.force_list(value.get("d"))[0] - if ( - conference_date := remove_trailing_punctuation(conference_date) - .lstrip("(") - .rstrip(")") - ): - agent_data["conference_date"] = conference_date - if value.get("c"): - place = utils.force_list(value.get("c"))[0] - if place := remove_trailing_punctuation(place).lstrip("(").rstrip(")"): - agent_data["place"] = place - agent = { - "type": agent_data["type"], - "authorized_access_point": create_authorized_access_point(agent_data), - } - if agent_data.get("identifiedBy"): - agent["identifiedBy"] = agent_data["identifiedBy"] - roles = ["aut"] - if value.get("4"): - roles = list(utils.force_list(value.get("4"))) - elif key[:3] == "100": - roles = ["cre"] - elif key[:3] == "711": - roles = ["aut"] - else: - roles = ["ctb"] - return {"entity": agent, "role": roles} - - -@marc21.over("title", "^245..") -@utils.ignore_value -def marc21_to_title(self, key, value): - """Get title data. 
- - field 245: - $a : non repetitive - $b : non repetitive - $c : non repetitive - $n : repetitive - $p : repetitive - $6 : non repetitive - field 246: - $a : non repetitive - $n : repetitive - $p : repetitive - $6 : non repetitive - """ - subfield_245_a = "" - subfield_245_b = "" - if fields_245 := marc21.get_fields("245"): - subfields_245_a = marc21.get_subfields(fields_245[0], "a") - subfields_245_b = marc21.get_subfields(fields_245[0], "b") - if subfields_245_a: - subfield_245_a = subfields_245_a[0] - if subfields_245_b: - subfield_245_b = subfields_245_b[0] - field_245_a_end_with_equal = re.search(r"\s*=\s*$", subfield_245_a) - field_245_a_end_with_colon = re.search(r"\s*:\s*$", subfield_245_a) - field_245_a_end_with_semicolon = re.search(r"\s*;\s*$", subfield_245_a) - field_245_b_contains_equal = re.search(r"=", subfield_245_b) - - fields_246 = marc21.get_fields("246") - subfield_246_a = "" - if fields_246: - if subfields_246_a := marc21.get_subfields(fields_246[0], "a"): - subfield_246_a = subfields_246_a[0] - - tag_link, link = get_field_link_data(value) - items = get_field_items(value) - index = 1 - title_list = [] - title_data = {} - part_list = TitlePartList(part_number_code="n", part_name_code="p") - parallel_titles = [] - pararalel_title_data_list = [] - pararalel_title_string_set = set() - responsibility = {} - - subfield_selection = {"a", "b", "c", "n", "p"} - for blob_key, blob_value in items: - if blob_key in subfield_selection: - value_data = marc21.build_value_with_alternate_graphic( - "245", blob_key, blob_value, index, link, ",.", ":;/-=" - ) - if blob_key in {"a", "b", "c"}: - subfield_selection.remove(blob_key) - if blob_key == "a": - title_data["mainTitle"] = value_data - elif blob_key == "b": - if subfield_246_a: - subtitle, parallel_titles, pararalel_title_string_set = ( - extract_subtitle_and_parallel_titles_from_field_245_b( - value_data, field_245_a_end_with_equal - ) - ) - if subtitle: - title_data["subtitle"] = subtitle - elif 
value_data: - title_data["subtitle"] = value_data - elif blob_key == "c": - responsibility = marc21.build_responsibility_data(value_data) - elif blob_key in ["n", "p"]: - part_list.update_part(value_data, blob_key, blob_value) - if blob_key != "__order__": - index += 1 - title_data["type"] = "bf:Title" - if the_part_list := part_list.get_part_list(): - title_data["part"] = the_part_list - if title_data: - title_list.append(title_data) - variant_title_list = marc21.build_variant_title_data(pararalel_title_string_set) - - title_list.extend(iter(parallel_titles)) - title_list.extend(iter(variant_title_list)) - if responsibility: - self["responsibilityStatement"] = responsibility - return title_list or None - - -@marc21.over("editionStatement", "^250..") -@utils.for_each_value -@utils.ignore_value -def marc21_to_edition_statement(self, key, value): - """Get edition statement data. - - editionDesignation: 250 [$a non repetitive] (without trailing ponctuation) - responsibility: 250 [$b non repetitive] - """ - edition_data = {} - if subfields_a := utils.force_list(value.get("a")): - subfield_a = remove_trailing_punctuation(subfields_a[0]) - edition_data["editionDesignation"] = [{"value": subfield_a}] - if subfields_b := utils.force_list(value.get("b")): - subfields_b = subfields_b[0] - edition_data["responsibility"] = [{"value": subfields_b}] - return edition_data or None - - -@marc21.over("copyrightDate", "^264.4") -@utils.ignore_value -def marc21_to_copyright_date(self, key, value): - """Get Copyright Date.""" - copyright_dates = self.get("copyrightDate", []) - copyright_date = value.get("c") - if copyright_date: - if match := re.search(r"^([©℗])+\s*(\d{4}.*)", copyright_date): - copyright_date = " ".join((match.group(1), match.group(2))) - else: - raise ValueError("Bad format of copyright date") - copyright_dates.append(copyright_date) - return copyright_dates or None - - -@marc21.over("provisionActivity", "^(260..|264.[_0-3])") -@utils.for_each_value 
-@utils.ignore_value -def marc21_to_provision_activity(self, key, value): - """Get publisher data. - - publisher.name: 264 [$b repetitive] - publisher.place: 264 [$a repetitive] - publicationDate: 264 [$c repetitive] (but take only the first one) - """ - - def build_statement(field_value, ind2): - - def build_place_or_agent_data(code, label): - type_per_code = {"a": EntityType.PLACE, "b": EntityType.AGENT} - return ( - {"type": type_per_code[code], "label": [{"value": value}]} - if (value := remove_trailing_punctuation(label)) - else None - ) - - # function build_statement start here - statement = [] - items = get_field_items(field_value) - for blob_key, blob_value in items: - if blob_key in ("a", "b"): - place_or_agent_data = build_place_or_agent_data(blob_key, blob_value) - if place_or_agent_data: - statement.append(place_or_agent_data) - return statement or None - - def build_place(marc21): - place = {} - if marc21.country: - place["country"] = marc21.country - if place: - place["type"] = EntityType.PLACE - return place - - # the function marc21_to_provision_activity start here - ind2 = key[4] - type_per_ind2 = { - " ": "bf:Publication", - "_": "bf:Publication", - "0": "bf:Production", - "1": "bf:Publication", - "2": "bf:Distribution", - "3": "bf:Manufacture", - } - if key[:3] == "260": - ind2 = "1" # to force type to bf:Publication for field 260 - publication = { - "type": type_per_ind2[ind2], - "statement": [], - } - - publication["statement"] = build_statement(value, ind2) - - subfields_c = utils.force_list(value.get("c")) - if subfields_c: - subfield_c = subfields_c[0] - publication["statement"].append( - {"label": [{"value": subfield_c}], "type": "Date"} - ) - if ind2 in (" ", "1"): - dates = subfield_c.replace("[", "").replace("]", "").split("-") - try: - start_date = make_year(dates[0]) - if start_date: - publication["startDate"] = start_date - except Exception: - pass - try: - end_date = make_year(dates[1]) - if end_date: - publication["endDate"] = 
end_date - except Exception: - pass - place = build_place(marc21) - if place and place.get("country") != "xx": - publication["place"] = [place] - - return publication or None - - -@marc21.over("extent", "^300..") -@utils.ignore_value -def marc21_to_description(self, key, value): - """Get extent. - - extent: 300$a (the first one if many) - """ - if value.get("a") and not self.get("extent", None): - self["extent"] = remove_trailing_punctuation( - utils.force_list(value.get("a"))[0] - ) - return None - - -@marc21.over("note", "^500..") -@utils.for_each_value -@utils.ignore_value -def marc21_to_notes(self, key, value): - """Get notes. - - note: [500$a repetitive] - """ - add_note(dict(noteType="general", label=value.get("a", "")), self) - - return None - - -@marc21.over("summary", "^520..") -@utils.for_each_value -@utils.ignore_value -def marc21_to_summary(self, key, value): - """Get summary from repetitive field 520.""" - key_per_code = {"a": "label", "c": "source"} - # parse field 520 subfields for extracting: - # summary and source parts - tag_link, link = get_field_link_data(value) - items = get_field_items(value) - index = 1 - summary = {} - subfield_selection = {"a", "c"} - for blob_key, blob_value in items: - if blob_key in subfield_selection: - subfield_selection.remove(blob_key) - if blob_key == "a": - summary_data = marc21.build_value_with_alternate_graphic( - "520", blob_key, blob_value, index, link, ",.", ":;/-=" - ) - else: - summary_data = blob_value - if summary_data: - summary[key_per_code[blob_key]] = summary_data - if blob_key != "__order__": - index += 1 - return summary or None - - -@marc21.over("subjects", "^6....") -@utils.for_each_value -@utils.ignore_value -@utils.ignore_value -def marc21_to_subjects(self, key, value): - """Get subjects. 
- - subjects: 6xx [duplicates could exist between several vocabularies, - if possible deduplicate] - """ - seen = {} - for subject in utils.force_list(value.get("a")): - subject = {"type": EntityType.TOPIC, "authorized_access_point": subject} - str_subject = str(subject) - if str_subject not in seen: - seen[str_subject] = 1 - self.setdefault("subjects", []).append(dict(entity=subject)) - return None - - -@marc21.over("electronicLocator", "^8564.") -@utils.for_each_value -@utils.ignore_value -def marc21_electronicLocator(self, key, value): - """Get electronic locator.""" - indicator2 = key[4] - electronic_locator = {} - url = utils.force_list(value.get("u"))[0].strip() - subfield_3 = value.get("3") # materials_specified - if subfield_3: - subfield_3 = utils.force_list(subfield_3)[0] - if indicator2 == "2": - if subfield_3 and subfield_3 == "Image de couverture": - electronic_locator = { - "url": url, - "type": "relatedResource", - "content": "coverImage", - } - elif indicator2 == "0": - if subfield_x := value.get("x"): # nonpublic_note - electronic_locator = { - "url": url, - "type": "resource", - "source": utils.force_list(subfield_x)[0], - } - if subfield_q := value.get("q"): # electronic_format_type - if subfield_q == "audio": - self["type"] = [ - { - "main_type": "docmaintype_audio", - "subtype": "docsubtype_audio_book", - } - ] - return electronic_locator or None diff --git a/rero_ils/modules/ebooks/receivers.py b/rero_ils/modules/ebooks/receivers.py deleted file mode 100644 index b8fc9442ba..0000000000 --- a/rero_ils/modules/ebooks/receivers.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""Signals connections for ebooks document.""" - -from dojson.contrib.marc21.utils import create_record -from flask import current_app - -from ..utils import set_timestamp -from .dojson.contrib.marc21 import marc21 -from .tasks import create_records, delete_records - - -def publish_harvested_records( - sender=None, records=None, max_results=None, *args, **kwargs -): - """Create, index the harvested records.""" - # name = kwargs['name'] - records = records or [] - if max_results: - records = list(records)[: int(max_results)] - converted_records = [] - deleted_records = [] - for record in records: - rec = create_record(record.xml) - rec = marc21.do(rec) - rec.setdefault("harvested", True) - - identifiers = rec.get("identifiedBy", []) - identifiers.append( - {"type": "bf:Local", "source": "cantook", "value": record.header.identifier} - ) - rec["identifiedBy"] = identifiers - if record.deleted: - deleted_records.append(rec) - else: - converted_records.append(rec) - if converted_records: - current_app.logger.info( - f"publish_harvester: received {len(converted_records)} " "records to create" - ) - create_records(converted_records) - if deleted_records: - current_app.logger.info( - f"publish_harvester: received {len(deleted_records)} " "records to delete" - ) - delete_records(deleted_records) - msg = f"deleted: {len(deleted_records)}, created: {len(converted_records)}" - set_timestamp("ebooks-harvester", msg=msg) diff --git a/rero_ils/modules/ebooks/tasks.py b/rero_ils/modules/ebooks/tasks.py deleted file mode 100644 index ed62abe807..0000000000 --- a/rero_ils/modules/ebooks/tasks.py +++ /dev/null 
@@ -1,107 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""Celery tasks to create records.""" - -from __future__ import absolute_import, print_function - -from celery import shared_task -from flask import current_app - -from ..documents.api import Document, DocumentsSearch -from ..utils import do_bulk_index, get_schema_for_resource, set_timestamp -from .utils import create_document_holding, update_document_holding - - -@shared_task(ignore_result=True) -def create_records(records): - """Records creation and indexing.""" - n_updated = 0 - n_created = 0 - uuids = [] - for record in records: - # add document type - if "type" not in record: - record["type"] = [ - {"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"} - ] - # check if already harvested - pid = None - for identifier in record.get("identifiedBy"): - if identifier.get("source") == "cantook": - harvested_id = identifier.get("value") - query = ( - DocumentsSearch() - .filter("term", identifiedBy__value__raw=harvested_id) - .source(includes=["pid"]) - ) - try: - pid = next(query.scan()).pid - except StopIteration: - pid = None - try: - # add documents schema - pid_type = Document.provider.pid_type - record["$schema"] = get_schema_for_resource(pid_type) - if pid: - # update the record - record["pid"] = pid - existing_record = update_document_holding(record, pid) - n_updated += 1 - 
uuids.append(existing_record.id) - elif new_record := create_document_holding(record): - n_created += 1 - uuids.append(new_record.id) - except Exception as err: - current_app.logger.error(f"EBOOKS CREATE RECORDS: {err} {record}") - do_bulk_index(uuids, doc_type="doc", process=True) - - current_app.logger.info(f"create_records: {n_updated} updated, {n_created} new") - set_timestamp("ebooks_create_records", created=n_created, updated=n_updated) - return n_created, n_updated - - -@shared_task(ignore_result=True) -def delete_records(records): - """Records deleting.""" - count = 0 - for record in records: - # check if exist - pid = None - for identifier in record.get("identifiedBy"): - if identifier.get("source") == "cantook": - harvested_id = identifier.get("value") - query = ( - DocumentsSearch() - .filter("term", identifiedBy__value__raw=harvested_id) - .source(includes=["pid"]) - ) - try: - pid = [r.pid for r in query.scan()].pop() - except IndexError: - pid = None - try: - if pid: - # update the record - existing_record = Document.get_record_by_pid(pid) - # TODO: delete record and linked references - count += 1 - except Exception as err: - current_app.logger.error(f"EBOOKS DELETE RECORDS: {err} {record}") - current_app.logger.info(f"delete_records: {count}") - set_timestamp("ebooks_delete_records", deleted=count) - return count diff --git a/rero_ils/modules/ebooks/utils.py b/rero_ils/modules/ebooks/utils.py deleted file mode 100644 index e64a7c13aa..0000000000 --- a/rero_ils/modules/ebooks/utils.py +++ /dev/null @@ -1,192 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019-2022 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""Utilities.""" - -from flask import current_app -from invenio_db import db -from invenio_oaiharvester.models import OAIHarvestConfig - -from rero_ils.modules.locations.api import Location - -from ..documents.api import Document -from ..holdings.api import ( - Holding, - HoldingsSearch, - create_holding, - get_holding_pid_by_doc_location_item_type, -) -from ..organisations.api import Organisation - - -def add_oai_source( - name, baseurl, metadataprefix="marc21", setspecs="", comment="", update=False -): - """Add OAIHarvestConfig.""" - with current_app.app_context(): - source = OAIHarvestConfig.query.filter_by(name=name).first() - if not source: - source = OAIHarvestConfig( - name=name, - baseurl=baseurl, - metadataprefix=metadataprefix, - setspecs=setspecs, - comment=comment, - ) - source.save() - db.session.commit() - return "Added" - elif update: - source.name = name - source.baseurl = baseurl - source.metadataprefix = metadataprefix - if setspecs != "": - source.setspecs = setspecs - if comment != "": - source.comment = comment - db.session.commit() - return "Updated" - return "Not Updated" - - -def get_harvested_sources(record): - """Get the harvested sources from electronicLocator.""" - harvested_sources = [] - new_electronic_locators = [] - electronic_locators = record.get("electronicLocator", []) - for electronic_locator in electronic_locators: - if source := electronic_locator.get("source"): - harvested_sources.append( - {"source": source, "uri": electronic_locator.get("url")} - ) - else: - new_electronic_locators.append(electronic_locator) - if new_electronic_locators: - 
record["electronicLocator"] = new_electronic_locators - return harvested_sources - - -def create_document_holding(record): - """Create a document and a holding for a harvested ebook.""" - harvested_sources = get_harvested_sources(record) - new_record = None - holdings = [] - for harvested_source in harvested_sources: - if org := Organisation.get_record_by_online_harvested_source( - source=harvested_source["source"] - ): - if not new_record: - new_record = Document.create(data=record, dbcommit=False, reindex=False) - if new_record: - item_type_pid = org.online_circulation_category() - location_pids = org.get_online_locations() - for location_pid in location_pids: - location = Location.get_record_by_pid(location_pid) - library = location.get_library() - if url := library.get_online_harvested_source_url( - source=harvested_source["source"] - ): - uri_split = harvested_source["uri"].split("/")[3:] - uri_split.insert(0, url.rstrip("/")) - harvested_source["uri"] = "/".join(uri_split) - hold = create_holding( - document_pid=new_record.pid, - location_pid=location_pid, - item_type_pid=item_type_pid, - electronic_location=harvested_source, - holdings_type="electronic", - ) - holdings.append(hold) - else: - current_app.logger.warning( - f"create document holding no org: {harvested_source['source']}" - ) - db.session.commit() - for hold in holdings: - hold.reindex() - # the document has been reindexed by the holdings - if not holdings and new_record: - new_record.reindex() - return new_record - - -def update_document_holding(record, pid): - """Update a document and a holding for a harvested ebook.""" - harvested_sources = get_harvested_sources(record) - new_record = None - existing_record = Document.get_record_by_pid(pid) - new_record = existing_record.replace(data=record, dbcommit=False, reindex=False) - # Save all source uris to find holdings we can delete later - source_uris = [] - holdings = [] - for harvested_source in harvested_sources: - if org := 
Organisation.get_record_by_online_harvested_source( - source=harvested_source["source"] - ): - # add the organisation source uri - source_uris.append(harvested_source["uri"]) - item_type_pid = org.online_circulation_category() - for location_pid in org.get_online_locations(): - location = Location.get_record_by_pid(location_pid) - library = location.get_library() - # replace "https://some.uri" from ebooks with library uri - if url := library.get_online_harvested_source_url( - source=harvested_source["source"] - ): - uri_split = harvested_source["uri"].split("/")[3:] - uri_split.insert(0, url.rstrip("/")) - new_uri = "/".join(uri_split) - harvested_source["uri"] = new_uri - # add the library source uri - source_uris.append(new_uri) - if not get_holding_pid_by_doc_location_item_type( - new_record.pid, location_pid, item_type_pid, "electronic" - ): - hold = create_holding( - document_pid=new_record.pid, - location_pid=location_pid, - item_type_pid=item_type_pid, - electronic_location=harvested_source, - holdings_type="electronic", - ) - holdings.append(hold) - db.session.commit() - for hold in holdings: - hold.reindex() - # the document has been reindexed by the holdings - if not holdings and new_record: - new_record.reindex() - HoldingsSearch.flush_and_refresh() - # delete all double holdings and holdings without valid source uri - seen_uris = [] - for holding_pid in Holding.get_holdings_pid_by_document_pid(pid): - holding = Holding.get_record_by_pid(holding_pid) - to_delete = True - for electronic_location in holding.get("electronic_location", []): - uri = electronic_location.get("uri") - if electronic_location.get("source") and uri not in seen_uris: - seen_uris.append(uri) - if uri in source_uris: - to_delete = False - if to_delete: - current_app.logger.info( - "Delete harvested holding | " - f"document: {pid} " - f'holding: {holding.pid} {holding.get("electronic_location")}' - ) - holding.delete(force=False, dbcommit=True, delindex=True) - return new_record diff 
--git a/rero_ils/modules/entities/api.py b/rero_ils/modules/entities/api.py index 74d0a60ce6..6c29ceab66 100644 --- a/rero_ils/modules/entities/api.py +++ b/rero_ils/modules/entities/api.py @@ -63,7 +63,7 @@ def get_authorized_access_point(self, language): :param language: language for authorized access point. :returns: authorized access point in given language. """ - raise NotImplementedError + raise NotImplementedError() @abstractmethod def get_links_to_me(self, get_pids=False): @@ -90,7 +90,7 @@ def reasons_not_to_delete(self): @abstractmethod def resource_type(self): """Get the entity type.""" - raise NotImplementedError + raise NotImplementedError() @property def organisation_pids(self): diff --git a/rero_ils/modules/ext.py b/rero_ils/modules/ext.py index 92dccb3e45..595e670bc9 100644 --- a/rero_ils/modules/ext.py +++ b/rero_ils/modules/ext.py @@ -31,7 +31,6 @@ from invenio_base.utils import obj_or_import_string from invenio_circulation.signals import loan_state_changed from invenio_indexer.signals import before_record_index -from invenio_oaiharvester.signals import oaiharvest_finished from invenio_records.signals import ( after_record_insert, after_record_update, @@ -64,7 +63,6 @@ from rero_ils.modules.acquisition.acq_receipts.listener import enrich_acq_receipt_data from rero_ils.modules.acquisition.budgets.listener import budget_is_active_changed from rero_ils.modules.collections.listener import enrich_collection_data -from rero_ils.modules.ebooks.receivers import publish_harvested_records from rero_ils.modules.holdings.listener import ( enrich_holding_data, update_items_locations_and_types, @@ -346,8 +344,6 @@ def register_signals(self, app): loan_state_changed.connect(listener_loan_state_changed, weak=False) - oaiharvest_finished.connect(publish_harvested_records, weak=False) - # store the username in the session user_logged_in.connect(set_user_name) user_logged_out.connect(remove_user_name) diff --git a/rero_ils/modules/files/operations.py 
b/rero_ils/modules/files/operations.py index 52f538a7a6..bfdcc33f43 100644 --- a/rero_ils/modules/files/operations.py +++ b/rero_ils/modules/files/operations.py @@ -45,7 +45,7 @@ def on_post_commit(self, uow): :param uow: obj - UnitOfWork instance. """ - raise NotImplementedError + raise NotImplementedError() class ReindexDoc(ReindexOperationBase): diff --git a/rero_ils/modules/organisations/api.py b/rero_ils/modules/organisations/api.py index ff3d433d56..f915b85ae3 100644 --- a/rero_ils/modules/organisations/api.py +++ b/rero_ils/modules/organisations/api.py @@ -106,13 +106,20 @@ def get_record_by_online_harvested_source(cls, source): :param source: the record source :return: Organisation record or None. """ - results = ( - OrganisationsSearch().filter("term", online_harvested_source=source).scan() - ) - try: - return Organisation.get_record_by_pid(next(results).pid) - except StopIteration: - return None + for org in cls.get_records_by_online_harvested_source(source): + return org + + @classmethod + def get_records_by_online_harvested_source(cls, source): + """Get records by online harvested source. + + :param source: the record source + :return: generator of the matching Organisation records.
+ """ + query = OrganisationsSearch().filter("term", online_harvested_source=source) + org_pids = [hit.pid for hit in query.source("pid").scan()] + for org_pid in org_pids: + yield Organisation.get_record_by_pid(org_pid) @property def organisation_pid(self): diff --git a/rero_ils/modules/stats/api/indicators/base.py b/rero_ils/modules/stats/api/indicators/base.py index 9f5669b9d2..d230b4c9a1 100644 --- a/rero_ils/modules/stats/api/indicators/base.py +++ b/rero_ils/modules/stats/api/indicators/base.py @@ -39,7 +39,7 @@ def query(self): :returns: an elasticsearch query object """ - raise NotImplementedError + raise NotImplementedError() @property @abstractmethod @@ -49,7 +49,7 @@ def aggregation(self, distribution): :param distrubtion: str - report distrubtion name :returns: an elasticsearch aggregation object """ - raise NotImplementedError + raise NotImplementedError() @abstractmethod def label(self, distribution, bucket): @@ -60,4 +60,4 @@ def label(self, distribution, bucket): :returns: the label :rtype: str """ - raise NotImplementedError + raise NotImplementedError() diff --git a/rero_ils/schedulers.py b/rero_ils/schedulers.py index 2cedb92244..657a846a24 100644 --- a/rero_ils/schedulers.py +++ b/rero_ils/schedulers.py @@ -265,7 +265,7 @@ def info(): click.echo("\n".join(current_scheduler.display_all())) -@scheduler.command("init") +@scheduler.command("") @click.option("-r", "--reset", "reset", is_flag=True, default=False) @click.option("-v", "--verbose", "verbose", is_flag=True, default=False) @with_appcontext @@ -285,7 +285,7 @@ def init(reset, verbose): click.echo("\n".join(current_scheduler.display_all())) -@scheduler.command("enable_tasks") +@scheduler.command("") @click.option("-a", "--all", "all", is_flag=True, default=False) @click.option("-n", "--name", "names", multiple=True, default=None) @click.option("-d", "--disable", "disable", is_flag=True, default=False) diff --git a/scripts/setup b/scripts/setup index 379189aee4..4b22df77d8 100755 --- 
a/scripts/setup +++ b/scripts/setup @@ -598,23 +598,14 @@ create_token organisation_aosta_token reroilstest@gmail.com ${INVENIO_RERO_ACCES create_token organisation_scotland_token reroilstest+irma@gmail.com ${INVENIO_RERO_ACCESS_TOKEN_SCOLAND_LIBRARIAN} create_token organisation_fictive_token reroilstest+imagination@gmail.com ${INVENIO_RERO_ACCESS_TOKEN_FICTIVE_LIBRARIAN} -# OAI configuration -info_msg "OAI configuration: ${DATA_PATH}/oaisources.yml" -eval ${PREFIX} invenio reroils oaiharvester initconfig ${DATA_PATH}/oaisources.yml +# API harvester configuration +info_msg "API configuration: ${DATA_PATH}/apisources.yml" +eval ${PREFIX} invenio reroils api-harvester init-config ${DATA_PATH}/apisources.yml -eval ${PREFIX} invenio reroils scheduler enable_tasks -a -v -# disable ebook harvesting -eval ${PREFIX} invenio reroils scheduler enable_tasks -n ebooks-harvester -d - -if ${DEPLOYMENT} -then - # start oai harvesting asynchrone: beats must be running - info_msg "Start OAI harvesting asynchrone" - eval ${PREFIX} invenio reroils oaiharvester harvest -n ebooks -a max_results=150 -q -k -else - info_msg "For ebooks harvesting run:" - msg "\tinvenio reroils oaiharvester harvest -n ebooks -a max_results=100 -q" -fi +eval ${PREFIX} invenio reroils scheduler enable-tasks -a -v +info_msg disable VS/NJ CANTOOK harvesting +eval ${PREFIX} invenio reroils scheduler enable-tasks -n harvest-vs-cantook -d +eval ${PREFIX} invenio reroils scheduler enable-tasks -n harvest-nj-cantook -d if ${ES_MAPPING} then @@ -650,6 +641,19 @@ info_msg "Initialize wiki search" eval ${PREFIX} invenio flask_wiki init-index eval ${PREFIX} invenio flask_wiki index +info_msg "Api harvesting ...:" +for NAME in "VS-CANTOOK" "NJ-CANTOOK" +do + eval ${PREFIX} invenio reroils api-harvester set-last-run ${NAME} -d 1900-01-01 + if ${DEPLOYMENT} + then + # start API harvesting asynchronously: beats must be running + eval ${PREFIX} invenio reroils api-harvester harvest -n ${NAME} -m 100 -k + else + info_msg "For ${NAME}
harvesting run: \"invenio reroils api-harvester harvest -n ${NAME} -m 100 -v\"" + fi +done + date success_msg "Perfect ${PROGRAM}! See you soon…" exit 0 diff --git a/setup.py b/setup.py index 0137a76b43..2ae9534aa5 100644 --- a/setup.py +++ b/setup.py @@ -147,16 +147,14 @@ def run(self): 'marc21tojson_loc = rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_loc', 'marc21tojson_slsp = rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_slsp', 'marc21tojson_ugent = rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_ugent', - 'marc21toebooks = rero_ils.modules.ebooks.dojson.contrib.marc21:marc21', 'unimarctojson = rero_ils.modules.documents.dojson.contrib.unimarctojson:unimarc', ], 'flask.commands': [ - 'apiharvester = rero_ils.modules.apiharvester.cli:apiharvester', + 'api_harvester = rero_ils.modules.api_harvester.cli:api_harvester', 'remote_entity = rero_ils.modules.entities.remote_entities.cli:remote_entity', 'migration = rero_ils.modules.migration.cli:migration', 'monitoring = rero_ils.modules.monitoring.cli:monitoring', 'notifications = rero_ils.modules.notifications.cli:notifications', - 'oaiharvester = rero_ils.modules.ebooks.cli:oaiharvester', 'reroils = rero_ils.modules.cli.reroils:reroils', 'scheduler = rero_ils.schedulers:scheduler', 'stats = rero_ils.modules.stats.cli:stats', @@ -171,7 +169,7 @@ def run(self): 'acq_orders = rero_ils.modules.acquisition.acq_orders.models', 'acq_receipts = rero_ils.modules.acquisition.acq_receipts.models', 'acq_receipt_lines = rero_ils.modules.acquisition.acq_receipt_lines.models', - 'apiharvester = rero_ils.modules.apiharvester.models', + 'api_harvester = rero_ils.modules.api_harvester.models', 'budgets = rero_ils.modules.acquisition.budgets.models', 'circ_policies = rero_ils.modules.circ_policies.models', 'collections = rero_ils.modules.collections.models', @@ -327,10 +325,9 @@ def run(self): 'operation_logs = rero_ils.modules.operation_logs.es_templates:list_es_templates', ], 
'invenio_celery.tasks': [ - 'apiharvester = rero_ils.modules.apiharvester.tasks', + 'api_harvester = rero_ils.modules.api_harvester.tasks', 'collections = rero_ils.modules.collections.tasks', 'documents = rero_ils.modules.documents.tasks', - 'ebooks = rero_ils.modules.ebooks.tasks', 'holdings = rero_ils.modules.holdings.tasks', 'items = rero_ils.modules.items.tasks', 'loans = rero_ils.modules.loans.tasks', diff --git a/tests/api_harvester/cantook/test_cantook_dojson.py b/tests/api_harvester/cantook/test_cantook_dojson.py new file mode 100644 index 0000000000..1adf694dd5 --- /dev/null +++ b/tests/api_harvester/cantook/test_cantook_dojson.py @@ -0,0 +1,496 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2019 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Test api harvester cantook dojson.""" + +import json +from os.path import dirname, join + +from rero_ils.modules.api_harvester.cantook.dojson.json import Transformation + + +def clean_dict(data, keys=("$schema", "adminMetadata", "fiction_statement")): + """ + Clean keys from dictionary. + + :param keys: keys to clean from dictionary. + :param data: dictionary to clean. 
+ """ + for key in keys: + data.pop(key, None) + return data + + +def test_trans_constants(app): + """Test transformation constants.""" + + data = {} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_constants() + assert transformation.json == { + "$schema": "https://bib.rero.ch/schemas/documents/document-v0.0.1.json", + "adminMetadata": {"encodingLevel": "Not applicable"}, + "harvested": True, + "issuance": {"main_type": "rdami:1001", "subtype": "materialUnit"}, + "type": [{"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"}], + } + + +def test_trans_pid(app): + """Test transformation pid.""" + data = {"id": "immateriel.frO1097420"} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_pid() + assert transformation.json == {"pid": "cantook-immateriel.frO1097420"} + + +def test_trans_identified_by(app): + """Test transformation IdentifiedBy.""" + data = { + "id": "immateriel.frO1097420", + "media": [ + { + "nature": "epub", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089597-epub", + "key": "9782354089597", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0, + }, + { + "nature": "paper", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089412-paper", + "key": "9782354089412", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0, + }, + ], + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_identified_by() + assert transformation.json == { + "identifiedBy": [ + { + "source": "CANTOOK", + "type": "bf:Local", + "value": "cantook-immateriel.frO1097420", + }, + {"note": "epub", "type": "bf:Isbn", "value": "9782354089597"}, + {"note": "paper", "type": "bf:Isbn", "value": "9782354089412"}, + ], + } + data = { + "id": "immateriel.frO1097420", + "media": [ + { + "nature": "audio", + "key_type": "isbn13", 
+ "id": "immateriel.frO1097420-9782354089597-epub", + "key": "9782354089597", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0, + }, + ], + } + # test audio type + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_identified_by() + assert transformation.json == { + "identifiedBy": [ + { + "source": "CANTOOK", + "type": "bf:Local", + "value": "cantook-immateriel.frO1097420", + }, + {"note": "audio", "type": "bf:Isbn", "value": "9782354089597"}, + ], + "type": [ + {"main_type": "docmaintype_audio", "subtype": "docsubtype_audio_book"} + ], + } + + +def test_trans_title(app): + """Test transformation Title.""" + data = { + "title": "L'argent des gens", + "title_prefix": None, + "title_sort": "l'argent des gens", + "subtitle": None, + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_title() + assert transformation.json == { + "title": [{"mainTitle": [{"value": "L'argent des gens"}], "type": "bf:Title"}] + } + # with sub title + data = { + "title": "L'argent des gens", + "title_prefix": None, + "title_sort": "l'argent des gens", + "subtitle": "Tentative d'épuisement de notre porte-monnaie", + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_title() + assert transformation.json == { + "title": [ + { + "mainTitle": [{"value": "L'argent des gens"}], + "subtitle": [ + {"value": "Tentative d'épuisement de notre " "porte-monnaie"} + ], + "type": "bf:Title", + } + ], + } + + +def test_trans_contribution(app): + """Test transformation Contribution.""" + data = { + "contributors": [ + { + "first_name": "Jean", + "last_name": "Guiloineau", + "nature": "translated_by", + "country": "FR", + "biography": "Jean Guiloineau est le traducteur des premiers romans de Toni Morrison L\u2019\u0152il le plus bleu, Le Chant de Salomon, Paradis et Tar Baby, parus chez 
Christian Bourgois \u00e9diteur. Il traduit \u00e9galement d\u2019autres auteurs embl\u00e9matiques de la litt\u00e9rature anglo-saxonne, entre autres Nelson Mandela, Nadine Gordimer, Salman Rushdie, Angela Carter, Thomas McGuane et Ben Okri.", + "website": "", + }, + { + "first_name": "Toni", + "last_name": "Morrison", + "nature": "author", + "country": "FR", + "biography": "", + "website": "", + }, + # again the same entity + { + "first_name": "Toni", + "last_name": "Morrison", + "nature": "author", + "country": "FR", + "biography": "", + "website": "", + }, + ], + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_contribution() + assert transformation.json == { + "contribution": [ + { + "entity": { + "authorized_access_point": "Guiloineau, Jean", + "type": "bf:Person", + }, + "role": ["trl"], + }, + { + "entity": { + "authorized_access_point": "Morrison, Toni", + "type": "bf:Person", + }, + "role": ["aut"], + }, + ], + } + + +def test_trans_provision_activity(app): + """Transform provisionActivity.""" + data = { + "publisher_name": "Éditions Mnémos", + "created_at": "2022-03-09T07:45:09+01:00", + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_provision_activity() + assert transformation.json == { + "provisionActivity": [ + { + "startDate": 2022, + "statement": [ + {"label": [{"value": "Éditions Mnémos"}], "type": "bf:Agent"}, + {"label": [{"value": "2022"}], "type": "Date"}, + ], + "type": "bf:Publication", + } + ], + } + + +def test_trans_electronic_locator(app): + """Test transformation electronicLocator.""" + data = { + "cover": "https://images.immateriel.fr/covers/4MY6AC5.png", + "flipbook": "https://tw5.immateriel.fr/wiki/immateriel/b/4MY6AC5", + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_electronic_locator() + assert transformation.json 
== { + "electronicLocator": [ + { + "content": "coverImage", + "type": "relatedResource", + "url": "https://images.immateriel.fr/covers/4MY6AC5.png", + }, + { + "content": "extract", + "type": "relatedResource", + "url": "https://tw5.immateriel.fr/wiki/immateriel/b/4MY6AC5", + }, + ], + } + + +def test_trans_fiction(app): + """Test transformation fiction.""" + data = {"fiction": None} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_fiction() + assert transformation.json == {"fiction_statement": "unspecified"} + + +def test_trans_language(app): + """Test transformation language.""" + data = {"languages": ["fre"]} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_language() + assert transformation.json == { + "language": [{"type": "bf:Language", "value": "fre"}] + } + + +def test_trans_orginal_language(app): + """Test transformation language.""" + data = {"translated_from": "eng"} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_orginal_language() + assert transformation.json == {"originalLanguage": ["eng"]} + + +def test_trans_subjects(app): + """Test transformation Subject.""" + data = { + "classifications": [ + { + "standard": "bisac", + "identifiers": ["FIC009000"], + "captions": [{"fr": None, "en": None}], + }, + { + "standard": "cantook", + "identifiers": ["science-fiction-fantasy"], + "captions": [ + { + "fr": "Romans science-fiction et fantastique", + "en": "Science Fiction & Fantasy", + } + ], + }, + { + "standard": "feedbooks", + "identifiers": ["FBFIC009000"], + "captions": [{"fr": "Fantasy", "en": "Fantasy"}], + }, + { + "standard": "thema", + "identifiers": ["FM"], + "captions": [{"fr": "Fantasy", "en": "Fantasy"}], + }, + ] + } + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + 
transformation.trans_subjects() + assert transformation.json == { + "subjects": [ + { + "entity": { + "authorized_access_point": "Romans science-fiction " + "et fantastique", + "type": "bf:Topic", + } + } + ], + } + + +def test_trans_summary(app): + """Test transformation Summary.""" + data = {"summary": "This is a summery."} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_summary() + assert transformation.json == { + "summary": [{"label": [{"value": "This is a summery."}]}] + } + + +def test_trans_extent(app): + """Test transformation Extend.""" + data = {"page_count": 560} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_extent() + assert transformation.json == {"extent": "560 pages"} + + +# to be used to create holdings +def test_trans_links(app): + """Test transformation links.""" + data = {"link": "https://bm.ebibliomedia.ch/resources/648bf704b7e14d000154685f"} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_links() + assert transformation.json == { + "link": "https://bm.ebibliomedia.ch/resources/648bf704b7e14d000154685f" + } + + +# to be used for deleted records +def test_trans_deleted(app): + """Test transformation deleted.""" + data = {"unavailable_since": "2024-01-03T16:50:35+01:00"} + transformation = Transformation( + data=data, logger=None, verbose=False, transform=False + ) + transformation.trans_deleted() + assert transformation.json == {"deleted": "2024-01-03T16:50:35+01:00"} + + +def test_trans_do(app): + """Test dojson do.""" + content = json.load( + open(join(dirname(__file__), "../../data/mv_cantook_deleted.json")) + ) + data = content["resources"][0] + transformation = Transformation(logger=None, verbose=False, transform=False) + result = transformation.do(data) + assert result == { + "$schema": 
"https://bib.rero.ch/schemas/documents/document-v0.0.1.json", + "adminMetadata": {"encodingLevel": "Not applicable"}, + "contribution": [ + { + "entity": { + "authorized_access_point": "Party, Adrien", + "type": "bf:Person", + }, + "role": ["aut"], + } + ], + "deleted": "2002-02-02", + "electronicLocator": [ + { + "content": "coverImage", + "type": "relatedResource", + "url": "https://images.immateriel.fr/covers/5V4JHTA.png", + }, + { + "content": "extract", + "type": "relatedResource", + "url": "https://tw5.immateriel.fr/wiki/immateriel/b/5V4JHTA", + }, + ], + "extent": "735 pages", + "fiction_statement": "unspecified", + "harvested": True, + "identifiedBy": [ + { + "source": "CANTOOK", + "type": "bf:Local", + "value": "cantook-immateriel.frO1109367", + }, + {"note": "epub", "type": "bf:Isbn", "value": "9782376865193"}, + {"note": "paper", "type": "bf:Isbn", "value": "9782376866688"}, + ], + "issuance": {"main_type": "rdami:1001", "subtype": "materialUnit"}, + "language": [{"type": "bf:Language", "value": "fre"}], + "link": "https://mediatheque-valais.cantookstation.eu/resources/642bb98ebf82c100014867a4", + "pid": "cantook-immateriel.frO1109367", + "provisionActivity": [ + { + "startDate": 2023, + "statement": [ + { + "label": [{"value": "Nouvelles Éditions " "Actu SF"}], + "type": "bf:Agent", + }, + {"label": [{"value": "2023"}], "type": "Date"}, + ], + "type": "bf:Publication", + } + ], + "subjects": [ + { + "entity": { + "authorized_access_point": "Romans science-fiction " + "et fantastique", + "type": "bf:Topic", + } + } + ], + "summary": [ + { + "label": [ + { + "value": "Les vampires hantent toujours aujourd'hui " + "nos cauchemars. Depuis leurs débuts, ils " + "ont essaimé dans la littérature, le cinéma, " + "le théâtre, les séries, la BD, le jeu de " + "rôle, la musique... aucun média n'a échappé " + "à la fascination qu'ils exercent sur nous, " + "pauvres mortels. Vampirologie est un " + "ouvrage clef pour comprendre le phénomène, " + "..." 
+ } + ] + } + ], + "title": [{"mainTitle": [{"value": "Vampirologie"}], "type": "bf:Title"}], + "type": [{"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"}], + } diff --git a/rero_ils/modules/apiharvester/signals.py b/tests/api_harvester/conftest.py similarity index 68% rename from rero_ils/modules/apiharvester/signals.py rename to tests/api_harvester/conftest.py index f24e2b621c..e0adad21bf 100644 --- a/rero_ils/modules/apiharvester/signals.py +++ b/tests/api_harvester/conftest.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # RERO ILS -# Copyright (C) 2019-2022 RERO +# Copyright (C) 2019-2023 RERO # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -15,10 +15,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -"""ApiHarvester signals.""" +"""Common pytest fixtures and plugins.""" -from blinker import Namespace +import pytest -_signals = Namespace() -apiharvest_part = _signals.signal("apiharvest_part") +@pytest.fixture(scope="module") +def create_app(): + """Create test app.""" + # from invenio_app.factory import create_ui + # create_ui + from invenio_app.factory import create_ui + + return create_ui diff --git a/tests/api_harvester/test_cli_api_harvester.py b/tests/api_harvester/test_cli_api_harvester.py new file mode 100644 index 0000000000..092c133148 --- /dev/null +++ b/tests/api_harvester/test_cli_api_harvester.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2019 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Test api harvester cli.""" + +import json +from os.path import dirname, join + +import mock +from click.testing import CliRunner +from utils import mock_response + +from rero_ils.modules.api_harvester.cli import ( + add_api_source_config, + harvest, + info, + init_api_harvest_config, + set_last_run, +) +from rero_ils.modules.documents.api import Document +from rero_ils.modules.holdings.api import Holding + + +def test_cli(app, org_sion, lib_sion, loc_online_sion, item_type_online_sion): + """Test count cli.""" + runner = CliRunner() + + config_file = join(dirname(__file__), "../data/apisources.yml") + result = runner.invoke(init_api_harvest_config, [config_file]) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output[0] == "ApiHarvestConfig NJ-CANTOOK: Add" + assert output[1] == "ApiHarvestConfig VS-CANTOOK: Add" + + result = runner.invoke(info) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output == [ + "NJ-CANTOOK", + "\tlastrun : 1900-01-01 00:00:00", + "\turl : https://bm.ebibliomedia.ch", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : ebibliomedia", + "VS-CANTOOK", + "\tlastrun : 1900-01-01 00:00:00", + "\turl : https://mediatheque-valais.cantookstation.eu", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : mv-cantook", + ] + + result = runner.invoke(set_last_run, ["NJ-CANTOOK", "-d", "2002-02-02"]) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output == ["Set last run NJ-CANTOOK: 2002-02-02"] + + 
result = runner.invoke(info) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output == [ + "NJ-CANTOOK", + "\tlastrun : 2002-02-02 00:00:00", + "\turl : https://bm.ebibliomedia.ch", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : ebibliomedia", + "VS-CANTOOK", + "\tlastrun : 1900-01-01 00:00:00", + "\turl : https://mediatheque-valais.cantookstation.eu", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : mv-cantook", + ] + + result = runner.invoke( + add_api_source_config, ["NJ-CANTOOK", "-c", "ebibliomedia-test", "-u"] + ) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output == ["ApiHarvestConfig NJ-CANTOOK: Update code:ebibliomedia-test"] + + result = runner.invoke(info) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + assert output == [ + "NJ-CANTOOK", + "\tlastrun : 2002-02-02 00:00:00", + "\turl : https://bm.ebibliomedia.ch", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : ebibliomedia-test", + "VS-CANTOOK", + "\tlastrun : 1900-01-01 00:00:00", + "\turl : https://mediatheque-valais.cantookstation.eu", + "\tclassname : rero_ils.modules.api_harvester.cantook.api.ApiCantook", + "\tcode : mv-cantook", + ] + + # test harvest with create + content = json.load(open(join(dirname(__file__), "../data/mv_cantook.json"))) + headers_1 = { + "X-Total-Pages": 1, + "X-Total-Items": len(content.get("resources", [])), + "X-Current-Page": 1, + } + mock_response_1 = mock_response(json_data=content, headers=headers_1) + headers_2 = { + "X-Total-Pages": 1, + "X-Total-Items": len(content.get("resources", [])), + "X-Current-Page": 2, + } + mock_response_2 = mock_response(json_data={"resources": []}, headers=headers_2) + with mock.patch( + "requests.Session.get", + side_effect=[mock_response_1, mock_response_2], + ): + result = runner.invoke(harvest, ["-n", "VS-CANTOOK", 
"-v"]) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + print(output) + assert output == [ + "Harvest api: VS-CANTOOK", + "API page: 1 url: " + "https://mediatheque-valais.cantookstation.eu/v1/resources.json?start_at=1900-01-01T00:00:00&page=1", + "1: CANTOOK:mv-cantook cantook-immateriel.frO1109367 = CREATED", + "2: CANTOOK:mv-cantook cantook-immateriel.frO1097420 = CREATED", + "3: CANTOOK:mv-cantook cantook-feedhttps-www-feedbooks-com-item-6177668 = CREATED", + "API harvest VS-CANTOOK items=3 | got=3 new=3 updated=0 deleted=0", + ] + assert Document.count() == 3 + assert Holding.count() == 3 + + # test harvest with update and delete + runner.invoke(set_last_run, ["VS-CANTOOK", "-d", "1900-01-01"]) + content = json.load( + open(join(dirname(__file__), "../data/mv_cantook_deleted.json")) + ) + headers_1 = { + "X-Total-Pages": 1, + "X-Total-Items": len(content.get("resources", [])), + "X-Current-Page": 1, + } + mock_response_1 = mock_response(json_data=content, headers=headers_1) + with mock.patch( + "requests.Session.get", + side_effect=[mock_response_1, mock_response_2], + ): + result = runner.invoke(harvest, ["-n", "VS-CANTOOK", "-v"]) + assert result.exit_code == 0 + output = result.output.strip().split("\n") + print(output) + assert output == [ + "Harvest api: VS-CANTOOK", + "API page: 1 url: " + "https://mediatheque-valais.cantookstation.eu/v1/resources.json?start_at=1900-01-01T00:00:00&page=1", + "1: CANTOOK:mv-cantook cantook-immateriel.frO1109367 = DELETED", + "2: CANTOOK:mv-cantook cantook-immateriel.frO1097420 = UPDATED", + "3: CANTOOK:mv-cantook cantook-feedhttps-www-feedbooks-com-item-6177668 = DELETED", + "API harvest VS-CANTOOK items=3 | got=3 new=0 updated=1 deleted=2", + ] + assert Document.count() == 1 + assert Holding.count() == 1 diff --git a/data/oaisources.yml b/tests/data/apisources.yml similarity index 64% rename from data/oaisources.yml rename to tests/data/apisources.yml index 1f8727cbea..11cce81c0f 100644 --- 
a/data/oaisources.yml +++ b/tests/data/apisources.yml @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # RERO ILS -# Copyright (C) 2019 RERO +# Copyright (C) 2024 RERO # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -16,9 +16,13 @@ # along with this program. If not, see . -# OAI-PMH harvester configuration. -ebooks: - baseurl: https://ebooks.test.rero.ch:8443/oai2d - metadataprefix: marc21 - comment: '' - setspecs: '' +# API harvester configuration. +VS-CANTOOK: + url: https://mediatheque-valais.cantookstation.eu + classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook' + code: 'mv-cantook' + +NJ-CANTOOK: + url: https://bm.ebibliomedia.ch + classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook' + code: 'ebibliomedia' diff --git a/tests/data/mv_cantook.json b/tests/data/mv_cantook.json new file mode 100644 index 0000000000..ed12802a07 --- /dev/null +++ b/tests/data/mv_cantook.json @@ -0,0 +1,347 @@ +{ + "resources": [ + { + "title": "Vampirologie", + "title_prefix": null, + "title_sort": "vampirologie", + "subtitle": null, + "description": null, + "summary": "Les vampires hantent toujours aujourd'hui nos cauchemars. Depuis leurs d\u00e9buts, ils ont essaim\u00e9 dans la litt\u00e9rature, le cin\u00e9ma, le th\u00e9\u00e2tre, les s\u00e9ries, la BD, le jeu de r\u00f4le, la musique... aucun m\u00e9dia n'a \u00e9chapp\u00e9 \u00e0 la fascination qu'ils exercent sur nous, pauvres mortels. 
Vampirologie est un ouvrage clef pour comprendre le ph\u00e9nom\u00e8ne, ...", + "comments": null, + "tags": [], + "back_cover": "", + "back_cover_large": null, + "cover": "https://images.immateriel.fr/covers/5V4JHTA.png", + "cover_large": null, + "flipbook": "https://tw5.immateriel.fr/wiki/immateriel/b/5V4JHTA", + "languages": [ + "fre" + ], + "page_count": 735, + "translated_from": "", + "contributors": [ + { + "first_name": "Adrien", + "last_name": "Party", + "nature": "author", + "country": "FR", + "biography": "", + "website": "" + } + ], + "media": [ + { + "nature": "epub", + "key_type": "isbn13", + "id": "immateriel.frO1109367-9782376865193-epub", + "key": "9782376865193", + "issued_on": "2024-11-21T06:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "paper", + "key_type": "isbn13", + "id": "immateriel.frO1109367-9782376864929-paper", + "key": "9782376866688", + "issued_on": "2024-11-21T06:00:00+01:00", + "current_holds": 0 + } + ], + "id": "immateriel.frO1109367", + "link": "https://mediatheque-valais.cantookstation.eu/resources/642bb98ebf82c100014867a4", + "created_at": "2023-04-04T07:45:50+02:00", + "updated_at": "2024-11-21T07:45:39+01:00", + "publisher_name": "Nouvelles \u00c9ditions Actu SF", + "fiction": null, + "classifications": [ + { + "standard": "bisac", + "identifiers": [ + "FIC009000" + ], + "captions": [ + { + "fr": null, + "en": null + } + ] + }, + { + "standard": "cantook", + "identifiers": [ + "science-fiction-fantasy" + ], + "captions": [ + { + "fr": "Romans science-fiction et fantastique", + "en": "Science Fiction & Fantasy" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBFIC009000" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "FM" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + } + ], + "unavailable_since": null, + "publisher": { + "name": "Nouvelles \u00c9ditions Actu SF" + } + }, + { + "title": "L'Enterrement 
des \u00e9toiles", + "title_prefix": null, + "title_sort": "l'enterrement des etoiles", + "subtitle": null, + "description": null, + "summary": "L\u2019annonce de la fin est proche. \u00c0 la cit\u00e9 des h\u00e9ritiers, le roi Jenophon re\u00e7oit la visite de l\u2019oracle annonciateur. C\u2019est le moment que choisit un cirque pour s\u2019installer non loin et offrir un moment de joie. Cette compagnie de monstres de foire doit faire face \u00e0 l\u2019obscurit\u00e9 qui s\u2019\u00e9tend et d\u00e9couvrir la lumi\u00e8re int\u00e9rieure, ...", + "comments": null, + "tags": [], + "back_cover": "", + "back_cover_large": null, + "cover": "https://images.immateriel.fr/covers/4MY6AC5.png", + "cover_large": null, + "flipbook": "https://tw5.immateriel.fr/wiki/immateriel/b/4MY6AC5", + "languages": [ + "fre" + ], + "page_count": 320, + "translated_from": "", + "contributors": [ + { + "first_name": "Christophe", + "last_name": "Guillemain", + "nature": "author", + "country": "FR", + "biography": "", + "website": "" + } + ], + "media": [ + { + "nature": "epub", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089597-epub", + "key": "9782354089597", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "paper", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089412-paper", + "key": "9782354089412", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0 + } + ], + "id": "immateriel.frO1097420", + "link": "https://mediatheque-valais.cantookstation.eu/resources/66614d09065ed9f5833f2c38", + "created_at": "2024-06-06T07:45:45+02:00", + "updated_at": "2024-11-22T07:45:46+01:00", + "publisher_name": "\u00c9ditions Mn\u00e9mos", + "fiction": null, + "classifications": [ + { + "standard": "bisac", + "identifiers": [ + "FIC009000" + ], + "captions": [ + { + "fr": null, + "en": null + } + ] + }, + { + "standard": "cantook", + "identifiers": [ + "science-fiction-fantasy" + ], + "captions": [ + { + "fr": "Romans 
science-fiction et fantastique", + "en": "Science Fiction & Fantasy" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBFIC009000" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "FM" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + } + ], + "unavailable_since": null, + "publisher": { + "name": "\u00c9ditions Mn\u00e9mos" + } + }, + { + "title": "Les Neurosciences pour les Nuls, grand format", + "title_prefix": null, + "title_sort": "les neurosciences pour les nuls, grand format", + "subtitle": null, + "description": null, + "summary": "Une plong\u00e9e fascinante dans les m\u00e9andres de nos cerveaux !Comment un amas de cellules interconnect\u00e9es \u2013 les neurones \u2013 peut-il \u00eatre \u00e0 la base de nos pens\u00e9es, de nos souvenirs, de nos sentiments, et faire de chacun de nous la personne que nous sommes ? C'est \u00e0 cette question passionnante que ce livre propose d'apporter une r\u00e9ponse.\n \n Que vous soyez simples curieux ou futurs \u00e9tudiants en m\u00e9decine, biologie, psychologie ou sciences cognitives, cet ouvrage constitue une introduction remarquablement efficace et compl\u00e8te au fonctionnement du cerveau humain, et rend compte des formidables avanc\u00e9es scientifiques de ces derni\u00e8res ann\u00e9es.\n \n D\u00e9couvrez : \n \nLe syst\u00e8me nerveux et le fonctionnement des neurones \nLe si\u00e8ge de l'intelligence, de la conscience et des \u00e9motions \nLa plasticit\u00e9 c\u00e9r\u00e9brale \nLes dysfonctionnements neuronaux \nEt bien d'autres choses encore ", + "comments": null, + "tags": [], + "back_cover": null, + "back_cover_large": null, + "cover": "https://covers.feedbooks.net/item/6177668.jpg?size=large&t=1729302359", + "cover_large": null, + "flipbook": null, + "languages": [ + "fre" + ], + "page_count": 431, + "translated_from": null, + "contributors": [ + { + "first_name": "Franck", + 
"last_name": "Amthor", + "nature": "author", + "country": null, + "biography": "", + "website": null + }, + { + "first_name": "Laurianne", + "last_name": "Geffroy", + "nature": "author", + "country": null, + "biography": "", + "website": null + }, + { + "first_name": "Laurianne", + "last_name": "Geffroy", + "nature": "author", + "country": null, + "biography": "", + "website": null + } + ], + "media": [ + { + "nature": "paper", + "key_type": "isbn13", + "id": "feedhttps-www-feedbooks-com-item-6177668-9782412096499-paper", + "key": "9782412096499", + "issued_on": "2024-11-06T01:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "epub", + "key_type": "isbn13", + "id": "feedhttps-www-feedbooks-com-item-6177668-9782412099247-epub", + "key": "9782412099247", + "issued_on": "2024-11-06T01:00:00+01:00", + "current_holds": 2 + } + ], + "id": "feedhttps-www-feedbooks-com-item-6177668", + "link": "https://mediatheque-valais.cantookstation.eu/resources/6731e051d6f0a4176fe9f5bc", + "created_at": "2024-11-11T11:45:37+01:00", + "updated_at": "2024-11-13T04:15:30+01:00", + "publisher_name": "Pour les nuls", + "fiction": null, + "classifications": [ + { + "standard": "cantook", + "identifiers": [ + "health" + ], + "captions": [ + { + "fr": "Sant\u00e9", + "en": "Health" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBHEA039000", + "FBNFC000000", + "FBHEA000000", + "FBFAM023000" + ], + "captions": [ + { + "fr": "Maladies", + "en": "Diseases" + }, + { + "fr": "Non-Fiction", + "en": "Non-Fiction" + }, + { + "fr": "Sant\u00e9 & Vie quotidienne", + "en": "Health & fitness" + }, + { + "fr": "Sant\u00e9", + "en": "Health" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "VFJB" + ], + "captions": [ + { + "fr": "Faire face \u00e0 la maladie et \u00e0 des \u00e9tats particuliers", + "en": "Coping with illness & specific conditions" + } + ] + } + ], + "unavailable_since": null, + "publisher": { + "name": "Pour les nuls" + } + } + ] +} \ No newline at end 
of file diff --git a/tests/data/mv_cantook_deleted.json b/tests/data/mv_cantook_deleted.json new file mode 100644 index 0000000000..a9a8721be0 --- /dev/null +++ b/tests/data/mv_cantook_deleted.json @@ -0,0 +1,347 @@ +{ + "resources": [ + { + "title": "Vampirologie", + "title_prefix": null, + "title_sort": "vampirologie", + "subtitle": null, + "description": null, + "summary": "Les vampires hantent toujours aujourd'hui nos cauchemars. Depuis leurs d\u00e9buts, ils ont essaim\u00e9 dans la litt\u00e9rature, le cin\u00e9ma, le th\u00e9\u00e2tre, les s\u00e9ries, la BD, le jeu de r\u00f4le, la musique... aucun m\u00e9dia n'a \u00e9chapp\u00e9 \u00e0 la fascination qu'ils exercent sur nous, pauvres mortels. Vampirologie est un ouvrage clef pour comprendre le ph\u00e9nom\u00e8ne, ...", + "comments": null, + "tags": [], + "back_cover": "", + "back_cover_large": null, + "cover": "https://images.immateriel.fr/covers/5V4JHTA.png", + "cover_large": null, + "flipbook": "https://tw5.immateriel.fr/wiki/immateriel/b/5V4JHTA", + "languages": [ + "fre" + ], + "page_count": 735, + "translated_from": "", + "contributors": [ + { + "first_name": "Adrien", + "last_name": "Party", + "nature": "author", + "country": "FR", + "biography": "", + "website": "" + } + ], + "media": [ + { + "nature": "epub", + "key_type": "isbn13", + "id": "immateriel.frO1109367-9782376865193-epub", + "key": "9782376865193", + "issued_on": "2024-11-21T06:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "paper", + "key_type": "isbn13", + "id": "immateriel.frO1109367-9782376864929-paper", + "key": "9782376866688", + "issued_on": "2024-11-21T06:00:00+01:00", + "current_holds": 0 + } + ], + "id": "immateriel.frO1109367", + "link": "https://mediatheque-valais.cantookstation.eu/resources/642bb98ebf82c100014867a4", + "created_at": "2023-04-04T07:45:50+02:00", + "updated_at": "2024-11-21T07:45:39+01:00", + "publisher_name": "Nouvelles \u00c9ditions Actu SF", + "fiction": null, + "classifications": [ + { + 
"standard": "bisac", + "identifiers": [ + "FIC009000" + ], + "captions": [ + { + "fr": null, + "en": null + } + ] + }, + { + "standard": "cantook", + "identifiers": [ + "science-fiction-fantasy" + ], + "captions": [ + { + "fr": "Romans science-fiction et fantastique", + "en": "Science Fiction & Fantasy" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBFIC009000" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "FM" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + } + ], + "unavailable_since": "2002-02-02", + "publisher": { + "name": "Nouvelles \u00c9ditions Actu SF" + } + }, + { + "title": "L'Enterrement des \u00e9toiles", + "title_prefix": null, + "title_sort": "l'enterrement des etoiles", + "subtitle": null, + "description": null, + "summary": "L\u2019annonce de la fin est proche. \u00c0 la cit\u00e9 des h\u00e9ritiers, le roi Jenophon re\u00e7oit la visite de l\u2019oracle annonciateur. C\u2019est le moment que choisit un cirque pour s\u2019installer non loin et offrir un moment de joie. 
Cette compagnie de monstres de foire doit faire face \u00e0 l\u2019obscurit\u00e9 qui s\u2019\u00e9tend et d\u00e9couvrir la lumi\u00e8re int\u00e9rieure, ...", + "comments": null, + "tags": [], + "back_cover": "", + "back_cover_large": null, + "cover": "https://images.immateriel.fr/covers/4MY6AC5.png", + "cover_large": null, + "flipbook": "https://tw5.immateriel.fr/wiki/immateriel/b/4MY6AC5", + "languages": [ + "fre" + ], + "page_count": 320, + "translated_from": "", + "contributors": [ + { + "first_name": "Christophe", + "last_name": "Guillemain", + "nature": "author", + "country": "FR", + "biography": "", + "website": "" + } + ], + "media": [ + { + "nature": "epub", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089597-epub", + "key": "9782354089597", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "paper", + "key_type": "isbn13", + "id": "immateriel.frO1097420-9782354089412-paper", + "key": "9782354089412", + "issued_on": "2024-12-04T06:00:00+01:00", + "current_holds": 0 + } + ], + "id": "immateriel.frO1097420", + "link": "https://mediatheque-valais.cantookstation.eu/resources/66614d09065ed9f5833f2c38", + "created_at": "2024-06-06T07:45:45+02:00", + "updated_at": "2024-11-22T07:45:46+01:00", + "publisher_name": "\u00c9ditions Mn\u00e9mos", + "fiction": null, + "classifications": [ + { + "standard": "bisac", + "identifiers": [ + "FIC009000" + ], + "captions": [ + { + "fr": null, + "en": null + } + ] + }, + { + "standard": "cantook", + "identifiers": [ + "science-fiction-fantasy" + ], + "captions": [ + { + "fr": "Romans science-fiction et fantastique", + "en": "Science Fiction & Fantasy" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBFIC009000" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "FM" + ], + "captions": [ + { + "fr": "Fantasy", + "en": "Fantasy" + } + ] + } + ], + "unavailable_since": null, + "publisher": 
{ + "name": "\u00c9ditions Mn\u00e9mos" + } + }, + { + "title": "Les Neurosciences pour les Nuls, grand format", + "title_prefix": null, + "title_sort": "les neurosciences pour les nuls, grand format", + "subtitle": null, + "description": null, + "summary": "Une plong\u00e9e fascinante dans les m\u00e9andres de nos cerveaux !Comment un amas de cellules interconnect\u00e9es \u2013 les neurones \u2013 peut-il \u00eatre \u00e0 la base de nos pens\u00e9es, de nos souvenirs, de nos sentiments, et faire de chacun de nous la personne que nous sommes ? C'est \u00e0 cette question passionnante que ce livre propose d'apporter une r\u00e9ponse.\n \n Que vous soyez simples curieux ou futurs \u00e9tudiants en m\u00e9decine, biologie, psychologie ou sciences cognitives, cet ouvrage constitue une introduction remarquablement efficace et compl\u00e8te au fonctionnement du cerveau humain, et rend compte des formidables avanc\u00e9es scientifiques de ces derni\u00e8res ann\u00e9es.\n \n D\u00e9couvrez : \n \nLe syst\u00e8me nerveux et le fonctionnement des neurones \nLe si\u00e8ge de l'intelligence, de la conscience et des \u00e9motions \nLa plasticit\u00e9 c\u00e9r\u00e9brale \nLes dysfonctionnements neuronaux \nEt bien d'autres choses encore ", + "comments": null, + "tags": [], + "back_cover": null, + "back_cover_large": null, + "cover": "https://covers.feedbooks.net/item/6177668.jpg?size=large&t=1729302359", + "cover_large": null, + "flipbook": null, + "languages": [ + "fre" + ], + "page_count": 431, + "translated_from": null, + "contributors": [ + { + "first_name": "Franck", + "last_name": "Amthor", + "nature": "author", + "country": null, + "biography": "", + "website": null + }, + { + "first_name": "Laurianne", + "last_name": "Geffroy", + "nature": "author", + "country": null, + "biography": "", + "website": null + }, + { + "first_name": "Laurianne", + "last_name": "Geffroy", + "nature": "author", + "country": null, + "biography": "", + "website": null + } + ], + "media": [ + 
{ + "nature": "paper", + "key_type": "isbn13", + "id": "feedhttps-www-feedbooks-com-item-6177668-9782412096499-paper", + "key": "9782412096499", + "issued_on": "2024-11-06T01:00:00+01:00", + "current_holds": 0 + }, + { + "nature": "epub", + "key_type": "isbn13", + "id": "feedhttps-www-feedbooks-com-item-6177668-9782412099247-epub", + "key": "9782412099247", + "issued_on": "2024-11-06T01:00:00+01:00", + "current_holds": 2 + } + ], + "id": "feedhttps-www-feedbooks-com-item-6177668", + "link": "https://mediatheque-valais.cantookstation.eu/resources/6731e051d6f0a4176fe9f5bc", + "created_at": "2024-11-11T11:45:37+01:00", + "updated_at": "2024-11-13T04:15:30+01:00", + "publisher_name": "Pour les nuls", + "fiction": null, + "classifications": [ + { + "standard": "cantook", + "identifiers": [ + "health" + ], + "captions": [ + { + "fr": "Sant\u00e9", + "en": "Health" + } + ] + }, + { + "standard": "feedbooks", + "identifiers": [ + "FBHEA039000", + "FBNFC000000", + "FBHEA000000", + "FBFAM023000" + ], + "captions": [ + { + "fr": "Maladies", + "en": "Diseases" + }, + { + "fr": "Non-Fiction", + "en": "Non-Fiction" + }, + { + "fr": "Sant\u00e9 & Vie quotidienne", + "en": "Health & fitness" + }, + { + "fr": "Sant\u00e9", + "en": "Health" + } + ] + }, + { + "standard": "thema", + "identifiers": [ + "VFJB" + ], + "captions": [ + { + "fr": "Faire face \u00e0 la maladie et \u00e0 des \u00e9tats particuliers", + "en": "Coping with illness & specific conditions" + } + ] + } + ], + "unavailable_since": "2002-02-02", + "publisher": { + "name": "Pour les nuls" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/xml/ebook1.xml b/tests/data/xml/ebook1.xml deleted file mode 100644 index ed008271b5..0000000000 --- a/tests/data/xml/ebook1.xml +++ /dev/null @@ -1,89 +0,0 @@ - - 00000cam a2200000zu 4500 - - 9782075118842 - - - cantook/EDEN502344 - - - cantook-EDEN502344 - - - fre - - - eng - - - masked - - - Killer Game - - - 2019 - Gallimard Jeunesse - - - 400 pages - - - - 
Osborne est une petite ville du Nebraska où tout le monde se - connaît, pas vraiment le cadre rêvé pour une adolescente! - Mais avec ses amis, Alex la cynique et le très protecteur Darby, - Makani s'y plaît. Sans parler d'Ollie, le garçon solitaire dont - elle aimerait beaucoup se rapprocher... Tout bascule lorsque les - élèves de son lycée se font assassiner les uns après les autres. - Pour éviter de devenir une proie, Makani va devoir afronter un - terrible secret. Qui a dit qu'il ne se passait jamais rien à - Osborne? - - - - Jeunesse - albums et romans - Juvenile Fiction - - - Jeunesse - Youth - - - Policiers & Thrillers - Detective & thrillers - - - Fiction - Fiction - - - Fiction jeunesse: généralités - - Children's / Teenage fiction: General fiction - - - - Perkins, Stephanie - aut - - - Polanco, Emmanuel - - - Troin, Isabelle - trl - - - https://www.edenlivres.fr/p/502344 - Extrait - - - epub - - https://test1/resources/5ccd26d523579476a9ac9f3 - - Texte intégral - mv-cantook - - diff --git a/tests/data/xml/ebook2.xml b/tests/data/xml/ebook2.xml deleted file mode 100644 index 79113cd6a4..0000000000 --- a/tests/data/xml/ebook2.xml +++ /dev/null @@ -1,73 +0,0 @@ - - 00000cam a2200000zu 4500 - - 9782811234157 - - - cantook/immateriel.frO1006810 - - - cantook-immateriel.frO1006810 - - - fre - - - masked - - - La Vie à portée de main - - - 2019 - Milady - - - 384 pages - - - - « Chère Libby, je me rends compte que ça fait deux longues années – - bon sang ! – que tes enfants et toi vivez chez ta mère. Je t'écris - pour savoir si tu veux que je vienne à ton secours. - Depuis la mort de son mari, Libby vit chez sa mère, une femme - autoritaire qui passe son temps à critiquer tout ce qui l'entoure. 
- - - - Romans sentimentaux - Romance - - - Contemporain - Contemporary - - - - Roman sentimental pour adulte et roman sentimental contemporain - - Adult & contemporary romance - - - Guillaume, Nathalie - trl - - - Center, Katherine - aut - - - - http://images.immateriel.fr/covers/7GCMWJ4.png - - Image de couverture - - - audio - - https://test2/resources/5d7c7e462357947ad94991f6 - - Texte intégral - ebibliomedia - - diff --git a/tests/fixtures/metadata.py b/tests/fixtures/metadata.py index 10b022bc5e..241cfe0ae0 100644 --- a/tests/fixtures/metadata.py +++ b/tests/fixtures/metadata.py @@ -1098,22 +1098,6 @@ def pattern_bimonthly_every_two_months_two_levels_data(holdings): return deepcopy(holdings.get("pattern10")) -@pytest.fixture(scope="module") -def ebooks_1_xml(): - """Load ebook1 xml file.""" - filepath = join(dirname(__file__), "..", "data", "xml", "ebook1.xml") - with open(filepath) as fh: - return fh.read() - - -@pytest.fixture(scope="module") -def ebooks_2_xml(): - """Load ebook2 xml file.""" - filepath = join(dirname(__file__), "..", "data", "xml", "ebook2.xml") - with open(filepath) as fh: - return fh.read() - - @pytest.fixture(scope="module") def babel_filehandle(): """Load ebook2 xml file.""" diff --git a/tests/ui/apiharvester/test_apiharvester_utils.py b/tests/ui/apiharvester/test_apiharvester_utils.py deleted file mode 100644 index bee9fe2590..0000000000 --- a/tests/ui/apiharvester/test_apiharvester_utils.py +++ /dev/null @@ -1,114 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""Api harvester tests.""" - -from __future__ import absolute_import, print_function - -import mock -from utils import mock_response - -from rero_ils.modules.apiharvester.tasks import harvest_records -from rero_ils.modules.apiharvester.utils import api_source, get_records - - -@mock.patch("requests.get") -def test_api_source(mock_get, app, capsys): - """Test api source creation update.""" - msg = api_source(name="test", url="http://test.com") - assert msg == "Added" - - mock_get.return_value = mock_response( - json_data={ - "hits": { - "hits": [ - {"metadata": {"pid": "test1", "data": "test data 1"}}, - {"metadata": {"pid": "test2", "data": "test data 2"}}, - ], - "total": {"value": 2}, - "links": {"self": "http:/test.com"}, - } - } - ) - harvest_records( - name="test", url="http://test.com", signals=False, size=1000, max_results=1000 - ) - out, err = capsys.readouterr() - assert out.strip() == "API records found: 2" - - msg = api_source(name="test", url="http://test.com", size=1000) - assert msg == "Not Updated" - msg = api_source( - name="test", - url="http://test.com", - mimetype="mimetype", - size=1000, - comment="comment", - update=True, - ) - assert msg == ( - "Updated: url:http://test.com, mimetype:mimetype," " size:1000, comment:comment" - ) - - -@mock.patch("requests.get") -def test_get_records(mock_get, app, capsys): - """Test finding a circulation policy.""" - mock_get.return_value = mock_response( - json_data={ - "hits": { - "hits": [ - {"metadata": {"pid": "test1", "data": "test data 1"}}, - {"metadata": {"pid": "test2", "data": "test data 2"}}, - ], - "total": {"value": 2}, - "links": {"self": "http:/test.com"}, - } - } - ) - for next_url, data in get_records( - url="http://test.com", name="test", signals=False - ): - assert next_url - assert data == [ - {"data": 
"test data 1", "pid": "test1"}, - {"data": "test data 2", "pid": "test2"}, - ] - out, err = capsys.readouterr() - assert out.strip() == "API records found: 2" - mock_get.return_value = mock_response( - json_data={ - "hits": { - "hits": [ - {"metadata": {"pid": "test3", "data": "test data 3"}}, - {"metadata": {"pid": "test4", "data": "test data 4"}}, - ], - "total": {"value": 2}, - "links": {"self": "http:/test.com"}, - } - } - ) - for next_url, data in get_records( - url="http://test.com", name="test", from_date="1970-01-01", signals=False - ): - assert next_url - assert data == [ - {"data": "test data 3", "pid": "test3"}, - {"data": "test data 4", "pid": "test4"}, - ] - out, err = capsys.readouterr() - assert out.strip() == "API records found: 2" diff --git a/tests/ui/documents/test_documents_api.py b/tests/ui/documents/test_documents_api.py index 85608329bf..8e51ab67fd 100644 --- a/tests/ui/documents/test_documents_api.py +++ b/tests/ui/documents/test_documents_api.py @@ -35,7 +35,6 @@ ) from rero_ils.modules.documents.models import DocumentIdentifier from rero_ils.modules.documents.tasks import delete_drafts, delete_orphan_harvested -from rero_ils.modules.ebooks.tasks import create_records from rero_ils.modules.entities.models import EntityType from rero_ils.modules.entities.remote_entities.api import ( RemoteEntitiesSearch, @@ -229,69 +228,6 @@ def test_document_can_delete(app, document_data_tmp): assert reasons == {} -def test_document_create_records( - app, - org_martigny, - org_sion, - ebook_1_data, - ebook_2_data, - item_type_online_martigny, - loc_online_martigny, - item_type_online_sion, - loc_online_sion, -): - """Test can create harvested records.""" - ebook_1_data["electronicLocator"] = [ - { - "source": "ebibliomedia", - "url": "https://www.site1.org/ebook", - "type": "resource", - } - ] - ebook_2_data["electronicLocator"] = [ - { - "source": "ebibliomedia", - "url": "https://www.site2.org/ebook", - "type": "resource", - } - ] - n_created, n_updated = 
create_records([ebook_1_data]) - assert n_created == 1 - assert n_updated == 0 - - ebook_1_data["electronicLocator"] = [ - { - "source": "ebibliomedia", - "url": "https://www.site2.org/ebook", - "type": "resource", - }, - { - "source": "mv-cantook", - "url": "https://www.site3.org/ebook", - "type": "resource", - }, - ] - n_created, n_updated = create_records([ebook_1_data, ebook_2_data]) - assert n_created == 1 - assert n_updated == 1 - - ebook_1_data["electronicLocator"] = [ - { - "source": "mv-cantook", - "url": "https://www.site3.org/ebook", - "type": "resource", - } - ] - n_created, n_updated = create_records([ebook_1_data, ebook_2_data]) - assert n_created == 0 - assert n_updated == 2 - - # TODO: find a way to execute celery worker tasks in travis tests - # n_created, n_updated = create_records.delay([ebook_1_data]) - # assert n_created == 0 - # assert n_updated == 1 - - def test_document_can_delete_harvested(app, ebook_1_data): """Test can delete for harvested records.""" document = Document.create(ebook_1_data, delete_pid=True) diff --git a/tests/ui/ebooks/test_ebooks_receivers.py b/tests/ui/ebooks/test_ebooks_receivers.py deleted file mode 100644 index efd1f6ef01..0000000000 --- a/tests/ui/ebooks/test_ebooks_receivers.py +++ /dev/null @@ -1,116 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -"""Test ebooks receivers.""" - -from collections import namedtuple - -from rero_ils.modules.documents.api import Document, DocumentsSearch -from rero_ils.modules.ebooks.receivers import publish_harvested_records -from rero_ils.modules.ebooks.tasks import create_records, delete_records -from rero_ils.modules.holdings.api import Holding, HoldingsSearch - - -def test_publish_harvested_records( - app, - ebooks_1_xml, - ebooks_2_xml, - org_martigny, - loc_online_martigny, - item_type_online_martigny, - org_sion, - loc_online_sion, - item_type_online_sion, -): - """Test publish harvested records.""" - Identifier = namedtuple("Identifier", "identifier") - Record = namedtuple("Record", "xml deleted header") - records = [ - Record(xml=ebooks_1_xml, deleted=False, header=Identifier(identifier="record1")) - ] - records.append( - Record(xml=ebooks_2_xml, deleted=False, header=Identifier(identifier="record2")) - ) - records.append( - Record(xml=ebooks_2_xml, deleted=True, header=Identifier(identifier="record3")) - ) - - kwargs = {"max": 100} - publish_harvested_records(sender=None, records=records, kwargs=kwargs) - DocumentsSearch.flush_and_refresh() - HoldingsSearch.flush_and_refresh() - - assert Document.count() == 2 - doc1 = Document.get_record_by_pid("1") - assert doc1.get("$schema") is not None - assert doc1.get("identifiedBy") == [ - {"type": "bf:Isbn", "value": "9782075118842"}, - {"type": "bf:Local", "value": "cantook-EDEN502344"}, - {"type": "bf:Local", "source": "cantook", "value": "record1"}, - ] - assert doc1.get("type") == [ - {"main_type": "docmaintype_book", "subtype": "docsubtype_e-book"} - ] - - assert len(list(Holding.get_holdings_pid_by_document_pid(doc1.pid))) == 1 - doc2 = Document.get_record_by_pid("2") - assert doc2.get("$schema") is not None - assert doc2.get("identifiedBy") == [ - {"type": "bf:Isbn", "value": "9782811234157"}, - {"type": "bf:Local", "value": "cantook-immateriel.frO1006810"}, - {"type": "bf:Local", "source": "cantook", "value": 
"record2"}, - ] - assert doc2.get("type") == [ - {"main_type": "docmaintype_audio", "subtype": "docsubtype_audio_book"} - ] - assert len(list(Holding.get_holdings_pid_by_document_pid(doc2.pid))) == 1 - - # test update - # cretae a double holding - hold_pid = next(Holding.get_holdings_pid_by_document_pid(doc1.pid)) - hold = Holding.get_record_by_pid(hold_pid) - Holding.create(data=hold, dbcommit=True, reindex=True, delete_pid=True) - # create a holding without valid source uri - hold["electronic_location"][0]["uri"] = "https://invalid.uri/XXXXXX" - Holding.create(data=hold, dbcommit=True, reindex=True, delete_pid=True) - HoldingsSearch.flush_and_refresh() - publish_harvested_records(sender=None, records=records) - DocumentsSearch.flush_and_refresh() - HoldingsSearch.flush_and_refresh() - assert len(list(Holding.get_holdings_pid_by_document_pid(doc1.pid))) == 1 - assert len(list(Holding.get_holdings_pid_by_document_pid(doc2.pid))) == 1 - - # test delete - records = [] - del doc1["electronicLocator"] - records.append(doc1) - doc2["electronicLocator"] = [ - { - "content": "coverImage", - "type": "relatedResource", - "url": "http://images.immateriel.fr/covers/DEQ2C5A.png", - } - ] - records.append(doc2) - - create_records(records=records) - DocumentsSearch.flush_and_refresh() - HoldingsSearch.flush_and_refresh() - assert not list(Holding.get_holdings_pid_by_document_pid(doc1.pid)) - assert not list(Holding.get_holdings_pid_by_document_pid(doc2.pid)) - - assert 2 == delete_records(records=records) diff --git a/tests/ui/ebooks/test_ebooks_utils.py b/tests/ui/ebooks/test_ebooks_utils.py deleted file mode 100644 index af34e865fa..0000000000 --- a/tests/ui/ebooks/test_ebooks_utils.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 
of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""Test ebook utils.""" - -from rero_ils.modules.ebooks.utils import add_oai_source - - -def test_add_oai_source(app): - """Test add oai source.""" - msg = add_oai_source(name="test", baseurl="http://test.com") - assert msg == "Added" - msg = add_oai_source(name="test", baseurl="http://test.com") - assert msg == "Not Updated" - msg = add_oai_source( - name="test", - baseurl="http://test.com", - setspecs="specs", - comment="comment", - update=True, - ) - assert msg == "Updated" diff --git a/tests/unit/documents/test_documents_dojson_ebooks.py b/tests/unit/documents/test_documents_dojson_ebooks.py deleted file mode 100644 index 6e9d6b79b0..0000000000 --- a/tests/unit/documents/test_documents_dojson_ebooks.py +++ /dev/null @@ -1,597 +0,0 @@ -# -*- coding: utf-8 -*- -# -# RERO ILS -# Copyright (C) 2019 RERO -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -"""DOJSON transiformation for ebooks module tests.""" - -from __future__ import absolute_import, print_function - -from dojson.contrib.marc21.utils import create_record - -from rero_ils.modules.ebooks.dojson.contrib.marc21 import marc21 - - -def test_marc21_to_isbn_ebooks(): - """Test dojson isbn transformation.""" - marc21xml = """ - - - 9782812933868 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("identifiedBy") == [{"type": "bf:Isbn", "value": "9782812933868"}] - - marc21xml = """ - - - 9782812 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("identifiedBy") is None - - marc21xml = """ - - - feedhttps-www-feedbooks-com-book-414-epub - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert not data.get("identifiedBy") - - -def test_marc21_to_languages_ebooks_from_008(): - """Test languages from field 008.""" - marc21xml = """ - - 00501naa a2200133 a 4500 - 160315s2015 cc ||| | ||||00| |fre d - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("language") == [{"type": "bf:Language", "value": "fre"}] - - -def test_marc21_to_languages_ebooks(): - """Test languages transformation. - - Test languages in multiples fields 041. 
- """ - marc21xml = """ - - - fre - - - eng - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("language") == [{"type": "bf:Language", "value": "fre"}] - - -def test_marc21_to_type_ebooks(): - """Test Other Standard Identifier transformation.""" - marc21xml = """ - - - http://cantookstation.com/resources/1 - - - - cantook-EDEN496624 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - identifiers = data.get("identifiedBy", []) - assert identifiers[0] == {"type": "bf:Local", "value": "cantook-EDEN496624"} - - -def test_marc21_to_title(): - """Test title transformation.""" - marc21xml = """ - - - Elena et les joueuses - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("title") == [ - {"mainTitle": [{"value": "Elena et les joueuses"}], "type": "bf:Title"} - ] - - -def test_marc21_to_extent(): - """Test extent transformation. - - Transformation of nb pages, volumes... field 300 $a. - """ - marc21xml = """ - - - 1234 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("extent") == "1234" - - -def test_marc21_to_description(): - """Test description transformation. - - 300 [$a repetitive]: extent, duration: - 300 [$a non repetitive]: colorContent, productionMethod, - illustrativeContent, note of type otherPhysicalDetails - 300 [$c rep - """ - marc21xml = """ - - - 116 p. - ill. - 22 cm - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("extent") == "116 p." - - marc21xml = """ - - - 116 p. - ill. - 22 cm - 12 x 15 - - - 200 p. - ill. - 19 cm - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("extent") == "116 p." - - marc21xml = """ - - - 116 p. - ill. - 22 cm - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("extent") == "116 p." 
- - -def test_marc21_to_notes(): - """Test notes transformation. - - Transformation notes field 500 $a. - """ - - marc21xml = """ - - - note 1 - - - note 2 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("note") == [ - {"noteType": "general", "label": "note 1"}, - {"noteType": "general", "label": "note 2"}, - ] - - -def test_marc21_to_edition_statement_one_field_250(): - """Test dojson edition statement. - - 1 edition designation and 1 responsibility from field 250 - """ - marc21xml = """ - - - 2e ed. - avec un avant-propos par Jean Faret - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("editionStatement") == [ - { - "editionDesignation": [{"value": "2e ed."}], - "responsibility": [{"value": "avec un avant-propos par Jean Faret"}], - } - ] - - -def test_marc21_to_provision_activity_ebooks_from_field_260(): - """Test provision activity Place and Date from field 260 transformation.""" - marc21xml = """ - - - Lausanne : - - [2006] - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("provisionActivity") == [ - { - "type": "bf:Publication", - "statement": [ - {"label": [{"value": "Lausanne"}], "type": "bf:Place"}, - {"label": [{"value": "[2006]"}], "type": "Date"}, - ], - "startDate": 2006, - } - ] - - -# Copyright Date: [264 _4 $c non repetitive] -def test_marc21copyrightdate_ebooks_from_field_264_04(): - """Test dojson Copyright Date.""" - - marc21xml = """ - - - © 1971 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("copyrightDate") == ["© 1971"] - - marc21xml = """ - - - © 1971 [extra 1973] - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("copyrightDate") == ["© 1971 [extra 1973]"] - - -def test_marc21_to_provision_activity_ebooks_from_field_264_1(): - """Test provision activity Place and Date from field 264_1 
transform.""" - marc21xml = """ - - - Lausanne : - Payot, - [2006-2010] - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("provisionActivity") == [ - { - "type": "bf:Publication", - "statement": [ - {"label": [{"value": "Lausanne"}], "type": "bf:Place"}, - {"label": [{"value": "Payot"}], "type": "bf:Agent"}, - {"label": [{"value": "[2006-2010]"}], "type": "Date"}, - ], - "startDate": 2006, - "endDate": 2010, - } - ] - - -def test_marc21_to_provision_activity_ebooks_from_field_264_2(): - """Test provision activity Place and Date from field 264_2 transform.""" - marc21xml = """ - - - Lausanne : - Payot, - [2006-2010] - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("provisionActivity") == [ - { - "type": "bf:Distribution", - "statement": [ - {"label": [{"value": "Lausanne"}], "type": "bf:Place"}, - {"label": [{"value": "Payot"}], "type": "bf:Agent"}, - {"label": [{"value": "[2006-2010]"}], "type": "Date"}, - ], - } - ] - - -def test_marc21_to_subjects(): - """Test subjects transformation. - - Test subjects in field 653. - Checks applied: - - duplicates subjects removal - - generation of a list of all subjects. 
- """ - marc21xml = """ - - - Croissance personnelle - Self-Help - - - Santé - Health - - - Développement Personnel - Self-Help - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("subjects") == [ - { - "entity": { - "authorized_access_point": "Croissance personnelle", - "type": "bf:Topic", - } - }, - {"entity": {"authorized_access_point": "Self-Help", "type": "bf:Topic"}}, - {"entity": {"authorized_access_point": "Santé", "type": "bf:Topic"}}, - {"entity": {"authorized_access_point": "Health", "type": "bf:Topic"}}, - { - "entity": { - "authorized_access_point": "Développement Personnel", - "type": "bf:Topic", - } - }, - {"entity": {"authorized_access_point": "Self-Help", "type": "bf:Topic"}}, - ] - - -def test_marc21_to_contribution(): - """Test contribution transformation. - - Test author in field 700 with first indicator = 0 - for Forename (name without comma separator). - """ - marc21xml = """ - - - Collectif - aut - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("contribution") == [ - { - "entity": {"type": "bf:Person", "authorized_access_point": "Collectif"}, - "role": ["aut"], - } - ] - - marc21xml = """ - - - Jean-Paul - II - Pape - 1954- - aut - - - Dumont, Jean - Historien - 1921-2014 - edt - - - RERO - - - Biennale de céramique contemporaine - (17 : - 2003 : - Châteauroux) - - - """ - - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - contribution = data.get("contribution") - assert contribution == [ - { - "entity": { - "type": "bf:Person", - "authorized_access_point": "Jean-Paul II, Pape, 1954", - }, - "role": ["aut"], - }, - { - "entity": { - "authorized_access_point": "Dumont, Jean, 1921-2014, Historien", - "type": "bf:Person", - }, - "role": ["edt"], - }, - { - "entity": {"type": "bf:Organisation", "authorized_access_point": "RERO"}, - "role": ["ctb"], - }, - { - "entity": { - "type": "bf:Organisation", - "authorized_access_point": 
"Biennale de céramique contemporaine (17 : 2003 : " - "Châteauroux)", - }, - "role": ["aut"], - }, - ] - - -def test_marc21_to_contribution_and_translator(): - """Test contribution and translator transformation. - - Test author and translator in fields 700 with first indicator = 1 - for Surname (name with comma separator). - """ - marc21xml = """ - - - Peeters, Hagar - aut - - - Maufroy, Sandrine - trl - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("contribution") == [ - { - "entity": { - "type": "bf:Person", - "authorized_access_point": "Peeters, Hagar", - }, - "role": ["aut"], - }, - { - "entity": { - "type": "bf:Person", - "authorized_access_point": "Maufroy, Sandrine", - }, - "role": ["trl"], - }, - ] - - -def test_marc21_electronicLocator_ebooks(): - """Harvested_resources tests.""" - marc21xml = """ - - - http://site1.org/resources/1 - ebibliomedia - - - http://site5.org/resources/1 - mv-cantook - - - Image de couverture - http://site2.org/resources/2 - - - Extrait - https://www.edenlivres.fr/p/172480 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("electronicLocator") == [ - { - "url": "http://site1.org/resources/1", - "type": "resource", - "source": "ebibliomedia", - }, - { - "url": "http://site5.org/resources/1", - "type": "resource", - "source": "mv-cantook", - }, - { - "url": "http://site2.org/resources/2", - "type": "relatedResource", - "content": "coverImage", - }, - ] - - -def test_marc21_cover_art_ebooks(): - """Cover art tests.""" - marc21xml = """ - - - Image de couverture - http://site2.org/resources/2 - - - test - http://site3.org/resources/2 - - - """ - marc21json = create_record(marc21xml) - data = marc21.do(marc21json) - assert data.get("electronicLocator") == [ - { - "url": "http://site2.org/resources/2", - "type": "relatedResource", - "content": "coverImage", - } - ]