Skip to content

Commit

Permalink
ebooks: Fix do_json languages
Browse files Browse the repository at this point in the history
Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep committed Nov 18, 2024
1 parent 1fdc0fb commit 9d8e9b3
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 38 deletions.
12 changes: 8 additions & 4 deletions rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,18 +1364,22 @@ def init_lang_from(fields_041, code):
self.langs_from_041_h = []
try:
self.lang_from_008 = self.field_008_data[35:38]
if self.lang_from_008 in [" ", "|||"]:
self.lang_from_008 = "und"
elif self.lang_from_008 not in _LANGUAGES:
if self.lang_from_008 not in _LANGUAGES:
error_print(
"WARNING NOT A LANGUAGE 008:",
self.bib_id,
self.rero_id,
f'"{self.lang_from_008}"',
)
self.lang_from_008 = "und"
except Exception:
self.lang_from_008 = "und"
error_print("WARNING: set 008 language to 'und'", self.bib_id, self.rero_id)
error_print(
"WARNING NOT A LANGUAGE 008:",
self.bib_id,
self.rero_id,
f'"{self.field_008_data}"',
)

fields_041 = self.get_fields(tag="041")
self.langs_from_041_a = init_lang_from(fields_041, code="a")
Expand Down
29 changes: 16 additions & 13 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,27 +477,30 @@ def do_language(data, marc21):
languages: 008 and 041 [$a, repetitive]
"""
language = data.get("language", [])
lang_codes = [v.get("value") for v in language]
languages = data.get("language", [])
lang_codes = [v.get("value") for v in languages]
if marc21.lang_from_008:
lang_value = marc21.lang_from_008
if lang_value != "|||" and lang_value not in lang_codes:
language.append({"value": lang_value, "type": "bf:Language"})
if lang_value in _LANGUAGES and lang_value not in lang_codes:
languages.append({"value": lang_value, "type": "bf:Language"})
lang_codes.append(marc21.lang_from_008)
for lang_value in marc21.langs_from_041_a:
if lang_value not in lang_codes:
language.append({"value": lang_value.strip(), "type": "bf:Language"})
if lang_value in _LANGUAGES and lang_value not in lang_codes:
languages.append({"value": lang_value.strip(), "type": "bf:Language"})
lang_codes.append(lang_value)
# language note
if fields_546 := marc21.get_fields(tag="546"):
subfields_546_a = marc21.get_subfields(fields_546[0], "a")
if subfields_546_a and language:
language[0]["note"] = subfields_546_a[0]

if not language:
error_print("ERROR LANGUAGE:", marc21.bib_id, f'f{language} set to "und"')
language = [{"value": "und", "type": "bf:Language"}]
return language or None
if subfields_546_a and languages:
languages[-1]["note"] = subfields_546_a[0]

if len(languages) > 1:
# clean "und" languages
languages = [language for language in languages if language["value"] != "und"]
if not languages:
error_print("ERROR NO LANGUAGE:", marc21.bib_id, 'set to "und"')
languages = [{"value": "und", "type": "bf:Language"}]
return languages or None


def do_abbreviated_title(data, marc21, key, value):
Expand Down
31 changes: 11 additions & 20 deletions rero_ils/modules/ebooks/dojson/contrib/marc21/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ def marc21_to_language_from_008(self, key, value):
return do_language(self, marc21)


@marc21.over("language", "^041")
@utils.ignore_value
def marc21_to_language_from_041(self, key, value):
    """Build the record languages when a 041 field is matched.

    Neither ``key`` nor ``value`` is inspected here: the work is delegated
    to ``do_language``, called with the record being built (``self``) and
    the ``marc21`` object from the enclosing scope.

    languages: 008 and 041 [$a, repetitive]
    """
    # If we don't have languages from 008, try to set them with 041.
    languages = do_language(self, marc21)
    return languages


@marc21.over("identifiedBy", "^020..")
@utils.ignore_value
def marc21_to_identifier_isbn(self, key, value):
Expand Down Expand Up @@ -98,26 +109,6 @@ def marc21_to_identifier_rero_id(self, key, value):
return identifiers


@marc21.over("language", "^041..")
@utils.ignore_value
def marc21_to_translated_from(self, key, value):
"""Add the 041 $a language codes to the "language" list.
Starts from the entries already stored under the "language" key of
``self``, appends one ``{"type": "bf:Language", "value": code}`` entry
for every $a code that is not yet present, and returns that list.
languages: 008 and 041 [$a, repetitive]
NOTE(review): despite the function name, only subfield $a is read here.
NOTE(review): indentation is not visible in this view; confirm the nesting
of the statements below against the repository before editing them.
"""
# Existing language entries, or a fresh list when the key is absent.
languages = self.get("language", [])
# Codes already present; used to skip duplicates.
unique_lang = []
if languages != []:
unique_lang.extend(language["value"] for language in languages)
# A missing or empty $a leaves the list untouched.
if language := value.get("a"):
# presumably force_list wraps a single $a value so it can be iterated - verify
for lang in utils.force_list(language):
if lang not in unique_lang:
unique_lang.append(lang)
languages.append({"type": "bf:Language", "value": lang})

return languages


@marc21.over("contribution", "(^100|^700|^710|^711)..")
@utils.for_each_value
@utils.ignore_value
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/documents/test_documents_dojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -2484,7 +2484,7 @@ def test_marc21_to_provision_activity_exceptions(capsys):
assert out.strip().replace("\n", "") == (
'WARNING NOT A LANGUAGE 008:\t???\t???\t""\t'
"WARNING LANGUAGE SCRIPTS:"
'\t???\t???\tcyrl\t008:\t""\t041$a:\t[]\t041$h:\t[]'
'\t???\t???\tcyrl\t008:\t"und"\t041$a:\t[]\t041$h:\t[]'
)

marc21xml = """
Expand Down

0 comments on commit 9d8e9b3

Please sign in to comment.