Skip to content

Commit

Permalink
Merge pull request #52 from seasidesparrow/fix_51_2023Jul10
Browse files (browse the repository at this point in the history)
Fixes issues #50 and #51
  • Loading branch information
seasidesparrow authored Jul 13, 2023
2 parents 714b9a8 + deb72c7 commit ad58eb1
Show file tree
Hide file tree
Showing 6 changed files with 1,407 additions and 9 deletions.
25 changes: 16 additions & 9 deletions adsingestp/parsers/jats.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -336,11 +336,12 @@ def parse(self, article_metadata):
# check and see if the publisher defined an email tag inside an affil (like IOP does)
nested_email_list = aff.find_all("ext-link")
for e in nested_email_list:
key = e["id"]
value = e.text
# build the cross-reference dictionary to be used later
self.email_xref[key] = value
e.decompose()
if e.get("ext-link-type", None) == "email":
key = e["id"]
value = e.text
# build the cross-reference dictionary to be used later
self.email_xref[key] = value
e.decompose()

key = aff.get("id", default_key)
# special case: get rid of <sup>...
Expand Down Expand Up @@ -542,10 +543,16 @@ def _parse_title_abstract(self):
self.base_metadata["title"] = self._detag(title, self.JATS_TAGSET["title"]).strip()

if self.article_meta.find("abstract") and self.article_meta.find("abstract").find("p"):
abstract = self._detag(
self.article_meta.find("abstract").find("p"), self.JATS_TAGSET["abstract"]
)
self.base_metadata["abstract"] = abstract
abstract_all = self.article_meta.find("abstract").find_all("p")
abstract_paragraph_list = list()
for paragraph in abstract_all:
para = self._detag(paragraph, self.JATS_TAGSET["abstract"])
abstract_paragraph_list.append(para)
self.base_metadata["abstract"] = "\n".join(abstract_paragraph_list)
# abstract = self._detag(
# self.article_meta.find("abstract").find("p"), self.JATS_TAGSET["abstract"]
# )
# self.base_metadata["abstract"] = abstract
if title_fn_list:
self.base_metadata["abstract"] += " " + " ".join(title_fn_list)

Expand Down
2 changes: 2 additions & 0 deletions tests/stubdata/input/jats_a+a_multiparagraph_abstract.xml

Large diffs are not rendered by default.

708 changes: 708 additions & 0 deletions tests/stubdata/input/jats_iop_apj_923_1_47.xml

Large diffs are not rendered by default.

Loading

0 comments on commit ad58eb1

Please sign in to comment.