Skip to content

Commit

Permalink
fix: added tests to crossref posted_content, funding
Browse files Browse the repository at this point in the history
 	modified:   adsingestp/parsers/crossref.py
 	modified:   tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml
 	modified:   tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml
 	modified:   tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml
 	modified:   tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml
 	modified:   tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml
 	modified:   tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json
 	modified:   tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json
 	new file:   tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json
 	new file:   tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json
 	new file:   tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json
 	new file:   tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json
 	new file:   tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json
 	modified:   tests/test_crossref.py
  • Loading branch information
seasidesparrow committed Jul 28, 2023
1 parent ccca151 commit 273ea4f
Show file tree
Hide file tree
Showing 14 changed files with 1,339 additions and 18 deletions.
33 changes: 22 additions & 11 deletions adsingestp/parsers/crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,15 @@ def _get_isbn(self, isbns):

def _get_funding(self, fundgroups):
funding_arr = []
funding_text = None
for fg in fundgroups:
funder = {}
funder_name = fg.find("assertion", {"name": "funder_name"}).extract()
funder_award = fg.find("assertion", {"name": "award_number"}).extract()
funder_name = fg.find("assertion", {"name": "funder_name"})
funder_award = fg.find("assertion", {"name": "award_number"})
if funder_name:
funder_id = funder_name.find("assertion", {"name": "funder_identifier"}).extract()
funder_id = funder_name.find("assertion", {"name": "funder_identifier"})
if funder_id:
funder_id = funder_id.extract()
funder_name = funder_name.extract()
else:
funder_id = None

Expand All @@ -85,8 +87,8 @@ def _get_funding(self, fundgroups):
if funder_id:
funder.setdefault("agencyid", {"idvalue": funder_id.get_text().strip()})
if funder_award:
funder.setdefault("awardnumber", funder_award.get_text().strip())
funder.setdefault("awardnumber", funder_award.extract().get_text().strip())

if funder:
funding_arr.append(funder)

Expand Down Expand Up @@ -193,18 +195,27 @@ def _parse_posted_content(self):
if self.record_meta.find("institution"):
inst_name = None
if self.record_meta.find("institution").find("institution_name"):
inst_name = self.record_meta.find("institution").find("institution_name").get_text()
inst_name = (
self.record_meta.find("institution").find("institution_name").get_text()
)
if self.record_meta.find("institution").find("institution_acronym"):
if inst_name:
inst_name = inst_name + " (%s)" % self.record_meta.find("institution").find("institution_acronym").get_text()
inst_name = (
inst_name
+ " (%s)"
% self.record_meta.find("institution")
.find("institution_acronym")
.get_text()
)
else:
inst_name = self.record_meta.find("institution").find("institution_acronym").get_text()
inst_name = (
self.record_meta.find("institution").find("institution_acronym").get_text()
)
if inst_name:
self.base_metadata["publisher"] = inst_name
if self.record_meta.find("posted_date"):
pubdate = self._get_date(self.record_meta.find("posted_date"))
self.base_metadata["pubdate_electronic"] = pubdate


def _parse_title_abstract(self):
if self.record_meta.find("titles") and self.record_meta.find("titles").find("title"):
Expand Down Expand Up @@ -242,7 +253,7 @@ def _parse_contrib(self):

if c.find("ORCID"):
orcid = c.find("ORCID").get_text()
orcid = orcid.replace("http://orcid.org/", "")
orcid = orcid.replace("http://orcid.org/", "").replace("https://orcid.org/", "")
contrib_tmp["orcid"] = orcid

if c.find("affiliation"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,4 @@
</posted_content>
</crossref>
</doi_record>
</doi_records>
</doi_records>
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@
</posted_content>
</crossref>
</doi_record>
</doi_records>
</doi_records>
2 changes: 1 addition & 1 deletion tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,4 @@
</posted_content>
</crossref>
</doi_record>
</doi_records>
</doi_records>
2 changes: 1 addition & 1 deletion tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,4 @@
</posted_content>
</crossref>
</doi_record>
</doi_records>
</doi_records>
2 changes: 1 addition & 1 deletion tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@
</posted_content>
</crossref>
</doi_record>
</doi_records>
</doi_records>
252 changes: 251 additions & 1 deletion tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json

Large diffs are not rendered by default.

208 changes: 207 additions & 1 deletion tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json

Large diffs are not rendered by default.

276 changes: 276 additions & 0 deletions tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
{
"recordData": {
"createdTime": "",
"parsedTime": "",
"loadType": "fromFile",
"loadFormat": "OtherXML",
"loadLocation": "",
"recordOrigin": ""
},
"pubDate": {
"electrDate": "2021-11-08"
},
"publication": {
"publisher": "Earth and Space Science Open Archive (ESSOAr)",
"pubYear": "2021"
},
"persistentIDs": [
{
"DOI": "10.1002/essoar.10508651.1"
}
],
"authors": [
{
"name": {
"surname": "Livi",
"given_name": "Roberto"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
],
"attrib": {
"orcid": "0000-0002-0396-0547"
}
},
{
"name": {
"surname": "Larson",
"given_name": "Davin E"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Kasper",
"given_name": "Justin C"
},
"affiliation": [
{
"affPubRaw": "University of Michigan"
},
{
"affPubRaw": "Smithsonian Astrophysical Observatory"
}
]
},
{
"name": {
"surname": "Abiad",
"given_name": "Robert"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Case",
"given_name": "Anthony W"
},
"affiliation": [
{
"affPubRaw": "Smithsonian Astrophysical Observatory"
}
]
},
{
"name": {
"surname": "Klein",
"given_name": "Kristopher G"
},
"affiliation": [
{
"affPubRaw": "University of Michigan"
},
{
"affPubRaw": "University of Arizona"
}
]
},
{
"name": {
"surname": "Curtis",
"given_name": "David W"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Dalton",
"given_name": "Gregory"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Stevens",
"given_name": "Michael"
},
"affiliation": [
{
"affPubRaw": "Smithsonian Astrophysical Observatory"
}
]
},
{
"name": {
"surname": "Korreck",
"given_name": "Kelly E"
},
"affiliation": [
{
"affPubRaw": "Smithsonian Astrophysical Observatory"
}
]
},
{
"name": {
"surname": "Ho",
"given_name": "George"
},
"affiliation": [
{
"affPubRaw": "Applied Physics Laboratory, Johns Hopkins University"
}
]
},
{
"name": {
"surname": "Robinson",
"given_name": "Miles"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Tiu",
"given_name": "Chris"
},
"affiliation": [
{
"affPubRaw": "NASA"
}
]
},
{
"name": {
"surname": "Whittlesey",
"given_name": "Phyllis L"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Verniero",
"given_name": "J L"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Halekas",
"given_name": "Jasper"
},
"affiliation": [
{
"affPubRaw": "University of Iowa"
}
]
},
{
"name": {
"surname": "Mcfadden",
"given_name": "James"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Marckwordt",
"given_name": "Mario"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Slagle",
"given_name": "Amanda"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Abatcha",
"given_name": "Mamuda"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
},
{
"name": {
"surname": "Rahmati",
"given_name": "Ali"
},
"affiliation": [
{
"affPubRaw": "University of California"
}
]
}
],
"title": {
"textEnglish": "The Solar Probe ANalyzer -Ions on Parker Solar Probe"
},
"funding": [
{
"agencyname": "National Aeronautics and Space Administration",
"agencyid": {
"idvalue": "100000104"
},
"awardnumber": "NNN06AA01C"
}
]
}
Loading

0 comments on commit 273ea4f

Please sign in to comment.