From 1d0ebe7be1243f5055a750e0f9525137361389a6 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 24 Jul 2023 20:28:12 -0400 Subject: [PATCH 1/6] fix: adding support for crossref preprints modified: adsingestp/parsers/base.py modified: adsingestp/parsers/crossref.py new file: tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml new file: tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml new file: tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml new file: tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml new file: tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml --- adsingestp/parsers/base.py | 2 +- adsingestp/parsers/crossref.py | 76 +++++++- ...ref_preprint_10.1002-essoar.10508651.1.xml | 146 +++++++++++++++ ...ref_preprint_10.1002-essoar.10511074.2.xml | 93 ++++++++++ .../crossref_preprint_10.31223-X55K7G.xml | 76 ++++++++ .../crossref_preprint_10.31223-X5FW25.xml | 171 ++++++++++++++++++ .../crossref_preprint_10.31223-X5WD2C.xml | 57 ++++++ 7 files changed, 619 insertions(+), 2 deletions(-) create mode 100644 tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml create mode 100644 tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml create mode 100644 tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml create mode 100644 tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml create mode 100644 tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml diff --git a/adsingestp/parsers/base.py b/adsingestp/parsers/base.py index 81ebe66..c5aa5f9 100644 --- a/adsingestp/parsers/base.py +++ b/adsingestp/parsers/base.py @@ -452,7 +452,7 @@ def format(self, input_dict, format): # output["pubnote"] = "XXX" # TODO need an example # - # output["funding"] = "XXX" # TODO need an example + output["funding"] = input_dict.get("funding", None) # # output["version"] = "XXX" # TODO need an example diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index 6edbf01..a309892 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -68,6 +68,49 @@ def _get_isbn(self, isbns): return isbns_out + def _get_funding(self, fundgroups): + funding = [] + funding_text = None + for fg in fundgroups: + funder = None + try: + name = fg.find("assertion", {"name": "funder_name"}).get_text() + except Exception as noop: + name = None + try: + awards = fg.find_all("assertion", {"name": "award_number"}) + award_list = [] + for a in awards: + award_list.append(a.get_text()) + award = ", ".join(award_list) + except Exception as noop: + award = None + + if name: + multiline = [] + for l in name.split('\n'): + if l.strip(): + multiline.append(l.strip()) + funder = ", ".join(multiline) + if award: + if funder: + funder = funder + ", Award(s): %s" % award + else: + funder = "Award(s): %s" % award + if funder: + funding.append(funder) + + funding_text = "; ".join(funding) + + return funding_text + + def _parse_funding(self): + fundgroups = self.record_meta.find_all("assertion", {"name": "fundgroup"}) + if not fundgroups: + print('wtf, why no data?') + funding = self._get_funding(fundgroups) + self.base_metadata["funding"] = funding + def _parse_pub(self): # journal articles only if self.input_metadata.find("journal") and self.input_metadata.find("journal").find( @@ -159,6 +202,23 @@ def _parse_book_series(self): self.base_metadata["series_id"] = series_meta.find("issn").get_text() self.base_metadata["series_id_description"] = "issn" + def _parse_posted_content(self): + if self.record_meta.find("institution"): + inst_name = None + if self.record_meta.find("institution").find("institution_name"): + inst_name = self.record_meta.find("institution").find("institution_name").get_text() + if self.record_meta.find("institution").find("institution_acronym"): + if inst_name: + inst_name = inst_name + " (%s)" % self.record_meta.find("institution").find("institution_acronym").get_text() + else: + inst_name = self.record_meta.find("institution").find("institution_acronym").get_text() + if inst_name: + self.base_metadata["publisher"] = inst_name + if self.record_meta.find("posted_date"): + pubdate = self._get_date(self.record_meta.find("posted_date")) + self.base_metadata["pubdate_electronic"] = pubdate + + def _parse_title_abstract(self): if self.record_meta.find("titles") and self.record_meta.find("titles").find("title"): self.base_metadata["title"] = self.record_meta.find("titles").find("title").get_text() @@ -350,10 +410,20 @@ def parse(self, text): self.record_meta = self.input_metadata.find("book_series_metadata").extract() else: self.record_meta = None + if self.input_metadata.find("posted_content"): + if type_found: + raise WrongSchemaException("Too many document types found in CrossRef record") + else: + type_found = True + self.record_type = "posted_content" + if self.input_metadata.find("posted_content"): + self.record_meta = self.input_metadata.find("posted_content").extract() + else: + self.record_meta = None if not type_found: raise WrongSchemaException( - "Didn't find allowed document type (article, conference, book) in CrossRef record" + "Didn't find allowed document type (article, conference, book, posted_content) in CrossRef record" ) elif not self.record_meta: raise WrongSchemaException( @@ -380,6 +450,10 @@ def parse(self, text): if self.record_meta.find("series_metadata"): self._parse_book_series() + if self.record_type == "posted_content": + self._parse_posted_content() + + self._parse_funding() self._parse_issue() self._parse_title_abstract() self._parse_contrib() diff --git a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml new file mode 100644 index 0000000..24481e0 --- /dev/null +++ b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml @@ -0,0 +1,146 @@ + + + + + + Solar System Physics + + + Roberto + Livi + University of California + https://orcid.org/0000-0002-0396-0547 + + + Davin E + Larson + University of California + + + Justin C + Kasper + University of Michigan + Smithsonian Astrophysical Observatory + + + Robert + Abiad + University of California + + + Anthony W + Case + Smithsonian Astrophysical Observatory + + + Kristopher G + Klein + University of Michigan + University of Arizona + + + David W + Curtis + University of California + + + Gregory + Dalton + University of California + + + Michael + Stevens + Smithsonian Astrophysical Observatory + + + Kelly E + Korreck + Smithsonian Astrophysical Observatory + + + George + Ho + Applied Physics Laboratory, Johns Hopkins University + + + Miles + Robinson + University of California + + + Chris + Tiu + NASA + + + Phyllis L + Whittlesey + University of California + + + J L + Verniero + University of California + + + Jasper + Halekas + University of Iowa + + + James + Mcfadden + University of California + + + Mario + Marckwordt + University of California + + + Amanda + Slagle + University of California + + + Mamuda + Abatcha + University of California + + + Ali + Rahmati + University of California + + + + The Solar Probe ANalyzer -Ions on Parker Solar Probe + + + 11 + 08 + 2021 + + + Earth and Space Science Open Archive + ESSOAr + + + + + National Aeronautics and Space Administration + 100000104 + + NNN06AA01C + + + + 10.1002/essoar.10508651.1 + https://essopenarchive.org/doi/full/10.1002/essoar.10508651.1 + + + + + + \ No newline at end of file diff --git a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml new file mode 100644 index 0000000..a47bdb6 --- /dev/null +++ b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml @@ -0,0 +1,93 @@ + + + + + + Planetology + + + Mélanie + Drilleau + 31400 Toulouse + Institut Supérieur de l’Aéronautique et de l’Espace ISAE-SUPAERO + 10 Avenue Edouard Belin + France + https://orcid.org/0000-0001-5625-9706 + + + Henri + Samuel + Institut de Physique du Globe de Paris, CNRS, Université de Paris, 1 rue Jussieu, 75005 Paris - France + + + Raphaël F. + Garcia + Institut Supérieur de l’Aéronautique et de l’Espace ISAE-SUPAERO, 10 Avenue Edouard Belin, 31400 Toulouse, France + + + Attilio + Rivoldini + Royal Observatory of Belgium, Brussels, Belgium + + + Clément + Perrin + Nantes Université, Université d’Angers, Le Mans Université, CNRS UMR 6112, Laboratoire de Planétologie et Géosciences, UAR 3281, Observatoire des Sciences de l’Univers de Nantes Atlantique, F-44000 Nantes, France + + + Chloé + Michaut + Université de Lyon, Ecole Normale Supérieure de Lyon, Université Claude Bernard Lyon 1, CNRS, Laboratoire de Géologie de Lyon : Terre, Planètes, Environnement, 69622 Villeurbanne, France + + + Mark + Wieczorek + Université Côte d’Azur, Observatoire de la Côte d’Azur, CNRS, Laboratoire Lagrange, France. + + + Benoît + Tauzin + Université de Lyon, Ecole Normale Supérieure de Lyon, Université Claude Bernard Lyon 1, CNRS, Laboratoire de Géologie de Lyon : Terre, Planètes, Environnement, 69622 Villeurbanne, France + + + James A. D. + Connolly + Institute of Geophysics, ETH Zurich, Sonneggstrasse 5, Zurich, Switzerland + + + Pauline + Meyer + Ecole et Observatoire des Sciences de la Terre, Université de Strasbourg, 5 rue René Descartes, 67084 Strasbourg, France + + + Philippe + Lognonné + Institut de Physique du Globe de Paris, CNRS, Université de Paris, 1 rue Jussieu, 75005 Paris - France + + + William B. + Banerdt + Jet Propulsion Laboratory, California Institute of Technology, 4800 Oak Grove Drive, Pasadena, CA 91109, USA + + + + Marsquake locations and 1-D seismic models for Mars from InSight data + + + 04 + 11 + 2022 + + + Earth and Space Science Open Archive + ESSOAr + + + 10.1002/essoar.10511074.2 + https://essopenarchive.org/doi/full/10.1002/essoar.10511074.2 + + + + + + \ No newline at end of file diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml b/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml new file mode 100644 index 0000000..200badd --- /dev/null +++ b/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml @@ -0,0 +1,76 @@ + + + + + + Computer Sciences + + + Jacob + Zwart + https://orcid.org/0000-0002-3870-405X + + + Samantha + Oliver + https://orcid.org/0000-0001-5668-1165 + + + William + Watkins + https://orcid.org/0000-0002-7544-0700 + + + Jeffrey + Sadler + https://orcid.org/0000-0001-8776-4844 + + + Alison + Appling + https://orcid.org/0000-0003-3638-8572 + + + Hayley + Corson-Dosch + https://orcid.org/0000-0001-8695-1584 + + + Xiaowei + Jia + https://orcid.org/0000-0001-8544-5233 + + + Vipin + Kumar + + + Jordan + Read + + + + Near-term forecasts of stream temperature using process-guided deep learning and data assimilation + + + 8 + 6 + 2021 + + + 8 + 6 + 2021 + + +

Near-term forecasts of environmental outcomes can inform real-time decision making. Data assimilation modeling techniques can be used for forecasts to leverage real-time data streams, where the difference between model predictions and observations can be used to adjust the model to make better predictions tomorrow. In this use case, we developed a process-guided deep learning and data assimilation approach to make 7-day forecasts of daily maximum water temperature in the Delaware River Basin. Our modeling system produced forecasts of daily maximum stream temperature with an average root mean squared error (RMSE) from 1.2 to 1.6°C for 1-day lead time across all sites. The data assimilation algorithm successfully adjusted the process-guided deep learning model states and marginally improved forecast performance when compared to forecasts produced using the process-guided deep learning model alone (7-13% lower RMSE with the data assimilation algorithm). Our model characterized forecast uncertainty relatively well as 57-80% of observations were within 90% forecast confidence intervals across all sites and lead times, and the uncertainty associated with our forecasts allow managers to anticipate probability of exceedances of ecologically relevant thresholds and aid in decisions about releasing reservoir water downstream. The flexibility of deep learning models to be applied to various prediction problems shows promise for using these types of models to forecast many other important environmental variables and aid in decision making.

+
+ + + 10.31223/X55K7G + http://eartharxiv.org/repository/view/2590/ + +
+
+
+
\ No newline at end of file diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml b/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml new file mode 100644 index 0000000..007dfc8 --- /dev/null +++ b/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml @@ -0,0 +1,171 @@ + + + + + + Physical Sciences and Mathematics + + + Graham + Shields + https://orcid.org/0000-0002-7828-3966 + + + Robin + Strachan + + + Susannah + Porter + + + Galen + Halverson + + + Francis + Macdonald + + + Kenneth + Plumb + + + Carlos + de Alvarenga + + + Dhiraj + Banerjee + + + Andrey + Bekker + + + Alexander + Brasier + + + Partha + Chakraborty + + + Kent + Condie + + + Kaushik + Das + + + Richard + Ernst + + + Anthony + Fallick + + + Hartwig + Frimmel + + + Reinhardt + Fuck + + + Paul + Hoffman + + + Balz + Kamber + + + Anton + Kuznetsov + + + Ross + Mitchell + + + Daniel + Poire + + + Simon + Poulton + + + Robert + Riding + + + Mukund + Sharma + + + Craig + Storey + + + Eva + Stueeken + + + Rosalie + Tostevin + + + Elizabeth + Turner + + + Shuhai + Xiao + + + Shuanhong + Zhang + + + Ying + Zhou + + + Maoyan + Zhu + + + + Towards a new geological time scale: A template for improved rock-based subdivision of pre-Cryogenian time + + + 1 + 31 + 2022 + + + 10 + 22 + 2020 + + +

Four first-order (Hadean, Archean, Proterozoic and Phanerozoic eon) and nine second-order (Paleoarchean, Mesoarchean, Neoarchean, Paleoproterozoic, Mesoproterozoic, Neoproterozoic, Paleozoic, Mesozoic and Cenozoic era) units continue to provide intuitive subdivision of geological time. Major transitions in Earth’s tectonic, biological and environmental history occurred at approximately 2.5-2.3, 1.8-1.6, 1.0-0.8 and 0.7-0.5 Ga, and so future rock-based subdivision of pre-Cryogenian time, eventually by use of global stratotypes (GSSPs), will likely require only modest deviation from current chronometric boundaries (GSSAs) at 2.5, 1.6 and 1.0 Ga, respectively. Here we argue that removal of GSSAs could be expedited by establishing event-based concepts and provisional, approximate ages for eon-, era- and period-level subdivisions as soon as practicable, in line with ratification of an Ediacaran GSSP in 2004 and chronostratigraphic definition of the Cryogenian Period at c. 720 Ma in 2012. We also outline the geological basis behind current chronometric divisions, explore how they might differ in any future rock-based scheme, identify where major issues might arise during the transition, and outline where some immediate changes to the present scheme could be easily updated/formalised, as a framework for future GSSP development. In line with these aims, we note that the currently recommended four-fold Archean subdivision has not been formally ratified and agree with previous workers that it could be simplified to an informal three-fold subdivision, pending more detailed analysis. Although the ages of period boundaries would inevitably change in a more closely rock-based or chronostratigraphic scheme, we support retention of all currently ratified period names. Existing period names, borrowed from the Greek, were chosen to delimit natural phenomena of global reach. Any new global nomenclature ought to follow this lead for consistency, and so we discourage the use of supercontinent names (e.g. Rodinian, Columbian) and regional phenomena, however exceptional. In this regard, we tentatively suggest that a new period (e.g. the ‘Kratian’), could precede the Tonian as the first period of the Neoproterozoic Era and we concur with previous authors that the existing Siderian Period (named for banded iron formations) would fit better as a chronostratigraphically defined period of the terminal Archean. Indeed, all pre-Cryogenian subdivisions will need more conceptual grounding in any future chronostratigraphic scheme. We conclude that improved rock-based division of the Proterozoic Eon would likely comprise a three-fold, period-level subdivision of the Paleoproterozoic Era (Oxygenian Rhyacian, Orosirian), a four-fold subdivision of the Mesoproterozoic Era (Statherian, Calymmian, Ectasian, Stenian) and potentially four-fold subdivision of the Neoproterozoic Era (pre-Tonian ‘Kratian’, Tonian, Cryogenian and Ediacaran). Future refinements towards an improved rock-based pre-Cryogenian geological time scale could be propoosed by new international bodies to cover the 1) pre-Ediacaran Neoproterozoic, 2) Mesoproterozoic, 3) Paleoproterozoic and 4) Archean (and Hadean) as few experts and disciplines can speak to the entire pre-Cryogenian rock record.

+
+ + + + https://doi.org/10.1144/jgs2020-222 + + + + 10.31223/X5FW25 + http://eartharxiv.org/repository/view/1712/ + +
+
+
+
\ No newline at end of file diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml b/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml new file mode 100644 index 0000000..dcdb213 --- /dev/null +++ b/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml @@ -0,0 +1,57 @@ + + + + + + Earth Sciences + + + Toby + Halamka + + + Jonathan + Raberg + + + Jamie + McFarlin + + + Adam + Younkin + + + Christopher + Mulligan + + + Xiao-Lei + Liu + + + Sebastian + Kopf + + + + Production of diverse brGDGTs by Acidobacterium Solibacter usitatus in response to temperature, pH, and O2 provides a culturing perspective on brGDGT paleoproxies and biosynthesis + + + 4 + 17 + 2022 + + + 4 + 15 + 2022 + + + 10.31223/X5WD2C + http://eartharxiv.org/repository/view/3263/ + + + + + \ No newline at end of file From 91f38f7c5a8a7f416e6d81312c05f1d90ff2f4bd Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Thu, 27 Jul 2023 14:59:20 -0400 Subject: [PATCH 2/6] fix: add posted_content to xref; capture funding info modified: adsingestp/parsers/base.py modified: adsingestp/parsers/crossref.py modified: pyproject.toml --- adsingestp/parsers/base.py | 2 +- adsingestp/parsers/crossref.py | 7 ++++--- pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/adsingestp/parsers/base.py b/adsingestp/parsers/base.py index c5aa5f9..e932bab 100644 --- a/adsingestp/parsers/base.py +++ b/adsingestp/parsers/base.py @@ -452,7 +452,7 @@ def format(self, input_dict, format): # output["pubnote"] = "XXX" # TODO need an example # - output["funding"] = input_dict.get("funding", None) + output["funding"] = input_dict.get("funding", []) # # output["version"] = "XXX" # TODO need an example diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index a309892..67e446f 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -72,7 +72,8 @@ def _get_funding(self, fundgroups): funding = [] funding_text = None for fg in fundgroups: - funder = None + funder = {} + """ try: name = fg.find("assertion", {"name": "funder_name"}).get_text() except Exception as noop: @@ -99,10 +100,10 @@ def _get_funding(self, fundgroups): funder = "Award(s): %s" % award if funder: funding.append(funder) + """ - funding_text = "; ".join(funding) - return funding_text + return funding_arr def _parse_funding(self): fundgroups = self.record_meta.find_all("assertion", {"name": "fundgroup"}) diff --git a/pyproject.toml b/pyproject.toml index 1fa9543..91d9e5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ 'nameparser==1.1.1', 'ordered-set==4.1.0', 'python-dateutil==2.8.1', - 'adsingestschema @ git+https://github.com/adsabs/ingest_data_model@v1.0.8#egg=adsingestschema', + 'adsingestschema @ git+https://github.com/adsabs/ingest_data_model@v1.0.9#egg=adsingestschema', ] From 421c432a4b4949a93b6128ff4a8d2f2e5a5f6aee Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Thu, 27 Jul 2023 15:29:22 -0400 Subject: [PATCH 3/6] modified: adsingestp/parsers/crossref.py --- adsingestp/parsers/crossref.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index 67e446f..b0f299f 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -69,7 +69,7 @@ def _get_isbn(self, isbns): return isbns_out def _get_funding(self, fundgroups): - funding = [] + funding_arr = [] funding_text = None for fg in fundgroups: funder = {} From 1d9ff4d54a7d2cc859c9b2c5bcc603924c58c944 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Fri, 28 Jul 2023 08:50:10 -0400 Subject: [PATCH 4/6] fix: Finished funding capture for crossref modified: adsingestp/parsers/crossref.py --- adsingestp/parsers/crossref.py | 50 ++++++++++++---------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index b0f299f..e2d7f8e 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -73,44 +73,30 @@ def _get_funding(self, fundgroups): funding_text = None for fg in fundgroups: funder = {} - """ - try: - name = fg.find("assertion", {"name": "funder_name"}).get_text() - except Exception as noop: - name = None - try: - awards = fg.find_all("assertion", {"name": "award_number"}) - award_list = [] - for a in awards: - award_list.append(a.get_text()) - award = ", ".join(award_list) - except Exception as noop: - award = None - - if name: - multiline = [] - for l in name.split('\n'): - if l.strip(): - multiline.append(l.strip()) - funder = ", ".join(multiline) - if award: - if funder: - funder = funder + ", Award(s): %s" % award - else: - funder = "Award(s): %s" % award + funder_name = fg.find("assertion", {"name": "funder_name"}).extract() + funder_award = fg.find("assertion", {"name": "award_number"}).extract() + if funder_name: + funder_id = funder_name.find("assertion", {"name": "funder_identifier"}).extract() + else: + funder_id = None + + if funder_name: + funder.setdefault("agencyname", funder_name.get_text()) + if funder_id: + funder.setdefault("agencyid", {"idvalue": funder_id.get_text()}) + if funder_award: + funder.setdefault("awardnumber", funder_award.get_text()) + if funder: - funding.append(funder) - """ - + funding_arr.append(funder) return funding_arr def _parse_funding(self): fundgroups = self.record_meta.find_all("assertion", {"name": "fundgroup"}) - if not fundgroups: - print('wtf, why no data?') - funding = self._get_funding(fundgroups) - self.base_metadata["funding"] = funding + if fundgroups: + funding = self._get_funding(fundgroups) + self.base_metadata["funding"] = funding def _parse_pub(self): # journal articles only From ccca1510dbe59b0cd47912432b599c4b2fe868a0 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Fri, 28 Jul 2023 09:35:04 -0400 Subject: [PATCH 5/6] fix: apply .strip() to funding strings modified: adsingestp/parsers/crossref.py --- adsingestp/parsers/crossref.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index e2d7f8e..a2350ba 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -81,11 +81,11 @@ def _get_funding(self, fundgroups): funder_id = None if funder_name: - funder.setdefault("agencyname", funder_name.get_text()) + funder.setdefault("agencyname", funder_name.get_text().strip()) if funder_id: - funder.setdefault("agencyid", {"idvalue": funder_id.get_text()}) + funder.setdefault("agencyid", {"idvalue": funder_id.get_text().strip()}) if funder_award: - funder.setdefault("awardnumber", funder_award.get_text()) + funder.setdefault("awardnumber", funder_award.get_text().strip()) if funder: funding_arr.append(funder) From 273ea4f01275bf48652bd43f9f4bd5d638dff8b2 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Fri, 28 Jul 2023 10:33:53 -0400 Subject: [PATCH 6/6] fix: added tests to crossref posted_content, funding modified: adsingestp/parsers/crossref.py modified: tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml modified: tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml modified: tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml modified: tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml modified: tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml modified: tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json modified: tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json new file: tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json new file: tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json new file: tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json new file: tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json new file: tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json modified: tests/test_crossref.py --- adsingestp/parsers/crossref.py | 33 ++- ...ref_preprint_10.1002-essoar.10508651.1.xml | 2 +- ...ref_preprint_10.1002-essoar.10511074.2.xml | 2 +- .../crossref_preprint_10.31223-X55K7G.xml | 2 +- .../crossref_preprint_10.31223-X5FW25.xml | 2 +- .../crossref_preprint_10.31223-X5WD2C.xml | 2 +- .../crossref_cn_10.1093=mnras=stac2975.json | 252 +++++++++++++++- .../crossref_cn_10.1093=pasj=psac053.json | 208 ++++++++++++- ...ef_preprint_10.1002-essoar.10508651.1.json | 276 ++++++++++++++++++ ...ef_preprint_10.1002-essoar.10511074.2.json | 171 +++++++++++ .../crossref_preprint_10.31223-X55K7G.json | 104 +++++++ .../crossref_preprint_10.31223-X5FW25.json | 230 +++++++++++++++ .../crossref_preprint_10.31223-X5WD2C.json | 68 +++++ tests/test_crossref.py | 5 + 14 files changed, 1339 insertions(+), 18 deletions(-) create mode 100644 tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json create mode 100644 tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json create mode 100644 tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json create mode 100644 tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json create mode 100644 tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json diff --git a/adsingestp/parsers/crossref.py b/adsingestp/parsers/crossref.py index a2350ba..375ca99 100644 --- a/adsingestp/parsers/crossref.py +++ b/adsingestp/parsers/crossref.py @@ -70,13 +70,15 @@ def _get_isbn(self, isbns): def _get_funding(self, fundgroups): funding_arr = [] - funding_text = None for fg in fundgroups: funder = {} - funder_name = fg.find("assertion", {"name": "funder_name"}).extract() - funder_award = fg.find("assertion", {"name": "award_number"}).extract() + funder_name = fg.find("assertion", {"name": "funder_name"}) + funder_award = fg.find("assertion", {"name": "award_number"}) if funder_name: - funder_id = funder_name.find("assertion", {"name": "funder_identifier"}).extract() + funder_id = funder_name.find("assertion", {"name": "funder_identifier"}) + if funder_id: + funder_id = funder_id.extract() + funder_name = funder_name.extract() else: funder_id = None @@ -85,8 +87,8 @@ def _get_funding(self, fundgroups): if funder_id: funder.setdefault("agencyid", {"idvalue": funder_id.get_text().strip()}) if funder_award: - funder.setdefault("awardnumber", funder_award.get_text().strip()) - + funder.setdefault("awardnumber", funder_award.extract().get_text().strip()) + if funder: funding_arr.append(funder) @@ -193,18 +195,27 @@ def _parse_posted_content(self): if self.record_meta.find("institution"): inst_name = None if self.record_meta.find("institution").find("institution_name"): - inst_name = self.record_meta.find("institution").find("institution_name").get_text() + inst_name = ( + self.record_meta.find("institution").find("institution_name").get_text() + ) if self.record_meta.find("institution").find("institution_acronym"): if inst_name: - inst_name = inst_name + " (%s)" % self.record_meta.find("institution").find("institution_acronym").get_text() + inst_name = ( + inst_name + + " (%s)" + % self.record_meta.find("institution") + .find("institution_acronym") + .get_text() + ) else: - inst_name = self.record_meta.find("institution").find("institution_acronym").get_text() + inst_name = ( + self.record_meta.find("institution").find("institution_acronym").get_text() + ) if inst_name: self.base_metadata["publisher"] = inst_name if self.record_meta.find("posted_date"): pubdate = self._get_date(self.record_meta.find("posted_date")) self.base_metadata["pubdate_electronic"] = pubdate - def _parse_title_abstract(self): if self.record_meta.find("titles") and self.record_meta.find("titles").find("title"): @@ -242,7 +253,7 @@ def _parse_contrib(self): if c.find("ORCID"): orcid = c.find("ORCID").get_text() - orcid = orcid.replace("http://orcid.org/", "") + orcid = orcid.replace("http://orcid.org/", "").replace("https://orcid.org/", "") contrib_tmp["orcid"] = orcid if c.find("affiliation"): diff --git a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml index 24481e0..93b30b9 100644 --- a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml +++ b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10508651.1.xml @@ -143,4 +143,4 @@ - \ No newline at end of file + diff --git a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml index a47bdb6..e8540ac 100644 --- a/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml +++ b/tests/stubdata/input/crossref_preprint_10.1002-essoar.10511074.2.xml @@ -90,4 +90,4 @@ - \ No newline at end of file + diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml b/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml index 200badd..26d8b09 100644 --- a/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml +++ b/tests/stubdata/input/crossref_preprint_10.31223-X55K7G.xml @@ -73,4 +73,4 @@ - \ No newline at end of file + diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml b/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml index 007dfc8..68d094d 100644 --- a/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml +++ b/tests/stubdata/input/crossref_preprint_10.31223-X5FW25.xml @@ -168,4 +168,4 @@ - \ No newline at end of file + diff --git a/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml b/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml index dcdb213..ce9fb7b 100644 --- a/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml +++ b/tests/stubdata/input/crossref_preprint_10.31223-X5WD2C.xml @@ -54,4 +54,4 @@ - \ No newline at end of file + diff --git a/tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json b/tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json index f1e7256..bf94087 100644 --- a/tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json +++ b/tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json @@ -1 +1,251 @@ -{"recordData": {"createdTime": "", "parsedTime": "", "loadType": "fromFile", "loadFormat": "OtherXML", "loadLocation": "", "recordOrigin": ""}, "pubDate": {"electrDate": "2022-09-20", "printDate": "2022-11-10"}, "publication": {"pubName": "Monthly Notices of the Royal Astronomical Society", "issueNum": "4", "volumeNum": "517", "pubYear": "2022", "ISSN": [{"pubtype": "print", "issnString": "0035-8711"}, {"pubtype": "electronic", "issnString": "1365-2966"}]}, "persistentIDs": [{"DOI": "10.1093/mnras/stac2975"}], "pagination": {"firstPage": "5496", "lastPage": "5523"}, "authors": [{"name": {"surname": "Yang", "given_name": "Haifeng"}}, {"name": {"surname": "Shi", "given_name": "Chenhui"}}, {"name": {"surname": "Cai", "given_name": "Jianghui"}, "attrib": {"orcid": "0000-0001-6945-8093"}}, {"name": {"surname": "Zhou", "given_name": "Lichan"}}, {"name": {"surname": "Yang", "given_name": "Yuqing"}}, {"name": {"surname": "Zhao", "given_name": "Xujun"}}, {"name": {"surname": "He", "given_name": "Yanting"}}, {"name": {"surname": "Hao", "given_name": "Jing"}}], "title": {"textEnglish": "Data mining techniques on astronomical spectra data \u2013 I. Clustering analysis"}, "abstract": {"textEnglish": "Clustering is an effective tool for astronomical spectral analysis, to mine clustering patterns among data. With the implementation of large sky surveys, many clustering methods have been applied to tackle spectroscopic and photometric data effectively and automatically. Meanwhile, the performance of clustering methods under different data characteristics varies greatly. With the aim of summarizing astronomical spectral clustering algorithms and laying the foundation for further research, this work gives a review of clustering methods applied to astronomical spectra data in three parts. First, many clustering methods for astronomical spectra are investigated and analysed theoretically, looking at algorithmic ideas, applications, and features. Secondly, experiments are carried out on unified datasets constructed using three criteria (spectra data type, spectra quality, and data volume) to compare the performance of typical algorithms; spectra data are selected from the Large Sky Area Multi-Object Fibre Spectroscopic Telescope (LAMOST) survey and Sloan Digital Sky Survey (SDSS). Finally, source codes of the comparison clustering algorithms and manuals for usage and improvement are provided on GitHub."}, "references": [" MNRAS Acuner 475 1708 2018 10.1093/mnras/stx3106 ", " MNRAS Armstrong 452 3159 2015 10.1093/mnras/stv1398 ", " International Encyclopedia of Education Baker 7 112 2010 10.1016/B978-0-08-044894-7.01318-X ", " A&A Balazs 311 145 1996 ", " Ap&SS Bazarghan 337 93 2011 10.1007/s10509-011-0822-7 ", " MNRAS Beck 457 362 2016 10.1093/mnras/stv2986 ", " Data Mining Techniques: For Marketing, Sales, and Customer Relationship Management Berry 3rd edn. 1997 ", " A&A Blanco-Cuaresma 577 A47 2015 10.1051/0004-6361/201425232 ", " ApJ Bu 817 78 2016 10.3847/0004-637X/817/1/78 ", " JA&A Cai 41 15 2020 10.1007/s12036-020-09634-x ", " ACM Trans. Knowl. Discov. Data Cai 16 1 2022 10.1145/3522592 ", " Spectroscopy and Spectral Analysis Cai-Xia 40 1304 2020 ", " Phys.\u00a0Rev.\u00a0D Carlson 88 043006 2013 10.1103/PhysRevD.88.043006 ", " A&A Castro-Ginard 661 A118 2022 10.1051/0004-6361/202142568 ", " MNRAS Chattopadhyay 469 3374 2017 10.1093/mnras/stx1024 ", " ApJ Chattopadhyay 750 91 2012 10.1088/0004-637X/750/2/91 ", " Res. Astron. Astrophys. Chen 18 073 2018 10.1088/1674-4527/18/6/73 ", " ApJ Chen 860 70 2018 10.3847/1538-4357/aac325 ", " Proc. 14th International Conference on Pattern Recognition Connell 182 1998 ", " Elect. J. Stat. Couillet 10 1393 2016 10.1214/16-EJS1144 ", " AJ Dehghan 147 52 2014 10.1088/0004-6256/147/3/52 ", " 10th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI) Deng 1 2017 ", " Proc. International Conference on Image Processing Dorai 340 1995 10.1109/ICIP.1995.538548 ", " 12th International Conference on Computational Intelligence and Security (CIS) Du 134 2016 10.1109/CIS.2016.0039 ", " MNRAS Duarte-Cabral 500 3027 2021 10.1093/mnras/staa2480 ", " Biological Sequence Analysis: Probabilistic Models of Proteins and Nucleic Acids Durbin 1998 10.1017/CBO9780511790492 ", " Social Network Analysis: An Introduction Everton 3 2012 10.1017/CBO9781139136877.003 ", " 2022 International Conference on Electrical, Computer and Energy Technologies (ICECET) Fielding 1 2022 10.1109/ICECET55527.2022.9872611 ", " Computer Vision: A Modern Approach Forsyth 2nd edn. 2011 ", " Environ. Plan. A Fotheringham 30 1905 1998 10.1068/a301905 ", " A&A Fraix-Burnet 545 A80 2012 10.1051/0004-6361/201218769 ", " Expert Syst. Appl. Fustes 40 1530 2013 10.1016/j.eswa.2012.08.069 ", " ApJ Gao 894 48 2020 10.3847/1538-4357/ab8560 ", " Res. Astron. Astrophys. Gao 14 159 2014 10.1088/1674-4527/14/2/004 ", " Res. Astron. Astrophys. Gao 15 2193 2015 10.1088/1674-4527/15/12/007 ", " A&A Garcia-Dias 612 A98 2018 10.1051/0004-6361/201732134 ", " A&A Garcia-Dias 629 A34 2019 10.1051/0004-6361/201935223 ", " IEEE Trans. Parallel Distrib. Syst. Gowanlock 2595 2017 10.1109/TPDS.2017.2675421 ", " in Proc. 1998 ACM SIGMOD International Conference on Management of Data (SIGMOD \u201998 Guha 73 1998 10.1145/276304.276312 ", " Nature Harris 585 357 2020 10.1038/s41586-020-2649-2 ", " MNRAS Hayes 494 4492 2020 10.1093/mnras/staa978 ", " ApJ Hogg 833 262 2016 10.3847/1538-4357/833/2/262 ", " Comput. Sci. Eng. Hunter 9 90 2007 10.1109/MCSE.2007.55 ", " A&A in\u00a0der\u00a0Au 547 A115 2012 10.1051/0004-6361/201219958 ", " MNRAS Iwasaki 488 4106 2019 10.1093/mnras/stz1990 ", " Jin 2022 ", " Biological Theory Kaplan 7 401 2013 10.1007/s13752-012-0048-0 ", " Computer Karypis 32 68 1999 10.1109/2.781637 ", " Ap&SS Kheirdastan 361 304 2016 10.1007/s10509-016-2880-3 ", " Proc. 38th Annual Hawaii International Conference on System Sciences Kiang 73b 2005 10.1109/HICSS.2005.590 ", " MNRAS Kiar 472 1074 2017 10.1093/mnras/stx2037 ", " Kuhn 2017 ", " Academic Press Library in Signal Processing, Vol. 1 Lam 1115 2014 10.1016/B978-0-12-396502-8.00020-6 ", " IEEE Access Li 7 74683 2019 10.1109/ACCESS.2019.2921320 ", " Expert Syst. Appl. Liang 193 116410 2022 10.1016/j.eswa.2021.116410 ", " A&A Logan 633 A154 2020 10.1051/0004-6361/201936648 ", " Proc. Conf. Ser. Vol. 5496, Advanced Software, Control, and Communication Systems for Astronomy Luo 756 2004 10.1117/12.548737 ", " Proc. Conf. Ser. Vol. 7019, Advanced Software and Control for Astronomy II Luo 701935 2008 10.1117/12.788251 ", " Setting the scene for Gaia and LAMOST, Vol. 298 Luo 428 2014 10.1017/S1743921313006947 ", " Res. Astron. Astrophys. Luo 15 1095 2015 10.1088/1674-4527/15/8/002 ", " 2017 International Conference on Computing Methodologies and Communication (ICCMC) Madhusudan 526 2017 10.1109/ICCMC.2017.8282521 ", " MNRAS Mahajan 478 4336 2018 10.1093/mnras/sty1370 ", " McInnes 2018 ", " IEEE Symposium Series on Computational Intelligence (SSCI) Mer\u00e9nyi 1 2016 10.1109/SSCI.2016.7849952 ", " A&A Meusinger 597 A134 2017 10.1051/0004-6361/201629139 ", " ApJ Morales-Luis 743 77 2011 10.1088/0004-637X/743/1/77 ", " MNRAS Mosby 447 1638 2015 10.1093/mnras/stu2531 ", " Handbook of Computational Statistics Ng 139 2012 10.1007/978-3-642-21551-3_6 ", " MNRAS Oliver 501 4420 2021 10.1093/mnras/staa3879 ", " J. Ecol. Openshaw 74 313 1985 10.2307/2260381 ", " Research Notes of the AAS Ordonez 6 90 2022 10.3847/2515-5172/ac6b41 ", " A&A Ordov\u00e1s-Pascual 565 A53 2014 10.1051/0004-6361/201423806 ", " Applied Soft Computing Ord\u00f3\u00f1ez 12 204 2012 10.1016/j.asoc.2011.08.052 ", " ApJ Panos 861 62 2018 10.3847/1538-4357/aac779 ", " MNRAS Price-Jones 487 871 2019 10.1093/mnras/stz1260 ", " MNRAS Price-Jones 496 5101 2020 10.1093/mnras/staa1905 ", " MNRAS Rahmani 478 4416 2018 10.1093/mnras/sty1291 ", " Mach. Learn. Rebbapragada 74 281 2008 10.1007/s10994-008-5093-3 ", " Encyclo. Biomet. Reynolds 741 659 2009 10.1007/978-0-387-73003-5_196 ", " Introduction to Recommender Systems Handbook Ricci 1 2011 10.1007/978-0-387-85820-3 ", " ApJ Rubin 828 111 2016 10.3847/0004-637x/828/2/111 ", " ApJ S\u00e1nchez\u00a0Almeida 763 50 2013 10.1088/0004-637X/763/1/50 ", " ApJ S\u00e1nchez\u00a0Almeida 532 1215 2000 10.1086/308603 ", " ApJ S\u00e1nchez\u00a0Almeida 714 487 2010 10.1088/0004-637X/714/1/487 ", " ApJ S\u00e1nchez\u00a0Almeida 756 163 2012 10.1088/0004-637X/756/2/163 ", " A&A Sans\u00a0Fuentes 599 A143 2017 10.1051/0004-6361/201629719 ", " MNRAS Sasdelli 461 2044 2016 10.1093/mnras/stw1228 ", " Neurocomputing Saxena 267 664 2017 10.1016/j.neucom.2017.06.053 ", " J. Astron. Instrumen. Seo 2050011 2020 10.1142/S2251171720500117 ", " MNRAS Shang 426 3435 2012 10.1111/j.1365-2966.2012.21897.x ", " AJ Shin 156 201 2018 10.3847/1538-3881/aae263 ", " 2nd International Conference on Image, Vision and Computing (ICIVC) Shuxin 1002 2017 10.1109/ICIVC.2017.7984705 ", " Proc. 2nd International Conference on Learning Analytics and Knowledge (LAK\u201912 Siemens 252 2012 10.1145/2330601.2330661 ", " MNRAS Simpson 427 1153 2012 10.1111/j.1365-2966.2012.22012.x ", " Computat. Geosci. Tahmasebi 16 779 2012 10.1007/s10596-012-9287-1 ", " MNRAS Tammour 459 1659 2016 10.1093/mnras/stw586 ", " A&A Tarricq 659 A59 2022 10.1051/0004-6361/202142186 ", " MNRAS T\u00f3th 486 4823 2019 10.1093/mnras/stz1188 ", " MNRAS Tramacere 463 2939 2016 10.1093/mnras/stw2103 ", " ApJS Traven 228 24 2017 10.3847/1538-4365/228/2/24 ", " J. Mach. Learn. Res. Van\u00a0der\u00a0Maaten 9 11 2008 ", " A&A Wagenveld 660 A22 2022 10.1051/0004-6361/202142445 ", " Proc. 2015 IEEE International Conference on Big Data (Big Data). BIG DATA\u201915 Wang 601 2015 10.1109/BigData.2015.7363804 ", " Distill Wattenberg 1 e2 2016 10.23915/distill.00002 ", " IEEE Access Wu 8 66475 2020 10.1109/ACCESS.2020.2983745 ", " Annals of Data Sci. Xu 2 165 2015 10.1007/s40745-015-0040-1 ", " Expert Syst. Appl. Yang 139 112846 2020 10.1016/j.eswa.2019.112846 ", " Expert Syst. Appl. Yang 201 117018 2022 10.1016/j.eswa.2022.117018 ", " Inf. Sci. Yang 596 414 2022 10.1016/j.ins.2022.03.027 ", " ApJ Yan 898 80 2020 10.3847/1538-4357/ab9f9c ", " A&A Zari 628 A123 2019 10.1051/0004-6361/201935781 ", " Proc. 1996 ACM SIGMOD International Conference on Management of Data (SIGMOD\u201996 Zhang 103 1996 10.1145/233269.233324 "]} +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2022-09-20", + "printDate": "2022-11-10" + }, + "publication": { + "pubName": "Monthly Notices of the Royal Astronomical Society", + "issueNum": "4", + "volumeNum": "517", + "pubYear": "2022", + "ISSN": [ + { + "pubtype": "print", + "issnString": "0035-8711" + }, + { + "pubtype": "electronic", + "issnString": "1365-2966" + } + ] + }, + "persistentIDs": [ + { + "DOI": "10.1093/mnras/stac2975" + } + ], + "pagination": { + "firstPage": "5496", + "lastPage": "5523" + }, + "authors": [ + { + "name": { + "surname": "Yang", + "given_name": "Haifeng" + } + }, + { + "name": { + "surname": "Shi", + "given_name": "Chenhui" + } + }, + { + "name": { + "surname": "Cai", + "given_name": "Jianghui" + }, + "attrib": { + "orcid": "0000-0001-6945-8093" + } + }, + { + "name": { + "surname": "Zhou", + "given_name": "Lichan" + } + }, + { + "name": { + "surname": "Yang", + "given_name": "Yuqing" + } + }, + { + "name": { + "surname": "Zhao", + "given_name": "Xujun" + } + }, + { + "name": { + "surname": "He", + "given_name": "Yanting" + } + }, + { + "name": { + "surname": "Hao", + "given_name": "Jing" + } + } + ], + "title": { + "textEnglish": "Data mining techniques on astronomical spectra data \u2013 I. Clustering analysis" + }, + "abstract": { + "textEnglish": "Clustering is an effective tool for astronomical spectral analysis, to mine clustering patterns among data. With the implementation of large sky surveys, many clustering methods have been applied to tackle spectroscopic and photometric data effectively and automatically. Meanwhile, the performance of clustering methods under different data characteristics varies greatly. With the aim of summarizing astronomical spectral clustering algorithms and laying the foundation for further research, this work gives a review of clustering methods applied to astronomical spectra data in three parts. First, many clustering methods for astronomical spectra are investigated and analysed theoretically, looking at algorithmic ideas, applications, and features. Secondly, experiments are carried out on unified datasets constructed using three criteria (spectra data type, spectra quality, and data volume) to compare the performance of typical algorithms; spectra data are selected from the Large Sky Area Multi-Object Fibre Spectroscopic Telescope (LAMOST) survey and Sloan Digital Sky Survey (SDSS). Finally, source codes of the comparison clustering algorithms and manuals for usage and improvement are provided on GitHub." + }, + "references": [ + " MNRAS Acuner 475 1708 2018 10.1093/mnras/stx3106 ", + " MNRAS Armstrong 452 3159 2015 10.1093/mnras/stv1398 ", + " International Encyclopedia of Education Baker 7 112 2010 10.1016/B978-0-08-044894-7.01318-X ", + " A&A Balazs 311 145 1996 ", + " Ap&SS Bazarghan 337 93 2011 10.1007/s10509-011-0822-7 ", + " MNRAS Beck 457 362 2016 10.1093/mnras/stv2986 ", + " Data Mining Techniques: For Marketing, Sales, and Customer Relationship Management Berry 3rd edn. 1997 ", + " A&A Blanco-Cuaresma 577 A47 2015 10.1051/0004-6361/201425232 ", + " ApJ Bu 817 78 2016 10.3847/0004-637X/817/1/78 ", + " JA&A Cai 41 15 2020 10.1007/s12036-020-09634-x ", + " ACM Trans. Knowl. Discov. Data Cai 16 1 2022 10.1145/3522592 ", + " Spectroscopy and Spectral Analysis Cai-Xia 40 1304 2020 ", + " Phys.\u00a0Rev.\u00a0D Carlson 88 043006 2013 10.1103/PhysRevD.88.043006 ", + " A&A Castro-Ginard 661 A118 2022 10.1051/0004-6361/202142568 ", + " MNRAS Chattopadhyay 469 3374 2017 10.1093/mnras/stx1024 ", + " ApJ Chattopadhyay 750 91 2012 10.1088/0004-637X/750/2/91 ", + " Res. Astron. Astrophys. Chen 18 073 2018 10.1088/1674-4527/18/6/73 ", + " ApJ Chen 860 70 2018 10.3847/1538-4357/aac325 ", + " Proc. 14th International Conference on Pattern Recognition Connell 182 1998 ", + " Elect. J. Stat. Couillet 10 1393 2016 10.1214/16-EJS1144 ", + " AJ Dehghan 147 52 2014 10.1088/0004-6256/147/3/52 ", + " 10th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI) Deng 1 2017 ", + " Proc. International Conference on Image Processing Dorai 340 1995 10.1109/ICIP.1995.538548 ", + " 12th International Conference on Computational Intelligence and Security (CIS) Du 134 2016 10.1109/CIS.2016.0039 ", + " MNRAS Duarte-Cabral 500 3027 2021 10.1093/mnras/staa2480 ", + " Biological Sequence Analysis: Probabilistic Models of Proteins and Nucleic Acids Durbin 1998 10.1017/CBO9780511790492 ", + " Social Network Analysis: An Introduction Everton 3 2012 10.1017/CBO9781139136877.003 ", + " 2022 International Conference on Electrical, Computer and Energy Technologies (ICECET) Fielding 1 2022 10.1109/ICECET55527.2022.9872611 ", + " Computer Vision: A Modern Approach Forsyth 2nd edn. 2011 ", + " Environ. Plan. A Fotheringham 30 1905 1998 10.1068/a301905 ", + " A&A Fraix-Burnet 545 A80 2012 10.1051/0004-6361/201218769 ", + " Expert Syst. Appl. Fustes 40 1530 2013 10.1016/j.eswa.2012.08.069 ", + " ApJ Gao 894 48 2020 10.3847/1538-4357/ab8560 ", + " Res. Astron. Astrophys. Gao 14 159 2014 10.1088/1674-4527/14/2/004 ", + " Res. Astron. Astrophys. Gao 15 2193 2015 10.1088/1674-4527/15/12/007 ", + " A&A Garcia-Dias 612 A98 2018 10.1051/0004-6361/201732134 ", + " A&A Garcia-Dias 629 A34 2019 10.1051/0004-6361/201935223 ", + " IEEE Trans. Parallel Distrib. Syst. Gowanlock 2595 2017 10.1109/TPDS.2017.2675421 ", + " in Proc. 1998 ACM SIGMOD International Conference on Management of Data (SIGMOD \u201998 Guha 73 1998 10.1145/276304.276312 ", + " Nature Harris 585 357 2020 10.1038/s41586-020-2649-2 ", + " MNRAS Hayes 494 4492 2020 10.1093/mnras/staa978 ", + " ApJ Hogg 833 262 2016 10.3847/1538-4357/833/2/262 ", + " Comput. Sci. Eng. Hunter 9 90 2007 10.1109/MCSE.2007.55 ", + " A&A in\u00a0der\u00a0Au 547 A115 2012 10.1051/0004-6361/201219958 ", + " MNRAS Iwasaki 488 4106 2019 10.1093/mnras/stz1990 ", + " Jin 2022 ", + " Biological Theory Kaplan 7 401 2013 10.1007/s13752-012-0048-0 ", + " Computer Karypis 32 68 1999 10.1109/2.781637 ", + " Ap&SS Kheirdastan 361 304 2016 10.1007/s10509-016-2880-3 ", + " Proc. 38th Annual Hawaii International Conference on System Sciences Kiang 73b 2005 10.1109/HICSS.2005.590 ", + " MNRAS Kiar 472 1074 2017 10.1093/mnras/stx2037 ", + " Kuhn 2017 ", + " Academic Press Library in Signal Processing, Vol. 1 Lam 1115 2014 10.1016/B978-0-12-396502-8.00020-6 ", + " IEEE Access Li 7 74683 2019 10.1109/ACCESS.2019.2921320 ", + " Expert Syst. Appl. Liang 193 116410 2022 10.1016/j.eswa.2021.116410 ", + " A&A Logan 633 A154 2020 10.1051/0004-6361/201936648 ", + " Proc. Conf. Ser. Vol. 5496, Advanced Software, Control, and Communication Systems for Astronomy Luo 756 2004 10.1117/12.548737 ", + " Proc. Conf. Ser. Vol. 7019, Advanced Software and Control for Astronomy II Luo 701935 2008 10.1117/12.788251 ", + " Setting the scene for Gaia and LAMOST, Vol. 298 Luo 428 2014 10.1017/S1743921313006947 ", + " Res. Astron. Astrophys. Luo 15 1095 2015 10.1088/1674-4527/15/8/002 ", + " 2017 International Conference on Computing Methodologies and Communication (ICCMC) Madhusudan 526 2017 10.1109/ICCMC.2017.8282521 ", + " MNRAS Mahajan 478 4336 2018 10.1093/mnras/sty1370 ", + " McInnes 2018 ", + " IEEE Symposium Series on Computational Intelligence (SSCI) Mer\u00e9nyi 1 2016 10.1109/SSCI.2016.7849952 ", + " A&A Meusinger 597 A134 2017 10.1051/0004-6361/201629139 ", + " ApJ Morales-Luis 743 77 2011 10.1088/0004-637X/743/1/77 ", + " MNRAS Mosby 447 1638 2015 10.1093/mnras/stu2531 ", + " Handbook of Computational Statistics Ng 139 2012 10.1007/978-3-642-21551-3_6 ", + " MNRAS Oliver 501 4420 2021 10.1093/mnras/staa3879 ", + " J. Ecol. Openshaw 74 313 1985 10.2307/2260381 ", + " Research Notes of the AAS Ordonez 6 90 2022 10.3847/2515-5172/ac6b41 ", + " A&A Ordov\u00e1s-Pascual 565 A53 2014 10.1051/0004-6361/201423806 ", + " Applied Soft Computing Ord\u00f3\u00f1ez 12 204 2012 10.1016/j.asoc.2011.08.052 ", + " ApJ Panos 861 62 2018 10.3847/1538-4357/aac779 ", + " MNRAS Price-Jones 487 871 2019 10.1093/mnras/stz1260 ", + " MNRAS Price-Jones 496 5101 2020 10.1093/mnras/staa1905 ", + " MNRAS Rahmani 478 4416 2018 10.1093/mnras/sty1291 ", + " Mach. Learn. Rebbapragada 74 281 2008 10.1007/s10994-008-5093-3 ", + " Encyclo. Biomet. Reynolds 741 659 2009 10.1007/978-0-387-73003-5_196 ", + " Introduction to Recommender Systems Handbook Ricci 1 2011 10.1007/978-0-387-85820-3 ", + " ApJ Rubin 828 111 2016 10.3847/0004-637x/828/2/111 ", + " ApJ S\u00e1nchez\u00a0Almeida 763 50 2013 10.1088/0004-637X/763/1/50 ", + " ApJ S\u00e1nchez\u00a0Almeida 532 1215 2000 10.1086/308603 ", + " ApJ S\u00e1nchez\u00a0Almeida 714 487 2010 10.1088/0004-637X/714/1/487 ", + " ApJ S\u00e1nchez\u00a0Almeida 756 163 2012 10.1088/0004-637X/756/2/163 ", + " A&A Sans\u00a0Fuentes 599 A143 2017 10.1051/0004-6361/201629719 ", + " MNRAS Sasdelli 461 2044 2016 10.1093/mnras/stw1228 ", + " Neurocomputing Saxena 267 664 2017 10.1016/j.neucom.2017.06.053 ", + " J. Astron. Instrumen. Seo 2050011 2020 10.1142/S2251171720500117 ", + " MNRAS Shang 426 3435 2012 10.1111/j.1365-2966.2012.21897.x ", + " AJ Shin 156 201 2018 10.3847/1538-3881/aae263 ", + " 2nd International Conference on Image, Vision and Computing (ICIVC) Shuxin 1002 2017 10.1109/ICIVC.2017.7984705 ", + " Proc. 2nd International Conference on Learning Analytics and Knowledge (LAK\u201912 Siemens 252 2012 10.1145/2330601.2330661 ", + " MNRAS Simpson 427 1153 2012 10.1111/j.1365-2966.2012.22012.x ", + " Computat. Geosci. Tahmasebi 16 779 2012 10.1007/s10596-012-9287-1 ", + " MNRAS Tammour 459 1659 2016 10.1093/mnras/stw586 ", + " A&A Tarricq 659 A59 2022 10.1051/0004-6361/202142186 ", + " MNRAS T\u00f3th 486 4823 2019 10.1093/mnras/stz1188 ", + " MNRAS Tramacere 463 2939 2016 10.1093/mnras/stw2103 ", + " ApJS Traven 228 24 2017 10.3847/1538-4365/228/2/24 ", + " J. Mach. Learn. Res. Van\u00a0der\u00a0Maaten 9 11 2008 ", + " A&A Wagenveld 660 A22 2022 10.1051/0004-6361/202142445 ", + " Proc. 2015 IEEE International Conference on Big Data (Big Data). BIG DATA\u201915 Wang 601 2015 10.1109/BigData.2015.7363804 ", + " Distill Wattenberg 1 e2 2016 10.23915/distill.00002 ", + " IEEE Access Wu 8 66475 2020 10.1109/ACCESS.2020.2983745 ", + " Annals of Data Sci. Xu 2 165 2015 10.1007/s40745-015-0040-1 ", + " Expert Syst. Appl. Yang 139 112846 2020 10.1016/j.eswa.2019.112846 ", + " Expert Syst. Appl. Yang 201 117018 2022 10.1016/j.eswa.2022.117018 ", + " Inf. Sci. Yang 596 414 2022 10.1016/j.ins.2022.03.027 ", + " ApJ Yan 898 80 2020 10.3847/1538-4357/ab9f9c ", + " A&A Zari 628 A123 2019 10.1051/0004-6361/201935781 ", + " Proc. 1996 ACM SIGMOD International Conference on Management of Data (SIGMOD\u201996 Zhang 103 1996 10.1145/233269.233324 " + ], + "funding": [ + { + "agencyname": "Chinese Academy of Sciences", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100002367" + } + }, + { + "agencyname": "National Development and Reform Commission", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100010453" + } + }, + { + "agencyname": "National Natural Science Foundation of China", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100001809" + }, + "awardnumber": "U1931209" + }, + { + "agencyname": "Key Research and Development Project of Shanxi Province", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100013317" + }, + "awardnumber": "201903D121116" + }, + { + "agencyname": "Science and Technology Development Fund", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100003009" + }, + "awardnumber": "20201070" + }, + { + "agencyname": "Fundamental Research Program of Shanxi Province", + "awardnumber": "20210302123223" + } + ] +} diff --git a/tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json b/tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json index b4b2277..b3dd69f 100644 --- a/tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json +++ b/tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json @@ -1 +1,207 @@ -{"recordData": {"createdTime": "", "parsedTime": "", "loadType": "fromFile", "loadFormat": "OtherXML", "loadLocation": "", "recordOrigin": ""}, "pubDate": {"electrDate": "2022-07-19", "printDate": "2022-10-03"}, "publication": {"pubName": "Publications of the Astronomical Society of Japan", "issueNum": "5", "volumeNum": "74", "pubYear": "2022", "ISSN": [{"pubtype": "print", "issnString": "0004-6264"}, {"pubtype": "electronic", "issnString": "2053-051X"}]}, "persistentIDs": [{"DOI": "10.1093/pasj/psac053"}], "pagination": {"firstPage": "1022", "lastPage": "1040"}, "authors": [{"name": {"surname": "Shimoda", "given_name": "Jiro"}, "attrib": {"orcid": "0000-0003-3383-2279"}}, {"name": {"surname": "Ohira", "given_name": "Yutaka"}}, {"name": {"surname": "Bamba", "given_name": "Aya"}}, {"name": {"surname": "Terada", "given_name": "Yukikatsu"}}, {"name": {"surname": "Yamazaki", "given_name": "Ryo"}}, {"name": {"surname": "Inoue", "given_name": "Tsuyoshi"}}, {"name": {"surname": "Tanaka", "given_name": "Shuta J"}}], "title": {"textEnglish": "X-ray line diagnostics of ion temperature at cosmic ray accelerating collisionless shocks"}, "abstract": {"textEnglish": "A novel collisionless shock jump condition is suggested by modeling the entropy production at the shock transition region. We also calculate downstream developments of the atomic ionization balance and the ion temperature relaxation in supernova remnants (SNRs). The injection process and subsequent acceleration of cosmic rays (CRs) in the SNR shocks are closely related to the formation process of the collisionless shocks. The formation of the shock is caused by wave\u2013particle interactions. Since the wave\u2013particle interactions result in energy exchanges between electromagnetic fields and charged particles, the randomization of particles associated with the shock transition may occur at a rate given by the scalar product of the electric field and current. We find that order-of-magnitude estimates of the randomization with reasonable strength of the electromagnetic fields in the SNR constrain the amount of CR nuclei and the ion temperatures. The constrained amount of CR nuclei can be sufficient to explain the Galactic CRs. The ion temperature becomes significantly lower than that in the case without CRs. To distinguish the case without CRs, we perform synthetic observations of atomic line emissions from the downstream region of the SNR RCW 86. Future observations by XRISM and Athena can distinguish whether the SNR shock accelerates the CRs or not from the ion temperatures."}, "references": [" ApJ Abdo 736 131 2011 10.1088/0004-637X/736/2/131 ", " A&A Altun 474 1051 2007 10.1051/0004-6361:20078238 ", " A&AS Arnaud 60 425 1985 ", " MNRAS Arthur 414 1747 2011 10.1111/j.1365-2966.2011.18507.x ", " ARA&A Asplund 47 481 2009 10.1146/annurev.astro.46.060407.145222 ", " PASJ Bamba 52 1157 2000 10.1093/pasj/52.6.1157 ", " ApJ Bamba 621 793 2005 10.1086/427620 ", " Proc. SPIE10699, Space Telescopes and Instrumentation 2018: Ultraviolet to Gamma Ray Barret 106991G 2018 ", " Space Sci. Rev. Beck 99 243 2001 10.1023/A:1013805401252 ", " MNRAS Bell 182 147 1978 10.1093/mnras/182.2.147 ", " MNRAS Bell 353 550 2004 10.1111/j.1365-2966.2004.08097.x ", " ApJ Blandford 221 L29 1978 10.1086/182658 ", " ApJ Borkowski 550 334 2001 10.1086/319716 ", " MNRAS Broersen 441 3040 2014 10.1093/mnras/stu667 ", " ApJ Caprioli 905 2 2020 10.3847/1538-4357/abbe05 ", " ApJ Chevalier 235 186 1980 10.1086/157623 ", " Nature De Cia 597 206 2021 10.1038/s41586-021-03780-0 ", " ApJ Drury 248 344 1981 10.1086/159159 ", " ApJ Fukui 915 84 2021 10.3847/1538-4357/abff4a ", " MNRAS Girichidis 479 3042 2018 10.1093/mnras/sty1653 ", " A&AS Gronenschild 32 283 1978 ", " ApJ Hahn 788 46 2014 10.1088/0004-637X/788/1/46 ", " Science Helder 325 719 2009 10.1126/science.1173383 ", " MNRAS Helder 435 910 2013 10.1093/mnras/stt993 ", " MNRAS Hopkins 480 800 2018 10.1093/mnras/sty1690 ", " ApJ Hovey 809 119 2015 10.1088/0004-637X/809/2/119 ", " ApJ Hovey 862 148 2018 10.3847/1538-4357/aac94b ", " ApJ Hughes 543 L61 2000 10.1086/317102 ", " J. Low Temperature Phys. Ishisaki 193 991 2018 10.1007/s10909-018-1913-4 ", " ApJ Itoh 285 601 1984 10.1086/162535 ", " in Atomic and Plasma-Material Interaction Data for Fusion, Vol. 4 (Vienna: International Atomic Energy Agency) Janev 1993 ", " Phys. Scr. Kotelnikov 94 055403 2019 10.1088/1402-4896/ab060a ", " A&A Lagage 118 223 1983 ", " A&A Lagage 125 249 1983 ", " ApJ Laming 790 11 2014 10.1088/0004-637X/790/1/11 ", " J. Phys. Chemical Reference Data Lennon 17 1285 1988 10.1063/1.555809 ", " ApJ Lestinsky 698 648 2009 10.1088/0004-637X/698/1/648 ", " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Loewenstein 114445D 2020 ", " Living Rev. Comput. Astrophys. Marcowith 6 1 2020 10.1007/s41115-020-0007-6 ", " Phys. Rev. Lett. Matsumoto 119 105101 2017 10.1103/PhysRevLett.119.105101 ", " A&A Mewe 20 215 1972 ", " A&AS Mewe 65 511 1986 ", " A&AS Mewe 40 323 1980 ", " A&A Mewe 87 55 1980 ", " Nat. Astron. Miceli 3 236 2019 10.1038/s41550-018-0677-8 ", " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Miller 1144426 2020 ", " A&A Mitnik 425 1153 2004 10.1051/0004-6361:20041297 ", " A&A Morlino 557 A142 2013 10.1051/0004-6361/201322161 ", " A&A Morlino 562 A141 2014 10.1051/0004-6361/201322986 ", " ApJ Morlino 768 148 2013 10.1088/0004-637X/768/2/148 ", " J. Phys. B Murakami 39 2917 2006 10.1088/0953-4075/39/14/001 ", " ApJ Myers 225 380 1978 10.1086/156500 ", " ApJS Nahar 101 423 1995 10.1086/192248 ", " Phys. Rev. A Nahar 58 3766 1998 10.1103/PhysRevA.58.3766 ", " ApJS Nahar 126 537 2000 10.1086/313307 ", " ApJS Nahar 164 280 2006 10.1086/501503 ", " ApJS Nahar 111 339 1997 10.1086/313013 ", " A&AS Nahar 135 347 1999 10.1051/aas:1999447 ", " ApJS Nahar 133 255 2001 10.1086/319187 ", " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Nakajima 1144423 2020 ", " ApJ Novotn\u00fd 753 57 2012 10.1088/0004-637X/753/1/57 ", " Phys. Rev. Lett. Ohira 111 245002 2013 10.1103/PhysRevLett.111.245002 ", " ApJ Ohira 827 36 2016 10.3847/0004-637X/827/1/36 ", " ApJ Ohira 817 137 2016 10.3847/0004-637X/817/2/137 ", " ApJ Ohira 729 L13 2011 10.1088/2041-8205/729/1/L13 ", " A&A Ohira 513 A17 2010 10.1051/0004-6361/200913495 ", " ApJ Ohira 661 L171 2007 10.1086/518888 ", " ApJ Ohira 688 320 2008 10.1086/592182 ", " Astrophysics of Gaseous Nebulae and Active Galactic Nuclei Osterbrock 2006 ", " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Porter 1144424 2020 ", " ApJ Rakowski 684 348 2008 10.1086/590245 ", " ApJS Savin 138 337 2002 10.1086/323388 ", " Similarity and Dimensional Methods in Mechanics Sedov 1959 ", " MNRAS Shimoda 480 2200 2018 10.1093/mnras/sty2034 ", " ApJ Shimoda 803 98 2015 10.1088/0004-637X/803/2/98 ", " ApJ Shimoda 926 8 2022 10.3847/1538-4357/ac4110 ", " MNRAS Shimoda 485 5453 2019 10.1093/mnras/stz758 ", " MNRAS Shimoda 473 1394 2018 10.1093/mnras/stx2339 ", " Physics of Fully Ionized Gases Spitzer 2nd ed. 1962 ", " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Tashiro 1144422 2020 ", " J. Astron. Telesc. Instrum. Syst. Terada 7 037001 2021 10.1117/1.JATIS.7.3.037001 ", " ApJ Tsubone 835 34 2017 10.3847/1538-4357/835/1/34 ", " Nature Uchiyama 449 576 2007 10.1038/nature06210 ", " A&AR Vink 20 49 2012 10.1007/s00159-011-0049-1 ", " ApJ Vink 648 L33 2006 10.1086/507628 ", " ApJ Vink 584 758 2003 10.1086/345832 ", " ApJ Vink 780 125 2014 10.1088/0004-637X/780/2/125 ", " ApJ Vink 722 1727 2010 10.1088/0004-637X/722/2/1727 ", " ApJ Yamaguchi 820 L3 2016 10.3847/2041-8205/820/1/L3 ", " A&A Zatsarinny 447 379 2006 10.1051/0004-6361:20053737 ", " A&A Zatsarinny 412 587 2003 10.1051/0004-6361:20031462 ", " A&A Zatsarinny 417 1173 2004 10.1051/0004-6361:20034174 "]} +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2022-07-19", + "printDate": "2022-10-03" + }, + "publication": { + "pubName": "Publications of the Astronomical Society of Japan", + "issueNum": "5", + "volumeNum": "74", + "pubYear": "2022", + "ISSN": [ + { + "pubtype": "print", + "issnString": "0004-6264" + }, + { + "pubtype": "electronic", + "issnString": "2053-051X" + } + ] + }, + "persistentIDs": [ + { + "DOI": "10.1093/pasj/psac053" + } + ], + "pagination": { + "firstPage": "1022", + "lastPage": "1040" + }, + "authors": [ + { + "name": { + "surname": "Shimoda", + "given_name": "Jiro" + }, + "attrib": { + "orcid": "0000-0003-3383-2279" + } + }, + { + "name": { + "surname": "Ohira", + "given_name": "Yutaka" + } + }, + { + "name": { + "surname": "Bamba", + "given_name": "Aya" + } + }, + { + "name": { + "surname": "Terada", + "given_name": "Yukikatsu" + } + }, + { + "name": { + "surname": "Yamazaki", + "given_name": "Ryo" + } + }, + { + "name": { + "surname": "Inoue", + "given_name": "Tsuyoshi" + } + }, + { + "name": { + "surname": "Tanaka", + "given_name": "Shuta J" + } + } + ], + "title": { + "textEnglish": "X-ray line diagnostics of ion temperature at cosmic ray accelerating collisionless shocks" + }, + "abstract": { + "textEnglish": "A novel collisionless shock jump condition is suggested by modeling the entropy production at the shock transition region. We also calculate downstream developments of the atomic ionization balance and the ion temperature relaxation in supernova remnants (SNRs). The injection process and subsequent acceleration of cosmic rays (CRs) in the SNR shocks are closely related to the formation process of the collisionless shocks. The formation of the shock is caused by wave\u2013particle interactions. Since the wave\u2013particle interactions result in energy exchanges between electromagnetic fields and charged particles, the randomization of particles associated with the shock transition may occur at a rate given by the scalar product of the electric field and current. We find that order-of-magnitude estimates of the randomization with reasonable strength of the electromagnetic fields in the SNR constrain the amount of CR nuclei and the ion temperatures. The constrained amount of CR nuclei can be sufficient to explain the Galactic CRs. The ion temperature becomes significantly lower than that in the case without CRs. To distinguish the case without CRs, we perform synthetic observations of atomic line emissions from the downstream region of the SNR RCW 86. Future observations by XRISM and Athena can distinguish whether the SNR shock accelerates the CRs or not from the ion temperatures." + }, + "references": [ + " ApJ Abdo 736 131 2011 10.1088/0004-637X/736/2/131 ", + " A&A Altun 474 1051 2007 10.1051/0004-6361:20078238 ", + " A&AS Arnaud 60 425 1985 ", + " MNRAS Arthur 414 1747 2011 10.1111/j.1365-2966.2011.18507.x ", + " ARA&A Asplund 47 481 2009 10.1146/annurev.astro.46.060407.145222 ", + " PASJ Bamba 52 1157 2000 10.1093/pasj/52.6.1157 ", + " ApJ Bamba 621 793 2005 10.1086/427620 ", + " Proc. SPIE10699, Space Telescopes and Instrumentation 2018: Ultraviolet to Gamma Ray Barret 106991G 2018 ", + " Space Sci. Rev. Beck 99 243 2001 10.1023/A:1013805401252 ", + " MNRAS Bell 182 147 1978 10.1093/mnras/182.2.147 ", + " MNRAS Bell 353 550 2004 10.1111/j.1365-2966.2004.08097.x ", + " ApJ Blandford 221 L29 1978 10.1086/182658 ", + " ApJ Borkowski 550 334 2001 10.1086/319716 ", + " MNRAS Broersen 441 3040 2014 10.1093/mnras/stu667 ", + " ApJ Caprioli 905 2 2020 10.3847/1538-4357/abbe05 ", + " ApJ Chevalier 235 186 1980 10.1086/157623 ", + " Nature De Cia 597 206 2021 10.1038/s41586-021-03780-0 ", + " ApJ Drury 248 344 1981 10.1086/159159 ", + " ApJ Fukui 915 84 2021 10.3847/1538-4357/abff4a ", + " MNRAS Girichidis 479 3042 2018 10.1093/mnras/sty1653 ", + " A&AS Gronenschild 32 283 1978 ", + " ApJ Hahn 788 46 2014 10.1088/0004-637X/788/1/46 ", + " Science Helder 325 719 2009 10.1126/science.1173383 ", + " MNRAS Helder 435 910 2013 10.1093/mnras/stt993 ", + " MNRAS Hopkins 480 800 2018 10.1093/mnras/sty1690 ", + " ApJ Hovey 809 119 2015 10.1088/0004-637X/809/2/119 ", + " ApJ Hovey 862 148 2018 10.3847/1538-4357/aac94b ", + " ApJ Hughes 543 L61 2000 10.1086/317102 ", + " J. Low Temperature Phys. Ishisaki 193 991 2018 10.1007/s10909-018-1913-4 ", + " ApJ Itoh 285 601 1984 10.1086/162535 ", + " in Atomic and Plasma-Material Interaction Data for Fusion, Vol. 4 (Vienna: International Atomic Energy Agency) Janev 1993 ", + " Phys. Scr. Kotelnikov 94 055403 2019 10.1088/1402-4896/ab060a ", + " A&A Lagage 118 223 1983 ", + " A&A Lagage 125 249 1983 ", + " ApJ Laming 790 11 2014 10.1088/0004-637X/790/1/11 ", + " J. Phys. Chemical Reference Data Lennon 17 1285 1988 10.1063/1.555809 ", + " ApJ Lestinsky 698 648 2009 10.1088/0004-637X/698/1/648 ", + " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Loewenstein 114445D 2020 ", + " Living Rev. Comput. Astrophys. Marcowith 6 1 2020 10.1007/s41115-020-0007-6 ", + " Phys. Rev. Lett. Matsumoto 119 105101 2017 10.1103/PhysRevLett.119.105101 ", + " A&A Mewe 20 215 1972 ", + " A&AS Mewe 65 511 1986 ", + " A&AS Mewe 40 323 1980 ", + " A&A Mewe 87 55 1980 ", + " Nat. Astron. Miceli 3 236 2019 10.1038/s41550-018-0677-8 ", + " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Miller 1144426 2020 ", + " A&A Mitnik 425 1153 2004 10.1051/0004-6361:20041297 ", + " A&A Morlino 557 A142 2013 10.1051/0004-6361/201322161 ", + " A&A Morlino 562 A141 2014 10.1051/0004-6361/201322986 ", + " ApJ Morlino 768 148 2013 10.1088/0004-637X/768/2/148 ", + " J. Phys. B Murakami 39 2917 2006 10.1088/0953-4075/39/14/001 ", + " ApJ Myers 225 380 1978 10.1086/156500 ", + " ApJS Nahar 101 423 1995 10.1086/192248 ", + " Phys. Rev. A Nahar 58 3766 1998 10.1103/PhysRevA.58.3766 ", + " ApJS Nahar 126 537 2000 10.1086/313307 ", + " ApJS Nahar 164 280 2006 10.1086/501503 ", + " ApJS Nahar 111 339 1997 10.1086/313013 ", + " A&AS Nahar 135 347 1999 10.1051/aas:1999447 ", + " ApJS Nahar 133 255 2001 10.1086/319187 ", + " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Nakajima 1144423 2020 ", + " ApJ Novotn\u00fd 753 57 2012 10.1088/0004-637X/753/1/57 ", + " Phys. Rev. Lett. Ohira 111 245002 2013 10.1103/PhysRevLett.111.245002 ", + " ApJ Ohira 827 36 2016 10.3847/0004-637X/827/1/36 ", + " ApJ Ohira 817 137 2016 10.3847/0004-637X/817/2/137 ", + " ApJ Ohira 729 L13 2011 10.1088/2041-8205/729/1/L13 ", + " A&A Ohira 513 A17 2010 10.1051/0004-6361/200913495 ", + " ApJ Ohira 661 L171 2007 10.1086/518888 ", + " ApJ Ohira 688 320 2008 10.1086/592182 ", + " Astrophysics of Gaseous Nebulae and Active Galactic Nuclei Osterbrock 2006 ", + " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Porter 1144424 2020 ", + " ApJ Rakowski 684 348 2008 10.1086/590245 ", + " ApJS Savin 138 337 2002 10.1086/323388 ", + " Similarity and Dimensional Methods in Mechanics Sedov 1959 ", + " MNRAS Shimoda 480 2200 2018 10.1093/mnras/sty2034 ", + " ApJ Shimoda 803 98 2015 10.1088/0004-637X/803/2/98 ", + " ApJ Shimoda 926 8 2022 10.3847/1538-4357/ac4110 ", + " MNRAS Shimoda 485 5453 2019 10.1093/mnras/stz758 ", + " MNRAS Shimoda 473 1394 2018 10.1093/mnras/stx2339 ", + " Physics of Fully Ionized Gases Spitzer 2nd ed. 1962 ", + " Proc. SPIE 11444, Space Telescopes and Instrumentation 2020: Ultraviolet to Gamma Ray Tashiro 1144422 2020 ", + " J. Astron. Telesc. Instrum. Syst. Terada 7 037001 2021 10.1117/1.JATIS.7.3.037001 ", + " ApJ Tsubone 835 34 2017 10.3847/1538-4357/835/1/34 ", + " Nature Uchiyama 449 576 2007 10.1038/nature06210 ", + " A&AR Vink 20 49 2012 10.1007/s00159-011-0049-1 ", + " ApJ Vink 648 L33 2006 10.1086/507628 ", + " ApJ Vink 584 758 2003 10.1086/345832 ", + " ApJ Vink 780 125 2014 10.1088/0004-637X/780/2/125 ", + " ApJ Vink 722 1727 2010 10.1088/0004-637X/722/2/1727 ", + " ApJ Yamaguchi 820 L3 2016 10.3847/2041-8205/820/1/L3 ", + " A&A Zatsarinny 447 379 2006 10.1051/0004-6361:20053737 ", + " A&A Zatsarinny 412 587 2003 10.1051/0004-6361:20031462 ", + " A&A Zatsarinny 417 1173 2004 10.1051/0004-6361:20034174 " + ], + "funding": [ + { + "agencyname": "JSPS", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100001691" + }, + "awardnumber": "20J01086" + }, + { + "agencyname": "MEXT", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100001700" + } + }, + { + "agencyname": "Aoyama Gakuin University Research Institute", + "agencyid": { + "idvalue": "http://dx.doi.org/10.13039/501100004968" + } + } + ] +} diff --git a/tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json b/tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json new file mode 100644 index 0000000..cb80f12 --- /dev/null +++ b/tests/stubdata/output/crossref_preprint_10.1002-essoar.10508651.1.json @@ -0,0 +1,276 @@ +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2021-11-08" + }, + "publication": { + "publisher": "Earth and Space Science Open Archive (ESSOAr)", + "pubYear": "2021" + }, + "persistentIDs": [ + { + "DOI": "10.1002/essoar.10508651.1" + } + ], + "authors": [ + { + "name": { + "surname": "Livi", + "given_name": "Roberto" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ], + "attrib": { + "orcid": "0000-0002-0396-0547" + } + }, + { + "name": { + "surname": "Larson", + "given_name": "Davin E" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Kasper", + "given_name": "Justin C" + }, + "affiliation": [ + { + "affPubRaw": "University of Michigan" + }, + { + "affPubRaw": "Smithsonian Astrophysical Observatory" + } + ] + }, + { + "name": { + "surname": "Abiad", + "given_name": "Robert" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Case", + "given_name": "Anthony W" + }, + "affiliation": [ + { + "affPubRaw": "Smithsonian Astrophysical Observatory" + } + ] + }, + { + "name": { + "surname": "Klein", + "given_name": "Kristopher G" + }, + "affiliation": [ + { + "affPubRaw": "University of Michigan" + }, + { + "affPubRaw": "University of Arizona" + } + ] + }, + { + "name": { + "surname": "Curtis", + "given_name": "David W" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Dalton", + "given_name": "Gregory" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Stevens", + "given_name": "Michael" + }, + "affiliation": [ + { + "affPubRaw": "Smithsonian Astrophysical Observatory" + } + ] + }, + { + "name": { + "surname": "Korreck", + "given_name": "Kelly E" + }, + "affiliation": [ + { + "affPubRaw": "Smithsonian Astrophysical Observatory" + } + ] + }, + { + "name": { + "surname": "Ho", + "given_name": "George" + }, + "affiliation": [ + { + "affPubRaw": "Applied Physics Laboratory, Johns Hopkins University" + } + ] + }, + { + "name": { + "surname": "Robinson", + "given_name": "Miles" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Tiu", + "given_name": "Chris" + }, + "affiliation": [ + { + "affPubRaw": "NASA" + } + ] + }, + { + "name": { + "surname": "Whittlesey", + "given_name": "Phyllis L" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Verniero", + "given_name": "J L" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Halekas", + "given_name": "Jasper" + }, + "affiliation": [ + { + "affPubRaw": "University of Iowa" + } + ] + }, + { + "name": { + "surname": "Mcfadden", + "given_name": "James" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Marckwordt", + "given_name": "Mario" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Slagle", + "given_name": "Amanda" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Abatcha", + "given_name": "Mamuda" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + }, + { + "name": { + "surname": "Rahmati", + "given_name": "Ali" + }, + "affiliation": [ + { + "affPubRaw": "University of California" + } + ] + } + ], + "title": { + "textEnglish": "The Solar Probe ANalyzer -Ions on Parker Solar Probe" + }, + "funding": [ + { + "agencyname": "National Aeronautics and Space Administration", + "agencyid": { + "idvalue": "100000104" + }, + "awardnumber": "NNN06AA01C" + } + ] +} diff --git a/tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json b/tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json new file mode 100644 index 0000000..1ea1da9 --- /dev/null +++ b/tests/stubdata/output/crossref_preprint_10.1002-essoar.10511074.2.json @@ -0,0 +1,171 @@ +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2022-04-11" + }, + "publication": { + "publisher": "Earth and Space Science Open Archive (ESSOAr)", + "pubYear": "2022" + }, + "persistentIDs": [ + { + "DOI": "10.1002/essoar.10511074.2" + } + ], + "authors": [ + { + "name": { + "surname": "Drilleau", + "given_name": "M\u00e9lanie" + }, + "affiliation": [ + { + "affPubRaw": "31400 Toulouse" + }, + { + "affPubRaw": "Institut Sup\u00e9rieur de l\u2019A\u00e9ronautique et de l\u2019Espace ISAE-SUPAERO" + }, + { + "affPubRaw": "10 Avenue Edouard Belin" + }, + { + "affPubRaw": "France" + } + ], + "attrib": { + "orcid": "0000-0001-5625-9706" + } + }, + { + "name": { + "surname": "Samuel", + "given_name": "Henri" + }, + "affiliation": [ + { + "affPubRaw": "Institut de Physique du Globe de Paris, CNRS, Universit\u00e9 de Paris, 1 rue Jussieu, 75005 Paris - France" + } + ] + }, + { + "name": { + "surname": "Garcia", + "given_name": "Rapha\u00ebl F." + }, + "affiliation": [ + { + "affPubRaw": "Institut Sup\u00e9rieur de l\u2019A\u00e9ronautique et de l\u2019Espace ISAE-SUPAERO, 10 Avenue Edouard Belin, 31400 Toulouse, France" + } + ] + }, + { + "name": { + "surname": "Rivoldini", + "given_name": "Attilio" + }, + "affiliation": [ + { + "affPubRaw": "Royal Observatory of Belgium, Brussels, Belgium" + } + ] + }, + { + "name": { + "surname": "Perrin", + "given_name": "Cl\u00e9ment" + }, + "affiliation": [ + { + "affPubRaw": "Nantes Universit\u00e9, Universit\u00e9 d\u2019Angers, Le Mans Universit\u00e9, CNRS UMR 6112, Laboratoire de Plan\u00e9tologie et G\u00e9osciences, UAR 3281, Observatoire des Sciences de l\u2019Univers de Nantes Atlantique, F-44000 Nantes, France" + } + ] + }, + { + "name": { + "surname": "Michaut", + "given_name": "Chlo\u00e9" + }, + "affiliation": [ + { + "affPubRaw": "Universit\u00e9 de Lyon, Ecole Normale Sup\u00e9rieure de Lyon, Universit\u00e9 Claude Bernard Lyon 1, CNRS, Laboratoire de G\u00e9ologie de Lyon : Terre, Plan\u00e8tes, Environnement, 69622 Villeurbanne, France" + } + ] + }, + { + "name": { + "surname": "Wieczorek", + "given_name": "Mark" + }, + "affiliation": [ + { + "affPubRaw": "Universit\u00e9 C\u00f4te d\u2019Azur, Observatoire de la C\u00f4te d\u2019Azur, CNRS, Laboratoire Lagrange, France." + } + ] + }, + { + "name": { + "surname": "Tauzin", + "given_name": "Beno\u00eet" + }, + "affiliation": [ + { + "affPubRaw": "Universit\u00e9 de Lyon, Ecole Normale Sup\u00e9rieure de Lyon, Universit\u00e9 Claude Bernard Lyon 1, CNRS, Laboratoire de G\u00e9ologie de Lyon : Terre, Plan\u00e8tes, Environnement, 69622 Villeurbanne, France" + } + ] + }, + { + "name": { + "surname": "Connolly", + "given_name": "James A. D." + }, + "affiliation": [ + { + "affPubRaw": "Institute of Geophysics, ETH Zurich, Sonneggstrasse 5, Zurich, Switzerland" + } + ] + }, + { + "name": { + "surname": "Meyer", + "given_name": "Pauline" + }, + "affiliation": [ + { + "affPubRaw": "Ecole et Observatoire des Sciences de la Terre, Universit\u00e9 de Strasbourg, 5 rue Ren\u00e9 Descartes, 67084 Strasbourg, France" + } + ] + }, + { + "name": { + "surname": "Lognonn\u00e9", + "given_name": "Philippe" + }, + "affiliation": [ + { + "affPubRaw": "Institut de Physique du Globe de Paris, CNRS, Universit\u00e9 de Paris, 1 rue Jussieu, 75005 Paris - France" + } + ] + }, + { + "name": { + "surname": "Banerdt", + "given_name": "William B." + }, + "affiliation": [ + { + "affPubRaw": "Jet Propulsion Laboratory, California Institute of Technology, 4800 Oak Grove Drive, Pasadena, CA 91109, USA" + } + ] + } + ], + "title": { + "textEnglish": "Marsquake locations and 1-D seismic models for Mars from InSight data" + } +} diff --git a/tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json b/tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json new file mode 100644 index 0000000..e019000 --- /dev/null +++ b/tests/stubdata/output/crossref_preprint_10.31223-X55K7G.json @@ -0,0 +1,104 @@ +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2021-08-06" + }, + "publication": { + "pubYear": "2021" + }, + "persistentIDs": [ + { + "DOI": "10.31223/X55K7G" + } + ], + "authors": [ + { + "name": { + "surname": "Zwart", + "given_name": "Jacob" + }, + "attrib": { + "orcid": "0000-0002-3870-405X" + } + }, + { + "name": { + "surname": "Oliver", + "given_name": "Samantha" + }, + "attrib": { + "orcid": "0000-0001-5668-1165" + } + }, + { + "name": { + "surname": "Watkins", + "given_name": "William" + }, + "attrib": { + "orcid": "0000-0002-7544-0700" + } + }, + { + "name": { + "surname": "Sadler", + "given_name": "Jeffrey" + }, + "attrib": { + "orcid": "0000-0001-8776-4844" + } + }, + { + "name": { + "surname": "Appling", + "given_name": "Alison" + }, + "attrib": { + "orcid": "0000-0003-3638-8572" + } + }, + { + "name": { + "surname": "Corson-Dosch", + "given_name": "Hayley" + }, + "attrib": { + "orcid": "0000-0001-8695-1584" + } + }, + { + "name": { + "surname": "Jia", + "given_name": "Xiaowei" + }, + "attrib": { + "orcid": "0000-0001-8544-5233" + } + }, + { + "name": { + "surname": "Kumar", + "given_name": "Vipin" + } + }, + { + "name": { + "surname": "Read", + "given_name": "Jordan" + } + } + ], + "title": { + "textEnglish": "Near-term forecasts of stream temperature using process-guided deep learning and data assimilation" + }, + "abstract": { + "textEnglish": "Near-term forecasts of environmental outcomes can inform real-time decision making. Data assimilation modeling techniques can be used for forecasts to leverage real-time data streams, where the difference between model predictions and observations can be used to adjust the model to make better predictions tomorrow. In this use case, we developed a process-guided deep learning and data assimilation approach to make 7-day forecasts of daily maximum water temperature in the Delaware River Basin. Our modeling system produced forecasts of daily maximum stream temperature with an average root mean squared error (RMSE) from 1.2 to 1.6\u00b0C for 1-day lead time across all sites. The data assimilation algorithm successfully adjusted the process-guided deep learning model states and marginally improved forecast performance when compared to forecasts produced using the process-guided deep learning model alone (7-13% lower RMSE with the data assimilation algorithm). Our model characterized forecast uncertainty relatively well as 57-80% of observations were within 90% forecast confidence intervals across all sites and lead times, and the uncertainty associated with our forecasts allow managers to anticipate probability of exceedances of ecologically relevant thresholds and aid in decisions about releasing reservoir water downstream. The flexibility of deep learning models to be applied to various prediction problems shows promise for using these types of models to forecast many other important environmental variables and aid in decision making." + } +} diff --git a/tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json b/tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json new file mode 100644 index 0000000..b340f4f --- /dev/null +++ b/tests/stubdata/output/crossref_preprint_10.31223-X5FW25.json @@ -0,0 +1,230 @@ +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2022-01-31" + }, + "publication": { + "pubYear": "2022" + }, + "persistentIDs": [ + { + "DOI": "10.31223/X5FW25" + } + ], + "authors": [ + { + "name": { + "surname": "Shields", + "given_name": "Graham" + }, + "attrib": { + "orcid": "0000-0002-7828-3966" + } + }, + { + "name": { + "surname": "Strachan", + "given_name": "Robin" + } + }, + { + "name": { + "surname": "Porter", + "given_name": "Susannah" + } + }, + { + "name": { + "surname": "Halverson", + "given_name": "Galen" + } + }, + { + "name": { + "surname": "Macdonald", + "given_name": "Francis" + } + }, + { + "name": { + "surname": "Plumb", + "given_name": "Kenneth" + } + }, + { + "name": { + "surname": "de Alvarenga", + "given_name": "Carlos" + } + }, + { + "name": { + "surname": "Banerjee", + "given_name": "Dhiraj" + } + }, + { + "name": { + "surname": "Bekker", + "given_name": "Andrey" + } + }, + { + "name": { + "surname": "Brasier", + "given_name": "Alexander" + } + }, + { + "name": { + "surname": "Chakraborty", + "given_name": "Partha" + } + }, + { + "name": { + "surname": "Condie", + "given_name": "Kent" + } + }, + { + "name": { + "surname": "Das", + "given_name": "Kaushik" + } + }, + { + "name": { + "surname": "Ernst", + "given_name": "Richard" + } + }, + { + "name": { + "surname": "Fallick", + "given_name": "Anthony" + } + }, + { + "name": { + "surname": "Frimmel", + "given_name": "Hartwig" + } + }, + { + "name": { + "surname": "Fuck", + "given_name": "Reinhardt" + } + }, + { + "name": { + "surname": "Hoffman", + "given_name": "Paul" + } + }, + { + "name": { + "surname": "Kamber", + "given_name": "Balz" + } + }, + { + "name": { + "surname": "Kuznetsov", + "given_name": "Anton" + } + }, + { + "name": { + "surname": "Mitchell", + "given_name": "Ross" + } + }, + { + "name": { + "surname": "Poire", + "given_name": "Daniel" + } + }, + { + "name": { + "surname": "Poulton", + "given_name": "Simon" + } + }, + { + "name": { + "surname": "Riding", + "given_name": "Robert" + } + }, + { + "name": { + "surname": "Sharma", + "given_name": "Mukund" + } + }, + { + "name": { + "surname": "Storey", + "given_name": "Craig" + } + }, + { + "name": { + "surname": "Stueeken", + "given_name": "Eva" + } + }, + { + "name": { + "surname": "Tostevin", + "given_name": "Rosalie" + } + }, + { + "name": { + "surname": "Turner", + "given_name": "Elizabeth" + } + }, + { + "name": { + "surname": "Xiao", + "given_name": "Shuhai" + } + }, + { + "name": { + "surname": "Zhang", + "given_name": "Shuanhong" + } + }, + { + "name": { + "surname": "Zhou", + "given_name": "Ying" + } + }, + { + "name": { + "surname": "Zhu", + "given_name": "Maoyan" + } + } + ], + "title": { + "textEnglish": "Towards a new geological time scale: A template for improved rock-based subdivision of pre-Cryogenian time" + }, + "abstract": { + "textEnglish": "Four first-order (Hadean, Archean, Proterozoic and Phanerozoic eon) and nine second-order (Paleoarchean, Mesoarchean, Neoarchean, Paleoproterozoic, Mesoproterozoic, Neoproterozoic, Paleozoic, Mesozoic and Cenozoic era) units continue to provide intuitive subdivision of geological time. Major transitions in Earth\u2019s tectonic, biological and environmental history occurred at approximately 2.5-2.3, 1.8-1.6, 1.0-0.8 and 0.7-0.5 Ga, and so future rock-based subdivision of pre-Cryogenian time, eventually by use of global stratotypes (GSSPs), will likely require only modest deviation from current chronometric boundaries (GSSAs) at 2.5, 1.6 and 1.0 Ga, respectively. Here we argue that removal of GSSAs could be expedited by establishing event-based concepts and provisional, approximate ages for eon-, era- and period-level subdivisions as soon as practicable, in line with ratification of an Ediacaran GSSP in 2004 and chronostratigraphic definition of the Cryogenian Period at c. 720 Ma in 2012. We also outline the geological basis behind current chronometric divisions, explore how they might differ in any future rock-based scheme, identify where major issues might arise during the transition, and outline where some immediate changes to the present scheme could be easily updated/formalised, as a framework for future GSSP development. In line with these aims, we note that the currently recommended four-fold Archean subdivision has not been formally ratified and agree with previous workers that it could be simplified to an informal three-fold subdivision, pending more detailed analysis. Although the ages of period boundaries would inevitably change in a more closely rock-based or chronostratigraphic scheme, we support retention of all currently ratified period names. Existing period names, borrowed from the Greek, were chosen to delimit natural phenomena of global reach. Any new global nomenclature ought to follow this lead for consistency, and so we discourage the use of supercontinent names (e.g. Rodinian, Columbian) and regional phenomena, however exceptional. In this regard, we tentatively suggest that a new period (e.g. the \u2018Kratian\u2019), could precede the Tonian as the first period of the Neoproterozoic Era and we concur with previous authors that the existing Siderian Period (named for banded iron formations) would fit better as a chronostratigraphically defined period of the terminal Archean. Indeed, all pre-Cryogenian subdivisions will need more conceptual grounding in any future chronostratigraphic scheme. We conclude that improved rock-based division of the Proterozoic Eon would likely comprise a three-fold, period-level subdivision of the Paleoproterozoic Era (Oxygenian Rhyacian, Orosirian), a four-fold subdivision of the Mesoproterozoic Era (Statherian, Calymmian, Ectasian, Stenian) and potentially four-fold subdivision of the Neoproterozoic Era (pre-Tonian \u2018Kratian\u2019, Tonian, Cryogenian and Ediacaran). Future refinements towards an improved rock-based pre-Cryogenian geological time scale could be propoosed by new international bodies to cover the 1) pre-Ediacaran Neoproterozoic, 2) Mesoproterozoic, 3) Paleoproterozoic and 4) Archean (and Hadean) as few experts and disciplines can speak to the entire pre-Cryogenian rock record." + } +} diff --git a/tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json b/tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json new file mode 100644 index 0000000..9a191df --- /dev/null +++ b/tests/stubdata/output/crossref_preprint_10.31223-X5WD2C.json @@ -0,0 +1,68 @@ +{ + "recordData": { + "createdTime": "", + "parsedTime": "", + "loadType": "fromFile", + "loadFormat": "OtherXML", + "loadLocation": "", + "recordOrigin": "" + }, + "pubDate": { + "electrDate": "2022-04-17" + }, + "publication": { + "pubYear": "2022" + }, + "persistentIDs": [ + { + "DOI": "10.31223/X5WD2C" + } + ], + "authors": [ + { + "name": { + "surname": "Halamka", + "given_name": "Toby" + } + }, + { + "name": { + "surname": "Raberg", + "given_name": "Jonathan" + } + }, + { + "name": { + "surname": "McFarlin", + "given_name": "Jamie" + } + }, + { + "name": { + "surname": "Younkin", + "given_name": "Adam" + } + }, + { + "name": { + "surname": "Mulligan", + "given_name": "Christopher" + } + }, + { + "name": { + "surname": "Liu", + "given_name": "Xiao-Lei" + } + }, + { + "name": { + "surname": "Kopf", + "given_name": "Sebastian" + } + } + ], + "title": { + "textEnglish": "Production of diverse brGDGTs by Acidobacterium Solibacter usitatus in response to temperature, pH, and O2 provides a culturing perspective on brGDGT paleoproxies and biosynthesis" + } +} diff --git a/tests/test_crossref.py b/tests/test_crossref.py index b68981c..2fcbd62 100644 --- a/tests/test_crossref.py +++ b/tests/test_crossref.py @@ -33,6 +33,11 @@ def test_crossref(self): "crossref_cn_10.3847=1538-4357=ac8c2f", "crossref_10.1146_annurev.energy.25.1.441", "crossref_10.3137_a0410105", + "crossref_preprint_10.1002-essoar.10508651.1", + "crossref_preprint_10.1002-essoar.10511074.2", + "crossref_preprint_10.31223-X55K7G", + "crossref_preprint_10.31223-X5FW25", + "crossref_preprint_10.31223-X5WD2C", ] for f in filenames: test_infile = os.path.join(self.inputdir, f + ".xml")