Skip to content

Commit

Permalink
v1.1.5 Fix orcid plugin attributeerror bug (#198)
Browse files Browse the repository at this point in the history
- various cite process tweaks
- add traceback logging of running plugins
- change variable names for clarity and consistency
- add generic "safe get" function, as suggested by Faisal in the last PR
- use generic safe get in place of all gets. helps fix bug due to
orcid api sometimes returning None for certain fields, and guards against similar future bugs.
  • Loading branch information
vincerubinetti authored May 19, 2023
1 parent 1092da3 commit 4914988
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 62 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

Reference: common-changelog.org

## 1.1.5 - 2023-05-19

### Changed

- Fix ORCID plugin bug and other cite process tweaks.

## 1.1.4 - 2023-04-28

### Changed
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# citation metadata for the template itself

title: "Lab Website Template"
version: 1.1.4
date-released: 2023-04-28
version: 1.1.5
date-released: 2023-05-19
url: "https://github.com/greenelab/lab-website-template"
authors:
- family-names: "Rubinetti"
Expand Down
Binary file modified _cite/.cache/cache.db
Binary file not shown.
38 changes: 24 additions & 14 deletions _cite/cite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
cite process to convert sources and metasources into full citations
"""

import traceback
from importlib import import_module
from pathlib import Path
from dotenv import load_dotenv
Expand All @@ -23,7 +24,7 @@

log("Compiling sources")

# master list of sources
# compiled list of sources
sources = []

# in-order list of plugins to run
Expand Down Expand Up @@ -63,37 +64,46 @@

# run plugin on data entry to expand into multiple sources
try:
entry = import_module(f"plugins.{plugin.stem}").main(entry)
expanded = import_module(f"plugins.{plugin.stem}").main(entry)
# check that plugin returned correct format
if not list_of_dicts(entry):
if not list_of_dicts(expanded):
raise Exception("Plugin didn't return list of dicts")
# catch any plugin error
except Exception as e:
# log detailed pre-formatted/colored trace
print(traceback.format_exc())
# log high-level error
log(e, 3, "ERROR")
error = True
continue

# loop through sources
for source in entry:
for source in expanded:
if plugin.stem != "sources":
log(label(source), 3)

# include meta info about source
source["plugin"] = plugin.name
source["file"] = file.name
# add source to master list

# add source to compiled list
sources.append(source)

if plugin.stem != "sources":
log(f"{len(entry)} source(s)", 3)
log(f"{len(expanded)} source(s)", 3)


log("Merging sources by id")

# merge sources with matching (non-blank) ids
for a in range(0, len(sources)):
_id = sources[a].get("id", "")
if not _id:
a_id = get_safe(sources, f"{a}.id", "")
if not a_id:
continue
for b in range(a + 1, len(sources)):
if sources[b].get("id", "") == _id:
b_id = get_safe(sources, f"{b}.id", "")
if b_id == a_id:
log(f"Found duplicate {b_id}", 2)
sources[a].update(sources[b])
sources[b] = {}
sources = [entry for entry in sources if entry]
Expand All @@ -118,7 +128,7 @@
citation = {}

# source id
_id = source.get("id", "").strip()
_id = get_safe(source, "id", "").strip()

# Manubot doesn't work without an id
if _id:
Expand All @@ -131,21 +141,21 @@
# if Manubot cannot cite source
except Exception as e:
# if regular source (id entered by user), throw error
if source.get("plugin", "") == "sources.py":
if get_safe(source, "plugin", "") == "sources.py":
log(e, 3, "ERROR")
error = True
# otherwise, if from metasource (id retrieved from some third-party API), just warn
else:
log(e, 3, "WARNING")
# discard source
# discard source from citations
# continue

# preserve fields from input source, overriding existing fields
citation.update(source)

# ensure date in proper format for correct date sorting
if citation.get("date", ""):
citation["date"] = format_date(citation.get("date", ""))
if get_safe(citation, "date", ""):
citation["date"] = format_date(get_safe(citation, "date", ""))

# add new citation to list
citations.append(citation)
Expand Down
21 changes: 11 additions & 10 deletions _cite/plugins/google-scholar.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ def main(entry):
returns list of sources to cite
"""

# get api key
# get api key (serp api key to access google scholar)
api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY", "")
if not api_key:
raise Exception('No "GOOGLE_SCHOLAR_API_KEY" env var')

# serp api
# serp api properties
params = {
"engine": "google_scholar_author",
"api_key": api_key,
"num": 100, # max allowed
}

# get id from entry
_id = entry.get("gsid", "")
_id = get_safe(entry, "gsid", "")
if not _id:
raise Exception('No "gsid" key')

Expand All @@ -31,7 +31,7 @@ def main(entry):
@cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
def query(_id):
params["author_id"] = _id
return GoogleSearch(params).get_dict().get("articles", [])
return get_safe(GoogleSearch(params).get_dict(), "articles", [])

response = query(_id)

Expand All @@ -41,14 +41,15 @@ def query(_id):
# go through response and format sources
for work in response:
# create source
year = get_safe(work, "year", "")
source = {
"id": work.get("citation_id", ""),
"id": get_safe(work, "citation_id", ""),
# api does not provide Manubot-citeable id, so keep citation details
"title": work.get("title", ""),
"authors": list(map(str.strip, work.get("authors", "").split(","))),
"publisher": work.get("publication", ""),
"date": work.get("year", "") + "-01-01",
"link": work.get("link", ""),
"title": get_safe(work, "title", ""),
"authors": list(map(str.strip, get_safe(work, "authors", "").split(","))),
"publisher": get_safe(work, "publication", ""),
"date": (year + "-01-01") if year else "",
"link": get_safe(work, "link", ""),
}

# copy fields from entry to source
Expand Down
44 changes: 21 additions & 23 deletions _cite/plugins/orcid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def main(entry):
headers = {"Accept": "application/json"}

# get id from entry
_id = entry.get("orcid", "")
_id = get_safe(entry, "orcid", "")
if not _id:
raise Exception('No "orcid" key')

Expand All @@ -25,7 +25,7 @@ def query(_id):
url = endpoint.replace("$ORCID", _id)
request = Request(url=url, headers=headers)
response = json.loads(urlopen(request).read())
return response.get("group", [])
return get_safe(response, "group", [])

response = query(_id)

Expand All @@ -35,62 +35,60 @@ def query(_id):
# go through response structure and pull out ids e.g. doi:1234/56789
for work in response:
# get list of ids
ids = work.get("external-ids", {}).get("external-id", [])
for summary in work.get("work-summary", []):
ids = ids + summary.get("external-ids", {}).get("external-id", [])
ids = get_safe(work, "external-ids.external-id", [])
for summary in get_safe(work, "work-summary", []):
ids = ids + get_safe(summary, "external-ids.external-id", [])

# prefer doi id type, or fallback to first id
_id = next(
(id for id in ids if id.get("external-id-type", "") == "doi"),
(id for id in ids if get_safe(id, "external-id-type", "") == "doi"),
ids[0] if len(ids) > 0 else {},
)

# get id and id-type from response
id_type = _id.get("external-id-type", "")
id_value = _id.get("external-id-value", "")
id_type = get_safe(_id, "external-id-type", "")
id_value = get_safe(_id, "external-id-value", "")

# create source
source = {"id": f"{id_type}:{id_value}"}

# if not a doi, Manubot likely can't cite, so keep citation details
if id_type != "doi":
# get summaries
summaries = work.get("work-summary", [])
summaries = get_safe(work, "work-summary", [])

# sort summary entries by most recent
summaries = sorted(
summaries,
key=lambda summary: (
summary.get("last-modified-date", {}).get("value", 0)
)
or summary.get("created-date", {}).get("value", 0)
key=lambda summary: (get_safe(summary, "last-modified-date.value", 0))
or get_safe(summary, "created-date.value", 0)
or 0,
reverse=True,
)

# get first summary with defined sub-value
def first(get_func):
return next(value for value in map(get_func, summaries) if value)
return next(
(value for value in map(get_func, summaries) if value), None
)

# get title
title = first(
lambda s: s.get("title", {}).get("title", {}).get("value", "")
)
title = first(lambda s: get_safe(s, "title.title.value", ""))

# get publisher
publisher = first(lambda s: s.get("journal-title", {}).get("value", ""))
publisher = first(lambda s: get_safe(s, "journal-title.value", ""))

# get date
date = (
work.get("last-modified-date", {}).get("value", 0)
or first(lambda s: s.get("last-modified-date", {}).get("value", 0))
or work.get("created-date", {}).get("value", 0)
or first(lambda s: s.get("created-date", {}).get("value", 0))
get_safe(work, "last-modified-date.value")
or first(lambda s: get_safe(s, "last-modified-date.value"))
or get_safe(work, "created-date.value")
or first(lambda s: get_safe(s, "created-date.value"))
or 0
)

# get link
link = first(lambda s: s.get("url", {}).get("value", ""))
link = first(lambda s: get_safe(s, "url.value", ""))

# keep available details
if title:
Expand Down
4 changes: 2 additions & 2 deletions _cite/plugins/pubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def main(entry):
endpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$TERM&retmode=json&retmax=1000&usehistory=y"

# get id from entry
_id = entry.get("term", "")
_id = get_safe(entry, "term", "")
if not _id:
raise Exception('No "term" key')

Expand All @@ -25,7 +25,7 @@ def query(_id):
url = endpoint.replace("$TERM", quote(_id))
request = Request(url=url)
response = json.loads(urlopen(request).read())
return response.get("esearchresult", {}).get("idlist", [])
return get_safe(response, "esearchresult.idlist", [])

response = query(_id)

Expand Down
39 changes: 28 additions & 11 deletions _cite/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,35 @@ def log(message="\n--------------------\n", indent=0, level="", newline=True):
"SUCCESS": "[black on #10B981]",
"INFO": "[grey70]",
}
color = palette.get(level, "") or palette.get(indent, "") or "[white]"
color = get_safe(palette, level, "") or get_safe(palette, indent, "") or "[white]"
if newline:
print()
print(indent * " " + color + str(message) + "[/]", end="", flush=True)


def label(entry):
    """
    get "label" of dict entry (for logging purposes)

    returns "key: value" built from the first key/value pair of entry, with both
    converted to strings so non-string values (e.g. numbers, None) don't raise.
    returns an empty string if entry has no items.
    """

    # only the first item is needed, so avoid building full key/value lists
    first = next(iter(entry.items()), None)
    if first is None:
        return ""
    key, value = first
    return str(key) + ": " + str(value)


def get_safe(item, path, default=None):
    """
    safely access value in nested lists/dicts

    path is a dot-separated string of keys/indices, e.g. "title.title.value" or
    "0.id" (a non-string path is converted with str() first). returns default as
    soon as any step of the path can't be followed. note that a value that is
    present but None is returned as-is, not replaced by default.
    """

    for part in str(path).split("."):
        # numeric-looking parts are tried as an int first (list index or int dict
        # key), then as the original string (dict key like "2023")
        keys = [part]
        try:
            keys.insert(0, int(part))
        except ValueError:
            pass

        for key in keys:
            try:
                item = item[key]
                break
            except (KeyError, IndexError, AttributeError, TypeError):
                continue
        else:
            # no form of this part could be looked up
            return default

    return item


def list_of_dicts(data):
Expand Down Expand Up @@ -176,20 +193,20 @@ def cite_with_manubot(_id):
citation["id"] = _id

# title
citation["title"] = manubot.get("title", "").strip()
citation["title"] = get_safe(manubot, "title", "").strip()

# authors
citation["authors"] = []
for author in manubot.get("author", {}):
given = author.get("given", "").strip()
family = author.get("family", "").strip()
for author in get_safe(manubot, "author", {}):
given = get_safe(author, "given", "").strip()
family = get_safe(author, "family", "").strip()
if given or family:
citation["authors"].append(" ".join([given, family]))

# publisher
container = manubot.get("container-title", "").strip()
collection = manubot.get("collection-title", "").strip()
publisher = manubot.get("publisher", "").strip()
container = get_safe(manubot, "container-title", "").strip()
collection = get_safe(manubot, "collection-title", "").strip()
publisher = get_safe(manubot, "publisher", "").strip()
citation["publisher"] = container or publisher or collection or ""

# extract date part
Expand All @@ -211,7 +228,7 @@ def date_part(citation, index):
citation["date"] = ""

# link
citation["link"] = manubot.get("URL", "").strip()
citation["link"] = get_safe(manubot, "URL", "").strip()

# return citation data
return citation

0 comments on commit 4914988

Please sign in to comment.