Skip to content

Commit

Permalink
🎓 Configure citation-js to keep original citation labels (#1322)
Browse files Browse the repository at this point in the history
* 🎓 Configure citation-js to keep original citation labels

* 🧪 Add test for tex bib writing

* 🧪 Relocate e2e doi test files

* 🔧 Fix bib tex file

* 🎓 Prevent generation of random temp_id_* citation labels

* 🎓 Add a doi without label to bibtex e2e test

* 🏷 Use nice citation label when no id is provided without context switching
  • Loading branch information
fwkoch authored Jun 19, 2024
1 parent 1e2092d commit fc79fb1
Show file tree
Hide file tree
Showing 16 changed files with 189 additions and 66 deletions.
5 changes: 5 additions & 0 deletions .changeset/friendly-wombats-compare.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'citation-js-utils': patch
---

Configure citation-js to keep original citation labels
7 changes: 7 additions & 0 deletions .changeset/nasty-frogs-float.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'citation-js-utils': patch
'myst-cli': patch
'mystmd': patch
---

Prevent generation of random `temp_id_*` citation labels
96 changes: 48 additions & 48 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions packages/citation-js-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
"npm": ">=6"
},
"dependencies": {
"@citation-js/core": "^0.7.6",
"@citation-js/plugin-bibtex": "^0.7.8",
"@citation-js/plugin-csl": "^0.7.6",
"@citation-js/core": "^0.7.14",
"@citation-js/plugin-bibtex": "^0.7.14",
"@citation-js/plugin-csl": "^0.7.14",
"sanitize-html": "^2.7.0"
},
"devDependencies": {
Expand Down
55 changes: 52 additions & 3 deletions packages/citation-js-utils/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import { Cite } from '@citation-js/core';
import { Cite, plugins } from '@citation-js/core';
import { doi as doiUtils } from 'doi-utils';
import { clean as cleanCSL } from '@citation-js/core/lib/plugins/input/csl.js';
import sanitizeHtml from 'sanitize-html';

import '@citation-js/plugin-bibtex';
import '@citation-js/plugin-csl';

// Configure the citation-js BibTeX plugin so that labels supplied by the user are kept.
// - useIdAsLabel: use the entry's id as its BibTeX label instead of generating one.
// - checkLabel: NOTE(review): presumably turns off citation-js's own label
//   validation/sanitising so ids are written verbatim — confirm against the plugin docs.
const config = plugins.config.get('@bibtex');
Object.assign(config.format, { useIdAsLabel: true, checkLabel: false });

// Finds a DOI (e.g. `10.1186/s13617-019-0085-5`) embedded anywhere in free text such as a
// URL or note field; capture group 1 (and the full match) is the DOI itself.
// The dot after the leading "10" is escaped so that only a literal "." is accepted —
// unescaped, it matched any character and e.g. "1001234/abc" was mistaken for a DOI.
const DOI_IN_TEXT = /(10\.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i;

// This is duplicated in citation-js types, which are not exported
Expand Down Expand Up @@ -199,22 +203,67 @@ export function parseCSLJSON(source: object[]): CSL[] {
*/
export async function getCitations(bibtex: string): Promise<CitationRenderer> {
  // Parse the BibTeX source into CSL items, then build the renderers for them.
  const csl = parseBibTeX(bibtex);
  // A single return is enough: an async function already resolves whatever value
  // (or promise) it returns, so no explicit `await` is required here.
  return getCitationRenderers(csl);
}

/**
 * Generate a label from a citation
 *
 * formatLabel is pulled directly from citation-js
 *
 * This would be used always if `config.format.useIdAsLabel = false`, but is used never
 * when `config.format.useIdAsLabel = true`. We want to use it sometimes - only when
 * no id is provided.
 *
 * The label is the concatenation of, in order:
 * - the first usable word of the first author's family name (falling back to `literal`),
 * - the first entry of the first `issued['date-parts']` tuple (the year), when present,
 * - the item's `suffix` if set, otherwise the first usable word of the title.
 *
 * A "usable" word is non-empty after NFKD normalization and removal of non-ASCII
 * characters, and is not one of the stop words "the", "a" or "an".
 *
 * @param c - CSL item to derive the label from
 * @returns the generated label; an empty string when no field contributes anything
 */
function formatLabel(c: CSL): string {
  const stopWords = new Set(['the', 'a', 'an']);
  // Runs of HTML-like tags, whitespace and ASCII punctuation act as word separators.
  const unsafeChars = /(?:<\/?.*?>|[\u0020-\u002F\u003A-\u0040\u005B-\u005E\u0060\u007B-\u007F])+/g;
  // Anything outside printable ASCII; stripped after NFKD so "é" degrades to "e".
  const unicode = /[^\u0020-\u007F]+/g;
  const firstWord = (text?: string): string => {
    if (!text) return '';
    return (
      text
        .normalize('NFKD')
        .replace(unicode, '')
        .split(unsafeChars)
        .find((word) => word.length > 0 && !stopWords.has(word.toLowerCase())) ?? ''
    );
  };

  const { author, issued, suffix, title } = c;
  const firstAuthor = author?.[0];
  const year = issued?.['date-parts']?.[0]?.[0];

  let label = '';
  if (firstAuthor) {
    // `||` (not `??`) so an empty family name still falls back to the literal name.
    label += firstWord(firstAuthor.family || firstAuthor.literal);
  }
  // Guard against an empty date tuple; otherwise the text "undefined" would be appended.
  if (year != null) {
    label += year;
  }
  if (suffix) {
    label += suffix;
  } else if (title) {
    label += firstWord(title);
  }
  return label;
}

/**
* Build renderers for the given array of CSL items
*
* @param data - array of CSL items
*/
export async function getCitationRenderers(data: CSL[]): Promise<CitationRenderer> {
export function getCitationRenderers(data: CSL[]): CitationRenderer {
const cite = new Cite();
return Object.fromEntries(
data.map((c): [string, CitationRenderer[0]] => {
const matchDoi = c.URL?.match(DOI_IN_TEXT) ?? c.note?.match(DOI_IN_TEXT);
if (!c.DOI && matchDoi) {
c.DOI = matchDoi[0];
}
if (!c.id) {
c.id = formatLabel(c);
}
return [
c.id,
{
Expand Down
6 changes: 3 additions & 3 deletions packages/citation-js-utils/tests/basic.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const key = 'Cockett2015SimPEG';
describe('Test reference rendering', () => {
it('APA', async () => {
const data = parseBibTeX(bibtex);
const citations = await getCitationRenderers(data);
const citations = getCitationRenderers(data);
expect(Object.keys(citations).length).toBe(1);
const cite = citations[key];
expect(cite.render()).toEqual(TEST_APA_HTML);
Expand All @@ -29,7 +29,7 @@ describe('Test reference rendering', () => {
});
it('Vancouver', async () => {
const data = parseBibTeX(bibtex);
const citations = await getCitationRenderers(data);
const citations = getCitationRenderers(data);
const cite = citations[key];
expect(cite.render(CitationJSStyles.vancouver)).toEqual(TEST_VANCOUVER_HTML);
});
Expand All @@ -38,7 +38,7 @@ describe('Test reference rendering', () => {
['note', doiInNote],
])('Extract the DOI from the %s', async (_, src) => {
const data = parseBibTeX(src);
const citations = await getCitationRenderers(data);
const citations = getCitationRenderers(data);
expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD);
expect(citations['cury2020sparse'].getURL()).toBe(`https://doi.org/${TEST_DOI_IN_OTHER_FIELD}`);
});
Expand Down
12 changes: 12 additions & 0 deletions packages/citation-js-utils/types/citation-js/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,16 @@ declare module '@citation-js/core' {

data: CSL[];
}

// Only declare types for used config fields
/**
 * Minimal typing for the `plugins` export of `@citation-js/core`.
 *
 * Typed just enough to support calls of the form `plugins.config.get('@bibtex')`
 * followed by assignments to the returned `format` options.
 */
export const plugins: {
config: {
/** Returns the configuration object for the given plugin/format name, e.g. `'@bibtex'`. */
get(format: string): {
format: {
/** When true, the entry id is used as the label rather than a generated one. */
useIdAsLabel?: boolean;
/** NOTE(review): presumably toggles citation-js's label validation — confirm upstream. */
checkLabel?: boolean;
};
};
};
};
}
2 changes: 1 addition & 1 deletion packages/myst-cli/src/process/citations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export async function loadBibTeXCitationRenderers(
data = fs.readFileSync(path).toString();
}
const csl = parseBibTeX(data);
const renderer = await getCitationRenderers(csl);
const renderer = getCitationRenderers(csl);
session.log.debug(toc(`Read ${plural('%s citations(s)', renderer)} from ${path} in %s.`));
return renderer;
}
Expand Down
12 changes: 7 additions & 5 deletions packages/myst-cli/src/transforms/dois.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ export async function getCitation(
return null;
}
try {
const renderer = await getCitationRenderers(data);
const renderer = getCitationRenderers(data);
const id = Object.keys(renderer)[0];
const render = renderer[id];
return { id, render, remote: true };
Expand Down Expand Up @@ -271,14 +271,15 @@ export async function transformLinkedDOIs(
if (cite) number += 1;
else return false;
}
const label = cite.render.getLabel();
if (cite.remote) {
renderer[cite.render.getLabel()] = cite.render;
renderer[label] = cite.render;
}
doiRenderer[normalized] = cite;
const citeNode = node as unknown as Cite;
citeNode.type = 'cite';
citeNode.kind = 'narrative';
citeNode.label = cite.render.getLabel();
citeNode.label = label;
citeNode.identifier = node.url;
if (doi.validate(toText(citeNode.children))) {
// If the link text is the DOI, update with a citation in a following pass
Expand All @@ -297,11 +298,12 @@ export async function transformLinkedDOIs(
if (cite) number += 1;
else return false;
}
const label = cite.render.getLabel();
if (cite.remote) {
renderer[cite.render.getLabel()] = cite.render;
renderer[label] = cite.render;
}
doiRenderer[normalized] = cite;
node.label = cite.render.getLabel();
node.label = label;
return true;
}),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ @article{Koch_2012

@article{EFSA2019Dietary,
address = {IT},
author = {{EFSA Panel on Nutrition, Novel Foods and Food Allergens (NDA)} and Turck, Dominique and Castenmiller, Jacqueline and de Henauw, Stefaan and HirschErnst, KarenIldico and Kearney, John and Knutsen, Helle Katrine and Maciuk, Alexandre and Mangelsdorf, Inge and McArdle, Harry J and Pelaez, Carmen and Pentieva, Kristina and Siani, Alfonso and Thies, Frank and Tsabouri, Sophia and Vinceti, Marco and Aggett, Peter and FairweatherTait, Susan and Martin, Ambroise and Przyrembel, Hildegard and de SesmaisonsLecarr{\' e}, Agn{\` e}s and Naska, Androniki},
author = {{EFSA Panel on Nutrition, Novel Foods and Food Allergens (NDA)} and Turck, Dominique and Castenmiller, Jacqueline and de Henauw, Stefaan and HirschErnst, KarenIldico and Kearney, John and Knutsen, Helle Katrine and Maciuk, Alexandre and Mangelsdorf, Inge and McArdle, Harry J and Pelaez, Carmen and Pentieva, Kristina and Siani, Alfonso and Thies, Frank and Tsabouri, Sophia and Vinceti, Marco and Aggett, Peter and FairweatherTait, Susan and Martin, Ambroise and Przyrembel, Hildegard and de SesmaisonsLecarr{\' e}, Agn{\` e}s and Naska, Androniki},
journal = {EFSA Journal},
doi = {10.2903/j.efsa.2019.5779},
issn = {18314732, 18314732},
Expand Down
File renamed without changes.
10 changes: 8 additions & 2 deletions packages/mystmd/tests/exports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ cases:
command: myst build --tex --ci
outputs:
- path: dois/_build/out.tex
content: outputs/dois.tex
content: dois/outputs/dois.tex
- path: dois/_build/main.bib
content: outputs/dois.bib
content: dois/outputs/dois.bib
- title: Basic site build
cwd: basic-site
command: myst build
Expand Down Expand Up @@ -236,3 +236,9 @@ cases:
outputs:
- path: write-doi-bib/myst.doi.bib
content: write-doi-bib/outputs/myst.doi.bib
- title: Bibtex file is written correctly on tex export
cwd: tex-with-bib
command: myst build --tex
outputs:
- path: tex-with-bib/_build/main.bib
content: tex-with-bib/outputs/main.bib
5 changes: 5 additions & 0 deletions packages/mystmd/tests/tex-with-bib/input.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Lorem Ipsum

This citation has a label that does not match the content: @jones-etal-2014

This citation has no label by default: [](https://doi.org/10.37921/426590wiobus)
7 changes: 7 additions & 0 deletions packages/mystmd/tests/tex-with-bib/myst.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 1
project:
export:
format: tex
template: ../templates/tex
output: _build/out.tex
article: input.md
19 changes: 19 additions & 0 deletions packages/mystmd/tests/tex-with-bib/outputs/main.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@article{jones-etal-2014,
author = {Marrero, Jos{\' e} and Garc{\' i}a, Alicia and Berrocoso, Manuel and Llinares, {\' A}ngeles and Rodr{\' i}guez-Losada, Antonio and Ortiz, R.},
journal = {Journal of Applied Volcanology},
doi = {10.1186/s13617-019-0085-5},
year = {2019},
month = {7},
title = {Strategies for the development of volcanic hazard maps in monogenetic volcanic fields: the example of {La} {Palma} ({Canary} {Islands})},
volume = {8},
}


@misc{2021,
doi = {10.37921/426590wiobus},
year = {2021},
month = {sep 1},
url = {http://dx.doi.org/10.37921/426590wiobus},
howpublished = {http://dx.doi.org/10.37921/426590wiobus},
}

Loading

0 comments on commit fc79fb1

Please sign in to comment.