From fc79fb1effcc8093597ce1b7a178acb04612a190 Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Wed, 19 Jun 2024 10:30:14 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=93=20Configure=20citation-js=20to=20k?= =?UTF-8?q?eep=20original=20citation=20labels=20(#1322)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🎓 Configure citation-js to keep original citation labels * 🧪 Add test for tex bib writing * 🧪 Relocate e2e doi test files * 🔧 Fix bib tex file * 🎓 Prevent generation of random temp_id_* citation labels * 🎓 Add a doi without label to bibtex e2e test * 🏷 Use nice citation label when no id is provided without context switching --- .changeset/friendly-wombats-compare.md | 5 + .changeset/nasty-frogs-float.md | 7 ++ package-lock.json | 96 +++++++++---------- packages/citation-js-utils/package.json | 6 +- packages/citation-js-utils/src/index.ts | 55 ++++++++++- .../citation-js-utils/tests/basic.spec.ts | 6 +- .../types/citation-js/index.d.ts | 12 +++ packages/myst-cli/src/process/citations.ts | 2 +- packages/myst-cli/src/transforms/dois.ts | 12 ++- .../mystmd/tests/{ => dois}/outputs/dois.bib | 2 +- .../mystmd/tests/{ => dois}/outputs/dois.tex | 0 packages/mystmd/tests/exports.yml | 10 +- packages/mystmd/tests/tex-with-bib/input.md | 5 + packages/mystmd/tests/tex-with-bib/myst.yml | 7 ++ .../tests/tex-with-bib/outputs/main.bib | 19 ++++ .../mystmd/tests/tex-with-bib/references.bib | 11 +++ 16 files changed, 189 insertions(+), 66 deletions(-) create mode 100644 .changeset/friendly-wombats-compare.md create mode 100644 .changeset/nasty-frogs-float.md rename packages/mystmd/tests/{ => dois}/outputs/dois.bib (87%) rename packages/mystmd/tests/{ => dois}/outputs/dois.tex (100%) create mode 100644 packages/mystmd/tests/tex-with-bib/input.md create mode 100644 packages/mystmd/tests/tex-with-bib/myst.yml create mode 100644 packages/mystmd/tests/tex-with-bib/outputs/main.bib create mode 100644 
packages/mystmd/tests/tex-with-bib/references.bib diff --git a/.changeset/friendly-wombats-compare.md b/.changeset/friendly-wombats-compare.md new file mode 100644 index 000000000..fdb52a07f --- /dev/null +++ b/.changeset/friendly-wombats-compare.md @@ -0,0 +1,5 @@ +--- +'citation-js-utils': patch +--- + +Configure citation-js to keep original citation labels diff --git a/.changeset/nasty-frogs-float.md b/.changeset/nasty-frogs-float.md new file mode 100644 index 000000000..ca2dfff21 --- /dev/null +++ b/.changeset/nasty-frogs-float.md @@ -0,0 +1,7 @@ +--- +'citation-js-utils': patch +'myst-cli': patch +'mystmd': patch +--- + +Prevent generation of random `temp_id_*` citation labels diff --git a/package-lock.json b/package-lock.json index eee60e296..d83606702 100644 --- a/package-lock.json +++ b/package-lock.json @@ -375,6 +375,20 @@ "url": "https://github.com/prettier/prettier?sponsor=1" } }, + "node_modules/@citation-js/core": { + "version": "0.7.14", + "resolved": "https://registry.npmjs.org/@citation-js/core/-/core-0.7.14.tgz", + "integrity": "sha512-dgeGqYDSQmn2MtnWZkwPGpJQPh43yr1lAAr9jl1NJ9pIY1RXUQxtlAUZVur0V9PHdbfQC+kkvB1KC3VpgVV3MA==", + "dependencies": { + "@citation-js/date": "^0.5.0", + "@citation-js/name": "^0.4.2", + "fetch-ponyfill": "^7.1.0", + "sync-fetch": "^0.4.1" + }, + "engines": { + "node": ">=16.0.0" + } + }, "node_modules/@citation-js/date": { "version": "0.5.1", "resolved": "https://registry.npmjs.org/@citation-js/date/-/date-0.5.1.tgz", @@ -391,6 +405,37 @@ "node": ">=6" } }, + "node_modules/@citation-js/plugin-bibtex": { + "version": "0.7.14", + "resolved": "https://registry.npmjs.org/@citation-js/plugin-bibtex/-/plugin-bibtex-0.7.14.tgz", + "integrity": "sha512-xHOHqhF6dthLRv46N9U+mQgYLiiWQHLvQWK9+mcBKz+/3NWge62Xb1oBouNWwLEPd5FV/8gp9fp7SOp93T0dUg==", + "dependencies": { + "@citation-js/date": "^0.5.0", + "@citation-js/name": "^0.4.2", + "moo": "^0.5.1" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + 
"@citation-js/core": "^0.7.0" + } + }, + "node_modules/@citation-js/plugin-csl": { + "version": "0.7.14", + "resolved": "https://registry.npmjs.org/@citation-js/plugin-csl/-/plugin-csl-0.7.14.tgz", + "integrity": "sha512-7AKB8lMz1IqdtoE33NnWIpteLYMuSl3xqT+Cax7sQKwAIJEoq2HBmb43Ja8xQQ36nREAupQJv1V6XksIAmYnCg==", + "dependencies": { + "@citation-js/date": "^0.5.0", + "citeproc": "^2.4.6" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "@citation-js/core": "^0.7.0" + } + }, "node_modules/@dependents/detective-less": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/@dependents/detective-less/-/detective-less-3.0.2.tgz", @@ -14933,9 +14978,9 @@ "version": "1.2.1", "license": "MIT", "dependencies": { - "@citation-js/core": "^0.7.6", - "@citation-js/plugin-bibtex": "^0.7.8", - "@citation-js/plugin-csl": "^0.7.6", + "@citation-js/core": "^0.7.14", + "@citation-js/plugin-bibtex": "^0.7.14", + "@citation-js/plugin-csl": "^0.7.14", "sanitize-html": "^2.7.0" }, "devDependencies": { @@ -14946,51 +14991,6 @@ "npm": ">=6" } }, - "packages/citation-js-utils/node_modules/@citation-js/core": { - "version": "0.7.6", - "resolved": "https://registry.npmjs.org/@citation-js/core/-/core-0.7.6.tgz", - "integrity": "sha512-qbB6RjwSsx/AjlCSAqoWKN05VxpjADYe8GmnPJnRB7QeNiVmqaRc8NSQDdvQ+4qhCkQOtMH15Sa2Nde4cvlXhw==", - "dependencies": { - "@citation-js/date": "^0.5.0", - "@citation-js/name": "^0.4.2", - "fetch-ponyfill": "^7.1.0", - "sync-fetch": "^0.4.1" - }, - "engines": { - "node": ">=16.0.0" - } - }, - "packages/citation-js-utils/node_modules/@citation-js/plugin-bibtex": { - "version": "0.7.8", - "resolved": "https://registry.npmjs.org/@citation-js/plugin-bibtex/-/plugin-bibtex-0.7.8.tgz", - "integrity": "sha512-20fUXe1zm1oCONFflGj3mgIk6DHspPjWrBirGfsyHmVSR/4xqnSbrqtztLiV15zt3tbKLepTaHm3ZTrcLOK0MA==", - "dependencies": { - "@citation-js/date": "^0.5.0", - "@citation-js/name": "^0.4.2", - "moo": "^0.5.1" - }, - "engines": { - "node": ">=16.0.0" - }, - 
"peerDependencies": { - "@citation-js/core": "^0.7.0" - } - }, - "packages/citation-js-utils/node_modules/@citation-js/plugin-csl": { - "version": "0.7.6", - "resolved": "https://registry.npmjs.org/@citation-js/plugin-csl/-/plugin-csl-0.7.6.tgz", - "integrity": "sha512-H/dhzU56+D71Hzjto1x9PDtvsWaiI+Dx6Jj1vjiFtCCnbU/Zvqo5xFZNPstee+hFE6AsJ2xYlI8QujrGH+V1pQ==", - "dependencies": { - "@citation-js/date": "^0.5.0", - "citeproc": "^2.4.6" - }, - "engines": { - "node": ">=16.0.0" - }, - "peerDependencies": { - "@citation-js/core": "^0.7.0" - } - }, "packages/jats-to-myst": { "version": "1.0.26", "license": "MIT", diff --git a/packages/citation-js-utils/package.json b/packages/citation-js-utils/package.json index feab65d5e..a071ee127 100644 --- a/packages/citation-js-utils/package.json +++ b/packages/citation-js-utils/package.json @@ -35,9 +35,9 @@ "npm": ">=6" }, "dependencies": { - "@citation-js/core": "^0.7.6", - "@citation-js/plugin-bibtex": "^0.7.8", - "@citation-js/plugin-csl": "^0.7.6", + "@citation-js/core": "^0.7.14", + "@citation-js/plugin-bibtex": "^0.7.14", + "@citation-js/plugin-csl": "^0.7.14", "sanitize-html": "^2.7.0" }, "devDependencies": { diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index d32c7042d..053ec42b0 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -1,4 +1,4 @@ -import { Cite } from '@citation-js/core'; +import { Cite, plugins } from '@citation-js/core'; import { doi as doiUtils } from 'doi-utils'; import { clean as cleanCSL } from '@citation-js/core/lib/plugins/input/csl.js'; import sanitizeHtml from 'sanitize-html'; @@ -6,6 +6,10 @@ import sanitizeHtml from 'sanitize-html'; import '@citation-js/plugin-bibtex'; import '@citation-js/plugin-csl'; +const config = plugins.config.get('@bibtex'); +config.format.useIdAsLabel = true; +config.format.checkLabel = false; + const DOI_IN_TEXT = /(10.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i; // This is 
duplicated in citation-js types, which are not exported @@ -199,7 +203,49 @@ export function parseCSLJSON(source: object[]): CSL[] { */ export async function getCitations(bibtex: string): Promise<CitationRenderer> { const csl = parseBibTeX(bibtex); - return await getCitationRenderers(csl); + return getCitationRenderers(csl); +} + +/** + * Generate a label from a citation + * + * formatLabel is pulled directly from citation-js + * + * This would be used always if `config.format.useIdAsLabel = false`, but is used never + * when `config.format.useIdAsLabel = true`. We want to use it sometimes - only when + * no id is provided. + */ +function formatLabel(c: CSL): string { + const stopWords = new Set(['the', 'a', 'an']); + const unsafeChars = /(?:<\/?.*?>|[\u0020-\u002F\u003A-\u0040\u005B-\u005E\u0060\u007B-\u007F])+/g; + const unicode = /[^\u0020-\u007F]+/g; + const firstWord = (text?: string): string => { + if (!text) { + return ''; + } else { + return ( + text + .normalize('NFKD') + .replace(unicode, '') + .split(unsafeChars) + .find((word) => word.length && !stopWords.has(word.toLowerCase())) ?? 
'' + ); + } + }; + const { author, issued, suffix, title } = c; + let label = ''; + if (author && author[0]) { + label += firstWord(author[0].family || author[0].literal); + } + if (issued && issued['date-parts'] && issued['date-parts'][0]) { + label += issued['date-parts'][0][0]; + } + if (suffix) { + label += suffix; + } else if (title) { + label += firstWord(title); + } + return label; } /** @@ -207,7 +253,7 @@ export async function getCitations(bibtex: string): Promise { * * @param data - array of CSL items */ -export async function getCitationRenderers(data: CSL[]): Promise { +export function getCitationRenderers(data: CSL[]): CitationRenderer { const cite = new Cite(); return Object.fromEntries( data.map((c): [string, CitationRenderer[0]] => { @@ -215,6 +261,9 @@ export async function getCitationRenderers(data: CSL[]): Promise { it('APA', async () => { const data = parseBibTeX(bibtex); - const citations = await getCitationRenderers(data); + const citations = getCitationRenderers(data); expect(Object.keys(citations).length).toBe(1); const cite = citations[key]; expect(cite.render()).toEqual(TEST_APA_HTML); @@ -29,7 +29,7 @@ describe('Test reference rendering', () => { }); it('Vancouver', async () => { const data = parseBibTeX(bibtex); - const citations = await getCitationRenderers(data); + const citations = getCitationRenderers(data); const cite = citations[key]; expect(cite.render(CitationJSStyles.vancouver)).toEqual(TEST_VANCOUVER_HTML); }); @@ -38,7 +38,7 @@ describe('Test reference rendering', () => { ['note', doiInNote], ])('Extract the DOI from the %s', async (_, src) => { const data = parseBibTeX(src); - const citations = await getCitationRenderers(data); + const citations = getCitationRenderers(data); expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD); expect(citations['cury2020sparse'].getURL()).toBe(`https://doi.org/${TEST_DOI_IN_OTHER_FIELD}`); }); diff --git a/packages/citation-js-utils/types/citation-js/index.d.ts 
b/packages/citation-js-utils/types/citation-js/index.d.ts index ed877bc7b..fbeca0645 100644 --- a/packages/citation-js-utils/types/citation-js/index.d.ts +++ b/packages/citation-js-utils/types/citation-js/index.d.ts @@ -51,4 +51,16 @@ declare module '@citation-js/core' { data: CSL[]; } + + // Only declare types for used config fields + export const plugins: { + config: { + get(format: string): { + format: { + useIdAsLabel?: boolean; + checkLabel?: boolean; + }; + }; + }; + }; } diff --git a/packages/myst-cli/src/process/citations.ts b/packages/myst-cli/src/process/citations.ts index c04ee5c77..cb2b87a06 100644 --- a/packages/myst-cli/src/process/citations.ts +++ b/packages/myst-cli/src/process/citations.ts @@ -28,7 +28,7 @@ export async function loadBibTeXCitationRenderers( data = fs.readFileSync(path).toString(); } const csl = parseBibTeX(data); - const renderer = await getCitationRenderers(csl); + const renderer = getCitationRenderers(csl); session.log.debug(toc(`Read ${plural('%s citations(s)', renderer)} from ${path} in %s.`)); return renderer; } diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index 9ab5fe336..8df9d36d0 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -204,7 +204,7 @@ export async function getCitation( return null; } try { - const renderer = await getCitationRenderers(data); + const renderer = getCitationRenderers(data); const id = Object.keys(renderer)[0]; const render = renderer[id]; return { id, render, remote: true }; @@ -271,14 +271,15 @@ export async function transformLinkedDOIs( if (cite) number += 1; else return false; } + const label = cite.render.getLabel(); if (cite.remote) { - renderer[cite.render.getLabel()] = cite.render; + renderer[label] = cite.render; } doiRenderer[normalized] = cite; const citeNode = node as unknown as Cite; citeNode.type = 'cite'; citeNode.kind = 'narrative'; - citeNode.label = cite.render.getLabel(); + 
citeNode.label = label; citeNode.identifier = node.url; if (doi.validate(toText(citeNode.children))) { // If the link text is the DOI, update with a citation in a following pass @@ -297,11 +298,12 @@ export async function transformLinkedDOIs( if (cite) number += 1; else return false; } + const label = cite.render.getLabel(); if (cite.remote) { - renderer[cite.render.getLabel()] = cite.render; + renderer[label] = cite.render; } doiRenderer[normalized] = cite; - node.label = cite.render.getLabel(); + node.label = label; return true; }), ), diff --git a/packages/mystmd/tests/outputs/dois.bib b/packages/mystmd/tests/dois/outputs/dois.bib similarity index 87% rename from packages/mystmd/tests/outputs/dois.bib rename to packages/mystmd/tests/dois/outputs/dois.bib index 5afce6796..9cd0965a1 100644 --- a/packages/mystmd/tests/outputs/dois.bib +++ b/packages/mystmd/tests/dois/outputs/dois.bib @@ -60,7 +60,7 @@ @article{Koch_2012 @article{EFSA2019Dietary, address = {IT}, - author = {{EFSA Panel on Nutrition, Novel Foods and Food Allergens (NDA)} and Turck, Dominique and Castenmiller, Jacqueline and de Henauw, Stefaan and HirschErnst, KarenIldico and Kearney, John and Knutsen, Helle Katrine and Maciuk, Alexandre and Mangelsdorf, Inge and McArdle, Harry J and Pelaez, Carmen and Pentieva, Kristina and Siani, Alfonso and Thies, Frank and Tsabouri, Sophia and Vinceti, Marco and Aggett, Peter and FairweatherTait, Susan and Martin, Ambroise and Przyrembel, Hildegard and de SesmaisonsLecarr{\' e}, Agn{\` e}s and Naska, Androniki}, + author = {{EFSA Panel on Nutrition, Novel Foods and Food Allergens (NDA)} and Turck, Dominique and Castenmiller, Jacqueline and de Henauw, Stefaan and HirschErnst, KarenIldico and Kearney, John and Knutsen, Helle Katrine and Maciuk, Alexandre and Mangelsdorf, Inge and McArdle, Harry J and Pelaez, Carmen and Pentieva, Kristina and Siani, Alfonso and Thies, Frank and Tsabouri, Sophia and Vinceti, Marco and Aggett, Peter and FairweatherTait, Susan and 
Martin, Ambroise and Przyrembel, Hildegard and de SesmaisonsLecarr{\' e}, Agn{\` e}s and Naska, Androniki}, journal = {EFSA Journal}, doi = {10.2903/j.efsa.2019.5779}, issn = {18314732, 18314732}, diff --git a/packages/mystmd/tests/outputs/dois.tex b/packages/mystmd/tests/dois/outputs/dois.tex similarity index 100% rename from packages/mystmd/tests/outputs/dois.tex rename to packages/mystmd/tests/dois/outputs/dois.tex diff --git a/packages/mystmd/tests/exports.yml b/packages/mystmd/tests/exports.yml index bedd7e966..4c02d7901 100644 --- a/packages/mystmd/tests/exports.yml +++ b/packages/mystmd/tests/exports.yml @@ -176,9 +176,9 @@ cases: command: myst build --tex --ci outputs: - path: dois/_build/out.tex - content: outputs/dois.tex + content: dois/outputs/dois.tex - path: dois/_build/main.bib - content: outputs/dois.bib + content: dois/outputs/dois.bib - title: Basic site build cwd: basic-site command: myst build @@ -236,3 +236,9 @@ cases: outputs: - path: write-doi-bib/myst.doi.bib content: write-doi-bib/outputs/myst.doi.bib + - title: Bibtex file is written correctly on tex export + cwd: tex-with-bib + command: myst build --tex + outputs: + - path: tex-with-bib/_build/main.bib + content: tex-with-bib/outputs/main.bib diff --git a/packages/mystmd/tests/tex-with-bib/input.md b/packages/mystmd/tests/tex-with-bib/input.md new file mode 100644 index 000000000..78d81be67 --- /dev/null +++ b/packages/mystmd/tests/tex-with-bib/input.md @@ -0,0 +1,5 @@ +# Lorem Ipsum + +This citation has a label that does not match the content: @jones-etal-2014 + +This citation has no label by default: [](https://doi.org/10.37921/426590wiobus) \ No newline at end of file diff --git a/packages/mystmd/tests/tex-with-bib/myst.yml b/packages/mystmd/tests/tex-with-bib/myst.yml new file mode 100644 index 000000000..61b21abc8 --- /dev/null +++ b/packages/mystmd/tests/tex-with-bib/myst.yml @@ -0,0 +1,7 @@ +version: 1 +project: + export: + format: tex + template: ../templates/tex + output: 
_build/out.tex + article: input.md diff --git a/packages/mystmd/tests/tex-with-bib/outputs/main.bib b/packages/mystmd/tests/tex-with-bib/outputs/main.bib new file mode 100644 index 000000000..0c0946b08 --- /dev/null +++ b/packages/mystmd/tests/tex-with-bib/outputs/main.bib @@ -0,0 +1,19 @@ +@article{jones-etal-2014, + author = {Marrero, Jos{\' e} and Garc{\' i}a, Alicia and Berrocoso, Manuel and Llinares, {\' A}ngeles and Rodr{\' i}guez-Losada, Antonio and Ortiz, R.}, + journal = {Journal of Applied Volcanology}, + doi = {10.1186/s13617-019-0085-5}, + year = {2019}, + month = {7}, + title = {Strategies for the development of volcanic hazard maps in monogenetic volcanic fields: the example of {La} {Palma} ({Canary} {Islands})}, + volume = {8}, +} + + +@misc{2021, + doi = {10.37921/426590wiobus}, + year = {2021}, + month = {sep 1}, + url = {http://dx.doi.org/10.37921/426590wiobus}, + howpublished = {http://dx.doi.org/10.37921/426590wiobus}, +} + diff --git a/packages/mystmd/tests/tex-with-bib/references.bib b/packages/mystmd/tests/tex-with-bib/references.bib new file mode 100644 index 000000000..2749be211 --- /dev/null +++ b/packages/mystmd/tests/tex-with-bib/references.bib @@ -0,0 +1,11 @@ +@article{jones-etal-2014, + author = {Marrero, Jos{\' e} and Garc{\' i}a, Alicia and Berrocoso, Manuel and Llinares, {\' A}ngeles and Rodr{\' i}guez-Losada, Antonio and Ortiz, R.}, + journal = {Journal of Applied Volcanology}, + year = {2019}, + month = {7}, + pages = {}, + title = {Strategies for the development of volcanic hazard maps in monogenetic volcanic fields: the example of {La} {Palma} ({Canary} {Islands})}, + volume = {8}, + doi = {10.1186/s13617-019-0085-5}, +} +