diff --git a/.changeset/curly-pumas-burn.md b/.changeset/curly-pumas-burn.md new file mode 100644 index 000000000..f04d4cbbf --- /dev/null +++ b/.changeset/curly-pumas-burn.md @@ -0,0 +1,6 @@ +--- +'myst-cli': patch +'mystmd': patch +--- + +Add build command to write remote doi citations to bibtex diff --git a/.changeset/great-laws-look.md b/.changeset/great-laws-look.md new file mode 100644 index 000000000..1b4a41881 --- /dev/null +++ b/.changeset/great-laws-look.md @@ -0,0 +1,5 @@ +--- +'myst-cli': patch +--- + +Fall back to handle.net if doi.org fails diff --git a/.changeset/pretty-students-sip.md b/.changeset/pretty-students-sip.md new file mode 100644 index 000000000..695cbc3e4 --- /dev/null +++ b/.changeset/pretty-students-sip.md @@ -0,0 +1,5 @@ +--- +'myst-cli': patch +--- + +Normalize doi keys in in-memory doi cache diff --git a/.changeset/tame-books-play.md b/.changeset/tame-books-play.md new file mode 100644 index 000000000..eb3692cdb --- /dev/null +++ b/.changeset/tame-books-play.md @@ -0,0 +1,5 @@ +--- +'myst-cli': patch +--- + +Prioritize dois from bibtex over remote fetching diff --git a/docs/citations.md b/docs/citations.md index 722f120d5..04763a560 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -19,7 +19,7 @@ This is a link in markdown: [Cockett, 2022](https://doi.org/10.5281/zenodo.64760 It is also possible to drop the link text, that is:\ `<doi:10.5281/zenodo.6476040>` or `[](doi:10.5281/zenodo.6476040)`,\ -which will insert the citation text in the correct format (e.g. adding an italic "_et al._", etc.). The citation data for these DOIs will be downloaded from `https://doi.org` once and cached to a local file in the `_build` directory. This cache may be cleared with `myst clean --cache`. +which will insert the citation text in the correct format (e.g. adding an italic "_et al._", etc.). If the DOI is present on a citation from a BibTeX file in your project, that citation will be used. 
Otherwise, the citation data for these DOIs will be downloaded from `https://doi.org` once and cached to a local file in the `_build` directory. This cache may be cleared with `myst clean --cache`. Providing your DOIs as full links has the advantage that on other rendering platforms (e.g. GitHub), your citation will still be shown as a link. If you have many citations, however, this will slow down the build process as the citation information is fetched dynamically. @@ -39,6 +39,16 @@ For DOIs with multiple slashes in the identifier you also have to use the full h ::: +### Writing DOIs to BibTeX + +If you encounter problems fetching DOIs from `https://doi.org`, for example the downloaded citation does not include all the data you expect or requests to `https://doi.org` are failing on an automated continuous integration platform, you may write your DOI citations to file using the MyST command + +```bash +myst build --doi-bib +``` + +This will generate a BibTeX file `myst.doi.bib` which you may then rename, edit, and save to your project. On subsequent builds, the DOIs will be loaded from this file rather than fetched remotely. + ## Including BibTeX A standard way of including references for $\LaTeX$ is using <wiki:BibTeX>, you can include a `*.bib` file or files in the same directory as your content directory for the project. These will provide the reference keys for that project. 
diff --git a/packages/myst-cli/src/build/build.ts b/packages/myst-cli/src/build/build.ts index 3849df053..8c868965a 100644 --- a/packages/myst-cli/src/build/build.ts +++ b/packages/myst-cli/src/build/build.ts @@ -215,8 +215,8 @@ function extToKind(ext: string): string { } export async function build(session: ISession, files: string[], opts: BuildOpts) { - const { site, all, watch } = opts; - const performSiteBuild = all || (files.length === 0 && exportSite(session, opts)); + const { site, all, watch, writeDOIBib } = opts; + const performSiteBuild = all || (files.length === 0 && exportSite(session, opts)) || writeDOIBib; const exportOptionsList = await collectAllBuildExportOptions(session, files, opts); // TODO: generalize and pull this out! const buildLog: Record = { diff --git a/packages/myst-cli/src/build/meca/index.ts b/packages/myst-cli/src/build/meca/index.ts index 50c29517e..c33ab72c6 100644 --- a/packages/myst-cli/src/build/meca/index.ts +++ b/packages/myst-cli/src/build/meca/index.ts @@ -282,7 +282,7 @@ export async function runMecaExport( ); // Ensure that an explicit TOC is present if (configDest) { - writeTOCToConfigFile(project, configFile, configDest); + await writeTOCToConfigFile(project, configFile, configDest); } addManifestItem(manifestItems, 'article-source', mecaFolder, configDest); diff --git a/packages/myst-cli/src/build/utils/bibtex.ts b/packages/myst-cli/src/build/utils/bibtex.ts index a8486ef18..aba4fff55 100644 --- a/packages/myst-cli/src/build/utils/bibtex.ts +++ b/packages/myst-cli/src/build/utils/bibtex.ts @@ -42,3 +42,31 @@ export function writeBibtexFromCitationRenderers( fs.writeFileSync(output, bibtexContent.join('\n')); return true; } + +/** + * Write new bibtex file with all remotely loaded DOI bibtex entries + * + * This bibtex file is unordered and includes a header indicating it may be + * overwritten. 
+ * + * Returns true if file was written + */ +export function writeRemoteDOIBibtex(session: ISession, output: string) { + const cache = castSession(session); + const bibtexContent: string[] = []; + // Keep existing myst.doi.bib entries + Object.values(cache.$citationRenderers[output] ?? {}).forEach((render) => { + bibtexContent.push(render.exportBibTeX()); + }); + Object.values(cache.$doiRenderers).forEach(({ render, remote }) => { + if (!remote) return; + bibtexContent.push(render.exportBibTeX()); + }); + if (!bibtexContent.length) return false; + bibtexContent.unshift( + '% AUTOGENERATED FILE - EDITS MAY BE LOST\n% To regenerate, run `myst build --doi-bib`\n', + ); + if (!fs.existsSync(output)) fs.mkdirSync(path.dirname(output), { recursive: true }); + fs.writeFileSync(output, bibtexContent.join('\n')); + return true; +} diff --git a/packages/myst-cli/src/cli/build.ts b/packages/myst-cli/src/cli/build.ts index 17ed3d3d1..5d9247a0a 100644 --- a/packages/myst-cli/src/cli/build.ts +++ b/packages/myst-cli/src/cli/build.ts @@ -18,6 +18,7 @@ import { makeCIOption, makeExecuteOption, makeMaxSizeWebpOption, + makeDOIBibOption, } from './options.js'; export function makeBuildCommand() { @@ -35,6 +36,7 @@ export function makeBuildCommand() { .addOption(makeSiteOption('Build MyST site content')) .addOption(makeHtmlOption('Build static HTML site content')) .addOption(makeAllOption('Build all exports')) + .addOption(makeDOIBibOption()) .addOption(makeWatchOption()) .addOption(makeNamedExportOption('Output file for the export')) .addOption(makeForceOption()) diff --git a/packages/myst-cli/src/cli/options.ts b/packages/myst-cli/src/cli/options.ts index 91d371fa9..432f68520 100644 --- a/packages/myst-cli/src/cli/options.ts +++ b/packages/myst-cli/src/cli/options.ts @@ -1,5 +1,7 @@ import { InvalidArgumentError, Option } from 'commander'; +export const MYST_DOI_BIB_FILE = 'myst.doi.bib'; + function parseInt(value: any) { const parsedValue = Number.parseInt(value, 10); if 
(isNaN(parsedValue)) throw new InvalidArgumentError('Not a number.'); @@ -58,6 +60,15 @@ export function makeAllOption(description: string) { return new Option('-a, --all', description).default(false); } +export function makeDOIBibOption() { + return new Option( + '--doi-bib', + `Generate (or regenerate) ${MYST_DOI_BIB_FILE} file containing bibtex entries for all remotely loaded DOI citations`, + ) + .default(false) + .implies({ writeDOIBib: true }); +} + export function makeWatchOption() { return new Option('--watch', 'Watch modified files and re-build on change').default(false); } diff --git a/packages/myst-cli/src/process/file.ts b/packages/myst-cli/src/process/file.ts index 325213203..9692b32dc 100644 --- a/packages/myst-cli/src/process/file.ts +++ b/packages/myst-cli/src/process/file.ts @@ -4,6 +4,7 @@ import { createHash } from 'node:crypto'; import { tic } from 'myst-cli-utils'; import { TexParser } from 'tex-to-myst'; import { VFile } from 'vfile'; +import { doi } from 'doi-utils'; import type { GenericParent } from 'myst-common'; import { RuleId, toText } from 'myst-common'; import type { PageFrontmatter } from 'myst-frontmatter'; @@ -162,8 +163,13 @@ export async function loadFile( break; } case '.bib': { - const renderer = await loadBibTeXCitationRenderers(session, file); - cache.$citationRenderers[file] = renderer; + const renderers = await loadBibTeXCitationRenderers(session, file); + cache.$citationRenderers[file] = renderers; + Object.entries(renderers).forEach(([id, renderer]) => { + const normalizedDOI = doi.normalize(renderer.getDOI())?.toLowerCase(); + if (!normalizedDOI || cache.$doiRenderers[normalizedDOI]) return; + cache.$doiRenderers[normalizedDOI] = { id, render: renderer }; + }); break; } default: diff --git a/packages/myst-cli/src/process/site.ts b/packages/myst-cli/src/process/site.ts index 3821dc05b..c4ed94718 100644 --- a/packages/myst-cli/src/process/site.ts +++ b/packages/myst-cli/src/process/site.ts @@ -15,6 +15,8 @@ import { 
resolvePageDownloads, resolvePageExports, } from '../build/site/manifest.js'; +import { writeRemoteDOIBibtex } from '../build/utils/bibtex.js'; +import { MYST_DOI_BIB_FILE } from '../cli/options.js'; import { filterPages, loadProjectFromDisk } from '../project/load.js'; import type { LocalProject } from '../project/types.js'; import { castSession } from '../session/cache.js'; @@ -54,6 +56,7 @@ export type ProcessFileOptions = { export type ProcessProjectOptions = ProcessFileOptions & { watchMode?: boolean; writeTOC?: boolean; + writeDOIBib?: boolean; writeFiles?: boolean; reloadProject?: boolean; checkLinks?: boolean; @@ -375,6 +378,7 @@ export async function processProject( extraTransforms, watchMode, writeTOC, + writeDOIBib, writeFiles = true, reloadProject, execute, @@ -461,6 +465,11 @@ export async function processProject( log.info( toc(`📚 Built ${plural('%s page(s)', pages)} for ${siteProject.slug ?? 'project'} in %s.`), ); + if (writeDOIBib) { + const doiBibFile = join(siteProject.path, MYST_DOI_BIB_FILE); + log.info(`🎓 Writing remote DOI citations to ${doiBibFile}`); + writeRemoteDOIBibtex(session, doiBibFile); + } return project; } diff --git a/packages/myst-cli/src/project/fromTOC.ts b/packages/myst-cli/src/project/fromTOC.ts index b539f3e82..63b9e919a 100644 --- a/packages/myst-cli/src/project/fromTOC.ts +++ b/packages/myst-cli/src/project/fromTOC.ts @@ -312,9 +312,7 @@ export function getIgnoreFiles(session: ISession, path: string) { const excludeFiles = excludePatterns .map((pattern) => { const matches = globSync(pattern); //.split(sep).join('/')); - return matches - .map((match) => match.split('/').join(sep)) - .filter((match) => isValidFile(match)); + return matches.map((match) => match.split('/').join(sep)); }) .flat(); return [...rootConfigYamls, ...excludeFiles]; diff --git a/packages/myst-cli/src/project/load.ts b/packages/myst-cli/src/project/load.ts index a2d3a2ef0..767805700 100644 --- a/packages/myst-cli/src/project/load.ts +++ 
b/packages/myst-cli/src/project/load.ts @@ -13,7 +13,7 @@ import { addWarningForFile } from '../utils/addWarningForFile.js'; import { getAllBibTexFilesOnPath } from '../utils/getAllBibtexFiles.js'; import { tocFile, validateSphinxTOC } from '../utils/toc.js'; import { projectFromPath } from './fromPath.js'; -import { projectFromTOC, projectFromSphinxTOC } from './fromTOC.js'; +import { projectFromTOC, projectFromSphinxTOC, getIgnoreFiles } from './fromTOC.js'; import type { LocalProject, LocalProjectPage } from './types.js'; import { writeTOCToConfigFile } from './toTOC.js'; /** @@ -107,9 +107,9 @@ export async function loadProjectFromDisk( session.log.info(`⬆️ Upgrading legacy jupyterbook TOC to MyST: ${tocFile(path)}`); } session.log.info(`💾 Writing new TOC to: ${projectConfigFile}`); - writeTOCToConfigFile(newProject, projectConfigFile, projectConfigFile); + await writeTOCToConfigFile(newProject, projectConfigFile, projectConfigFile); } - const allBibFiles = getAllBibTexFilesOnPath(session, path); + const allBibFiles = getAllBibTexFilesOnPath(session, path, getIgnoreFiles(session, path)); let bibliography: string[]; if (projectConfig?.bibliography) { const bibConfigPath = `${projectConfigFile}#bibliography`; diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index f8bcb8041..7ff85029f 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -99,7 +99,8 @@ export async function resolveDoiOrg( ): Promise { const normalizedDoi = doi.normalize(doiString); const url = doi.buildUrl(doiString); // This must be based on the incoming string, not the normalizedDoi. (e.g. 
short DOIs) - if (!doi.validate(doiString) || !normalizedDoi || !url) return undefined; + const handleUrl = url?.replace('doi.org', 'hdl.handle.net'); + if (!doi.validate(doiString) || !normalizedDoi || !url || !handleUrl) return undefined; const filename = doiCSLJSONCacheFilename(normalizedDoi); // Cache DOI resolution as CSL JSON (parsed) @@ -113,6 +114,7 @@ export async function resolveDoiOrg( let data: CSL[] | undefined; try { data = await resolveDOIAsBibTeX(session, url); + if (!data) data = await resolveDOIAsBibTeX(session, handleUrl); if (data) { session.log.debug(toc(`Fetched reference BibTeX for doi:${normalizedDoi} in %s`)); } else { @@ -128,6 +130,7 @@ export async function resolveDoiOrg( if (!data) { try { data = await resolveDOIAsCSLJSON(session, url); + if (!data) data = await resolveDOIAsCSLJSON(session, handleUrl); if (data) { session.log.debug(toc(`Fetched reference CSL-JSON for doi:${normalizedDoi} in %s`)); } else { @@ -203,7 +206,7 @@ export async function getCitation( const renderer = await getCitationRenderers(data); const id = Object.keys(renderer)[0]; const render = renderer[id]; - return { id, render }; + return { id, render, remote: true }; } catch (error) { fileError( vfile, @@ -249,19 +252,23 @@ export async function transformLinkedDOIs( let number = 0; await Promise.all([ ...linkedDois.map(async (node) => { - let cite: SingleCitationRenderer | null = doiRenderer[node.url]; + const normalized = doi.normalize(node.url)?.toLowerCase(); + if (!normalized) return false; + let cite: SingleCitationRenderer | null = doiRenderer[normalized]; if (!cite) { cite = await getCitation(session, vfile, node.url, node); if (cite) number += 1; else return false; } - doiRenderer[node.url] = cite; - const label = cite.render.getLabel(); - renderer[label] = cite.render; + if (cite.remote) { + renderer[cite.render.getLabel()] = cite.render; + } + doiRenderer[normalized] = cite; const citeNode = node as unknown as Cite; citeNode.type = 'cite'; citeNode.kind = 
'narrative'; - citeNode.label = label; + citeNode.label = cite.render.getLabel(); + citeNode.identifier = node.url; if (doi.validate(toText(citeNode.children))) { // If the link text is the DOI, update with a citation in a following pass citeNode.children = []; @@ -269,16 +276,19 @@ export async function transformLinkedDOIs( return true; }), ...citeDois.map(async (node) => { - let cite: SingleCitationRenderer | null = doiRenderer[node.label]; + const normalized = doi.normalize(node.label)?.toLowerCase(); + if (!normalized) return false; + let cite: SingleCitationRenderer | null = doiRenderer[normalized]; if (!cite) { cite = await getCitation(session, vfile, node.label, node); if (cite) number += 1; else return false; } - doiRenderer[node.label] = cite; - const label = cite.render.getLabel(); - renderer[label] = cite.render; - node.label = label; + if (cite.remote) { + renderer[cite.render.getLabel()] = cite.render; + } + doiRenderer[normalized] = cite; + node.label = cite.render.getLabel(); return true; }), ]); diff --git a/packages/myst-cli/src/transforms/types.ts b/packages/myst-cli/src/transforms/types.ts index 2b8bdee1f..54f0645ec 100644 --- a/packages/myst-cli/src/transforms/types.ts +++ b/packages/myst-cli/src/transforms/types.ts @@ -20,4 +20,9 @@ export type RendererData = PreRendererData & { dependencies: Dependency[]; }; -export type SingleCitationRenderer = { id: string; render: CitationRenderer[''] }; +export type SingleCitationRenderer = { + id: string; + render: CitationRenderer['']; + /** If remote: true, this citation was loaded from the web */ + remote?: boolean; +}; diff --git a/packages/myst-cli/src/utils/getAllBibtexFiles.ts b/packages/myst-cli/src/utils/getAllBibtexFiles.ts index b2b20253f..e84c8aaf1 100644 --- a/packages/myst-cli/src/utils/getAllBibtexFiles.ts +++ b/packages/myst-cli/src/utils/getAllBibtexFiles.ts @@ -4,13 +4,14 @@ import { isDirectory } from 'myst-cli-utils'; import type { ISession } from '../session/types.js'; import { 
shouldIgnoreFile } from './shouldIgnoreFile.js'; -export function getAllBibTexFilesOnPath(session: ISession, dir: string) { +export function getAllBibTexFilesOnPath(session: ISession, dir: string, ignore?: string[]) { let bibFiles: string[] = []; const content = fs.readdirSync(dir); content .map((file) => path.join(dir, file)) .filter((file) => { const isDir = isDirectory(file); + if (ignore?.includes(file)) return false; if (!isDir && path.extname(file) === '.bib') { // Push the bibtex file to a list! bibFiles.push(file); @@ -23,7 +24,7 @@ export function getAllBibTexFilesOnPath(session: ISession, dir: string) { }) .forEach((subdir) => { // Now recurse into each directory - bibFiles = bibFiles.concat(getAllBibTexFilesOnPath(session, subdir)); + bibFiles = bibFiles.concat(getAllBibTexFilesOnPath(session, subdir, ignore)); }); return bibFiles; } diff --git a/packages/mystmd/src/options.ts b/packages/mystmd/src/options.ts index 3c727c482..9d0f28c19 100644 --- a/packages/mystmd/src/options.ts +++ b/packages/mystmd/src/options.ts @@ -7,7 +7,7 @@ export function makeProjectOption(description: string) { export function makeWriteTOCOption() { return new Option( '--write-toc', - 'Generate editable _toc.yml file for project if it does not exist', + 'Generate editable table of contents within your myst.yml file, if it does not exist', ) .default(false) .implies({ writeTOC: true }); diff --git a/packages/mystmd/tests/exports.yml b/packages/mystmd/tests/exports.yml index 5f5955a69..bedd7e966 100644 --- a/packages/mystmd/tests/exports.yml +++ b/packages/mystmd/tests/exports.yml @@ -230,3 +230,9 @@ cases: content: math-macros/outputs/index.json - path: math-macros/_build/site/config.json content: math-macros/outputs/config.json + - title: Write DOI .bib file + cwd: write-doi-bib + command: myst build --doi-bib + outputs: + - path: write-doi-bib/myst.doi.bib + content: write-doi-bib/outputs/myst.doi.bib diff --git a/packages/mystmd/tests/write-doi-bib/.gitignore 
b/packages/mystmd/tests/write-doi-bib/.gitignore new file mode 100644 index 000000000..1907f775f --- /dev/null +++ b/packages/mystmd/tests/write-doi-bib/.gitignore @@ -0,0 +1 @@ +myst.doi.bib \ No newline at end of file diff --git a/packages/mystmd/tests/write-doi-bib/index.md b/packages/mystmd/tests/write-doi-bib/index.md new file mode 100644 index 000000000..621a32543 --- /dev/null +++ b/packages/mystmd/tests/write-doi-bib/index.md @@ -0,0 +1,3 @@ +# One DOI + +[](doi:10.1111/j.1365-246X.2012.05497.x) \ No newline at end of file diff --git a/packages/mystmd/tests/write-doi-bib/myst.yml b/packages/mystmd/tests/write-doi-bib/myst.yml new file mode 100644 index 000000000..0e3479dca --- /dev/null +++ b/packages/mystmd/tests/write-doi-bib/myst.yml @@ -0,0 +1,8 @@ +version: 1 +project: + exclude: outputs/myst.doi.bib +site: + nav: [] + actions: [] + domains: [] + template: ../templates/site/myst/book-theme diff --git a/packages/mystmd/tests/write-doi-bib/outputs/myst.doi.bib b/packages/mystmd/tests/write-doi-bib/outputs/myst.doi.bib new file mode 100644 index 000000000..15f9d858b --- /dev/null +++ b/packages/mystmd/tests/write-doi-bib/outputs/myst.doi.bib @@ -0,0 +1,18 @@ +% AUTOGENERATED FILE - EDITS MAY BE LOST +% To regenerate, run `myst build --doi-bib` + +@article{Koch_2012, + author = {Koch, Franklin W. and Wiens, Douglas A. and Nyblade, Andrew A. and Shore, Patrick J. and Tibi, Rigobert and Ateba, B. and Tabod, C.T. and Nnange, J. M.}, + journal = {Geophysical Journal International}, + doi = {10.1111/j.1365-246x.2012.05497.x}, + issn = {0956-540X}, + number = {1}, + year = {2012}, + month = {5}, + pages = {75--86}, + publisher = {Oxford University Press (OUP)}, + title = {Upper-mantle anisotropy beneath the {Cameroon} {Volcanic} {Line} and {Congo} {Craton} from shear wave splitting measurements: Anisotropy beneath the {CVL}}, + url = {http://dx.doi.org/10.1111/j.1365-246X.2012.05497.x}, + volume = {190}, +} +