diff --git a/.changeset/thick-fans-hear.md b/.changeset/thick-fans-hear.md new file mode 100644 index 000000000..d0376189d --- /dev/null +++ b/.changeset/thick-fans-hear.md @@ -0,0 +1,5 @@ +--- +"myst-cli": minor +--- + +Add Jupyter Book upgrade feature diff --git a/package-lock.json b/package-lock.json index e301789f1..fbd802d45 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14965,6 +14965,14 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", @@ -15170,7 +15178,8 @@ "vfile": "^5.3.5", "which": "^3.0.1", "ws": "^8.9.0", - "xml-js": "^1.6.11" + "xml-js": "^1.6.11", + "zod": "^3.23.8" }, "devDependencies": { "@jupyterlab/nbformat": "3.5.2", diff --git a/packages/myst-cli/package.json b/packages/myst-cli/package.json index 973ee9425..c5a69b34e 100644 --- a/packages/myst-cli/package.json +++ b/packages/myst-cli/package.json @@ -82,13 +82,13 @@ "myst-spec": "^0.0.5", "myst-spec-ext": "^1.4.7", "myst-templates": "^1.0.19", - "myst-toc": "^0.1.1", "myst-to-docx": "^1.0.10", "myst-to-jats": "^1.0.26", "myst-to-md": "^1.0.11", "myst-to-tex": "^1.0.30", "myst-to-typst": "^0.0.17", "myst-transforms": "^1.3.18", + "myst-toc": "^0.1.1", "nanoid": "^4.0.0", "nbtx": "^0.2.3", "node-fetch": "^3.3.1", @@ -105,7 +105,8 @@ "vfile": "^5.3.5", "which": "^3.0.1", "ws": "^8.9.0", - "xml-js": "^1.6.11" + "xml-js": "^1.6.11", + "zod": "^3.23.8" }, "devDependencies": { "@jupyterlab/nbformat": "3.5.2", diff --git a/packages/myst-cli/src/build/index.ts b/packages/myst-cli/src/build/index.ts index 02adedcfe..e82ad1946 100644 --- a/packages/myst-cli/src/build/index.ts 
+++ b/packages/myst-cli/src/build/index.ts @@ -1,7 +1,6 @@ export * from './build.js'; export * from './clean.js'; export * from './docx/index.js'; -export * from './init.js'; export * from './pdf/index.js'; export * from './site/index.js'; export * from './tex/index.js'; diff --git a/packages/myst-cli/src/build/utils/index.ts b/packages/myst-cli/src/build/utils/index.ts index 0deff91ca..a6925295c 100644 --- a/packages/myst-cli/src/build/utils/index.ts +++ b/packages/myst-cli/src/build/utils/index.ts @@ -5,4 +5,3 @@ export * from './getFileContent.js'; export * from './localArticleExport.js'; export * from './resolveAndLogErrors.js'; export * from './bibtex.js'; -export * from './github.js'; diff --git a/packages/myst-cli/src/index.ts b/packages/myst-cli/src/index.ts index abdccf4b2..051dabfbb 100644 --- a/packages/myst-cli/src/index.ts +++ b/packages/myst-cli/src/index.ts @@ -1,6 +1,7 @@ export * from './build/index.js'; export * from './cli/index.js'; export * from './config.js'; +export * from './init/index.js'; export * from './frontmatter.js'; export * from './plugins.js'; export * from './process/index.js'; diff --git a/packages/myst-cli/src/build/gh-actions/index.ts b/packages/myst-cli/src/init/gh-actions/index.ts similarity index 98% rename from packages/myst-cli/src/build/gh-actions/index.ts rename to packages/myst-cli/src/init/gh-actions/index.ts index 27c90e65a..7030be633 100644 --- a/packages/myst-cli/src/build/gh-actions/index.ts +++ b/packages/myst-cli/src/init/gh-actions/index.ts @@ -4,8 +4,8 @@ import inquirer from 'inquirer'; import chalk from 'chalk'; import type { ISession } from 'myst-cli-utils'; import { writeFileToFolder } from 'myst-cli-utils'; -import { getGithubUrl } from '../utils/github.js'; -import { checkFolderIsGit, checkAtGitRoot } from '../utils/git.js'; +import { getGithubUrl } from '../../utils/github.js'; +import { checkFolderIsGit, checkAtGitRoot } from '../../utils/git.js'; function createGithubPagesAction({ defaultBranch = 
'main', diff --git a/packages/myst-cli/src/init/index.ts b/packages/myst-cli/src/init/index.ts new file mode 100644 index 000000000..deda958f8 --- /dev/null +++ b/packages/myst-cli/src/init/index.ts @@ -0,0 +1 @@ +export * from './init.js'; diff --git a/packages/myst-cli/src/build/init.ts b/packages/myst-cli/src/init/init.ts similarity index 67% rename from packages/myst-cli/src/build/init.ts rename to packages/myst-cli/src/init/init.ts index a2bee4e7c..ce52b1b77 100644 --- a/packages/myst-cli/src/build/init.ts +++ b/packages/myst-cli/src/init/init.ts @@ -2,16 +2,19 @@ import fs from 'node:fs'; import path from 'node:path'; import yaml from 'js-yaml'; import { v4 as uuid } from 'uuid'; +import inquirer from 'inquirer'; +import chalk from 'chalk'; import { defaultConfigFile, loadConfig, writeConfigs } from '../config.js'; import { loadProjectFromDisk } from '../project/load.js'; import { selectors } from '../store/index.js'; import type { ISession } from '../session/types.js'; -import inquirer from 'inquirer'; -import chalk from 'chalk'; -import { startServer } from './site/start.js'; +import { startServer } from '../build/site/start.js'; import { githubCurvenoteAction, githubPagesAction } from './gh-actions/index.js'; -import { getGithubUrl } from './utils/github.js'; -import { checkFolderIsGit } from './utils/git.js'; +import { getGithubUrl } from '../utils/github.js'; +import { checkFolderIsGit } from '../utils/git.js'; +import { upgradeJupyterBook } from './jupyter-book/upgrade.js'; +import { fsExists } from '../utils/fsExists.js'; + const VERSION_CONFIG = '# See docs at: https://mystmd.org/guide/frontmatter\nversion: 1\n'; function createProjectConfig({ github }: { github?: string } = {}) { @@ -127,24 +130,61 @@ export async function init(session: ISession, opts: InitOptions) { await writeConfigs(session, '.', { siteConfig, projectConfig }); } } else { - // If no config is present, write it explicitly to include comments. 
- const configFile = defaultConfigFile(session, '.'); - let configData: string; - let configDoc: string; - if (site && !project) { - configData = `${VERSION_CONFIG}${SITE_CONFIG}`; - configDoc = 'site'; - } else if (project && !site) { - configData = `${VERSION_CONFIG}${createProjectConfig({ github })}`; - configDoc = 'project'; - } else { - configData = `${VERSION_CONFIG}${createProjectConfig({ github })}${SITE_CONFIG}`; - configDoc = 'project and site'; + // Is this a Jupyter Book? + let didUpgrade = false; + if (await fsExists('_config.yml')) { + const configFile = defaultConfigFile(session, '.'); + const promptUpgrade = await inquirer.prompt([ + { + name: 'upgrade', + message: [ + `📘 Found a legacy Jupyter Book. To proceed, myst needs to perform an upgrade which will: +`, + chalk.dim(`‣ Upgrade any Sphinx-style glossaries to MyST-style glossaries +‣ Upgrade any case-insensitive admonition names to lowercase (${chalk.blue('Note')} → ${chalk.blue('note')}) +‣ Migrate configuration from ${chalk.blue('_config.yml')} and (if applicable) ${chalk.blue('_toc.yml')} files +‣ Rename any modified or unneeded files so that they are hidden + +`), + `Are you willing to proceed?`, + ].join(''), + type: 'confirm', + default: true, + }, + ]); + if (!promptUpgrade.upgrade) { + return; + } + session.log.info(`💾 Writing new config file: ${chalk.blue(path.resolve(configFile))}`); + try { + await upgradeJupyterBook(session, configFile); + didUpgrade = true; + } catch (err) { + session.log.error(`❌ An error occurred during Jupyter Book upgrade:\n\n${err}\n\n`); + session.log.warn(`Ignoring Jupyter Book configuration!`); + } + } + // Otherwise, write some default configs + if (!didUpgrade) { + // If no config is present, write it explicitly to include comments. 
+ const configFile = defaultConfigFile(session, '.'); + let configData: string; + let configDoc: string; + if (site && !project) { + configData = `${VERSION_CONFIG}${SITE_CONFIG}`; + configDoc = 'site'; + } else if (project && !site) { + configData = `${VERSION_CONFIG}${createProjectConfig({ github })}`; + configDoc = 'project'; + } else { + configData = `${VERSION_CONFIG}${createProjectConfig({ github })}${SITE_CONFIG}`; + configDoc = 'project and site'; + } + session.log.info( + `💾 Writing new ${configDoc} config file: ${chalk.blue(path.resolve(configFile))}`, + ); + fs.writeFileSync(configFile, configData); } - session.log.info( - `💾 Writing new ${configDoc} config file: ${chalk.blue(path.resolve(configFile))}`, - ); - fs.writeFileSync(configFile, configData); } if (writeTOC) { await loadConfig(session, '.'); diff --git a/packages/myst-cli/src/init/jupyter-book/config.ts b/packages/myst-cli/src/init/jupyter-book/config.ts new file mode 100644 index 000000000..a3e7f3bff --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/config.ts @@ -0,0 +1,236 @@ +import { z } from 'zod'; +import { notNullish } from '../../utils/defined.js'; +import type { Config, ProjectConfig, SiteConfig } from 'myst-config'; +import { ExportFormats } from 'myst-frontmatter'; +import { parse } from 'node:path'; + +const JupyterBookConfig = z.object({ + title: z.string().nullish(), + author: z.string().nullish(), + copyright: z.string().nullish(), + logo: z.string().nullish(), + exclude_patterns: z.array(z.string()).nullish(), + parse: z + .object({ + myst_enable_extensions: z.union([z.null(), z.array(z.string())]).nullish(), + myst_url_schemes: z.union([z.null(), z.array(z.string())]).nullish(), + myst_dmath_double_inline: z.boolean().default(true), + }) + .nullish(), + execute: z + .object({ + eval_regex: z.string().default('^.*$'), + raise_on_error: z.boolean().default(false), + show_tb: z.boolean().default(false), + execute_notebooks: z + .union([ + z.literal('auto'), + 
z.literal('cache'), + z.literal('force'), + z.literal('inline'), + z.literal('off'), + z.literal(false), + ]) + .default('auto'), + cache: z.string().nullish(), + timeout: z.number().gte(-1).default(30), + allow_errors: z.boolean().default(false), + stderr_output: z + .enum(['show', 'remove', 'remove-warn', 'warn', 'error', 'severe']) + .default('show'), + run_in_temp: z.boolean().default(false), + exclude_patterns: z.array(z.string()).nullish(), + }) + .nullish(), + html: z + .object({ + favicon: z.string().nullish(), + use_edit_page_button: z.boolean().nullish(), + use_repository_button: z.boolean().nullish(), + use_issues_button: z.boolean().nullish(), + extra_footer: z.string().nullish(), + // Legacy analytics field + google_analytics_id: z.string().nullish(), + analytics: z + .object({ + plausible_analytics_domain: z.string().nullish(), + google_analytics_id: z.string().nullish(), + }) + .nullish(), + home_page_in_navbar: z.boolean().nullish(), + baseurl: z.string().nullish(), + comments: z + .object({ + hypothesis: z.union([z.boolean(), z.record(z.any())]).nullish(), + utterances: z.union([z.boolean(), z.record(z.any())]).nullish(), + }) + .nullish(), + announcement: z.string().nullish(), + }) + .nullish(), + latex: z + .object({ + latex_engine: z.string().default('pdflatex'), + use_jupyterbook_latex: z.boolean().nullish(), + latex_documents: z + .object({ + targetname: z.string().nullish(), + }) + .nullish(), + }) + .nullish(), + bibtex_bibfiles: z.array(z.string()).nullish(), + launch_buttons: z + .object({ + notebook_interface: z.string().nullish(), + binderhub_url: z.string().nullish(), + jupyterhub_url: z.string().nullish(), + thebe: z.boolean().nullish(), + colab_url: z.string().nullish(), + }) + .nullish(), + repository: z + .object({ + url: z.string().nullish(), + path_to_book: z.string().nullish(), + branch: z.string().nullish(), + }) + .nullish(), + sphinx: z + .object({ + extra_extensions: z.union([z.null(), z.array(z.string())]).nullish(), + 
local_extensions: z.union([z.null(), z.record(z.any())]).nullish(), + recursive_update: z.boolean().nullish(), + config: z.union([z.null(), z.record(z.any())]).nullish(), + }) + .nullish(), +}); + +export type JupyterBookConfig = z.infer; + +/** + * Validate a loaded Jupyter Book _config.yml, or return undefined + * + * @param config - config object + */ +export function validateJupyterBookConfig(config: unknown): JupyterBookConfig { + const result = JupyterBookConfig.safeParse(config); + if (!result.success) { + const errors = result.error.errors.map( + (issue) => `${issue.path.join('.')}: ${issue.message} (${issue.code})`, + ); + throw new Error(`Error(s) in parsing Jupyter Book configuration:\n${errors}`); + } else { + return result.data; + } +} + +/** + * Parse a GitHub repo URL to extract the user/repo substring + * + * @param url - GitHub URL + */ +function parseGitHubRepoURL(url: string): string | undefined { + //eslint-disable-next-line + const match = url.match(/(?:git@|https:\/\/)github.com[:\/](.*)(?:.git)?/); + if (!match) { + return undefined; + } + return match[1]; +} + +/** + * Upgrade a Jupyter Book _config.yml into a myst.yml configuration + * + * @param config - validated Jupyter Book configuration + */ +export function upgradeConfig(data: JupyterBookConfig): Pick { + const project: ProjectConfig = {}; + const siteOptions: SiteConfig['options'] = {}; + const site: SiteConfig = { + options: siteOptions, + template: 'book-theme', + }; + + if (notNullish(data.title)) { + project.title = data.title; + } + + if (notNullish(data.author)) { + // Try and parse comma-delimited author lists into separate authors + const authors = data.author.split(/,\s*(?:and\s)?\s*|\s+and\s+/); + if (authors.length === 1) { + project.authors = [{ name: data.author }]; // TODO prompt user for alias? 
+ } else { + project.authors = authors.map((name) => ({ name })); + } + } + + if (notNullish(data.copyright)) { + project.copyright = data.copyright; + } + + if (notNullish(data.logo)) { + siteOptions.logo = data.logo; + } + + if (notNullish(data.exclude_patterns)) { + project.exclude = data.exclude_patterns; + } + + if (notNullish(data.html?.favicon)) { + siteOptions.favicon = data.html.favicon; + } + + if (notNullish(data.html?.analytics?.google_analytics_id)) { + siteOptions.analytics_google = data.html.analytics.google_analytics_id; + } else if (notNullish(data.html?.google_analytics_id)) { + siteOptions.analytics_google = data.html.google_analytics_id; + } + + if (notNullish(data.html?.analytics?.plausible_analytics_domain)) { + siteOptions.analytics_plausible = data.html.analytics.plausible_analytics_domain; + } + + const repo = notNullish(data.repository?.url) + ? parseGitHubRepoURL(data.repository?.url) + : undefined; + if (notNullish(repo)) { + project.github = repo; + } + + // Do we want to enable thebe and mybinder? + if ( + notNullish(repo) && + (notNullish(data.launch_buttons?.binderhub_url) || !!data.launch_buttons?.thebe) + ) { + project.thebe = { + binder: { + repo: repo, + provider: 'github', + url: data.launch_buttons?.binderhub_url ?? undefined, + ref: data.repository?.branch ?? undefined, + }, + }; + } + + // Take bibliography + if (notNullish(data.bibtex_bibfiles)) { + project.bibliography = data.bibtex_bibfiles; + } + + // Defined LaTeX target name + if (notNullish(data.latex?.latex_documents?.targetname)) { + project.exports = project.exports ?? 
[]; + + // Strip any extensions + const { name } = parse(data.latex.latex_documents.targetname); + project.exports.push({ + format: ExportFormats.pdf, + template: 'plain_latex_book', + output: `exports/${name}.pdf`, + }); + } + + return { project, site }; +} diff --git a/packages/myst-cli/src/init/jupyter-book/index.ts b/packages/myst-cli/src/init/jupyter-book/index.ts new file mode 100644 index 000000000..8f0b8cb36 --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/index.ts @@ -0,0 +1 @@ +export * from './upgrade.js'; diff --git a/packages/myst-cli/src/init/jupyter-book/syntax.spec.ts b/packages/myst-cli/src/init/jupyter-book/syntax.spec.ts new file mode 100644 index 000000000..06a09311c --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/syntax.spec.ts @@ -0,0 +1,32 @@ +import { describe, expect, test } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import yaml from 'js-yaml'; +import { upgradeContent } from './syntax.js'; + +type TestFile = { + cases: TestCase[]; +}; + +type TestCase = { + title: string; + source: string; + result: string | undefined; +}; + +const file = 'syntax.yml'; + +const testYaml = fs.readFileSync(path.join(__dirname, file)).toString(); +const cases = (yaml.load(testYaml) as TestFile).cases; + +describe('upgradeSyntax', () => { + test.each(cases.map((c): [string, TestCase] => [c.title, c]))( + '%s', + async (_, { source, result }) => { + const sourceLines = source.split(/\r\n|\r|\n/); + const resultLines = result?.split(/\r\n|\r|\n/); + const transformedLines = await upgradeContent([...sourceLines]); + expect(transformedLines).toEqual(resultLines); + }, + ); +}); diff --git a/packages/myst-cli/src/init/jupyter-book/syntax.ts b/packages/myst-cli/src/init/jupyter-book/syntax.ts new file mode 100644 index 000000000..6bf2d748f --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/syntax.ts @@ -0,0 +1,304 @@ +import { mystParse } from 'myst-parser'; +import fs from 'node:fs/promises'; +import { 
glob } from 'glob'; +import { selectAll } from 'unist-util-select'; +import { toText } from 'myst-common'; +import chalk from 'chalk'; +import type { ISession } from '../../session/types.js'; +import { makeExecutable } from 'myst-cli-utils'; +import { parse, join, relative } from 'node:path'; +import type { INotebookContent } from '@jupyterlab/nbformat'; +import { createTempFolder } from '../../utils/createTempFolder.js'; + +// Preserve newlines through group construct +const SPLIT_PATTERN = /\r\n|\r|\n/; +type DocumentLine = { + content: string; + offset: number; +}; + +type LegacyGlossaryItem = { + termLines: DocumentLine[]; + definitionLines: DocumentLine[]; +}; +type CallbackType = () => Promise; + +/** + * In-place upgrade Sphinx-style glossaries into MyST definition-list glossaries + * in all MyST documents + * + * @param session - session with logging + */ +export async function upgradeProjectSyntax(session: ISession) { + let documentPaths: string[]; + session.log.debug(chalk.dim(`Upgrading legacy-formatted files in a temporary location`)); + // Try and find all Git-tracked files, to ignore _build (hopefully) + try { + const allFiles = (await makeExecutable('git ls-files', null)()).split(SPLIT_PATTERN); + documentPaths = allFiles.filter((path) => { + const { ext } = parse(path); + return ext == '.md' || ext == '.ipynb'; + }); + } catch (error) { + // Fall back on globbing + documentPaths = await glob('**/*.{md,ipynb}'); + } + const tmpPath = createTempFolder(session); + // Update all documents + const maybeUpgradedPaths = await Promise.all( + documentPaths.map((path) => upgradeDocument(session, path, tmpPath, process.cwd())), + ); + + // Now upgrade all documents + session.log.debug(chalk.dim(`Copying upgraded files into project`)); + await Promise.all( + maybeUpgradedPaths + .filter((item: CallbackType | undefined): item is CallbackType => item !== undefined) + .map((fn) => fn()), + ); +} + +/** + * In-place upgrade Sphinx-style glossaries into MyST 
definition-list glossaries + * in a single document + * + * @param session - session with logging + * @param path - path to document + */ +async function upgradeDocument( + session: ISession, + path: string, + tmpPath: string, + basePath: string, +): Promise { + const relativePath = relative(basePath, path); + + // Temporary location for upgrade path + const upgradeFilePath = join(tmpPath, relativePath); + + // Ensure destination directory exists + const { dir: temporaryFileDir } = parse(upgradeFilePath); + await fs.mkdir(temporaryFileDir, { recursive: true }); + + const { base, ext, dir: originalDir } = parse(path); + // Callback for implementing the "atomic" replacement + const performUpgrade = async () => { + const backupFilePath = join(originalDir, `.${base}.bak`); + await fs.rename(path, backupFilePath); + await fs.rename(upgradeFilePath, path); + }; + + switch (ext) { + case '.md': + { + // Upgrade entire Markdown document in one pass + const data = (await fs.readFile(path)).toString(); + const maybeNewLines = await upgradeContent(data.split(SPLIT_PATTERN)); + if (maybeNewLines !== undefined) { + // Write modified result + await fs.writeFile(upgradeFilePath, maybeNewLines.join('\n')); + session.log.info(chalk.dim(`Upgraded ${chalk.blue(path)}`)); + return performUpgrade; + } + } + break; + case '.ipynb': + { + // Upgrade each cell of the notebook + const data = (await fs.readFile(path)).toString(); + const notebook = JSON.parse(data) as INotebookContent; + const cellDidUpgrade = await Promise.all( + notebook.cells + .filter((cell) => cell.cell_type === 'markdown') + .map(async (cell) => { + // Try and upgrade the cell + const maybeNewLines = await upgradeContent( + (cell.source as string[]) + // Strip newlines + .map((line) => line.replace('\n', '')), + ); + // Did we compute new state? 
+ if (maybeNewLines !== undefined) { + // Write to cell and indicate modification + cell.source = maybeNewLines.map((line) => `${line}\n`); + return true; + } else { + return false; + } + }), + ); + + // Do we need to update the notebook? + if (cellDidUpgrade.some((x) => x)) { + // Write modified result + const newData = JSON.stringify(notebook); + // Write modified result + await fs.writeFile(upgradeFilePath, newData); + session.log.info(chalk.dim(`Upgraded ${chalk.blue(path)}`)); + return performUpgrade; + } + } + return undefined; + } +} +export async function upgradeContent(documentLines: string[]): Promise { + let didUpgrade = false; + + for (const transform of [upgradeGlossary, upgradeNotes]) { + const nextLines = await transform(documentLines); + didUpgrade = didUpgrade || nextLines !== undefined; + documentLines = nextLines ?? documentLines; + } + + return didUpgrade ? documentLines : undefined; +} + +const admonitionPattern = + /^(attention|caution|danger|error|important|hint|note|seealso|tip|warning|\.callout-note|\.callout-warning|\.callout-important|\.callout-tip|\.callout-caution)$/; + +async function upgradeNotes(documentLines: string[]): Promise { + const data = documentLines.join('\n'); + const mdast = mystParse(data); + + const caseInsenstivePattern = new RegExp(admonitionPattern.source, admonitionPattern.flags + 'i'); + const directiveNodes = selectAll('mystDirective', mdast); + const mixedCaseAdmonitions = directiveNodes.filter((item) => { + const name = (item as any).name as string; + return name.match(caseInsenstivePattern) && !name.match(admonitionPattern); + }); + mixedCaseAdmonitions.forEach((node) => { + const start = node.position!.start.line; + + // Find declaration immediately _above_ body node + const newLine = documentLines[start - 1].replace( + // Find :::{fOo} or ```{fOo} + // eslint-disable-next-line no-useless-escape + /^(:{3,}|`{3,})\s*\{([^\}]+)\}/, + // Replace it with :::{foo} or ```{foo} + (_, prefix, name) => 
`${prefix}{${name.toLowerCase()}}`, + ); + documentLines[start - 1] = newLine; + }); + + // Update the file + if (mixedCaseAdmonitions.length) { + return documentLines; + } else { + return undefined; + } +} + +async function upgradeGlossary(documentLines: string[]): Promise { + const data = documentLines.join('\n'); + const mdast = mystParse(data); + const glossaryNodes = selectAll('mystDirective[name=glossary]', mdast); + + // Track the edit point + let editOffset = 0; + for (const node of glossaryNodes) { + const nodeLines = ((node as any).value as string).split(SPLIT_PATTERN); + + // TODO: assert span items + + // Flag tracking whether the line-processor expects definition lines + let inDefinition = false; + let indentSize = 0; + + const entries: LegacyGlossaryItem[] = []; + + // Parse lines into separate entries + for (let i = 0; i < nodeLines.length; i++) { + const line = nodeLines[i]; + // Is the line a comment? + if (/^\.\.\s/.test(line) || !line.length) { + continue; + } + // Is the line a non-whitespace-leading line (term declaration)? + else if (/^[^\s]/.test(line[0])) { + // Comment + if (line.startsWith('.. ')) { + continue; + } + + // Do we need to create a new entry? + if (inDefinition || !entries.length) { + // Close the current definition, open a new term + entries.push({ + definitionLines: [], + termLines: [{ content: line, offset: i }], + }); + inDefinition = false; + } + // Can we extend existing entry with an additional term? 
+ else if (entries.length) { + entries[entries.length - 1].termLines.push({ content: line, offset: i }); + } + } + // Open a definition + else { + inDefinition = true; + indentSize = line.length - line.replace(/^\s+/, '').length; + + if (entries.length) { + entries[entries.length - 1].definitionLines.push({ + content: line.slice(indentSize), + offset: i, + }); + } + } + } + + // Build glossary + const newLines: string[] = []; + + for (let i = 0; i < entries.length; i++) { + const entry = entries[i]; + const { termLines, definitionLines } = entry; + + const [firstDefinitionLine, ...restDefinitionLines] = definitionLines; + const [firstTerm, ...restTerms] = termLines; + + // Initial definition + const firstTermValue = firstTerm.content.split(/\s+:\s+/, 1)[0]; + newLines.push( + firstTermValue, + `: ${firstDefinitionLine.content}`, + ...restDefinitionLines.map((line) => ` ${line.content}`), + ); + if (restTerms) { + // Terms can contain markup, but we need the text-form to create a term reference + // TODO: what if something magical like an xref is used here? Assume not. + const parsedTerm = mystParse(firstTermValue); + const termName = toText(parsedTerm); + for (const { content } of restTerms) { + const term = content.split(/\s+:\s+/, 1)[0]; + newLines.push( + // Separate from parent term + '', + term, + `: {term}\`${termName}\``, + ); + } + } + + // Will there be following terms? + const isFinalEntry = i === entries.length - 1; + if (!isFinalEntry) { + newLines.push(''); + } + } + const nodeSpan = { start: node.position?.start?.line, stop: node.position?.end?.line }; + const spanLength = nodeSpan.stop! - nodeSpan.start! - 1; + documentLines.splice(nodeSpan.start! 
+ editOffset, spanLength, ...newLines); + + // Offset our insert cursor + editOffset += newLines.length - spanLength; + } + + // Update the file + if (glossaryNodes.length) { + return documentLines; + } else { + return undefined; + } +} diff --git a/packages/myst-cli/src/init/jupyter-book/syntax.yml b/packages/myst-cli/src/init/jupyter-book/syntax.yml new file mode 100644 index 000000000..10b1f5c96 --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/syntax.yml @@ -0,0 +1,145 @@ +cases: + - title: Glossary with single term and definition + source: | + :::{glossary} + foo + bar + ::: + result: | + :::{glossary} + foo + : bar + ::: + - title: Glossary with three single term and definition + source: | + :::{glossary} + foo + foo def + + bar + bar def + + baz + baz def + ::: + result: | + :::{glossary} + foo + : foo def + + bar + : bar def + + baz + : baz def + ::: + - title: Glossary with three single term and definition, and a double term and definition + source: | + :::{glossary} + foo + foo def + + bar + bar def + + biff + baff + baff def + + baz + baz def + ::: + result: | + :::{glossary} + foo + : foo def + + bar + : bar def + + biff + : baff def + + baff + : {term}`biff` + + baz + : baz def + ::: + - title: Glossary with double term and single definition + source: | + :::{glossary} + baz + foo + bar + ::: + result: | + :::{glossary} + baz + : bar + + foo + : {term}`baz` + ::: + - title: Glossary with term and several definition lines + source: | + :::{glossary} + foo + bar + baz + ::: + result: | + :::{glossary} + foo + : bar + baz + ::: + - title: A Markdown link (unchanged) + source: | + [A link](https://google.com) + - title: Admonition with incorrectly cased name + source: | + :::{noTE} + I am a note + + - foo + ::: + result: | + :::{note} + I am a note + + - foo + ::: + - title: Admonition with correctly cased name + source: | + :::{note} + I am a note + + - foo + ::: + - title: Admonition with space between backticks and name + source: | + ``` 
{noTE} + I am a note + + - foo + ``` + result: | + ```{note} + I am a note + + - foo + ``` + - title: Admonition with space between colon-fence and name + source: | + ::: {noTE} + I am a note + + - foo + ::: + result: | + :::{note} + I am a note + + - foo + ::: diff --git a/packages/myst-cli/src/init/jupyter-book/toc.ts b/packages/myst-cli/src/init/jupyter-book/toc.ts new file mode 100644 index 000000000..bde0731ca --- /dev/null +++ b/packages/myst-cli/src/init/jupyter-book/toc.ts @@ -0,0 +1,408 @@ +import { z } from 'zod'; +import { resolveExtension } from '../../utils/resolveExtension.js'; +import { join, relative } from 'node:path'; +import { cwd } from 'node:process'; +import type { Entry as MySTEntry, ParentEntry as MySTParentEntry } from 'myst-toc'; + +const TOCTreeOptions = z + .object({ + caption: z.string(), + hidden: z.boolean(), + maxdepth: z.number(), + numberted: z.boolean(), + reversed: z.boolean(), + titlesonly: z.boolean(), + }) + .partial(); + +type FileEntry = z.infer; +const FileEntry = z.object({ + file: z.string(), + title: z.string().optional(), +}); + +type URLEntry = z.infer; +const URLEntry = z.object({ + url: z.string(), + title: z.string().optional(), +}); + +type GlobEntry = z.infer; +const GlobEntry = z.object({ + glob: z.string(), +}); + +/** Basic TOC Trees **/ +type NoFormatSubtreeType = z.infer & { + entries: z.infer[]; +}; +const NoFormatSubtree: z.ZodType = TOCTreeOptions.extend({ + entries: z.lazy(() => NoFormatEntry.array()), +}); + +type NoFormatShorthandSubtreeType = { + entries: z.infer[]; + options?: z.infer; +}; +const NoFormatShorthandSubtree: z.ZodType = z.object({ + entries: z.lazy(() => NoFormatEntry.array()), + options: TOCTreeOptions.optional(), +}); + +const NoFormatHasSubtrees = z.object({ + subtrees: NoFormatSubtree.array(), +}); + +const NoFormatEntry = z.union([ + FileEntry.and(NoFormatShorthandSubtree), + FileEntry.merge(NoFormatHasSubtrees), + FileEntry, + URLEntry, + GlobEntry, +]); + +const NoFormatTOCBase = 
z.object({ + root: z.string(), + defaults: TOCTreeOptions.optional(), +}); + +const NoFormatTOC = z.union([ + NoFormatTOCBase.and(NoFormatShorthandSubtree), + NoFormatTOCBase.merge(NoFormatHasSubtrees).strict(), + NoFormatTOCBase.strict(), +]); + +/** Article format **/ +type ArticleSubtreeType = z.infer & { + sections: z.infer[]; +}; +const ArticleSubtree: z.ZodType = TOCTreeOptions.extend({ + sections: z.lazy(() => ArticleEntry.array()), +}); + +type ArticleShorthandSubtreeType = { + sections: z.infer[]; + options?: z.infer; +}; +const ArticleShorthandSubtree: z.ZodType = z.object({ + sections: z.lazy(() => ArticleEntry.array()), + options: TOCTreeOptions.optional(), +}); + +const ArticleHasSubtrees = z.object({ + subtrees: ArticleSubtree.array(), +}); + +const ArticleEntry = z.union([ + FileEntry.and(ArticleShorthandSubtree), + FileEntry.merge(ArticleHasSubtrees), + FileEntry, + URLEntry, + GlobEntry, +]); + +const ArticleTOCBase = z.object({ + root: z.string(), + format: z.literal('jb-article'), + defaults: TOCTreeOptions.optional(), +}); + +const ArticleTOC = z.union([ + ArticleTOCBase.and(ArticleShorthandSubtree), + ArticleTOCBase.merge(ArticleHasSubtrees).strict(), + ArticleTOCBase.strict(), +]); + +/** Book format **/ +type BookOuterSubtreeType = z.infer & { + chapters: z.infer[]; +}; +const BookOuterSubtree: z.ZodType = TOCTreeOptions.extend({ + chapters: z.lazy(() => BookEntry.array()), +}); + +type BookInnerSubtreeType = z.infer & { + sections: z.infer[]; +}; +const BookInnerSubtree: z.ZodType = TOCTreeOptions.extend({ + sections: z.lazy(() => BookEntry.array()), +}); + +type BookShorthandOuterSubtreeType = { + chapters: z.infer[]; + options?: z.infer; +}; +const BookShorthandOuterSubtree: z.ZodType = z.object({ + chapters: z.lazy(() => BookEntry.array()), + options: TOCTreeOptions.optional(), +}); + +type BookShorthandInnerSubtreeType = { + sections: z.infer[]; + options?: z.infer; +}; +const BookShorthandInnerSubtree: z.ZodType = z.object({ + sections: 
/**
 * Validate parsed `_toc.yml` content against the sphinx-external-toc schema.
 *
 * @param toc - unvalidated value (for example the result of `yaml.load`)
 * @returns the parsed TOC; the `undefined` in the signature is kept for
 *   existing callers, but this function either returns data or throws
 * @throws Error listing every schema issue, one per line
 */
export function validateSphinxExternalTOC(toc: unknown): SphinxExternalTOC | undefined {
  const result = SphinxExternalTOC.safeParse(toc);
  if (result.success) {
    return result.data;
  }
  const errors = result.error.issues.map((issue) => {
    // Issues raised on the top-level value have an empty path
    const location = issue.path.length > 0 ? issue.path.join('.') : '<root>';
    return `${location}: ${issue.message} (${issue.code})`;
  });
  // Join explicitly: interpolating the array would comma-separate the issues
  throw new Error(`Error(s) in parsing Jupyter Book TOC:\n${errors.join('\n')}`);
}
/**
 * Convert a Book TOC into a no-format TOC
 *
 * Renames the book-specific keys to their no-format equivalents:
 * `parts` becomes `subtrees`, and `chapters` / `sections` become `entries`.
 * The `format` key is dropped; every other key is carried over unchanged.
 *
 * @param data - validated TOC
 * @returns the equivalent no-format TOC
 */
function convertBookToNoFormat(data: z.infer<typeof BookTOC>): z.infer<typeof NoFormatTOC> {
  /** Convert one entry, recursing into any nested sections/subtrees */
  const convertEntry = (item: z.infer<typeof BookEntry>): z.infer<typeof NoFormatEntry> => {
    // Drop subtrees and sections (only file entries can carry them)
    // eslint-disable-next-line prefer-const, @typescript-eslint/no-unused-vars
    let { sections, subtrees, ...result } = item as z.infer<typeof FileEntry> & {
      sections: any;
      subtrees: any;
    };

    if ('sections' in item || 'subtrees' in item) {
      result = { ...result, ...convertHasOrIsInnerSubtree(item) };
    }

    return result;
  };

  /** Inner subtree: `sections` -> `entries` */
  const convertInnerSubtree = (item: z.infer<typeof BookInnerSubtree>) => {
    const { sections, ...result } = item;
    return { ...result, entries: sections.map(convertEntry) };
  };

  /** Handle both the explicit (`subtrees`) and the shorthand (`sections`) inner form */
  const convertHasOrIsInnerSubtree = (
    item: z.infer<typeof BookHasInnerSubtrees> | z.infer<typeof BookShorthandInnerSubtree>,
  ): z.infer<typeof NoFormatHasSubtrees> | z.infer<typeof NoFormatShorthandSubtree> => {
    if ('subtrees' in item) {
      const { subtrees, ...rest } = item;
      return { ...rest, subtrees: subtrees.map(convertInnerSubtree) };
    } else {
      // Shorthand: `options` stays nested, the remainder is converted as a subtree
      const { options, ...rest } = item;
      return { options, ...convertInnerSubtree(rest) };
    }
  };

  /** Outer subtree (a "part"): `chapters` -> `entries` */
  const convertOuterSubtree = (
    item: z.infer<typeof BookOuterSubtree>,
  ): z.infer<typeof NoFormatSubtree> => {
    const { chapters, ...rest } = item;
    return { ...rest, entries: chapters.map(convertEntry) };
  };

  /** Handle both the explicit (`parts`) and the shorthand (`chapters`) outer form */
  const convertHasOrIsOuterSubtree = (
    item: z.infer<typeof BookHasOuterSubtrees> | z.infer<typeof BookShorthandOuterSubtree>,
  ): z.infer<typeof NoFormatHasSubtrees> | z.infer<typeof NoFormatShorthandSubtree> => {
    if ('parts' in item) {
      const { parts, ...rest } = item;
      return { ...rest, subtrees: parts.map(convertOuterSubtree) };
    } else {
      const { options, ...rest } = item;
      return { options, ...convertOuterSubtree(rest) };
    }
  };

  // `format` is discarded: its absence is what marks a no-format TOC
  const { root, defaults, format: _, ...rest } = data;
  let result = {
    root,
    defaults,
  };
  if ('chapters' in rest || 'parts' in rest) {
    result = { ...result, ...convertHasOrIsOuterSubtree(rest) };
  }

  return result;
}
/**
 * Upgrade the Jupyter Book project in the current working directory to MyST.
 *
 * Reads `_config.yml` (required) and `_toc.yml` (optional), converts them to a
 * MyST config, runs the project syntax upgrade, writes the result to
 * `configFile`, and finally renames the legacy files to hidden `.bak` files.
 *
 * @param session - session passed to the syntax upgrade and used for debug logging
 * @param configFile - path the generated MyST configuration is written to
 * @throws Error if `_config.yml` does not exist, or if the TOC fails validation
 */
export async function upgradeJupyterBook(session: ISession, configFile: string) {
  const config: Config = {
    version: 1,
    project: {},
  };

  // Does config file exist?
  if (!(await fsExists('_config.yml'))) {
    throw new Error(`${chalk.blue('_config.yml')} is a required Jupyter Book configuration file`);
  }
  const configContent = await fs.readFile('_config.yml', { encoding: 'utf-8' });
  const configData = validateJupyterBookConfig(yaml.load(configContent));
  if (defined(configData)) {
    // Update MyST configuration
    ({ site: config.site, project: config.project } = upgradeConfig(configData));
  }

  // Does TOC exist? Remember the answer: the rename below must be skipped when it does not
  const hasTOC = await fsExists('_toc.yml');
  if (hasTOC) {
    const tocContent = await fs.readFile('_toc.yml', { encoding: 'utf-8' });
    const tocData = validateSphinxExternalTOC(yaml.load(tocContent));
    if (defined(tocData)) {
      // NOTE(review): cast kept from the original; presumably `Config` does not
      // accept the upgraded TOC entries on `project.toc` - confirm and drop if possible
      (config as any).project.toc = upgradeTOC(tocData);
    }
  }

  // Upgrade legacy syntax
  await upgradeProjectSyntax(session);

  // Write new myst.yml
  await fs.writeFile(configFile, yaml.dump(config));

  await fs.rename('_config.yml', '._config.yml.bak');
  session.log.debug(
    chalk.dim(`Renamed ${chalk.blue('_config.yml')} to ${chalk.blue('._config.yml.bak')}`),
  );

  // `_toc.yml` is optional; renaming a missing file would reject with ENOENT
  if (hasTOC) {
    await fs.rename('_toc.yml', '._toc.yml.bak');
    session.log.debug(
      chalk.dim(`Renamed ${chalk.blue('_toc.yml')} to ${chalk.blue('._toc.yml.bak')}`),
    );
  }
}
/**
 * Asynchronous version of fs.existsSync
 *
 * @param path - path to test for existence
 * @returns `true` if `fs.access` succeeds for the path, `false` if it rejects for any reason
 */
export async function fsExists(path: string): Promise<boolean> {
  try {
    await fs.access(path, fs.constants.F_OK);
    return true;
  } catch {
    // Any failure to access the path is reported as "does not exist"
    return false;
  }
}