From c90dbacab05d2ceab84b53c66c7be77b4dda47ef Mon Sep 17 00:00:00 2001 From: Anton Bulakh Date: Tue, 7 Jan 2025 22:33:34 +0200 Subject: [PATCH] chore(build): separate markdown and html handling into two separate stages (#1675) --- quartz/bootstrap-worker.mjs | 5 ++- quartz/plugins/vfile.ts | 8 ++-- quartz/processors/parse.ts | 86 +++++++++++++++++++++++++++---------- quartz/worker.ts | 35 ++++++++++++--- 4 files changed, 100 insertions(+), 34 deletions(-) diff --git a/quartz/bootstrap-worker.mjs b/quartz/bootstrap-worker.mjs index b08689c3c7719..c4c4949b96bd5 100644 --- a/quartz/bootstrap-worker.mjs +++ b/quartz/bootstrap-worker.mjs @@ -1,7 +1,8 @@ #!/usr/bin/env node import workerpool from "workerpool" const cacheFile = "./.quartz-cache/transpiled-worker.mjs" -const { parseFiles } = await import(cacheFile) +const { parseMarkdown, processHtml } = await import(cacheFile) workerpool.worker({ - parseFiles, + parseMarkdown, + processHtml, }) diff --git a/quartz/plugins/vfile.ts b/quartz/plugins/vfile.ts index 5be21058471ab..8c5cf6aa60712 100644 --- a/quartz/plugins/vfile.ts +++ b/quartz/plugins/vfile.ts @@ -1,11 +1,13 @@ -import { Node, Parent } from "hast" +import { Root as HtmlRoot } from "hast" +import { Root as MdRoot } from "mdast" import { Data, VFile } from "vfile" export type QuartzPluginData = Data -export type ProcessedContent = [Node, VFile] +export type MarkdownContent = [MdRoot, VFile] +export type ProcessedContent = [HtmlRoot, VFile] export function defaultProcessedContent(vfileData: Partial): ProcessedContent { - const root: Parent = { type: "root", children: [] } + const root: HtmlRoot = { type: "root", children: [] } const vfile = new VFile("") vfile.data = vfileData return [root, vfile] diff --git a/quartz/processors/parse.ts b/quartz/processors/parse.ts index 2bd530c643254..479313f490f34 100644 --- a/quartz/processors/parse.ts +++ b/quartz/processors/parse.ts @@ -4,18 +4,20 @@ import remarkRehype from "remark-rehype" import { Processor, unified } from "unified" import { Root as MDRoot } from "remark-parse/lib" import { Root as HTMLRoot } from "hast" -import { ProcessedContent } from "../plugins/vfile" +import { MarkdownContent, ProcessedContent } from "../plugins/vfile" import { PerfTimer } from "../util/perf" import { read } from "to-vfile" -import { FilePath, QUARTZ, slugifyFilePath } from "../util/path" +import { FilePath, FullSlug, QUARTZ, slugifyFilePath } from "../util/path" import path from "path" import workerpool, { Promise as WorkerPromise } from "workerpool" import { QuartzLogger } from "../util/log" import { trace } from "../util/trace" import { BuildCtx } from "../util/ctx" -export type QuartzProcessor = Processor -export function createProcessor(ctx: BuildCtx): QuartzProcessor { +export type QuartzMdProcessor = Processor +export type QuartzHtmlProcessor = Processor + +export function createMdProcessor(ctx: BuildCtx): QuartzMdProcessor { const transformers = ctx.cfg.plugins.transformers return ( @@ -24,14 +26,20 @@ export function createProcessor(ctx: BuildCtx): QuartzProcessor { .use(remarkParse) // MD AST -> MD AST transforms .use( - transformers - .filter((p) => p.markdownPlugins) - .flatMap((plugin) => plugin.markdownPlugins!(ctx)), - ) + transformers.flatMap((plugin) => plugin.markdownPlugins?.(ctx) ?? []), + ) as unknown as QuartzMdProcessor + // ^ sadly the typing of `use` is not smart enough to infer the correct type from our plugin list + ) +} + +export function createHtmlProcessor(ctx: BuildCtx): QuartzHtmlProcessor { + const transformers = ctx.cfg.plugins.transformers + return ( + unified() // MD AST -> HTML AST .use(remarkRehype, { allowDangerousHtml: true }) // HTML AST -> HTML AST transforms - .use(transformers.filter((p) => p.htmlPlugins).flatMap((plugin) => plugin.htmlPlugins!(ctx))) + .use(transformers.flatMap((plugin) => plugin.htmlPlugins?.(ctx) ?? [])) ) } @@ -75,8 +83,8 @@ async function transpileWorkerScript() { export function createFileParser(ctx: BuildCtx, fps: FilePath[]) { const { argv, cfg } = ctx - return async (processor: QuartzProcessor) => { - const res: ProcessedContent[] = [] + return async (processor: QuartzMdProcessor) => { + const res: MarkdownContent[] = [] for (const fp of fps) { try { const perf = new PerfTimer() @@ -100,10 +108,32 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) { res.push([newAst, file]) if (argv.verbose) { - console.log(`[process] ${fp} -> ${file.data.slug} (${perf.timeSince()})`) + console.log(`[markdown] ${fp} -> ${file.data.slug} (${perf.timeSince()})`) } } catch (err) { - trace(`\nFailed to process \`${fp}\``, err as Error) + trace(`\nFailed to process markdown \`${fp}\``, err as Error) + } + } + + return res + } +} + +export function createMarkdownParser(ctx: BuildCtx, mdContent: MarkdownContent[]) { + return async (processor: QuartzHtmlProcessor) => { + const res: ProcessedContent[] = [] + for (const [ast, file] of mdContent) { + try { + const perf = new PerfTimer() + + const newAst = await processor.run(ast as MDRoot, file) + res.push([newAst, file]) + + if (ctx.argv.verbose) { + console.log(`[html] ${file.data.slug} (${perf.timeSince()})`) + } + } catch (err) { + trace(`\nFailed to process html \`${file.data.filePath}\``, err as Error) } } @@ -113,6 +143,7 @@ export function createFileParser(ctx: BuildCtx, fps: FilePath[]) { const clamp = (num: number, min: number, max: number) => Math.min(Math.max(Math.round(num), min), max) + export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise { const { argv } = ctx const perf = new PerfTimer() @@ -126,9 +157,8 @@ export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise { + console.error(`${err}`.replace(/^error:\s*/i, "")) + process.exit(1) + } - const childPromises: WorkerPromise[] = [] + const mdPromises: WorkerPromise<[MarkdownContent[], FullSlug[]]>[] = [] for (const chunk of chunks(fps, CHUNK_SIZE)) { - childPromises.push(pool.exec("parseFiles", [ctx.buildId, argv, chunk, ctx.allSlugs])) + mdPromises.push(pool.exec("parseMarkdown", [ctx.buildId, argv, chunk])) } + const mdResults: [MarkdownContent[], FullSlug[]][] = + await WorkerPromise.all(mdPromises).catch(errorHandler) + + const childPromises: WorkerPromise[] = [] + for (const [_, extraSlugs] of mdResults) { + ctx.allSlugs.push(...extraSlugs) + } + for (const [mdChunk, _] of mdResults) { + childPromises.push(pool.exec("processHtml", [ctx.buildId, argv, mdChunk, ctx.allSlugs])) + } + const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch(errorHandler) - const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch((err) => { - const errString = err.toString().slice("Error:".length) - console.error(errString) - process.exit(1) - }) res = results.flat() await pool.terminate() } diff --git a/quartz/worker.ts b/quartz/worker.ts index a209df9a06a98..c9cd98055d82f 100644 --- a/quartz/worker.ts +++ b/quartz/worker.ts @@ -3,23 +3,46 @@ sourceMapSupport.install(options) import cfg from "../quartz.config" import { Argv, BuildCtx } from "./util/ctx" import { FilePath, FullSlug } from "./util/path" -import { createFileParser, createProcessor } from "./processors/parse" +import { + createFileParser, + createHtmlProcessor, + createMarkdownParser, + createMdProcessor, +} from "./processors/parse" import { options } from "./util/sourcemap" +import { MarkdownContent, ProcessedContent } from "./plugins/vfile" // only called from worker thread -export async function parseFiles( +export async function parseMarkdown( buildId: string, argv: Argv, fps: FilePath[], +): Promise<[MarkdownContent[], FullSlug[]]> { + // this is a hack + // we assume markdown parsers can add to `allSlugs`, + // but don't actually use them + const allSlugs: FullSlug[] = [] + const ctx: BuildCtx = { + buildId, + cfg, + argv, + allSlugs, + } + return [await createFileParser(ctx, fps)(createMdProcessor(ctx)), allSlugs] +} + +// only called from worker thread +export function processHtml( + buildId: string, + argv: Argv, + mds: MarkdownContent[], allSlugs: FullSlug[], -) { +): Promise { const ctx: BuildCtx = { buildId, cfg, argv, allSlugs, } - const processor = createProcessor(ctx) - const parse = createFileParser(ctx, fps) - return parse(processor) + return createMarkdownParser(ctx, mds)(createHtmlProcessor(ctx)) }