From 6e4977f0c91561f7751ebe782ee4261bb7581789 Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Mon, 16 Oct 2023 10:17:01 -0600 Subject: [PATCH] =?UTF-8?q?=E2=9C=82=EF=B8=8F=20Delete=20temporary=20outpu?= =?UTF-8?q?t=20files=20in=20reduceOutput=20transform=20(#674)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🔧 Use local paths for output file IO --- .changeset/shy-readers-check.md | 5 ++ packages/myst-cli/src/process/mdast.ts | 2 +- .../myst-cli/src/transforms/outputs.spec.ts | 11 ++-- packages/myst-cli/src/transforms/outputs.ts | 52 ++++++++++++++----- 4 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 .changeset/shy-readers-check.md diff --git a/.changeset/shy-readers-check.md b/.changeset/shy-readers-check.md new file mode 100644 index 000000000..a181ae750 --- /dev/null +++ b/.changeset/shy-readers-check.md @@ -0,0 +1,5 @@ +--- +'myst-cli': patch +--- + +Delete temporary output files in reduceOutput transform diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index d1c093462..fc0d80a4d 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -215,7 +215,7 @@ export async function transformMdast( .run(mdast, vfile); if (simplifyFigures) { // Transform output nodes to images / text - reduceOutputs(mdast, file, imageWriteFolder); + reduceOutputs(session, mdast, file, imageWriteFolder); } if (!useExistingImages) { await transformImages(session, mdast, file, imageWriteFolder, { diff --git a/packages/myst-cli/src/transforms/outputs.spec.ts b/packages/myst-cli/src/transforms/outputs.spec.ts index 872a9a65e..e40f105ca 100644 --- a/packages/myst-cli/src/transforms/outputs.spec.ts +++ b/packages/myst-cli/src/transforms/outputs.spec.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from 'vitest'; -import { reduceOutputs } from './outputs.js'; +import { reduceOutputs } from './outputs'; +import { Session } from '../session/session'; describe('reduceOutputs', () => { it('output with no data is removed', async () => { @@ -28,7 +29,7 @@ describe('reduceOutputs', () => { ], }; expect(mdast.children[0].children.length).toEqual(2); - reduceOutputs(mdast, 'notebook.ipynb', '/my/folder'); + reduceOutputs(new Session(), mdast, 'notebook.ipynb', '/my/folder'); expect(mdast.children[0].children.length).toEqual(1); }); it('output with complex data is removed', async () => { @@ -70,7 +71,7 @@ describe('reduceOutputs', () => { ], }; expect(mdast.children[0].children.length).toEqual(2); - reduceOutputs(mdast, 'notebook.ipynb', '/my/folder'); + reduceOutputs(new Session(), mdast, 'notebook.ipynb', '/my/folder'); expect(mdast.children[0].children.length).toEqual(1); }); it('image output converts to image node', async () => { @@ -102,7 +103,7 @@ describe('reduceOutputs', () => { }, ], }; - reduceOutputs(mdast, 'notebook.ipynb', '/my/folder'); + reduceOutputs(new Session(), mdast, 'notebook.ipynb', '/my/folder'); expect(mdast.children.length).toEqual(1); expect(mdast.children[0].type).toEqual('image'); }); @@ -175,7 +176,7 @@ describe('reduceOutputs', () => { }, ], }; - reduceOutputs(mdast, 'notebook.ipynb', '/my/folder'); + reduceOutputs(new Session(), mdast, 'notebook.ipynb', '/my/folder'); expect(mdast.children.length).toEqual(3); expect(mdast.children[0].type).toEqual('image'); expect(mdast.children[1].type).toEqual('image'); diff --git a/packages/myst-cli/src/transforms/outputs.ts b/packages/myst-cli/src/transforms/outputs.ts index d80a4cf7e..b60e4f3bb 100644 --- a/packages/myst-cli/src/transforms/outputs.ts +++ b/packages/myst-cli/src/transforms/outputs.ts @@ -16,6 +16,14 @@ import { castSession } from '../session/index.js'; import type { ISession } from '../session/types.js'; import { resolveOutputPath } from './images.js'; +function getFilename(hash: string, contentType: string) { + return `${hash}${extFromMimeType(contentType)}`; +} + +function getDestination(hash: string, contentType: string, writeFolder: string) { + return join(writeFolder, getFilename(hash, contentType)); +} + export async function transformOutputs( session: ISession, mdast: GenericParent, @@ -40,8 +48,8 @@ export async function transformOutputs( walkOutputs(node.data, (obj) => { if (!obj.hash || !cache.$outputs[obj.hash]) return undefined; const [content, { contentType, encoding }] = cache.$outputs[obj.hash]; - const filename = `${obj.hash}${extFromMimeType(contentType)}`; - const destination = join(writeFolder, filename); + const filename = getFilename(obj.hash, contentType); + const destination = getDestination(obj.hash, contentType, writeFolder); if (fs.existsSync(destination)) { session.log.debug(`Cached file found for notebook output: ${destination}`); @@ -72,8 +80,14 @@ export async function transformOutputs( * It also only supports minified images (i.e. images cannot be too small) or * non-minified text (i.e. text cannot be too large). */ -export function reduceOutputs(mdast: GenericParent, file: string, writeFolder: string) { +export function reduceOutputs( + session: ISession, + mdast: GenericParent, + file: string, + writeFolder: string, +) { const outputs = selectAll('output', mdast) as GenericNode[]; + const unusedOutputs: string[] = []; outputs.forEach((node) => { if (!node.data?.length) { node.type = '__delete__'; @@ -83,15 +97,17 @@ export function reduceOutputs(mdast: GenericParent, file: string, writeFolder: s node.data.forEach((output: MinifiedOutput) => { let selectedOutput: { content_type: string; path: string; hash: string } | undefined; walkOutputs([output], (obj: any) => { - if (selectedOutput || !obj.path || !obj.hash) return; - if (['error', 'stream'].includes(obj.output_type)) { - const { path, hash } = obj; - selectedOutput = { content_type: 'text/plain', path, hash }; - } else if (typeof obj.content_type === 'string') { - const { content_type, path, hash } = obj; - if (obj.content_type.startsWith('image/') || obj.content_type === 'text/plain') { - selectedOutput = { content_type, path, hash }; + const { output_type, content_type, path, hash } = obj; + if (!selectedOutput && path && hash) { + if (['error', 'stream'].includes(output_type)) { + selectedOutput = { content_type: 'text/plain', path, hash }; + } else if (typeof content_type === 'string') { + if (content_type.startsWith('image/') || content_type === 'text/plain') { + selectedOutput = { content_type, path, hash }; + } } + } else if (hash && content_type) { + unusedOutputs.push(getDestination(hash, content_type, writeFolder)); } }); if (selectedOutput) selectedOutputs.push(selectedOutput); @@ -106,9 +122,10 @@ export function reduceOutputs(mdast: GenericParent, file: string, writeFolder: s url: relativePath, urlSource: relativePath, }; - } else if (output?.content_type === 'text/plain') { - const filename = `${output.hash}${extFromMimeType(output.content_type)}`; - const content = fs.readFileSync(join(writeFolder, filename), 'utf-8'); + } else if (output?.content_type === 'text/plain' && output?.hash) { + const destination = getDestination(output.hash, output.content_type, writeFolder); + unusedOutputs.push(destination); + const content = fs.readFileSync(destination, 'utf-8'); return { type: 'code', data: { type: 'output' }, @@ -123,4 +140,11 @@ export function reduceOutputs(mdast: GenericParent, file: string, writeFolder: s }); remove(mdast, '__delete__'); liftChildren(mdast, '__lift__'); + + unusedOutputs.forEach((out) => { + if (fs.existsSync(out)) { + session.log.debug(`Removing temporary notebook output file: ${out}`); + fs.rmSync(out); + } + }); }