forked from mdn/markdown
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Schalk Neethling
committed
Feb 3, 2022
1 parent
e528d4e
commit 74d0f6f
Showing
33 changed files
with
7,455 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
root = true | ||
|
||
[*] | ||
end_of_line = lf | ||
insert_final_newline = true | ||
charset = utf-8 | ||
indent_style = space | ||
indent_size = 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"extends": ["eslint:recommended", "plugin:import/errors", "prettier"], | ||
"rules": { | ||
"no-console": [ | ||
"error", | ||
{ | ||
"allow": ["info", "error"] | ||
} | ||
] | ||
}, | ||
"plugins": ["import"], | ||
"parserOptions": { | ||
"sourceType": "module" | ||
}, | ||
"env": { | ||
"es2021": true, | ||
"node": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Ignore artifacts: | ||
build | ||
coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,10 @@ | ||
# markdown | ||
MDN Web Docs tool to convert HTML to Markdown and vice versa | ||
|
||
``` | ||
yarn run md h2m web/http --locale en-us --mode dry | ||
``` | ||
|
||
``` | ||
yarn md h2m web/http --locale en-us --mode replace | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{ | ||
"name": "markdown", | ||
"description": "MDN Web Docs tool to convert HTML to Markdown and vice versa", | ||
"export": "./src/cli.js", | ||
"version": "1.0.0", | ||
"repository": "https://github.com/mdn/markdown.git", | ||
"author": "Mozilla", | ||
"license": "MIT", | ||
"type": "module", | ||
"scripts": { | ||
"md": "env-cmd node src/cli.js" | ||
}, | ||
"dependencies": { | ||
"@caporal/core": "2.0.2", | ||
"@mdn/yari": "^0.6.39", | ||
"chalk": "4.1.2", | ||
"cheerio": "1.0.0-rc.10", | ||
"cli-progress": "3.10.0", | ||
"front-matter": "4.0.2", | ||
"hast-util-is-element": "1.1.0", | ||
"hast-util-to-html": "7.1.3", | ||
"hast-util-to-text": "2.0.1", | ||
"mdast-util-phrasing": "2.0.0", | ||
"node-gettext": "3.0.0", | ||
"prettier": "^2.5.1", | ||
"rehype-format": "3.1.0", | ||
"rehype-parse": "7.0.1", | ||
"rehype-raw": "5.1.0", | ||
"rehype-stringify": "8.0.0", | ||
"remark-gfm": "1.0.0", | ||
"remark-parse": "9.0.0", | ||
"remark-prettier": "1.3.0", | ||
"remark-rehype": "8.1.0", | ||
"unified": "9.2.1", | ||
"unist-builder": "2.0.3", | ||
"unist-util-visit": "2.0.3" | ||
}, | ||
"devDependencies": { | ||
"env-cmd": "^10.1.0", | ||
"eslint": "^8.8.0", | ||
"eslint-config-prettier": "^8.3.0", | ||
"eslint-plugin-import": "^2.25.4" | ||
}, | ||
"engines": { | ||
"node": "^12.20.0 || ^14.13.1 || >=16.0.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__image_snapshots__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
import fs from "fs"; | ||
import fm from "front-matter"; | ||
import { createRequire } from "module"; | ||
import chalk from "chalk"; | ||
import cliProgress from "cli-progress"; | ||
import { Document, execGit } from "@mdn/yari/content/index.js"; | ||
import { saveFile } from "@mdn/yari/content/document.js"; | ||
import { VALID_LOCALES } from "@mdn/yari/libs/constants/index.js"; | ||
import { getRoot } from "@mdn/yari/content/utils.js"; | ||
|
||
import { h2m } from "./h2m/index.js"; | ||
import { prettyAST } from "./utils/index.js"; | ||
import { m2h } from "./index.js"; | ||
import { toSelector } from "./h2m/utils.js"; | ||
|
||
const require = createRequire(import.meta.url); | ||
const { program } = require("@caporal/core"); | ||
|
||
/**
 * Wraps a command action so that a failure can be reported verbosely before
 * it is propagated to the caller.
 *
 * @param {Function} f - The action to run. It is called (and awaited) with a
 *   single object holding `options` plus every other property passed in.
 * @returns {Function} An async function taking `{ options = {}, ...rest }`.
 *   If `f` throws or rejects, the error stack is printed in red when
 *   `options.verbose` or `options.v` is truthy, and the same error is
 *   rethrown in every case.
 */
function tryOrExit(f) {
  return async ({ options = {}, ...rest }) => {
    try {
      await f({ options, ...rest });
    } catch (err) {
      // Only print the stack when the user asked for verbose output.
      const isVerbose = options.verbose || options.v;
      if (isVerbose) {
        console.error(chalk.red(err.stack));
      }
      // Always propagate: this wrapper reports, it does not swallow.
      throw err;
    }
  };
}
|
||
/**
 * Builds a Markdown report of conversion problems and, when at least one
 * problem was recorded, writes it to a timestamped file in the current
 * working directory.
 *
 * The report has a summary section (how often each node label occurred across
 * all documents, most frequent first) followed by one section per document.
 *
 * @param {Iterable} problems - Entries of `[url, { offset, invalid, unhandled }]`
 *   (the caller passes a Map). `invalid` items are read as
 *   `{ source, targetType, unexpectedChildren }`; `unhandled` items are nodes
 *   with a `type` and an optional `position`.
 * @returns {undefined} Nothing; the only effects are the console message and
 *   the written file.
 */
function saveProblemsReport(problems) {
  const now = new Date();
  const entries = Array.from(problems);

  // Elements are labelled by selector, every other node by its type.
  const labelOf = (node) =>
    node.type === "element" ? toSelector(node) : node.type;

  // Count how often each label occurs across all documents.
  const tally = new Map();
  for (const [, { invalid, unhandled }] of entries) {
    const nodes = [
      // An invalid entry may lack a source (the per-document section below
      // filters those out as well), so skip them here instead of crashing on
      // `node.type`.
      ...invalid.map((entry) => entry.source).filter(Boolean),
      ...unhandled,
    ];
    for (const node of nodes) {
      const label = labelOf(node);
      tally.set(label, (tally.get(label) || 0) + 1);
    }
  }

  const report = [
    `# Report from ${now.toLocaleString()}`,

    "## All unhandled elements",
    ...Array.from(tally)
      // Descending by count; a numeric difference yields 0 for ties, which
      // keeps the comparator consistent.
      .sort(([, c1], [, c2]) => c2 - c1)
      .map(([label, count]) => `- ${label} (${count})`),

    "## Details per Document",
  ];

  let problemCount = 0;
  for (const [url, { offset, invalid, unhandled }] of entries) {
    problemCount += invalid.length + unhandled.length;
    report.push(`### [${url}](https://developer.mozilla.org${url})`);

    // Renders "label (line:column)"; the line is shifted by the document's
    // offset. Falls back to the bare label when the node has no position.
    const elementWithPosition = (node) => {
      const label = labelOf(node);
      if (!node.position) {
        return label;
      }
      const { line, column } = node.position.start;
      return `${label} (${line + offset}:${column})`;
    };

    if (invalid.length > 0) {
      report.push(
        "#### Invalid AST transformations",
        ...invalid
          .filter(({ source }) => Boolean(source))
          .map(({ source, targetType, unexpectedChildren }) =>
            [
              `##### ${elementWithPosition(source)} => ${targetType}`,
              "```",
              // Spread so each child lands on its own line; a nested array
              // would be stringified with commas by join().
              ...unexpectedChildren.map((node) => prettyAST(node)),
              "```",
            ].join("\n")
          )
      );
    }

    if (unhandled.length > 0) {
      report.push(
        // Same heading level as "Invalid AST transformations": both are
        // subsections of the per-document "###" heading.
        "#### Missing conversion rules",
        ...unhandled.map((node) => `- ${elementWithPosition(node)}`)
      );
    }
  }

  if (problemCount > 0) {
    const reportFileName = `md-conversion-problems-report-${now.toISOString()}.md`;
    console.info(
      `Could not automatically convert ${problemCount} elements. Saving report to ${reportFileName}`
    );
    fs.writeFileSync(reportFileName, report.join("\n"));
  }
}
|
||
/**
 * Turns the `--locale` option value into a locale Map.
 *
 * @param {string} locale - Either the literal "all" or a single locale name.
 * @returns {Map<string, string>} An empty Map for "all"; otherwise a Map with
 *   one entry keyed by the lower-cased locale and holding the locale as given.
 */
function buildLocaleMap(locale) {
  if (locale === "all") {
    return new Map();
  }
  const key = locale.toLowerCase();
  return new Map([[key, locale]]);
}
|
||
// Command-line interface: registers the `h2m` (HTML -> Markdown) and `m2h`
// (Markdown -> HTML) commands on the Caporal program, then runs it.
program
  .bin("yarn md")
  .name("md")
  .version("0.0.1")
  .disableGlobalOption("--silent")
  .cast(false)

  .command("h2m", "Convert HTML to Markdown")
  .option("--mode <mode>", "Mode to be run in", {
    default: "keep",
    validator: ["dry", "keep", "replace"],
  })
  .option("--print-ast", "Prints MD AST", {
    default: false,
    validator: program.BOOLEAN,
  })
  .option("--locale", "Targets a specific locale", {
    default: "all",
    validator: Array.from(VALID_LOCALES.values()).concat("all"),
  })
  .argument("[folder]", "convert by folder")
  .action(
    tryOrExit(async ({ args, options }) => {
      console.info(
        `Starting HTML to Markdown conversion in ${options.mode} mode`
      );
      const documents = Document.findAll({
        folderSearch: args.folder,
        locales: buildLocaleMap(options.locale),
      });

      // [folder] is optional. Without it the prefix is empty, which every
      // slug starts with, so no document is skipped by the folder check
      // (previously this threw a TypeError on `undefined.toLowerCase()`).
      const folderPrefix = (args.folder || "").toLowerCase();

      const progressBar = new cliProgress.SingleBar(
        {},
        cliProgress.Presets.shades_classic
      );
      progressBar.start(documents.count);

      // url -> { offset, invalid, unhandled } for every document that could
      // not be converted cleanly; reported once the loop has finished.
      const problems = new Map();
      try {
        for (const doc of documents.iter()) {
          progressBar.increment();
          if (
            doc.isMarkdown ||
            // findAll's folderSearch is fuzzy which we don't want here
            !doc.metadata.slug.toLowerCase().startsWith(folderPrefix)
          ) {
            continue;
          }
          if (options.verbose) {
            console.info(doc.metadata.slug);
          }
          const { body: h, attributes: metadata } = fm(doc.rawContent);
          const [markdown, { invalid, unhandled }] = await h2m(h, {
            printAST: options.printAst,
            locale: doc.metadata.locale,
          });

          if (invalid.length > 0 || unhandled.length > 0) {
            problems.set(doc.url, {
              offset: doc.fileInfo.frontMatterOffset,
              invalid,
              unhandled,
            });
          }

          // "dry" mode converts and collects problems but writes nothing.
          if (options.mode === "replace" || options.mode === "keep") {
            const markdownPath = doc.fileInfo.path.replace(/\.html$/, ".md");
            if (options.mode === "replace") {
              // NOTE(review): getRoot receives the --locale option, which
              // defaults to "all", rather than this document's own locale.
              // Confirm that this resolves the intended repository root.
              const gitRoot = getRoot(options.locale);
              // Rename via git before the Markdown content is saved to the
              // new path.
              execGit(["mv", doc.fileInfo.path, markdownPath], {}, gitRoot);
            }
            saveFile(markdownPath, markdown, metadata);
          }
        }
      } finally {
        // Stop the progress bar even if a conversion throws.
        progressBar.stop();
      }

      saveProblemsReport(problems);
    })
  )

  .command("m2h", "Convert Markdown to HTML")
  .option("--locale", "Targets a specific locale", {
    default: "all",
    validator: Array.from(VALID_LOCALES.values()).concat("all"),
  })
  .argument("[folder]", "convert by folder")
  .action(
    tryOrExit(async ({ args, options }) => {
      const all = Document.findAll({
        folderSearch: args.folder,
        locales: buildLocaleMap(options.locale),
      });
      for (const doc of all.iter()) {
        // Only Markdown documents are converted; the result is saved next to
        // the source with an .html extension.
        if (!doc.isMarkdown) {
          continue;
        }
        const { body: m, attributes: metadata } = fm(doc.rawContent);
        const h = await m2h(m, { locale: doc.metadata.locale });
        saveFile(doc.fileInfo.path.replace(/\.md$/, ".html"), h, metadata);
      }
    })
  );

program.run();
Oops, something went wrong.