Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Schalk Neethling committed Feb 3, 2022
1 parent e528d4e commit 74d0f6f
Show file tree
Hide file tree
Showing 33 changed files with 7,455 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
root = true

[*]
end_of_line = lf
insert_final_newline = true
charset = utf-8
indent_style = space
indent_size = 2
19 changes: 19 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"extends": ["eslint:recommended", "plugin:import/errors", "prettier"],
"rules": {
"no-console": [
"error",
{
"allow": ["info", "error"]
}
]
},
"plugins": ["import"],
"parserOptions": {
"sourceType": "module"
},
"env": {
"es2021": true,
"node": true
}
}
3 changes: 3 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ignore artifacts:
build
coverage
1 change: 1 addition & 0 deletions .prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
# markdown
MDN Web Docs tool to convert HTML to Markdown and vice versa

```
yarn run md h2m web/http --locale en-us --mode dry
```

```
yarn md h2m web/http --locale en-us --mode replace
```
47 changes: 47 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"name": "markdown",
"description": "MDN Web Docs tool to convert HTML to Markdown and vice versa",
"export": "./src/cli.js",
"version": "1.0.0",
"repository": "https://github.com/mdn/markdown.git",
"author": "Mozilla",
"license": "MIT",
"type": "module",
"scripts": {
"md": "env-cmd node src/cli.js"
},
"dependencies": {
"@caporal/core": "2.0.2",
"@mdn/yari": "^0.6.39",
"chalk": "4.1.2",
"cheerio": "1.0.0-rc.10",
"cli-progress": "3.10.0",
"front-matter": "4.0.2",
"hast-util-is-element": "1.1.0",
"hast-util-to-html": "7.1.3",
"hast-util-to-text": "2.0.1",
"mdast-util-phrasing": "2.0.0",
"node-gettext": "3.0.0",
"prettier": "^2.5.1",
"rehype-format": "3.1.0",
"rehype-parse": "7.0.1",
"rehype-raw": "5.1.0",
"rehype-stringify": "8.0.0",
"remark-gfm": "1.0.0",
"remark-parse": "9.0.0",
"remark-prettier": "1.3.0",
"remark-rehype": "8.1.0",
"unified": "9.2.1",
"unist-builder": "2.0.3",
"unist-util-visit": "2.0.3"
},
"devDependencies": {
"env-cmd": "^10.1.0",
"eslint": "^8.8.0",
"eslint-config-prettier": "^8.3.0",
"eslint-plugin-import": "^2.25.4"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
}
}
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__image_snapshots__/
230 changes: 230 additions & 0 deletions src/cli.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
import fs from "fs";
import fm from "front-matter";
import { createRequire } from "module";
import chalk from "chalk";
import cliProgress from "cli-progress";
import { Document, execGit } from "@mdn/yari/content/index.js";
import { saveFile } from "@mdn/yari/content/document.js";
import { VALID_LOCALES } from "@mdn/yari/libs/constants/index.js";
import { getRoot } from "@mdn/yari/content/utils.js";

import { h2m } from "./h2m/index.js";
import { prettyAST } from "./utils/index.js";
import { m2h } from "./index.js";
import { toSelector } from "./h2m/utils.js";

const require = createRequire(import.meta.url);
const { program } = require("@caporal/core");

/**
 * Wraps a command action so that failures are reported before propagating.
 *
 * The returned async handler forwards its single object argument to `f`,
 * substituting an empty object when `options` is missing. If `f` throws or
 * rejects, the error stack is printed in red when `options.verbose` or
 * `options.v` is truthy, and the same error is then rethrown. Despite the
 * name, this wrapper never terminates the process itself.
 *
 * @param {Function} f - Action to run; receives `{ options, ...rest }`.
 * @returns {Function} Async handler with the same argument shape as `f`.
 */
function tryOrExit(f) {
  const guarded = async ({ options = {}, ...rest }) => {
    const wantsStack = () => Boolean(options.verbose || options.v);
    try {
      await f({ options, ...rest });
    } catch (failure) {
      if (wantsStack()) {
        console.error(chalk.red(failure.stack));
      }
      throw failure;
    }
  };
  return guarded;
}

/**
 * Builds a Markdown report of conversion problems and, if there are any,
 * writes it to `md-conversion-problems-report-<ISO timestamp>.md` in the
 * current working directory.
 *
 * The report has two parts: a summary that counts problem nodes by label
 * (most frequent first), and a per-document section that lists invalid AST
 * transformations and nodes without a conversion rule.
 *
 * @param {Iterable} problems - Entries of the form
 *   `[url, { offset, invalid, unhandled }]` (the caller passes a `Map`).
 *   `invalid` items are read as `{ source, targetType, unexpectedChildren }`;
 *   `unhandled` items and `source` are AST nodes with a `type` and an
 *   optional `position.start`. `offset` is added to reported line numbers.
 * @returns {void}
 */
function saveProblemsReport(problems) {
  const now = new Date();

  // Elements are shown as a selector, every other node by its type name.
  const labelOf = (node) =>
    node.type === "element" ? toSelector(node) : node.type;

  // Appends "(line:column)" when the node carries position information.
  const describeNode = (node, offset) => {
    const label = labelOf(node);
    if (!node.position) {
      return label;
    }
    const { line, column } = node.position.start;
    return `${label} (${line + offset}:${column})`;
  };

  // Tally every problem node by label. Invalid entries without a `source`
  // are skipped here exactly as they are in the details section below;
  // dereferencing them would throw.
  const countsByLabel = new Map();
  for (const [, { invalid, unhandled }] of problems) {
    const nodes = [
      ...invalid.map(({ source }) => source).filter(Boolean),
      ...unhandled,
    ];
    for (const node of nodes) {
      const label = labelOf(node);
      countsByLabel.set(label, (countsByLabel.get(label) || 0) + 1);
    }
  }

  const report = [
    `# Report from ${now.toLocaleString()}`,

    "## All unhandled elements",
    // A numeric difference returns 0 for ties, keeping the comparator
    // consistent; equal counts stay in first-seen order.
    ...Array.from(countsByLabel)
      .sort(([, c1], [, c2]) => c2 - c1)
      .map(([label, count]) => `- ${label} (${count})`),

    "## Details per Document",
  ];

  let problemCount = 0;
  for (const [url, { offset, invalid, unhandled }] of problems) {
    problemCount += invalid.length + unhandled.length;
    report.push(`### [${url}](https://developer.mozilla.org${url})`);

    if (invalid.length > 0) {
      report.push(
        "#### Invalid AST transformations",
        ...invalid
          .filter(({ source }) => !!source)
          .map(({ source, targetType, unexpectedChildren }) =>
            [
              `##### ${describeNode(source, offset)} => ${targetType}`,
              "```",
              // Spread so each AST dump gets its own line; a nested array
              // would be stringified with commas by join().
              ...unexpectedChildren.map((node) => prettyAST(node)),
              "```",
            ].join("\n")
          )
      );
    }

    if (unhandled.length > 0) {
      report.push(
        // Same heading level as "Invalid AST transformations": both are
        // subsections of the per-document "###" heading.
        "#### Missing conversion rules",
        ...unhandled.map((node) => "- " + describeNode(node, offset))
      );
    }
  }

  if (problemCount > 0) {
    const reportFileName = `md-conversion-problems-report-${now.toISOString()}.md`;
    console.info(
      `Could not automatically convert ${problemCount} elements. Saving report to ${reportFileName}`
    );
    fs.writeFileSync(reportFileName, report.join("\n"));
  }
}

/**
 * Turns the `--locale` option value into the map passed as `locales` to
 * `Document.findAll`.
 *
 * @param {string} locale - A locale name, or the literal `"all"`.
 * @returns {Map<string, string>} An empty map for `"all"`; otherwise a map
 *   with one entry from the lower-cased locale to the locale as given.
 */
function buildLocaleMap(locale) {
  if (locale === "all") {
    return new Map();
  }
  const key = locale.toLowerCase();
  return new Map([[key, locale]]);
}

// Command-line definition: two commands, `h2m` (HTML -> Markdown) and
// `m2h` (Markdown -> HTML), each taking an optional folder and a locale.
program
  .bin("yarn md")
  .name("md")
  .version("0.0.1")
  .disableGlobalOption("--silent")
  .cast(false)

  .command("h2m", "Convert HTML to Markdown")
  .option("--mode <mode>", "Mode to be run in", {
    default: "keep",
    validator: ["dry", "keep", "replace"],
  })
  .option("--print-ast", "Prints MD AST", {
    default: false,
    validator: program.BOOLEAN,
  })
  .option("--locale", "Targets a specific locale", {
    default: "all",
    validator: Array.from(VALID_LOCALES.values()).concat("all"),
  })
  .argument("[folder]", "convert by folder")
  .action(
    tryOrExit(async ({ args, options }) => {
      console.info(
        `Starting HTML to Markdown conversion in ${options.mode} mode`
      );
      const documents = Document.findAll({
        folderSearch: args.folder,
        locales: buildLocaleMap(options.locale),
      });

      // `[folder]` is optional. Without it the prefix is empty, which every
      // slug starts with, so no document is filtered out by folder.
      const folderPrefix = (args.folder || "").toLowerCase();

      const progressBar = new cliProgress.SingleBar(
        {},
        cliProgress.Presets.shades_classic
      );
      progressBar.start(documents.count);

      // Maps document URL -> problems found while converting that document.
      const problems = new Map();
      try {
        for (const doc of documents.iter()) {
          progressBar.increment();
          if (
            doc.isMarkdown ||
            // findAll's folderSearch is fuzzy which we don't want here
            !doc.metadata.slug.toLowerCase().startsWith(folderPrefix)
          ) {
            continue;
          }
          if (options.verbose) {
            console.info(doc.metadata.slug);
          }
          const { body: h, attributes: metadata } = fm(doc.rawContent);
          const [markdown, { invalid, unhandled }] = await h2m(h, {
            printAST: options.printAst,
            locale: doc.metadata.locale,
          });

          if (invalid.length > 0 || unhandled.length > 0) {
            problems.set(doc.url, {
              offset: doc.fileInfo.frontMatterOffset,
              invalid,
              unhandled,
            });
          }

          // "dry" converts and reports problems but writes nothing.
          if (options.mode === "replace" || options.mode === "keep") {
            const markdownPath = doc.fileInfo.path.replace(/\.html$/, ".md");
            if (options.mode === "replace") {
              // Rename through git first so the .html file is replaced by
              // the .md file instead of being kept next to it.
              // NOTE(review): getRoot receives the --locale option, which
              // defaults to "all", not doc.metadata.locale. Confirm that
              // getRoot handles that value.
              const gitRoot = getRoot(options.locale);
              execGit(["mv", doc.fileInfo.path, markdownPath], {}, gitRoot);
            }
            saveFile(markdownPath, markdown, metadata);
          }
        }
      } finally {
        // Stop the bar even when a conversion throws.
        progressBar.stop();
      }

      saveProblemsReport(problems);
    })
  )

  .command("m2h", "Convert Markdown to HTML")
  .option("--locale", "Targets a specific locale", {
    default: "all",
    validator: Array.from(VALID_LOCALES.values()).concat("all"),
  })
  .argument("[folder]", "convert by folder")
  .action(
    tryOrExit(async ({ args, options }) => {
      const all = Document.findAll({
        folderSearch: args.folder,
        locales: buildLocaleMap(options.locale),
      });
      for (const doc of all.iter()) {
        if (!doc.isMarkdown) {
          continue;
        }
        const { body: m, attributes: metadata } = fm(doc.rawContent);
        const h = await m2h(m, { locale: doc.metadata.locale });
        // The HTML is written next to the Markdown source.
        saveFile(doc.fileInfo.path.replace(/\.md$/, ".html"), h, metadata);
      }
    })
  );

program.run();
Loading

0 comments on commit 74d0f6f

Please sign in to comment.