NEW: Interlinears (#46)

closes #46
digitallinguistics · Sep 21, 2024 · a155302 · a155302
1 parent 151ac30
commit a155302
Show file tree

Hide file tree

Showing 7 changed files with 125 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
+.vscode/
 node_modules/
diff --git a/README.md b/README.md
@@ -37,6 +37,7 @@ The library enables the following features by default:
 | grammatical glosses | `^^fut^^`                         | `<abbr class="gl">fut</abbr>`                    |
 | inline examples     | `The word *perro* is Spanish.`    | `<p>The word <i>perro</i> is Spanish.</p>`       |
 | inline translations | `The word *perro* means ''dog''.` | `<p>The word <i>perro</i> means <q>dog</q>.</p>` |
+| interlinear glosses | <pre><code>\```igl<br>ninakupenda<br>ni-na-ku-pend-a<br>1sg.SUBJ-PRES-2sg.OBJ-love-IND<br>I love you<br>```</code></pre> | [See documentation here.][dlx2html] |
 
 ### General
 
@@ -65,6 +66,45 @@ The library enables the following features by default:
 | table of contents      | `[[toc]]`                                                                                                    | [See documentation here.][toc]                                                         |
 | typography             | `-- --- ... 1st 2nd 3rd 4th 1/3 3/4`                                                                         | `– — … 1<sup>st</sup> 2<sup>nd</sup> 3<sup>rd</sup> 4<sup>th</sup> ⅓ ¾`                |
 
+## Usage Notes
+
+- Most markdown libraries convert single asterisks (`*perro*`) to emphasis (`<em>perro</em>`), but `ling-md` converts them to [idiomatic text][i] (`<i>perro</i>`). Use `**double asterisks**` for bold instead. (See ["you're using &lt;em&gt; wrong"][em-article] by Facundo Corradini for more information.)
+- Attributes may be added in a variety of formats. See the [`attributes-parser`][attrs-parser] library for a complete list. The most common ones are:
+  - `.className` > `class="className"`
+  - `#name` > `id="name"`
+  - `attr=val` > `attr="val"` (`data-*` attributes also work)
+- There is no markdown shortcut for underlining. Use the `<u>` tag instead.
+- You can enter interlinear glossed examples in [Scription] format, using fenced code blocks tagged with the `igl` language identifier, like so:
+
+  ````
+  ```igl
+  ninakupenda
+  ni-na-ku-pend-a
+  1sg.SUBJ-PRES-2sg.OBJ-love-IND
+  I love you
+  ```
+  ````
+
+  Note that you can enter multiple examples in a single code block, separated by a blank line.
+
+  You can also pass options to both the `scription2dlx` and `dlx2html` libraries by placing those options in a YAML header at the very top of the code block, like so:
+
+  ````
+  ```igl
+  ---
+  dlx2html:
+    glosses: true
+  scription2dlx:
+    emphasis: false
+  ---
+
+  ninakupenda
+  ni-na-ku-pend-a
+  1sg.SUBJ-PRES-2sg.OBJ-love-IND
+  I love you
+  ```
+  ````
+
 ## API
 
 ### `marked`
@@ -82,24 +122,17 @@ Parse a markdown string using the current options and return HTML.
 | `markdown`     | Object    | <pre><code>{<br>  html: true,<br>  typographer: true<br>}</code></pre> | Options to pass to `markdown-it`. `typographer` and `html` are enabled by default.                                          |
 | `translations` | `span\|q` | `span`                                                                 | Whether to use `<span class=tln>` or a `<q>` element for translations. `<span>`s will wrap the inner text in single quotes. |
 
-## Usage Notes
-
-- Most markdown libraries convert single asterisks (`*perro*`) to emphasis (`<em>perro</em>`), but `ling-md` converts them to [idiomatic text][i] (`<i>perro</i>`). Use `**double asterisks**` for bold instead. (See ["you're using &lt;em&gt; wrong"][em-article] by Facundo Corradini for more information.)
-- Attributes may be added in a variety of formats. See the [`attributes-parser`][attrs-parser] library for a complete list. The most common ones are:
-  - `.className` > `class="className"`
-  - `#name` > `id="name"`
-  - `attr=val` > `attr="val"` (`data-*` attributes also work)
-- There is no markdown shortcut for underlining. Use the `<u>` tag instead.
-
 <!-- LINKS -->
 [alert]:           https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
 [attrs-parser]:    https://www.npmjs.com/package/attributes-parser
 [checklists]:      https://www.markdownguide.org/extended-syntax/#task-lists
 [def-lists]:       https://pandoc.org/MANUAL.html#definition-lists
+[dlx2html]:        https://github.com/digitallinguistics/dlx2html
 [em-article]:      https://blog.logrocket.com/youre-using-em-wrong/
 [fn]:              https://www.npmjs.com/package/markdown-it-footnote
 [i]:               https://developer.mozilla.org/en-US/docs/Web/HTML/Element/i
 [markdown-it]:     https://github.com/markdown-it/markdown-it#readme
+[Scription]:       https://scription.digitallinguistics.io/
 [spec]:            https://github.com/digitallinguistics/ling-markdown-spec
 [summary-details]: https://www.npmjs.com/package/markdown-it-collapsible
 [table-captions]:  https://github.com/martinring/markdown-it-table-captions

diff --git a/index.js b/index.js
@@ -14,6 +14,7 @@ import glosses              from './plugins/glosses.js'
 import headerAnchors        from 'markdown-it-anchor'
 import inlineTranslations   from './plugins/translations.js'
 import insertedText         from 'markdown-it-ins'
+import interlinears         from './plugins/interlinears.js'
 import markedText           from 'markdown-it-mark'
 import ordinals             from './plugins/ordinals.js'
 import subscript            from 'markdown-it-sub'
@@ -58,6 +59,7 @@ export default class Parser {
       .use(headerAnchors)
       .use(inlineTranslations, { tag: translations })
       .use(insertedText)
+      .use(interlinears)
       .use(markedText)
       .use(mathjax, createMathjaxInstance())
       .use(ordinals)

diff --git a/index.test.js b/index.test.js
@@ -128,6 +128,30 @@ describe(`ling-md`, function() {
     expect(html).to.equal(`<p><i>inline example <b>with bold</b></i></p>\n`)
   })
 
+  // Renders an `igl` fenced code block that starts with a YAML options header,
+  // then checks a few properties of the resulting HTML.
+  it(`interlinear glosses`, function() {
+
+    // The fence is written flush-left inside the template literal, and its backticks are
+    // escaped so that they appear literally in the markdown passed to the parser.
+    const md = `
+\`\`\`igl
+---
+dlx2html:
+  glosses: true
+scription2dlx:
+  emphasis: false
+---
+nina*ku*penda
+ni-na-*ku*-pend-a
+1sg.SUBJ-PRES-*2sg.OBJ*-love-IND
+I love you
+\`\`\``
+
+    const html = parser.parse(md)
+
+    // The output must begin with the interlinear wrapper element rather than a code block.
+    expect(html).to.match(/^<div class='igl'/v)
+    // None of the asterisks from the input may survive into the output.
+    // NOTE(review): presumably a consequence of the `emphasis` option above; confirm against scription2dlx.
+    expect(html).not.to.contain(`*`)
+    // NOTE(review): presumably produced by the `glosses: true` option above; confirm against dlx2html.
+    expect(html).to.contain(`<abbr`)
+
+  })
+
   it(`++inserted text++`, function() {
     const md   = `This includes ++inserted text++.`
     const html = parser.parse(md)

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -35,9 +35,12 @@
     "stop-only": "^3.4.0"
   },
   "dependencies": {
+    "@digitallinguistics/dlx2html": "^0.4.0",
+    "@digitallinguistics/scription2dlx": "^0.14.0",
     "@mdit/plugin-alert": "^0.13.1",
     "@mdit/plugin-mathjax": "^0.13.1",
     "@mdit/plugin-tasklist": "^0.13.1",
+    "js-yaml": "^4.1.0",
     "markdown-it": "^14.1.0",
     "markdown-it-anchor": "^9.2.0",
     "markdown-it-attrs": "^4.2.0",

diff --git a/plugins/interlinears.js b/plugins/interlinears.js
@@ -0,0 +1,35 @@
+import dlx2html      from '@digitallinguistics/dlx2html'
+import jsYaml        from 'js-yaml'
+import scription2dlx from '@digitallinguistics/scription2dlx'
+
+// Matches a YAML header at the very start of a string: an opening `---` line, the header
+// text (captured lazily in the `header` group; the `s` flag lets `.` span newlines), and a
+// closing `---` line together with its trailing newline.
+const yamlHeaderRegExp = /^---\n(?<header>.*?)\n---\n/sv
+
+export default function InterlinearsPlugin(md) {
+
+  const originalRenderer = md.renderer.rules.fence
+
+  function fenceWithInterlinears(...args) {
+
+    const [tokens, i] = args
+    const token       = tokens[i]
+    const lang        = token.info
+
+    if (lang === `igl`) {
+
+      const header    = token.content.match(yamlHeaderRegExp)?.groups?.header
+      const options   = header ? jsYaml.load(header) : {}
+      const scription = token.content.replace(yamlHeaderRegExp, ``)
+      const data      = scription2dlx(scription, options.scription2dlx)
+      const html      = dlx2html(data, options.dlx2html)
+
+      return html
+
+    }
+
+    return originalRenderer(...args)
+
+  }
+
+  md.renderer.rules.fence = fenceWithInterlinears
+
+}