Skip to content

Commit

Permalink
XLSX support with ExcelJS (#248)
Browse files Browse the repository at this point in the history
* XLSX support with ExcelJS

* Prettier

* Change range option to nested arrays

General code clean up

* Tests and bug fixes

* Respect header row order when resolving conflicts

* Fil/xlsx (#249)

* document xlsx (minimalist, we'll work on the notebook first)

* fix coverage reporter

(avoids a crash on my computer; solution found at tapjs/tapjs#624)

* unknown sheet name

* simplify rows naming

* NN is always called on string (cell specifier such as "AA99")

* test name

* more range specifiers

* Column only range test case

* sheetNames is enumerable

* One more test to check for empty columns

Prettier + use default/base tap reporter

* Add Node 16 to the test matrix

* Revert reporter to classic for Node 16

* Don't fail matrix quickly in actions

* More coverage.

* Example of .xlsx in README

* Remove Excel from Workbook naming

* Fix dates

* Fix for sharedFormula

* Coerce errors to NaN

* Properly escape html

* Make sheetNames read-only

* Require colons in range specifiers

* Include row numbers

* Use only string form ranges

* Coerce range specifiers to strings

* Update README.md

Co-authored-by: Mike Bostock <mbostock@gmail.com>

* Apply suggestions from code review

Co-authored-by: Mike Bostock <mbostock@gmail.com>

* Simplify hyperlinks

* Prettier

* Pass options through

* Rename helper functions for clarity, range tests

* Simpler

* Consistent comment format

* Consistent regexes

* Fix hyperlinks for certain cases

Co-authored-by: Philippe Rivière <fil@rezo.net>
Co-authored-by: Mike Bostock <mbostock@gmail.com>
  • Loading branch information
3 people authored Sep 15, 2021
1 parent 6cfe135 commit db58b85
Show file tree
Hide file tree
Showing 7 changed files with 412 additions and 15 deletions.
26 changes: 13 additions & 13 deletions .github/workflows/nodejs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ on: [push]

jobs:
build:

strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
node-version: [12.x, 14.x]
node-version: [12.x, 14.x, 16.x]

runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v1
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v1
with:
node-version: ${{ matrix.node-version }}
- name: yarn install and test
run: |
yarn install --frozen-lockfile
yarn test
env:
CI: true
- uses: actions/checkout@v1
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v1
with:
node-version: ${{ matrix.node-version }}
- name: yarn install and test
run: |
yarn install --frozen-lockfile
yarn test
env:
CI: true
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,15 @@ Returns a promise to the file loaded as a [SQLite database client](https://obser
const db = await FileAttachment("chinook.db").sqlite();
```

<a href="#attachment_xlsx" name="attachment_xlsx">#</a> *attachment*.<b>xlsx</b>() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source")

Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx).

```js
const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx();
const sheet = workbook.sheet("Sheet1", {range: "B4:AF234", headers: true});
```

<a href="#attachment_xml" name="attachment_xml">#</a> *attachment*.<b>xml</b>() [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source")

Returns a promise to an [XMLDocument](https://developer.mozilla.org/en-US/docs/Web/API/XMLDocument) containing the contents of the file.
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"url": "https://github.com/observablehq/stdlib.git"
},
"scripts": {
"test": "tap 'test/**/*-test.js'",
"test": "tap 'test/**/*-test.js' --reporter classic",
"prepublishOnly": "rollup -c",
"postpublish": "git push && git push --tags"
},
Expand Down
1 change: 1 addition & 0 deletions src/dependencies.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite
export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arquero = dependency("arquero", "4.8.4", "dist/arquero.min.js");
export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js");
export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js");
7 changes: 6 additions & 1 deletion src/fileAttachment.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv";
import {require as requireDefault} from "d3-require";
import {arrow, jszip} from "./dependencies.js";
import {arrow, jszip, exceljs} from "./dependencies.js";
import {SQLiteDatabaseClient} from "./sqlite.js";
import {Workbook} from "./xlsx.js";

async function remote_fetch(file) {
const response = await fetch(await file.url());
Expand Down Expand Up @@ -70,6 +71,10 @@ class AbstractFile {
// Loads the file as HTML by delegating to this.xml() with the "text/html"
// MIME type; the result is whatever this.xml() returns for that type.
async html() {
return this.xml("text/html");
}
async xlsx() {
const [ExcelJS, buffer] = await Promise.all([requireDefault(exceljs.resolve()), this.arrayBuffer()]);
return new Workbook(await new ExcelJS.Workbook().xlsx.load(buffer));
}
}

class FileAttachment extends AbstractFile {
Expand Down
104 changes: 104 additions & 0 deletions src/xlsx.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Thin wrapper around a loaded spreadsheet workbook.
//
// Exposes:
//   sheetNames - read-only, enumerable array of worksheet names, in the order
//                given by workbook.worksheets.
//   _          - the wrapped workbook (non-enumerable, read-only).
export class Workbook {
  constructor(workbook) {
    const sheetNames = workbook.worksheets.map((worksheet) => worksheet.name);
    Object.defineProperty(this, "_", {value: workbook});
    Object.defineProperty(this, "sheetNames", {
      value: sheetNames,
      enumerable: true,
    });
  }

  // Returns the extracted contents of one worksheet.
  //
  // name    - a number (index into sheetNames) or any other value, which is
  //           coerced to a string and matched against the sheet names.
  // options - passed through unchanged to extract().
  //
  // Throws an Error if no sheet matches.
  sheet(name, options) {
    let resolved;
    if (typeof name === "number") {
      resolved = this.sheetNames[name];
    } else {
      name += "";
      resolved = this.sheetNames.includes(name) ? name : null;
    }
    if (resolved == null) throw new Error(`Sheet not found: ${name}`);
    return extract(this._.getWorksheet(resolved), options);
  }
}

// Extracts a rectangular range of cells from a worksheet as an array of row
// objects keyed by column name.
//
// sheet   - the worksheet; this reads sheet._rows[r] and row._cells[c]
//           (zero-based), plus columnCount/rowCount via parseRange().
// range   - optional range specifier such as "B4:AF234"; defaults to the
//           whole sheet (see parseRange).
// headers - when truthy, the first row of the range supplies column names and
//           is excluded from the output; otherwise names are "A", "B", ….
//
// Returns an array with one object per row. Each object carries a
// non-enumerable "#" property holding the one-based row number, and one
// property per non-empty cell. The array also has a `columns` property
// listing "#" followed by the column names in range order.
function extract(sheet, {range, headers = false} = {}) {
  let [[c0, r0], [c1, r1]] = parseRange(range, sheet);
  const headerRow = headers && sheet._rows[r0++];
  let names = new Set(["#"]);
  for (let n = c0; n <= c1; n++) {
    const header = headerRow ? valueOf(headerRow._cells[n]) : null;
    // Coerce header values to strings before de-duplicating: object keys are
    // strings, so a numeric 1 and a text "1" header would otherwise look
    // distinct in the Set yet collide (and overwrite each other) in each row.
    let name = (header && header + "") || toColumn(n);
    while (names.has(name)) name += "_";
    names.add(name);
  }
  // Left-pad with c0 holes so that names[c + 1] is the name of column c.
  names = new Array(c0).concat(Array.from(names));

  // Clamp to zero: when the header row consumes the last (or only) row of the
  // range, r1 - r0 + 1 is negative and new Array() would throw a RangeError.
  const output = new Array(Math.max(0, r1 - r0 + 1));
  for (let r = r0; r <= r1; r++) {
    const row = (output[r - r0] = Object.defineProperty({}, "#", {
      value: r + 1,
    }));
    const _row = sheet._rows[r];
    if (_row && _row.hasValues)
      for (let c = c0; c <= c1; c++) {
        const value = valueOf(_row._cells[c]);
        if (value != null) row[names[c + 1]] = value;
      }
  }

  output.columns = names.filter(() => true); // Filter sparse columns
  return output;
}

// Converts a spreadsheet cell into a plain JavaScript value.
//
// - A missing cell yields undefined.
// - Primitives, null/undefined and Date instances are returned as-is.
// - Formula and shared-formula cells yield their result, or NaN when the
//   result carries an error.
// - Rich text is flattened by concatenating the text of each run.
// - Hyperlink-style values ({text, hyperlink}) yield "<hyperlink> <text>",
//   or just the text when the two are identical or there is no hyperlink.
// - Any other object is returned unchanged.
function valueOf(cell) {
  if (!cell) return;
  const raw = cell.value;
  if (!raw || typeof raw !== "object" || raw instanceof Date) return raw;

  const joinRuns = (runs) => runs.map((run) => run.text).join("");

  if (raw.formula || raw.sharedFormula) {
    const {result} = raw;
    return result && result.error ? NaN : result;
  }
  if (raw.richText) return joinRuns(raw.richText);
  if (!raw.text) return raw;

  const label = raw.text.richText ? joinRuns(raw.text.richText) : raw.text;
  const {hyperlink} = raw;
  if (hyperlink && hyperlink !== label) return `${hyperlink} ${label}`;
  return label;
}

// Parses a range specifier such as "B4:AF234" into zero-based, inclusive
// [[c0, r0], [c1, r1]] bounds.
//
// Either side of the colon may omit the column letters, the row digits, or
// both; omitted parts default to the sheet extent (0 for the start, and
// columnCount - 1 / rowCount - 1 for the end). The specifier is coerced to a
// string first, and an Error is thrown if it is not of the form
// [LETTERS][DIGITS]:[LETTERS][DIGITS].
function parseRange(specifier = ":", {columnCount, rowCount}) {
  const spec = specifier + "";
  if (!/^[A-Z]*\d*:[A-Z]*\d*$/.test(spec)) {
    throw new Error("Malformed range specifier");
  }
  const [start, end] = spec.split(":").map(fromCellReference);
  const [c0 = 0, r0 = 0] = start;
  const [c1 = columnCount - 1, r1 = rowCount - 1] = end;
  return [
    [c0, r0],
    [c1, r1],
  ];
}

// Returns the spreadsheet-style column label for a zero-based column index,
// using bijective base 26 (there is no zero digit, so "Z" is followed by "AA").
// For example: 0 -> "A", 1 -> "B", 25 -> "Z", 26 -> "AA", 27 -> "AB".
function toColumn(c) {
  let remaining = c;
  remaining++; // Work with a one-based index.
  let label = "";
  do {
    // A remainder of 0 stands for the 26th letter, "Z".
    const digit = remaining % 26 || 26;
    label = String.fromCharCode(64 + digit) + label;
    remaining = Math.floor((remaining - 1) / 26);
  } while (remaining);
  return label;
}

// Converts a cell reference into zero-based [column, row] indexes.
// For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9].
// A missing column or row part yields undefined in that position, so
// "C" -> [2, undefined] and "7" -> [undefined, 6].
function fromCellReference(s) {
  const [, letters, digits] = s.match(/^([A-Z]*)(\d*)$/);
  // Accumulate the letters as bijective base-26 digits ("A" = 1 … "Z" = 26).
  let column = 0;
  for (const letter of letters) {
    column = column * 26 + (letter.charCodeAt(0) - 64);
  }
  const columnIndex = column ? column - 1 : undefined;
  const rowIndex = digits ? +digits - 1 : undefined;
  return [columnIndex, rowIndex];
}
Loading

0 comments on commit db58b85

Please sign in to comment.