From fb569e058e6c2a70bc13eea9d0de169fcbd64b42 Mon Sep 17 00:00:00 2001 From: Brian Mock Date: Wed, 31 Jan 2018 15:27:22 -0800 Subject: [PATCH 01/11] WIP --- src/parsimmon.js | 53 ++++++++++++++++++++++++++++++++++++------ test/core/byte.test.js | 21 +++++++++++++++++ test/core/test.test.js | 6 +++++ 3 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 test/core/byte.test.js diff --git a/src/parsimmon.js b/src/parsimmon.js index e604e19..1d9895e 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -19,6 +19,12 @@ function isArray(x) { return {}.toString.call(x) === "[object Array]"; } +var hasBuffer = typeof Buffer !== "undefined"; +function isBuffer(x) { + /* global Buffer */ + return hasBuffer && Buffer.isBuffer(x); +} + function makeSuccess(index, value) { return { status: true, @@ -60,6 +66,13 @@ function mergeReplies(result, last) { } function makeLineColumnIndex(input, i) { + if (isBuffer(input)) { + return { + offset: i, + line: -1, + column: -1 + }; + } var lines = input.slice(0, i).split("\n"); // Note that unlike the character offset, the line and column offsets are // 1-based. @@ -110,6 +123,13 @@ function assertParser(p) { } } +function get(input, i) { + if (typeof input === "string") { + return input.charAt(i); + } + return input[i]; +} + // TODO[ES5]: Switch to Array.isArray eventually. function assertArray(x) { if (!isArray(x)) { @@ -164,6 +184,9 @@ function formatGot(input, error) { if (i === input.length) { return ", got the end of the input"; } + if (isBuffer(input)) { + return " at byte " + index.offset; + } var prefix = i > 0 ? "'..." : "'"; var suffix = input.length - i > 12 ? "...'" : "'"; return ( @@ -336,8 +359,10 @@ function sepBy1(parser, separator) { // -*- Core Parsing Methods -*- _.parse = function(input) { - if (typeof input !== "string") { - throw new Error(".parse must be called with a string as its argument"); + if (typeof input !== "string" && !isBuffer(input)) { + throw new Error( + ".parse must be called with a string or Buffer as its argument" + ); } var result = this.skip(eof)._(input, 0); if (result.status) { @@ -585,6 +610,19 @@ function string(str) { }); } +function byte(b) { + assertNumber(b); + var expected = (b > 0xf ? "0x" : "0x0") + b.toString(16); + return Parsimmon(function(input, i) { + var head = get(input, i); + if (head === b) { + return makeSuccess(i + 1, head); + } else { + return makeFailure(i, expected); + } + }); +} + function regexp(re, group) { assertRegexp(re); if (arguments.length >= 2) { @@ -652,11 +690,11 @@ function notFollowedBy(parser) { function test(predicate) { assertFunction(predicate); return Parsimmon(function(input, i) { - var char = input.charAt(i); + var char = get(input, i); if (i < input.length && predicate(char)) { return makeSuccess(i + 1, char); } else { - return makeFailure(i, "a character matching " + predicate); + return makeFailure(i, "a character/byte matching " + predicate); } }); } @@ -689,7 +727,7 @@ function takeWhile(predicate) { return Parsimmon(function(input, i) { var j = i; - while (j < input.length && predicate(input.charAt(j))) { + while (j < input.length && predicate(get(input, j))) { j++; } return makeSuccess(j, input.slice(i, j)); @@ -738,9 +776,9 @@ var index = Parsimmon(function(input, i) { var any = Parsimmon(function(input, i) { if (i >= input.length) { - return makeFailure(i, "any character"); + return makeFailure(i, "any character/byte"); } - return makeSuccess(i + 1, input.charAt(i)); + return makeSuccess(i + 1, get(input, i)); }); var all = Parsimmon(function(input, i) { @@ -764,6 +802,7 @@ var whitespace = regexp(/\s+/).desc("whitespace"); Parsimmon.all = all; Parsimmon.alt = alt; Parsimmon.any = any; +Parsimmon.byte = byte; Parsimmon.createLanguage = createLanguage; Parsimmon.custom = custom; Parsimmon.digit = digit; diff --git a/test/core/byte.test.js b/test/core/byte.test.js new file mode 100644 index 0000000..7516fef --- /dev/null +++ b/test/core/byte.test.js @@ -0,0 +1,21 @@ +"use strict"; + +suite("byte", function() { + test("it matches a buffer byte", function() { + var b = Buffer.from([0xf]); + var p = Parsimmon.byte(0xf); + assert.ok(p.parse(b).value); + }); + + test("it formats single digit bytes like 0x0f", function() { + var b = Buffer.from([0xa]); + var p = Parsimmon.byte(0xf); + assert.deepEqual(p.parse(b).expected, ["0x0f"]); + }); + + test("it formats double digit bytes like 0xff", function() { + var b = Buffer.from([0x12]); + var p = Parsimmon.byte(0xff); + assert.deepEqual(p.parse(b).expected, ["0xff"]); + }); +}); diff --git a/test/core/test.test.js b/test/core/test.test.js index f1e5bbe..490ab2d 100644 --- a/test/core/test.test.js +++ b/test/core/test.test.js @@ -4,8 +4,14 @@ test("test", function() { var parser = Parsimmon.test(function(ch) { return ch !== "."; }); + var highBit = Parsimmon.test(function(ch) { + return ch | 128; + }); assert.equal(parser.parse("x").value, "x"); assert.equal(parser.parse(".").status, false); + assert.equal(highBit.parse(Buffer.from([255])).status, true); + assert.equal(highBit.parse(Buffer.from([0])).status, true); + assert.equal(highBit.parse(Buffer.from([127])).status, true); assert.throws(function() { Parsimmon.test("not a function"); }); From 4425729fe12648e999b9b6aeef8640a5a566918a Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Wed, 7 Mar 2018 13:40:02 -0600 Subject: [PATCH 02/11] Buffer support PR. From a00014f264a76663471dc042ff76da15bbb7a0c0 Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Thu, 8 Mar 2018 12:39:06 -0600 Subject: [PATCH 03/11] Support for bitSeq and bitSeqObj. --- package-lock.json | 36 ++++---- src/parsimmon.js | 163 +++++++++++++++++++++++++++++++++++- test/core/bitSeq.test.js | 27 ++++++ test/core/bitSeqObj.test.js | 37 ++++++++ test/core/byte.test.js | 6 +- 5 files changed, 246 insertions(+), 23 deletions(-) create mode 100644 test/core/bitSeq.test.js create mode 100644 test/core/bitSeqObj.test.js diff --git a/package-lock.json b/package-lock.json index 3739411..19672c3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "parsimmon", - "version": "1.6.2", + "version": "1.6.4", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -1686,15 +1686,6 @@ "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", "dev": true }, - "string_decoder": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.3.tgz", - "integrity": "sha512-4AH6Z5fzNNBcH+6XDMfA/BTt87skxqJlO0lAh3Dker5zThcAxG6mKz+iGu308UKoPPQ8Dcqx/4JhujzltRa+hQ==", - "dev": true, - "requires": { - "safe-buffer": "5.1.1" - } - }, "string-width": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", @@ -1706,6 +1697,15 @@ "strip-ansi": "3.0.1" } }, + "string_decoder": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.3.tgz", + "integrity": "sha512-4AH6Z5fzNNBcH+6XDMfA/BTt87skxqJlO0lAh3Dker5zThcAxG6mKz+iGu308UKoPPQ8Dcqx/4JhujzltRa+hQ==", + "dev": true, + "requires": { + "safe-buffer": "5.1.1" + } + }, "strip-ansi": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", @@ -5013,14 +5013,6 @@ "xtend": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz" } }, - "string_decoder": { - "version": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.0.tgz", - "integrity": "sha1-8G9BFXtmTYYGn4S9vcmw2KsoFmc=", - "dev": true, - "requires": { - "buffer-shims": "https://registry.npmjs.org/buffer-shims/-/buffer-shims-1.0.0.tgz" - } - }, "string-width": { "version": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=", @@ -5031,6 +5023,14 @@ "strip-ansi": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz" } }, + "string_decoder": { + "version": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.0.tgz", + "integrity": "sha1-8G9BFXtmTYYGn4S9vcmw2KsoFmc=", + "dev": true, + "requires": { + "buffer-shims": "https://registry.npmjs.org/buffer-shims/-/buffer-shims-1.0.0.tgz" + } + }, "stringstream": { "version": "0.0.5", "resolved": "https://registry.npmjs.org/stringstream/-/stringstream-0.0.5.tgz", diff --git a/src/parsimmon.js b/src/parsimmon.js index 1d9895e..2c5ac88 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -9,6 +9,161 @@ function Parsimmon(action) { var _ = Parsimmon.prototype; +function times(n, f) { + var i = 0; + for (i; i < n; i++) { + f(i); + } +} + +function forEach(f, arr) { + times(arr.length, function(i) { + f(arr[i], i, arr); + }); +} + +function reduce(f, seed, arr) { + forEach(function(elem, i, arr) { + seed = f(seed, elem, i, arr); + }, arr); + return seed; +} + +function map(f, arr) { + return reduce( + function(acc, elem, i, a) { + return acc.concat([f(elem, i, a)]); + }, + [], + arr + ); +} + +function lshiftBuffer(input) { + var asTwoBytes = reduce( + function(a, v, i, b) { + return a.concat( + i === b.length - 1 + ? Buffer.from([v, 0]).readUInt16BE(0) + : b.readUInt16BE(i) + ); + }, + [], + input + ); + return Buffer.from( + map(function(x) { + return ((x << 1) & 0xffff) >> 8; + }, asTwoBytes) + ); +} + +function consumeBitsFromBuffer(n, input) { + var state = { v: 0, buf: input }; + times(n, function() { + state = { + v: (state.v << 1) | bitPeekBuffer(state.buf), + buf: lshiftBuffer(state.buf) + }; + }); + return state; +} + +function bitPeekBuffer(input) { + return input[0] >> 7; +} + +function sum(numArr) { + return reduce( + function(x, y) { + return x + y; + }, + 0, + numArr + ); +} + +function find(pred, arr) { + return reduce( + function(found, elem) { + return found || (pred(elem) ? elem : found); + }, + null, + arr + ); +} + +function bitSeq(alignments) { + var totalBits = sum(alignments); + if (totalBits % 8 !== 0) { + throw new Error("Bits do not sum to byte boundary."); + } + var bytes = totalBits / 8; + + var tooBigRange = find(function(x) { + return x > 48; + }, alignments); + if (tooBigRange) { + throw new Error( + tooBigRange.toString() + + " bit range requested exceeds 48 bit (6 byte) Number max." + ); + } + + return new Parsimmon(function(input, i) { + if (bytes + i > input.length) { + return makeFailure(i, bytes.toString() + " bytes"); + } + return makeSuccess( + i + bytes, + reduce( + function(acc, bits) { + var state = consumeBitsFromBuffer(bits, acc.buf); + return { + coll: acc.coll.concat(state.v), + buf: state.buf + }; + }, + { coll: [], buf: input }, + alignments + ).coll + ); + }); +} + +function bitSeqObj(namedAlignments) { + var fullAlignments = map(function(pair) { + return isArray(pair) ? pair : [null, pair]; + }, namedAlignments); + + var namesOnly = map(function(pair) { + return pair[0]; + }, fullAlignments); + var alignmentsOnly = map(function(pair) { + return pair[1]; + }, fullAlignments); + + return bitSeq(alignmentsOnly).map(function(parsed) { + var namedParsed = map(function(name, i) { + return [name, parsed[i]]; + }, namesOnly); + + return reduce( + function(obj, kv) { + if (kv[0] !== null) { + obj[kv[0]] = kv[1]; + } + return obj; + }, + {}, + namedParsed + ); + }); +} + +function toArray(arrLike) { + return Array.prototype.slice.call(arrLike); +} // -*- Helpers -*- function isParser(obj) { @@ -240,7 +395,7 @@ function seq() { function seqObj() { var seenKeys = {}; var totalKeys = 0; - var parsers = [].slice.call(arguments); + var parsers = toArray(arguments); var numParsers = parsers.length; for (var j = 0; j < numParsers; j += 1) { var p = parsers[j]; @@ -802,7 +957,11 @@ var whitespace = regexp(/\s+/).desc("whitespace"); Parsimmon.all = all; Parsimmon.alt = alt; Parsimmon.any = any; -Parsimmon.byte = byte; +Parsimmon.buffers = { + bitSeq: bitSeq, + bitSeqObj: bitSeqObj, + byte: byte +}; Parsimmon.createLanguage = createLanguage; Parsimmon.custom = custom; Parsimmon.digit = digit; diff --git a/test/core/bitSeq.test.js b/test/core/bitSeq.test.js new file mode 100644 index 0000000..017b2c7 --- /dev/null +++ b/test/core/bitSeq.test.js @@ -0,0 +1,27 @@ +"use strict"; + +suite("bitSeq", function() { + test("it consumes bits into a sequence from a buffer", function() { + var b = Buffer.from([0xff, 0xff]); + var p = Parsimmon.buffers.bitSeq([3, 5, 5, 3]); + assert.deepEqual(p.parse(b).value, [7, 31, 31, 7]); + }); + + test("it disallows construction of parsers that don't align to byte boundaries", function() { + assert.throws(function() { + Parsimmon.buffers.bitSeq([1, 2]); + }); + }); + + test("fails if requesting too much", function() { + var b = Buffer.from([]); + var p = Parsimmon.buffers.bitSeq([3, 5, 5, 3]); + assert.deepEqual(p.parse(b).expected, ["2 bytes"]); + }); + + test("it throws an exception for too large of a range request", function() { + assert.throws(function() { + Parsimmon.buffers.bitSeq([1, 2, 4, 49]); + }); + }); +}); diff --git a/test/core/bitSeqObj.test.js b/test/core/bitSeqObj.test.js new file mode 100644 index 0000000..45618ab --- /dev/null +++ b/test/core/bitSeqObj.test.js @@ -0,0 +1,37 @@ +"use strict"; + +suite("bitSeqObj", function() { + test("it consumes bits into an object from a buffer", function() { + var b = Buffer.from([0xff, 0xff]); + var p = Parsimmon.buffers.bitSeqObj([ + ["a", 3], + ["b", 5], + ["c", 5], + ["d", 3] + ]); + assert.deepEqual(p.parse(b).value, { a: 7, b: 31, c: 31, d: 7 }); + }); + + test("it disallows construction of parsers that don't align to byte boundaries", function() { + assert.throws(function() { + Parsimmon.buffers.bitSeqObj([["a", 1], ["b", 2]]); + }); + }); + + test("fails if requesting too much", function() { + var b = Buffer.from([]); + var p = Parsimmon.buffers.bitSeqObj([ + ["a", 3], + ["b", 5], + ["c", 5], + ["d", 3] + ]); + assert.deepEqual(p.parse(b).expected, ["2 bytes"]); + }); + + test("it ignores unnamed ranges", function() { + var b = Buffer.from([0xff, 0xff]); + var p = Parsimmon.buffers.bitSeqObj([["a", 3], 5, ["c", 5], ["d", 3]]); + assert.deepEqual(p.parse(b).value, { a: 7, c: 31, d: 7 }); + }); +}); diff --git a/test/core/byte.test.js b/test/core/byte.test.js index 7516fef..268f30b 100644 --- a/test/core/byte.test.js +++ b/test/core/byte.test.js @@ -3,19 +3,19 @@ suite("byte", function() { test("it matches a buffer byte", function() { var b = Buffer.from([0xf]); - var p = Parsimmon.byte(0xf); + var p = Parsimmon.buffers.byte(0xf); assert.ok(p.parse(b).value); }); test("it formats single digit bytes like 0x0f", function() { var b = Buffer.from([0xa]); - var p = Parsimmon.byte(0xf); + var p = Parsimmon.buffers.byte(0xf); assert.deepEqual(p.parse(b).expected, ["0x0f"]); }); test("it formats double digit bytes like 0xff", function() { var b = Buffer.from([0x12]); - var p = Parsimmon.byte(0xff); + var p = Parsimmon.buffers.byte(0xff); assert.deepEqual(p.parse(b).expected, ["0xff"]); }); }); From b5e52ed0222d3e9e6a712fa8e3c8626de4ef898e Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 08:36:52 -0600 Subject: [PATCH 04/11] Changed namespace, altered tests, and disallowed usage of constructors if buffer isnt present. --- src/parsimmon.js | 23 ++++++++++++++++++----- test/.eslintrc | 9 +++------ test/core/bitSeq.test.js | 8 ++++---- test/core/bitSeqObj.test.js | 8 ++++---- test/core/byte.test.js | 7 ++++--- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/parsimmon.js b/src/parsimmon.js index 2c5ac88..49b9b55 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -957,11 +957,6 @@ var whitespace = regexp(/\s+/).desc("whitespace"); Parsimmon.all = all; Parsimmon.alt = alt; Parsimmon.any = any; -Parsimmon.buffers = { - bitSeq: bitSeq, - bitSeqObj: bitSeqObj, - byte: byte -}; Parsimmon.createLanguage = createLanguage; Parsimmon.custom = custom; Parsimmon.digit = digit; @@ -1000,4 +995,22 @@ Parsimmon.whitespace = whitespace; Parsimmon["fantasy-land/empty"] = empty; Parsimmon["fantasy-land/of"] = succeed; +function noBufferError() { + throw new Error( + "Buffer global does not exist; please consider using https://github.com/feross/buffer if you are running Parsimmon in a browser." + ); +} + +Parsimmon.Binary = hasBuffer + ? { + bitSeq: bitSeq, + bitSeqObj: bitSeqObj, + byte: byte + } + : { + bitSeq: noBufferError, + bitSeqObj: noBufferError, + byte: noBufferError + }; + module.exports = Parsimmon; diff --git a/test/.eslintrc b/test/.eslintrc index 6b1adcc..7b7df31 100644 --- a/test/.eslintrc +++ b/test/.eslintrc @@ -1,13 +1,10 @@ { "env": { - "node": true + "node": true, + "mocha": true }, "globals":{ "Parsimmon": true, - "assert": true, - "suite": true, - "setup": true, - "teardown": true, - "test": true + "assert": true } } diff --git a/test/core/bitSeq.test.js b/test/core/bitSeq.test.js index 017b2c7..c6dbc29 100644 --- a/test/core/bitSeq.test.js +++ b/test/core/bitSeq.test.js @@ -3,25 +3,25 @@ suite("bitSeq", function() { test("it consumes bits into a sequence from a buffer", function() { var b = Buffer.from([0xff, 0xff]); - var p = Parsimmon.buffers.bitSeq([3, 5, 5, 3]); + var p = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); assert.deepEqual(p.parse(b).value, [7, 31, 31, 7]); }); test("it disallows construction of parsers that don't align to byte boundaries", function() { assert.throws(function() { - Parsimmon.buffers.bitSeq([1, 2]); + Parsimmon.Binary.bitSeq([1, 2]); }); }); test("fails if requesting too much", function() { var b = Buffer.from([]); - var p = Parsimmon.buffers.bitSeq([3, 5, 5, 3]); + var p = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); assert.deepEqual(p.parse(b).expected, ["2 bytes"]); }); test("it throws an exception for too large of a range request", function() { assert.throws(function() { - Parsimmon.buffers.bitSeq([1, 2, 4, 49]); + Parsimmon.Binary.bitSeq([1, 2, 4, 49]); }); }); }); diff --git a/test/core/bitSeqObj.test.js b/test/core/bitSeqObj.test.js index 45618ab..7519d5b 100644 --- a/test/core/bitSeqObj.test.js +++ b/test/core/bitSeqObj.test.js @@ -3,7 +3,7 @@ suite("bitSeqObj", function() { test("it consumes bits into an object from a buffer", function() { var b = Buffer.from([0xff, 0xff]); - var p = Parsimmon.buffers.bitSeqObj([ + var p = Parsimmon.Binary.bitSeqObj([ ["a", 3], ["b", 5], ["c", 5], @@ -14,13 +14,13 @@ suite("bitSeqObj", function() { test("it disallows construction of parsers that don't align to byte boundaries", function() { assert.throws(function() { - Parsimmon.buffers.bitSeqObj([["a", 1], ["b", 2]]); + Parsimmon.Binary.bitSeqObj([["a", 1], ["b", 2]]); }); }); test("fails if requesting too much", function() { var b = Buffer.from([]); - var p = Parsimmon.buffers.bitSeqObj([ + var p = Parsimmon.Binary.bitSeqObj([ ["a", 3], ["b", 5], ["c", 5], @@ -31,7 +31,7 @@ suite("bitSeqObj", function() { test("it ignores unnamed ranges", function() { var b = Buffer.from([0xff, 0xff]); - var p = Parsimmon.buffers.bitSeqObj([["a", 3], 5, ["c", 5], ["d", 3]]); + var p = Parsimmon.Binary.bitSeqObj([["a", 3], 5, ["c", 5], ["d", 3]]); assert.deepEqual(p.parse(b).value, { a: 7, c: 31, d: 7 }); }); }); diff --git a/test/core/byte.test.js b/test/core/byte.test.js index 268f30b..bb7a314 100644 --- a/test/core/byte.test.js +++ b/test/core/byte.test.js @@ -1,21 +1,22 @@ "use strict"; +/*global context, before, after*/ suite("byte", function() { test("it matches a buffer byte", function() { var b = Buffer.from([0xf]); - var p = Parsimmon.buffers.byte(0xf); + var p = Parsimmon.Binary.byte(0xf); assert.ok(p.parse(b).value); }); test("it formats single digit bytes like 0x0f", function() { var b = Buffer.from([0xa]); - var p = Parsimmon.buffers.byte(0xf); + var p = Parsimmon.Binary.byte(0xf); assert.deepEqual(p.parse(b).expected, ["0x0f"]); }); test("it formats double digit bytes like 0xff", function() { var b = Buffer.from([0x12]); - var p = Parsimmon.buffers.byte(0xff); + var p = Parsimmon.Binary.byte(0xff); assert.deepEqual(p.parse(b).expected, ["0xff"]); }); }); From 8ea006cf51ac109b8d75e603cbbfa94de1ca956f Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 08:49:06 -0600 Subject: [PATCH 05/11] Add documentation. --- API.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/API.md b/API.md index afc491d..44821e0 100644 --- a/API.md +++ b/API.md @@ -424,6 +424,43 @@ parser.parse('accccc'); //=> {status: true, value: ['a', ['c', 'c', 'c', 'c', 'c']]} ``` +# Binary constructors. + +The purpose of the following constructors is to allow the consumption of Buffer types in node to allow for attoparsec style consumption of binary input. +As these constructors yield regular values within parsers, they can then be combined in the same fashion as the above string-based constructors to produce +robust binary parsers. These constructors live in the Parsimmon.Binary namespace. + +## Parsimmon.byte(int) + +Returns a parser that yields a byte that matches the given input. Similar to digit/letter. + +## Parsimmon.bitSeq(...alignments) + +Specify a series of bit alignments that do not have to be byte aligned and consume them from a buffer. The bits must +sum to a byte boundary. + +```javascript +var parser = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); +parser.parse(Buffer.from([0x04, 0xFF])); +//=> {status: true, value: [ 0, 4, 31, 7 ]} +``` + +## Parsimmon.bitSeqObj(...namedAlignments) + +Specify a series of bit alignments with names that will output an object with those alignments. Very similar to seqObj, +however, but only accepts numeric values. Will discard unnamed alignments. + +```javascript +var parser = Parsimmon.Binary.bitSeqObj([ + ["a", 3], + 5, + ["b", 5], + ["c", 3] +]); +parser.parse(Buffer.from([0x04, 0xFF])); +//=> { status: true, value: { a: 0, b: 31, c: 7 } } +``` + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * # Parser methods From 455c7ea489b2644bf08fe71a2f6a5e5995ed9e9d Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 08:57:05 -0600 Subject: [PATCH 06/11] Disallow incorrect construction of byte parser. --- src/parsimmon.js | 5 +++++ test/core/byte.test.js | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/parsimmon.js b/src/parsimmon.js index 49b9b55..a7bd180 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -767,6 +767,11 @@ function string(str) { function byte(b) { assertNumber(b); + if (b > 0xff) { + throw new Error( + "Value specified to byte constructor is larger in value than a single byte." + ); + } var expected = (b > 0xf ? "0x" : "0x0") + b.toString(16); return Parsimmon(function(input, i) { var head = get(input, i); diff --git a/test/core/byte.test.js b/test/core/byte.test.js index bb7a314..0f934c3 100644 --- a/test/core/byte.test.js +++ b/test/core/byte.test.js @@ -1,22 +1,28 @@ "use strict"; /*global context, before, after*/ -suite("byte", function() { - test("it matches a buffer byte", function() { +describe("byte", function() { + it("matches a buffer byte", function() { var b = Buffer.from([0xf]); var p = Parsimmon.Binary.byte(0xf); assert.ok(p.parse(b).value); }); - test("it formats single digit bytes like 0x0f", function() { + it("formats single digit bytes like 0x0f", function() { var b = Buffer.from([0xa]); var p = Parsimmon.Binary.byte(0xf); assert.deepEqual(p.parse(b).expected, ["0x0f"]); }); - test("it formats double digit bytes like 0xff", function() { + it("formats double digit bytes like 0xff", function() { var b = Buffer.from([0x12]); var p = Parsimmon.Binary.byte(0xff); assert.deepEqual(p.parse(b).expected, ["0xff"]); }); + + it("disallows larger values than a byte.", function() { + assert.throws(function() { + Parsimmon.Binary.byte(0xfff); + }, /larger in value/); + }); }); From eb8996fedf19e303290c89edf630a477ecb02749 Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 09:20:43 -0600 Subject: [PATCH 07/11] Switch to runtime check for testing sanity. --- src/parsimmon.js | 29 ++++++++++++++--------------- test/core/bitSeq.test.js | 28 +++++++++++++++++++++++----- test/core/bitSeqObj.test.js | 28 +++++++++++++++++++++++----- test/core/byte.test.js | 18 ++++++++++++++++++ 4 files changed, 78 insertions(+), 25 deletions(-) diff --git a/src/parsimmon.js b/src/parsimmon.js index a7bd180..6b626ad 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -1000,22 +1000,21 @@ Parsimmon.whitespace = whitespace; Parsimmon["fantasy-land/empty"] = empty; Parsimmon["fantasy-land/of"] = succeed; -function noBufferError() { - throw new Error( - "Buffer global does not exist; please consider using https://github.com/feross/buffer if you are running Parsimmon in a browser." - ); +function ensureBuffer(f) { + return function() { + if (typeof Buffer === "undefined") { + throw new Error( + "Buffer global does not exist; please consider using https://github.com/feross/buffer if you are running Parsimmon in a browser." + ); + } + return f.apply(null, arguments); + }; } -Parsimmon.Binary = hasBuffer - ? { - bitSeq: bitSeq, - bitSeqObj: bitSeqObj, - byte: byte - } - : { - bitSeq: noBufferError, - bitSeqObj: noBufferError, - byte: noBufferError - }; +Parsimmon.Binary = { + bitSeq: ensureBuffer(bitSeq), + bitSeqObj: ensureBuffer(bitSeqObj), + byte: ensureBuffer(byte) +}; module.exports = Parsimmon; diff --git a/test/core/bitSeq.test.js b/test/core/bitSeq.test.js index c6dbc29..17bd77e 100644 --- a/test/core/bitSeq.test.js +++ b/test/core/bitSeq.test.js @@ -1,27 +1,45 @@ "use strict"; -suite("bitSeq", function() { - test("it consumes bits into a sequence from a buffer", function() { +describe("bitSeq", function() { + it("consumes bits into a sequence from a buffer", function() { var b = Buffer.from([0xff, 0xff]); var p = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); assert.deepEqual(p.parse(b).value, [7, 31, 31, 7]); }); - test("it disallows construction of parsers that don't align to byte boundaries", function() { + it("disallows construction of parsers that don't align to byte boundaries", function() { assert.throws(function() { Parsimmon.Binary.bitSeq([1, 2]); }); }); - test("fails if requesting too much", function() { + it("fails if requesting too much", function() { var b = Buffer.from([]); var p = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); assert.deepEqual(p.parse(b).expected, ["2 bytes"]); }); - test("it throws an exception for too large of a range request", function() { + it("throws an exception for too large of a range request", function() { assert.throws(function() { Parsimmon.Binary.bitSeq([1, 2, 4, 49]); }); }); + + context("Buffer is not present.", function() { + var buff; + before(function() { + buff = global.Buffer; + global.Buffer = undefined; + }); + + after(function() { + global.Buffer = buff; + }); + + it("Disallows construction.", function() { + assert.throws(function() { + Parsimmon.Binary.bitSeq(0xf); + }, /buffer global/i); + }); + }); }); diff --git a/test/core/bitSeqObj.test.js b/test/core/bitSeqObj.test.js index 7519d5b..aba0b8d 100644 --- a/test/core/bitSeqObj.test.js +++ b/test/core/bitSeqObj.test.js @@ -1,7 +1,7 @@ "use strict"; -suite("bitSeqObj", function() { - test("it consumes bits into an object from a buffer", function() { +describe("bitSeqObj", function() { + it("consumes bits into an object from a buffer", function() { var b = Buffer.from([0xff, 0xff]); var p = Parsimmon.Binary.bitSeqObj([ ["a", 3], @@ -12,13 +12,13 @@ suite("bitSeqObj", function() { assert.deepEqual(p.parse(b).value, { a: 7, b: 31, c: 31, d: 7 }); }); - test("it disallows construction of parsers that don't align to byte boundaries", function() { + it("disallows construction of parsers that don't align to byte boundaries", function() { assert.throws(function() { Parsimmon.Binary.bitSeqObj([["a", 1], ["b", 2]]); }); }); - test("fails if requesting too much", function() { + it("fails if requesting too much", function() { var b = Buffer.from([]); var p = Parsimmon.Binary.bitSeqObj([ ["a", 3], @@ -29,9 +29,27 @@ suite("bitSeqObj", function() { assert.deepEqual(p.parse(b).expected, ["2 bytes"]); }); - test("it ignores unnamed ranges", function() { + it("ignores unnamed ranges", function() { var b = Buffer.from([0xff, 0xff]); var p = Parsimmon.Binary.bitSeqObj([["a", 3], 5, ["c", 5], ["d", 3]]); assert.deepEqual(p.parse(b).value, { a: 7, c: 31, d: 7 }); }); + + context("Buffer is not present.", function() { + var buff; + before(function() { + buff = global.Buffer; + global.Buffer = undefined; + }); + + after(function() { + global.Buffer = buff; + }); + + it("Disallows construction.", function() { + assert.throws(function() { + Parsimmon.Binary.bitSeqObj(0xf); + }, /buffer global/i); + }); + }); }); diff --git a/test/core/byte.test.js b/test/core/byte.test.js index 0f934c3..35a9bee 100644 --- a/test/core/byte.test.js +++ b/test/core/byte.test.js @@ -25,4 +25,22 @@ describe("byte", function() { Parsimmon.Binary.byte(0xfff); }, /larger in value/); }); + + context("Buffer is not present.", function() { + var buff; + before(function() { + buff = global.Buffer; + global.Buffer = undefined; + }); + + after(function() { + global.Buffer = buff; + }); + + it("Disallows construction.", function() { + assert.throws(function() { + Parsimmon.Binary.byte(0xf); + }, /buffer global/i); + }); + }); }); From e943cfcde210fc0ae7a948b6efd41955ea7d37f9 Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 09:47:21 -0600 Subject: [PATCH 08/11] Restore complete coverage. --- test/core/parse.test.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/core/parse.test.js b/test/core/parse.test.js index d4b72a0..2d95cac 100644 --- a/test/core/parse.test.js +++ b/test/core/parse.test.js @@ -23,4 +23,13 @@ suite(".parse", function() { Parsimmon.of("kaboom").parse(); }); }); + + context("The input is a buffer.", function() { + it("Formats errors correctly.", function() { + var parser = Parsimmon.Binary.byte(0); + assert.throws(function() { + parser.tryParse(Buffer.from([0xf])); + }, /at byte/); + }); + }); }); From 950704821b23a7c93b7205ab42c5b0a18b82f8ed Mon Sep 17 00:00:00 2001 From: Brandon Keown Date: Fri, 9 Mar 2018 11:18:36 -0600 Subject: [PATCH 09/11] Changes for PR requests. --- API.md | 10 ++++++-- src/parsimmon.js | 52 ++++++++++++++++++++++++---------------- test/core/bitSeq.test.js | 4 ++-- test/core/byte.test.js | 3 +-- 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/API.md b/API.md index 44821e0..2cf2af8 100644 --- a/API.md +++ b/API.md @@ -434,7 +434,13 @@ robust binary parsers. These constructors live in the Parsimmon.Binary namespac Returns a parser that yields a byte that matches the given input. Similar to digit/letter. -## Parsimmon.bitSeq(...alignments) +```javascript +var parser = Parsimmon.Binary.byte(0xFF); +parser.parse(Buffer.from([0xFF])); +//=> { status: true, value: 255 } +``` + +## Parsimmon.bitSeq(alignments) Specify a series of bit alignments that do not have to be byte aligned and consume them from a buffer. The bits must sum to a byte boundary. @@ -445,7 +451,7 @@ parser.parse(Buffer.from([0x04, 0xFF])); //=> {status: true, value: [ 0, 4, 31, 7 ]} ``` -## Parsimmon.bitSeqObj(...namedAlignments) +## Parsimmon.bitSeqObj(namedAlignments) Specify a series of bit alignments with names that will output an object with those alignments. Very similar to seqObj, however, but only accepts numeric values. Will discard unnamed alignments. diff --git a/src/parsimmon.js b/src/parsimmon.js index 6b626ad..017b19f 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -93,10 +93,29 @@ function find(pred, arr) { ); } +function bufferExists() { + return typeof Buffer !== "undefined"; +} + +function ensureBuffer() { + if (!bufferExists()) { + throw new Error( + "Buffer global does not exist; please consider using https://github.com/feross/buffer if you are running Parsimmon in a browser." + ); + } +} + function bitSeq(alignments) { + ensureBuffer(); var totalBits = sum(alignments); if (totalBits % 8 !== 0) { - throw new Error("Bits do not sum to byte boundary."); + throw new Error( + "The bits [" + + alignments.join(", ") + + "] add up to " + + totalBits + + " which is not an even number of bytes; the total should be divisible by 8" + ); } var bytes = totalBits / 8; @@ -105,8 +124,7 @@ function bitSeq(alignments) { }, alignments); if (tooBigRange) { throw new Error( - tooBigRange.toString() + - " bit range requested exceeds 48 bit (6 byte) Number max." + tooBigRange + " bit range requested exceeds 48 bit (6 byte) Number max." ); } @@ -132,6 +150,7 @@ function bitSeq(alignments) { } function bitSeqObj(namedAlignments) { + ensureBuffer(); var fullAlignments = map(function(pair) { return isArray(pair) ? pair : [null, pair]; }, namedAlignments); @@ -174,10 +193,9 @@ function isArray(x) { return {}.toString.call(x) === "[object Array]"; } -var hasBuffer = typeof Buffer !== "undefined"; function isBuffer(x) { /* global Buffer */ - return hasBuffer && Buffer.isBuffer(x); + return bufferExists() && Buffer.isBuffer(x); } function makeSuccess(index, value) { @@ -766,10 +784,15 @@ function string(str) { } function byte(b) { + ensureBuffer(); assertNumber(b); if (b > 0xff) { throw new Error( - "Value specified to byte constructor is larger in value than a single byte." + "Value specified to byte constructor (" + + b + + "=0x" + + b.toString(16) + + ") is larger in value than a single byte." ); } var expected = (b > 0xf ? "0x" : "0x0") + b.toString(16); @@ -1000,21 +1023,10 @@ Parsimmon.whitespace = whitespace; Parsimmon["fantasy-land/empty"] = empty; Parsimmon["fantasy-land/of"] = succeed; -function ensureBuffer(f) { - return function() { - if (typeof Buffer === "undefined") { - throw new Error( - "Buffer global does not exist; please consider using https://github.com/feross/buffer if you are running Parsimmon in a browser." - ); - } - return f.apply(null, arguments); - }; -} - Parsimmon.Binary = { - bitSeq: ensureBuffer(bitSeq), - bitSeqObj: ensureBuffer(bitSeqObj), - byte: ensureBuffer(byte) + bitSeq: bitSeq, + bitSeqObj: bitSeqObj, + byte: byte }; module.exports = Parsimmon; diff --git a/test/core/bitSeq.test.js b/test/core/bitSeq.test.js index 17bd77e..9949d6a 100644 --- a/test/core/bitSeq.test.js +++ b/test/core/bitSeq.test.js @@ -10,7 +10,7 @@ describe("bitSeq", function() { it("disallows construction of parsers that don't align to byte boundaries", function() { assert.throws(function() { Parsimmon.Binary.bitSeq([1, 2]); - }); + }, /add up to 3/); }); it("fails if requesting too much", function() { @@ -22,7 +22,7 @@ describe("bitSeq", function() { it("throws an exception for too large of a range request", function() { assert.throws(function() { Parsimmon.Binary.bitSeq([1, 2, 4, 49]); - }); + }, /49 bit range/); }); context("Buffer is not present.", function() { diff --git a/test/core/byte.test.js b/test/core/byte.test.js index 35a9bee..f602abc 100644 --- a/test/core/byte.test.js +++ b/test/core/byte.test.js @@ -1,5 +1,4 @@ "use strict"; -/*global context, before, after*/ describe("byte", function() { it("matches a buffer byte", function() { @@ -23,7 +22,7 @@ describe("byte", function() { it("disallows larger values than a byte.", function() { assert.throws(function() { Parsimmon.Binary.byte(0xfff); - }, /larger in value/); + }, /4095=0xfff/); }); context("Buffer is not present.", function() { From f7cf9cfaf05254e58329de2bae5bbf8ef11146cf Mon Sep 17 00:00:00 2001 From: Brian Mock Date: Sat, 10 Mar 2018 14:24:16 -0800 Subject: [PATCH 10/11] updates error cases, documentation, changelog, tests --- API.md | 26 +++++++++++--------------- CHANGELOG.md | 7 +++++++ package.json | 2 +- src/parsimmon.js | 35 ++++++++++++++++++++++++++++++++--- test/core/bitSeqObj.test.js | 24 ++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 19 deletions(-) diff --git a/API.md b/API.md index 2cf2af8..1285e1c 100644 --- a/API.md +++ b/API.md @@ -421,40 +421,36 @@ var parser = notChar('b').times(5) ); parser.parse('accccc'); -//=> {status: true, value: ['a', ['c', 'c', 'c', 'c', 'c']]} +// => { status: true, value: ['a', ['c', 'c', 'c', 'c', 'c']] } ``` -# Binary constructors. +# Binary constructors -The purpose of the following constructors is to allow the consumption of Buffer types in node to allow for attoparsec style consumption of binary input. -As these constructors yield regular values within parsers, they can then be combined in the same fashion as the above string-based constructors to produce -robust binary parsers. These constructors live in the Parsimmon.Binary namespace. +The `Parsimmon.Binary` constructors parse binary content using Node.js Buffers. These constructors can be combined with the normal parser combinators such as `Parsimmon.seq`, `Parsimmon.seqObj`, and still have all the same methods as text-based parsers (e.g. `.map`, `.node`, etc.). ## Parsimmon.byte(int) -Returns a parser that yields a byte that matches the given input. Similar to digit/letter. +Returns a parser that yields a byte (as a number) that matches the given input; similar to `Parsimmon.digit` and `Parsimmon.letter`. ```javascript -var parser = Parsimmon.Binary.byte(0xFF); -parser.parse(Buffer.from([0xFF])); -//=> { status: true, value: 255 } +var parser = Parsimmon.Binary.byte(0x3f); +parser.parse(Buffer.from([0x3f])); +// => { status: true, value: 63 } ``` ## Parsimmon.bitSeq(alignments) -Specify a series of bit alignments that do not have to be byte aligned and consume them from a buffer. The bits must -sum to a byte boundary. +Parse a series of bits that do not have to be byte-aligned and consume them from a Buffer. The maximum number is 48 since more than 48 bits won't fit safely into a JavaScript number without losing precision. Also, the total of all bits in the sequence must be a multiple of 8 since parsing is still done at the byte level. ```javascript var parser = Parsimmon.Binary.bitSeq([3, 5, 5, 3]); -parser.parse(Buffer.from([0x04, 0xFF])); -//=> {status: true, value: [ 0, 4, 31, 7 ]} +parser.parse(Buffer.from([0x04, 0xff])); +//=> { status: true, value: [0, 4, 31, 7] } ``` ## Parsimmon.bitSeqObj(namedAlignments) -Specify a series of bit alignments with names that will output an object with those alignments. Very similar to seqObj, -however, but only accepts numeric values. Will discard unnamed alignments. +Works like `Parsimmon.bitSeq` except each item in the array is either a number of bits or pair (array with length = 2) of name and bits. The bits are parsed in order and put into an object based on the name supplied. If there's no name for the bits, it will be parsed but discarded from the returned value. ```javascript var parser = Parsimmon.Binary.bitSeqObj([ diff --git a/CHANGELOG.md b/CHANGELOG.md index 45b3869..6f28a06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## version 1.7.0 (2018-03-10) + +* Adds support for binary parsing using Node.js Buffers +* Adds `Parsimmon.Binary.bitSeq` +* Adds `Parsimmon.Binary.bitSeqObj` +* Adds `Parsimmon.Binary.byte` + ## version 1.6.4 (2018-01-01) * Fixes `parser.many()` to throw an error if it detects an infinite parse loop. diff --git a/package.json b/package.json index 8a9dd34..0c8dfe7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "parsimmon", - "version": "1.6.4", + "version": "1.7.0", "description": "A monadic LL(infinity) parser combinator library", "keywords": ["parsing", "parse", "parsers", "parser combinators"], "author": "Jeanine Adkisson ", diff --git a/src/parsimmon.js b/src/parsimmon.js index 017b19f..f3ebafa 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -151,10 +151,39 @@ function bitSeq(alignments) { function bitSeqObj(namedAlignments) { ensureBuffer(); - var fullAlignments = map(function(pair) { - return isArray(pair) ? pair : [null, pair]; + var seenKeys = {}; + var totalKeys = 0; + var fullAlignments = map(function(item) { + if (isArray(item)) { + var pair = item; + if (pair.length !== 2) { + throw new Error( + "[" + + pair.join(", ") + + "] should be length 2, got length " + + pair.length + ); + } + assertString(pair[0]); + assertNumber(pair[1]); + if (seenKeys[pair[0]]) { + throw new Error("duplicate key in bitSeqObj: " + pair[0]); + } + seenKeys[pair[0]] = true; + totalKeys++; + return pair; + } else { + assertNumber(item); + return [null, item]; + } }, namedAlignments); - + if (totalKeys < 1) { + throw new Error( + "bitSeqObj expects at least one named pair, got [" + + namedAlignments.join(", ") + + "]" + ); + } var namesOnly = map(function(pair) { return pair[0]; }, fullAlignments); diff --git a/test/core/bitSeqObj.test.js b/test/core/bitSeqObj.test.js index aba0b8d..b532e2e 100644 --- a/test/core/bitSeqObj.test.js +++ b/test/core/bitSeqObj.test.js @@ -18,6 +18,30 @@ describe("bitSeqObj", function() { }); }); + it("throws when there are zero keys", function() { + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([1, 7]); + }, /expects at least one/i); + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([1, 3, 2, 2]); + }, /expects at least one/i); + }); + + it("throws you pass the wrong type of argument", function() { + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([[]]); + }, /should be length 2/i); + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([[1, 2, 3]]); + }, /should be length 2/i); + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([[1, 1]]); + }, /not a string/i); + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([["a", "a"]]); + }, /not a number/i); + }); + it("fails if requesting too much", function() { var b = Buffer.from([]); var p = Parsimmon.Binary.bitSeqObj([ From 3679ac47ad8c7b5d0afd75e98aecdef49012b722 Mon Sep 17 00:00:00 2001 From: Brian Mock Date: Sat, 10 Mar 2018 14:31:50 -0800 Subject: [PATCH 11/11] restore 100% coverage --- test/core/bitSeqObj.test.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/core/bitSeqObj.test.js b/test/core/bitSeqObj.test.js index b532e2e..0d152fa 100644 --- a/test/core/bitSeqObj.test.js +++ b/test/core/bitSeqObj.test.js @@ -27,6 +27,15 @@ describe("bitSeqObj", function() { }, /expects at least one/i); }); + it("throws when there are duplicate keys", function() { + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([["a", 1], ["a", 7]]); + }, /duplicate/i); + assert.throws(function() { + Parsimmon.Binary.bitSeqObj([["a", 1], ["b", 2], ["a", 5]]); + }, /duplicate/i); + }); + it("throws you pass the wrong type of argument", function() { assert.throws(function() { Parsimmon.Binary.bitSeqObj([[]]);