diff --git a/API.md b/API.md index ec16fbc..bd6244d 100644 --- a/API.md +++ b/API.md @@ -16,6 +16,41 @@ A parser is said to *consume* the text that it parses, leaving only the unconsum These are either parsers or functions that return new parsers. These are the building blocks of parsers. They are all contained in the `Parsimmon` object. +## Parsimmon.createLanguage(parsers) + +`createLanguage` is the best starting point for building a language parser in Parsimmon. It organizes all of your parsers, collects them into a single namespace, and removes the need to worry about using `Parsimmon.lazy`. + +Each function passed to `createLanguage` receives as its only parameter the entire language of parsers as an object. This is used for referring to other rules from within your current rule. + +Example: + +```js +var Lang = Parsimmon.createLanguage({ + Value: function(r) { + return Parsimmon.alt( + r.Number, + r.Symbol, + r.List + ); + }, + Number: function() { + return Parsimmon.regexp(/[0-9]+/).map(Number); + }, + Symbol: function() { + return Parsimmon.regexp(/[a-z]+/); + }, + List: function(r) { + return Parsimmon.string('(') + .then(Parsimmon.sepBy(r.Value, r._)) + .skip(Parsimmon.string(')')); + }, + _: function() { + return Parsimmon.optWhitespace; + } +}); +Lang.Value.tryParse('(list 1 2 foo (list nice 3 56 989 asdasdas))'); +``` + ## Parsimmon(fn) **NOTE:** You probably will never need to use this function. Most parsing can be accomplished using `Parsimmon.regexp` and combination with `Parsimmon.seq` and `Parsimmon.alt`. @@ -182,6 +217,8 @@ This is the same as `Parsimmon.sepBy`, but matches the `content` parser **at lea ## Parsimmon.lazy(fn) +**NOTE:** This is not needed if you're using `createLanguage`. + Accepts a function that returns a parser, which is evaluated the first time the parser is used. This is useful for referencing parsers that haven't yet been defined, and for implementing recursive parsers. 
Example: ```javascript @@ -523,29 +560,71 @@ Expects `parser` at most `n` times. Yields an array of the results. Expects `parser` at least `n` times. Yields an array of the results. +## parser.node(name) + +Yields an object with `name`, `value`, `start`, and `end` keys, where `value` is the original value yielded by the parser, `name` is the argument passed in, and `start` and `end` are objects with a 0-based `offset` and 1-based `line` and `column` properties that represent the position in the input that contained the parsed text. + +Example: + +```javascript +var Identifier = + Parsimmon.regexp(/[a-z]+/).node('Identifier'); + +Identifier.tryParse('hey'); +// => { name: 'Identifier', +// value: 'hey', +// start: { offset: 0, line: 1, column: 1 }, +// end: { offset: 3, line: 1, column: 4 } } +``` + ## parser.mark() -Yields an object with `start`, `value`, and `end` keys, -where `value` is the original value yielded by the parser, and `start` and -`end` are are objects with a 0-based `offset` and 1-based `line` and -`column` properties that represent the position in the input that -contained the parsed text. Works like this function: +Yields an object with `start`, `value`, and `end` keys, where `value` is the original value yielded by the parser, and `start` and `end` are objects with a 0-based `offset` and 1-based `line` and `column` properties that represent the position in the input that contained the parsed text. Example: ```javascript -function mark(parser) { - return Parsimmon.seqMap( - Parsimmon.index, - parser, - Parsimmon.index, - function(start, value, end) { - return { - start: start, - value: value, - end: end - }; - } - ); +var Identifier = + Parsimmon.regexp(/[a-z]+/).mark(); + +Identifier.tryParse('hey'); +// => { start: { offset: 0, line: 1, column: 1 }, +// value: 'hey', +// end: { offset: 3, line: 1, column: 4 } } +``` + +## parser.thru(wrapper) + +Simply returns `wrapper(this)` from the parser. 
Useful for custom functions used to wrap your parsers, while keeping with Parsimmon chaining style. + +Example: + +```js +function makeNode(name) { + return function(parser) { + return Parsimmon.seqMap( + Parsimmon.index, + parser, + Parsimmon.index, + function(start, value, end) { + return Object.freeze({ + type: 'myLanguage.' + name, + value: value, + start: start, + end: end + }); + } + ); + }; } + +var Identifier = + Parsimmon.regexp(/[a-z]+/) + .thru(makeNode('Identifier')); + +Identifier.tryParse('hey'); +// => { type: 'myLanguage.Identifier', +// value: 'hey', +// start: { offset: 0, line: 1, column: 1 }, +// end: { offset: 3, line: 1, column: 4 } } ``` ## parser.desc(description) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fc3339..6786c38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## version 1.4.0 (2017-06-05) +* Adds `Parsimmon.createLanguage(parsers)` +* Adds `parser.thru(wrapper)` +* Adds `parser.node(name)` + ## version 1.3.0 (2017-05-28) * Adds `Parsimmon.notFollowedBy(parser)` diff --git a/README.md b/README.md index bb0f0ea..8003b06 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,13 @@ Note: If you prefer throwing an error when the parse failed, call [`.tryParse(st ## Common Functions +- [`.createLanguage(parsers)`](API.md#parsimmoncreatelanguageparsers) - [`.string(string)`](API.md#parsimmonstringstring) - [`.regexp(regexp)`](API.md#parsimmonregexpregexp) - [`.seq(p1, p2, ...pn)`](API.md#parsimmonseqp1-p2-pn) - [`.sepBy(content, separator)`](API.md#parsimmonsepbycontent) - [`.alt(p1, p2, ...pn)`](API.md#parsimmonaltp1-p2-pn) -- [`.lazy(fn)`](API.md#parsimmonlazyfn) +- [`parser.node(name)`](API.md#parsernodename) - [`.whitespace`](API.md#parsimmonwhitespace) - [`.index`](API.md#parsimmonindex) - [`parser.map(fn)`](API.md#parsermapfn) diff --git a/src/parsimmon.js b/src/parsimmon.js index e94ee6c..eec7c5d 100644 --- a/src/parsimmon.js +++ b/src/parsimmon.js @@ -238,6 +238,22 @@ }); } + // Revisit this with Object.keys and .bind when we 
drop ES3 support. + function createLanguage(parsers) { + var language = {}; + for (var key in parsers) { + if ({}.hasOwnProperty.call(parsers, key)) { + (function(key) { + var func = function() { + return parsers[key](language); + }; + language[key] = lazy(func); + }(key)); + } + } + return language; + } + /** * Allows to add custom primitive parsers */ @@ -285,6 +301,10 @@ return alt(this, alternative); }; + _.thru = function(wrapper) { + return wrapper(this); + }; + _.then = function(next) { if (typeof next === 'function') { throw new Error('chaining features of .then are no longer supported, use .chain instead'); @@ -425,6 +445,17 @@ }); }; + _.node = function(name) { + return seqMap(index, this, index, function(start, value, end) { + return { + name: name, + value: value, + start: start, + end: end + }; + }); + }; + _.lookahead = function(x) { return this.skip(lookahead(x)); }; @@ -667,6 +698,7 @@ var optWhitespace = regexp(/\s*/).desc('optional whitespace'); var whitespace = regexp(/\s+/).desc('whitespace'); + Parsimmon.createLanguage = createLanguage; Parsimmon.all = all; Parsimmon.alt = alt; Parsimmon.any = any; diff --git a/test/parsimmon.test.js b/test/parsimmon.test.js index ccee2fb..175b8e7 100644 --- a/test/parsimmon.test.js +++ b/test/parsimmon.test.js @@ -153,6 +153,71 @@ suite('parser', function() { }); }); + suite('Parsimmon.createLanguage', function() { + test('should return an object of parsers', function() { + var lang = Parsimmon.createLanguage({ + a: function() { + return Parsimmon.string('a'); + }, + b: function() { + return Parsimmon.string('b'); + } + }); + assert.ok(Parsimmon.isParser(lang.a)); + assert.ok(Parsimmon.isParser(lang.b)); + }); + test('should allow direct recursion in parsers', function() { + var lang = Parsimmon.createLanguage({ + Parentheses: function(r) { + return Parsimmon.alt( + Parsimmon.string('()'), + Parsimmon.string('(') + .then(r.Parentheses) + .skip(Parsimmon.string(')')) + ); + } + }); + 
lang.Parentheses.tryParse('(((())))'); + }); + test('should allow indirect recursion in parsers', function() { + var lang = Parsimmon.createLanguage({ + Value: function(r) { + return Parsimmon.alt( + r.Number, + r.Symbol, + r.List + ); + }, + Number: function() { + return Parsimmon.regexp(/[0-9]+/).map(Number); + }, + Symbol: function() { + return Parsimmon.regexp(/[a-z]+/); + }, + List: function(r) { + return Parsimmon.string('(') + .then(Parsimmon.sepBy(r.Value, r._)) + .skip(Parsimmon.string(')')); + }, + _: function() { + return Parsimmon.optWhitespace; + } + }); + lang.Value.tryParse('(list 1 2 foo (list nice 3 56 989 asdasdas))'); + }); + }); + + suite('parser.thru', function() { + test('should return wrapper(this)', function() { + function arrayify(x) { + return [x]; + } + var parser = Parsimmon.string(''); + var array = parser.thru(arrayify); + assert.strictEqual(array[0], parser); + }); + }); + suite('Parsimmon.lookahead', function() { test('should handle a string', function() { lookahead(''); @@ -1086,6 +1151,38 @@ suite('parser', function() { ); }); + test('.node(name)', function() { + var ys = regex(/^y*/).node('Y'); + var parser = optWhitespace.then(ys).skip(optWhitespace); + assert.deepEqual( + parser.parse('').value, + { + name: 'Y', + value: '', + start: {offset: 0, line: 1, column: 1}, + end: {offset: 0, line: 1, column: 1} + } + ); + assert.deepEqual( + parser.parse(' yy ').value, + { + name: 'Y', + value: 'yy', + start: {offset: 1, line: 1, column: 2}, + end: {offset: 3, line: 1, column: 4} + } + ); + assert.deepEqual( + parser.parse('\nyy ').value, + { + name: 'Y', + value: 'yy', + start: {offset: 1, line: 2, column: 1}, + end: {offset: 3, line: 2, column: 3} + } + ); + }); + suite('smart error messages', function() { // this is mainly about .or(), .many(), and .times(), but not about // their core functionality, so it's in its own test suite