From 0dab14d652dce2c247c2b1d3cd4be472e9a54ef1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 24 Jan 2018 23:10:45 +0500 Subject: [PATCH] Add ability to extract comments from the grammar (#511) All comments are stored in the `comments` property of the `grammar` node. Comments are extracted only if the `extractComments` option is set to `true` when you generate the parser. This property is an object mapping the start offset of each comment to a comment object that looks like: ```js { text: 'text in the comment, just after // or /* and before */', multiline: true|false,// true for /**/ comments, false for // comments location: location() } ``` --- README.md | 19 ++ lib/parser/ast.js | 3 +- lib/parser/index.js | 355 ++++++++++++++++++---------------- src/parser.pegjs | 27 ++- test/spec/unit/parser.spec.js | 138 ++++++++++--- 5 files changed, 342 insertions(+), 200 deletions(-) diff --git a/README.md b/README.md index 5888d86..7226154 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,25 @@ object to `peg.generate`. The following options are supported: * `plugins` — plugins to use * `trace` — makes the parser trace its progress (default: `false`) +You can also supply the boolean option `extractComments` (default: `false`). +When set to `true`, the parser will collect all comments in the grammar into an object +stored in the `comments` property of the `grammar` AST node. This object maps the +start offset of each comment (i.e. the position of `//` or `/*`) to the comment object +itself. A comment object has the following structure: + + ```js + { + text: 'all text between /* and */, or between // and the end of the line', + multiline: true|false, + location: location() + } + ``` + +When set to `false`, `comments` will be set to `null`. + +This option does not affect the generated parser, only the grammar AST, which +can be used by plugins. 
+ Using the Parser ---------------- diff --git a/lib/parser/ast.js b/lib/parser/ast.js index 9b09769..7fb2a4b 100644 --- a/lib/parser/ast.js +++ b/lib/parser/ast.js @@ -15,11 +15,12 @@ exports.Node = Node; class Grammar extends Node { // Creates a new AST - constructor( initializer, rules, location ) { + constructor( initializer, rules, comments, location ) { super( "grammar", location ); this.initializer = initializer; + this.comments = comments; this.rules = rules; } diff --git a/lib/parser/index.js b/lib/parser/index.js index c01e50b..b2f2060 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -237,6 +237,7 @@ function peg$parse(input, options) { return new ast.Grammar( extractOptional(initializer, 0), extractList(rules, 0), + comments, location() ); }; @@ -311,25 +312,31 @@ function peg$parse(input, options) { var peg$f11 = function(operator, code) { return createNode( OPS_TO_SEMANTIC_PREDICATE_TYPES[operator], { code: code } ); }; - var peg$f12 = function(name) { return [name, location()]; }; - var peg$f13 = function(head, tail) { return head + tail.join(""); }; - var peg$f14 = function(sequence) { return sequence; }; - var peg$f15 = function(value, ignoreCase) { + var peg$f12 = function(comment) { + return addComment(comment, true); + }; + var peg$f13 = function(comment) { + return addComment(comment, false); + }; + var peg$f14 = function(name) { return [name, location()]; }; + var peg$f15 = function(head, tail) { return head + tail.join(""); }; + var peg$f16 = function(sequence) { return sequence; }; + var peg$f17 = function(value, ignoreCase) { return createNode( "literal", { value: value, ignoreCase: ignoreCase !== null, } ); }; - var peg$f16 = function(chars) { return chars.join(""); }; - var peg$f17 = function() { return text(); }; - var peg$f18 = function(inverted, parts, ignoreCase) { + var peg$f18 = function(chars) { return chars.join(""); }; + var peg$f19 = function() { return text(); }; + var peg$f20 = function(inverted, parts, ignoreCase) 
{ return createNode( "class", { parts: parts.filter(part => part !== ""), inverted: inverted !== null, ignoreCase: ignoreCase !== null, } ); }; - var peg$f19 = function(begin, end) { + var peg$f21 = function(begin, end) { if (begin.charCodeAt(0) > end.charCodeAt(0)) { error( "Invalid character range: " + text() + "." @@ -338,20 +345,20 @@ function peg$parse(input, options) { return [begin, end]; }; - var peg$f20 = function() { return ""; }; - var peg$f21 = function() { return "\0"; }; - var peg$f22 = function() { return "\b"; }; - var peg$f23 = function() { return "\f"; }; - var peg$f24 = function() { return "\n"; }; - var peg$f25 = function() { return "\r"; }; - var peg$f26 = function() { return "\t"; }; - var peg$f27 = function() { return "\v"; }; - var peg$f28 = function(digits) { + var peg$f22 = function() { return ""; }; + var peg$f23 = function() { return "\0"; }; + var peg$f24 = function() { return "\b"; }; + var peg$f25 = function() { return "\f"; }; + var peg$f26 = function() { return "\n"; }; + var peg$f27 = function() { return "\r"; }; + var peg$f28 = function() { return "\t"; }; + var peg$f29 = function() { return "\v"; }; + var peg$f30 = function(digits) { return String.fromCharCode(parseInt(digits, 16)); }; - var peg$f29 = function() { return createNode( "any", {} ); }; - var peg$f30 = function(code) { return code; }; - var peg$f31 = function() { error("Unbalanced brace."); }; + var peg$f31 = function() { return createNode( "any", {} ); }; + var peg$f32 = function(code) { return code; }; + var peg$f33 = function() { error("Unbalanced brace."); }; var peg$currPos = 0; var peg$savedPos = 0; @@ -1353,7 +1360,7 @@ function peg$parse(input, options) { } function peg$parseMultiLineComment() { - var s0, s1, s2, s3, s4, s5; + var s0, s1, s2, s3, s4, s5, s6; var rule$expects = function (expected) { if (peg$silentFails === 0) peg$expect(expected); @@ -1367,68 +1374,70 @@ function peg$parse(input, options) { s1 = peg$FAILED; } if (s1 !== peg$FAILED) { - s2 = []; 
- s3 = peg$currPos; + s2 = peg$currPos; + s3 = []; s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); if (input.substr(peg$currPos, 2) === peg$c23) { - s5 = peg$c23; + s6 = peg$c23; peg$currPos += 2; } else { - s5 = peg$FAILED; + s6 = peg$FAILED; } peg$end(true); - if (s5 === peg$FAILED) { - s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$currPos; + while (s4 !== peg$FAILED) { + s3.push(s4); s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); if (input.substr(peg$currPos, 2) === peg$c23) { - s5 = peg$c23; + s6 = peg$c23; peg$currPos += 2; } else { - s5 = peg$FAILED; + s6 = peg$FAILED; } peg$end(true); - if (s5 === peg$FAILED) { - s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } + s2 = input.substring(s2, peg$currPos); if (input.substr(peg$currPos, 2) === peg$c23) { s3 = peg$c23; peg$currPos += 2; @@ -1436,8 +1445,8 @@ function peg$parse(input, options) { s3 = peg$FAILED; } if (s3 !== peg$FAILED) { - s1 
= [s1, s2, s3]; - s0 = s1; + peg$savedPos = s0; + s0 = peg$f12(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1451,7 +1460,7 @@ function peg$parse(input, options) { } function peg$parseMultiLineCommentNoLineTerminator() { - var s0, s1, s2, s3, s4, s5; + var s0, s1, s2, s3, s4, s5, s6; var rule$expects = function (expected) { if (peg$silentFails === 0) peg$expect(expected); @@ -1466,76 +1475,78 @@ function peg$parse(input, options) { s1 = peg$FAILED; } if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$currPos; + s2 = peg$currPos; + s3 = []; s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); rule$expects(peg$e17); if (input.substr(peg$currPos, 2) === peg$c23) { - s5 = peg$c23; + s6 = peg$c23; peg$currPos += 2; } else { - s5 = peg$FAILED; + s6 = peg$FAILED; } - if (s5 === peg$FAILED) { - s5 = peg$parseLineTerminator(); + if (s6 === peg$FAILED) { + s6 = peg$parseLineTerminator(); } peg$end(true); - if (s5 === peg$FAILED) { - s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$currPos; + while (s4 !== peg$FAILED) { + s3.push(s4); s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); rule$expects(peg$e17); if (input.substr(peg$currPos, 2) === peg$c23) { - s5 = peg$c23; + s6 = peg$c23; peg$currPos += 2; } else { - s5 = peg$FAILED; + s6 = peg$FAILED; } - if (s5 === peg$FAILED) { - s5 = peg$parseLineTerminator(); + if (s6 === peg$FAILED) { + s6 = peg$parseLineTerminator(); } peg$end(true); - if (s5 === peg$FAILED) { - 
s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } + s2 = input.substring(s2, peg$currPos); rule$expects(peg$e17); if (input.substr(peg$currPos, 2) === peg$c23) { s3 = peg$c23; @@ -1544,8 +1555,8 @@ function peg$parse(input, options) { s3 = peg$FAILED; } if (s3 !== peg$FAILED) { - s1 = [s1, s2, s3]; - s0 = s1; + peg$savedPos = s0; + s0 = peg$f12(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1559,7 +1570,7 @@ function peg$parse(input, options) { } function peg$parseSingleLineComment() { - var s0, s1, s2, s3, s4, s5; + var s0, s1, s2, s3, s4, s5, s6; var rule$expects = function (expected) { if (peg$silentFails === 0) peg$expect(expected); @@ -1574,60 +1585,62 @@ function peg$parse(input, options) { s1 = peg$FAILED; } if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$currPos; + s2 = peg$currPos; + s3 = []; s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); - s5 = peg$parseLineTerminator(); + s6 = peg$parseLineTerminator(); peg$end(true); - if (s5 === peg$FAILED) { - s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - 
s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$currPos; + while (s4 !== peg$FAILED) { + s3.push(s4); s4 = peg$currPos; + s5 = peg$currPos; peg$begin(); - s5 = peg$parseLineTerminator(); + s6 = peg$parseLineTerminator(); peg$end(true); - if (s5 === peg$FAILED) { - s4 = undefined; + if (s6 === peg$FAILED) { + s5 = undefined; } else { - peg$currPos = s4; - s4 = peg$FAILED; + peg$currPos = s5; + s5 = peg$FAILED; } - if (s4 !== peg$FAILED) { - s5 = peg$parseSourceCharacter(); - if (s5 !== peg$FAILED) { - s4 = [s4, s5]; - s3 = s4; + if (s5 !== peg$FAILED) { + s6 = peg$parseSourceCharacter(); + if (s6 !== peg$FAILED) { + s5 = [s5, s6]; + s4 = s5; } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } else { - peg$currPos = s3; - s3 = peg$FAILED; + peg$currPos = s4; + s4 = peg$FAILED; } } - s1 = [s1, s2]; - s0 = s1; + s2 = input.substring(s2, peg$currPos); + peg$savedPos = s0; + s0 = peg$f13(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1647,7 +1660,7 @@ function peg$parse(input, options) { s1 = peg$parseIdentifierName(); if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f12(s1); + s1 = peg$f14(s1); } s0 = s1; @@ -1673,7 +1686,7 @@ function peg$parse(input, options) { s3 = peg$parseIdentifierPart(); } peg$savedPos = s0; - s0 = peg$f13(s1, s2); + s0 = peg$f15(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1717,7 +1730,7 @@ function peg$parse(input, options) { s2 = peg$parseUnicodeEscapeSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f14(s2); + s0 = peg$f16(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1834,7 +1847,7 @@ function peg$parse(input, options) { s2 = null; } peg$savedPos = s0; - s0 = peg$f15(s1, s2); + s0 = peg$f17(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1875,7 +1888,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f16(s2); + s0 = peg$f18(s2); } else 
{ peg$currPos = s0; s0 = peg$FAILED; @@ -1907,7 +1920,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f16(s2); + s0 = peg$f18(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1960,7 +1973,7 @@ function peg$parse(input, options) { s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f17(); + s0 = peg$f19(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1981,7 +1994,7 @@ function peg$parse(input, options) { s2 = peg$parseEscapeSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f14(s2); + s0 = peg$f16(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2036,7 +2049,7 @@ function peg$parse(input, options) { s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f17(); + s0 = peg$f19(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2057,7 +2070,7 @@ function peg$parse(input, options) { s2 = peg$parseEscapeSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f14(s2); + s0 = peg$f16(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2129,7 +2142,7 @@ function peg$parse(input, options) { s5 = null; } peg$savedPos = s0; - s0 = peg$f18(s2, s3, s5); + s0 = peg$f20(s2, s3, s5); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2163,7 +2176,7 @@ function peg$parse(input, options) { s3 = peg$parseClassCharacter(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f19(s1, s3); + s0 = peg$f21(s1, s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2218,7 +2231,7 @@ function peg$parse(input, options) { s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f17(); + s0 = peg$f19(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2239,7 +2252,7 @@ function peg$parse(input, options) { s2 = peg$parseEscapeSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f14(s2); + s0 = peg$f16(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2274,7 +2287,7 @@ function peg$parse(input, 
options) { s2 = peg$parseLineTerminatorSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f20(); + s0 = peg$f22(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2316,7 +2329,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f21(); + s0 = peg$f23(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2388,7 +2401,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f22(); + s1 = peg$f24(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2401,7 +2414,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f23(); + s1 = peg$f25(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2414,7 +2427,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f24(); + s1 = peg$f26(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2427,7 +2440,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f25(); + s1 = peg$f27(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2440,7 +2453,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f26(); + s1 = peg$f28(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2453,7 +2466,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f27(); + s1 = peg$f29(); } s0 = s1; } @@ -2493,7 +2506,7 @@ function peg$parse(input, options) { s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f17(); + s0 = peg$f19(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2575,7 +2588,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f28(s2); + s0 = peg$f30(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2638,7 +2651,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f28(s2); + s0 = peg$f30(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ 
-2702,7 +2715,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f29(); + s1 = peg$f31(); } s0 = s1; @@ -2735,7 +2748,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f30(s2); + s0 = peg$f32(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2754,7 +2767,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f31(); + s1 = peg$f33(); } s0 = s1; } @@ -3364,6 +3377,20 @@ function peg$parse(input, options) { return util.enforceFastProperties( node ); } + let comments = options.extractComments ? {} : null; + function addComment(comment, multiline) { + if (options.extractComments) { + let loc = location(); + comment = { + text: comment, + multiline: multiline, + location: loc + }; + comments[loc.start.offset] = comment; + return comment; + } + } + peg$begin(); peg$result = peg$startRuleFunction(); diff --git a/src/parser.pegjs b/src/parser.pegjs index efa1d71..23a15fa 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -95,6 +95,20 @@ util.extend( node, details ); return util.enforceFastProperties( node ); } + + let comments = options.extractComments ? 
{} : null; + function addComment(comment, multiline) { + if (options.extractComments) { + let loc = location(); + comment = { + text: comment, + multiline: multiline, + location: loc + }; + comments[loc.start.offset] = comment; + return comment; + } + } } // ---- Syntactic Grammar ----- @@ -104,6 +118,7 @@ Grammar return new ast.Grammar( extractOptional(initializer, 0), extractList(rules, 0), + comments, location() ); } @@ -259,13 +274,19 @@ Comment "comment" / SingleLineComment MultiLineComment - = "/*" (!"*/" SourceCharacter)* "*/" + = "/*" comment:$(!"*/" SourceCharacter)* "*/" { + return addComment(comment, true); + } MultiLineCommentNoLineTerminator - = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/" + = "/*" comment:$(!("*/" / LineTerminator) SourceCharacter)* "*/" { + return addComment(comment, true); + } SingleLineComment - = "//" (!LineTerminator SourceCharacter)* + = "//" comment:$(!LineTerminator SourceCharacter)* { + return addComment(comment, false); + } Identifier = name:IdentifierName { return [name, location()]; } diff --git a/test/spec/unit/parser.spec.js b/test/spec/unit/parser.spec.js index 5f334f0..a031be2 100644 --- a/test/spec/unit/parser.spec.js +++ b/test/spec/unit/parser.spec.js @@ -2,12 +2,32 @@ const chai = require( "chai" ); const parser = require( "pegjs-dev" ).parser; +const util = require( "pegjs-dev" ).util; const expect = chai.expect; // better diagnostics for deep eq failure chai.config.truncateThreshold = 0; +function varyParserOptions( block ) { + + const optionsVariants = [ + { }, + { extractComments: false }, + { extractComments: true } + ]; + + optionsVariants.forEach( variant => { + + describe( + "with options " + chai.util.inspect( variant ), + () => block( variant ) + ); + + } ); + +} + describe( "PEG.js grammar parser", function () { const literalAbcd = { type: "literal", value: "abcd", ignoreCase: false }; @@ -71,6 +91,7 @@ describe( "PEG.js grammar parser", function () { return { type: "grammar", initializer: null, 
+ comments: null, rules: [ { type: "rule", name: "start", expression: expression } ] }; @@ -115,10 +136,30 @@ describe( "PEG.js grammar parser", function () { } + function commented( grammar, comments, options ) { + + function toObject( result, comment ) { + + result[ comment.offset ] = { + text: comment.text, + multiline: comment.multiline + }; + + return result; + + } + + grammar = util.clone( grammar ); + grammar.comments = options.extractComments ? comments.reduce( toObject, {} ) : null; + return grammar; + + } + const trivialGrammar = literalGrammar( "abcd", false ); const twoRuleGrammar = { type: "grammar", initializer: null, + comments: null, rules: [ ruleA, ruleB ] }; @@ -171,6 +212,11 @@ describe( "PEG.js grammar parser", function () { strip( node.initializer ); + } + if ( node.comments ) { + + util.each( node.comments, stripLeaf ); + } node.rules.forEach( strip ); @@ -206,9 +252,11 @@ describe( "PEG.js grammar parser", function () { const Assertion = chai.Assertion; - Assertion.addMethod( "parseAs", function ( expected ) { + Assertion.addMethod( "parseAs", function ( expected, options ) { + + options = typeof options === "undefined" ? 
{} : options; - const result = parser.parse( utils.flag( this, "object" ) ); + const result = parser.parse( utils.flag( this, "object" ), options ); stripLocation( result ); @@ -298,13 +346,13 @@ describe( "PEG.js grammar parser", function () { it( "parses Grammar", function () { expect( "\na = 'abcd';\n" ).to.parseAs( - { type: "grammar", initializer: null, rules: [ ruleA ] } + { type: "grammar", comments: null, initializer: null, rules: [ ruleA ] } ); expect( "\na = 'abcd';\nb = 'efgh';\nc = 'ijkl';\n" ).to.parseAs( - { type: "grammar", initializer: null, rules: [ ruleA, ruleB, ruleC ] } + { type: "grammar", comments: null, initializer: null, rules: [ ruleA, ruleB, ruleC ] } ); expect( "\n{ code };\na = 'abcd';\n" ).to.parseAs( - { type: "grammar", initializer: initializer, rules: [ ruleA ] } + { type: "grammar", comments: null, initializer: initializer, rules: [ ruleA ] } ); } ); @@ -313,7 +361,7 @@ describe( "PEG.js grammar parser", function () { it( "parses Initializer", function () { expect( "{ code };start = 'abcd'" ).to.parseAs( - { type: "grammar", initializer: initializer, rules: [ ruleStart ] } + { type: "grammar", comments: null, initializer: initializer, rules: [ ruleStart ] } ); } ); @@ -501,45 +549,71 @@ describe( "PEG.js grammar parser", function () { } ); - // Canonical Comment is "/* comment */". - it( "parses Comment", function () { + varyParserOptions( function ( options ) { - expect( "start =// comment\n'abcd'" ).to.parseAs( trivialGrammar ); - expect( "start =/* comment */'abcd'" ).to.parseAs( trivialGrammar ); + // Canonical Comment is "/* comment */". 
+ it( "parses Comment", function () { - } ); + expect( "start =// comment\n'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: " comment", multiline: false } ], options + ), options ); + expect( "start =/* comment */'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: " comment ", multiline: true } ], options + ), options ); - // Canonical MultiLineComment is "/* comment */". - it( "parses MultiLineComment", function () { + } ); - expect( "start =/**/'abcd'" ).to.parseAs( trivialGrammar ); - expect( "start =/*a*/'abcd'" ).to.parseAs( trivialGrammar ); - expect( "start =/*abc*/'abcd'" ).to.parseAs( trivialGrammar ); + // Canonical MultiLineComment is "/* comment */". + it( "parses MultiLineComment", function () { - expect( "start =/**/*/'abcd'" ).to.failToParse(); + expect( "start =/**/'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "", multiline: true } ], options + ), options ); + expect( "start =/*a*/'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "a", multiline: true } ], options + ), options ); + expect( "start =/*abc*/'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "abc", multiline: true } ], options + ), options ); - } ); + expect( "start =/**/*/'abcd'" ).to.failToParse(); - // Canonical MultiLineCommentNoLineTerminator is "/* comment */". - it( "parses MultiLineCommentNoLineTerminator", function () { + } ); - expect( "a = 'abcd'/**/\r\nb = 'efgh'" ).to.parseAs( twoRuleGrammar ); - expect( "a = 'abcd'/*a*/\r\nb = 'efgh'" ).to.parseAs( twoRuleGrammar ); - expect( "a = 'abcd'/*abc*/\r\nb = 'efgh'" ).to.parseAs( twoRuleGrammar ); + // Canonical MultiLineCommentNoLineTerminator is "/* comment */". 
+ it( "parses MultiLineCommentNoLineTerminator", function () { - expect( "a = 'abcd'/**/*/\r\nb = 'efgh'" ).to.failToParse(); - expect( "a = 'abcd'/*\n*/\r\nb = 'efgh'" ).to.failToParse(); + expect( "a = 'abcd'/**/\r\nb = 'efgh'" ).to.parseAs( commented( + twoRuleGrammar, [ { offset: 10, text: "", multiline: true } ], options + ), options ); + expect( "a = 'abcd'/*a*/\r\nb = 'efgh'" ).to.parseAs( commented( + twoRuleGrammar, [ { offset: 10, text: "a", multiline: true } ], options + ), options ); + expect( "a = 'abcd'/*abc*/\r\nb = 'efgh'" ).to.parseAs( commented( + twoRuleGrammar, [ { offset: 10, text: "abc", multiline: true } ], options + ), options ); - } ); + expect( "a = 'abcd'/**/*/\r\nb = 'efgh'" ).to.failToParse(); + expect( "a = 'abcd'/*\n*/\r\nb = 'efgh'" ).to.failToParse(); - // Canonical SingleLineComment is "// comment". - it( "parses SingleLineComment", function () { + } ); + + // Canonical SingleLineComment is "// comment". + it( "parses SingleLineComment", function () { - expect( "start =//\n'abcd'" ).to.parseAs( trivialGrammar ); - expect( "start =//a\n'abcd'" ).to.parseAs( trivialGrammar ); - expect( "start =//abc\n'abcd'" ).to.parseAs( trivialGrammar ); + expect( "start =//\n'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "", multiline: false } ], options + ), options ); + expect( "start =//a\n'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "a", multiline: false } ], options + ), options ); + expect( "start =//abc\n'abcd'" ).to.parseAs( commented( + trivialGrammar, [ { offset: 7, text: "abc", multiline: false } ], options + ), options ); - expect( "start =//\n@\n'abcd'" ).to.failToParse(); + expect( "start =//\n@\n'abcd'" ).to.failToParse(); + + } ); } );