Text nodes: Implement text nodes

Implement a new syntax to extract matched strings from expressions. For
example, instead of:

  identifier = first:[a-zA-Z_] rest:[a-zA-Z0-9_]* { return first + rest.join(""); }

you can now just write:

  identifier = $([a-zA-Z_] [a-zA-Z0-9_]*)

This is useful mostly for "lexical" rules at the bottom of many
grammars.

Note that structured match results are still built for the expressions
prefixed by "$"; they are just ignored. I plan to optimize this later
(sometime after the code generator rewrite).
redux
David Majda 12 years ago
parent af20f024c7
commit 5e146fce38

@ -314,6 +314,11 @@ the `options` variable.
Note that curly braces in the predicate code must be balanced.
#### $ *expression*
Try to match the expression. If the match succeeds, return the matched string
instead of the match result.
#### *label* : *expression*
Match the expression and remember its match result under given label. The label

@ -212,6 +212,7 @@ module.exports = function(ast) {
computeExpressionScopedReuseResult(node);
},
text: computeExpressionScopedReuseResultSavePos,
simple_and: computeExpressionScopedReuseResultSavePos,
simple_not: computeExpressionScopedReuseResultSavePos,
semantic_and: computeParams,

@ -604,6 +604,13 @@ module.exports = function(ast, options) {
"sequence.inner": [
'#{r(node.resultIndex)} = [#{map(pluck(node.elements, "resultIndex"), r).join(", ")}];'
],
// Code template for a "text" node (the `$expression` syntax).
// The generated code saves the current `pos` in the node's position register,
// runs the code emitted for the inner expression, and, when the result register
// is not null, overwrites it with the slice of `input` between the saved
// position and the current `pos`.
text: [
'#{r(node.posIndex)} = pos;',
'#block emit(node.expression)',
'if (#{r(node.resultIndex)} !== null) {',
// The arguments are given as (current pos, saved pos); String.prototype.substring
// swaps its arguments when the first is greater than the second, so this yields
// the text from the saved position up to `pos` — presumably `pos` never ends up
// before the saved position after a successful match (confirm).
' #{r(node.resultIndex)} = input.substring(pos, #{r(node.posIndex)});',
'}'
],
simple_and: [
'#{r(node.posIndex)} = pos;',
'reportFailures++;',
@ -813,6 +820,7 @@ module.exports = function(ast, options) {
labeled: function(node) { return emit(node.expression); },
text: emitSimple("text"),
simple_and: emitSimple("simple_and"),
simple_not: emitSimple("simple_not"),
semantic_and: emitSimple("semantic_and"),

@ -30,6 +30,7 @@ module.exports = function(ast) {
choice: replaceInSubnodes("alternatives"),
sequence: replaceInSubnodes("elements"),
labeled: replaceInExpression,
text: replaceInExpression,
simple_and: replaceInExpression,
simple_not: replaceInExpression,
semantic_and: nop,

@ -36,6 +36,7 @@ module.exports = function(ast) {
},
labeled: checkExpression,
text: checkExpression,
simple_and: checkExpression,
simple_not: checkExpression,
semantic_and: nop,

@ -18,6 +18,7 @@ module.exports = function(ast) {
action: checkExpression,
sequence: checkSubnodes("elements"),
labeled: checkExpression,
text: checkExpression,
simple_and: checkExpression,
simple_not: checkExpression,
semantic_and: nop,

@ -484,9 +484,9 @@ module.exports = (function(){
r1 = pos;
r2 = pos;
r3 = parse_and();
r3 = parse_dollar();
if (r3 !== null) {
r4 = parse_action();
r4 = parse_suffixed();
if (r4 !== null) {
r0 = [r3, r4];
} else {
@ -499,10 +499,10 @@ module.exports = (function(){
}
if (r0 !== null) {
reportedPos = r1;
r0 = (function(code) {
r0 = (function(expression) {
return {
type: "semantic_and",
code: code
type: "text",
expression: expression
};
})(r4);
}
@ -514,7 +514,7 @@ module.exports = (function(){
r2 = pos;
r3 = parse_and();
if (r3 !== null) {
r4 = parse_suffixed();
r4 = parse_action();
if (r4 !== null) {
r0 = [r3, r4];
} else {
@ -527,10 +527,10 @@ module.exports = (function(){
}
if (r0 !== null) {
reportedPos = r1;
r0 = (function(expression) {
r0 = (function(code) {
return {
type: "simple_and",
expression: expression
type: "semantic_and",
code: code
};
})(r4);
}
@ -540,9 +540,9 @@ module.exports = (function(){
if (r0 === null) {
r1 = pos;
r2 = pos;
r3 = parse_not();
r3 = parse_and();
if (r3 !== null) {
r4 = parse_action();
r4 = parse_suffixed();
if (r4 !== null) {
r0 = [r3, r4];
} else {
@ -555,10 +555,10 @@ module.exports = (function(){
}
if (r0 !== null) {
reportedPos = r1;
r0 = (function(code) {
r0 = (function(expression) {
return {
type: "semantic_not",
code: code
type: "simple_and",
expression: expression
};
})(r4);
}
@ -570,7 +570,7 @@ module.exports = (function(){
r2 = pos;
r3 = parse_not();
if (r3 !== null) {
r4 = parse_suffixed();
r4 = parse_action();
if (r4 !== null) {
r0 = [r3, r4];
} else {
@ -583,10 +583,10 @@ module.exports = (function(){
}
if (r0 !== null) {
reportedPos = r1;
r0 = (function(expression) {
r0 = (function(code) {
return {
type: "simple_not",
expression: expression
type: "semantic_not",
code: code
};
})(r4);
}
@ -594,7 +594,36 @@ module.exports = (function(){
pos = r1;
}
if (r0 === null) {
r0 = parse_suffixed();
r1 = pos;
r2 = pos;
r3 = parse_not();
if (r3 !== null) {
r4 = parse_suffixed();
if (r4 !== null) {
r0 = [r3, r4];
} else {
r0 = null;
pos = r2;
}
} else {
r0 = null;
pos = r2;
}
if (r0 !== null) {
reportedPos = r1;
r0 = (function(expression) {
return {
type: "simple_not",
expression: expression
};
})(r4);
}
if (r0 === null) {
pos = r1;
}
if (r0 === null) {
r0 = parse_suffixed();
}
}
}
}
@ -1150,6 +1179,42 @@ module.exports = (function(){
return r0;
}
/*
 * Matches a literal "$" followed by whatever parse___() accepts (the grammar
 * rule is `dollar = "$" __ { return "$"; }`).
 *
 * Reads and updates the parser's shared state: `input`, `pos`,
 * `reportFailures` and `reportedPos`.
 *
 * Returns "$" on success. On failure returns null and leaves `pos` at the
 * value it had on entry.
 */
function parse_dollar() {
var startPos = pos;

/* 36 is the character code of "$". */
if (input.charCodeAt(pos) !== 36) {
/* Failures are only recorded while reporting is not suppressed. */
if (reportFailures === 0) {
matchFailed("\"$\"");
}
pos = startPos;
return null;
}
pos++;

/* The "$" matched; the trailing `__` part must match as well. */
if (parse___() === null) {
pos = startPos;
return null;
}

/* Whole sequence matched: record where it began and yield the token. */
reportedPos = startPos;
return "$";
}
function parse_question() {
var r0, r1, r2, r3, r4;

@ -177,6 +177,27 @@ describe("compiler pass |allocateRegisters|", function() {
});
});
// Specs for how the pass under test handles "text" nodes (the `$expression`
// syntax). Each case feeds a small grammar to `pass` and compares the resulting
// AST against an expected-details fixture.
// NOTE(review): savePosDetails, reuseResultDetails, scopedDetails,
// unblockedDetails and the toChangeAST matcher are defined outside this
// excerpt; their exact contents cannot be confirmed from here.
describe("for text", function() {
it("allocates a position register", function() {
expect(pass).toChangeAST('start = $"a"', savePosDetails);
});
it("reuses its own result register for the expression", function() {
expect(pass).toChangeAST('start = $"a"', reuseResultDetails);
});
// The label `a` lives inside the `$(...)` group while the action is outside it.
it("creates a new scope", function() {
expect(pass).toChangeAST('start = $(a:"a") { }', scopedDetails);
});
it("unblocks registers blocked by its children", function() {
expect(pass).toChangeAST(
'start = ($(a:"a") "b") ("c" "d")',
unblockedDetails
);
});
});
describe("for simple and", function() {
it("allocates a position register", function() {
expect(pass).toChangeAST('start = &"a"', savePosDetails);

@ -60,6 +60,10 @@ describe("compiler pass |removeProxyRules|", function() {
expect(pass).toChangeAST(proxyGrammar('start = label:proxy'), simpleDetails);
});
it("removes proxy rule from a text", function() {
expect(pass).toChangeAST(proxyGrammar('start = $proxy'), simpleDetails);
});
it("removes proxy rule from a simple and", function() {
expect(pass).toChangeAST(proxyGrammar('start = &proxy'), simpleDetails);
});

@ -63,6 +63,10 @@ describe("compiler pass |reportLeftRecursion|", function() {
expect(pass).toReportLeftRecursionIn('start = label:start');
});
it("reports left recursion inside a text", function() {
expect(pass).toReportLeftRecursionIn('start = $start');
});
it("reports left recursion inside a simple and", function() {
expect(pass).toReportLeftRecursionIn('start = &start');
});

@ -64,6 +64,10 @@ describe("compiler pass |reportMissingRules|", function() {
expect(pass).toReportMissingRuleIn('start = label:missing');
});
it("reports missing rule referenced from a text", function() {
expect(pass).toReportMissingRuleIn('start = $missing');
});
it("reports missing rule referenced from a simple and", function() {
expect(pass).toReportMissingRuleIn('start = &missing');
});

@ -353,6 +353,14 @@ describe("generated parser", function() {
});
});
// End-to-end spec for the `$` operator: builds a parser from a grammar whose
// start rule wraps a three-literal sequence in `$(...)` and checks it against
// the input "abc".
// NOTE(review): toParse is a custom matcher defined outside this excerpt —
// presumably (input, expected result), i.e. the parser should return the
// matched string "abc" rather than a structured result; confirm.
describe("text matching", function() {
it("matches correctly", function() {
var parser = PEG.buildParser('start = $("a" "b" "c")', options);
expect(parser).toParse("abc", "abc");
});
});
describe("simple and matching", function() {
it("matches correctly", function() {
var parser = PEG.buildParser('start = &"a" "a"', options);

@ -272,6 +272,10 @@ describe("PEG.js grammar parser", function() {
/* Canonical prefixed is "!\"abcd\"". */
it("parses prefixed", function() {
expect('start = $"abcd"?' ).toParseAs(oneRuleGrammar({
type: "text",
expression: optionalLiteral
}));
expect('start = &{ code }').toParseAs(oneRuleGrammar({
type: "semantic_and",
code: " code "

@ -88,7 +88,13 @@ labeled
/ prefixed
prefixed
= and code:action {
= dollar expression:suffixed {
return {
type: "text",
expression: expression
};
}
/ and code:action {
return {
type: "semantic_and",
code: code
@ -169,6 +175,7 @@ semicolon = ";" __ { return ";"; }
slash = "/" __ { return "/"; }
and = "&" __ { return "&"; }
not = "!" __ { return "!"; }
dollar = "$" __ { return "$"; }
question = "?" __ { return "?"; }
star = "*" __ { return "*"; }
plus = "+" __ { return "+"; }

Loading…
Cancel
Save