From af1968054b334e39cd11a4385a437dce33f988d2 Mon Sep 17 00:00:00 2001 From: David Majda Date: Tue, 8 Jun 2010 14:49:13 +0200 Subject: [PATCH] Implement semantic predicates --- lib/compiler.js | 106 +++++++++++++++++++++------------ lib/metagrammar.js | 124 +++++++++++++++++++++++++++------------ lib/metagrammar.pegjs | 18 +++++- test/compiler-test.js | 56 ++++++++++++++---- test/metagrammar-test.js | 28 ++++++--- 5 files changed, 238 insertions(+), 94 deletions(-) diff --git a/lib/compiler.js b/lib/compiler.js index 19c9120..575ae4a 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -282,16 +282,18 @@ PEG.Compiler = { } }, - rule: checkExpression, - choice: checkSubnodes("alternatives"), - sequence: checkSubnodes("elements"), - labeled: checkExpression, - and_predicate: checkExpression, - not_predicate: checkExpression, - optional: checkExpression, - zero_or_more: checkExpression, - one_or_more: checkExpression, - action: checkExpression, + rule: checkExpression, + choice: checkSubnodes("alternatives"), + sequence: checkSubnodes("elements"), + labeled: checkExpression, + simple_and: checkExpression, + simple_not: checkExpression, + semantic_and: nop, + semantic_not: nop, + optional: checkExpression, + zero_or_more: checkExpression, + one_or_more: checkExpression, + action: checkExpression, rule_ref: function(node) { @@ -302,9 +304,9 @@ PEG.Compiler = { } }, - literal: nop, - any: nop, - "class": nop + literal: nop, + any: nop, + "class": nop }; function check(node) { checkFunctions[node.type](node); } @@ -347,13 +349,15 @@ PEG.Compiler = { } }, - labeled: checkExpression, - and_predicate: checkExpression, - not_predicate: checkExpression, - optional: checkExpression, - zero_or_more: checkExpression, - one_or_more: checkExpression, - action: checkExpression, + labeled: checkExpression, + simple_and: checkExpression, + simple_not: checkExpression, + semantic_and: nop, + semantic_not: nop, + optional: checkExpression, + zero_or_more: checkExpression, + one_or_more: checkExpression, + action: checkExpression, rule_ref: function(node, appliedRules) { @@ -365,9 +369,9 @@ PEG.Compiler = { check(ast.rules[node.name], appliedRules); }, - literal: nop, - any: nop, - "class": nop + literal: nop, + any: nop, + "class": nop }; function check(node, appliedRules) { @@ -416,16 +420,18 @@ PEG.Compiler = { } }, - rule: replaceInExpression, - choice: replaceInSubnodes("alternatives"), - sequence: replaceInSubnodes("elements"), - labeled: replaceInExpression, - and_predicate: replaceInExpression, - not_predicate: replaceInExpression, - optional: replaceInExpression, - zero_or_more: replaceInExpression, - one_or_more: replaceInExpression, - action: replaceInExpression, + rule: replaceInExpression, + choice: replaceInSubnodes("alternatives"), + sequence: replaceInSubnodes("elements"), + labeled: replaceInExpression, + simple_and: replaceInExpression, + simple_not: replaceInExpression, + semantic_and: nop, + semantic_not: nop, + optional: replaceInExpression, + zero_or_more: replaceInExpression, + one_or_more: replaceInExpression, + action: replaceInExpression, rule_ref: function(node, from, to) { @@ -434,9 +440,9 @@ PEG.Compiler = { } }, - literal: nop, - any: nop, - "class": nop + literal: nop, + any: nop, + "class": nop }; function replace(node, from, to) { @@ -819,7 +825,7 @@ PEG.Compiler = { return PEG.Compiler.compileNode(node.expression, resultVar); }, - and_predicate: function(node, resultVar) { + simple_and: function(node, resultVar) { var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos"); var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar"); var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result"); @@ -846,7 +852,7 @@ PEG.Compiler = { ); }, - not_predicate: function(node, resultVar) { + simple_not: function(node, resultVar) { var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos"); var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar"); var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result"); @@ -873,6 +879,30 @@ PEG.Compiler = { ); }, + semantic_and: function(node, resultVar) { + var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos"); + + return PEG.Compiler.formatCode( + "var ${resultVar} = (function() {${actionCode}})() ? '' : null;", + { + actionCode: node.code, + resultVar: resultVar + } + ); + }, + + semantic_not: function(node, resultVar) { + var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos"); + + return PEG.Compiler.formatCode( + "var ${resultVar} = (function() {${actionCode}})() ? null : '';", + { + actionCode: node.code, + resultVar: resultVar + } + ); + }, + optional: function(node, resultVar) { var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result"); diff --git a/lib/metagrammar.js b/lib/metagrammar.js index d23fbbb..1e3c538 100644 --- a/lib/metagrammar.js +++ b/lib/metagrammar.js @@ -469,61 +469,113 @@ PEG.grammarParser = (function(){ } - var savedPos1 = pos; - var result8 = parse_and(context); - if (result8 !== null) { - var result9 = parse_suffixed(context); - if (result9 !== null) { - var result7 = [result8, result9]; + var savedPos3 = pos; + var result16 = parse_and(context); + if (result16 !== null) { + var result17 = parse_action(context); + if (result17 !== null) { + var result15 = [result16, result17]; } else { - var result7 = null; - pos = savedPos1; + var result15 = null; + pos = savedPos3; } } else { - var result7 = null; - pos = savedPos1; + var result15 = null; + pos = savedPos3; } - var result6 = result7 !== null - ? (function(expression) { + var result14 = result15 !== null + ? (function(code) { return { - type: "and_predicate", - expression: expression + type: "semantic_and", + code: code }; - })(result7[1]) + })(result15[1]) : null; - if (result6 !== null) { - var result0 = result6; + if (result14 !== null) { + var result0 = result14; } else { - var savedPos0 = pos; - var result4 = parse_not(context); - if (result4 !== null) { - var result5 = parse_suffixed(context); - if (result5 !== null) { - var result3 = [result4, result5]; + var savedPos2 = pos; + var result12 = parse_and(context); + if (result12 !== null) { + var result13 = parse_suffixed(context); + if (result13 !== null) { + var result11 = [result12, result13]; } else { - var result3 = null; - pos = savedPos0; + var result11 = null; + pos = savedPos2; } } else { - var result3 = null; - pos = savedPos0; + var result11 = null; + pos = savedPos2; } - var result2 = result3 !== null + var result10 = result11 !== null ? (function(expression) { return { - type: "not_predicate", + type: "simple_and", expression: expression }; - })(result3[1]) + })(result11[1]) : null; - if (result2 !== null) { - var result0 = result2; + if (result10 !== null) { + var result0 = result10; } else { - var result1 = parse_suffixed(context); - if (result1 !== null) { - var result0 = result1; + var savedPos1 = pos; + var result8 = parse_not(context); + if (result8 !== null) { + var result9 = parse_action(context); + if (result9 !== null) { + var result7 = [result8, result9]; + } else { + var result7 = null; + pos = savedPos1; + } + } else { + var result7 = null; + pos = savedPos1; + } + var result6 = result7 !== null + ? (function(code) { + return { + type: "semantic_not", + code: code + }; + })(result7[1]) + : null; + if (result6 !== null) { + var result0 = result6; } else { - var result0 = null;; + var savedPos0 = pos; + var result4 = parse_not(context); + if (result4 !== null) { + var result5 = parse_suffixed(context); + if (result5 !== null) { + var result3 = [result4, result5]; + } else { + var result3 = null; + pos = savedPos0; + } + } else { + var result3 = null; + pos = savedPos0; + } + var result2 = result3 !== null + ? (function(expression) { + return { + type: "simple_not", + expression: expression + }; + })(result3[1]) + : null; + if (result2 !== null) { + var result0 = result2; + } else { + var result1 = parse_suffixed(context); + if (result1 !== null) { + var result0 = result1; + } else { + var result0 = null;; + }; + }; }; }; } diff --git a/lib/metagrammar.pegjs b/lib/metagrammar.pegjs index fe85187..2f6cbf8 100644 --- a/lib/metagrammar.pegjs +++ b/lib/metagrammar.pegjs @@ -82,15 +82,27 @@ labeled / prefixed prefixed - = and expression:suffixed { + = and code:action { return { - type: "and_predicate", + type: "semantic_and", + code: code + }; + } + / and expression:suffixed { + return { + type: "simple_and", expression: expression }; } + / not code:action { + return { + type: "semantic_not", + code: code + }; + } / not expression:suffixed { return { - type: "not_predicate", + type: "simple_not", expression: expression }; } diff --git a/test/compiler-test.js b/test/compiler-test.js index bed60d1..1e9cae4 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -303,7 +303,7 @@ test("labels", function() { doesNotParse(parser, "b"); }); -test("and predicate", function() { +test("simple and", function() { var parser = PEG.buildParser('start = "a" &"b" "b"'); parses(parser, "ab", ["a", "", "b"]); doesNotParse(parser, "ac"); @@ -314,7 +314,7 @@ test("and predicate", function() { */ }); -test("not predicate", function() { +test("simple not", function() { var parser = PEG.buildParser('start = "a" !"b"'); parses(parser, "a", ["a", ""]); doesNotParse(parser, "ab"); @@ -327,6 +327,22 @@ test("not predicate", function() { parses(posTestParser, "ac", ["a", "", "c"]); }); +test("semantic and", function() { + var acceptingParser = PEG.buildParser('start = "a" &{ return true; } "b"'); + parses(acceptingParser, "ab", ["a", "", "b"]); + + var rejectingParser = PEG.buildParser('start = "a" &{ return false; } "b"'); + doesNotParse(rejectingParser, "ab"); +}); + +test("semantic not", function() { + var acceptingParser = PEG.buildParser('start = "a" !{ return false; } "b"'); + parses(acceptingParser, "ab", ["a", "", "b"]); + + var rejectingParser = PEG.buildParser('start = "a" !{ return true; } "b"'); + doesNotParse(rejectingParser, "ab"); +}); + test("optional expressions", function() { var parser = PEG.buildParser('start = "a"?'); parses(parser, "", ""); @@ -386,15 +402,35 @@ test("actions", function() { }); test("initializer", function() { - var variableDefinitionParser = PEG.buildParser( + var variableInActionParser = PEG.buildParser( '{ a = 42; }; start = "a" { return a; }' ); - parses(variableDefinitionParser, "a", 42); + parses(variableInActionParser, "a", 42); - var functionDefinitionparser = PEG.buildParser( + var functionInActionParser = PEG.buildParser( '{ function f() { return 42; } }; start = "a" { return f(); }' ); - parses(variableDefinitionParser, "a", 42); + parses(functionInActionParser, "a", 42); + + var variableInSemanticAndParser = PEG.buildParser( + '{ a = 42; }; start = "a" &{ return a === 42; }' + ); + parses(variableInSemanticAndParser, "a", ["a", ""]); + + var functionInSemanticAndParser = PEG.buildParser( + '{ function f() { return 42; } }; start = "a" &{ return f() === 42; }' + ); + parses(functionInSemanticAndParser, "a", ["a", ""]); + + var variableInSemanticNotParser = PEG.buildParser( + '{ a = 42; }; start = "a" !{ return a !== 42; }' + ); + parses(variableInSemanticNotParser, "a", ["a", ""]); + + var functionInSemanticNotParser = PEG.buildParser( + '{ function f() { return 42; } }; start = "a" !{ return f() !== 42; }' + ); + parses(functionInSemanticNotParser, "a", ["a", ""]); }); test("rule references", function() { @@ -568,16 +604,16 @@ test("error messages", function() { 'Expected "c" but "d" found.' ); - var notPredicateParser = PEG.buildParser('start = !"a" "b"'); + var simpleNotParser = PEG.buildParser('start = !"a" "b"'); doesNotParseWithMessage( - notPredicateParser, + simpleNotParser, "c", 'Expected "b" but "c" found.' ); - var andPredicateParser = PEG.buildParser('start = &"a" [a-b]'); + var simpleAndParser = PEG.buildParser('start = &"a" [a-b]'); doesNotParseWithMessage( - andPredicateParser, + simpleAndParser, "c", 'Expected end of input but "c" found.' ); diff --git a/test/metagrammar-test.js b/test/metagrammar-test.js index 13a9bb1..d37d776 100644 --- a/test/metagrammar-test.js +++ b/test/metagrammar-test.js @@ -67,8 +67,20 @@ function nodeWithExpressionConstructor(type) { } } -var andPredicate = nodeWithExpressionConstructor("and_predicate"); -var notPredicate = nodeWithExpressionConstructor("not_predicate"); +function nodeWithCodeConstructor(type) { + return function(code) { + return { + type: type, + code: code + }; + } +} + +var simpleAnd = nodeWithExpressionConstructor("simple_and"); +var simpleNot = nodeWithExpressionConstructor("simple_not"); + +var semanticAnd = nodeWithCodeConstructor("semantic_and"); +var semanticNot = nodeWithCodeConstructor("semantic_not"); var optional = nodeWithExpressionConstructor("optional"); var zeroOrMore = nodeWithExpressionConstructor("zero_or_more"); @@ -270,19 +282,21 @@ test("parses sequence", function() { test("parses labeled", function() { grammarParserParses( 'start = label:!"abcd"', - oneRuleGrammar(labeled("label", notPredicate(literalAbcd))) + oneRuleGrammar(labeled("label", simpleNot(literalAbcd))) ); grammarParserParses( 'start = !"abcd"', - oneRuleGrammar(notPredicate(literalAbcd)) + oneRuleGrammar(simpleNot(literalAbcd)) ); }); /* Canonical prefixed is "!\"abcd\"". */ test("parses prefixed", function() { - grammarParserParses('start = &"abcd"?', oneRuleGrammar(andPredicate(optionalLiteral))); - grammarParserParses('start = !"abcd"?', oneRuleGrammar(notPredicate(optionalLiteral))); - grammarParserParses('start = "abcd"?', oneRuleGrammar(optionalLiteral)); + grammarParserParses('start = &{ code }', oneRuleGrammar(semanticAnd(" code "))); + grammarParserParses('start = &"abcd"?', oneRuleGrammar(simpleAnd(optionalLiteral))); + grammarParserParses('start = !{ code }', oneRuleGrammar(semanticNot(" code "))); + grammarParserParses('start = !"abcd"?', oneRuleGrammar(simpleNot(optionalLiteral))); + grammarParserParses('start = "abcd"?', oneRuleGrammar(optionalLiteral)); }); /* Canonical suffixed is "\"abcd\"?". */