From 76ed63c86ece4559b074f6858ae2d14ce2ebb8de Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 21 May 2010 19:00:00 +0200 Subject: [PATCH] AST refactoring 6/6: Get rid of the |Grammar| namespace --- lib/compiler.js | 48 +- test/compiler-test.js | 6 +- test/metagrammar-test.js | 946 +++++++++++++++++++-------------------- 3 files changed, 496 insertions(+), 504 deletions(-) diff --git a/lib/compiler.js b/lib/compiler.js index 13dd3e9..006abbb 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -14,8 +14,8 @@ * "start" is used. * * Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error - * or |PEG.Grammar.GrammarError| if it contains a semantic error. Note that not - * all errors are detected during the generation and some may protrude to the + * or |PEG.GrammarError| if it contains a semantic error. Note that not all + * errors are detected during the generation and some may protrude to the * generated parser and cause its malfunction. */ PEG.buildParser = function(grammar, startRule) { @@ -27,6 +27,17 @@ PEG.buildParser = function(grammar, startRule) { ); }; +/* ===== PEG.GrammarError ===== */ + +/* Thrown when the grammar contains an error. */ + +PEG.GrammarError = function(message) { + this.name = "PEG.GrammarError"; + this.message = message; +}; + +PEG.GrammarError.prototype = Error.prototype; + /* ===== PEG.ArrayUtils ===== */ /* Array manipulation utility functions. */ @@ -136,23 +147,6 @@ PEG.RegExpUtils = { } }; -/* ===== PEG.Grammar ===== */ - -/* Namespace with grammar AST nodes. */ - -PEG.Grammar = {}; - -/* ===== PEG.Grammar.GrammarError ===== */ - -/* Thrown when the grammar contains an error. */ - -PEG.Grammar.GrammarError = function(message) { - this.name = "PEG.Grammar.GrammarError"; - this.message = message; -}; - -PEG.Grammar.GrammarError.prototype = Error.prototype; - /* ===== PEG.Compiler ===== */ PEG.Compiler = { @@ -261,8 +255,8 @@ PEG.Compiler = { * Checks made on the grammar AST before compilation. 
Each check is a function * that is passed the AST and start rule and does not return anything. If the * check passes, the function does not do anything special, otherwise it - * throws |PEG.Grammar.GrammarError|. The checks are run in sequence in order - * of their definition. + * throws |PEG.GrammarError|. The checks are run in sequence in order of their + * definition. */ _checks: [ /* Checks that all referenced rules exist. */ @@ -291,7 +285,7 @@ PEG.Compiler = { rule_ref: function(node) { if (ast[node.name] === undefined) { - throw new PEG.Grammar.GrammarError( + throw new PEG.GrammarError( "Referenced rule \"" + node.name + "\" does not exist." ); } @@ -312,7 +306,7 @@ PEG.Compiler = { /* Checks that the start rule is defined. */ function(ast, startRule) { if (ast[startRule] === undefined) { - throw new PEG.Grammar.GrammarError( + throw new PEG.GrammarError( "Missing \"" + startRule + "\" rule." ); } @@ -356,7 +350,7 @@ PEG.Compiler = { rule_ref: function(node, appliedRules) { if (PEG.ArrayUtils.contains(appliedRules, node.name)) { - throw new PEG.Grammar.GrammarError( + throw new PEG.GrammarError( "Left recursion detected for rule \"" + node.name + "\"." ); } @@ -771,9 +765,9 @@ PEG.Compiler = { /* * Generates a parser from a specified grammar AST and start rule. Throws - * |PEG.Grammar.GrammarError| if the AST contains a semantic error. Note that - * not all errors are detected during the generation and some may protrude to - * the generated parser and cause its malfunction. + * |PEG.GrammarError| if the AST contains a semantic error. Note that not all + * errors are detected during the generation and some may protrude to the + * generated parser and cause its malfunction. 
*/ compileParser: function(ast, startRule) { /* diff --git a/test/compiler-test.js b/test/compiler-test.js index 45447f7..fb212ac 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -194,7 +194,7 @@ test("buildParser reports syntax errors in the grammar", function() { test("buildParser reports missing start rule", function() { throws( function() { PEG.buildParser('notStart: "abcd"'); }, - PEG.Grammar.GrammarError, + PEG.GrammarError, { message: "Missing \"start\" rule." } ); }); @@ -217,7 +217,7 @@ test("buildParser reports missing referenced rules", function() { PEG.ArrayUtils.each(grammars, function(grammar) { throws( function() { PEG.buildParser(grammar); }, - PEG.Grammar.GrammarError, + PEG.GrammarError, { message: "Referenced rule \"missing\" does not exist." } ); }); @@ -244,7 +244,7 @@ test("buildParser reports left recursion", function() { PEG.ArrayUtils.each(grammars, function(grammar) { throws( function() { PEG.buildParser(grammar); }, - PEG.Grammar.GrammarError, + PEG.GrammarError, { message: "Left recursion detected for rule \"start\"." 
} ); }); diff --git a/test/metagrammar-test.js b/test/metagrammar-test.js index 8e1ef34..c0318c7 100644 --- a/test/metagrammar-test.js +++ b/test/metagrammar-test.js @@ -16,505 +16,503 @@ global.grammarParserDoesNotParse = function(input) { module("Grammar Parser"); -with (PEG.Grammar) { - function rule(name, displayName, expression) { - return { - type: "rule", - name: name, - displayName: displayName, - expression: expression - }; - } +function rule(name, displayName, expression) { + return { + type: "rule", + name: name, + displayName: displayName, + expression: expression + }; +} - function choice(alternatives) { - return { - type: "choice", - alternatives: alternatives - }; - } +function choice(alternatives) { + return { + type: "choice", + alternatives: alternatives + }; +} + +function sequence(elements) { + return { + type: "sequence", + elements: elements + }; +} - function sequence(elements) { +function nodeWithExpressionConstructor(type) { + return function(expression) { return { - type: "sequence", - elements: elements + type: type, + expression: expression }; } +} - function nodeWithExpressionConstructor(type) { - return function(expression) { - return { - type: type, - expression: expression - }; - } - } - - var andPredicate = nodeWithExpressionConstructor("and_predicate"); - var notPredicate = nodeWithExpressionConstructor("not_predicate"); +var andPredicate = nodeWithExpressionConstructor("and_predicate"); +var notPredicate = nodeWithExpressionConstructor("not_predicate"); - var optional = nodeWithExpressionConstructor("optional"); - var zeroOrMore = nodeWithExpressionConstructor("zero_or_more"); - var oneOrMore = nodeWithExpressionConstructor("one_or_more"); +var optional = nodeWithExpressionConstructor("optional"); +var zeroOrMore = nodeWithExpressionConstructor("zero_or_more"); +var oneOrMore = nodeWithExpressionConstructor("one_or_more"); - function action(expression, action) { - return { - type: "action", - expression: expression, - action: 
action - }; +function action(expression, action) { + return { + type: "action", + expression: expression, + action: action }; +}; - function ruleRef(name) { - return { - type: "rule_ref", - name: name - }; - } - - function literal(value) { - return { - type: "literal", - value: value - }; - } +function ruleRef(name) { + return { + type: "rule_ref", + name: name + }; +} - function any() { - return { type: "any" }; - } +function literal(value) { + return { + type: "literal", + value: value + }; +} - function klass(characters) { - return { - type: "class", - characters: characters - }; - } +function any() { + return { type: "any" }; +} - var literalAbcd = literal("abcd"); - var literalEfgh = literal("efgh"); - var literalIjkl = literal("ijkl"); +function klass(characters) { + return { + type: "class", + characters: characters + }; +} - var optionalLiteral = optional(literalAbcd); +var literalAbcd = literal("abcd"); +var literalEfgh = literal("efgh"); +var literalIjkl = literal("ijkl"); - var notAbcd = notPredicate(literalAbcd); - var notEfgh = notPredicate(literalEfgh); - var notIjkl = notPredicate(literalIjkl); +var optionalLiteral = optional(literalAbcd); - var sequenceEmpty = sequence([]); - var sequenceNots = sequence([notAbcd, notEfgh, notIjkl]); - var sequenceLiterals = sequence([literalAbcd, literalEfgh, literalIjkl]); +var notAbcd = notPredicate(literalAbcd); +var notEfgh = notPredicate(literalEfgh); +var notIjkl = notPredicate(literalIjkl); - var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]); +var sequenceEmpty = sequence([]); +var sequenceNots = sequence([notAbcd, notEfgh, notIjkl]); +var sequenceLiterals = sequence([literalAbcd, literalEfgh, literalIjkl]); - function oneRuleGrammar(expression) { - return { start: rule("start", null, expression) }; - } +var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]); - var simpleGrammar = oneRuleGrammar(literal("abcd")); +function oneRuleGrammar(expression) { + return { start: 
rule("start", null, expression) }; +} - function identifierGrammar(identifier) { - return oneRuleGrammar(ruleRef(identifier)); - } +var simpleGrammar = oneRuleGrammar(literal("abcd")); - var literal_ = literal - function literalGrammar(literal) { - return oneRuleGrammar(literal_(literal)); - } +function identifierGrammar(identifier) { + return oneRuleGrammar(ruleRef(identifier)); +} - function classGrammar(chars) { - return oneRuleGrammar(klass(chars)); - } +var literal_ = literal +function literalGrammar(literal) { + return oneRuleGrammar(literal_(literal)); +} - var anyGrammar = oneRuleGrammar(any()); +function classGrammar(chars) { + return oneRuleGrammar(klass(chars)); +} - var action_ = action; - function actionGrammar(action) { - return oneRuleGrammar(action_(literal("a"), action)); - } +var anyGrammar = oneRuleGrammar(any()); - /* Canonical grammar is "a: \"abcd\";\nb: \"efgh\";\nc: \"ijkl\";". */ - test("parses grammar", function() { - grammarParserParses('a: "abcd"', { a: rule("a", null, literalAbcd) }); - grammarParserParses( - 'a: "abcd"\nb: "efgh"\nc: "ijkl"', - { - a: rule("a", null, literalAbcd), - b: rule("b", null, literalEfgh), - c: rule("c", null, literalIjkl) - } - ); - }); - - /* Canonical rule is "a: \"abcd\"". */ - test("parses rule", function() { - grammarParserParses( - 'start: "abcd" / "efgh" / "ijkl"', - oneRuleGrammar(choiceLiterals) - ); - grammarParserParses( - 'start "start rule": "abcd" / "efgh" / "ijkl"', - { - start: rule("start", "start rule", choiceLiterals) - } - ); - }); - - /* Canonical expression is "\"abcd\" / \"efgh\" / \"ijkl\"". */ - test("parses expression", function() { - grammarParserParses( - 'start: "abcd" / "efgh" / "ijkl"', - oneRuleGrammar(choiceLiterals) - ); - }); - - /* Canonical choice is "\"abcd\" / \"efgh\" / \"ijkl\"". 
*/ - test("parses choice", function() { - grammarParserParses( - 'start: "abcd" "efgh" "ijkl"', - oneRuleGrammar(sequenceLiterals) - ); - grammarParserParses( - 'start: "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl"', - oneRuleGrammar(choice([ - sequenceLiterals, - sequenceLiterals, - sequenceLiterals - ])) - ); - }); - - /* Canonical sequence is "\"abcd\" \"efgh\" \"ijkl\"". */ - test("parses sequence", function() { - grammarParserParses( - 'start: { code }', - oneRuleGrammar(action(sequenceEmpty, " code ")) - ); - grammarParserParses( - 'start: !"abcd" { code }', - oneRuleGrammar(action(notAbcd, " code ")) - ); - grammarParserParses( - 'start: !"abcd" !"efgh" !"ijkl" { code }', - oneRuleGrammar(action(sequenceNots, " code ")) - ); - - grammarParserParses('start: ', oneRuleGrammar(sequenceEmpty)); - grammarParserParses('start: !"abcd"', oneRuleGrammar(notAbcd)); - grammarParserParses( - 'start: !"abcd" !"efgh" !"ijkl"', - oneRuleGrammar(sequenceNots) - ); - }); - - /* Canonical prefixed is "!\"abcd\"". */ - test("parses prefixed", function() { - grammarParserParses('start: &"abcd"?', oneRuleGrammar(andPredicate(optionalLiteral))); - grammarParserParses('start: !"abcd"?', oneRuleGrammar(notPredicate(optionalLiteral))); - grammarParserParses('start: "abcd"?', oneRuleGrammar(optionalLiteral)); - }); - - /* Canonical suffixed is "\"abcd\"?". */ - test("parses suffixed", function() { - grammarParserParses('start: "abcd"?', oneRuleGrammar(optionalLiteral)); - grammarParserParses('start: "abcd"*', oneRuleGrammar(zeroOrMore(literalAbcd))); - grammarParserParses('start: "abcd"+', oneRuleGrammar(oneOrMore(literalAbcd))); - grammarParserParses('start: "abcd"', literalGrammar("abcd")); - }); - - /* Canonical primary is "\"abcd\"". 
*/ - test("parses primary", function() { - grammarParserParses('start: a', identifierGrammar("a")); - grammarParserParses('start: "abcd"', literalGrammar("abcd")); - grammarParserParses('start: .', anyGrammar); - grammarParserParses('start: [a-d]', classGrammar("a-d")); - grammarParserParses('start: ("abcd")', literalGrammar("abcd")); - }); - - /* Canonical action is "{ code }". */ - test("parses action", function() { - grammarParserParses('start: "a" { code }', actionGrammar(" code ")); - }); - - /* Canonical braced is "{ code }". */ - test("parses braced", function() { - grammarParserParses('start: "a" {}', actionGrammar("")); - grammarParserParses('start: "a" {a}', actionGrammar("a")); - grammarParserParses('start: "a" {{a}}', actionGrammar("{a}")); - grammarParserParses('start: "a" {aaa}', actionGrammar("aaa")); - }); - - /* Trivial character rules are not tested. */ - - /* Canonical identifier is "a". */ - test("parses identifier", function() { - grammarParserParses('start: a', identifierGrammar("a")); - grammarParserParses('start: z', identifierGrammar("z")); - grammarParserParses('start: A', identifierGrammar("A")); - grammarParserParses('start: Z', identifierGrammar("Z")); - grammarParserParses('start: _', identifierGrammar("_")); - grammarParserParses('start: $', identifierGrammar("$")); - grammarParserParses('start: aa', identifierGrammar("aa")); - grammarParserParses('start: az', identifierGrammar("az")); - grammarParserParses('start: aA', identifierGrammar("aA")); - grammarParserParses('start: aZ', identifierGrammar("aZ")); - grammarParserParses('start: a0', identifierGrammar("a0")); - grammarParserParses('start: a9', identifierGrammar("a9")); - grammarParserParses('start: a_', identifierGrammar("a_")); - grammarParserParses('start: a$', identifierGrammar("a$")); - grammarParserParses('start: abcd', identifierGrammar("abcd")); - - grammarParserParses('start: a\n', identifierGrammar("a")); - }); - - /* Canonical literal is "\"abcd\"". 
*/ - test("parses literal", function() { - grammarParserParses('start: "abcd"', literalGrammar("abcd")); - grammarParserParses("start: 'abcd'", literalGrammar("abcd")); - }); - - /* Canonical doubleQuotedLiteral is "\"abcd\"". */ - test("parses doubleQuotedLiteral", function() { - grammarParserParses('start: ""', literalGrammar("")); - grammarParserParses('start: "a"', literalGrammar("a")); - grammarParserParses('start: "abc"', literalGrammar("abc")); - - grammarParserParses('start: "abcd"\n', literalGrammar("abcd")); - }); - - /* Canonical doubleQuotedCharacter is "a". */ - test("parses doubleQuotedCharacter", function() { - grammarParserParses('start: "a"', literalGrammar("a")); - grammarParserParses('start: "\\n"', literalGrammar("\n")); - grammarParserParses('start: "\\0"', literalGrammar("\0")); - grammarParserParses('start: "\\x00"', literalGrammar("\x00")); - grammarParserParses('start: "\\u0120"', literalGrammar("\u0120")); - grammarParserParses('start: "\\\n"', literalGrammar("\n")); - }); - - /* Canonical simpleDoubleQuotedCharacter is "a". */ - test("parses simpleDoubleQuotedCharacter", function() { - grammarParserParses('start: "a"', literalGrammar("a")); - grammarParserParses('start: "\'"', literalGrammar("'")); - grammarParserDoesNotParse('start: """'); - grammarParserDoesNotParse('start: "\\"'); - grammarParserDoesNotParse('start: "\n"'); - grammarParserDoesNotParse('start: "\r"'); - grammarParserDoesNotParse('start: "\u2028"'); - grammarParserDoesNotParse('start: "\u2029"'); - }); - - /* Canonical singleQuotedLiteral is "'abcd'". */ - test("parses singleQuotedLiteral", function() { - grammarParserParses("start: ''", literalGrammar("")); - grammarParserParses("start: 'a'", literalGrammar("a")); - grammarParserParses("start: 'abc'", literalGrammar("abc")); - - grammarParserParses("start: 'abcd'\n", literalGrammar("abcd")); - }); - - /* Canonical singleQuotedCharacter is "a". 
*/ - test("parses singleQuotedCharacter", function() { - grammarParserParses("start: 'a'", literalGrammar("a")); - grammarParserParses("start: '\\n'", literalGrammar("\n")); - grammarParserParses("start: '\\0'", literalGrammar("\0")); - grammarParserParses("start: '\\x00'", literalGrammar("\x00")); - grammarParserParses("start: '\\u0120'", literalGrammar("\u0120")); - grammarParserParses("start: '\\\n'", literalGrammar("\n")); - }); - - /* Canonical simpleSingleQuotedCharacter is "a". */ - test("parses simpleSingleQuotedCharacter", function() { - grammarParserParses("start: 'a'", literalGrammar("a")); - grammarParserParses("start: '\"'", literalGrammar("\"")); - grammarParserDoesNotParse("start: '''"); - grammarParserDoesNotParse("start: '\\'"); - grammarParserDoesNotParse("start: '\n'"); - grammarParserDoesNotParse("start: '\r'"); - grammarParserDoesNotParse("start: '\u2028'"); - grammarParserDoesNotParse("start: '\u2029'"); - }); - - /* Canonical class is "[a-d]". */ - test("parses class", function() { - grammarParserParses("start: []", classGrammar("")); - grammarParserParses("start: [a-d]", classGrammar("a-d")); - grammarParserParses("start: [^a-d]", classGrammar("^a-d")); - grammarParserParses("start: [a]", classGrammar("a")); - grammarParserParses("start: [a-de-hi-l]", classGrammar("a-de-hi-l")); - - grammarParserParses("start: [a-d]\n", classGrammar("a-d")); - }); - - /* Canonical classCharacterRange is "a-d". */ - test("parses classCharacterRange", function() { - grammarParserParses("start: [a-d]", classGrammar("a-d")); - grammarParserParses("start: [a-a]", classGrammar("a-a")); - grammarParserDoesNotParse("start: [b-a]"); - }); - - /* Canonical classCharacter is "a". */ - test("parses classCharacter", function() { - grammarParserParses("start: [a]", classGrammar("a")); - }); - - /* Canonical bracketDelimitedCharacter is "a". 
*/ - test("parses bracketDelimitedCharacter", function() { - grammarParserParses("start: [a]", classGrammar("a")); - grammarParserParses("start: [\\n]", classGrammar("\\n")); - grammarParserParses("start: [\\0]", classGrammar("\\0")); - grammarParserParses("start: [\\x00]", classGrammar("\\0")); - grammarParserParses("start: [\\u0120]", classGrammar("\u0120")); - grammarParserParses("start: [\\\n]", classGrammar("\\n")); - }); - - /* Canonical simpleBracketDelimiedCharacter is "a". */ - test("parses simpleBracketDelimitedCharacter", function() { - grammarParserParses("start: [a]", classGrammar("a")); - grammarParserParses("start: [[]", classGrammar("[")); - grammarParserDoesNotParse("start: []]"); - grammarParserDoesNotParse("start: [\\]"); - grammarParserDoesNotParse("start: [\n]"); - grammarParserDoesNotParse("start: [\r]"); - grammarParserDoesNotParse("start: [\u2028]"); - grammarParserDoesNotParse("start: [\u2029]"); - }); - - /* Canonical simpleEscapeSequence is "\\n". */ - test("parses simpleEscapeSequence", function() { - grammarParserParses('start: "\\\'"', literalGrammar("'")); - grammarParserParses('start: "\\""', literalGrammar("\"")); - grammarParserParses('start: "\\\\"', literalGrammar("\\")); - grammarParserParses('start: "\\b"', literalGrammar("\b")); - grammarParserParses('start: "\\f"', literalGrammar("\f")); - grammarParserParses('start: "\\n"', literalGrammar("\n")); - grammarParserParses('start: "\\r"', literalGrammar("\r")); - grammarParserParses('start: "\\t"', literalGrammar("\t")); - /* IE does not recognize "\v". */ - grammarParserParses('start: "\\v"', literalGrammar("\x0B")); - - grammarParserParses('start: "\\a"', literalGrammar("a")); - }); - - /* Canonical zeroEscapeSequence is "\\0". 
*/ - test("parses zeroEscapeSequence", function() { - grammarParserParses('start: "\\0"', literalGrammar("\0")); - grammarParserDoesNotParse('start: "\\00"'); - grammarParserDoesNotParse('start: "\\09"'); - }); - - /* Canonical hexEscapeSequence is "\\x00". */ - test("parses hexEscapeSequence", function() { - grammarParserParses('start: "\\x00"', literalGrammar("\x00")); - grammarParserParses('start: "\\x09"', literalGrammar("\x09")); - grammarParserParses('start: "\\x0a"', literalGrammar("\x0a")); - grammarParserParses('start: "\\x0f"', literalGrammar("\x0f")); - grammarParserParses('start: "\\x0A"', literalGrammar("\x0A")); - grammarParserParses('start: "\\x0F"', literalGrammar("\x0F")); - grammarParserDoesNotParse('start: "\\x0"'); - grammarParserParses('start: "\\x000"', literalGrammar("\x000")); - }); - - /* Canonical unicodeEscapeSequence is "\\u0120". */ - test("parses unicodeEscapeSequence", function() { - grammarParserParses('start: "\\u0120"', literalGrammar("\u0120")); - grammarParserParses('start: "\\u0129"', literalGrammar("\u0129")); - grammarParserParses('start: "\\u012a"', literalGrammar("\u012a")); - grammarParserParses('start: "\\u012f"', literalGrammar("\u012f")); - grammarParserParses('start: "\\u012A"', literalGrammar("\u012A")); - grammarParserParses('start: "\\u012F"', literalGrammar("\u012F")); - grammarParserDoesNotParse('start: "\\u012"'); - grammarParserParses('start: "\\u01234"', literalGrammar("\u01234")); - }); - - /* Canonical eolEscapeSequence is "\\\n". */ - test("parses eolEscapeSequence", function() { - grammarParserParses('start: "\\\n"', literalGrammar("\n")); - grammarParserParses('start: "\\\r\n"', literalGrammar("\r\n")); - grammarParserParses('start: "\\\r"', literalGrammar("\r")); - grammarParserParses('start: "\\\u2028"', literalGrammar("\u2028")); - grammarParserParses('start: "\\\u2029"', literalGrammar("\u2029")); - }); - - /* Canonical __ is "\n". 
*/ - test("parses __", function() { - grammarParserParses('start:"abcd"', simpleGrammar); - grammarParserParses('start: "abcd"', simpleGrammar); - grammarParserParses('start:\n"abcd"', simpleGrammar); - grammarParserParses('start:/* comment */"abcd"', simpleGrammar); - grammarParserParses('start: "abcd"', simpleGrammar); - }); - - /* Trivial character class rules are not tested. */ - - /* Canonical comment is "\/* comment *\/". */ - test("parses comment", function() { - grammarParserParses('start:// comment\n"abcd"', simpleGrammar); - grammarParserParses('start:/* comment */"abcd"', simpleGrammar); - }); - /* Canonical singleLineComment is "// comment". */ - test("parses singleLineComment", function() { - grammarParserParses('start://\n"abcd"', simpleGrammar); - grammarParserParses('start://a\n"abcd"', simpleGrammar); - grammarParserParses('start://aaa\n"abcd"', simpleGrammar); - grammarParserParses('start: "abcd"//', simpleGrammar); - }); - - /* Canonical multiLineComment is "\/* comment *\/". */ - test("parses multiLineComment", function() { - grammarParserParses('start:/**/"abcd"', simpleGrammar); - grammarParserParses('start:/*a*/"abcd"', simpleGrammar); - grammarParserParses('start:/*aaa*/"abcd"', simpleGrammar); - grammarParserParses('start:/*\n*/"abcd"', simpleGrammar); - grammarParserParses('start:/***/"abcd"', simpleGrammar); - grammarParserParses('start:/*a/*/"abcd"', simpleGrammar); - - grammarParserDoesNotParse('start:/*"abcd"'); - grammarParserDoesNotParse('start:/*/"abcd"'); - grammarParserDoesNotParse('start:/*/**/*/"abcd"'); - }); - - /* Canonical eol is "\n". */ - test("parses eol", function() { - grammarParserParses('start:\n"abcd"', simpleGrammar); - grammarParserParses('start:\r\n"abcd"', simpleGrammar); - grammarParserParses('start:\r"abcd"', simpleGrammar); - grammarParserParses('start:\u2028"abcd"', simpleGrammar); - grammarParserParses('start:\u2029"abcd"', simpleGrammar); - }); - - /* Canonical eolChar is "\n". 
*/ - test("parses eolChar", function() { - grammarParserParses('start:\n"abcd"', simpleGrammar); - grammarParserParses('start:\r"abcd"', simpleGrammar); - grammarParserParses('start:\u2028"abcd"', simpleGrammar); - grammarParserParses('start:\u2029"abcd"', simpleGrammar); - }); - - /* Canonical whitespace is " ". */ - test("parses whitespace", function() { - grammarParserParses('start:\t"abcd"', simpleGrammar); - /* IE does not recognize "\v". */ - grammarParserParses('start:\x0B"abcd"', simpleGrammar); - grammarParserParses('start:\f"abcd"', simpleGrammar); - grammarParserParses('start: "abcd"', simpleGrammar); - grammarParserParses('start:\u00A0"abcd"', simpleGrammar); - grammarParserParses('start:\uFEFF"abcd"', simpleGrammar); - grammarParserParses('start:\u1680"abcd"', simpleGrammar); - grammarParserParses('start:\u180E"abcd"', simpleGrammar); - grammarParserParses('start:\u2000"abcd"', simpleGrammar); - grammarParserParses('start:\u2001"abcd"', simpleGrammar); - grammarParserParses('start:\u2002"abcd"', simpleGrammar); - grammarParserParses('start:\u2003"abcd"', simpleGrammar); - grammarParserParses('start:\u2004"abcd"', simpleGrammar); - grammarParserParses('start:\u2005"abcd"', simpleGrammar); - grammarParserParses('start:\u2006"abcd"', simpleGrammar); - grammarParserParses('start:\u2007"abcd"', simpleGrammar); - grammarParserParses('start:\u2008"abcd"', simpleGrammar); - grammarParserParses('start:\u2009"abcd"', simpleGrammar); - grammarParserParses('start:\u200A"abcd"', simpleGrammar); - grammarParserParses('start:\u202F"abcd"', simpleGrammar); - grammarParserParses('start:\u205F"abcd"', simpleGrammar); - grammarParserParses('start:\u3000"abcd"', simpleGrammar); - }); +var action_ = action; +function actionGrammar(action) { + return oneRuleGrammar(action_(literal("a"), action)); } +/* Canonical grammar is "a: \"abcd\";\nb: \"efgh\";\nc: \"ijkl\";". 
*/ +test("parses grammar", function() { + grammarParserParses('a: "abcd"', { a: rule("a", null, literalAbcd) }); + grammarParserParses( + 'a: "abcd"\nb: "efgh"\nc: "ijkl"', + { + a: rule("a", null, literalAbcd), + b: rule("b", null, literalEfgh), + c: rule("c", null, literalIjkl) + } + ); +}); + +/* Canonical rule is "a: \"abcd\"". */ +test("parses rule", function() { + grammarParserParses( + 'start: "abcd" / "efgh" / "ijkl"', + oneRuleGrammar(choiceLiterals) + ); + grammarParserParses( + 'start "start rule": "abcd" / "efgh" / "ijkl"', + { + start: rule("start", "start rule", choiceLiterals) + } + ); +}); + +/* Canonical expression is "\"abcd\" / \"efgh\" / \"ijkl\"". */ +test("parses expression", function() { + grammarParserParses( + 'start: "abcd" / "efgh" / "ijkl"', + oneRuleGrammar(choiceLiterals) + ); +}); + +/* Canonical choice is "\"abcd\" / \"efgh\" / \"ijkl\"". */ +test("parses choice", function() { + grammarParserParses( + 'start: "abcd" "efgh" "ijkl"', + oneRuleGrammar(sequenceLiterals) + ); + grammarParserParses( + 'start: "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl"', + oneRuleGrammar(choice([ + sequenceLiterals, + sequenceLiterals, + sequenceLiterals + ])) + ); +}); + +/* Canonical sequence is "\"abcd\" \"efgh\" \"ijkl\"". */ +test("parses sequence", function() { + grammarParserParses( + 'start: { code }', + oneRuleGrammar(action(sequenceEmpty, " code ")) + ); + grammarParserParses( + 'start: !"abcd" { code }', + oneRuleGrammar(action(notAbcd, " code ")) + ); + grammarParserParses( + 'start: !"abcd" !"efgh" !"ijkl" { code }', + oneRuleGrammar(action(sequenceNots, " code ")) + ); + + grammarParserParses('start: ', oneRuleGrammar(sequenceEmpty)); + grammarParserParses('start: !"abcd"', oneRuleGrammar(notAbcd)); + grammarParserParses( + 'start: !"abcd" !"efgh" !"ijkl"', + oneRuleGrammar(sequenceNots) + ); +}); + +/* Canonical prefixed is "!\"abcd\"". 
*/ +test("parses prefixed", function() { + grammarParserParses('start: &"abcd"?', oneRuleGrammar(andPredicate(optionalLiteral))); + grammarParserParses('start: !"abcd"?', oneRuleGrammar(notPredicate(optionalLiteral))); + grammarParserParses('start: "abcd"?', oneRuleGrammar(optionalLiteral)); +}); + +/* Canonical suffixed is "\"abcd\"?". */ +test("parses suffixed", function() { + grammarParserParses('start: "abcd"?', oneRuleGrammar(optionalLiteral)); + grammarParserParses('start: "abcd"*', oneRuleGrammar(zeroOrMore(literalAbcd))); + grammarParserParses('start: "abcd"+', oneRuleGrammar(oneOrMore(literalAbcd))); + grammarParserParses('start: "abcd"', literalGrammar("abcd")); +}); + +/* Canonical primary is "\"abcd\"". */ +test("parses primary", function() { + grammarParserParses('start: a', identifierGrammar("a")); + grammarParserParses('start: "abcd"', literalGrammar("abcd")); + grammarParserParses('start: .', anyGrammar); + grammarParserParses('start: [a-d]', classGrammar("a-d")); + grammarParserParses('start: ("abcd")', literalGrammar("abcd")); +}); + +/* Canonical action is "{ code }". */ +test("parses action", function() { + grammarParserParses('start: "a" { code }', actionGrammar(" code ")); +}); + +/* Canonical braced is "{ code }". */ +test("parses braced", function() { + grammarParserParses('start: "a" {}', actionGrammar("")); + grammarParserParses('start: "a" {a}', actionGrammar("a")); + grammarParserParses('start: "a" {{a}}', actionGrammar("{a}")); + grammarParserParses('start: "a" {aaa}', actionGrammar("aaa")); +}); + +/* Trivial character rules are not tested. */ + +/* Canonical identifier is "a". 
*/ +test("parses identifier", function() { + grammarParserParses('start: a', identifierGrammar("a")); + grammarParserParses('start: z', identifierGrammar("z")); + grammarParserParses('start: A', identifierGrammar("A")); + grammarParserParses('start: Z', identifierGrammar("Z")); + grammarParserParses('start: _', identifierGrammar("_")); + grammarParserParses('start: $', identifierGrammar("$")); + grammarParserParses('start: aa', identifierGrammar("aa")); + grammarParserParses('start: az', identifierGrammar("az")); + grammarParserParses('start: aA', identifierGrammar("aA")); + grammarParserParses('start: aZ', identifierGrammar("aZ")); + grammarParserParses('start: a0', identifierGrammar("a0")); + grammarParserParses('start: a9', identifierGrammar("a9")); + grammarParserParses('start: a_', identifierGrammar("a_")); + grammarParserParses('start: a$', identifierGrammar("a$")); + grammarParserParses('start: abcd', identifierGrammar("abcd")); + + grammarParserParses('start: a\n', identifierGrammar("a")); +}); + +/* Canonical literal is "\"abcd\"". */ +test("parses literal", function() { + grammarParserParses('start: "abcd"', literalGrammar("abcd")); + grammarParserParses("start: 'abcd'", literalGrammar("abcd")); +}); + +/* Canonical doubleQuotedLiteral is "\"abcd\"". */ +test("parses doubleQuotedLiteral", function() { + grammarParserParses('start: ""', literalGrammar("")); + grammarParserParses('start: "a"', literalGrammar("a")); + grammarParserParses('start: "abc"', literalGrammar("abc")); + + grammarParserParses('start: "abcd"\n', literalGrammar("abcd")); +}); + +/* Canonical doubleQuotedCharacter is "a". 
*/ +test("parses doubleQuotedCharacter", function() { + grammarParserParses('start: "a"', literalGrammar("a")); + grammarParserParses('start: "\\n"', literalGrammar("\n")); + grammarParserParses('start: "\\0"', literalGrammar("\0")); + grammarParserParses('start: "\\x00"', literalGrammar("\x00")); + grammarParserParses('start: "\\u0120"', literalGrammar("\u0120")); + grammarParserParses('start: "\\\n"', literalGrammar("\n")); +}); + +/* Canonical simpleDoubleQuotedCharacter is "a". */ +test("parses simpleDoubleQuotedCharacter", function() { + grammarParserParses('start: "a"', literalGrammar("a")); + grammarParserParses('start: "\'"', literalGrammar("'")); + grammarParserDoesNotParse('start: """'); + grammarParserDoesNotParse('start: "\\"'); + grammarParserDoesNotParse('start: "\n"'); + grammarParserDoesNotParse('start: "\r"'); + grammarParserDoesNotParse('start: "\u2028"'); + grammarParserDoesNotParse('start: "\u2029"'); +}); + +/* Canonical singleQuotedLiteral is "'abcd'". */ +test("parses singleQuotedLiteral", function() { + grammarParserParses("start: ''", literalGrammar("")); + grammarParserParses("start: 'a'", literalGrammar("a")); + grammarParserParses("start: 'abc'", literalGrammar("abc")); + + grammarParserParses("start: 'abcd'\n", literalGrammar("abcd")); +}); + +/* Canonical singleQuotedCharacter is "a". */ +test("parses singleQuotedCharacter", function() { + grammarParserParses("start: 'a'", literalGrammar("a")); + grammarParserParses("start: '\\n'", literalGrammar("\n")); + grammarParserParses("start: '\\0'", literalGrammar("\0")); + grammarParserParses("start: '\\x00'", literalGrammar("\x00")); + grammarParserParses("start: '\\u0120'", literalGrammar("\u0120")); + grammarParserParses("start: '\\\n'", literalGrammar("\n")); +}); + +/* Canonical simpleSingleQuotedCharacter is "a". 
*/ +test("parses simpleSingleQuotedCharacter", function() { + grammarParserParses("start: 'a'", literalGrammar("a")); + /* A double quote needs no escaping inside single quotes. */ grammarParserParses("start: '\"'", literalGrammar("\"")); + /* Inputs expected to fail: a bare single quote, a backslash directly before the closing quote, and raw line terminators (LF, CR, U+2028, U+2029) inside the literal. */ grammarParserDoesNotParse("start: '''"); + grammarParserDoesNotParse("start: '\\'"); + grammarParserDoesNotParse("start: '\n'"); + grammarParserDoesNotParse("start: '\r'"); + grammarParserDoesNotParse("start: '\u2028'"); + grammarParserDoesNotParse("start: '\u2029'"); +}); + +/* Canonical class is "[a-d]". */ +test("parses class", function() { + /* Empty class, one range, a class starting with a caret, a single character, and three adjacent ranges. */ grammarParserParses("start: []", classGrammar("")); + grammarParserParses("start: [a-d]", classGrammar("a-d")); + grammarParserParses("start: [^a-d]", classGrammar("^a-d")); + grammarParserParses("start: [a]", classGrammar("a")); + grammarParserParses("start: [a-de-hi-l]", classGrammar("a-de-hi-l")); + + /* A class followed by a newline; the newline is not part of the expected value. */ grammarParserParses("start: [a-d]\n", classGrammar("a-d")); +}); + +/* Canonical classCharacterRange is "a-d". */ +test("parses classCharacterRange", function() { + /* A range with equal ends is accepted; a range whose end precedes its start is expected to fail. */ grammarParserParses("start: [a-d]", classGrammar("a-d")); + grammarParserParses("start: [a-a]", classGrammar("a-a")); + grammarParserDoesNotParse("start: [b-a]"); +}); + +/* Canonical classCharacter is "a". */ +test("parses classCharacter", function() { + grammarParserParses("start: [a]", classGrammar("a")); +}); + +/* Canonical bracketDelimitedCharacter is "a". */ +test("parses bracketDelimitedCharacter", function() { + /* Note the expected class text: backslash-n and backslash-0 stay in escaped form, backslash-x00 is expected as backslash-0, backslash-u0120 as the U+0120 character itself, and a backslash plus a real line feed as backslash-n. */ grammarParserParses("start: [a]", classGrammar("a")); + grammarParserParses("start: [\\n]", classGrammar("\\n")); + grammarParserParses("start: [\\0]", classGrammar("\\0")); + grammarParserParses("start: [\\x00]", classGrammar("\\0")); + grammarParserParses("start: [\\u0120]", classGrammar("\u0120")); + grammarParserParses("start: [\\\n]", classGrammar("\\n")); +}); + +/* Canonical simpleBracketDelimitedCharacter is "a".
*/ +test("parses simpleBracketDelimitedCharacter", function() { + grammarParserParses("start: [a]", classGrammar("a")); + /* An opening bracket is accepted unescaped inside a class. */ grammarParserParses("start: [[]", classGrammar("[")); + /* Inputs expected to fail: an extra closing bracket, a backslash directly before the closing bracket, and raw line terminators (LF, CR, U+2028, U+2029) inside the class. */ grammarParserDoesNotParse("start: []]"); + grammarParserDoesNotParse("start: [\\]"); + grammarParserDoesNotParse("start: [\n]"); + grammarParserDoesNotParse("start: [\r]"); + grammarParserDoesNotParse("start: [\u2028]"); + grammarParserDoesNotParse("start: [\u2029]"); +}); + +/* Canonical simpleEscapeSequence is "\\n". */ +test("parses simpleEscapeSequence", function() { + /* Escaped quote characters and backslash first, then the letter escapes b, f, n, r, t and v. */ grammarParserParses('start: "\\\'"', literalGrammar("'")); + grammarParserParses('start: "\\""', literalGrammar("\"")); + grammarParserParses('start: "\\\\"', literalGrammar("\\")); + grammarParserParses('start: "\\b"', literalGrammar("\b")); + grammarParserParses('start: "\\f"', literalGrammar("\f")); + grammarParserParses('start: "\\n"', literalGrammar("\n")); + grammarParserParses('start: "\\r"', literalGrammar("\r")); + grammarParserParses('start: "\\t"', literalGrammar("\t")); + /* IE does not recognize "\v", so the expected vertical tab is spelled "\x0B". */ + grammarParserParses('start: "\\v"', literalGrammar("\x0B")); + + /* A backslash before a letter with no special meaning is expected to yield that letter. */ grammarParserParses('start: "\\a"', literalGrammar("a")); +}); + +/* Canonical zeroEscapeSequence is "\\0". */ +test("parses zeroEscapeSequence", function() { + grammarParserParses('start: "\\0"', literalGrammar("\0")); + /* Backslash-0 directly followed by another digit (0 and 9 are tried) is expected to fail. */ grammarParserDoesNotParse('start: "\\00"'); + grammarParserDoesNotParse('start: "\\09"'); +}); + +/* Canonical hexEscapeSequence is "\\x00".
*/ +test("parses hexEscapeSequence", function() { + /* The last hex digit runs over 0, 9, a, f, A and F, covering decimal digits and letters of both cases. */ grammarParserParses('start: "\\x00"', literalGrammar("\x00")); + grammarParserParses('start: "\\x09"', literalGrammar("\x09")); + grammarParserParses('start: "\\x0a"', literalGrammar("\x0a")); + grammarParserParses('start: "\\x0f"', literalGrammar("\x0f")); + grammarParserParses('start: "\\x0A"', literalGrammar("\x0A")); + grammarParserParses('start: "\\x0F"', literalGrammar("\x0F")); + /* A single hex digit is expected to fail; with three digits the expected value is the two-digit escape followed by a literal 0. */ grammarParserDoesNotParse('start: "\\x0"'); + grammarParserParses('start: "\\x000"', literalGrammar("\x000")); +}); + +/* Canonical unicodeEscapeSequence is "\\u0120". */ +test("parses unicodeEscapeSequence", function() { + /* The last hex digit runs over 0, 9, a, f, A and F, covering decimal digits and letters of both cases. */ grammarParserParses('start: "\\u0120"', literalGrammar("\u0120")); + grammarParserParses('start: "\\u0129"', literalGrammar("\u0129")); + grammarParserParses('start: "\\u012a"', literalGrammar("\u012a")); + grammarParserParses('start: "\\u012f"', literalGrammar("\u012f")); + grammarParserParses('start: "\\u012A"', literalGrammar("\u012A")); + grammarParserParses('start: "\\u012F"', literalGrammar("\u012F")); + /* Three hex digits are expected to fail; with five the expected value is the four-digit escape followed by a literal 4. */ grammarParserDoesNotParse('start: "\\u012"'); + grammarParserParses('start: "\\u01234"', literalGrammar("\u01234")); +}); + +/* Canonical eolEscapeSequence is "\\\n". */ +test("parses eolEscapeSequence", function() { + /* Each input is a backslash followed by a real line terminator (LF, CR LF, CR, U+2028, U+2029); the expected literal holds the terminator itself without the backslash. */ grammarParserParses('start: "\\\n"', literalGrammar("\n")); + grammarParserParses('start: "\\\r\n"', literalGrammar("\r\n")); + grammarParserParses('start: "\\\r"', literalGrammar("\r")); + grammarParserParses('start: "\\\u2028"', literalGrammar("\u2028")); + grammarParserParses('start: "\\\u2029"', literalGrammar("\u2029")); +}); + +/* Canonical __ is "\n".
*/ +test("parses __", function() { + /* What sits between the colon and the literal varies: nothing, one space, a newline, a block comment. Every input is checked against the same simpleGrammar value, which is defined outside this view. */ grammarParserParses('start:"abcd"', simpleGrammar); + grammarParserParses('start: "abcd"', simpleGrammar); + grammarParserParses('start:\n"abcd"', simpleGrammar); + grammarParserParses('start:/* comment */"abcd"', simpleGrammar); + /* NOTE(review): as shown here this input is identical to the second case; it may have lost extra spaces in transit, so confirm against the upstream test file. */ grammarParserParses('start: "abcd"', simpleGrammar); +}); + +/* Trivial character class rules are not tested. */ + +/* Canonical comment is "\/* comment *\/". */ +test("parses comment", function() { + /* One sample of each comment kind: line comment, then block comment. */ grammarParserParses('start:// comment\n"abcd"', simpleGrammar); + grammarParserParses('start:/* comment */"abcd"', simpleGrammar); +}); +/* Canonical singleLineComment is "// comment". */ +test("parses singleLineComment", function() { + /* Line comments with zero, one and three characters of text, each ended by a newline. */ grammarParserParses('start://\n"abcd"', simpleGrammar); + grammarParserParses('start://a\n"abcd"', simpleGrammar); + grammarParserParses('start://aaa\n"abcd"', simpleGrammar); + /* A line comment at the very end of the input, with no newline after it. */ grammarParserParses('start: "abcd"//', simpleGrammar); +}); + +/* Canonical multiLineComment is "\/* comment *\/". */ +test("parses multiLineComment", function() { + /* Accepted bodies: empty, one character, several characters, a newline, a lone star, and text ending in a slash. */ grammarParserParses('start:/**/"abcd"', simpleGrammar); + grammarParserParses('start:/*a*/"abcd"', simpleGrammar); + grammarParserParses('start:/*aaa*/"abcd"', simpleGrammar); + grammarParserParses('start:/*\n*/"abcd"', simpleGrammar); + grammarParserParses('start:/***/"abcd"', simpleGrammar); + grammarParserParses('start:/*a/*/"abcd"', simpleGrammar); + + /* Inputs expected to fail: a comment that is never closed, the three-character sequence slash-star-slash, and a block comment wrapped around another block comment. */ grammarParserDoesNotParse('start:/*"abcd"'); + grammarParserDoesNotParse('start:/*/"abcd"'); + grammarParserDoesNotParse('start:/*/**/*/"abcd"'); +}); + +/* Canonical eol is "\n". */ +test("parses eol", function() { + /* LF, the CR LF pair, CR alone, U+2028 and U+2029, each placed between the colon and the literal. */ grammarParserParses('start:\n"abcd"', simpleGrammar); + grammarParserParses('start:\r\n"abcd"', simpleGrammar); + grammarParserParses('start:\r"abcd"', simpleGrammar); + grammarParserParses('start:\u2028"abcd"', simpleGrammar); + grammarParserParses('start:\u2029"abcd"', simpleGrammar); +}); + +/* Canonical eolChar is "\n".
*/ +test("parses eolChar", function() { + /* The four single line-terminator characters, one per case: LF, CR, U+2028, U+2029. */ grammarParserParses('start:\n"abcd"', simpleGrammar); + grammarParserParses('start:\r"abcd"', simpleGrammar); + grammarParserParses('start:\u2028"abcd"', simpleGrammar); + grammarParserParses('start:\u2029"abcd"', simpleGrammar); +}); + +/* Canonical whitespace is " ". */ +test("parses whitespace", function() { + /* Tab, vertical tab, form feed and space come first, then further code points written as Unicode escapes; each is placed alone between the colon and the literal. */ grammarParserParses('start:\t"abcd"', simpleGrammar); + /* IE does not recognize "\v", hence the "\x0B" spelling of the vertical tab. */ + grammarParserParses('start:\x0B"abcd"', simpleGrammar); + grammarParserParses('start:\f"abcd"', simpleGrammar); + grammarParserParses('start: "abcd"', simpleGrammar); + grammarParserParses('start:\u00A0"abcd"', simpleGrammar); + grammarParserParses('start:\uFEFF"abcd"', simpleGrammar); + grammarParserParses('start:\u1680"abcd"', simpleGrammar); + grammarParserParses('start:\u180E"abcd"', simpleGrammar); + grammarParserParses('start:\u2000"abcd"', simpleGrammar); + grammarParserParses('start:\u2001"abcd"', simpleGrammar); + grammarParserParses('start:\u2002"abcd"', simpleGrammar); + grammarParserParses('start:\u2003"abcd"', simpleGrammar); + grammarParserParses('start:\u2004"abcd"', simpleGrammar); + grammarParserParses('start:\u2005"abcd"', simpleGrammar); + grammarParserParses('start:\u2006"abcd"', simpleGrammar); + grammarParserParses('start:\u2007"abcd"', simpleGrammar); + grammarParserParses('start:\u2008"abcd"', simpleGrammar); + grammarParserParses('start:\u2009"abcd"', simpleGrammar); + grammarParserParses('start:\u200A"abcd"', simpleGrammar); + grammarParserParses('start:\u202F"abcd"', simpleGrammar); + grammarParserParses('start:\u205F"abcd"', simpleGrammar); + grammarParserParses('start:\u3000"abcd"', simpleGrammar); +}); + /* Presumably closes and immediately calls the wrapper function opened before this chunk; its opening is not visible here. */ })();