pegjs/src/passes.js

/*
 * Optimization passes made on the grammar AST before compilation. Each pass
 * is a function that is passed the AST and returns a new AST. The AST can be
 * modified in-place by the pass. The order in which the passes are run is
 * specified in |PEG.compiler.compile| and should be the same as the order of
 * definitions here.
 */
PEG.compiler.passes = {
  /*
   * Eliminates proxy rules, i.e. rules whose whole expression is just a
   * reference to another rule. Every reference to a proxy rule is redirected
   * to the rule it delegates to, |ast.startRule| is redirected the same way
   * when it names a proxy rule, and the proxy rule itself is then deleted from
   * |ast.rules|. The AST is modified in-place and returned.
   */
  removeProxyRules: function(ast) {
    /* A proxy rule is a rule node whose expression is a bare rule reference. */
    function delegatesToRule(candidate) {
      return candidate.type === "rule"
        && candidate.expression.type === "rule_ref";
    }

    /*
     * Walks the AST starting at |root| and renames every |rule_ref| node that
     * points at |oldName| so that it points at |newName| instead.
     */
    function redirectRuleRefs(root, oldName, newName) {
      /* Handler for node types that cannot contain a rule reference. */
      function skip() {}

      /* Handler for node types wrapping a single |expression| subnode. */
      function visitExpression(node, from, to) {
        visit(node.expression, from, to);
      }

      /* Builds a handler for node types keeping subnodes in a list property. */
      function visitList(listProperty) {
        return function(node, from, to) {
          each(node[listProperty], function(item) {
            visit(item, from, to);
          });
        };
      }

      /* Handler for the grammar node: descends into each of its rules. */
      function visitRules(node, from, to) {
        var ruleName;

        for (ruleName in node.rules) {
          visit(node.rules[ruleName], from, to);
        }
      }

      /* Handler for rule references: this is where the rename happens. */
      function renameRef(node, from, to) {
        if (node.name !== from) {
          return;
        }
        node.name = to;
      }

      var visit = buildNodeVisitor({
        grammar:      visitRules,
        rule:         visitExpression,
        choice:       visitList("alternatives"),
        sequence:     visitList("elements"),
        labeled:      visitExpression,
        simple_and:   visitExpression,
        simple_not:   visitExpression,
        semantic_and: skip,
        semantic_not: skip,
        optional:     visitExpression,
        zero_or_more: visitExpression,
        one_or_more:  visitExpression,
        action:       visitExpression,
        rule_ref:     renameRef,
        literal:      skip,
        any:          skip,
        "class":      skip
      });

      visit(root, oldName, newName);
    }

    var ruleName, rule, targetName;

    for (ruleName in ast.rules) {
      rule = ast.rules[ruleName];
      if (!delegatesToRule(rule)) {
        continue;
      }

      /*
       * The redirect below only rewrites references equal to |rule.name|, so
       * the delegation target read here still holds after it has run.
       */
      targetName = rule.expression.name;

      redirectRuleRefs(ast, rule.name, targetName);
      if (ast.startRule === ruleName) {
        ast.startRule = targetName;
      }
      delete ast.rules[ruleName];
    }

    return ast;
  },

  /*
   * Annotates every AST node with |resultStackDepth| and |posStackDepth|
   * properties. They state how many slots of the result stack and of the
   * position stack the code generated by the emitter for that node will use.
   * This information is used to declare variables holding the stack data in
   * the generated code. The AST is modified in-place and returned.
   */
  computeStackDepths: function(ast) {
    /* Handler for nodes that use no slot on either stack. */
    function usesNoStack(node) {
      node.resultStackDepth = 0;
      node.posStackDepth = 0;
    }

    /*
     * Builds a handler for nodes wrapping a single |expression|: the node's
     * depths are those of the expression plus the given constant extras.
     */
    function expressionDepthsPlus(extraResults, extraPositions) {
      return function(node) {
        var inner = node.expression;

        visit(inner);
        node.resultStackDepth = inner.resultStackDepth + extraResults;
        node.posStackDepth = inner.posStackDepth + extraPositions;
      };
    }

    /* Largest value |measure| yields over |nodes|. */
    function largest(nodes, measure) {
      return Math.max.apply(null, map(nodes, measure));
    }

    function resultDepthOf(node) {
      return node.resultStackDepth;
    }

    function posDepthOf(node) {
      return node.posStackDepth;
    }

    /* Handler for the grammar node: processes each of its rules. */
    function visitGrammar(node) {
      var ruleName;

      for (ruleName in node.rules) {
        visit(node.rules[ruleName]);
      }
    }

    /* A choice takes, on each stack, the largest depth of its alternatives. */
    function visitChoice(node) {
      each(node.alternatives, visit);
      node.resultStackDepth = largest(node.alternatives, resultDepthOf);
      node.posStackDepth = largest(node.alternatives, posDepthOf);
    }

    /*
     * A sequence offsets each element's result depth by the element's index
     * and takes the largest such value; on the position stack it takes one
     * slot on top of the deepest element. An empty sequence is handled
     * separately: 0 result slots and 1 position slot.
     */
    function visitSequence(node) {
      each(node.elements, visit);

      if (node.elements.length > 0) {
        node.resultStackDepth = largest(node.elements, function(element, index) {
          return index + element.resultStackDepth;
        });
        node.posStackDepth = 1 + largest(node.elements, posDepthOf);
      } else {
        node.resultStackDepth = 0;
        node.posStackDepth = 1;
      }
    }

    var visit = buildNodeVisitor({
      grammar:      visitGrammar,
      rule:         expressionDepthsPlus(1, 0),
      choice:       visitChoice,
      sequence:     visitSequence,
      labeled:      expressionDepthsPlus(0, 0),
      simple_and:   expressionDepthsPlus(0, 1),
      simple_not:   expressionDepthsPlus(0, 1),
      semantic_and: usesNoStack,
      semantic_not: usesNoStack,
      optional:     expressionDepthsPlus(0, 0),
      zero_or_more: expressionDepthsPlus(1, 0),
      one_or_more:  expressionDepthsPlus(1, 0),
      action:       expressionDepthsPlus(0, 1),
      rule_ref:     usesNoStack,
      literal:      usesNoStack,
      any:          usesNoStack,
      "class":      usesNoStack
    });

    visit(ast);

    return ast;
  }
};
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`/*`
			`* Optimalization passes made on the grammar AST before compilation. Each pass`
			`* is a function that is passed the AST and returns a new AST. The AST can be`
Compiler checks and passes are named for easier reference from tests 14 years ago			`* modified in-place by the pass. The order in which the passes are run is`
			`* specified in \|PEG.compiler.compile\| and should be the same as the order of`
			`* definitions here.`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`*/`
Compiler checks and passes are named for easier reference from tests 14 years ago			`PEG.compiler.passes = {`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`/*`
			`* Removes proxy rules -- that is, rules that only delegate to other rule.`
			`*/`
Make names of compiler checks and passes verbs 13 years ago			`removeProxyRules: function(ast) {`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`function isProxyRule(node) {`
			`return node.type === "rule" && node.expression.type === "rule_ref";`
			`}`

			`function replaceRuleRefs(ast, from, to) {`
			`function nop() {}`

			`function replaceInExpression(node, from, to) {`
			`replace(node.expression, from, to);`
			`}`

			`function replaceInSubnodes(propertyName) {`
			`return function(node, from, to) {`
Rename \|node\| -> \|subnode\| to avoid aliasing 14 years ago			`each(node[propertyName], function(subnode) {`
			`replace(subnode, from, to);`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`});`
			`};`
			`}`

Extract \|buildNodeVisitor\| 14 years ago			`var replace = buildNodeVisitor({`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`grammar:`
			`function(node, from, to) {`
			`for (var name in node.rules) {`
Fix incorrect variable name on two places 14 years ago			`replace(node.rules[name], from, to);`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`}`
			`},`

			`rule: replaceInExpression,`
			`choice: replaceInSubnodes("alternatives"),`
			`sequence: replaceInSubnodes("elements"),`
			`labeled: replaceInExpression,`
			`simple_and: replaceInExpression,`
			`simple_not: replaceInExpression,`
			`semantic_and: nop,`
			`semantic_not: nop,`
			`optional: replaceInExpression,`
			`zero_or_more: replaceInExpression,`
			`one_or_more: replaceInExpression,`
			`action: replaceInExpression,`

			`rule_ref:`
			`function(node, from, to) {`
			`if (node.name === from) {`
			`node.name = to;`
			`}`
			`},`

			`literal: nop,`
			`any: nop,`
			`"class": nop`
Extract \|buildNodeVisitor\| 14 years ago			`});`
Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago
			`replace(ast, from, to);`
			`}`

			`for (var name in ast.rules) {`
			`if (isProxyRule(ast.rules[name])) {`
			`replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);`
			`if (name === ast.startRule) {`
			`ast.startRule = ast.rules[name].expression.name;`
			`}`
			`delete ast.rules[name];`
			`}`
			`}`

Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. 
Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total 13 years ago			`return ast;`
			`},`

			`/*`
			`* Adds \|resultStackDepth\| and \|posStackDepth\| properties to each AST node.`
			`* These properties specify how many positions on the result or position stack`
			`* code generated by the emitter for the node will use. This information is`
			`* used to declare varibles holding the stack data in the generated code.`
			`*/`
Make names of compiler checks and passes verbs 13 years ago			`computeStackDepths: function(ast) {`
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. 
Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total 13 years ago			`function computeZeroes(node) {`
			`node.resultStackDepth = 0;`
			`node.posStackDepth = 0;`
			`}`

			`function computeFromExpression(resultStackDelta, posStackDelta) {`
			`return function(node) {`
			`compute(node.expression);`
			`node.resultStackDepth = node.expression.resultStackDepth + resultStackDelta;`
			`node.posStackDepth = node.expression.posStackDepth + posStackDelta;`
src/passes.js: Add missing semicolon Fixes the following JSHint error: ./src/passes.js: line 98, col 8, Missing semicolon. 13 years ago			`};`
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. 
Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total 13 years ago			`}`

			`var compute = buildNodeVisitor({`
			`grammar:`
			`function(node) {`
			`for (var name in node.rules) {`
			`compute(node.rules[name]);`
			`}`
			`},`

Fix \|posStackDepth\| computation for rules Rules by themselves do not need any variable for storing position. Part of a fix for GH-53. 13 years ago			`rule: computeFromExpression(1, 0),`
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. 
Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total 13 years ago
			`choice:`
			`function(node) {`
			`each(node.alternatives, compute);`
			`node.resultStackDepth = Math.max.apply(`
			`null,`
			`map(node.alternatives, function(e) { return e.resultStackDepth; })`
			`);`
			`node.posStackDepth = Math.max.apply(`
			`null,`
			`map(node.alternatives, function(e) { return e.posStackDepth; })`
			`);`
			`},`

			`sequence:`
			`function(node) {`
			`each(node.elements, compute);`
Fix stack depth computations for empty sequences Part of a fix for GH-53. 13 years ago			`node.resultStackDepth = node.elements.length > 0`
			`? Math.max.apply(`
			`null,`
			`map(node.elements, function(e, i) { return i + e.resultStackDepth; })`
			`)`
			`: 0;`
			`node.posStackDepth = node.elements.length > 0`
			`? 1 + Math.max.apply(`
			`null,`
			`map(node.elements, function(e) { return e.posStackDepth; })`
			`)`
			`: 1;`
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. 
Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total 13 years ago			`},`

			`labeled: computeFromExpression(0, 0),`
			`simple_and: computeFromExpression(0, 1),`
			`simple_not: computeFromExpression(0, 1),`
			`semantic_and: computeZeroes,`
			`semantic_not: computeZeroes,`
			`optional: computeFromExpression(0, 0),`
			`zero_or_more: computeFromExpression(1, 0),`
			`one_or_more: computeFromExpression(1, 0),`
			`action: computeFromExpression(0, 1),`
			`rule_ref: computeZeroes,`
			`literal: computeZeroes,`
			`any: computeZeroes,`
			`"class": computeZeroes`
			`});`

			`compute(ast);`

Split the source code into several files, introduce build system The source code is now in the src directory. The library needs to be built using "rake", which creates the lib/peg.js file by combining the source files. 14 years ago			`return ast;`
			`}`
Compiler checks and passes are named for easier reference from tests 14 years ago			`};`