Optimize redundant fail checks (#400)

Eliminate unnecessary checks of a parse result when that result can be statically determined.
2018-01-06 02:28:09 +05:00 · 2018-01-06 02:28:09 +05:00 · 534dc53ac2
parent f5699d63fb
commit 534dc53ac2
6 changed files with 660 additions and 528 deletions
--- a/lib/compiler/index.js
+++ b/lib/compiler/index.js
@ -9,6 +9,7 @@ const reportDuplicateRules = require( "./passes/report-duplicate-rules" );
 const reportInfiniteRecursion = require( "./passes/report-infinite-recursion" );
 const reportInfiniteRepetition = require( "./passes/report-infinite-repetition" );
 const reportUndefinedRules = require( "./passes/report-undefined-rules" );
+const inferenceMatchResult = require( "./passes/inference-match-result" );
 const visitor = require( "./visitor" );

 function processOptions( options, defaults ) {
@ -58,6 +59,7 @@ const compiler = {
        },
        generate: {
            calcReportFailures: calcReportFailures,
+            inferenceMatchResult: inferenceMatchResult,
            generateBytecode: generateBytecode,
            generateJS: generateJS
        }
--- a/lib/compiler/passes/generate-bytecode.js
+++ b/lib/compiler/passes/generate-bytecode.js
@ -238,7 +238,10 @@ function generateBytecode( ast ) {

    }

-    function buildCondition( condCode, thenCode, elseCode ) {
+    function buildCondition( match, condCode, thenCode, elseCode ) {
+
+        if ( match > 0 ) return thenCode;
+        if ( match < 0 ) return elseCode;

        return condCode.concat(
            [ thenCode.length, elseCode.length ],
@ -263,6 +266,7 @@ function generateBytecode( ast ) {

    function buildSimplePredicate( expression, negative, context ) {

+        const match = expression.match|0;
        return buildSequence(
            [ op.PUSH_CURR_POS ],
            [ op.EXPECT_NS_BEGIN ],
@ -274,6 +278,7 @@ function generateBytecode( ast ) {
            } ),
            [ op.EXPECT_NS_END, negative ? 1 : 0 ],
            buildCondition(
+                negative ? -match : match,
                [ negative ? op.IF_ERROR : op.IF_NOT_ERROR ],
                buildSequence(
                    [ op.POP ],
@ -290,14 +295,15 @@ function generateBytecode( ast ) {

    }

-    function buildSemanticPredicate( code, negative, context ) {
+    function buildSemanticPredicate( node, negative, context ) {

-        const functionIndex = addFunctionConst( Object.keys( context.env ), code );
+        const functionIndex = addFunctionConst( Object.keys( context.env ), node.code );

        return buildSequence(
            [ op.UPDATE_SAVED_POS ],
            buildCall( functionIndex, 0, context.env, context.sp ),
            buildCondition(
+                node.match|0,
                [ op.IF ],
                buildSequence( [ op.POP ], negative ? [ op.PUSH_FAILED ] : [ op.PUSH_UNDEFINED ] ),
                buildSequence( [ op.POP ], negative ? [ op.PUSH_UNDEFINED ] : [ op.PUSH_FAILED ] )
@ -371,6 +377,8 @@ function generateBytecode( ast ) {
                    alternatives.length < 2
                        ? []
                        : buildCondition(
+                            // If this alternative always matches, there is no need to generate code for the following alternatives
+                            -( alternatives[ 0 ].match|0 ),
                            [ op.IF_ERROR ],
                            buildSequence(
                                [ op.POP ],
@ -396,7 +404,10 @@ function generateBytecode( ast ) {
                action: node,
                reportFailures: context.reportFailures
            } );
-            const functionIndex = addFunctionConst( Object.keys( env ), node.code );
+            const match = node.expression.match|0;
+            const functionIndex = emitCall && match >= 0
+                ? addFunctionConst( Object.keys( env ), node.code )
+                : null;

            return emitCall === false
                ? expressionCode
@ -404,6 +415,7 @@ function generateBytecode( ast ) {
                    [ op.PUSH_CURR_POS ],
                    expressionCode,
                    buildCondition(
+                        match,
                        [ op.IF_NOT_ERROR ],
                        buildSequence(
                            [ op.LOAD_SAVED_POS, 1 ],
@ -432,6 +444,7 @@ function generateBytecode( ast ) {
                            reportFailures: context.reportFailures
                        } ),
                        buildCondition(
+                            elements[ 0 ].match|0,
                            [ op.IF_NOT_ERROR ],
                            buildElementsCode( elements.slice( 1 ), {
                                sp: context.sp + 1,
@ -507,6 +520,7 @@ function generateBytecode( ast ) {
                    reportFailures: context.reportFailures
                } ),
                buildCondition(
+                    node.expression.match|0,
                    [ op.IF_NOT_ERROR ],
                    buildSequence( [ op.POP ], [ op.TEXT ] ),
                    [ op.NIP ]
@ -537,6 +551,8 @@ function generateBytecode( ast ) {
                    reportFailures: context.reportFailures
                } ),
                buildCondition(
+                    // If the expression always matches, there is no need to replace FAILED with NULL
+                    -( node.expression.match|0 ),
                    [ op.IF_ERROR ],
                    buildSequence( [ op.POP ], [ op.PUSH_NULL ] ),
                    []
@ -576,6 +592,7 @@ function generateBytecode( ast ) {
                [ op.PUSH_EMPTY_ARRAY ],
                expressionCode,
                buildCondition(
+                    node.expression.match|0,
                    [ op.IF_NOT_ERROR ],
                    buildSequence( buildAppendLoop( expressionCode ), [ op.POP ] ),
                    buildSequence( [ op.POP ], [ op.POP ], [ op.PUSH_FAILED ] )
@ -597,13 +614,13 @@ function generateBytecode( ast ) {

        semantic_and( node, context ) {

-            return buildSemanticPredicate( node.code, false, context );
+            return buildSemanticPredicate( node, false, context );

        },

        semantic_not( node, context ) {

-            return buildSemanticPredicate( node.code, true, context );
+            return buildSemanticPredicate( node, true, context );

        },

@ -617,9 +634,11 @@ function generateBytecode( ast ) {

            if ( node.value.length > 0 ) {

-                const stringIndex = addConst( `"${ js.stringEscape(
+                const match = node.match|0;
+                const needConst = match === 0 || ( match > 0 && ! node.ignoreCase );
+                const stringIndex = needConst ? addConst( `"${ js.stringEscape(
                    node.ignoreCase ? node.value.toLowerCase() : node.value
-                ) }"` );
+                ) }"` ) : null;
                // Do not generate unused constant, if no need it
                const expectedIndex = context.reportFailures ? addConst(
                    "peg$literalExpectation("
@ -634,6 +653,7 @@ function generateBytecode( ast ) {
                return buildSequence(
                    context.reportFailures ? [ op.EXPECT, expectedIndex ] : [],
                    buildCondition(
+                        match,
                        node.ignoreCase
                            ? [ op.MATCH_STRING_IC, stringIndex ]
                            : [ op.MATCH_STRING, stringIndex ],
@ -677,7 +697,8 @@ function generateBytecode( ast ) {
                    .join( ", " )
                + "]";

-            const regexpIndex = addConst( regexp );
+            const match = node.match|0;
+            const regexpIndex = match === 0 ? addConst( regexp ) : null;
            // Do not generate unused constant, if no need it
            const expectedIndex = context.reportFailures ? addConst(
                "peg$classExpectation("
@ -690,6 +711,7 @@ function generateBytecode( ast ) {
            return buildSequence(
                context.reportFailures ? [ op.EXPECT, expectedIndex ] : [],
                buildCondition(
+                    match,
                    [ op.MATCH_REGEXP, regexpIndex ],
                    [ op.ACCEPT_N, 1 ],
                    [ op.PUSH_FAILED ]
@ -708,6 +730,7 @@ function generateBytecode( ast ) {
            return buildSequence(
                context.reportFailures ? [ op.EXPECT, expectedIndex ] : [],
                buildCondition(
+                    node.match|0,
                    [ op.MATCH_ANY ],
                    [ op.ACCEPT_N, 1 ],
                    [ op.PUSH_FAILED ]
--- a/lib/compiler/passes/inference-match-result.js
+++ b/lib/compiler/passes/inference-match-result.js
@ -0,0 +1,173 @@
+"use strict";
+
+const visitor      = require( "../visitor" );
+const asts         = require( "../asts" );
+const GrammarError = require( "../../grammar-error" );
+
+// Inference match result of the rule. Can be:
+//   -1: negative result, always fails
+//    0: neutral result, may be fail, may be match
+//    1: positive result, always match
+function inferenceMatchResult( ast ) {
+
+    let inference;
+    function sometimesMatch( node ) {
+
+        node.match = 0;
+
+        return node.match;
+
+    }
+    function alwaysMatch( node ) {
+
+        inference( node.expression );
+
+        node.match = 1;
+
+        return node.match;
+
+    }
+
+    function inferenceExpression( node ) {
+
+        node.match = inference( node.expression );
+
+        return node.match;
+
+    }
+    function inferenceElements( elements, forChoice ) {
+
+        const length = elements.length;
+        let always = 0;
+        let never = 0;
+
+        for ( let i = 0; i < length; ++i ) {
+
+            const result = inference( elements[ i ] );
+
+            if ( result > 0 ) {
+
+                ++always;
+
+            }
+            if ( result < 0 ) {
+
+                ++never;
+
+            }
+
+        }
+
+        if ( always === length ) {
+
+            return 1;
+
+        }
+        if ( forChoice ) {
+
+            return never === length ? -1 : 0;
+
+        }
+
+        return never > 0 ? -1 : 0;
+
+    }
+
+    inference = visitor.build( {
+        rule( node ) {
+
+            let oldResult;
+            let count = 0;
+
+            if ( typeof node.match === "undefined" ) {
+
+                node.match = 0;
+                do {
+
+                    oldResult = node.match;
+                    node.match = inference( node.expression );
+                    // 6 == 3! -- permutations count for all transitions from one match
+                    // state to another.
+                    // After 6 iterations the cycle with guarantee begins
+                    // istanbul ignore next
+                    if ( ++count > 6 ) {
+
+                        throw new GrammarError(
+                            "Infinity cycle detected when trying evaluate node match result",
+                            node.location
+                        );
+
+                    }
+
+                } while ( oldResult !== node.match );
+
+            }
+
+            return node.match;
+
+        },
+        named:        inferenceExpression,
+        choice( node ) {
+
+            node.match = inferenceElements( node.alternatives, true );
+
+            return node.match;
+
+        },
+        action:       inferenceExpression,
+        sequence( node ) {
+
+            node.match = inferenceElements( node.elements, false );
+
+            return node.match;
+
+        },
+        labeled:      inferenceExpression,
+        text:         inferenceExpression,
+        simple_and:   inferenceExpression,
+        simple_not( node ) {
+
+            node.match = -inference( node.expression );
+
+            return node.match;
+
+        },
+        optional:     alwaysMatch,
+        zero_or_more: alwaysMatch,
+        one_or_more:  inferenceExpression,
+        group:        inferenceExpression,
+        semantic_and: sometimesMatch,
+        semantic_not: sometimesMatch,
+        rule_ref( node ) {
+
+            const rule = asts.findRule( ast, node.name );
+            node.match = inference( rule );
+
+            return node.match;
+
+        },
+        literal( node ) {
+
+            // Empty literal always match on any input
+            node.match = node.value.length === 0 ? 1 : 0;
+
+            return node.match;
+
+        },
+        class( node ) {
+
+            // Empty character class never match on any input
+            node.match = node.parts.length === 0 ? -1 : 0;
+
+            return node.match;
+
+        },
+        // |any| not match on empty input
+        any:          sometimesMatch
+    } );
+
+    inference( ast );
+
+}
+
+module.exports = inferenceMatchResult;
--- a/lib/parser.js
+++ b/lib/parser.js
--- a/lib/typings/api.d.ts
+++ b/lib/typings/api.d.ts
@ -98,6 +98,10 @@ declare namespace peg {

                reportFailures?: boolean;

+                // Added by inference-match-result pass
+
+                match?: number;
+
            }

            interface Named extends INode {
--- a/test/spec/unit/compiler/passes/inference-match-result.spec.js
+++ b/test/spec/unit/compiler/passes/inference-match-result.spec.js
@ -0,0 +1,164 @@
+"use strict";
+
+const chai = require( "chai" );
+const helpers = require( "./helpers" );
+const pass = require( "pegjs-dev" ).compiler.passes.generate.inferenceMatchResult;
+
+chai.use( helpers );
+
+const expect = chai.expect;
+
+describe( "compiler pass |inferenceMatchResult|", function () {
+
+    it( "calculate |match| property for |any| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = .          ", { rules: [ { match:  0 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |literal| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = ''         ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''i        ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = 'a'        ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = 'a'i       ", { rules: [ { match:  0 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |class| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = []         ", { rules: [ { match: -1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []i        ", { rules: [ { match: -1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [a]        ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [a]i       ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [a-b]      ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [a-b]i     ", { rules: [ { match:  0 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |sequence| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = 'a' 'b'    ", { rules: [ { match:  0 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = 'a' ''     ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = '' 'b'     ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = '' ''      ", { rules: [ { match:  1 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = 'a' []     ", { rules: [ { match: -1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [] 'b'     ", { rules: [ { match: -1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [] []      ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |choice| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = 'a' / 'b'  ", { rules: [ { match:  0 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = 'a' / ''   ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''  / 'b'  ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''  / ''   ", { rules: [ { match:  1 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = 'a' / []   ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []  / 'b'  ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []  / []   ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for predicates correctly", function () {
+
+        expect( pass ).to.changeAST( "start = &.         ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = &''        ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = &[]        ", { rules: [ { match: -1 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = !.         ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = !''        ", { rules: [ { match: -1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ![]        ", { rules: [ { match:  1 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = &{ code }  ", { rules: [ { match: 0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = !{ code }  ", { rules: [ { match: 0 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |text| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = $.         ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = $''        ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = $[]        ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |action| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = .  { code }", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = '' { code }", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [] { code }", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |labeled| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = a:.        ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = a:''       ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = a:[]       ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |named| correctly", function () {
+
+        expect( pass ).to.changeAST( "start 'start' = .  ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start 'start' = '' ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start 'start' = [] ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |optional| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = .?         ", { rules: [ { match: 1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''?        ", { rules: [ { match: 1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []?        ", { rules: [ { match: 1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |zero_or_more| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = .*         ", { rules: [ { match: 1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''*        ", { rules: [ { match: 1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []*        ", { rules: [ { match: 1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |one_or_more| correctly", function () {
+
+        expect( pass ).to.changeAST( "start = .+         ", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = ''+        ", { rules: [ { match:  1 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = []+        ", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+    it( "calculate |match| property for |rule_ref| correctly", function () {
+
+        expect( pass ).to.changeAST(
+            [ "start = end", "end = . " ].join( "\n" ),
+            { rules: [ { match:  0 }, { match:  0 } ] },
+            {}, {}
+        );
+        expect( pass ).to.changeAST(
+            [ "start = end", "end = ''" ].join( "\n" ),
+            { rules: [ { match:  1 }, { match:  1 } ] },
+            {}, {}
+        );
+        expect( pass ).to.changeAST(
+            [ "start = end", "end = []" ].join( "\n" ),
+            { rules: [ { match: -1 }, { match: -1 } ] },
+            {}, {}
+        );
+
+        expect( pass ).to.changeAST( "start = .  start", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = '' start", { rules: [ { match:  0 } ] }, {}, {} );
+        expect( pass ).to.changeAST( "start = [] start", { rules: [ { match: -1 } ] }, {}, {} );
+
+        expect( pass ).to.changeAST( "start = . start []", { rules: [ { match: -1 } ] }, {}, {} );
+
+    } );
+
+} );