Implement value plucking

Resolves #235, #427, #545
2018-09-17 11:32:34 +01:00 · 2018-09-17 11:32:34 +01:00 · 460f0cc5bc
parent 26969475f7
commit 460f0cc5bc
14 changed files with 772 additions and 318 deletions
--- a/docs/grammar/parsing-expression-types.md
+++ b/docs/grammar/parsing-expression-types.md
@ -16,9 +16,10 @@ There are several types of parsing expressions, some of them containing subexpre
  * [! { predicate }](#--predicate--1)
  * [$ expression](#-expression-2)
  * [label : expression](#label--expression)
-  * [expression1 expression2 ... expressionN](#expression1-expression2---expressionn)
+  * [expression<sub>1</sub> expression<sub>2</sub> ... expression<sub>n</sub>](#expression1-expression2---expressionn)
  * [expression { action }](#expression--action-)
-  * [expression1 / expression2 / ... / expressionN](#expression1--expression2----expressionn)
+  * [expression<sub>1</sub> / expression<sub>2</sub> / ... / expression<sub>n</sub>](#expression1--expression2----expressionn)
+  * [expression<sub>1</sub> @expression<sub>2</sub> ... expression<sub>n</sub>](#expression1--expression2---expressionn)

 #### "*literal*"<br>'*literal*'

@ -113,3 +114,23 @@ The action has access to all variables and functions in the [Action Execution En
 #### *expression<sub>1</sub>* / *expression<sub>2</sub>* / ... / *expression<sub>n</sub>*

 Try to match the first expression, if it does not succeed, try the second one, etc. Return the match result of the first successfully matched expression. If no expression matches, consider the match failed.
+
+#### *expression<sub>1</sub>* @*expression<sub>2</sub>* ...  *expression<sub>n</sub>*
+
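+Match the sequence of expressions, but return only the match result(s) of the expression(s) prefixed with `@`: a single value when one expression is plucked, or an array of values when several are.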
+
+> WARNING: You cannot use `@` on a semantic predicate, and you cannot use it alongside an action block.
+
+```js
+start = MultiPluck
+      / SinglePluck
+
+SinglePluck = "0"? @integer
+MultiPluck = @integer "." @integer
+
+integer = $[0-9]+
+```
+
+When `SinglePluck` matches `011`, it returns `"11"`.
+
+When `MultiPluck` matches `0.11`, it returns `["0", "11"]`.
--- a/packages/pegjs/lib/compiler/index.js
+++ b/packages/pegjs/lib/compiler/index.js
@ -11,6 +11,7 @@ const reportInfiniteRecursion = require( "./passes/report-infinite-recursion" );
 const reportInfiniteRepetition = require( "./passes/report-infinite-repetition" );
 const reportUndefinedRules = require( "./passes/report-undefined-rules" );
 const inferenceMatchResult = require( "./passes/inference-match-result" );
+const reportIncorrectPlucking = require( "./passes/report-incorrect-plucking" );
 const Session = require( "./session" );
 const util = require( "../util" );

@ -30,7 +31,8 @@ const compiler = {
            reportUnusedRules: reportUnusedRules,
            reportDuplicateLabels: reportDuplicateLabels,
            reportInfiniteRecursion: reportInfiniteRecursion,
-            reportInfiniteRepetition: reportInfiniteRepetition
+            reportInfiniteRepetition: reportInfiniteRepetition,
+            reportIncorrectPlucking: reportIncorrectPlucking,
        },
        transform: {
            removeProxyRules: removeProxyRules
--- a/packages/pegjs/lib/compiler/opcodes.js
+++ b/packages/pegjs/lib/compiler/opcodes.js
@ -18,6 +18,7 @@ const opcodes = {
    APPEND:            10,   // APPEND
    WRAP:              11,   // WRAP n
    TEXT:              12,   // TEXT
+    PLUCK:             41,   // PLUCK n, k, p1, ..., pK

    // Conditions and Loops

--- a/packages/pegjs/lib/compiler/passes/generate-bytecode.js
+++ b/packages/pegjs/lib/compiler/passes/generate-bytecode.js
@ -441,16 +441,19 @@ function generateBytecode( ast, session ) {

        sequence( node, context ) {

+            const TOTAL_ELEMENTS = node.elements.length;
+
            function buildElementsCode( elements, context ) {

                if ( elements.length > 0 ) {

-                    const processedCount = node.elements.length - elements.slice( 1 ).length;
+                    const processedCount = TOTAL_ELEMENTS - elements.slice( 1 ).length;

                    return buildSequence(
                        generate( elements[ 0 ], {
                            sp: context.sp,
                            env: context.env,
+                            pluck: context.pluck,
                            action: null,
                            reportFailures: context.reportFailures
                        } ),
@ -460,6 +463,7 @@ function generateBytecode( ast, session ) {
                            buildElementsCode( elements.slice( 1 ), {
                                sp: context.sp + 1,
                                env: context.env,
+                                pluck: context.pluck,
                                action: context.action,
                                reportFailures: context.reportFailures
                            } ),
@ -471,26 +475,32 @@ function generateBytecode( ast, session ) {
                        )
                    );

-                } else if ( context.action ) {
+                }

-                    const functionIndex = addFunctionConst(
-                        false,
-                        Object.keys( context.env ),
-                        context.action.code
-                    );
+                if ( context.pluck.length > 0 )

                    return buildSequence(
-                        [ op.LOAD_SAVED_POS, node.elements.length ],
+                        [ op.PLUCK, TOTAL_ELEMENTS + 1, context.pluck.length ],
+                        context.pluck.map( eSP => context.sp - eSP )
+                    );
+
+                if ( context.action )
+
+                    return buildSequence(
+                        [ op.LOAD_SAVED_POS, TOTAL_ELEMENTS ],
                        buildCall(
-                            functionIndex,
-                            node.elements.length + 1,
+                            addFunctionConst( // functionIndex
+                                false,
+                                Object.keys( context.env ),
+                                context.action.code
+                            ),
+                            TOTAL_ELEMENTS + 1,
                            context.env,
                            context.sp
                        )
                    );

-                }
-                return buildSequence( [ op.WRAP, node.elements.length ], [ op.NIP ] );
+                return buildSequence( [ op.WRAP, TOTAL_ELEMENTS ], [ op.NIP ] );

            }

@ -499,6 +509,7 @@ function generateBytecode( ast, session ) {
                buildElementsCode( node.elements, {
                    sp: context.sp + 1,
                    env: context.env,
+                    pluck: [],
                    action: context.action,
                    reportFailures: context.reportFailures
                } )
@ -508,9 +519,20 @@ function generateBytecode( ast, session ) {

        labeled( node, context ) {

-            const env = util.clone( context.env );
+            let env = context.env;
+            const label = node.label;
+            const sp = context.sp + 1;

-            context.env[ node.label ] = context.sp + 1;
+            if ( label !== null ) {
+
+                env = util.clone( context.env );
+                context.env[ label ] = sp;
+
+            }
+
+            if ( context.pluck && node.pick )
+
+                context.pluck.push( sp );

            return generate( node.expression, {
                sp: context.sp,
--- a/packages/pegjs/lib/compiler/passes/generate-js.js
+++ b/packages/pegjs/lib/compiler/passes/generate-js.js
@ -379,7 +379,7 @@ function generateJS( ast, session, options ) {
                "  var ends = [];",
                "  var stack = [];",
                "  var startPos = peg$currPos;",
-                "  var params;"
+                "  var params, paramsLength, paramsN;"
            ].join( "\n" ) );

        } else {
@ -391,7 +391,7 @@ function generateJS( ast, session, options ) {
                "  var end = bc.length;",
                "  var ends = [];",
                "  var stack = [];",
-                "  var params;"
+                "  var params, paramsLength, paramsN;"
            ].join( "\n" ) );

        }
@ -472,6 +472,24 @@ function generateJS( ast, session, options ) {
            "          ip++;",
            "          break;",
            "",
+            "        case " + op.PLUCK + ":",               // PLUCK n, k, p1, ..., pK
+            "          paramsLength = bc[ip + 2];",
+            "          paramsN = 3 + paramsLength",
+            "",
+            "          params = bc.slice(ip + 3, ip + paramsN);",
+            "          params = paramsLength === 1",
+            "            ? stack[stack.length - 1 - params[ 0 ]]",
+            "            : params.map(function(p) { return stack[stack.length - 1 - p]; });",
+            "",
+            "          stack.splice(",
+            "            stack.length - bc[ip + 1],",
+            "            bc[ip + 1],",
+            "            params",
+            "          );",
+            "",
+            "          ip += paramsN;",
+            "          break;",
+            "",
            "        case " + op.IF + ":",                 // IF t, f
            indent10( generateCondition( "stack[stack.length - 1]", 0 ) ),
            "",
@ -825,6 +843,22 @@ function generateJS( ast, session, options ) {
                        ip++;
                        break;

+                    case op.PLUCK:               // PLUCK n, k, p1, ..., pK
+                        const baseLength = 3;
+                        const paramsLength = bc[ ip + baseLength - 1 ];
+                        const n = baseLength + paramsLength;
+                        value = bc.slice( ip + baseLength, ip + n );
+                        value = paramsLength === 1
+                            ? stack.index( value[ 0 ] )
+                            : `[ ${
+                                value.map( p => stack.index( p ) )
+                                    .join( ", " )
+                            } ]`;
+                        stack.pop( bc[ ip + 1 ] );
+                        parts.push( stack.push( value ) );
+                        ip += n;
+                        break;
+
                    case op.IF:                 // IF t, f
                        compileCondition( stack.top(), 0 );
                        break;
--- a/packages/pegjs/lib/compiler/passes/report-duplicate-labels.js
+++ b/packages/pegjs/lib/compiler/passes/report-duplicate-labels.js
@ -37,7 +37,7 @@ function reportDuplicateLabels( ast, session ) {

            const label = node.label;

-            if ( __hasOwnProperty.call( env, label ) ) {
+            if ( label && __hasOwnProperty.call( env, label ) ) {

                const start = env[ label ].start;

@ -49,7 +49,8 @@ function reportDuplicateLabels( ast, session ) {
            }

            check( node.expression, env );
-            env[ label ] = node.location;
+
+            if ( label ) env[ label ] = node.location;

        },

--- a/packages/pegjs/lib/compiler/passes/report-incorrect-plucking.js
+++ b/packages/pegjs/lib/compiler/passes/report-incorrect-plucking.js
@ -0,0 +1,53 @@
+"use strict";
+
+//
+// Check if the given element's expression is of type `semantic_*`
+//
+function isSemanticPredicate( element ) {
+
+    const type = element.expression.type;
+
+    if ( type === "semantic_and" ) return true;
+    if ( type === "semantic_not" ) return true;
+
+    return false;
+
+}
+
+//
+// Compiler pass to ensure the following are enforced:
+//
+//   - plucking can not be done with an action block
+//   - cannot pluck a semantic predicate
+//
+function reportIncorrectPlucking( ast, session ) {
+
+    session.buildVisitor( {
+
+        action( node ) {
+
+            this.visit( node.expression, true );
+
+        },
+
+        labeled( node, action ) {
+
+            if ( node.pick !== true ) return void 0;
+
+            if ( action === true )
+
+                session.error( `"@" cannot be used with an action block.`, node.location );
+
+            if ( isSemanticPredicate( node ) )
+
+                session.error( `"@" cannot be used on a semantic predicate.`, node.location );
+
+            this.visit( node.expression );
+
+        },
+
+    } )( ast );
+
+}
+
+module.exports = reportIncorrectPlucking;
--- a/packages/pegjs/lib/parser.js
+++ b/packages/pegjs/lib/parser.js
--- a/src/parser.pegjs
+++ b/src/parser.pegjs
@ -133,15 +133,34 @@ ActionExpression

 SequenceExpression
  = head:LabeledExpression tail:(__ LabeledExpression)* {
-      return tail.length > 0
-        ? createNode( "sequence", {
-            elements: buildList(head, tail, 1),
-          } )
-        : head;
+      if ( tail.length < 1 )
+
+        return head.type === "labeled" && head.pick
+          ? createNode( "sequence", { elements: [ head ] } )
+          : head;
+
+      return createNode( "sequence", {
+
+        elements: buildList( head, tail, 1 ),
+
+      } );
    }

 LabeledExpression
-  = label:IdentifierName __ ":" __ expression:PrefixedExpression {
+  = "@" label:(IdentifierName __ ":")? __ expression:PrefixedExpression {
+      const [ name, location ] = extractOptional(label, 0) || [];
+
+      if (name && RESERVED_WORDS.indexOf(name) >= 0) {
+        error(`Label can't be a reserved word "${name}".`, location);
+      }
+
+      return createNode( "labeled", {
+        pick: true,
+        label: name,
+        expression: expression,
+      } );
+    }
+  / label:IdentifierName __ ":" __ expression:PrefixedExpression {
      if (RESERVED_WORDS.indexOf(label[0]) >= 0) {
        error(`Label can't be a reserved word "${label[0]}".`, label[1]);
      }
--- a/test/spec/behavior/generated-parser-behavior.spec.js
+++ b/test/spec/behavior/generated-parser-behavior.spec.js
@ -10,20 +10,6 @@ describe( "generated parser behavior", function () {

    function varyOptimizationOptions( block ) {

-        function clone( object ) {
-
-            const result = {};
-
-            Object.keys( object ).forEach( key => {
-
-                result[ key ] = object[ key ];
-
-            } );
-
-            return result;
-
-        }
-
        const optionsVariants = [
            { cache: false, optimize: "speed", trace: false },
            { cache: false, optimize: "speed", trace: true },
@ -41,7 +27,7 @@ describe( "generated parser behavior", function () {
                "with options " + chai.util.inspect( variant ),
                function () {

-                    block( clone( variant ) );
+                    block( peg.util.clone( variant ) );

                }
            );
@ -1332,13 +1318,117 @@ describe( "generated parser behavior", function () {

            describe( "when all expressions match", function () {

-                it( "returns an array of their match results", function () {
+                function parser( description, edgecases ) {

-                    const parser = peg.generate( "start = 'a' 'b' 'c'", options );
+                    it( description, () => {

-                    expect( parser ).to.parse( "abc", [ "a", "b", "c" ] );
+                        edgecases.forEach( ( { grammar, input, output } ) => {

-                } );
+                            const parser = peg.generate( grammar, options );
+                            expect( parser ).to.parse( input, output );
+
+                        } );
+
+                    } );
+
+                }
+
+                parser( "returns an array of their match results", [
+                    {
+                        grammar: "start = 'a' 'b' 'c'",
+                        input: "abc",
+                        output: [ "a", "b", "c" ]
+                    },
+                ] );
+
+                parser( "plucking a single value", [
+                    {
+                        grammar: "start = @'a'",
+                        input: "a",
+                        output: "a"
+                    },
+                    {
+                        grammar: "start = @'a' / @'b'",
+                        input: "a",
+                        output: "a"
+                    },
+                    {
+                        grammar: "start = @'a' / @'b'",
+                        input: "b",
+                        output: "b"
+                    },
+                    {
+                        grammar: "start = 'a' @'b' 'c'",
+                        input: "abc",
+                        output: "b"
+                    },
+                    {
+                        grammar: "start = 'a' ( @'b' 'c' )",
+                        input: "abc",
+                        output: [ "a", "b" ]
+                    },
+                    {
+                        grammar: "start = 'a' @( 'b' @'c' 'd' )",
+                        input: "abcd",
+                        output: "c"
+                    },
+                    {
+                        grammar: "start = 'a' ( @'b' 'c' ) @'d'",
+                        input: "abcd",
+                        output: "d"
+                    },
+                    {
+                        grammar: "start = 'a' @'b' 'c' / 'd' 'e' @'f'",
+                        input: "def",
+                        output: "f"
+                    },
+                ] );
+
+                parser( "plucking multiple values", [
+                    {
+                        grammar: "start = 'a' @'b' @'c'",
+                        input: "abc",
+                        output: [ "b", "c" ]
+                    },
+                    {
+                        grammar: "start = 'a' ( @'b' @'c' )",
+                        input: "abc",
+                        output: [ "a", [ "b", "c" ] ]
+                    },
+                    {
+                        grammar: "start = 'a' @( 'b' @'c' @'d' )",
+                        input: "abcd",
+                        output: [ "c", "d" ]
+                    },
+                    {
+                        grammar: "start = 'a' @( @'b' 'c' ) @'d' 'e'",
+                        input: "abcde",
+                        output: [ "b", "d" ]
+                    },
+                    {
+                        grammar: "start = 'a' @'b' 'c' / @'d' 'e' @'f'",
+                        input: "def",
+                        output: [ "d", "f" ]
+                    },
+                ] );
+
+                parser( "plucking a value if a predicate doesnt fail", [
+                    {
+                        grammar: "start = @'a' &{ return true; }",
+                        input: "a",
+                        output: "a"
+                    },
+                    {
+                        grammar: "start = @'a' !{ return false; }",
+                        input: "a",
+                        output: "a"
+                    },
+                    {
+                        grammar: "start = @n:[0-9] &{ return n > 0; }",
+                        input: "2",
+                        output: "2"
+                    },
+                ] );

            } );

--- a/test/spec/unit/compiler/passes/generate-bytecode.spec.js
+++ b/test/spec/unit/compiler/passes/generate-bytecode.spec.js
@ -342,6 +342,50 @@ describe( "compiler pass |generateBytecode|", function () {

        } );

+        it( "generates correct plucking bytecode", function () {
+
+            expect( pass ).to.changeAST( "start = 'a' @'b' 'c'", bytecodeDetails( [
+                5,                            // PUSH_CURR_POS
+                23, 0, 18, 0, 2, 1, 22, 0, 3, // <elements[0]>
+                15, 36, 3,                    // IF_NOT_ERROR
+                23, 1, 18, 1, 2, 1, 22, 1, 3, //   * <elements[1]>
+                15, 20, 4,                    //     IF_NOT_ERROR
+                23, 2, 18, 2, 2, 1, 22, 2, 3, //       * <elements[2]>
+                15, 4, 4,                     //         IF_NOT_ERROR
+                41, 4, 1, 1,                  //           * PLUCK
+                8, 3,                         //           * POP_N
+                7,                            //             POP_CURR_POS
+                3,                            //             PUSH_FAILED
+                8, 2,                         //       * POP_N
+                7,                            //         POP_CURR_POS
+                3,                            //         PUSH_FAILED
+                6,                            //   * POP
+                7,                            //     POP_CURR_POS
+                3,                            //     PUSH_FAILED
+            ] ) );
+
+            expect( pass ).to.changeAST( "start = 'a' @'b' @'c'", bytecodeDetails( [
+                5,                            // PUSH_CURR_POS
+                23, 0, 18, 0, 2, 1, 22, 0, 3, // <elements[0]>
+                15, 37, 3,                    // IF_NOT_ERROR
+                23, 1, 18, 1, 2, 1, 22, 1, 3, //   * <elements[1]>
+                15, 21, 4,                    //     IF_NOT_ERROR
+                23, 2, 18, 2, 2, 1, 22, 2, 3, //       * <elements[2]>
+                15, 5, 4,                     //         IF_NOT_ERROR
+                41, 4, 2, 1, 0,               //           * PLUCK
+                8, 3,                         //           * POP_N
+                7,                            //             POP_CURR_POS
+                3,                            //             PUSH_FAILED
+                8, 2,                         //       * POP_N
+                7,                            //         POP_CURR_POS
+                3,                            //         PUSH_FAILED
+                6,                            //   * POP
+                7,                            //     POP_CURR_POS
+                3,                            //     PUSH_FAILED
+            ] ) );
+
+        } );
+
    } );

    describe( "for labeled", function () {
--- a/test/spec/unit/compiler/passes/helpers.js
+++ b/test/spec/unit/compiler/passes/helpers.js
@ -72,6 +72,8 @@ module.exports = function ( chai, utils ) {

        if ( ! passed && typeof props !== "undefined" ) {

+            if ( typeof props === "string" ) props = { message: props };
+
            Object.keys( props ).forEach( key => {

                new Assertion( result )
--- a/test/spec/unit/compiler/passes/report-incorrect-plucking.spec.js
+++ b/test/spec/unit/compiler/passes/report-incorrect-plucking.spec.js
@ -0,0 +1,52 @@
+"use strict";
+
+const { expect, use } = require( "chai" );
+const helpers = require( "./helpers" );
+const pass = require( "pegjs" ).compiler.passes.check.reportIncorrectPlucking;
+
+use( helpers );
+
+describe( "compiler pass |reportIncorrectPlucking|", function () {
+
+    function reports( error, edgecases ) {
+
+        it( error.slice( 0, -1 ), () => {
+
+            edgecases.forEach( grammar => expect( pass ).to.reportError( grammar, error ) );
+
+        } );
+
+    }
+
+    reports( `"@" cannot be used with an action block.`, [
+
+        `start1 = 'a' @'b' 'c' { /* empty action block */ }`,
+        `start2 = 'a' @('b' @'c' { /* empty action block */ })`
+
+    ] );
+
+    reports( `"@" cannot be used on a semantic predicate.`, [
+
+        `start1 = 'a' @&{ /* semantic_and */ } 'c'`,
+        `start2 = 'a' @!{ /* semantic_not */ } 'c'`
+
+    ] );
+
+    it( "allows valid plucking", function () {
+
+        expect( pass ).not.to.reportError( `
+
+            start1 =  @'1'               // return '1'
+            start2 =  @'1' / @'2'        // return '1' or '2'
+            start2 =   '1'   @'2' '3'    // return '2'
+            start3 =   '1' @b:'2' '3'    // return '2', label "b" can be used in semantic predicates
+            start4 = a:'1' @b:'2' '3'    // return '2', labels "a" and "b" can be used in semantic predicates
+            start5 =  @'1'   @'2' '3'    // return ['1', '2']
+            start6 =  @'1' @b:'2' '3'    // return ['1', '2'], label "b" can be used in semantic predicates
+            start7 = a:'1'   @'2' &{}    // return '2' if the semantic predicate doesnt fail
+
+        ` );
+
+    } );
+
+} );
--- a/test/spec/unit/parser.spec.js
+++ b/test/spec/unit/parser.spec.js
@ -425,6 +425,49 @@ describe( "PEG.js grammar parser", function () {

    } );

+    // Value Plucking
+    it( "parses `@` (value plucking)", function () {
+
+        function $S( ...elements ) {
+
+            return oneRuleGrammar( {
+                type: "sequence",
+                elements: elements
+            } );
+
+        }
+        function $P( label, expression ) {
+
+            return {
+                type: "labeled",
+                pick: true,
+                label: label || void 0,
+                expression: expression
+            };
+
+        }
+
+        expect( "start = @'abcd'" ).to.parseAs(
+            $S( $P( null, literalAbcd ) )
+        );
+        expect( "start = @a:'abcd'" ).to.parseAs(
+            $S( $P( "a", literalAbcd ) )
+        );
+        expect( "start = 'abcd' @'efgh'" ).to.parseAs(
+            $S( literalAbcd, $P( null, literalEfgh ) )
+        );
+        expect( "start = a:'abcd' @b:'efgh'" ).to.parseAs(
+            $S( labeledAbcd, $P( "b", literalEfgh ) )
+        );
+        expect( "start = @'abcd' b:'efgh'" ).to.parseAs(
+            $S( $P( null, literalAbcd ), labeledEfgh )
+        );
+        expect( "start = a:'abcd' @'efgh' 'ijkl' @d:'mnop'" ).to.parseAs(
+            $S( labeledAbcd, $P( null, literalEfgh ), literalIjkl, $P( "d", literalMnop ) )
+        );
+
+    } );
+
    // Canonical LabeledExpression is "a:'abcd'".
    it( "parses LabeledExpression", function () {

@ -605,7 +648,7 @@ describe( "PEG.js grammar parser", function () {
                trivialGrammar, [ { offset: 7, text: "abc", multiline: false } ], options
            ), options );

-            expect( "start =//\n@\n'abcd'" ).to.failToParse();
+            expect( "start =//\n>\n'abcd'" ).to.failToParse();

        } );