From da57118a43a904f753d44d407994cf0b36358adc Mon Sep 17 00:00:00 2001 From: David Majda Date: Mon, 23 Feb 2015 20:12:24 -0800 Subject: [PATCH] Implement basic support for tracing Parsers can now be generated with support for tracing using the --trace CLI option or a boolean |trace| option to |PEG.buildParser|. This makes them trace their progress, which can be useful for debugging. Parsers generated with tracing support are called "tracing parsers". When a tracing parser executes, by default it traces the rules it enters and exits by writing messages to the console. For example, a parser built from this grammar: start = a / b a = "a" b = "b" will write this to the console when parsing input "b": 1:1 rule.enter start 1:1 rule.enter a 1:1 rule.fail a 1:1 rule.enter b 1:2 rule.match b 1:2 rule.match start You can customize tracing by passing a custom *tracer* to parser's |parse| method using the |tracer| option: parser.parse(input, { tracer: tracer }); This will replace the built-in default tracer (which writes to the console) with the tracer you supplied. The tracer must be an object with a |trace| method. This method is called each time a tracing event happens. It takes one argument which is an object describing the tracing event. Currently, three events are supported: * rule.enter -- triggered when a rule is entered * rule.match -- triggered when a rule matches successfully * rule.fail -- triggered when a rule fails to match These events are triggered in nested pairs -- for each rule.enter event there is a matching rule.match or rule.fail event. 
The event object passed as an argument to |trace| contains these properties: * type -- event type * rule -- name of the rule the event is related to * offset -- parse position at the time of the event * line -- line at the time of the event * column -- column at the time of the event * result -- rule's match result (only for rule.match event) The whole tracing API is somewhat experimental (which is why it isn't documented properly yet) and I expect it will evolve over time as experience is gained. The default tracer is also somewhat bare-bones. I hope that PEG.js user community will develop more sophisticated tracers over time and I'll be able to integrate their best ideas into the default tracer. --- README.md | 4 +- bin/pegjs | 6 + lib/compiler.js | 1 + lib/compiler/passes/generate-javascript.js | 203 +++++++++++++++++++-- spec/api/generated-parser-api.spec.js | 80 ++++++++ spec/api/pegjs-api.spec.js | 41 +++++ spec/api/plugin-api.spec.js | 1 + 7 files changed, 319 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3d5b964..1f8fc55 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ You can tweak the generated parser with several options: `PEG.buildParser` * `--extra-options-file` — file with additional options (in JSON format) to pass to `PEG.buildParser` + * `--trace` — makes the parser trace its progress ### JavaScript API @@ -134,9 +135,10 @@ the input is invalid. The exception will contain `offset`, `line`, `column`, parser.parse("abcd"); // throws an exception You can tweak parser behavior by passing a second parameter with an options -object to the `parse` method. Only one option is currently supported: +object to the `parse` method. The following options are supported: * `startRule` — name of the rule to start parsing from + * `tracer` — tracer to use Parsers can also support their own custom options. 
diff --git a/bin/pegjs b/bin/pegjs index 4c7eb8f..74c577f 100755 --- a/bin/pegjs +++ b/bin/pegjs @@ -32,6 +32,7 @@ function printHelp() { util.puts(" grammar)"); util.puts(" -o, --optimize select optimization for speed or size"); util.puts(" (default: speed)"); + util.puts(" --trace enable tracing in generated parser"); util.puts(" --plugin use a specified plugin (can be specified"); util.puts(" multiple times)"); util.puts(" --extra-options additional options (in JSON format) to pass"); @@ -112,6 +113,7 @@ var options = { cache: false, output: "source", optimize: "speed", + trace: false, plugins: [] }; @@ -140,6 +142,10 @@ while (args.length > 0 && isOption(args[0])) { .map(trim); break; + case "--trace": + options.trace = true; + break; + case "-o": case "--optimize": nextArg(); diff --git a/lib/compiler.js b/lib/compiler.js index fbae304..84d3b50 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -36,6 +36,7 @@ var compiler = { objects.defaults(options, { allowedStartRules: [ast.rules[0].name], cache: false, + trace: false, optimize: "speed", output: "parser" }); diff --git a/lib/compiler/passes/generate-javascript.js b/lib/compiler/passes/generate-javascript.js index 1cbb1b5..92541e6 100644 --- a/lib/compiler/passes/generate-javascript.js +++ b/lib/compiler/passes/generate-javascript.js @@ -37,25 +37,60 @@ function generateJavascript(ast, options) { } } - function generateRuleHeader(ruleIndexCode) { + function generateRuleHeader(ruleNameCode, ruleIndexCode) { + var parts = []; + + parts.push(''); + + if (options.trace) { + parts.push([ + 'peg$trace({', + ' type: "rule.enter",', + ' rule: ' + ruleNameCode, + '});', + '' + ].join('\n')); + } + if (options.cache) { - return [ - '', + parts.push([ 'var key = peg$currPos * ' + ast.rules.length + ' + ' + ruleIndexCode + ',', ' cached = peg$cache[key];', '', 'if (cached) {', ' peg$currPos = cached.nextPos;', + '', + ].join('\n')); + + if (options.trace) { + parts.push([ + 'if (cached.result !== peg$FAILED) {', + ' 
peg$trace({', + ' type: "rule.match",', + ' rule: ' + ruleNameCode + ',', + ' result: cached.result', + ' });', + '} else {', + ' peg$trace({', + ' type: "rule.fail",', + ' rule: ' + ruleNameCode, + ' });', + '}', + '' + ].join('\n')); + } + + parts.push([ ' return cached.result;', '}', '' - ].join('\n'); - } else { - return ''; + ].join('\n')); } + + return parts.join('\n'); } - function generateRuleFooter(resultCode) { + function generateRuleFooter(ruleNameCode, resultCode) { var parts = []; if (options.cache) { @@ -65,6 +100,24 @@ function generateJavascript(ast, options) { ].join('\n')); } + if (options.trace) { + parts.push([ + '', + 'if (' + resultCode + ' !== peg$FAILED) {', + ' peg$trace({', + ' type: "rule.match",', + ' rule: ' + ruleNameCode + ',', + ' result: ' + resultCode, + ' });', + '} else {', + ' peg$trace({', + ' type: "rule.fail",', + ' rule: ' + ruleNameCode, + ' });', + '}' + ].join('\n')); + } + parts.push([ '', 'return ' + resultCode + ';' @@ -158,7 +211,7 @@ function generateJavascript(ast, options) { ' params, i;', ].join('\n')); - parts.push(indent2(generateRuleHeader('index'))); + parts.push(indent2(generateRuleHeader('peg$ruleNames[index]', 'index'))); parts.push([ /* @@ -337,7 +390,7 @@ function generateJavascript(ast, options) { ' }' ].join('\n')); - parts.push(indent2(generateRuleFooter('stack[0]'))); + parts.push(indent2(generateRuleFooter('peg$ruleNames[index]', 'stack[0]'))); parts.push('}'); return parts.join('\n'); @@ -657,9 +710,15 @@ function generateJavascript(ast, options) { ' var ' + arrays.map(arrays.range(0, stack.maxSp + 1), s).join(', ') + ';', ].join('\n')); - parts.push(indent2(generateRuleHeader(asts.indexOfRule(ast, rule.name)))); + parts.push(indent2(generateRuleHeader( + '"' + js.stringEscape(rule.name) + '"', + asts.indexOfRule(ast, rule.name) + ))); parts.push(indent2(code)); - parts.push(indent2(generateRuleFooter(s(0)))); + parts.push(indent2(generateRuleFooter( + '"' + js.stringEscape(rule.name) + '"', + s(0) 
+ ))); parts.push('}'); @@ -668,7 +727,8 @@ function generateJavascript(ast, options) { var parts = [], startRuleIndices, startRuleIndex, - startRuleFunctions, startRuleFunction; + startRuleFunctions, startRuleFunction, + ruleNames; parts.push([ '(function() {', @@ -696,7 +756,65 @@ function generateJavascript(ast, options) { ' }', '', ' peg$subclass(peg$SyntaxError, Error);', - '', + '' + ].join('\n')); + + if (options.trace) { + parts.push([ + ' function peg$DefaultTracer() {', + ' this.indentLevel = 0;', + ' }', + '', + ' peg$DefaultTracer.prototype.trace = function(event) {', + ' var that = this;', + '', + ' function log(event) {', + ' function repeat(string, n) {', + ' var result = "", i;', + '', + ' for (i = 0; i < n; i++) {', + ' result += string;', + ' }', + '', + ' return result;', + ' }', + '', + ' function pad(string, length) {', + ' return string + repeat(" ", length - string.length);', + ' }', + '', + ' console.log(', + ' event.line + ":" + event.column + " "', + ' + pad(event.type, 10) + " "', + ' + repeat(" ", that.indentLevel) + event.rule', + ' );', + ' }', + '', + ' switch (event.type) {', + ' case "rule.enter":', + ' log(event);', + ' this.indentLevel++;', + ' break;', + '', + ' case "rule.match":', + ' this.indentLevel--;', + ' log(event);', + ' break;', + '', + ' case "rule.fail":', + ' this.indentLevel--;', + ' log(event);', + ' break;', + '', + ' default:', + ' throw new Error("Invalid event type: " + event.type + ".");', + ' }', + ' };', + '' + ].join('\n')); + } + + parts.push([ ' function peg$parse(input) {', ' var options = arguments.length > 1 ? 
arguments[1] : {},', ' parser = this,', @@ -750,7 +868,31 @@ function generateJavascript(ast, options) { ].join('\n')); if (options.cache) { - parts.push(' peg$cache = {},'); + parts.push([ + ' peg$cache = {},', + '' + ].join('\n')); + } + + if (options.trace) { + if (options.optimize === "size") { + ruleNames = '[' + + arrays.map( + ast.rules, + function(r) { return '"' + js.stringEscape(r.name) + '"'; } + ).join(', ') + + ']'; + + parts.push([ + ' peg$ruleNames = ' + ruleNames + ',', + '' + ].join('\n')); + } + + parts.push([ + ' peg$tracer = "tracer" in options ? options.tracer : new peg$DefaultTracer(),', + '' + ].join('\n')); } parts.push([ @@ -947,6 +1089,21 @@ function generateJavascript(ast, options) { '' ].join('\n')); + if (options.trace) { + parts.push([ + ' function peg$trace(event) {', + ' var posDetails = peg$computePosDetails(peg$currPos);', + '', + ' event.offset = peg$currPos;', + ' event.line = posDetails.line;', + ' event.column = posDetails.column;', + '', + ' peg$tracer.trace(event);', + ' }', + '', + ].join('\n')); + } + if (options.optimize === "size") { parts.push(indent4(generateInterpreter())); parts.push(''); @@ -982,8 +1139,22 @@ function generateJavascript(ast, options) { ' }', '', ' return {', - ' SyntaxError: peg$SyntaxError,', - ' parse: peg$parse', + ].join('\n')); + + if (options.trace) { + parts.push([ + ' SyntaxError: peg$SyntaxError,', + ' DefaultTracer: peg$DefaultTracer,', + ' parse: peg$parse' + ].join('\n')); + } else { + parts.push([ + ' SyntaxError: peg$SyntaxError,', + ' parse: peg$parse' + ].join('\n')); + } + + parts.push([ ' };', '})()' ].join('\n')); diff --git a/spec/api/generated-parser-api.spec.js b/spec/api/generated-parser-api.spec.js index 724852f..bc3fd70 100644 --- a/spec/api/generated-parser-api.spec.js +++ b/spec/api/generated-parser-api.spec.js @@ -41,6 +41,86 @@ describe("generated parser API", function() { }); }); + describe("tracing", function() { + var parser = PEG.buildParser([ + 'start = a / b', + 'a 
= "a"', + 'b = "b"' + ].join("\n"), { trace: true }); + + describe("default tracer", function() { + it("traces using console.log", function() { + spyOn(console, "log"); + + parser.parse("b"); + + expect(console.log).toHaveBeenCalledWith("1:1 rule.enter start"); + expect(console.log).toHaveBeenCalledWith("1:1 rule.enter a"); + expect(console.log).toHaveBeenCalledWith("1:1 rule.fail a"); + expect(console.log).toHaveBeenCalledWith("1:1 rule.enter b"); + expect(console.log).toHaveBeenCalledWith("1:2 rule.match b"); + expect(console.log).toHaveBeenCalledWith("1:2 rule.match start"); + }); + }); + + describe("custom tracers", function() { + describe("trace", function() { + it("receives tracing events", function() { + var tracer = { trace: function() { } }; + + spyOn(tracer, "trace"); + + parser.parse("b", { tracer: tracer }); + + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.enter', + rule: 'start', + offset: 0, + line: 1, + column: 1 + }); + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.enter', + rule: 'a', + offset: 0, + line: 1, + column: 1 + }); + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.fail', + rule: 'a', + offset: 0, + line: 1, + column: 1 + }); + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.enter', + rule: 'b', + offset: 0, + line: 1, + column: 1 + }); + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.match', + rule: 'b', + result: 'b', + offset: 1, + line: 1, + column: 2 + }); + expect(tracer.trace).toHaveBeenCalledWith({ + type: 'rule.match', + rule: 'start', + result: 'b', + offset: 1, + line: 1, + column: 2 + }); + }); + }); + }); + }); + it("accepts custom options", function() { var parser = PEG.buildParser('start = "a"'); diff --git a/spec/api/pegjs-api.spec.js b/spec/api/pegjs-api.spec.js index 0046410..2bcc21d 100644 --- a/spec/api/pegjs-api.spec.js +++ b/spec/api/pegjs-api.spec.js @@ -122,6 +122,47 @@ describe("PEG.js API", function() { }); }); + describe("tracing", function() { + var 
grammar = 'start = "a"'; + + describe("when |trace| is not set", function() { + it("generated parser doesn't trace", function() { + var parser = PEG.buildParser(grammar); + + spyOn(console, "log"); + + parser.parse("a"); + + expect(console.log).not.toHaveBeenCalled(); + }); + }); + + describe("when |trace| is set to |false|", function() { + it("generated parser doesn't trace", function() { + var parser = PEG.buildParser(grammar, { trace: false }); + + spyOn(console, "log"); + + parser.parse("a"); + + expect(console.log).not.toHaveBeenCalled(); + }); + }); + + describe("when |trace| is set to |true|", function() { + it("generated parser traces", function() { + var parser = PEG.buildParser(grammar, { trace: true }); + + spyOn(console, "log"); + + parser.parse("a"); + + expect(console.log).toHaveBeenCalledWith("1:1 rule.enter start"); + expect(console.log).toHaveBeenCalledWith("1:2 rule.match start"); + }); + }); + }); + /* * The |optimize| option isn't tested because there is no meaningful way to * write the specs without turning this into a performance test. diff --git a/spec/api/plugin-api.spec.js b/spec/api/plugin-api.spec.js index a37d89d..25e7171 100644 --- a/spec/api/plugin-api.spec.js +++ b/spec/api/plugin-api.spec.js @@ -102,6 +102,7 @@ describe("plugin API", function() { ' rules: [', ' {', ' type: "rule",', + ' name: "start",', ' expression: { type: "literal", value: text(), ignoreCase: false }', ' }', ' ]',