From f4c186d7bd303cb6af17bf3b4d9831e424b69ddf Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Thu, 10 Nov 2022 16:11:03 +0100 Subject: [PATCH] WIP --- poc-generators-2/core-ops/one-or-more.js | 3 +- poc-generators-2/core-ops/zero-or-more.js | 3 +- poc-generators-2/index.js | 62 +++++++++++++++++------ poc-generators-2/test-grammar.js | 24 ++++----- poc-generators-2/test-hls.js | 4 +- poc-generators-2/yieldcore/index.js | 59 +++++++-------------- 6 files changed, 84 insertions(+), 71 deletions(-) diff --git a/poc-generators-2/core-ops/one-or-more.js b/poc-generators-2/core-ops/one-or-more.js index 77c7dea..69a1d93 100644 --- a/poc-generators-2/core-ops/one-or-more.js +++ b/poc-generators-2/core-ops/one-or-more.js @@ -7,7 +7,8 @@ module.exports = function* oneOrMore(instruction, state, context) { let matches = yield coreOps.internalCall.zeroOrMore({ rule: instruction.rule }, state, context); // FIXME: NotEnoughInput propagation necessary here? - if (matches.length > 0) { + // FIXME: Do we need this array check? + if (Array.isArray(matches) && matches.length > 0) { return matches; } else { return NoMatch; diff --git a/poc-generators-2/core-ops/zero-or-more.js b/poc-generators-2/core-ops/zero-or-more.js index 8c6b0e5..6b201e2 100644 --- a/poc-generators-2/core-ops/zero-or-more.js +++ b/poc-generators-2/core-ops/zero-or-more.js @@ -9,8 +9,7 @@ module.exports = function* zeroOrMore(instruction, state, context) { let matches = []; while (true) { - let reachedEnd = yield coreOps.internalCall.endOfInput({ rule: rule }, state, context); - // yieldcore.Internal(endOfInput(instruction, state, context), "endOfInput"); + let reachedEnd = yield coreOps.internalCall.endOfInput({}, state, context); if (reachedEnd === true) { break; diff --git a/poc-generators-2/index.js b/poc-generators-2/index.js index f7a96db..a61a2a9 100644 --- a/poc-generators-2/index.js +++ b/poc-generators-2/index.js @@ -55,9 +55,7 @@ parser namespace property on parsing functions // FIXME: Consider whether NotEnoughInput can be handled on a core level rather than in individual core operations, since it seems to always need to be propagated? // FIXME: Need a way to mark end of input, to avoid the case where a trailing optional yields a NotEnoughInput even though it *should* have parsed the input end as the actual end, and concluded that the optional is not present. -const isGeneratorFunction = require("is-generator-function"); -const isRegex = require("is-regex"); -const asExpression = require("as-expression"); +const PromiseTry = require("es6-promise-try"); const matchValue = require("match-value"); const util = require("util"); const yieldcore = require("./yieldcore"); @@ -85,19 +83,35 @@ function isInternalFrame(frame) { } } +function singleLineInspect(value, options = {}) { + return util.inspect(value, { colors: true, compact: true, breakLength: Infinity, ... options }); +} + function getStackSize(stack) { return stack.filter((frame) => !isInternalFrame(frame)).length; } function formatFrameInstruction(frame) { if (frame.instruction === "internal") { - let formattedRule = util.inspect(frame.name, { colors: true, compact: true, breakLength: Infinity }); - return chalk.blue(`[internal: ${formattedRule}]`); + if (frame.name?.__protocolKitInstruction === true) { + let { __protocolKitInstruction, ... conciseRule } = frame.name; + return chalk.blue(singleLineInspect(conciseRule)); + } else { + return chalk.gray(`[internal: ${frame.name}]`); + } } else { return util.inspect(frame.instruction, { colors: true, compact: true, breakLength: Infinity }); } } +function formatFrameReturnValue(returnValue) { + if (returnValue === NoMatch) { + return chalk.redBright(singleLineInspect(returnValue, { colors: false })); + } else { + return singleLineInspect(returnValue); + } +} + module.exports = { NoMatch: NoMatch, parse: function parse(input, rootParser) { @@ -145,10 +159,25 @@ module.exports = { // Parser yielded let result = yield* applyRule(instruction, frame, stack); + // console.log("previousFrame", (result === NoMatch), stack.at(-2)); + + // MARKER: Loading more input asynchronously + if (result === NotEnoughInput && state.isFullyLoaded === true) { + result = NoMatch; + } + if (result === NotEnoughInput) { + // TODO: Does this correctly come out of the parsing Promise? throw new Error(`Ran out of input`); - } else if (result === NoMatch) { - throw new Error(`No match`); + } else if (result === NoMatch && stack.length > 1 && stack.at(-2).instruction !== "internal") { + // We can never return a NoMatch to a user-supplied generator! Instead, we forcibly NoMatch-fail every user-supplied generator up until the next internal instruction, to essentially emulate a `throw` that gets caught in internal instructions. + for (let i = stack.length - 2; i >= 0; i--) { + if (stack[i].instruction === "internal") { + break; + } else { + stack[i].forceValue = NoMatch; + } + } } else { return result; } @@ -161,26 +190,29 @@ module.exports = { let frame = stack.at(-1); if (process.env.DEBUG_PARSER && !isInternalFrame(frame)) { - console.log(`!! (${formatIndex()})` + " ".repeat(getStackSize(stack)) + util.inspect(returnValue, { colors: true, compact: true, breakLength: Infinity })); + console.log(`!! (${formatIndex()})` + chalk.gray("│ ".repeat(getStackSize(stack))) + formatFrameReturnValue(returnValue)); } }, onAfterStackIncrease: function (stack) { let frame = stack.at(-1); if (process.env.DEBUG_PARSER && !isInternalFrame(frame)) { - console.log(`>> (${formatIndex()})` + " ".repeat(getStackSize(stack)) + formatFrameInstruction(frame)); + console.log(`>> (${formatIndex()})` + chalk.gray("│ ".repeat(getStackSize(stack))) + formatFrameInstruction(frame)); } } }); - return core.resume(); + return PromiseTry(() => { + return core.resume(); + }).then((result) => { + if (result === NoMatch) { + throw new Error(`No match`); + } else { + return result; + } + }); - - - - // let rootResult = applyRule(rootParser); - // // FIXME: Detect when rules run out but end of input has not yet been reached, as this is an error (unless specified otherwise - need to figure out how to let grammar authors configure this maybe, for formats that allow trailing data, but that still need to be embeddable? or maybe that doesn't matter because when it's embedded, by definition the sub-parser will never be the root parser, and therefore there are always more higher-level rules left? maybe it's sufficient to just let the top-level parse call determine whether this is valid or not) // if (currentIndex < input.length) { // console.log("incomplete result:", rootResult); diff --git a/poc-generators-2/test-grammar.js b/poc-generators-2/test-grammar.js index 9920a61..1f02435 100644 --- a/poc-generators-2/test-grammar.js +++ b/poc-generators-2/test-grammar.js @@ -4,37 +4,37 @@ const { parse } = require("./index"); const { wholeMatch, either, peek, oneOrMore } = require("./operations"); function* A() { - return yield "hello"; + yield "hello"; } function* Whitespace() { - return yield " "; // FIXME + yield " "; // FIXME } function* B1() { - yield peek(oneOrMore("world")); + let worlds = oneOrMore("world"); + + // yield peek(worlds); // yield peek("world"); - return yield "world"; + yield worlds; } function* B2() { - return yield "earth"; + yield "earth"; } function* B3() { - let result = yield /(moon|jupiter)/; - - return result.$positional[0]; + yield either([ "moon", "jupiter" ]); } module.exports = function* TestParser() { return yield wholeMatch(function* innerWholeMatch () { yield A; yield Whitespace; - return yield either([ B1, B2, B3 ]); + yield either([ B1, B2, B3 ]); }); }; -parse(process.argv[2], module.exports).then((result) => { - console.log(result); -}); +// parse(process.argv[2], module.exports).then((result) => { +// console.log(result); +// }); diff --git a/poc-generators-2/test-hls.js b/poc-generators-2/test-hls.js index f984640..095ea85 100644 --- a/poc-generators-2/test-hls.js +++ b/poc-generators-2/test-hls.js @@ -9,4 +9,6 @@ const { parse } = require("./index"); let file = process.argv[2]; assert(file != null); -console.dir(parse(fs.readFileSync(file, "utf8"), Playlist), { depth: null }); \ No newline at end of file +parse(fs.readFileSync(file, "utf8"), Playlist).then((result) => { + console.dir(result, { depth: null }); +}); diff --git a/poc-generators-2/yieldcore/index.js b/poc-generators-2/yieldcore/index.js index 3157006..a1e9727 100644 --- a/poc-generators-2/yieldcore/index.js +++ b/poc-generators-2/yieldcore/index.js @@ -1,46 +1,19 @@ "use strict"; -const PromiseTry = require("es6-promise-try"); const isGenerator = require("is-generator-function"); const lastItem = require("../util/last-item"); const Pause = Symbol("Pause"); const isPromise = require("./is-promise"); -async function asyncGeneratorContext(generatorFunction, customHook) { - let generator = generatorFunction(); - let lastResult = { done: false }; - let nextValue; - - while (lastResult.done === false) { - lastResult = generator.next(nextValue); - - if (typeof lastResult.value?.then === "function") { - nextValue = await lastResult.value; - } else { - nextValue = customHook(lastResult.value); - } - } - - return nextValue; -} - module.exports = { Pause: Pause, - Instruction: (instruction) => { - // FIXME: Unused? - return { - __yieldcoreInstruction: true, - instruction: instruction - }; - }, Internal: (generator, name) => { return { __yieldcoreInternal: true, generator: generator, name: name }; - // FIXME: propagate NotEnoughInput, handle in custom handler as a retry }, create: function createYieldCore(rootGenerator, { onYieldInstruction, onReturn, onBeforeStackDecrease, onAfterStackIncrease }) { let running = false; @@ -56,13 +29,11 @@ module.exports = { stack.pop(); currentFrame = lastItem(stack); - // console.log("popping stack, new frame:", currentFrame); } function increaseStack(frame) { stack.push(frame); currentFrame = lastItem(stack); - // console.log("increasing stack, new frame:", currentFrame); if (onAfterStackIncrease != null) { onAfterStackIncrease(stack); @@ -75,6 +46,7 @@ module.exports = { generator: instruction(), done: false, value: undefined, + forceValue: undefined, name: undefined }); } @@ -85,15 +57,32 @@ module.exports = { generator: generator, done: false, value: undefined, + forceValue: undefined, name: name // TODO: Rename this to `tag` or something instead? Doesn't have to be a name string }); } + function finalizeCurrentFrame(returnValue) { + currentFrame.done = true; + currentFrame.value = returnValue; + + let lastFrame = currentFrame; + decreaseStack(returnValue); + + if (onReturn != null && lastFrame.instruction !== "internal") { + insertInternalInstruction(onReturn(returnValue, lastFrame, stack), "_onReturn"); + } else { + return returnValue; + } + } + async function startLoop() { while (finished === false && running === true) { if (currentFrame.done === true) { // This is a previously completed frame; it doesn't need any further processing decreaseStack(currentFrame.value); + } else if (currentFrame.forceValue !== undefined) { + lastValue = finalizeCurrentFrame(currentFrame.forceValue); } else { // FIXME: Catch let result = currentFrame.generator.next(lastValue); @@ -101,17 +90,7 @@ module.exports = { if (result.done === true) { // value == return value - currentFrame.done = true; - currentFrame.value = result.value; - - let lastFrame = currentFrame; - decreaseStack(result.value); - - if (onReturn != null && lastFrame.instruction !== "internal") { - insertInternalInstruction(onReturn(result.value, lastFrame, stack), "_onReturn"); - } else { - action = result.value; - } + action = finalizeCurrentFrame(result.value); } else if (isGenerator(result.value)) { // TODO: Figure out better semantics for onYieldGenerator? // NOTE: Currently this hook *cannot* be a generator!