From 1da776edbf0cc95b2235ad023a95a348714763ac Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Fri, 2 Sep 2022 14:46:43 +0200 Subject: [PATCH] Initial commit, WIP --- .gitignore | 1 + notes.txt | 30 +++ package.json | 14 ++ packages/imap/index.js | 2 + packages/iso9660/index.js | 1 + poc-generators-2/hls.js | 59 +++++ poc-generators-2/index.js | 387 +++++++++++++++++++++++++++++++ poc-generators-2/maths.js | 99 ++++++++ poc-generators-2/operations.js | 73 ++++++ poc-generators-2/sample-hls.m3u | 9 + poc-generators-2/simple.js | 2 + poc-generators-2/test-grammar.js | 37 +++ poc-generators-2/test-hls.js | 12 + poc-generators.js | 56 +++++ test-gen.js | 16 ++ yarn.lock | 69 ++++++ 16 files changed, 867 insertions(+) create mode 100644 .gitignore create mode 100644 notes.txt create mode 100644 package.json create mode 100644 packages/imap/index.js create mode 100644 packages/iso9660/index.js create mode 100644 poc-generators-2/hls.js create mode 100644 poc-generators-2/index.js create mode 100644 poc-generators-2/maths.js create mode 100644 poc-generators-2/operations.js create mode 100644 poc-generators-2/sample-hls.m3u create mode 100644 poc-generators-2/simple.js create mode 100644 poc-generators-2/test-grammar.js create mode 100644 poc-generators-2/test-hls.js create mode 100644 poc-generators.js create mode 100644 test-gen.js create mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b512c09 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +node_modules \ No newline at end of file diff --git a/notes.txt b/notes.txt new file mode 100644 index 0000000..41d94f9 --- /dev/null +++ b/notes.txt @@ -0,0 +1,30 @@ +Compatibility matrix: +- Input type +- Streaming output +- Streaming item reads + +Input type Streaming output Streaming item reads supported? 
+=============== ================ =============================== +Stream Yes Yes, blocking +Stream No No +Seekable Yes Yes +Seekable No Yes +Value Yes Yes +Value No Yes + +Parsing options: +- Force streaming items to be buffered instead +- Allow incomplete parses (for incremental seeking parsing) + +Input types: +- Value (string/buffer) +- Seekable (how to set encoding here?) +- Stream (how to detect string vs. buffer here?) + +Thoughts: +- Maybe there should be a parseString vs. parseBuffer method to declare the *intended* parsing mode? +- Need to support *some* form of mixed-mode parsing, eg. for HTTP-encapsulated binary data +- FIXME/MARKER: regex parsing cannot work in either streaming *or* mixed-mode parsing, because with the language implementation of regex it is unknowable whether it failed due to a mismatch or due to running out of input + - Provide a custom character range operation instead + - ensure that this correctly handles the astral plane, since those characters are two code units in JS (which uses UTF-16) rather than one codepoint (as when decoding UTF-8) + - also need to make sure to get the *codepoint* of characters for determining the ranges, not the first/single code unit value (which I think is what charCodeAt returns?) 
\ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..6b4cdbf --- /dev/null +++ b/package.json @@ -0,0 +1,14 @@ +{ + "name": "protocolkit", + "version": "1.0.0", + "main": "index.js", + "repository": "git@git.cryto.net:joepie91/protocolkit.git", + "author": "Sven Slootweg ", + "license": "MIT", + "dependencies": { + "as-expression": "^1.0.0", + "is-generator-function": "^1.0.10", + "is-regex": "^1.1.4", + "match-value": "^1.1.0" + } +} diff --git a/packages/imap/index.js b/packages/imap/index.js new file mode 100644 index 0000000..23e72fd --- /dev/null +++ b/packages/imap/index.js @@ -0,0 +1,2 @@ +"use strict"; + diff --git a/packages/iso9660/index.js b/packages/iso9660/index.js new file mode 100644 index 0000000..3918c74 --- /dev/null +++ b/packages/iso9660/index.js @@ -0,0 +1 @@ +"use strict"; diff --git a/poc-generators-2/hls.js b/poc-generators-2/hls.js new file mode 100644 index 0000000..b7384d1 --- /dev/null +++ b/poc-generators-2/hls.js @@ -0,0 +1,59 @@ +"use strict"; + +const { either, wholeMatch, optional, oneOrMore, until, EndOfInput } = require("./operations"); + +function* Newline() { + yield "\n"; +} + +function* Digits() { + return yield /[0-9]+/; +} + +function* Integer() { + return parseInt(yield wholeMatch(Digits)); +} + +function* Decimal() { + // NOTE: this gets converted to a floating point value! 
+ let decimalString = yield wholeMatch(function* () { + yield Digits; + + yield optional(function* () { + yield "."; + yield Digits; + }); + }); + + return parseFloat(decimalString); +} + +function* Playlist() { + return yield either([ MediaPlaylist ]); +} + +function* MediaPlaylist() { + yield "#EXTM3U"; + yield Newline; + + yield "#EXT-X-TARGETDURATION:"; + let targetDuration = yield Integer; + yield Newline; + yield Newline; + + let items = yield oneOrMore(function* () { + yield "#EXTINF:"; + let duration = yield Decimal; + yield ","; + yield Newline; + + let url = yield until(Newline); + yield either([ EndOfInput, Newline ]); + + return { url, duration }; + }); + + return { targetDuration, items }; +} + +module.exports = { Playlist }; \ No newline at end of file diff --git a/poc-generators-2/index.js b/poc-generators-2/index.js new file mode 100644 index 0000000..31137ae --- /dev/null +++ b/poc-generators-2/index.js @@ -0,0 +1,387 @@ +"use strict"; + +// function Literal(sequence) { +// // sequence = string or bytes +// } + +// function Bytes(count) { + +// } + +// function ByteRange(rangeStart, rangeEnd) { + +// } + +// function Characters(count) { + +// } + +// function CharacterRange(rangeStart, rangeEnd) { + +// } + +// function Has(parser) { +// // does not consume +// let hasMatch = yield peek(parser); +// return hasMatch; +// } + +// function Until(parser) { +// // does not consume +// return function Until(input, index, _speculativeContext) { +// let indicesRead = 0; + +// while (true) { +// let has = yield Has(parser); +// indicesRead += 1; +// } +// } +// } + +// function UntilNot(parser) { +// // does not consume +// } + +/* +loop through rules +manage context +trackPosition wrapper (document order relative to context-creating operations!) 
- positions are automatically emitted in streaming mode +parser namespace property on parsing functions +*/ + + +// TODO: What if a parsing rule is dependent on some context that's parsed somewhere else in the input entirely? Isn't this fundamentally incompatible with the streaming parser paradigm? +// NOTE: Use sentinel objects to denote failure, to prevent throw/catch overhead, which can get problematic especially with heavy peek/test usage +// FIXME: Consider whether NotEnoughInput can be handled on a core level rather than in individual core operations, since it seems to always need to be propagated? +// FIXME: Need a way to mark end of input, to avoid the case where a trailing optional yields a NotEnoughInput even though it *should* have parsed the input end as the actual end, and concluded that the optional is not present. + +const isGeneratorFunction = require("is-generator-function"); +const isRegex = require("is-regex"); +const asExpression = require("as-expression"); +const matchValue = require("match-value"); +const util = require("util"); + +const NoMatch = Symbol("protocolkit:NoMatch"); +const NotEnoughInput = Symbol("protocolkit:NotEnoughInput"); + +// This is a utility function for propagating NoMatches through the stack, without resorting to `throw`/`catch` (which can be slow) +// FIXME: Use a Result type instead? 
+function assertMatch(testResult, produceResult) { + if (testResult === NoMatch || testResult === NotEnoughInput) { + return testResult; + } else { + return produceResult(testResult); + } +} + +/* TO DO: +- amount of characters +- amount of bytes +- regex +- return match with position metadata + +FEATURES: +- streaming mode for all named matches +- grammar-defined streams for large payloads +*/ + +module.exports = { + NoMatch: NoMatch, + parse: function parse(input, rootParser) { // Parses the complete `input` string with the `rootParser` rule and returns that rule's result; throws an Error when the rule does not match, or when it matches without reaching the end of the input + let currentInput = input; // TODO: Cut this down, switch to chunked API instead + let currentIndex = 0; + let inputLength = input.length; + let parserStack = []; + let inputIsEnded = true; // FIXME: Make this dynamic in streaming mode + + function printIndex() { + return String(currentIndex).padStart(Math.ceil(Math.log10(input.length))); + } + + function applyRule(rule) { + let currentFrame = { + startPosition: currentIndex, + rule: rule + }; + + parserStack.push(currentFrame); + + if (process.env.DEBUG_PARSER) { + // console.log(parserStack); + console.log(`>> (${printIndex()})` + " ".repeat(parserStack.length) + util.inspect(rule, { colors: true, compact: true, breakLength: Infinity })); + } + + // HACK + if (typeof rule === "string") { + rule = { __protocolKitInstruction: true, type: "literal", string: rule }; + } else if (isRegex(rule)) { + rule = { __protocolKitInstruction: true, type: "regex", regex: rule }; + } + + // console.log({rule}); + + let result = asExpression(() => { + if (isGeneratorFunction(rule)) { + let returnValue; // FIXME: Is this correct? 
+ let lastValue; + let done = false; + let generator = rule(); + + while (done === false) { + let subRule = generator.next(lastValue); + // console.log({subRule}); + + if (subRule.done === true) { + returnValue = subRule.value; + done = true; + } else { + lastValue = applyRule(subRule.value); + + if (lastValue === NoMatch || lastValue === NotEnoughInput) { + // Don't bother parsing any further + return lastValue; + } + } + } + + return returnValue; + } else if (typeof rule === "object" && rule.__protocolKitInstruction === true) { + return matchValue(rule.type, { + literal: () => { + let { string } = rule; + + if (currentIndex + string.length > inputLength) { + return NotEnoughInput; + // throw new Error(`End of input reached`); // FIXME: Error type + } else if (input.slice(currentIndex, currentIndex + string.length) === string) { + currentIndex += string.length; + return string; + } else { + return NoMatch; + } + }, + regex: () => { + let { regex } = rule; + + // HACK: This is very much an imperfect approach. We're doing a (potentially large) string copy, and are letting it match at *any* position in the input, potentially wasting resources if it turns out the match wasn't at index 0. This is unfortunate, but likely still the best option - the internal regex implementation is highly optimized (meaning a written-in-JS implementation is unlikely to beat it in performance), and the built-in implementation doesn't allow anchoring a match separately from the regex definition itself. We *could* transform the regex to have a start anchor, but then this would defeat optimization of repeatedly used regexes - this transformation step would be applied *every time the parsing rule is used*, instead of only once at JS parsing time. Should investigate whether there's any performant way of caching this work internally! + // FIXME: Disallow global-flagged regexes? 
As the internal starting index can throw off our logic + // FIXME: The approach we've chosen here is probably *really* unperformant when combining a regex literal with an `until` combinator! + let match = regex.exec(input.slice(currentIndex)); + + if (match?.index === 0) { + // Valid match, because it starts at the currentIndex + currentIndex += match[0].length; + + // NOTE: We only return the groups, and not the full match, for consistency with the rest of the API - wholeMatch should be used for that (and the performance cost of that additional call should be negligible) + return { + $positional: match.slice(1), + ... match.groups + }; + } else { + return NoMatch; + } + }, + endOfInput: () => { + // FIXME: Make this not order-sensitive in an `either`! Currently the NotEnoughInput marker *might* cause issues if this (zero-width) rule comes after nonzero-width rules? Need to investigate. + if (currentIndex === input.length) { + // FIXME: Make this NotEnoughInput-aware; there is probably a similar "exception from the core handling" problem here as in `until` + return true; + } else { + return NoMatch; + } + }, + wholeMatch: () => { + let result = applyRule(rule.rule); + + return assertMatch(result, () => { + return input.slice(currentFrame.startPosition, currentIndex); + }); + }, + either: () => { + let encounteredNotEnoughInput = false; + + for (let option of rule.options) { + // FIXME: currentFrame.startPosition + let startPosition = currentIndex; + let result = applyRule(option); + + if (result === NoMatch) { + // Restore index and try again with the next option + encounteredNotEnoughInput = encounteredNotEnoughInput || (result === NotEnoughInput); + currentIndex = startPosition; + continue; + } else { + // Don't restore index; the match has been consumed + // FIXME: This includes NotEnoughInput! As it warrants an immediate abort. Handling of NotEnoughInput markers should be moved to a centralized place instead. 
Also, we should figure out exactly how to retain the current parsing position when one is encountered, and whether eg. individual core operations need to manage cursor resets for this purpose, or whether the core can centrally handle that as well, eg. by retaining the parsing stack. + return result; + } + } + + // None of the options matched + if (encounteredNotEnoughInput) { + // This means that at least one of the options returned a NotEnoughInput; which means that we couldn't actually determine whether that option *would* have matched or not, so the entire Either will be considered to need more input + return NotEnoughInput; + } else { + return NoMatch; + } + }, + peek: () => { + let result = applyRule(rule.rule); + currentIndex = currentFrame.startPosition; + return result; + }, + test: () => { + // FIXME: Test + // TODO: Share implementation with `peek`, maybe compose? + let result = applyRule(rule.rule); + currentIndex = currentFrame.startPosition; + + if (result === NotEnoughInput) { + // Propagate this marker directly, as we will need to re-parse after receiving more input, and we cannot yet decide whether there is a match or not. 
+ return NotEnoughInput; + } else if (result === NoMatch) { + return false; + } else { + return true; + } + }, + zeroOrMore: () => { + let matches = []; + + while (true) { + let result = applyRule(rule.rule); + + if (result === NotEnoughInput) { + // Propagate, reparse later + return NotEnoughInput; + } else if (result === NoMatch) { + break; + } else { + matches.push(result); + } + } + + return matches; + }, + oneOrMore: () => { + // FIXME: Compose on zeroOrMore, but add a length assertion + let matches = applyRule({ __protocolKitInstruction: true, type: "zeroOrMore", rule: rule.rule }); + + if (matches === NotEnoughInput || matches.length > 0) { + return matches; + } else { + return NoMatch; + } + }, + optional: () => { + let result = applyRule(rule.rule); + + if (result === NotEnoughInput) { + return NotEnoughInput; + } else if (result === NoMatch) { + return undefined; // TODO: Or return `null` instead? + } else { + return result; + } + }, + until: () => { + // FIXME: We're probably never actually triggering NotEnoughInput right now, due to how the loop logic works here? + // NOTE: The terminator is probed through `peek`, so that the cursor stays on the position being tested regardless of whether the terminator is zero-width, longer than one character, or a multi-step rule that fails halfway through 
+ for (; currentIndex <= input.length; currentIndex++) { + let result = applyRule({ __protocolKitInstruction: true, type: "peek", rule: rule.rule }); + + // FIXME: Fix the structure here, and figure out a way to deal with allowEnd without needing to special-case NotEnoughInput handling against inputIsEnded, because that should be a core concern only + if (result === NotEnoughInput) { + if (inputIsEnded && rule.allowEnd) { + // Fall through + break; + } else { + return NotEnoughInput; + } + } else if (result === NoMatch) { + continue; + } else { + // Fall through + break; + } + } + + // We've consumed everything *up to* the match, but not the match itself; when nothing matched at all, the loop has left the cursor one past the end of the input, so clamp it + currentIndex = Math.min(currentIndex, input.length); + return input.slice(currentFrame.startPosition, currentIndex); + } + // zeroOrMore: () => { + + // }, + // oneOrMore: () => { + + // }, + // either: () => { + // contextStack.push({ + // index: currentIndex + // }); + + // // try each rule, try next on error, until success or final failure + // }, + // optional: () => { + // // Also generates a context + // }, + // peek: () => { + // // TODO: semantic difference between peek and either is that the either context should be thrown away after it fully completes (including nested rules)? + // // TODO: emit items (or not) option + // contextStack.push({ + // index: currentIndex + // }); + + // // run parser as normal, but reset index afterwards -- return boolean true/false or an actual parsed item? maybe a separate test instruction for boolean result? + // }, + // test: () => { + + // }, + // wholeMatch: () => { + + // }, + // trackPosition: () => { + + // } + }); + } else { + // FIXME: Do we need to implement anything else, or is this just a bug in the grammar? + throw new Error(`Unimplemented`); + } + }); + + if (process.env.DEBUG_PARSER) { + console.log(`!! 
(${printIndex()})` + " ".repeat(parserStack.length) + util.inspect(result, { colors: true, compact: true, breakLength: Infinity })); + } + + parserStack.pop(); + + // HACK: Make this nicer, maybe visually represent this in the parse debug tree as well + if (inputIsEnded && result === NotEnoughInput) { + result = NoMatch; + } + if (result === NoMatch) { currentIndex = currentFrame.startPosition; } // A failed rule must never consume input; without this rewind, operations that carry on after a failure (zeroOrMore, optional) would continue from the middle of a half-matched rule + return result; + } + + let rootResult = applyRule(rootParser); + + // Report an outright failure as such before looking at leftover input, as a failed parse rewinds the cursor and would otherwise be misreported as leftover input + if (rootResult === NoMatch) { + throw new Error(`No match`); + } else if (rootResult === NotEnoughInput) { + throw new Error("Not enough input"); + } + + // FIXME: Detect when rules run out but end of input has not yet been reached, as this is an error (unless specified otherwise - need to figure out how to let grammar authors configure this maybe, for formats that allow trailing data, but that still need to be embeddable? or maybe that doesn't matter because when it's embedded, by definition the sub-parser will never be the root parser, and therefore there are always more higher-level rules left? 
maybe it's sufficient to just let the top-level parse call determine whether this is valid or not) + if (currentIndex < input.length) { + console.log("incomplete result:", rootResult); + throw new Error("Ran out of parsing rules before end of input"); + } + + return rootResult; + } +}; + + diff --git a/poc-generators-2/maths.js b/poc-generators-2/maths.js new file mode 100644 index 0000000..2c562fa --- /dev/null +++ b/poc-generators-2/maths.js @@ -0,0 +1,99 @@ +"use strict"; + +// This is an abstraction for dealing with precedence in generic backtracking parsers; the way this works is that by recursively preferring higher-precedence operators over lower-precedence ones, it will parse those first and then treat them as expression inputs to lower-precedence parsers. This is also why lower-precedence operator rules "fall through" to higher-precedence ones; it's more or less equivalent to attempting them in the specified order, but with support for nesting. 
+// TODO: Improve this explanation with a visual reference of some sort +// TODO: Maybe linear parsing with post-facto rearranging/grouping of operators would be more performant and simpler to debug? It might provide less opportunity to insert other parsing rules though, and therefore be less composable. Need to investigate this option. Should also consider a utility function that generates an entire operator parsing tree from a list of instructions, leaving space for non-operator (or non-standard operator) rules to be inserted within that tree. + +const matchValue = require("match-value"); +const { either, oneOrMore, zeroOrMore, optional, wholeMatch } = require("./operations"); +const { parse } = require("./index"); + +function makeBinaryExpressionParser(categoryName, nextStep, operatorMap) { + let validOperators = Object.keys(operatorMap); + + let expressionParser = function* () { + let left = yield nextStep; + + let rights = yield oneOrMore(function* () { + yield MaybeWhitespace; + let operation = matchValue(yield either(validOperators), operatorMap); + yield MaybeWhitespace; + let right = yield nextStep; + + return { operation, right }; + }); + + // NOTE: This implements *left* associativity only! 
+ return rights.reduce((last, { right, operation }) => { + return { type: operation, left: last, right: right }; + }, left); + + // return { type: operation, left, right }; + }; + + Object.defineProperty(expressionParser, "name", { value: `${categoryName}Expression` }); + + let stepWrapper = function* () { + return yield either([ expressionParser, nextStep ]); + }; + + Object.defineProperty(stepWrapper, "name", { value: `${categoryName}Step` }); + + return [ stepWrapper, expressionParser ]; +} + +function* ParenStep() { + return yield either([ NumericExpression, ParenExpression ]); +} + +var [ MultiplicativeStep, MultiplicativeExpression ] = makeBinaryExpressionParser("Multiplicative", ParenStep, { + "*": "multiply", + "/": "divide" +}); + +// NOTE: The usage of `var` here is intentional; it is necessary to make recursion work +var [ AdditiveStep, AdditiveExpression ] = makeBinaryExpressionParser("Additive", MultiplicativeStep, { + "+": "add", + "-": "subtract" +}); + +function* ParenExpression() { + yield "("; + yield MaybeWhitespace; + let expression = yield Expression; + yield MaybeWhitespace; + yield ")"; + + return { type: "group", expression }; +} + +function* Expression() { + // return yield AdditiveExpression; + return yield AdditiveStep; +} + +function* MaybeWhitespace() { + yield zeroOrMore(either([ " ", "\t", "\n" ])); +} + +function* NumericExpression() { + function* parserRule() { + yield oneOrMore(Digit); + yield optional(Fraction); + }; + + function* Fraction() { + yield "."; + yield oneOrMore(Digit); + } + + return parseFloat(yield wholeMatch(parserRule)); +} + +function* Digit() { + yield /[0-9]/; +} + +module.exports = Expression; + +console.dir(parse("1 + 4 / 3 * 5 - (2 - 0)", Expression), { depth: null }); \ No newline at end of file diff --git a/poc-generators-2/operations.js b/poc-generators-2/operations.js new file mode 100644 index 0000000..4d6edc9 --- /dev/null +++ b/poc-generators-2/operations.js @@ -0,0 +1,73 @@ +"use strict"; + 
+module.exports = { + zeroOrMore: (rule) => { + return { + __protocolKitInstruction: true, + type: "zeroOrMore", + rule: rule + }; + }, + oneOrMore: (rule) => { + return { + __protocolKitInstruction: true, + type: "oneOrMore", + rule: rule + }; + }, + either: (options) => { + return { + __protocolKitInstruction: true, + type: "either", + options: options + }; + }, + until: (rule, allowEnd = true) => { + return { + __protocolKitInstruction: true, + type: "until", + rule: rule, + // FIXME: Should allowEnd have a different default? + allowEnd: allowEnd + }; + }, + optional: (rule) => { + return { + __protocolKitInstruction: true, + type: "optional", + rule: rule + }; + }, + peek: (rule) => { + return { + __protocolKitInstruction: true, + type: "peek", + rule: rule + }; + }, + test: (rule) => { + return { + __protocolKitInstruction: true, + type: "test", + rule: rule + }; + }, + wholeMatch: (rule) => { + return { + __protocolKitInstruction: true, + type: "wholeMatch", + rule: rule + }; + }, + trackPosition: (rule) => { + return { + __protocolKitInstruction: true, + type: "trackPosition", + rule: rule + }; + }, + EndOfInput: { + __protocolKitInstruction: true, + type: "endOfInput" + }, +}; diff --git a/poc-generators-2/sample-hls.m3u b/poc-generators-2/sample-hls.m3u new file mode 100644 index 0000000..dfebc10 --- /dev/null +++ b/poc-generators-2/sample-hls.m3u @@ -0,0 +1,9 @@ +#EXTM3U +#EXT-X-TARGETDURATION:10 + +#EXTINF:9.009, +http://media.example.com/first.ts +#EXTINF:9.009, +http://media.example.com/second.ts +#EXTINF:3.003, +http://media.example.com/third.ts \ No newline at end of file diff --git a/poc-generators-2/simple.js b/poc-generators-2/simple.js new file mode 100644 index 0000000..23e72fd --- /dev/null +++ b/poc-generators-2/simple.js @@ -0,0 +1,2 @@ +"use strict"; + diff --git a/poc-generators-2/test-grammar.js b/poc-generators-2/test-grammar.js new file mode 100644 index 0000000..a942882 --- /dev/null +++ b/poc-generators-2/test-grammar.js @@ -0,0 +1,37 
@@ +"use strict"; + +const { parse } = require("./index"); +const { wholeMatch, either, peek } = require("./operations"); + +function* A() { + yield "hello"; +} + +function* Whitespace() { + yield " "; // FIXME +} + +function* B1() { + yield peek("world"); + yield "world"; +} + +function* B2() { + yield "earth"; +} + +function* B3() { + let result = yield /(moon|jupiter)/; + + return result.$positional[0]; +} + +module.exports = function* TestParser() { + return yield wholeMatch(function*() { + yield A; + yield Whitespace; + yield either([ B1, B2, B3 ]); + }); +}; + +console.log(parse(process.argv[2], module.exports)); diff --git a/poc-generators-2/test-hls.js b/poc-generators-2/test-hls.js new file mode 100644 index 0000000..f984640 --- /dev/null +++ b/poc-generators-2/test-hls.js @@ -0,0 +1,12 @@ +"use strict"; + +const fs = require("fs"); +const assert = require("assert"); + +const { Playlist } = require("./hls"); +const { parse } = require("./index"); + +let file = process.argv[2]; +assert(file != null); + +console.dir(parse(fs.readFileSync(file, "utf8"), Playlist), { depth: null }); \ No newline at end of file diff --git a/poc-generators.js b/poc-generators.js new file mode 100644 index 0000000..643fc47 --- /dev/null +++ b/poc-generators.js @@ -0,0 +1,56 @@ +"use strict"; + +let parseableString = "MAGIC[4]toot[5]hello[5]world[1]!END"; +let chunk1 = parseableString.slice(0, 10); +let chunk2 = parseableString.slice(10); + +// FIXME: bytes +// FIXME: emit for streaming parse? instead of return at the end +// FIXME: How to deal with eg. regexes when the match extends beyond the buffer? 
+ +function* Root() { + yield "MAGIC"; + let items = yield repeat(Item); + yield "END"; + yield EndOfInput; + + return items; +} + +function* Item() { + yield "["; + let length = yield Integer(); + yield "]"; + let contents = yield read(length); + + return contents; +} + +function* Integer() { + // FIXME: auto anchor + let [ match ] = yield /[0-9]+/; + return parseInt(match, 10); +} + +function* parseByte(context) { + let position = context.position; + // FIXME: detect end + context.position += 1; + return context.input[position]; +} + +function* parseBytes(context, length) { + let bytes = Buffer.alloc(length); + + for (let i = 0; i < length; i++) { + bytes[i] = yield* parseByte(context); // parseByte is a generator function, so it has to be delegated to; assigning the bare call result would store a generator object (coerced to 0) instead of the byte + } + + return bytes; +} + +// TODO: parseBytesUntil, parseStringUntil? or a generic toString wrapper function instead? + +function* parseString(context, length) { + +} diff --git a/test-gen.js b/test-gen.js new file mode 100644 index 0000000..a9e53f0 --- /dev/null +++ b/test-gen.js @@ -0,0 +1,16 @@ +"use strict"; + +function* foo() { + yield 1; + yield 2; + yield 3; + return 4; +} + +let generator = foo(); + +for (let i of generator) { + console.log("for loop:", i); +} + +console.log("next:", generator.next()); diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..7ebba22 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,69 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +as-expression@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/as-expression/-/as-expression-1.0.0.tgz#7bc620ca4cb2fe0ee90d86729bd6add33b8fd831" + integrity sha512-Iqh4GxNUfxbJdGn6b7/XMzc8m1Dz2ZHouBQ9DDTzyMRO3VPPIAXeoY/sucRxxxXKbUtzwzWZSN6jPR3zfpYHHA== + +call-bind@^1.0.2: + version "1.0.2" + resolved "http://localhost:4873/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c" + integrity sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA== + dependencies: + function-bind "^1.1.1" + get-intrinsic "^1.0.2" + +function-bind@^1.1.1: + version "1.1.1" + resolved "http://localhost:4873/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" + integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== + +get-intrinsic@^1.0.2: + version "1.1.2" + resolved "http://localhost:4873/get-intrinsic/-/get-intrinsic-1.1.2.tgz#336975123e05ad0b7ba41f152ee4aadbea6cf598" + integrity sha512-Jfm3OyCxHh9DJyc28qGk+JmfkpO41A4XkneDSujN9MDXrm4oDKdHvndhZ2dN94+ERNfkYJWDclW6k2L/ZGHjXA== + dependencies: + function-bind "^1.1.1" + has "^1.0.3" + has-symbols "^1.0.3" + +has-symbols@^1.0.2, has-symbols@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" + integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== + +has-tostringtag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.0.tgz#7e133818a7d394734f941e73c3d3f9291e658b25" + integrity sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ== + dependencies: + has-symbols "^1.0.2" + +has@^1.0.3: + version "1.0.3" + resolved "http://localhost:4873/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" + integrity 
sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw== + dependencies: + function-bind "^1.1.1" + +is-generator-function@^1.0.10: + version "1.0.10" + resolved "https://registry.yarnpkg.com/is-generator-function/-/is-generator-function-1.0.10.tgz#f1558baf1ac17e0deea7c0415c438351ff2b3c72" + integrity sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A== + dependencies: + has-tostringtag "^1.0.0" + +is-regex@^1.1.4: + version "1.1.4" + resolved "http://localhost:4873/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" + integrity sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +match-value@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/match-value/-/match-value-1.1.0.tgz#ad311ef8bbe2d344a53ec3104e28fe221984b98e" + integrity sha512-NOvpobcmkX+l9Eb6r2s3BkR1g1ZwzExDFdXA9d6p1r1O1olLbo88KuzMiBmg43xSpodfm7I6Hqlx2OoySquEgg==