with builtins;

let
  layout_pat = "[ \n]+";
  layout_pat_opt = "[ \n]*";
  token_pat = ''=|[[][[][a-zA-Z0-9_."*-]+[]][]]|[[][a-zA-Z0-9_."*-]+[]]|[a-zA-Z0-9_-]+|"[^"]*"'';

  tokenizer_rec = len: prevTokens: patterns: str:
    let
      pattern = head patterns;
      layoutAndTokens = match pattern str;
      matchLength = stringLength (head layoutAndTokens);
      tokens = prevTokens ++ tail layoutAndTokens;
    in
      if layoutAndTokens == null then
        # If we cannot match the last pattern either, return the list of tokens.
        if tail patterns == [] then prevTokens
        # Otherwise, try the next pattern, which captures half as many tokens.
        else tokenizer_rec len prevTokens (tail patterns) str
      else tokenizer_rec len tokens patterns (substring matchLength len str);

  avgTokenSize = 100;
  ceilLog2 = v:
    let inner = n: i: if i < v then inner (n + 1) (i * 2) else n;
    in inner 1 1;

  # The builtins.match function matches the entire string and generates a list of all
  # captured elements. This is the most efficient way to build a tokenizer, provided we
  # can build a pattern which captures every token of the file. Unfortunately, C++
  # std::regex does not support captures inside repeated patterns. As a work-around, we
  # generate patterns which match tokens in powers of 2, such that we avoid iterating
  # too many times over the content. For instance, with depth = 2 this produces patterns
  # matching 4, 2 and 1 token(s), which tokenizer_rec tries in turn.
  generatePatterns = str:
    let
      depth = ceilLog2 (stringLength str / avgTokenSize);
      inner = depth:
        if depth == 0 then [ "(${token_pat})" ]
        else
          let next = inner (depth - 1); in
          [ "${head next}${layout_pat}${head next}" ] ++ next;
    in
      map (pat: "(${layout_pat_opt}${pat}).*") (inner depth);

  tokenizer = str:
    tokenizer_rec (stringLength str) [] (generatePatterns str) str;

  unescapeString = str:
    # Let's ignore any escape character for the moment.
    assert match ''"[^"]*"'' str != null;
    substring 1 (stringLength str - 2) str;

  tokenToValue = token:
    if token == "true" then true
    else if token == "false" then false
    else unescapeString token;

  # Match the content of TOML format section names, and add grouping such that:
  #   match header_pat "a.b.c" == [ "a" ".b.c" "b" ".c" "c" ]
  # followed by nulls for the unused optional groups; the "."-prefixed components
  # and the nulls are filtered out below.
  #
  # Note, this implementation is limited to 11 identifiers.
  ident_pat = ''[a-zA-Z0-9_-]+|"[^"]*"'';
  header_pat =
    foldl' (pat: n: "(${ident_pat})([.]${pat})?") "(${ident_pat})" (genList (n: 0) 10);

  headerToPath = token: wrapLen:
    let
      # Strip the wrapping "[" / "]" (or "[[" / "]]") from the header token.
      token' = substring wrapLen (stringLength token - 2 * wrapLen) token;
      matchPath = match header_pat token';
      # match returns null for unmatched optional groups; skip them along with
      # the "."-prefixed separator groups.
      filterDot = filter (s: isString s && substring 0 1 s != ".") matchPath;
      path = map (s:
        if substring 0 1 s != ''"'' then s
        else unescapeString s
      ) filterDot;
    in
      assert matchPath != null;
      # assert trace "Path: ${token'}; match as ${toString path}" true;
      path;

  parserInitState = {
    idx = 0;
    path = [];
    isList = false;
    output = [];
    elem = {};
  };

  # Imported from nixpkgs library.
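  # Illustration (sketch, not part of the original file): setAttrByPath nests a
  # value under a path of attribute names, e.g.
  #   setAttrByPath [ "pkg" "rust" ] { available = true; }
  #   == { pkg = { rust = { available = true; }; }; }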
  setAttrByPath = attrPath: value:
    if attrPath == [] then value
    else listToAttrs [ {
      name = head attrPath;
      value = setAttrByPath (tail attrPath) value;
    } ];

  # Record the current section in the output, wrapping its element in a list
  # when the section was introduced with the [[...]] syntax.
  closeSection = state:
    state // {
      output = state.output ++ [
        (setAttrByPath state.path (
          if state.isList then [ state.elem ]
          else state.elem
        ))
      ];
    };

  readToken = state: token:
    # assert trace "Read '${token}'" true;
    if state.idx == 0 then
      if substring 0 2 token == "[[" then
        (closeSection state) // { path = headerToPath token 2; isList = true; elem = {}; }
      else if substring 0 1 token == "[" then
        (closeSection state) // { path = headerToPath token 1; isList = false; elem = {}; }
      else
        assert match "[a-zA-Z0-9_-]+" token != null;
        state // { idx = 1; name = token; }
    else if state.idx == 1 then
      assert token == "=";
      state // { idx = 2; }
    else
      assert state.idx == 2;
      state // {
        idx = 0;
        elem = state.elem // { "${state.name}" = tokenToValue token; };
      };

  # Aggregate each section as individual attribute sets.
  parser = str:
    closeSection (foldl' readToken parserInitState (tokenizer str));

  fromTOML = toml:
    let
      sections = (parser toml).output;
      # Inlined from nixpkgs library functions.
      zipAttrs = sets:
        listToAttrs (map (n: {
          name = n;
          value =
            let v = catAttrs n sets; in
            # assert trace "Visiting ${n}" true;
            if tail v == [] then head v
            else if isList (head v) then concatLists v
            else if isAttrs (head v) then zipAttrs v
            else throw "cannot merge sections";
        }) (concatLists (map attrNames sets)));
    in
      zipAttrs sections;
in

{
  testing = fromTOML (builtins.readFile ./channel-rust-nightly.toml);
  testing_url = fromTOML (builtins.readFile (builtins.fetchurl
    "https://static.rust-lang.org/dist/channel-rust-nightly.toml"));
  inherit fromTOML;
}
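# Usage sketch (hypothetical input, not part of the original file):
#   fromTOML ''
#     [pkg.rust]
#     available = true
#   ''
#   evaluates to { pkg = { rust = { available = true; }; }; }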