with builtins;

# Tokenizer.
let
  layout_pat = "[ \n]+";
  layout_pat_opt = "[ \n]*";
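  # A token is one of: an equal sign, a `[[...]]` array-of-tables header, a
  # `[...]` table header (the bracketed character class also accepts dots,
  # quotes and `*`, so the whole dotted header body is captured as a single
  # token), a bare identifier, or a double-quoted string.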
  token_pat = ''=|[[][[][a-zA-Z0-9_."*-]+[]][]]|[[][a-zA-Z0-9_."*-]+[]]|[a-zA-Z0-9_-]+|"[^"]*"''; #"

  tokenizer_1_11 = str:
    let
      tokenizer_rec = len: prevTokens: patterns: str:
        let
          pattern = head patterns;
          layoutAndTokens = match pattern str;
          matchLength = stringLength (head layoutAndTokens);
          tokens = prevTokens ++ tail layoutAndTokens;
        in
          if layoutAndTokens == null then
            # If this was the narrowest pattern, return the list of tokens
            # collected so far,
            if tail patterns == [] then prevTokens
            # otherwise retry with the next pattern, which captures half as
            # many tokens.
            else tokenizer_rec len prevTokens (tail patterns) str
          else tokenizer_rec len tokens patterns (substring matchLength len str);

      avgTokenSize = 100;
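      # Smallest n >= 1 such that 2^(n - 1) >= v; an over-approximation of the
      # base-2 logarithm, used below to bound the pattern-doubling depth.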
      ceilLog2 = v:
        let inner = n: i: if i < v then inner (n + 1) (i * 2) else n; in
        inner 1 1;

      # The builtins.match function matches the entire string and returns a
      # list of all captured elements. This is the most efficient way to build
      # a tokenizer, provided we can write a pattern which captures every token
      # of the file. Unfortunately the C++ std::regex engine does not support
      # captures inside repeated patterns. As a work-around, we generate
      # patterns which match tokens in powers of 2, so that we avoid iterating
      # too many times over the content.
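      # Schematically, for a depth of 2 the generated list of patterns is
      #   [ "(<layout?><4 tokens>).*" "(<layout?><2 tokens>).*" "(<layout?><1 token>).*" ]
      # and tokenizer_rec consumes as much input as possible with the widest
      # pattern before falling back to the narrower ones near the end.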
      generatePatterns = str:
        let
          depth = ceilLog2 (stringLength str / avgTokenSize);
          inner = depth:
            if depth == 0 then [ "(${token_pat})" ]
            else
              let next = inner (depth - 1); in
              [ "${head next}${layout_pat}${head next}" ] ++ next;
        in
          map (pat: "(${layout_pat_opt}${pat}).*" ) (inner depth);

    in
      tokenizer_rec (stringLength str) [] (generatePatterns str) str;

  tokenizer_1_12 = str:
    let
      # Nix 1.12 has the builtins.split function, which allows tokenizing the
      # file quickly by iterating with a simple regexp.
      layoutTokenList = split "(${token_pat})" str;
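      # builtins.split interleaves the unmatched text with singleton lists
      # holding the captured token, e.g. split "(b)" "abc" == [ "a" [ "b" ] "c" ],
      # so the string elements are layout to discard and the lists wrap tokens.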
      isLayout = s: match layout_pat_opt s != null;
      filterLayout = list:
        filter (s:
          if isString s then
            if isLayout s then false
            else throw "Error: Unexpected token: '${s}'"
          else true) list;
      removeTokenWrapper = list:
        map (x: assert tail x == []; head x) list;
    in
      removeTokenWrapper (filterLayout layoutTokenList);

  tokenizer =
    if builtins ? split
    then tokenizer_1_12
    else tokenizer_1_11;
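  # Both implementations should produce the same flat list of tokens, e.g.:
  #   tokenizer "[a] b = \"c\"" == [ "[a]" "b" "=" "\"c\"" ]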
in

# Parse entry headers.
let
  unescapeString = str:
    # Let's ignore any escape character for the moment.
    assert match ''"[^"]*"'' str != null; #"
    substring 1 (stringLength str - 2) str;
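  # e.g. unescapeString ''"abc"'' == "abc"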

  # Matches the content of TOML section names: bare or double-quoted identifiers.
  ident_pat = ''[a-zA-Z0-9_-]+|"[^"]*"''; #"

  removeBraces = token: wrapLen:
    substring wrapLen (stringLength token - 2 * wrapLen) token;
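  # e.g. removeBraces "[[a]]" 2 == "a" and removeBraces "[a.b]" 1 == "a.b"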

  # Note, this implementation is limited to 11 identifiers.
  matchPathFun_1_11 = token:
    let
      # match header_pat "a.b.c" == [ "a" ".b" "b" ".c" "c" ]
      header_pat =
        foldl' (pat: n: "(${ident_pat})([.]${pat})?")
          "(${ident_pat})" (genList (n: 0) 10);
      matchPath = match header_pat token;
      filterDot = filter (s: substring 0 1 s != ".") matchPath;
    in
      filterDot;

  matchPathFun_1_12 = token:
    map (e: head e)
      (filter (s: isList s)
        (split "(${ident_pat})" token));
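  # e.g. matchPathFun_1_12 ''a."b.c"'' == [ "a" ''"b.c"'' ]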

  matchPathFun =
    if builtins ? split
    then matchPathFun_1_12
    else matchPathFun_1_11;

  headerToPath = token: wrapLen:
    let
      token' = removeBraces token wrapLen;
      matchPath = matchPathFun token';
      path =
        map (s:
          if substring 0 1 s != ''"'' then s #"
          else unescapeString s
        ) matchPath;
    in
      assert matchPath != null;
      # assert trace "Path: ${token'}; match as ${toString path}" true;
      path;
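  # e.g. headerToPath ''[pkg."rust-std"]'' 1 yields the path [ "pkg" "rust-std" ]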
in

# Reconstruct the equivalent attribute set.
let
  tokenToValue = token:
    if token == "true" then true
    else if token == "false" then false
    else unescapeString token;
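  # Note: only booleans and double-quoted strings are handled; other TOML
  # value types (numbers, dates, inline arrays, ...) are not supported here.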

  parserInitState = {
    idx = 0;
    path = [];
    isList = false;
    output = [];
    elem = {};
  };
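  # State carried through readToken:
  #   idx:    position inside the current `name = value` statement
  #           (0: name or section header, 1: equal sign, 2: value);
  #   path:   attribute path of the section being read;
  #   isList: whether the current section is an array of tables ("[[...]]");
  #   output: attribute sets of the sections closed so far;
  #   elem:   key/value pairs accumulated for the current section.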

  # Imported from nixpkgs library.
  setAttrByPath = attrPath: value:
    if attrPath == [] then value
    else listToAttrs
      [ { name = head attrPath; value = setAttrByPath (tail attrPath) value; } ];
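  # e.g. setAttrByPath [ "a" "b" ] 3 == { a = { b = 3; }; }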

  closeSection = state:
    state // {
      output = state.output ++ [ (setAttrByPath state.path (
        if state.isList then [ state.elem ]
        else state.elem
      )) ];
    };

  readToken = state: token:
    # assert trace "Read '${token}'" true;
    if state.idx == 0 then
      if substring 0 2 token == "[[" then
        (closeSection state) // {
          path = headerToPath token 2;
          isList = true;
          elem = {};
        }
      else if substring 0 1 token == "[" then
        (closeSection state) // {
          path = headerToPath token 1;
          isList = false;
          elem = {};
        }
      else
        assert match "[a-zA-Z0-9_-]+" token != null;
        state // { idx = 1; name = token; }
    else if state.idx == 1 then
      assert token == "=";
      state // { idx = 2; }
    else
      assert state.idx == 2;
      state // {
        idx = 0;
        elem = state.elem // {
          "${state.name}" = tokenToValue token;
        };
      };

  # Aggregate each section into an individual attribute set.
  parser = str:
    closeSection (foldl' readToken parserInitState (tokenizer str));

  fromTOML = toml:
    let
      sections = (parser toml).output;
      # Inlined from nixpkgs library functions.
      zipAttrs = sets:
        listToAttrs (map (n: {
          name = n;
          value =
            let v = catAttrs n sets; in
            # assert trace "Visiting ${n}" true;
            if tail v == [] then head v
            else if isList (head v) then concatLists v
            else if isAttrs (head v) then zipAttrs v
            else throw "cannot merge sections";
        }) (concatLists (map attrNames sets)));
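      # zipAttrs recursively merges the per-section attribute sets and
      # concatenates lists, e.g.:
      #   zipAttrs [ { a = { x = 1; }; } { a = { y = 2; }; } ] == { a = { x = 1; y = 2; }; }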
    in
      zipAttrs sections;
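  # End-to-end example:
  #   fromTOML "[a]\nk = \"v\"" == { a = { k = "v"; }; }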

in

{
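  # Self-tests: parse a local copy and the remote nightly Rust channel manifest.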
  testing = fromTOML (builtins.readFile ./channel-rust-nightly.toml);
  testing_url = fromTOML (builtins.readFile (builtins.fetchurl
    https://static.rust-lang.org/dist/channel-rust-nightly.toml));
  inherit fromTOML;
}