Full rewrite to use transformation rulesets
parent
95bb26f882
commit
6bc3054aca
@ -1,2 +1,4 @@
|
|||||||
from block_markup import *
|
from block_markup import *
|
||||||
from parser import *
|
from document import *
|
||||||
|
from transformation_ruleset import *
|
||||||
|
from value import *
|
||||||
|
@ -0,0 +1,85 @@
|
|||||||
|
import re
|
||||||
|
import block_markup
|
||||||
|
|
||||||
|
class Document():
    """Parse tab-indented block markup into a tree of block elements.

    The source text is split into paragraphs, and each paragraph becomes one
    ``block_markup`` element. The number of leading tab characters of a
    paragraph decides which previously parsed element it is attached to.
    Parsing happens immediately on construction.

    Attributes set by parsing:
        data: the raw source text passed to the constructor.
        paragraphs: the list of raw paragraphs the text was split into.
        definitions: every ``Definition`` element, in document order.
        current_elements: maps a nesting level to the most recent element
            parsed at that level; level 0 holds the root ``TreeLevel``.
    """

    # A paragraph ends at a blank line; whitespace around the break is
    # swallowed by the separator itself.
    _PARAGRAPH_SEPARATOR = re.compile(r"\s*\n\s*\n")

    # First line of an argument paragraph: "<name>::" plus optional
    # trailing whitespace. Group 1 is the argument name.
    _ARGUMENT_HEADER = re.compile(r"(.*)::\s*$")

    def __init__(self, data):
        """Store the source text ``data`` and parse it right away."""
        self.data = data
        self._parse()

    def _parse(self):
        """Build the element tree from ``self.data``.

        Raises:
            Exception: when a paragraph is indented more than one level
                deeper than the element parsed before it.
        """
        paragraphs = self._PARAGRAPH_SEPARATOR.split(self.data)

        self.paragraphs = paragraphs
        self.definitions = []

        current_level = 0
        current_paragraph = 0
        self.current_elements = {0: block_markup.TreeLevel(0, "root")}

        for paragraph in paragraphs:
            if paragraph.strip() == "":
                continue

            # Only non-empty paragraphs are counted, so the number in the
            # error message below refers to the n-th non-empty paragraph.
            current_paragraph += 1

            # Nesting level is the number of leading tabs plus one; level 0
            # is reserved for the root element.
            indentation = len(paragraph) - len(paragraph.lstrip("\t")) + 1

            if indentation > current_level + 1:
                raise Exception("Invalid indentation found in paragraph %d" % current_paragraph)

            # Drop as many leading characters from every line as the first
            # line has tabs.
            start = indentation - 1
            lines = [line[start:] for line in paragraph.splitlines()]

            # Matched once up front; only consulted if no marker prefix
            # below claims the paragraph first.
            argument_match = self._ARGUMENT_HEADER.match(lines[0])

            if lines[0].startswith("#"):
                # Header: the number of leading '#' characters is its depth.
                depth = len(lines[0]) - len(lines[0].lstrip("#"))
                lines[0] = lines[0].lstrip("# ")
                element = block_markup.Header(indentation, " ".join(lines), depth)
            elif lines[0].startswith("^"):
                # Definition: receives the list of lines, not a joined string.
                lines[0] = lines[0].lstrip("^ ")
                element = block_markup.Definition(indentation, lines)
                self.definitions.append(element)
            elif lines[0].startswith("@"):
                # Example
                lines[0] = lines[0].lstrip("@ ")
                element = block_markup.Example(indentation, " ".join(lines))
            elif lines[0].startswith("$$") and self.current_elements[current_level].__class__.__name__ == "Code":
                # Code continuation: extend the Code element parsed just
                # before this paragraph instead of creating a new element.
                self.current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("$ ")
                continue
            elif lines[0].startswith("$"):
                # Code block start: line breaks are preserved.
                lines[0] = lines[0].lstrip("$ ")
                element = block_markup.Code(indentation, "\n".join(lines))
            elif lines[0].startswith(">>") and self.current_elements[current_level].__class__.__name__ == "Output":
                # Output continuation: same idea as the code continuation.
                self.current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("> ")
                continue
            elif lines[0].startswith(">"):
                # Output block start: line breaks are preserved.
                lines[0] = lines[0].lstrip("> ")
                element = block_markup.Output(indentation, "\n".join(lines))
            elif lines[0].startswith("!"):
                # Exclamation
                lines[0] = lines[0].lstrip("! ")
                element = block_markup.Exclamation(indentation, " ".join(lines))
            elif argument_match:
                # Argument definition: the first line names the argument,
                # the remaining lines form its description.
                argname = argument_match.group(1)
                description = " ".join(line.lstrip() for line in lines[1:])
                element = block_markup.Argument(indentation, description, argname)
            elif lines[0].strip() == "{TOC}":
                # Table of contents: the element is handed this document.
                element = block_markup.Index(indentation, self)
            else:
                # Plain text
                element = block_markup.Text(indentation, " ".join(lines))

            # Attach to the most recent element one level up, then record
            # this element as the newest one at its own level.
            self.current_elements[indentation - 1].add(element)
            current_level = indentation
            self.current_elements[current_level] = element

    def transform(self, ruleset):
        """Return the result of transforming the root element with ``ruleset``."""
        return self.current_elements[0].transform(ruleset)

    def get_definitions(self):
        """Return the list of ``Definition`` elements collected while parsing."""
        return self.definitions
|
@ -1,86 +0,0 @@
|
|||||||
from block_markup import *
|
|
||||||
|
|
||||||
class Parser():
    """Render tab-indented block markup into a template.

    The element classes used here (``TreeLevel``, ``Header``, ``Definition``,
    ``Example``, ``Code``, ``Output``, ``Exclamation``, ``Argument``,
    ``Index``, ``Text``) are expected to be in module scope.
    """

    def __init__(self, template):
        """Store ``template``; ``render`` replaces its ``{CONTENT}`` marker."""
        self.template = template

    def render(self, text):
        """Parse ``text`` and return the template with the content filled in.

        Every ``Definition`` element is also collected in ``self.toc_items``,
        which is reset on each call.

        Raises:
            Exception: when a paragraph is indented more than one level
                deeper than the element parsed before it.
        """
        paragraphs = re.split(r"\s*\n\s*\n", text)

        self.toc_items = []

        current_level = 0
        current_paragraph = 0
        # Maps a nesting level to the most recent element at that level.
        current_elements = {0: TreeLevel(0, "root")}

        for paragraph in paragraphs:
            if paragraph.strip() == "":
                continue

            current_paragraph += 1

            # Nesting level is the number of leading tabs plus one.
            indentation = len(paragraph) - len(paragraph.lstrip("\t")) + 1

            if indentation > current_level + 1:
                raise Exception("Invalid indentation found in paragraph %d" % current_paragraph)

            start = indentation - 1
            lines = [line[start:] for line in paragraph.splitlines()]

            # Matched once up front; only consulted if no marker prefix
            # below claims the paragraph first.
            argument_match = re.match(r"(.*)::\s*$", lines[0])

            if lines[0].startswith("#"):
                # Header: the number of leading '#' characters is its depth.
                depth = len(lines[0]) - len(lines[0].lstrip("#"))
                lines[0] = lines[0].lstrip("# ")
                element = Header(indentation, " ".join(lines), depth)
            elif lines[0].startswith("^"):
                lines[0] = lines[0].lstrip("^ ")
                element = Definition(indentation, "\n".join(lines))
                self.toc_items.append(element)
            elif lines[0].startswith("@"):
                lines[0] = lines[0].lstrip("@ ")
                element = Example(indentation, " ".join(lines))
            elif lines[0].startswith("$$") and current_elements[current_level].__class__.__name__ == "Code":
                # Continuation: extend the Code element parsed just before.
                current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("$ ")
                continue
            elif lines[0].startswith("$"):
                lines[0] = lines[0].lstrip("$ ")
                element = Code(indentation, "\n".join(lines))
            elif lines[0].startswith(">>") and current_elements[current_level].__class__.__name__ == "Output":
                # Continuation: extend the Output element parsed just before.
                current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("> ")
                continue
            elif lines[0].startswith(">"):
                lines[0] = lines[0].lstrip("> ")
                element = Output(indentation, "\n".join(lines))
            elif lines[0].startswith("!"):
                lines[0] = lines[0].lstrip("! ")
                element = Exclamation(indentation, " ".join(lines))
            elif argument_match:
                # Take the name from the regex group rather than slicing off
                # two characters: the pattern tolerates whitespace after
                # "::", which a fixed-width slice would mishandle.
                argname = argument_match.group(1)
                description = " ".join(line.lstrip() for line in lines[1:])
                element = Argument(indentation, description, argname)
            elif lines[0].strip() == "{TOC}":
                # Table of contents: the element is handed this parser.
                element = Index(indentation, self)
            else:
                element = Text(indentation, " ".join(lines))

            # Attach to the most recent element one level up, then record
            # this element as the newest one at its own level.
            current_elements[indentation - 1].add(element)
            current_level = indentation
            current_elements[current_level] = element

        return self.template.replace("{CONTENT}", current_elements[0].output())
|
|
@ -0,0 +1,45 @@
|
|||||||
|
class TransformationRuleset():
    """Base set of transformation hooks.

    Every hook in this base class does nothing and returns ``None``.
    Presumably an output format is implemented by subclassing and
    overriding the hooks -- confirm against the callers.
    """

    def transform_children(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_header(self, depth, text):
        """Hook taking ``depth`` and ``text``; no-op here, returns ``None``."""

    def transform_definition(self, forms, children):
        """Hook taking ``forms`` and ``children``; no-op here, returns ``None``."""

    def transform_argument(self, name, description, children):
        """Hook taking ``name``, ``description`` and ``children``; no-op here."""

    def transform_example(self, title, children):
        """Hook taking ``title`` and ``children``; no-op here, returns ``None``."""

    def transform_code(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_output(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_exclamation(self, text, children):
        """Hook taking ``text`` and ``children``; no-op here, returns ``None``."""

    def transform_text(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_reference(self, target, description):
        """Hook taking ``target`` and ``description``; no-op here."""

    def transform_external_reference(self, target, description):
        """Hook taking ``target`` and ``description``; no-op here."""

    def transform_fixed_width(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_emphasis(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_strong(self, text):
        """Hook taking ``text``; no-op here, returns ``None``."""

    def transform_toc(self, items):
        """Hook taking ``items``; no-op here, returns ``None``."""
|
@ -0,0 +1,29 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
class Value(str):
    """A string that may contain inline markup.

    ``transform`` rewrites the markup through a ruleset's hooks, and
    ``clean`` strips the markup and keeps only the readable text.
    """

    # One entry per inline construct, applied strictly in this order:
    #   (pattern, ruleset method name, groups passed to that method,
    #    replacement template used by clean()).
    # The "with text" link forms must run before their bare counterparts,
    # otherwise the bare pattern would consume the "{...}" part first.
    _INLINE_RULES = (
        # Fixed-width
        (re.compile(r"`([^`]+)`"), "transform_fixed_width", (1,), r"\1"),
        # Emphasized
        (re.compile(r"\*\*([^*]+)\*\*"), "transform_emphasis", (1,), r"\1"),
        # Strong
        (re.compile(r"__([^_]+)__"), "transform_strong", (1,), r"\1"),
        # Hyperlink with text
        (re.compile(r"{>([^}]+)}\(([^)]+)\)"), "transform_reference", (1, 2), r"\2"),
        # Hyperlink
        (re.compile(r"{>([^}]+)}"), "transform_reference", (1, 1), r"\1"),
        # External hyperlink with text
        (re.compile(r"{([^}]+:[^}]+)}\(([^)]+)\)"), "transform_external_reference", (1, 2), r"\2"),
        # External hyperlink
        (re.compile(r"{([^}]+:[^}]+)}"), "transform_external_reference", (1, 1), r"\1"),
        # Forced external hyperlink with text
        (re.compile(r"{<([^}]+)}\(([^)]+)\)"), "transform_external_reference", (1, 2), r"\2"),
        # Forced external hyperlink
        (re.compile(r"{<([^}]+)}"), "transform_external_reference", (1, 1), r"\1"),
    )

    @staticmethod
    def _make_callback(ruleset, method_name, groups):
        """Return a ``re.sub`` callback that forwards a match to ``ruleset``.

        Built by a helper so each callback binds its own ``method_name``
        and ``groups``. The ruleset method is looked up only when a match
        is actually found, so a ruleset need not define hooks for markup
        that never occurs in the text.
        """
        def callback(match):
            handler = getattr(ruleset, method_name)
            return handler(*[Value(match.group(index)) for index in groups])
        return callback

    def transform(self, ruleset):
        """Return this text with inline markup replaced via ``ruleset``.

        Each matched construct is replaced by the return value of the
        corresponding ``ruleset.transform_*`` method, which receives the
        captured parts wrapped in ``Value``. Bare links pass the target as
        both target and description. Substitutions run sequentially, so
        later patterns also see the output of earlier replacements.
        """
        text = self
        for pattern, method_name, groups, _ in self._INLINE_RULES:
            text = pattern.sub(self._make_callback(ruleset, method_name, groups), text)
        return text

    def clean(self):
        """Return this text with inline markup removed.

        Styled spans keep their inner text; links keep their description
        when one is given and their target otherwise.
        """
        text = self
        for pattern, _, _, replacement in self._INLINE_RULES:
            text = pattern.sub(replacement, text)
        return text
|
||||||
|
|
Loading…
Reference in New Issue