Full rewrite to use transformation rulesets

develop
Sven Slootweg 12 years ago
parent 95bb26f882
commit 6bc3054aca

@ -1,2 +1,4 @@
from block_markup import *
from parser import *
from document import *
from transformation_ruleset import *
from value import *

@ -1,4 +1,5 @@
import re
from value import Value
class TreeLevel:
def __init__(self, indentation, data):
@ -9,48 +10,16 @@ class TreeLevel:
def add(self, element):
    """Append *element* to this node's list of child elements."""
    children = self.elements
    children.append(element)
def output(self):
    """Return whatever this node's render() produces."""
    rendered = self.render()
    return rendered
def transform(self, ruleset):
    """Transform this node by transforming its children with *ruleset*."""
    transformed = self.transform_children(ruleset)
    return transformed
def render_children(self):
def transform_children(self, ruleset):
child_output = ""
for child in self.elements:
child_output += child.output()
return '<div class="children">%s</div>' % child_output
def process_inline_markup(self, text):
    """Convert the inline markup in *text* to HTML and return the result.

    Handles fixed-width (`x`), emphasis (**x**), strong (__x__), internal
    links ({>target}), external links ({scheme:rest}) and forced external
    links ({<target}); each link form may be followed by "(link text)".
    """
    # Rules are applied in order. The forced-external forms must run before
    # the generic "contains a colon" forms: otherwise {<http://x} is claimed
    # by the generic pattern and the "<" marker ends up inside the href.
    rules = (
        (r"`([^`]+)`", '<span class="fixed">\\1</span>'),        # Fixed-width
        (r"\*\*([^*]+)\*\*", "<em>\\1</em>"),                    # Emphasized
        (r"__([^_]+)__", "<strong>\\1</strong>"),                # Strong
        (r"{>([^}]+)}\(([^)]+)\)", '<a href="\\1.html">\\2</a>'),  # Hyperlink with text
        (r"{>([^}]+)}", '<a href="\\1.html">\\1</a>'),           # Hyperlink
        (r"{<([^}]+)}\(([^)]+)\)", '<a href="\\1">\\2</a>'),     # Forced external hyperlink with text
        (r"{<([^}]+)}", '<a href="\\1">\\1</a>'),                # Forced external hyperlink
        (r"{([^}]+:[^}]+)}\(([^)]+)\)", '<a href="\\1">\\2</a>'),  # External hyperlink with text
        (r"{([^}]+:[^}]+)}", '<a href="\\1">\\1</a>'),           # External hyperlink
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
def clear_markup(self, text):
    """Strip inline markup from *text*, keeping only the visible wording.

    Styled spans keep their content; links keep their "(link text)" when
    present and their target otherwise.
    """
    # Forced-external forms ({<target}) are stripped before the generic
    # external forms; in the opposite order {<http://x} is matched by the
    # generic pattern and the leading "<" survives in the output.
    rules = (
        (r"`([^`]+)`", "\\1"),                      # Fixed-width
        (r"\*\*([^*]+)\*\*", "\\1"),                # Emphasized
        (r"__([^_]+)__", "\\1"),                    # Strong
        (r"{>([^}]+)}\(([^)]+)\)", "\\2"),          # Hyperlink with text
        (r"{>([^}]+)}", "\\1"),                     # Hyperlink
        (r"{<([^}]+)}\(([^)]+)\)", "\\2"),          # Forced external hyperlink with text
        (r"{<([^}]+)}", "\\1"),                     # Forced external hyperlink
        (r"{([^}]+:[^}]+)}\(([^)]+)\)", "\\2"),     # External hyperlink with text
        (r"{([^}]+:[^}]+)}", "\\1"),                # External hyperlink
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
def fix_preformatted(self, text):
    """Escape *text* so it can be placed inside a <pre> element verbatim."""
    # "&" must be replaced first; otherwise the "&" of the "&lt;"/"&gt;"
    # entities produced below would be escaped a second time, and a literal
    # "&lt;" in the source would be displayed as "<".
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
def render(self):
    """Render this node, which consists solely of its rendered children."""
    children_markup = self.render_children()
    return children_markup
child_output += child.transform(ruleset)
return ruleset.transform_children(child_output)
class Header(TreeLevel):
def __init__(self, indentation, data, depth):
@ -59,46 +28,36 @@ class Header(TreeLevel):
self.data = data
self.depth = depth
def render(self):
    """Return the header as an <hN> element, with N capped at 7."""
    level = self.depth if self.depth <= 7 else 7
    tag = "h%d" % level
    return "<%s>%s</%s>" % (tag, self.data, tag)
def transform(self, ruleset):
    """Pass the header depth and its text (as a Value) to ruleset.transform_header."""
    title = Value(self.data)
    return ruleset.transform_header(self.depth, title)
class Text(TreeLevel):
    """A paragraph of ordinary text."""

    def render(self):
        """Return the paragraph in a "text" div with inline markup converted."""
        body = self.process_inline_markup(self.data)
        return '<div class="text">%s</div>' % body

    def transform(self, ruleset):
        """Pass the paragraph text, wrapped in a Value, to ruleset.transform_text."""
        content = Value(self.data)
        return ruleset.transform_text(content)
class Exclamation(TreeLevel):
    """An "Important:" notice."""

    def render(self):
        """Return the notice in an "exclamation" div with inline markup converted."""
        body = self.process_inline_markup(self.data)
        return '<div class="exclamation"><strong>Important:</strong> %s</div>' % body

    def transform(self, ruleset):
        """Pass the notice text (as a Value) and the transformed children to the ruleset."""
        message = Value(self.data)
        children = self.transform_children(ruleset)
        return ruleset.transform_exclamation(message, children)
# A definition block: stores the alternative "forms" of the thing being defined
# (one per source line, left-stripped) plus any child elements added to it.
# NOTE(review): this span comes from a diff view whose +/- markers were lost, so
# lines from before and after the rewrite appear side by side. get_anchor(),
# render() and the truncation branch of get_description() rely on self.data,
# clear_markup() and process_inline_markup(), none of which the __init__ shown
# here sets up — presumably those are the removed lines; confirm against the
# repository before relying on them.
class Definition(TreeLevel):
# Build an anchor name from the first line of self.data: inline markup is
# cleared, "..." is dropped, "." becomes "_", and every remaining character
# outside [a-zA-Z0-9_] is removed.
def get_anchor(self):
first = self.clear_markup(self.data.splitlines()[0])
anchor = first.replace("...", "")
anchor = anchor.replace(".", "_")
anchor = re.sub("[^a-zA-Z0-9_]", "", anchor)
return anchor
# Start with no children; keep the indentation level and the list of forms,
# each with leading whitespace stripped.
def __init__(self, indentation, forms):
self.elements = []
self.indentation = indentation
self.forms = [form.lstrip() for form in forms]
# Hand every form (wrapped in a Value) and the transformed children to
# ruleset.transform_definition and return its result.
def transform(self, ruleset):
return ruleset.transform_definition([Value(form) for form in self.forms], self.transform_children(ruleset))
# Return the forms as a new list of Value objects.
def get_forms(self):
return [Value(form) for form in self.forms]
# Return a description taken from the first child whose class is named "Text",
# or "" when there is no such child.
# NOTE(review): the lines from "data = ..." through "return data" (markup
# processing and cutting at a non-word character within the first 81
# characters) look like the pre-rewrite body, and "return element.data" like
# its replacement — confirm which lines are live.
def get_description(self):
for element in self.elements:
if element.__class__.__name__ == "Text":
data = self.process_inline_markup(element.data)
if len(data) > 80:
matches = re.match("^(.{0,80})\W", data)
return matches.group(1) + "..."
else:
return data
return element.data
return ""
# Render the definition as a "definition" div holding a named anchor
# ("def_" + get_anchor()), the markup-processed data with newlines turned into
# <br>, and the rendered children.
def render(self):
return '<div class="definition"><a name="def_%s">%s %s</a></div>' % (self.get_anchor(), self.process_inline_markup(self.data.replace("\n", "<br>")), self.render_children())
class Argument(TreeLevel):
def __init__(self, indentation, data, argname):
self.elements = []
@ -106,36 +65,21 @@ class Argument(TreeLevel):
self.data = data
self.argname = argname
def render(self):
    """Return the argument as a <dl> entry: name, description, then children."""
    description = self.process_inline_markup(self.data)
    children = self.render_children()
    return '<dl><dt>%s</dt><dd>%s%s</dd></dl>' % (self.argname, description, children)
def transform(self, ruleset):
    """Pass the argument name, its description and the transformed children to the ruleset."""
    name = Value(self.argname)
    description = Value(self.data)
    children = self.transform_children(ruleset)
    return ruleset.transform_argument(name, description, children)
class Example(TreeLevel):
    """An example block with a title and child elements."""

    def render(self):
        """Return an "example" div containing the title and the rendered children."""
        children = self.render_children()
        return '<div class="example">Example: %s %s</div>' % (self.data, children)

    def transform(self, ruleset):
        """Pass the title (as a Value) and the transformed children to the ruleset."""
        title = Value(self.data)
        children = self.transform_children(ruleset)
        return ruleset.transform_example(title, children)
class Code(TreeLevel):
    """A block of source code."""

    def render(self):
        """Return the code, escaped via fix_preformatted, in a "code" <pre> under a heading."""
        return '<h7>Code:</h7><pre class="code">%s</pre>' % self.fix_preformatted(self.data)

    def transform(self, ruleset):
        """Pass the code text to ruleset.transform_code and return its result."""
        # Wrapped in Value like every other block type, so a ruleset can treat
        # the argument of transform_code the same way as that of
        # transform_output. Value subclasses str, so rulesets that only use
        # string methods are unaffected.
        return ruleset.transform_code(Value(self.data))
class Output(TreeLevel):
    """A block of program output."""

    def render(self):
        """Return the output, escaped via fix_preformatted, in an "output" <pre> under a heading."""
        escaped = self.fix_preformatted(self.data)
        return '<h7>Output:</h7><pre class="output">%s</pre>' % escaped

    def transform(self, ruleset):
        """Pass the output text, wrapped in a Value, to ruleset.transform_output."""
        content = Value(self.data)
        return ruleset.transform_output(content)
class Index(TreeLevel):
    """Table of contents; self.data is the object that holds the definitions."""

    def render(self):
        """Return an HTML list with one entry per item in self.data.toc_items."""
        entries = []
        for item in self.data.toc_items:
            forms = item.data.splitlines()
            first = self.clear_markup(forms[0])
            alternatives = forms[1:]
            if alternatives:
                cleaned = ', '.join(self.clear_markup(form) for form in alternatives)
                rest = '<span class="alternatives">(also: ' + cleaned + ")</span>"
            else:
                rest = ""
            anchor = item.get_anchor()
            description = item.get_description()
            entries.append('<li><a href="#def_%s">%s</a> %s %s</li>' % (anchor, first, description, rest))
        return '<div class="toc"><h2>Table of contents</h2><ul>%s</ul></div>' % "".join(entries)

    def transform(self, ruleset):
        """Pass (definition, description-as-Value) pairs to ruleset.transform_toc."""
        items = []
        for definition in self.data.get_definitions():
            items.append((definition, Value(definition.get_description())))
        return ruleset.transform_toc(items)

@ -0,0 +1,85 @@
import re
import block_markup
class Document():
    """A parsed document.

    The raw text is split into blank-line-separated paragraphs. Each
    paragraph becomes a block_markup element, attached to the tree level
    given by the number of leading tabs on the paragraph.
    """

    def __init__(self, data):
        """Store the raw document text *data* and parse it immediately."""
        self.data = data
        self._parse()

    def _parse(self):
        """Build the element tree and the list of definitions from self.data.

        Raises:
            Exception: if a paragraph is indented more than one level deeper
                than the paragraph before it.
        """
        paragraphs = re.split(r"\s*\n\s*\n", self.data)
        self.paragraphs = paragraphs
        self.definitions = []

        current_level = 0
        current_paragraph = 0
        # Most recently created element for each indentation level; level 0
        # is the root that transform() starts from.
        self.current_elements = {0: block_markup.TreeLevel(0, "root")}

        for paragraph in paragraphs:
            if paragraph.strip() == "":
                continue

            current_paragraph += 1
            indentation = len(paragraph) - len(paragraph.lstrip("\t")) + 1

            if indentation > current_level + 1:
                raise Exception("Invalid indentation found in paragraph %d" % current_paragraph)

            # Drop the paragraph's own indentation from every line.
            start = indentation - 1
            lines = [line[start:] for line in paragraph.splitlines()]
            first = lines[0]
            previous = self.current_elements[current_level]
            # Matched once here; used both to detect an argument block and to
            # extract its name.
            argument_match = re.match(r"(.*)::\s*$", first)

            # NOTE(review): lstrip() treats its argument as a set of
            # characters, so e.g. a code line "$ $x = 1" also loses the "$" of
            # "$x". Left as-is because existing documents may rely on it.
            if first.startswith("#"):
                # Header
                depth = len(first) - len(first.lstrip("#"))
                lines[0] = first.lstrip("# ")
                element = block_markup.Header(indentation, " ".join(lines), depth)
            elif first.startswith("^"):
                # Definition
                lines[0] = first.lstrip("^ ")
                element = block_markup.Definition(indentation, lines)
                self.definitions.append(element)
            elif first.startswith("@"):
                # Example
                lines[0] = first.lstrip("@ ")
                element = block_markup.Example(indentation, " ".join(lines))
            elif first.startswith("$$") and isinstance(previous, block_markup.Code):
                # Code continuation: extend the previous block, no new element
                previous.data += "\n\n" + "\n".join(lines).lstrip("$ ")
                continue
            elif first.startswith("$"):
                # Code block start
                lines[0] = first.lstrip("$ ")
                element = block_markup.Code(indentation, "\n".join(lines))
            elif first.startswith(">>") and isinstance(previous, block_markup.Output):
                # Output continuation: extend the previous block, no new element
                previous.data += "\n\n" + "\n".join(lines).lstrip("> ")
                continue
            elif first.startswith(">"):
                # Output block start
                lines[0] = first.lstrip("> ")
                element = block_markup.Output(indentation, "\n".join(lines))
            elif first.startswith("!"):
                # Exclamation
                lines[0] = first.lstrip("! ")
                element = block_markup.Exclamation(indentation, " ".join(lines))
            elif argument_match:
                # Argument definition
                argname = argument_match.group(1)
                description = " ".join(line.lstrip() for line in lines[1:])
                element = block_markup.Argument(indentation, description, argname)
            elif first.strip() == "{TOC}":
                # Table of contents
                element = block_markup.Index(indentation, self)
            else:
                # Text
                element = block_markup.Text(indentation, " ".join(lines))

            # Attach to the parent one level up and remember the element as
            # the latest one at its own level.
            self.current_elements[indentation - 1].add(element)
            current_level = indentation
            self.current_elements[current_level] = element

    def transform(self, ruleset):
        """Transform the whole tree with *ruleset* and return the result."""
        return self.current_elements[0].transform(ruleset)

    def get_definitions(self):
        """Return the list of Definition elements found while parsing."""
        return self.definitions

@ -1,86 +0,0 @@
from block_markup import *
class Parser():
    """Renders document text to HTML and inserts it into a page template."""

    def __init__(self, template):
        """Remember *template*, whose "{CONTENT}" placeholder render() fills in."""
        self.template = template

    def render(self, text):
        """Parse *text* and return the template with "{CONTENT}" replaced by the HTML.

        Definitions found along the way are collected in self.toc_items.

        Raises:
            Exception: if a paragraph is indented more than one level deeper
                than the paragraph before it.
        """
        paragraphs = re.split(r"\s*\n\s*\n", text)
        self.toc_items = []

        current_level = 0
        current_paragraph = 0
        # Most recently created element for each indentation level.
        current_elements = {0: TreeLevel(0, "root")}

        for paragraph in paragraphs:
            if paragraph.strip() == "":
                continue

            current_paragraph += 1
            indentation = len(paragraph) - len(paragraph.lstrip("\t")) + 1

            if indentation > current_level + 1:
                raise Exception("Invalid indentation found in paragraph %d" % current_paragraph)

            # Drop the paragraph's own indentation from every line.
            start = indentation - 1
            lines = [line[start:] for line in paragraph.splitlines()]
            first = lines[0]
            previous = current_elements[current_level]
            argument_match = re.match(r"(.*)::\s*$", first)

            if first.startswith("#"):
                depth = len(first) - len(first.lstrip("#"))
                lines[0] = first.lstrip("# ")
                element = Header(indentation, " ".join(lines), depth)
            elif first.startswith("^"):
                lines[0] = first.lstrip("^ ")
                element = Definition(indentation, "\n".join(lines))
                self.toc_items.append(element)
            elif first.startswith("@"):
                lines[0] = first.lstrip("@ ")
                element = Example(indentation, " ".join(lines))
            elif first.startswith("$$") and isinstance(previous, Code):
                # Continuation of the previous code block; no new element.
                previous.data += "\n\n" + "\n".join(lines).lstrip("$ ")
                continue
            elif first.startswith("$"):
                lines[0] = first.lstrip("$ ")
                element = Code(indentation, "\n".join(lines))
            elif first.startswith(">>") and isinstance(previous, Output):
                # Continuation of the previous output block; no new element.
                previous.data += "\n\n" + "\n".join(lines).lstrip("> ")
                continue
            elif first.startswith(">"):
                lines[0] = first.lstrip("> ")
                element = Output(indentation, "\n".join(lines))
            elif first.startswith("!"):
                lines[0] = first.lstrip("! ")
                element = Exclamation(indentation, " ".join(lines))
            elif argument_match:
                # Take the name from the match instead of slicing off two
                # characters: the pattern allows whitespace after "::", which
                # a fixed [:-2] slice would leave inside the name.
                argname = argument_match.group(1)
                description = " ".join(line.lstrip() for line in lines[1:])
                element = Argument(indentation, description, argname)
            elif first.strip() == "{TOC}":
                element = Index(indentation, self)
            else:
                element = Text(indentation, " ".join(lines))

            # Attach to the parent one level up and remember the element as
            # the latest one at its own level.
            current_elements[indentation - 1].add(element)
            current_level = indentation
            current_elements[current_level] = element

        return self.template.replace("{CONTENT}", current_elements[0].output())

@ -0,0 +1,45 @@
class TransformationRuleset():
    """Base set of transformation hooks; every hook here returns None.

    A concrete ruleset overrides the hooks it needs and returns the output
    for the corresponding element.
    """

    def transform_children(self, text):
        """Hook for the concatenated output of an element's children."""

    def transform_header(self, depth, text):
        """Hook for a header of the given depth."""

    def transform_definition(self, forms, children):
        """Hook for a definition with its list of forms and its children output."""

    def transform_argument(self, name, description, children):
        """Hook for an argument with its name, description and children output."""

    def transform_example(self, title, children):
        """Hook for an example with its title and children output."""

    def transform_code(self, text):
        """Hook for a code block."""

    def transform_output(self, text):
        """Hook for an output block."""

    def transform_exclamation(self, text, children):
        """Hook for an exclamation with its text and children output."""

    def transform_text(self, text):
        """Hook for a text paragraph."""

    def transform_reference(self, target, description):
        """Hook for an inline reference to *target* shown as *description*."""

    def transform_external_reference(self, target, description):
        """Hook for an inline external reference to *target* shown as *description*."""

    def transform_fixed_width(self, text):
        """Hook for inline fixed-width text."""

    def transform_emphasis(self, text):
        """Hook for inline emphasized text."""

    def transform_strong(self, text):
        """Hook for inline strong text."""

    def transform_toc(self, items):
        """Hook for the table of contents built from *items*."""

@ -0,0 +1,29 @@
import re
class Value(str):
    """A string that may contain inline markup.

    transform() converts the markup through a ruleset's hooks; clean()
    removes the markup and keeps only the visible wording.
    """

    # Inline markup rules, applied in this order by both transform() and
    # clean(). Each entry is:
    #   (compiled pattern, ruleset method name,
    #    match groups passed to that method, backreference kept by clean())
    # The forced-external forms ({<target}) come before the generic
    # "contains a colon" external forms; in the opposite order {<http://x} is
    # claimed by the generic pattern and the "<" marker leaks into the target.
    _INLINE_RULES = (
        (re.compile(r"`([^`]+)`"), "transform_fixed_width", (1,), "\\1"),  # Fixed-width
        (re.compile(r"\*\*([^*]+)\*\*"), "transform_emphasis", (1,), "\\1"),  # Emphasized
        (re.compile(r"__([^_]+)__"), "transform_strong", (1,), "\\1"),  # Strong
        (re.compile(r"{>([^}]+)}\(([^)]+)\)"), "transform_reference", (1, 2), "\\2"),  # Hyperlink with text
        (re.compile(r"{>([^}]+)}"), "transform_reference", (1, 1), "\\1"),  # Hyperlink
        (re.compile(r"{<([^}]+)}\(([^)]+)\)"), "transform_external_reference", (1, 2), "\\2"),  # Forced external hyperlink with text
        (re.compile(r"{<([^}]+)}"), "transform_external_reference", (1, 1), "\\1"),  # Forced external hyperlink
        (re.compile(r"{([^}]+:[^}]+)}\(([^)]+)\)"), "transform_external_reference", (1, 2), "\\2"),  # External hyperlink with text
        (re.compile(r"{([^}]+:[^}]+)}"), "transform_external_reference", (1, 1), "\\1"),  # External hyperlink
    )

    def transform(self, ruleset):
        """Return this text with every inline markup span replaced via *ruleset*.

        Each span is replaced by the return value of the matching ruleset
        method, which is called with the captured pieces wrapped in Value.
        """
        def make_replacer(method_name, groups):
            def replace(match):
                arguments = [Value(match.group(index)) for index in groups]
                # Looked up only when a span actually matches, so a ruleset
                # needs only the methods for markup that occurs in the text.
                return getattr(ruleset, method_name)(*arguments)
            return replace

        text = self
        for pattern, method_name, groups, _ in self._INLINE_RULES:
            text = pattern.sub(make_replacer(method_name, groups), text)
        return text

    def clean(self):
        """Return this text with inline markup removed.

        Styled spans keep their content; links keep their "(link text)" when
        present and their target otherwise.
        """
        text = self
        for pattern, _, _, kept in self._INLINE_RULES:
            text = pattern.sub(kept, text)
        return text

@ -9,9 +9,91 @@ parser.add_argument('files', metavar='FILE', type=str, nargs='+',
args = parser.parse_args()
options = vars(args)
class HtmlRuleset(zippydoc.TransformationRuleset):
    """Transformation ruleset that renders every document element as HTML."""

    def create_anchor(self, title):
        """Return an anchor name derived from *title*.

        Inline markup is removed via title.clean(), "..." is dropped, "."
        becomes "_", and every remaining character outside [a-zA-Z0-9_] is
        deleted.
        """
        anchor = title.clean().replace("...", "").replace(".", "_")
        return re.sub(r"[^a-zA-Z0-9_]", "", anchor)

    def escape_html(self, text):
        """Escape "&", "<" and ">" in *text* for literal display in HTML."""
        # "&" goes first so the entities produced for "<" and ">" are not
        # escaped a second time.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def transform_children(self, text):
        """Wrap the children output in a "children" div."""
        return '<div class="children">%s</div>' % text

    def transform_header(self, depth, text):
        """Return an <hN> element for the header, with N capped at 7."""
        if depth <= 7:
            title_type = "h%d" % depth
        else:
            title_type = "h7"

        return "<%s>%s</%s>" % (title_type, text.transform(self), title_type)

    def transform_definition(self, forms, children):
        """Return a "definition" div anchored on the first form.

        All forms are shown, separated by <br>, followed by the children.
        """
        anchor = self.create_anchor(forms[0])
        formlist = "<br>".join([form.transform(self) for form in forms])
        return '<div class="definition"><a name="%s">%s %s</a></div>' % (anchor, formlist, children)

    def transform_argument(self, name, description, children):
        """Return the argument as a <dl> entry: name, description, then children."""
        return "<dl><dt>%s</dt><dd>%s%s</dd></dl>" % (name, description.transform(self), children)

    def transform_example(self, title, children):
        """Return an "example" div with the title and the children."""
        return '<div class="example">Example: %s %s</div>' % (title.transform(self), children)

    def transform_code(self, text):
        """Return the escaped code in a "code" <pre> under a heading."""
        return '<h7>Code:</h7><pre class="code">%s</pre>' % self.escape_html(text)

    def transform_output(self, text):
        """Return the escaped output in an "output" <pre> under a heading."""
        return '<h7>Output:</h7><pre class="output">%s</pre>' % self.escape_html(text)

    def transform_exclamation(self, text, children):
        """Return an "exclamation" div with the notice text and the children."""
        return '<div class="exclamation"><strong>Important:</strong> %s %s</div>' % (text.transform(self), children)

    def transform_text(self, text):
        """Return the paragraph in a "text" div."""
        return '<div class="text">%s</div>' % text.transform(self)

    def transform_reference(self, target, description):
        """Return a link to "<target>.html" labelled with *description*."""
        return '<a href="%s.html">%s</a>' % (target, description.transform(self))

    def transform_external_reference(self, target, description):
        """Return a link to *target* itself labelled with *description*."""
        return '<a href="%s">%s</a>' % (target, description.transform(self))

    def transform_fixed_width(self, text):
        """Return *text* in a "fixed" span; its content is inserted as-is."""
        return '<span class="fixed">%s</span>' % text

    def transform_emphasis(self, text):
        """Return *text* in an <em> element."""
        return "<em>%s</em>" % text.transform(self)

    def transform_strong(self, text):
        """Return *text* in a <strong> element."""
        return "<strong>%s</strong>" % text.transform(self)

    def transform_toc(self, items):
        """Return the table of contents as an HTML list.

        *items* is a sequence whose entries hold a definition at index 0 and
        its description text at index 1. Descriptions longer than 80
        characters are shortened and suffixed with "...".
        """
        entries = []

        for item in items:
            forms = item[0].get_forms()
            anchor = self.create_anchor(forms[0])

            if len(forms) > 1:
                alternatives = '<span class="alternatives">(also: %s)</span>' % ", ".join(form.clean() for form in forms[1:])
            else:
                alternatives = ""

            description = item[1]

            if len(description) > 80:
                # Cut at the last non-word character within the first 81
                # characters so a word is not split in half.
                matches = re.match(r"^(.{0,80})\W", description)
                if matches is not None:
                    description = matches.group(1) + "..."
                else:
                    # No break point (one very long word): cut hard at 80.
                    description = description[:80] + "..."

            description = zippydoc.Value(description).transform(self)

            entries.append('<li><a href="#%s">%s</a> %s %s</li>' % (anchor, forms[0].clean(), description, alternatives))

        return '<div class="toc"><h2>Table of contents</h2><ul>%s</ul></div>' % "".join(entries)
files = options["files"]
docparser = zippydoc.Parser(open("template.html").read())
template = open("template.html").read()
for zpy in files:
destination = os.path.splitext(zpy)[0] + ".html"
@ -20,10 +102,12 @@ for zpy in files:
data = f.read()
f.close()
rendered = docparser.render(data)
doc = zippydoc.Document(open(zpy, "r").read())
rendered = doc.transform(HtmlRuleset())
f = open(destination, "w")
f.write(rendered)
f.write(template.replace("{CONTENT}", rendered))
f.close()
print "Rendered %s" % destination

Loading…
Cancel
Save