From 11045f84bdd9e34ea9b525ee310e8c626cbe2810 Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Wed, 9 Jan 2013 21:20:59 +0100 Subject: [PATCH] Initial commit --- .gitignore | 1 + zpy2html.py | 392 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 393 insertions(+) create mode 100644 .gitignore create mode 100644 zpy2html.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/zpy2html.py b/zpy2html.py new file mode 100644 index 0000000..d0a8097 --- /dev/null +++ b/zpy2html.py @@ -0,0 +1,392 @@ +import os, argparse, sys, re + +parser = argparse.ArgumentParser(description='Converts ZippyDoc source files to HTML.') + +parser.add_argument('files', metavar='FILE', type=str, nargs='+', + help='files to convert to HTML') + +args = parser.parse_args() +options = vars(args) + +files = options["files"] + +template = """ + + + + + + + %s + + +""" + +class TreeLevel: + def __init__(self, indentation, data): + self.elements = [] + self.indentation = indentation + self.data = data + + def add(self, element): + self.elements.append(element) + + def output(self): + return self.render() + + def render_children(self): + child_output = "" + + for child in self.elements: + child_output += child.output() + + return '
%s
' % child_output + + def process_inline_markup(self, text): + text = re.sub("`([^`]+)`", '\\1', text) # Emphasized + text = re.sub("\*\*([^*]+)\*\*", "\\1", text) # Emphasized + text = re.sub("__([^_]+)__", "\\1", text) # Strong + text = re.sub("{>([^}]+)}\(([^)]+)\)", '\\2', text) # Hyperlink with text + text = re.sub("{>([^}]+)}", '\\1', text) # Hyperlink + text = re.sub("{([^}]+:[^}]+)}\(([^)]+)\)", '\\2', text) # External hyperlink with text + text = re.sub("{([^}]+:[^}]+)}", '\\1', text) # External hyperlink + text = re.sub("{<([^}]+)}\(([^)]+)\)", '\\2', text) # Forced external hyperlink with text + text = re.sub("{<([^}]+)}", '\\1', text) # Forced external hyperlink + + return text + + def fix_preformatted(self, text): + return text.replace("<", "<").replace(">", ">") + + def clear_markup(self, text): + return re.sub("\*\*([^*]+)\*\*", "\\1", text) + + def render(self): + return self.render_children() + +class Example(TreeLevel): + def render(self): + return '
Example: %s %s
' % (self.data, self.render_children()) + +class Code(TreeLevel): + def render(self): + return 'Code:
%s
' % self.fix_preformatted(self.data) + +class Output(TreeLevel): + def render(self): + return 'Output:
%s
' % self.fix_preformatted(self.data) + +class Definition(TreeLevel): + def get_anchor(self): + first = self.clear_markup(self.data.splitlines()[0]) + anchor = first.replace("...", "") + anchor = anchor.replace(".", "_") + anchor = re.sub("[^a-zA-Z0-9_]", "", anchor) + return anchor + + def get_description(self): + for element in self.elements: + if element.__class__.__name__ == "Text": + data = self.process_inline_markup(element.data) + + if len(data) > 80: + matches = re.match("^(.{0,80})\W", data) + return matches.group(1) + "..." + else: + return data + + return "" + + def render(self): + return '
%s %s
' % (self.get_anchor(), self.process_inline_markup(self.data.replace("\n", "
")), self.render_children()) + +class Exclamation(TreeLevel): + def render(self): + return '
Important: %s
' % self.process_inline_markup(self.data) + +class Argument(TreeLevel): + def __init__(self, indentation, data, argname): + self.elements = [] + self.indentation = indentation + self.argname = argname + self.data = data + + def render(self): + return '
%s
%s
' % (self.argname, self.process_inline_markup(self.data)) + +class Header(TreeLevel): + def __init__(self, indentation, data, depth): + self.elements = [] + self.indentation = indentation + self.depth = depth + self.data = data + + def render(self): + if self.depth <= 7: + title_type = "h%d" % self.depth + else: + title_type = "h7" + + return "<%s>%s" % (title_type, self.data, title_type) + +class Text(TreeLevel): + def render(self): + return '
%s
' % self.process_inline_markup(self.data) + +class Index(TreeLevel): + def render(self): + global toc_items + + rendered = "" + + for item in toc_items: + forms = item.data.splitlines() + first = self.clear_markup(forms[0]) + + if len(forms) > 1: + rest = '(also: ' + ', '.join(self.clear_markup(form) for form in forms[1:]) + ")" + else: + rest = "" + + anchor = item.get_anchor() + description = item.get_description() + rendered += '
  • %s %s %s
  • ' % (anchor, first, description, rest) + + return '

    Table of contents

    ' % rendered + +for zpy in files: + destination = os.path.splitext(zpy)[0] + ".html" + + f = open(zpy, "r") + data = f.read() + f.close() + + paragraphs = re.split("\s*\n\s*\n", data) + toc_items = [] + current_level = 0 + current_paragraph = 0 + current_elements = {0: TreeLevel(0, "root")} + + for paragraph in paragraphs: + if paragraph.strip() == "": + continue + + current_paragraph += 1 + indentation = len(paragraph) - len(paragraph.lstrip("\t")) + 1 + + if indentation > current_level + 1: + raise Exception("Invalid indentation found in paragraph %d" % current_paragraph) + + element_type = TreeLevel + start = indentation - 1 + + lines = [line[start:] for line in paragraph.splitlines()] + + if lines[0].startswith("#"): + element_type = Header + depth = len(lines[0]) - len(lines[0].lstrip("#")) + lines[0] = lines[0].lstrip("# ") + data = " ".join(lines) + elif lines[0].startswith("^"): + element_type = Definition + lines[0] = lines[0].lstrip("^ ") + data = "\n".join(lines) + elif lines[0].startswith("@"): + element_type = Example + lines[0] = lines[0].lstrip("@ ") + data = " ".join(lines) + elif lines[0].startswith("$$") and current_elements[current_level].__class__.__name__ == "Code": + current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("$ ") + continue + elif lines[0].startswith("$"): + element_type = Code + lines[0] = lines[0].lstrip("$ ") + data = "\n".join(lines) + elif lines[0].startswith(">>") and current_elements[current_level].__class__.__name__ == "Output": + current_elements[current_level].data += "\n\n" + "\n".join(lines).lstrip("> ") + continue + elif lines[0].startswith(">"): + element_type = Output + lines[0] = lines[0].lstrip("> ") + data = "\n".join(lines) + elif lines[0].startswith("!"): + element_type = Exclamation + lines[0] = lines[0].lstrip("! ") + data = " ".join(lines) + elif re.match(".*::\s*$", lines[0]): + element_type = Argument + argname = lines[0][:-2] + data = " ".join(line.lstrip() for line in lines[1:]) + elif lines[0].strip() == "{TOC}": + element_type = Index + data = "" + else: + element_type = Text + data = " ".join(lines) + + #print "Found element of type %s at indentation %d with data %s" % (element_type.__name__, indentation, data[:80]) + + if element_type.__name__ == "Header": + element = Header(indentation, data, depth) + elif element_type.__name__ == "Argument": + element = Argument(indentation, data, argname) + else: + element = element_type(indentation, data) + + if element_type.__name__ == "Definition": + toc_items.append(element) + + current_elements[indentation - 1].add(element) + + current_level = indentation + current_elements[current_level] = element + + rendered = template % (current_elements[0].output()) + + f = open(destination, "w") + f.write(rendered) + f.close() + + print "Rendered %s" % destination