# Reconstructed from the whitespace-collapsed patch "Parser so far" for
# parse.py (commit daeba0e6d70723da4c215594ed6217b68a071533, Sven Slootweg,
# 2013-09-22).
# NOTE(review): Element and Rule appear in that patch only as unchanged diff
# context, so they are not reproduced here.
import sys
from collections import defaultdict

# TODO: Keep trail of message travelling through the rules


class Filter(Rule):
    """Rule holding a boolean filter expression parsed from ``rule``.

    Grammar rules applied by the parser:
      * Boolean 'and' has precedence over 'or'.
      * Enclosure in parentheses creates a new FilterExpressionGroup.
      * Having 'and' and 'or' operators in the same group means a new
        FilterExpressionGroup is created for every 'and' chain, to retain
        precedence.
      * Variable accessors are prefixed with $.
      * Strings are enclosed in "quotes".

    Attributes set here:
      rule            -- the raw rule text, unchanged.
      expression_tree -- the parsed result: a single FilterExpression, a
                         FilterExpressionGroup, or None for a blank rule.
    """

    def __init__(self, input_, rule):
        """Parse ``rule`` into an expression tree.

        Raises Exception for unbalanced parentheses, unknown operators,
        unsupported operand types or mismatched boolean operators, and
        ValueError when a comparison is not of the form
        ``<left> <operator> <right>``.
        """
        Rule.__init__(self, input_)
        self.rule = rule
        self.expression_tree = None

        rule_length = len(rule)
        idx = 0
        buff = ""
        in_expression = False  # True while inside a "quoted" string
        # Parsed elements and the boolean operators joining them, per depth.
        element_list = defaultdict(list)
        operator_list = defaultdict(list)
        current_depth = 0

        while idx < rule_length:
            char = rule[idx]

            if char == '"':
                in_expression = not in_expression
                buff += char
            elif in_expression:
                # Parentheses and keywords inside a string are literal text.
                buff += char
            elif char == "(":
                # New group encountered
                current_depth += 1
            elif char == ")":
                if current_depth == 0:
                    # Fail here with the intended message; indexing depth -1
                    # would only produce an unrelated KeyError.
                    raise Exception("Missing %d opening parenthese(s)." % 1)

                # End statement; the buffer is empty when a nested group
                # closed immediately before this parenthesis.
                if buff.strip() != "":
                    element_list[current_depth].append(create_filter_expression(buff))
                buff = ""

                group = create_group(element_list[current_depth], operator_list[current_depth])
                # Clear out both lists to prevent a later sibling group at
                # this depth from working with stale data.
                element_list[current_depth] = []
                operator_list[current_depth] = []

                current_depth -= 1
                element_list[current_depth].append(group)
            else:
                keyword = _match_boolean_keyword(rule, idx, buff)
                if keyword is None:
                    buff += char
                else:
                    # End statement, Boolean AND / OR
                    word, relation = keyword
                    if buff.strip() != "":
                        element_list[current_depth].append(create_filter_expression(buff))
                    operator_list[current_depth].append(relation)
                    buff = ""
                    idx += len(word) - 1  # Skip the rest of the keyword

            idx += 1

        if current_depth > 0:
            raise Exception("Missing %d closing parenthese(s)." % current_depth)

        # If there's anything left in the buffer, it's a statement we still
        # need to process.
        if buff.strip() != "":
            element_list[current_depth].append(create_filter_expression(buff))

        elements = element_list[current_depth]
        operators = operator_list[current_depth]

        if len(elements) == 1 and not operators:
            self.expression_tree = elements[0]
        elif elements or operators:
            # Multiple elements, need to encapsulate in a group
            self.expression_tree = create_group(elements, operators)

    def get_description(self):
        return "[Filter] %s" % self.rule


def _match_boolean_keyword(rule, idx, buff):
    """Return ``(word, relation)`` if a boolean keyword starts at ``idx``.

    A keyword only counts when it follows a space in the pending buffer or
    directly follows a closing parenthesis, and when at least one more
    character comes after it. Returns None otherwise.
    """
    follows_space = len(buff) > 0 and buff[-1] == " "
    follows_group = idx > 0 and rule[idx - 1] == ")"
    if not (follows_space or follows_group):
        return None

    for word, relation in (("or", OR), ("and", AND)):
        if rule.startswith(word, idx) and idx + len(word) < len(rule):
            return word, relation

    return None


def _wrap_and_chain(chain):
    """Return a lone element as-is, or wrap several in an AND group."""
    if len(chain) == 1:
        return chain[0]

    group = FilterExpressionGroup()
    for element in chain:
        group.add(element)
    group.relation = AND
    return group


def create_group(elements, operators):
    """Build a FilterExpressionGroup from elements and joining operators.

    ``operators[i]`` is the relation (AND / OR) between ``elements[i]`` and
    ``elements[i + 1]``. When both relations occur, every run of
    'and'-joined elements becomes its own AND group and those groups are
    joined by OR, so 'and' keeps precedence over 'or'.

    A single element without operators yields an AND group, since a group
    whose relation is NONE evaluates to False.

    Raises Exception when the operator count is not one less than the
    element count (this includes an empty group).
    """
    if len(operators) != len(elements) - 1:
        raise Exception("Mismatched expressions and operators in group.")

    group = FilterExpressionGroup()

    if AND in operators and OR in operators:
        chain = [elements[0]]
        for relation, element in zip(operators, elements[1:]):
            if relation == AND:
                chain.append(element)
            else:
                group.add(_wrap_and_chain(chain))
                chain = [element]
        group.add(_wrap_and_chain(chain))
        group.relation = OR
    else:
        for element in elements:
            group.add(element)
        group.relation = operators[0] if operators else AND

    return group


def _create_operand(token):
    """Turn one operand token into a FilterExpression* operand object.

    ``"text"`` becomes a FilterExpressionString; ``$first`` or
    ``$first[second]`` becomes ``FilterExpressionVariable(first, second)``,
    which matches that constructor's ``(scope, name=None)`` parameters.
    Raises Exception for any other token.
    """
    # Require both quotes, so a lone '"' is not mistaken for an empty string.
    if len(token) >= 2 and token[0] == '"' and token[-1] == '"':
        return FilterExpressionString(token[1:-1])

    if token[0] == "$":
        if "[" in token[1:] and token[-1] == "]":
            scope, name = token[1:-1].split("[", 1)
        else:
            scope, name = token[1:], None
        return FilterExpressionVariable(scope, name)

    raise Exception("Unrecognized operand type")  # No other types supported yet...


def create_filter_expression(buff):
    """Parse ``<left> <operator> <right>`` into a FilterExpression.

    Raises ValueError when ``buff`` does not split into three parts, and
    Exception for an unsupported operand or an unknown operator.
    """
    # TODO: Use shlex split because of spaces in strings?
    parts = [x.strip() for x in buff.split(None, 2)]
    if len(parts) != 3:
        raise ValueError("Malformed filter expression: %r" % buff)
    left, operator, right = parts

    left_obj = _create_operand(left)
    right_obj = _create_operand(right)

    # Built per call: the operator constants are defined further down the
    # module and only exist once the module has finished loading.
    operators = {
        "=": EQUALS,
        "==": EQUALS,
        "!=": NOT_EQUALS,
        ">": MORE_THAN,
        "<": LESS_THAN,
        ">=": MORE_THAN_OR_EQUALS,
        "<=": LESS_THAN_OR_EQUALS,
        "has": HAS,
    }

    try:
        operator_type = operators[operator]
    except KeyError:
        raise Exception("Invalid operator")

    return FilterExpression(left_obj, operator_type, right_obj)
# NOTE(review): the collapsed patch text on these lines also carried unchanged
# diff context (BinReference, DistributorReference.get_description,
# create_rule and the rulebook loop). Those definitions are only partially
# visible and are not reproduced here. The one change the patch made there
# was adding "# TODO: add entire chunks at once for speed" and replacing
# "bin_.display(0)" with "pass#bin_.display(0)".

# Relations between the elements of a FilterExpressionGroup.
NONE = 0
AND = 1
OR = 2

# Comparison operators of a FilterExpression.
EQUALS = 3
NOT_EQUALS = 4
LESS_THAN = 5
MORE_THAN = 6
LESS_THAN_OR_EQUALS = 7
MORE_THAN_OR_EQUALS = 8
HAS = 9

# Comparison applied to the two operand values for each operator constant.
_COMPARISONS = {
    EQUALS: lambda left, right: left == right,
    NOT_EQUALS: lambda left, right: left != right,
    LESS_THAN: lambda left, right: left < right,
    MORE_THAN: lambda left, right: left > right,
    LESS_THAN_OR_EQUALS: lambda left, right: left <= right,
    MORE_THAN_OR_EQUALS: lambda left, right: left >= right,
}

# Display names used by FilterExpression.__repr__.
_OPERATOR_NAMES = {
    EQUALS: "EQUALS",
    NOT_EQUALS: "NOT EQUALS",
    LESS_THAN: "LESS THAN",
    MORE_THAN: "MORE THAN",
    LESS_THAN_OR_EQUALS: "LESS THAN OR EQUAL",
    MORE_THAN_OR_EQUALS: "MORE THAN OR EQUAL",
    HAS: "HAS",
}


class FilterExpression(object):
    """A single comparison ``left <operator> right``.

    ``left`` and ``right`` are operand objects exposing
    ``get_value(message)``; ``operator`` is one of the comparison constants.
    """

    def __init__(self, left, operator, right):
        self.left = left
        self.operator = operator
        self.right = right

    def evaluate(self, message):
        """Return the result of the comparison for ``message``.

        The operand *values* are compared, not the operand objects.
        HAS and unknown operators yield False.
        """
        if self.operator == HAS:
            return False  # TODO: Implement array lookup?

        compare = _COMPARISONS.get(self.operator)
        if compare is None:
            # TODO: Log error
            return False

        return compare(self.left.get_value(message), self.right.get_value(message))

    def __repr__(self):
        opname = _OPERATOR_NAMES.get(self.operator, "?")
        # NOTE(review): the format literal was empty in the source, which
        # makes the % operation raise TypeError; this text is a reconstruction.
        return "<FilterExpression %s %s %s>" % (repr(self.left), opname, repr(self.right))


class FilterExpressionGroup(object):
    """A list of elements joined by one relation (AND or OR)."""

    def __init__(self):
        self.elements = []
        self.relation = NONE

    def add(self, element):
        """Append ``element`` (anything with ``evaluate(message)``)."""
        self.elements.append(element)

    def evaluate(self, message):
        """Evaluate every element against ``message`` and combine the results.

        AND requires all elements to be true, OR requires at least one.
        Any other relation (including NONE) yields False.
        """
        if self.relation == AND:
            return all(element.evaluate(message) for element in self.elements)
        elif self.relation == OR:
            return any(element.evaluate(message) for element in self.elements)
        else:
            # TODO: Log error
            return False

    def __repr__(self):
        if self.relation == AND:
            relname = "AND"
        elif self.relation == OR:
            relname = "OR"
        else:
            relname = "?"
        # NOTE(review): format literal reconstructed; it was empty in the source.
        return "<FilterExpressionGroup %s (%s)>" % (relname, ", ".join(repr(x) for x in self.elements))


class FilterExpressionElement(object):
    """Base class for the operands of a FilterExpression."""
    pass


class FilterExpressionVariable(FilterExpressionElement):
    """Operand that refers to a ``$variable`` by scope and optional name."""

    def __init__(self, scope, name=None):
        self.scope = scope
        self.name = name
        # TODO: name path parsing

    def get_value(self, message):
        return False  # TODO: grab correct value

    def __repr__(self):
        # NOTE(review): format literal reconstructed; it was empty in the source.
        return "<FilterExpressionVariable scope=%s name=%s>" % (self.scope, self.name)


class FilterExpressionString(FilterExpressionElement):
    """Operand holding a literal string."""

    def __init__(self, string):
        self.string = string

    def get_value(self, message):
        return self.string

    def __repr__(self):
        # NOTE(review): format literal reconstructed; it was empty in the source.
        return "<FilterExpressionString \"%s\">" % self.string