Reorganize some files; add support for units in alarm rules; support network blip detection (including more leniency for heartbeat failures); fix a bug in alarm matching; remove debug output; add an auto-start mechanism and a kill script to cstatsd; fix deps.sh.
self.send_all("\x02\x030,4 [ DOWN ] \x03\x02 Service \x032%(service)s\x03 on host \x037%(host)s\x03 reports that \x036%(unit)s\x03 is \x02down!\x02" % data)
elif data["msg_type"] == "blip":
self.send_all("\x02\x030,4 [ WARNING ] \x03\x02 Service \x032%(service)s\x03 on host \x037%(host)s\x03 reports that a blip occurred for \x036%(unit)s\x03!" % data)
elif data["msg_type"] == "value":
for rule_id, rule in rules.iteritems():
check_vals = {
@ -196,10 +204,14 @@ class Bot(object):
"unit": [data["unit"]]
}
failed = False
for segment in ("host", "service", "resource", "unit"):
for val in check_vals[segment]:
if rule["target"][segment] is not True and not fnmatch.fnmatch(val, rule["target"][segment]):
continue
failed = True
break
if failed:
continue # Skip to next
# We haven't broken out in the past bit of code, so we're still matching the pattern...
eligible_keys = [key for key in data["values"].keys() if fnmatch.fnmatch(key, rule["target"]["attribute"])]