Compare commits


No commits in common. 'develop' and 'master' have entirely different histories.

.gitignore

@@ -1,10 +1 @@
cstatsd/config/*.yaml
ccollectd/config.yaml
alert/config.yaml
*.pyc
ccollectd/pubkey.dat
ccollectd/privkey.dat
cstatsd/pubkey.dat
cstatsd/privkey.dat
cstatsd/cstatsd.pid
alert/rules.pickle

@@ -1,304 +0,0 @@
#!/usr/bin/env python2
import socket, yaml, random, zmq, msgpack, time, uuid, fnmatch
import cPickle as pickle

ctx = zmq.Context()

with open("config.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

try:
    with open("rules.pickle", "r") as pfile:
        rules = pickle.load(pfile)
except IOError, e:
    rules = {}

fetcher = ctx.socket(zmq.SUB)
fetcher.setsockopt(zmq.SUBSCRIBE, "")
fetcher.connect("tcp://127.0.0.1:8998")

class Bot(object):
    def __init__(self, hosts, port, nickname, realname, channels, admins, subsock):
        self.hosts = hosts
        self.port = port
        self.nickname = nickname
        self.realname = realname
        self.channels = channels
        self.admins = admins
        self.subsock = subsock
        self.connected = False
        self.last_down = {}
        self.known_alarms = {}
        self.command_map = {
            "422": self.join_all_channels,
            "376": self.join_all_channels,
            "PRIVMSG": self.receive_message
        }

    def split_irc(self, message):
        if message[0] == ":":
            prefix = ":"
            message = message[1:]
        else:
            prefix = ""
        if ":" in message:
            rest, last = message.split(":", 1)
            parts = rest.strip().split() + [last]
        else:
            parts = message.split()
        parts[0] = prefix + parts[0]
        return parts

    def run(self):
        while True:  # Connect loop
            host = random.choice(self.hosts)
            self.sock = socket.socket()
            try:
                self.sock.connect((host, self.port))
            except socket.error, e:
                continue  # Reconnect
            self.send_raw("NICK %s" % self.nickname)
            self.sock.send("USER %s 0 0 :%s\r\n" % (self.nickname, self.realname))
            buff = ""
            while True:  # Read loop
                r, w, x = zmq.select([self.sock, self.subsock], [], [])
                for s in r:
                    if s == self.sock.fileno():
                        try:
                            recvdata = self.sock.recv(1024)
                        except socket.error, e:
                            break  # Something went wrong, reconnect...
                        if len(recvdata) == 0:
                            break  # We have disconnected...
                        buff += recvdata
                        messages = buff.split("\n")
                        buff = messages.pop()
                        for message in messages:
                            self.process_message(self.split_irc(message.strip("\r")))
                    elif self.subsock.getsockopt(zmq.EVENTS) & zmq.POLLIN != 0:
                        # Process incoming data from the subscribe socket...
                        message = msgpack.unpackb(s.recv())
                        self.process_stats(message)

    def send_raw(self, message):
        self.sock.send("%s\r\n" % message)

    def send_message(self, recipient, message):
        if self.connected == True:
            self.send_raw("PRIVMSG %s :%s" % (recipient, message))

    def send_all(self, message):
        for channel in self.channels:
            self.send_message(channel, message)

    def join(self, channel):
        self.send_raw("JOIN %s" % channel)

    def join_all_channels(self, message):
        self.connected = True
        for channel in self.channels:
            self.join(channel)

    def receive_message(self, message):
        args = message[3].split()
        sender = message[0][1:].split("!", 1)[0]
        channel = message[2]
        try:
            if sender in self.admins:
                if args[0] == "!addrule":
                    target, rel, value = args[1:4]
                    target = self.parse_target(target)
                    if value[-1].lower() in ("k", "m", "g", "t"):
                        unit = value[-1].lower()
                        value = value[:-1]
                        value = float(value)
                        value = value * (1024 ** (("k", "m", "g", "t").index(unit) + 1))
                    rule_id = uuid.uuid4()
                    rules[rule_id] = {
                        "target": target,
                        "operator": rel,
                        "value": value
                    }
                    with open("rules.pickle", "w") as pfile:
                        pickle.dump(rules, pfile)
                    self.send_message(channel, "Added rule for %s with ID %s." % (args[1], rule_id))
        except Exception, e:
            self.send_message(channel, str(e))

    def parse_target(self, target):
        host, rest = target.split("!", 1)
        service, rest = rest.split(".", 1)
        resource, rest = rest.split(":", 1)
        unit, attribute = rest.split(".", 1)
        # TODO: unit = unit.split("(", 1)[0].strip() # Allow () for comments
        if host == "*":
            host = True
        if service == "*":
            service = True
        if attribute == "*":
            attribute = True
        if resource == "*":
            resource = True
        if unit == "*":
            unit = True
        return {
            "host": host,
            "service": service,
            "resource": resource,
            "unit": unit,
            "attribute": attribute
        }

    def format_time_duration(self, seconds):
        # http://stackoverflow.com/a/20222351/1332715
        days, rem = divmod(seconds, 86400)
        hours, rem = divmod(rem, 3600)
        minutes, seconds = divmod(rem, 60)
        if seconds < 1:
            seconds = 1
        locals_ = locals()
        magnitudes_str = ("{n} {magnitude}".format(n=int(locals_[magnitude]), magnitude=magnitude) for magnitude in ("days", "hours", "minutes", "seconds") if locals_[magnitude])
        return ", ".join(magnitudes_str)

    def process_stats(self, message):
        data = message["message"]
        data["host"] = message["host"]
        if data["msg_type"] == "up" and data["initial"] == True:
            return  # We don't need to say what is up, initially...
        # TODO: Duration
        if data["msg_type"] == "up":
            try:
                data["duration"] = self.format_time_duration(time.time() - self.last_down["%(host)s!%(service)s.%(unit)s" % data])
            except KeyError, e:
                data["duration"] = "0 seconds"
            self.send_all("\x02\x030,3 [ UP ] \x03\x02 Service \x032%(service)s\x03 on host \x037%(host)s\x03 reports that \x036%(unit)s\x03 is now back up. It was down for %(duration)s." % data)
        elif data["msg_type"] == "down":
            self.last_down["%(host)s!%(service)s.%(unit)s" % data] = time.time()
            self.send_all("\x02\x030,4 [ DOWN ] \x03\x02 Service \x032%(service)s\x03 on host \x037%(host)s\x03 reports that \x036%(unit)s\x03 is \x02down!\x02" % data)
        elif data["msg_type"] == "blip":
            self.send_all("\x02\x030,7 [ WARNING ] \x03\x02 Service \x032%(service)s\x03 on host \x037%(host)s\x03 reports that a blip occurred for \x036%(unit)s\x03!" % data)
        elif data["msg_type"] == "value":
            for rule_id, rule in rules.iteritems():
                check_vals = {
                    "host": [data["host"]],
                    "service": [data["service"]],
                    "resource": [data["resource_type"]],
                    "unit": [data["unit"]]
                }
                failed = False
                for segment in ("host", "service", "resource", "unit"):
                    for val in check_vals[segment]:
                        if rule["target"][segment] is not True and not fnmatch.fnmatch(val, rule["target"][segment]):
                            failed = True
                            break
                if failed:
                    continue  # Skip to next
                # We haven't broken out in the past bit of code, so we're still matching the pattern...
                eligible_keys = [key for key in data["values"].keys() if fnmatch.fnmatch(key, rule["target"]["attribute"])]
                for key in eligible_keys:
                    value = data["values"][key]
                    rule_value = float(rule["value"])
                    operator = rule["operator"]
                    if operator == "=":
                        alarm = (value == rule_value)
                    elif operator == ">":
                        alarm = (value > rule_value)
                    elif operator == "<":
                        alarm = (value < rule_value)
                    elif operator == ">=":
                        alarm = (value >= rule_value)
                    elif operator == "<=":
                        alarm = (value <= rule_value)
                    elif operator == "!=":
                        alarm = (value != rule_value)
                    else:
                        alarm = False
                    self.trigger_alarm(rule_id, data, alarm, value, key)

    def trigger_alarm(self, rule_id, data, active, offending_value=None, offending_key=None):
        key = "%s/%s/%s/%s" % (rule_id, data["host"], data["unit"], offending_key)
        if key not in self.known_alarms:
            if active:
                self.transmit_alarm(rule_id, data, active, offending_value, offending_key)
                self.known_alarms[key] = time.time()
            else:
                self.known_alarms[key] = False
        else:
            if self.known_alarms[key] == False and active:
                # Alarm activated
                self.transmit_alarm(rule_id, data, active, offending_value, offending_key)
                self.known_alarms[key] = time.time()
            elif self.known_alarms[key] != False and not active:
                # Alarm deactivated
                self.transmit_alarm(rule_id, data, active, offending_value, offending_key)
                self.known_alarms[key] = False

    def transmit_alarm(self, rule_id, data, active, offending_value=None, offending_key=None):
        # At this point, we're sure that we want to notify...
        rule_target = rules[rule_id]["target"].copy()
        for k, v in rule_target.iteritems():
            if v is True:
                rule_target[k] = "*"
        rule_pattern = "%(host)s!%(service)s.%(resource)s:%(unit)s.%(attribute)s" % rule_target
        info = {
            "host": data["host"],
            "rule_id": rule_id,
            "rule_pattern": rule_pattern
        }
        if not active:
            key = "%s/%s/%s/%s" % (rule_id, data["host"], data["unit"], offending_key)
            try:
                info["duration"] = self.format_time_duration(time.time() - self.known_alarms[key])
            except KeyError, e:
                info["duration"] = "0 seconds"
            info["unit"] = data["unit"]
            info["attribute"] = offending_key
            self.send_all("\x02\x030,3 [ SOLVED ] \x03\x02 Host \x037%(host)s\x03 reports that the alarm for rule %(rule_id)s (\x036%(rule_pattern)s\x03) was resolved for \x034%(unit)s\x03.\x034%(attribute)s\x03. It was active for %(duration)s." % info)
        else:
            info["value"] = offending_value
            info["spec"] = "%s %s" % (rules[rule_id]["operator"], rules[rule_id]["value"])
            info["unit"] = data["unit"]
            info["attribute"] = offending_key
            self.send_all("\x02\x030,7 [ ALARM ] \x03\x02 Host \x037%(host)s\x03 reports that an alarm was triggered for rule %(rule_id)s (\x036%(rule_pattern)s\x03). The reported value was\x034 %(value)s\x03 for\x034 %(unit)s\x03.\x034%(attribute)s\x03 , triggering the \x032%(spec)s\x03 condition." % info)

    def process_message(self, message):
        if message[0].upper() == "PING":
            self.send_raw("PONG %s" % message[1])
        else:
            try:
                self.command_map[message[1].upper()](message)
            except KeyError, e:
                pass

bot = Bot(config["irc"]["hosts"], config["irc"]["port"], config["irc"]["nickname"], config["irc"]["realname"], config["irc"]["channels"], config["irc"]["admins"], fetcher)
bot.run()
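The TODO comment in parse_target above hints at allowing parenthesized comments in unit names. A minimal sketch of what that helper might look like (hypothetical, not part of this diff):

def strip_unit_comment(unit):
    # Hypothetical helper: turns "sda1 (primary disk)" into "sda1", so that
    # parenthesized comments are ignored when matching against alarm patterns.
    return unit.split("(", 1)[0].strip()

print strip_unit_comment("sda1 (primary disk)")  # prints "sda1"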

@@ -1,12 +0,0 @@
irc:
  hosts:
    - kerpia.cryto.net
    - box.cryto.net
    - arvel.cryto.net
  port: 6667
  nickname: StatusBot
  realname: Cryto System Monitoring Service
  admins:
    - joepie91
  channels:
    - "#test"

@@ -1,104 +0,0 @@
#!/usr/bin/env python2
import zmq, msgpack, yaml, zmqtimer, binascii, nacl, sys, socket
from nacl.public import PublicKey, PrivateKey, Box

ctx = zmq.Context()
distributor = ctx.socket(zmq.PUB)
distributor.bind("tcp://127.0.0.1:8998")
poller = zmq.Poller()

with open("config.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

with open("privkey.dat", "r") as f:
    privkey = PrivateKey(binascii.unhexlify(f.read()))

nodes = config["nodes"]
last_node_status = {}
socket_map = {}
boxes = {}

def heartbeat():
    for hostname, node in nodes.iteritems():
        retries = 0
        while retries < config["heartbeat"]["attempts"]:
            try:
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                s.settimeout(float(config["heartbeat"]["timeout"]) / (retries + 1))
                s.connect((node["ip"], node["port"]))
                s.shutdown(socket.SHUT_RDWR)
                s.close()
                up = True
                break
            except socket.error, e:
                up = False
                retries += 1
        try:
            status_changed = (up != last_node_status[hostname])
            initial = False
        except KeyError, e:
            status_changed = True
            initial = True
        last_node_status[hostname] = up
        send_message = False
        if status_changed:
            if up:
                msg_type = "up"
                send_message = True
            else:
                msg_type = "down"
                send_message = True
        else:
            if up and retries > 0:
                msg_type = "blip"
                send_message = True
        if send_message:
            distributor.send(msgpack.packb({
                "host": config["hostname"],
                "message": {
                    "service": "heartbeat",
                    "msg_type": msg_type,
                    "unit": hostname,
                    "initial": initial
                }
            }))

timers = zmqtimer.ZmqTimerManager()
timers.add_timer(zmqtimer.ZmqTimer(config["heartbeat"]["interval"], heartbeat))

for hostname, node in config["nodes"].iteritems():
    boxes[hostname] = Box(privkey, PublicKey(binascii.unhexlify(node["pubkey"])))
    grabber = ctx.socket(zmq.SUB)
    grabber.setsockopt(zmq.SUBSCRIBE, "")
    grabber.connect(node["endpoint"])
    socket_map[grabber] = hostname
    poller.register(grabber, zmq.POLLIN)

while True:
    timers.check()
    socks = dict(poller.poll(timers.get_next_interval()))
    for sock in socks:
        if socks[sock] == zmq.POLLIN:
            host = socket_map[sock]
            try:
                message = msgpack.unpackb(boxes[host].decrypt(sock.recv()))
            except nacl.exceptions.CryptoError, e:
                # Probably a spoofed message... skip to next socket
                sys.stderr.write("Ignoring message... spoofed? (origin: %s)\n" % host)  # FIXME: Use logging module...
                continue
            except Exception, e:
                sys.stderr.write(repr(e) + "\n")
                continue
            distributor.send(msgpack.packb({
                "host": host,
                "message": message
            }))

@@ -1,13 +0,0 @@
hostname: monitoring.cryto.net
heartbeat:
  interval: 5
  timeout: 1
  attempts: 3
nodes:
  localhost:
    ip: 127.0.0.1
    port: 6543
    endpoint: tcp://127.0.0.1:6543
    pubkey: bd784ef4065c9bd31627106dc55e26764605a144c6fc45ce93f33cbd19dd7333

@@ -1,15 +0,0 @@
#!/usr/bin/env python2
import yaml, os, stat, binascii
from nacl.public import PrivateKey

privkey = PrivateKey.generate()
pubkey = privkey.public_key

with open("privkey.dat", "w") as f:
    f.write(binascii.hexlify(str(privkey)))

with open("pubkey.dat", "w") as f:
    f.write(binascii.hexlify(str(pubkey)))

os.chmod("privkey.dat", stat.S_IRUSR | stat.S_IWUSR)

@@ -1,12 +0,0 @@
#!/usr/bin/env python2
import zmq, msgpack

ctx = zmq.Context()
fetcher = ctx.socket(zmq.SUB)
fetcher.setsockopt(zmq.SUBSCRIBE, "")
fetcher.connect("tcp://127.0.0.1:8998")

while True:
    message = msgpack.unpackb(fetcher.recv())
    print message

@@ -1,41 +0,0 @@
import time

class ZmqTimerManager(object):
    def __init__(self):
        self.timers = []
        self.next_call = 0

    def add_timer(self, timer):
        self.timers.append(timer)

    def check(self):
        if time.time() > self.next_call:
            for timer in self.timers:
                timer.check()

    def get_next_interval(self):
        if time.time() >= self.next_call:
            call_times = []
            for timer in self.timers:
                call_times.append(timer.get_next_call())
            self.next_call = min(call_times)
            if self.next_call < time.time():
                return 0
            else:
                return (self.next_call - time.time()) * 1000
        else:
            return (self.next_call - time.time()) * 1000

class ZmqTimer(object):
    def __init__(self, interval, callback):
        self.interval = interval
        self.callback = callback
        self.last_call = 0

    def check(self):
        if time.time() > (self.interval + self.last_call):
            self.callback()
            self.last_call = time.time()

    def get_next_call(self):
        return self.last_call + self.interval

@@ -1,6 +0,0 @@
#!/bin/bash
echo "Generating keypair..."
./genkey 2>/dev/null
./bootstrap-config
echo "Your public key: `cat pubkey.dat`"
echo "Server IP: `curl -s http://wtfismyip.com/text`" 2>/dev/null

@@ -1,86 +0,0 @@
#!/usr/bin/env python2
import yaml, sys

master_pubkey = raw_input("Public key of the master server: ")

print "You'll now be asked to configure ports to check. If you don't want to configure any ports, just hit enter without entering any information."
ports = {}
while True:
    port = raw_input("Port number: ")
    if port.strip() == "":
        break
    service_name = raw_input("Service name for port %s: " % port)
    ports[int(port)] = service_name

print "The same thing, except now for processes to check. Just hit enter without entering any information when you're done; the same goes for the argument list. As a wildcard, you can use *"
services = {}
while True:
    service_name = raw_input("Service name: ")
    if service_name.strip() == "":
        break
    process_name = raw_input("Process name: ")
    args = {}
    argnum = 1
    while True:
        arg = raw_input("Argument %d: " % argnum)
        if arg.strip() == "":
            break
        args[argnum] = arg
        argnum += 1
    services[service_name] = {
        "name": process_name,
        "args": args
    }

print "Now enter any disk devices you wish to monitor. Leave empty and hit enter when done."
disks = []
while True:
    device_name = raw_input("Device name: ")
    if device_name.strip() == "":
        break
    disks.append(device_name)

# Write config files...
modules = []
modules.append("stats-machine")

with open("config/machine.yaml.example", "r") as ef:
    with open("config/machine.yaml", "w") as ff:
        data = yaml.safe_load(ef.read())
        data["drives"] = disks
        ff.write(yaml.dump(data))

if len(ports) > 0:
    modules.append("stats-ports")
    with open("config/ports.yaml.example", "r") as ef:
        with open("config/ports.yaml", "w") as ff:
            data = yaml.safe_load(ef.read())
            data["ports"] = ports
            ff.write(yaml.dump(data))

if len(services) > 0:
    modules.append("stats-processes")
    with open("config/processes.yaml.example", "r") as ef:
        with open("config/processes.yaml", "w") as ff:
            data = yaml.safe_load(ef.read())
            data["processes"] = services
            ff.write(yaml.dump(data))

with open("config/cstatsd.yaml.example", "r") as ef:
    with open("config/cstatsd.yaml", "w") as ff:
        data = yaml.safe_load(ef.read())
        data["pubkey"] = master_pubkey
        data["autostart"] = modules
        ff.write(yaml.dump(data))

@@ -1,7 +0,0 @@
endpoint: tcp://*:6543
pubkey: a266a0634790a79c6934385892f7c377d35b8f03b9c6ac7d5bfed4a94f93ba65
autostart:
- stats-processes
- stats-ports
- stats-machine

@@ -1,5 +1,4 @@
interval: 1
drives:
- /dev/sda1
- /dev/sdb1
- /

@@ -1,4 +1,4 @@
interval: 1
interval: 5
ports:
  6667: UnrealIRCd

@@ -1,15 +0,0 @@
interval: 5
processes:
  radiotray:
    name: '*python*'
    args:
      1: /usr/bin/radiotray
  guake:
    name: '*python*'
    args:
      1: /usr/local/bin/guake
  keepassx:
    name: keepassx

@@ -1,43 +1,12 @@
#!/usr/bin/env python2
import zmq, yaml, binascii, nacl, sys, subprocess, os
from nacl.public import PublicKey, PrivateKey, Box

basedir = os.path.dirname(os.path.realpath(__file__))

with open("cstatsd.pid", "w") as pidfile:
    pidfile.write(str(os.getpid()))

import zmq, msgpack
ctx = zmq.Context()

with open("config/cstatsd.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

pubkey = PublicKey(binascii.unhexlify(config["pubkey"]))

with open("privkey.dat", "r") as f:
    privkey = PrivateKey(binascii.unhexlify(f.read()))

box = Box(privkey, pubkey)

collector = ctx.socket(zmq.PULL)
collector.bind("ipc:///tmp/cstatsd")

shipper = ctx.socket(zmq.PUB)
shipper.bind(config["endpoint"])

try:
    disable_autostart = (sys.argv[1] == "--disable-autostart")
except:
    disable_autostart = False

if disable_autostart == False:
    with open("/dev/null", "w+") as stfu:
        for script in config["autostart"]:
            print os.path.join(basedir, script)
            subprocess.Popen([os.path.join(basedir, script)], stdout=stfu, stderr=stfu)

while True:
    message = collector.recv()
    nonce = nacl.utils.random(Box.NONCE_SIZE)
    shipper.send(box.encrypt(message, nonce))
    message = msgpack.unpackb(collector.recv())
    print message

@@ -1,15 +0,0 @@
#!/usr/bin/env python2
import yaml, os, stat, binascii
from nacl.public import PrivateKey

privkey = PrivateKey.generate()
pubkey = privkey.public_key

with open("privkey.dat", "w") as f:
    f.write(binascii.hexlify(str(privkey)))

with open("pubkey.dat", "w") as f:
    f.write(binascii.hexlify(str(pubkey)))

os.chmod("privkey.dat", stat.S_IRUSR | stat.S_IWUSR)

@@ -1,5 +0,0 @@
#!/bin/bash
PID=`cat cstatsd.pid`
pkill -P $PID
kill $PID

@@ -1,224 +0,0 @@
#!/usr/bin/env python2
import zmq, msgpack, time, psutil, yaml, os, subprocess
from collections import namedtuple

# Horrible hack to make check_output exist in 2.6
# http://stackoverflow.com/a/13160748/1332715
if "check_output" not in dir(subprocess):  # duck punch it in!
    def f(*popenargs, **kwargs):
        if 'stdout' in kwargs:
            raise ValueError('stdout argument not allowed, it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
        output, unused_err = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            raise subprocess.CalledProcessError(retcode, cmd)
        return output
    subprocess.check_output = f

ctx = zmq.Context()
sock = ctx.socket(zmq.PUSH)
sock.connect("ipc:///tmp/cstatsd")

with open("config/machine.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

interval = config["interval"]
old_net_data = {}
disk_map = {}
last_io_data = {}

if os.path.exists("/proc/user_beancounters") and not os.path.exists("/proc/vz/vestat"):
    openvz_burst = True
    FakeRam = namedtuple("FakeRam", ["total", "used", "available", "percent", "buffers", "cached"])
else:
    openvz_burst = False

for disk in psutil.disk_partitions():
    disk_map[disk.device] = disk

if len(disk_map) == 0:
    # We're probably on OpenVZ, so /proc/partitions doesn't exist. Fall back to 'df'.
    FakeDisk = namedtuple("FakeDisk", ["device", "mountpoint"])
    for line in subprocess.check_output(["df"]).splitlines()[1:]:
        device, _, _, _, _, mountpoint = line.split()
        disk_map[device] = FakeDisk(device, mountpoint)

while True:
    load_avgs = os.getloadavg()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "load_average",
        "unit": "",
        "values": {
            "1m": load_avgs[0],
            "5m": load_avgs[1],
            "15m": load_avgs[2]
        }
    }))
    cpu_loads = psutil.cpu_percent(percpu=True)
    for i in xrange(0, len(cpu_loads)):
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "cpu",
            "unit": "core%d" % (i + 1),
            "values": {
                "load": cpu_loads[i]
            }
        }))
    try:
        io_counters = psutil.disk_io_counters(perdisk=True)
    except IOError, e:
        io_counters = {}  # OpenVZ...
    for drive in config["drives"]:
        drive_data = psutil.disk_usage(disk_map[drive].mountpoint)
        io_data = None
        for diskname, data in io_counters.iteritems():
            if drive.endswith(diskname):
                io_data = data
        if io_data is None or drive not in last_io_data:
            read_bps = 0
            write_bps = 0
            read_iops = 0
            write_iops = 0
        else:
            read_bps = (io_data.read_bytes - last_io_data[drive].read_bytes) / interval
            write_bps = (io_data.write_bytes - last_io_data[drive].write_bytes) / interval
            read_iops = (io_data.read_count - last_io_data[drive].read_count) / interval
            write_iops = (io_data.write_count - last_io_data[drive].write_count) / interval
        if io_data is not None:
            last_io_data[drive] = io_data
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "disk",
            "unit": drive,
            "values": {
                "total": drive_data.total,
                "used": drive_data.used,
                "free": drive_data.free,
                "used_percentage": drive_data.percent,
                "bps_read": read_bps,
                "bps_write": write_bps,
                "iops_read": read_iops,
                "iops_write": write_iops,
            }
        }))
    if openvz_burst:
        # Sigh, OpenVZ... let's use 'free', since that apparently -does- understand OpenVZ.
        lines = subprocess.check_output(["free", "-b"]).splitlines()
        _, ram_total, ram_used, ram_free, _, ram_buffers, ram_cached = lines[1].split()
        _, _, _, ram_available = lines[2].split()
        ram_total = int(ram_total)
        ram_free = int(ram_free)
        ram_buffers = int(ram_buffers)
        ram_cached = int(ram_cached)
        ram_available = int(ram_available)
        ram_used = int(ram_used)
        ram_percent = 1.0 * (ram_total - ram_available) / ram_total * 100
        ram_data = FakeRam(ram_total, ram_used, ram_available, ram_percent, ram_buffers, ram_cached)
    else:
        ram_data = psutil.virtual_memory()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "memory",
        "unit": "physical",
        "values": {
            "total": ram_data.total,
            "used": ram_data.used,
            "free": ram_data.available,
            "used_percentage": ram_data.percent,
            "buffers": ram_data.buffers,
            "cache": ram_data.cached
        }
    }))
    swap_data = psutil.swap_memory()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "memory",
        "unit": "swap",
        "values": {
            "total": swap_data.total,
            "used": swap_data.used,
            "free": swap_data.free,
            "used_percentage": swap_data.percent
        }
    }))
    net_data = psutil.net_io_counters(pernic=True)
    for nic, data in net_data.iteritems():
        try:
            old_in_b = old_net_data[nic].bytes_recv
            old_out_b = old_net_data[nic].bytes_sent
            old_in_p = old_net_data[nic].packets_recv
            old_out_p = old_net_data[nic].packets_sent
        except KeyError, e:
            # No old data yet, first run? Save and skip to next...
            old_net_data[nic] = data
            continue
        diff_in_b = data.bytes_recv - old_in_b
        diff_out_b = data.bytes_sent - old_out_b
        diff_in_p = data.packets_recv - old_in_p
        diff_out_p = data.packets_sent - old_out_p
        if diff_in_b < 0:
            diff_in_b = 0
        if diff_out_b < 0:
            diff_out_b = 0
        if diff_in_p < 0:
            diff_in_p = 0
        if diff_out_p < 0:
            diff_out_p = 0
        old_net_data[nic] = data
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "network",
            "unit": nic,
            "values": {
                "bps_in": diff_in_b / interval,
                "bps_out": diff_out_b / interval,
                "pps_in": diff_in_p / interval,
                "pps_out": diff_out_p / interval
            }
        }))
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "uptime",
        "unit": "",
        "values": {
            "uptime": time.time() - psutil.get_boot_time()
        }
    }))
    time.sleep(interval)

@@ -0,0 +1,143 @@
#!/usr/bin/env python2
import zmq, msgpack, time, psutil, yaml, os

ctx = zmq.Context()
sock = ctx.socket(zmq.PUSH)
sock.connect("ipc:///tmp/cstatsd")

with open("config/machine.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

interval = config["interval"]
old_net_data = {}

while True:
    load_avgs = os.getloadavg()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "load_average",
        "unit": "",
        "values": {
            "1m": load_avgs[0],
            "5m": load_avgs[1],
            "15m": load_avgs[2]
        }
    }))
    cpu_loads = psutil.cpu_percent(percpu=True)
    for i in xrange(0, len(cpu_loads)):
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "cpu",
            "unit": "core%d" % (i + 1),
            "values": {
                "load": cpu_loads[i]
            }
        }))
    for drive in config["drives"]:
        drive_data = psutil.disk_usage(drive)
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "disk",
            "unit": drive,
            "values": {
                "total": drive_data.total,
                "used": drive_data.used,
                "free": drive_data.free,
                "used_percentage": drive_data.percent
            }
        }))
    ram_data = psutil.virtual_memory()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "memory",
        "unit": "physical",
        "values": {
            "total": ram_data.total,
            "used": ram_data.used,
            "free": ram_data.available,
            "used_percentage": ram_data.percent,
            "buffers": ram_data.buffers,
            "cache": ram_data.cached
        }
    }))
    swap_data = psutil.swap_memory()
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "memory",
        "unit": "swap",
        "values": {
            "total": swap_data.total,
            "used": swap_data.used,
            "free": swap_data.free,
            "used_percentage": swap_data.percent
        }
    }))
    net_data = psutil.net_io_counters(pernic=True)
    for nic, data in net_data.iteritems():
        try:
            old_in_b = old_net_data[nic].bytes_recv
            old_out_b = old_net_data[nic].bytes_sent
            old_in_p = old_net_data[nic].packets_recv
            old_out_p = old_net_data[nic].packets_sent
        except KeyError, e:
            # No old data yet, first run? Save and skip to next...
            old_net_data[nic] = data
            continue
        diff_in_b = data.bytes_recv - old_in_b
        diff_out_b = data.bytes_sent - old_out_b
        diff_in_p = data.packets_recv - old_in_p
        diff_out_p = data.packets_sent - old_out_p
        if diff_in_b < 0:
            diff_in_b = 0
        if diff_out_b < 0:
            diff_out_b = 0
        if diff_in_p < 0:
            diff_in_p = 0
        if diff_out_p < 0:
            diff_out_p = 0
        old_net_data[nic] = data
        sock.send(msgpack.packb({
            "service": "machine",
            "msg_type": "value",
            "resource_type": "network",
            "unit": nic,
            "values": {
                "bps_in": diff_in_b / interval,
                "bps_out": diff_out_b / interval,
                "pps_in": diff_in_p / interval,
                "pps_out": diff_out_p / interval
            }
        }))
    sock.send(msgpack.packb({
        "service": "machine",
        "msg_type": "value",
        "resource_type": "uptime",
        "unit": "",
        "values": {
            "uptime": time.time() - psutil.get_boot_time()
        }
    }))
    time.sleep(interval)

@@ -20,7 +20,6 @@ while True:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(0.5)
            s.connect(("127.0.0.1", port))
            s.shutdown(socket.SHUT_RDWR)
            s.close()
            up = True
        except socket.error, e:

@@ -1,72 +0,0 @@
#!/usr/bin/env python2
import zmq, msgpack, time, yaml, psutil, fnmatch

ctx = zmq.Context()
sock = ctx.socket(zmq.PUSH)
sock.connect("ipc:///tmp/cstatsd")

with open("config/processes.yaml", "r") as cfile:
    config = yaml.safe_load(cfile)

interval = config["interval"]
old_status = {}

while True:
    all_procs = psutil.get_process_list()
    for service_name, patterns in config["processes"].iteritems():
        matching = []
        for proc in all_procs:  # Can't use filter() because of exceptions...
            try:
                if len(proc.cmdline) > 0 and fnmatch.fnmatch(proc.cmdline[0], patterns["name"]):
                    failed = False
                    try:
                        for arg, pattern in patterns["args"].iteritems():
                            try:
                                if len(proc.cmdline) < (arg + 1) or not fnmatch.fnmatch(proc.cmdline[arg], pattern):
                                    failed = True
                            except KeyError, e:
                                pass
                    except KeyError, e:
                        pass
                    if failed == False:
                        matching.append(proc)
            except psutil._error.NoSuchProcess, e:
                pass
        if len(matching) > 0:
            up = True
        else:
            up = False
        try:
            if up == old_status[service_name]:
                send_notice = False
            else:
                send_notice = True
            initial = False
        except KeyError, e:
            send_notice = True
            initial = True
        old_status[service_name] = up
        if send_notice:
            if up:
                msg_type = "up"
            else:
                msg_type = "down"
            sock.send(msgpack.packb({
                "service": "process",
                "msg_type": msg_type,
                "unit": service_name,
                "initial": initial
            }))
    time.sleep(interval)

@@ -1,4 +0,0 @@
#!/bin/bash
# You need squeeze-backports if you run this on squeeze!
apt-get install -y libzmq-dev libffi-dev build-essential python python-dev
pip install pyzmq msgpack-python pynacl pyyaml psutil

@@ -1,4 +0,0 @@
# Backports: echo "deb http://backports.debian.org/debian-backports squeeze-backports main" >> /etc/apt/sources.list && apt-get update && apt-get upgrade
apt-get install -y python python-dev && wget cryto.net/~joepie91/pipfix.sh && chmod +x pipfix.sh && ./pipfix.sh
adduser --system --shell /bin/bash --group monitor && apt-get install -y git; su -c "cd ~; git clone https://github.com/joepie91/cryto-status.git" monitor && /home/monitor/cryto-status/deps.sh
# Replace libzmq with a manually compiled version...: wget http://download.zeromq.org/zeromq-4.0.3.tar.gz; tar -xzvf zeromq-4.0.3.tar.gz; cd zeromq-4.0.3; ./configure; make; make install; ldconfig

@@ -1,54 +0,0 @@
* allow comments in (parentheses) in units, and ignore these when matching against an alarm pattern...
* web interface (angularjs)
* separate alarm and IRC logic
* monitor inodes
* watchdog on slave and master -> should send WARN notifications
* notifications (text, arbitrary-serialized-data as attachment, DEBUG/INFO/WARN/ERR/CRIT)
* consider redundancy - can already connect multiple masters through pubsub; how to deal with duplicate processing/checking?

cprocessd:
    -> subscribe to ccollectd
    -> debug switch for outputting all to terminal
    -> keep up/down state
    -> keep last-value state (resource usage)
    -> keep track of persistent downtimes (down for more than X time, as configured in config file)
    -> alarms (move this from the IRC bot to cprocessd)
    -> classify message importance
    -> cprocessd-stream socket, PUB that just streams processed data
    -> cprocessd-query socket, REP that responds to queries (see the sketch after this list)
        -> server-status
        -> down-list
        -> last-value
        -> server-list
        -> service-list
cmaild:
    -> use marrow.mailer
    -> receives data from cprocessd-stream
    -> sends e-mails for configured importance levels
cbotd:
    -> currently named 'alert'
    -> receives data from cprocessd-stream
    -> IRC bot
    -> posts alerts to specified IRC channels, depending on minimum severity level configured for that channel (i.e. INFO for #cryto-network but ERR for #crytocc)
csmsd:
    -> sends SMS for (critical) alerts
    -> receives data from cprocessd-stream
    -> Twilio? does a provider-neutral API exist? might need an extra abstraction...
cwebd:
    -> offers web interface with streaming status data
    -> publicly accessible and password-protected
    -> streaming data from cprocessd-stream
    -> on-pageload state from cprocessd-query (including 'current downtimes')
    -> tornado+zmq ioloop, http://zeromq.github.io/pyzmq/eventloop.html
    -> web dashboard
        -> AngularJS
        -> fancy graphs (via AngularJS? idk if a directive exists for this)
        -> show downtimes as well as live per-machine stats
        -> also show overview of all machines in a grid, color-coded for average load of all resources
    -> historical up/down data
        -> sqlite storage? single concurrent write, so should work
    -> perhaps letting people sign up for e-mail alerts is an option? to-inbox will be tricky here
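For the cprocessd-query socket planned above, a minimal sketch of the intended request/reply loop might look like this; the endpoint, request shape, and state layout are assumptions, not part of this diff:

import zmq, msgpack

ctx = zmq.Context()
query = ctx.socket(zmq.REP)
query.bind("ipc:///tmp/cprocessd-query")  # assumed endpoint, mirroring ipc:///tmp/cstatsd

# State that the processing loop would keep up to date (layout assumed).
state = {
    "server-status": {},
    "down-list": [],
    "last-value": {},
    "server-list": [],
    "service-list": []
}

while True:
    request = msgpack.unpackb(query.recv())
    # Each query type from the list above maps to one state entry;
    # unknown query types get an error reply instead.
    response = state.get(request.get("query"), {"error": "unknown query"})
    query.send(msgpack.packb(response))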