You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
911 B
Python
41 lines
911 B
Python
import zmq, time, requests, lxml.html, msgpack
|
|
|
|
# Pastebin archive poller: periodically fetches the archive listing, parses
# its table rows, and pushes one msgpack-encoded task per not-yet-seen paste
# id onto a ZeroMQ PUSH socket.
context = zmq.Context()
socket = context.socket(zmq.PUSH)
socket.bind("ipc:///tmp/pbscrape-tasks")

# Paste ids collected by the most recent poll that produced any entries.
last_list = []

while True:
    try:
        # Without a timeout a stalled connection would block the loop forever.
        page = requests.get("http://pastebin.com/archive", timeout=30).text
    except Exception:
        # Deliberately broad: any fetch failure just triggers a short back-off.
        # TODO: Log HTTP error
        time.sleep(30)
        continue

    # Timestamp of this poll; sent with every task as "base_time".
    basetime = int(time.time())

    xml = lxml.html.fromstring(page)
    pastes = xml.xpath("//table[@class='maintable']/tr")

    # Build the lookup set once per poll instead of scanning a list per row.
    already_seen = set(last_list)
    new_list = []

    for paste in pastes:
        try:
            # Exactly two link texts are expected per row; anything else
            # (e.g. a header row) fails the unpacking with ValueError.
            title, filetype = paste.xpath("td/a/text()")
            # Strip the first character of the href (presumably a leading
            # "/" - confirm against the live markup).
            paste_id = paste.xpath("td[1]/a/@href")[0][1:]
            ago = paste.xpath("td[2]/text()")[0]
        except (ValueError, IndexError):
            continue  # Not a valid entry

        new_list.append(paste_id)

        if paste_id not in already_seen:
            socket.send(msgpack.packb({
                "id": paste_id,
                "type": filetype,
                "title": title,
                "base_time": basetime,
                "ago": ago,
            }))

    # A response without any parseable rows (e.g. an error page, which
    # requests.get() does not raise for) must not wipe the history, or every
    # paste would be re-sent on the next successful poll.
    if new_list:
        last_list = new_list

    time.sleep(5 * 60)