From ec70a49352a7d26e8667ade920102cadec99f7d1 Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Sun, 3 Jun 2012 01:30:31 +0200 Subject: [PATCH] Initial commit --- jamendoparser.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 jamendoparser.py diff --git a/jamendoparser.py b/jamendoparser.py new file mode 100644 index 0000000..9ce7cd0 --- /dev/null +++ b/jamendoparser.py @@ -0,0 +1,30 @@ +import urllib, gzip +from lxml import etree + +xml_url = "http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz" +xml_path = "dump.xml.gz" + +def update_progress(count, blocksize, totalsize): + percent = int(count * blocksize * 100 / totalsize) + sys.stdout.write("\r%2d%%" % percent) + sys.stdout.flush() + +def fast_iter(context, func): + # http://www.ibm.com/developerworks/xml/library/x-hiperfparse/ + # Author: Liza Daly + for event, elem in context: + func(elem) + elem.clear() + while elem.getprevious() is not None: + del elem.getparent()[0] + del context + +def process_element(elem): + print elem.xpath( 'description/text( )' ) + +print "Retrieving Jamendo database..." +urllib.urlretrieve(xml_url, xml_file, reporthook=update_progress) + +exit(0) +context = etree.iterparse( MYFILE, tag='item' ) +fast_iter(context,process_element)