Initial commit
commit
ec70a49352
@ -0,0 +1,30 @@
|
|||||||
|
import gzip
import sys
import urllib

from lxml import etree
|
||||||
|
|
||||||
|
# Local filename for the downloaded (still gzip-compressed) dump.
xml_path = "dump.xml.gz"
# Remote location of the Jamendo artist/album/track database dump.
xml_url = "http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz"
|
||||||
|
|
||||||
|
def update_progress(count, blocksize, totalsize):
    """Print download progress on a single, continuously rewritten line.

    Intended as the ``reporthook`` callback of ``urllib.urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        blocksize: Size of one block, in bytes.
        totalsize: Total size of the download in bytes. A value of zero or
            less is treated as "size unknown" (``urlretrieve`` reports -1
            when the server sends no Content-Length).
    """
    received = count * blocksize
    if totalsize > 0:
        # The last block usually overshoots the real size, so clamp to 100.
        percent = min(100, received * 100 // totalsize)
        sys.stdout.write("\r%2d%%" % percent)
    else:
        # No usable total: a percentage would be meaningless (or divide by
        # zero), so show the raw byte count instead.
        sys.stdout.write("\r%d bytes" % received)
    sys.stdout.flush()
|
||||||
|
|
||||||
|
def fast_iter(context, func):
    """Apply *func* to every element produced by *context*, pruning as it goes.

    After each element has been handled it is cleared, and the siblings that
    precede it are deleted from its parent, so elements that were already
    processed are not kept around.

    Pattern by Liza Daly:
    http://www.ibm.com/developerworks/xml/library/x-hiperfparse/

    Args:
        context: Iterable of ``(event, element)`` pairs, such as the result
            of ``etree.iterparse``.
        func: Callable invoked once per element, before the element is cleared.
    """
    for _event, node in context:
        func(node)
        node.clear()
        # Remove earlier siblings one at a time until this node is the first
        # child of its parent.
        while True:
            if node.getprevious() is None:
                break
            parent = node.getparent()
            del parent[0]
    del context
|
||||||
|
|
||||||
|
def process_element(elem):
    """Print the text nodes of *elem*'s ``description`` children.

    Args:
        elem: Element exposing an ``xpath`` method (e.g. an lxml element).
    """
    # Parenthesised single-argument form: prints the same thing under the
    # Python 2 print statement and is also valid Python 3.
    print(elem.xpath('description/text( )'))
|
||||||
|
|
||||||
|
# Step 1: download the compressed dump, reporting progress while it transfers.
print("Retrieving Jamendo database...")
# Save to xml_path; the name previously used here was never defined.
urllib.urlretrieve(xml_url, xml_path, reporthook=update_progress)

# The script deliberately stops after the download: the parsing stage below
# is not wired up yet. SystemExit is raised directly so this does not depend
# on the site-provided ``exit`` helper.
raise SystemExit(0)

# Step 2 (currently unreachable): stream-parse the downloaded dump.
# NOTE(review): tag 'item' looks like a placeholder -- confirm it against the
# actual dump schema before removing the early exit above.
# NOTE(review): gzip.open is assumed to be the intended source here, since the
# dump is saved gzip-compressed -- confirm.
with gzip.open(xml_path, "rb") as dump:
    context = etree.iterparse(dump, tag='item')
    fast_iter(context, process_element)
|
Loading…
Reference in New Issue