4chandownloader/4c

#!/usr/bin/python

"""Download every full-size image linked from a 4chan thread page.

Command line: ``[-n] url``.  With ``-n``/``--newdir`` the images are saved
into a directory named after the last path segment of the thread URL;
otherwise they are saved into the current directory.

The script runs unchanged on Python 2.7 and Python 3.
"""

import argparse
import os
import re
import sys
from contextlib import closing

try:  # Python 2
    from urllib import urlretrieve
    from urllib2 import URLError, urlopen
except ImportError:  # Python 3
    from urllib.error import URLError
    from urllib.request import urlopen, urlretrieve

# Protocol-relative links to full-size images.  The board segment is matched
# generically so that threads on any board work, not only /b/.
IMAGE_LINK_RE = re.compile(
    r'href="(//images\.4chan\.org/[a-z0-9]+/src/[0-9]+\.[a-z]+)"')


def find_image_urls(page):
    """Return the distinct image links found in *page*, in first-seen order.

    *page* is the thread's HTML as text.  The returned links are
    protocol-relative (they start with ``//``).
    """
    seen = set()
    links = []
    for match in IMAGE_LINK_RE.finditer(page):
        link = match.group(1)
        if link not in seen:
            seen.add(link)
            links.append(link)
    return links


def thread_dir_name(url):
    """Return the last path segment of *url*, used as the download directory.

    A query string, a ``#fragment`` and trailing slashes are ignored, so
    ``.../res/12345/`` and ``.../res/12345#p678`` both give ``12345``.
    """
    path = url.split('#', 1)[0].split('?', 1)[0]
    return path.rstrip('/').split('/')[-1]


def ensure_directory(path):
    """Create *path* (and missing parents) unless it already is a directory."""
    try:
        os.makedirs(path)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(path):
            raise


def fetch_page(url):
    """Fetch *url* and return the response body as text.

    Raises ValueError when *url* is not a usable URL and URLError (which
    includes HTTPError) when the request itself fails.
    """
    with closing(urlopen(url)) as response:
        # Undecodable bytes are replaced: only ASCII links are of interest.
        return response.read().decode('utf-8', 'replace')


def parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser(
        description='Downloads all full-size images in an arbitrary 4chan thread.')
    parser.add_argument('urllist', metavar='url', type=str, nargs=1,
                        help='the URL of the thread')
    parser.add_argument('-n', '--newdir', dest='newdir', action='store_true',
                        help='create a new directory for this thread in the current directory')
    return parser.parse_args(argv)


def main(argv=None):
    """Run the downloader and return the process exit status.

    Returns 0 when every image was downloaded, 1 when the thread could not
    be fetched or at least one image failed to download.
    """
    options = vars(parse_args(argv))
    url = options['urllist'][0]

    # print() is always called with a single argument so that it behaves the
    # same as a Python 2 print statement and as the Python 3 function.
    try:
        page = fetch_page(url)
    except ValueError:
        print("That does not look like a valid URL.")
        return 1
    except URLError as error:
        print("Could not fetch the thread: %s" % error)
        return 1

    target_dir = thread_dir_name(url) if options['newdir'] else ""
    if target_dir:
        ensure_directory(target_dir)

    urls = find_image_urls(page)
    total = len(urls)
    print("Parsed thread. Total images: %d" % total)

    failures = 0
    for current, link in enumerate(urls, 1):
        downloadurl = "http:%s" % link
        filename = downloadurl.split('/')[-1]
        try:
            urlretrieve(downloadurl, os.path.join(target_dir, filename))
        except (IOError, OSError) as error:
            # Keep going: one broken image should not abort the whole thread.
            failures += 1
            print("Failed to download %s (%d/%d): %s"
                  % (filename, current, total, error))
            continue
        print("Downloaded %s (%d/%d)." % (filename, current, total))

    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())
Initial commit 13 years ago			`#!/usr/bin/python`

Add argument parsing 13 years ago			`import re, urllib, urllib2, argparse, os`

			`parser = argparse.ArgumentParser(description='Downloads all full-size images in an arbitrary 4chan thread.')`

			`parser.add_argument('urllist', metavar='url', type=str, nargs=1,`
			`help='the URL of the thread')`
			`parser.add_argument('-n', '--newdir', dest='newdir', action='store_true',`
			`help='create a new directory for this thread in the current directory')`

			`args = parser.parse_args()`
			`options = vars(args)`
Find all full image URLs on a page 13 years ago
Actual file downloading and regex fix 13 years ago			`regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'`
Add argument parsing 13 years ago			`url = options['urllist'][0]`

			`try:`
			`page = urllib2.urlopen(url).read()`
			`except ValueError:`
			`print "That does not look like a valid URL."`
			`exit(1)`

			`if options['newdir'] == True:`
			`thread_id = url.split('/')[-1]`
			`target_dir = "%s/" % thread_id`
Find all full image URLs on a page 13 years ago
Add argument parsing 13 years ago			`if not os.path.exists(thread_id):`
			`os.makedirs(thread_id)`
			`else:`
			`target_dir = ""`
Find all full image URLs on a page 13 years ago
			`search = re.compile(regex)`
			`matches = search.finditer(page)`

			`urls = []`

			`for match in matches:`
			`if match.group(1) not in urls:`
			`urls.append(match.group(1))`

Actual file downloading and regex fix 13 years ago			`current = 1`
			`total = len(urls)`

			`print "Parsed thread. Total images: %d" % total`

			`for downloadurl in urls:`
			`downloadurl = "http:%s" % downloadurl`
			`filename = downloadurl.split('/')[-1]`
Add argument parsing 13 years ago			`path = target_dir + filename`
			`urllib.urlretrieve(downloadurl, path)`
Actual file downloading and regex fix 13 years ago			`print "Downloaded %s (%d/%d)." % (filename, current, total)`
			`current += 1`