#!/usr/bin/python

import sys, os, argparse

# Command-line interface: the script takes one or more paths of PDF files to fix.
parser = argparse.ArgumentParser(
    description='Fixes PDFs that are output by tiff2pdf, to get rid of the pink and green color overlay issue.',
)
parser.add_argument(
    'files',
    metavar='FILE',
    type=str,
    nargs='+',
    help='files to fix',
)

# Arguments are parsed as soon as the module is loaded; `options` exposes the parsed
# namespace as a plain dict keyed by argument name.
args = parser.parse_args()
options = vars(args)

def chunked_replace(original_file, target_file, chunk_size, find, replace):
	"""Copy original_file to target_file, replacing every occurrence of find.

	The input is streamed in reads of chunk_size bytes, so the whole file is never
	held in memory. Occurrences of find that straddle a read boundary are still
	replaced; the output is the same as a single left-to-right, non-overlapping
	replace over the complete file contents.

	original_file -- path of the file to read (opened in binary mode).
	target_file   -- path of the file to write (opened in binary mode, overwritten).
	chunk_size    -- number of bytes requested per read.
	find          -- byte string to search for; must not be empty.
	replace       -- byte string written in place of each occurrence of find.

	Raises ValueError if find is empty.
	"""
	if not find:
		raise ValueError("find must not be empty")

	# Unwritten bytes from the end of the previous pass that look like the start of
	# `find`. They are prepended to the next read so a match spanning two reads is
	# seen as a whole. This is always shorter than `find`.
	carry = b""

	with open(original_file, "rb") as original, open(target_file, "wb") as target:
		while True:
			chunk = original.read(chunk_size)
			if not chunk:
				break

			# split() scans left to right without overlaps, exactly like replace(),
			# and leaves the unmatched remainder after the last full match as the
			# final element.
			parts = (carry + chunk).split(find)
			tail = parts[-1]

			# Hold back the LONGEST suffix of the remainder that is a proper prefix
			# of `find`: any match completed by later data has to start inside it.
			keep = 0
			for length in range(len(find) - 1, 0, -1):
				if tail.endswith(find[:length]):
					keep = length
					break

			cut = len(tail) - keep
			parts[-1] = tail[:cut]
			carry = tail[cut:]
			target.write(replace.join(parts))

		# End of input: a held-back partial match can no longer be completed, so it
		# is written out unchanged.
		target.write(carry)

# Write a repaired copy of every input file next to the original, named
# "<name>_fixed<ext>".
for item in options['files']:
	base_name, extension = os.path.splitext(os.path.basename(item))
	base_path = os.path.dirname(item)
	# splitext() keeps the leading dot in `extension`, and dirname() is empty for a bare
	#   filename. Joining with os.path.join avoids both a doubled dot ("_fixed..pdf") and
	#   a target that lands in the filesystem root ("/name_fixed.pdf").
	target_file = os.path.join(base_path, "%s_fixed%s" % (base_name, extension))
	
	# tiff2pdf output is repaired by switching the ColorTransform flag from 0 to 1.
	chunked_replace(item, target_file, 512 * 1024, b"ColorTransform 0", b"ColorTransform 1")