You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.9 KiB
Python

#!/usr/bin/python
import sys, os, argparse
parser = argparse.ArgumentParser(description='Fixes PDFs that are output by tiff2pdf, to get rid of the pink and green color overlay issue.')
parser.add_argument('files', metavar='FILE', type=str, nargs='+',
help='files to fix')
args = parser.parse_args()
options = vars(args)
def chunked_replace(original_file, target_file, chunk_size, find, replace):
    """Copy a file while replacing every occurrence of a byte string.

    The source is streamed in reads of ``chunk_size`` bytes, so the whole
    file is never held in memory. Occurrences of ``find`` that straddle a
    read boundary are still replaced: the trailing ``len(find) - 1`` bytes
    of each window are held back until the next read shows whether they
    begin a match. The bytes written are the same as calling
    ``data.replace(find, replace)`` on the complete file contents.

    Args:
        original_file: Path of the file to read (opened in binary mode).
        target_file: Path of the file to write; overwritten if it exists.
        chunk_size: Number of bytes requested per read; must be at least 1.
        find: Non-empty byte string to search for.
        replace: Byte string written in place of each occurrence.

    Raises:
        ValueError: If ``find`` is empty or ``chunk_size`` is less than 1.
    """
    if not find:
        raise ValueError("find must not be empty")
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # An occurrence that has started but is not yet complete can have at
    # most len(find) - 1 of its bytes in the current window.
    holdback = len(find) - 1
    pending = b""
    with open(original_file, "rb") as source, open(target_file, "wb") as target:
        while True:
            chunk = source.read(chunk_size)
            if not chunk:
                break
            window = pending + chunk

            # Replace every complete, non-overlapping occurrence, scanning
            # left to right exactly as bytes.replace() would.
            pieces = []
            consumed = 0
            while True:
                hit = window.find(find, consumed)
                if hit < 0:
                    break
                pieces.append(window[consumed:hit])
                pieces.append(replace)
                consumed = hit + len(find)

            # Never hold back bytes that already belong to a replaced match;
            # otherwise keep the tail that could still start a new match.
            cut = max(consumed, len(window) - holdback)
            pieces.append(window[consumed:cut])
            target.write(b"".join(pieces))
            pending = window[cut:]

        # End of input: the held-back tail is shorter than `find`, so it
        # cannot contain a match and is copied through unchanged.
        target.write(pending)
# Write a corrected copy of every input file next to the original, named
# "<name>_fixed<ext>".
for item in options['files']:
    # splitext() keeps the directory part in the root and the leading dot in
    # the extension, so plain concatenation gives "dir/name_fixed.pdf" and
    # leaves a bare filename in the current directory.
    root, extension = os.path.splitext(item)
    target_file = root + "_fixed" + extension
    chunked_replace(item, target_file, 512 * 1024, b"ColorTransform 0", b"ColorTransform 1")