You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
#!/usr/bin/python
|
|
|
|
import sys, os, argparse
|
|
|
|
# Command-line interface: one or more FILE arguments, collected under "files".
parser = argparse.ArgumentParser(
    description='Fixes PDFs that are output by tiff2pdf, to get rid of the pink and green color overlay issue.',
)
parser.add_argument(
    'files',
    nargs='+',
    type=str,
    metavar='FILE',
    help='files to fix',
)

# Parse the command line right away and expose the result as a plain dict.
args = parser.parse_args()
options = vars(args)
|
|
|
|
def _partial_match_length(data, find):
    """Return the length of the longest proper prefix of *find* that ends *data*."""
    # Longest candidate first: holding back the longest possible prefix also
    # covers every shorter one, so repetitive patterns cannot be missed.
    for length in range(min(len(find) - 1, len(data)), 0, -1):
        if data.endswith(find[:length]):
            return length
    return 0


def chunked_replace(original_file, target_file, chunk_size, find, replace):
    """Copy a file while replacing every occurrence of a byte string.

    The source is read in pieces of ``chunk_size`` bytes so that large files
    never have to be held in memory at once. Occurrences of ``find`` that
    straddle a chunk boundary are still replaced; the result is identical to
    calling ``bytes.replace(find, replace)`` on the complete file contents.

    Args:
        original_file: Path of the file to read (opened in binary mode).
        target_file: Path of the file to write (created or truncated).
        chunk_size: Number of bytes to read per iteration; must be positive.
        find: Byte string to search for; must not be empty.
        replace: Byte string written in place of each occurrence of ``find``.

    Raises:
        ValueError: If ``find`` is empty or ``chunk_size`` is not positive.
        IOError/OSError: If either file cannot be opened, read or written.
    """
    if not find:
        raise ValueError("find must not be empty")
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")

    with open(original_file, "rb") as original, open(target_file, "wb") as target:
        # Raw (not yet replaced) bytes from the end of the previous chunk
        # that might be the beginning of an occurrence of ``find``.
        pending = b""

        while True:
            data = original.read(chunk_size)
            if not data:
                break

            # split() consumes complete matches left to right exactly like
            # replace() does; the last piece is whatever follows the final
            # complete match in this buffer.
            pieces = (pending + data).split(find)
            tail = pieces[-1]

            # Only the raw tail is inspected for a partial match. Looking at
            # already-replaced output instead could fabricate matches that
            # never existed in the input.
            keep = len(tail) - _partial_match_length(tail, find)
            pieces[-1] = tail[:keep]
            target.write(replace.join(pieces))
            pending = tail[keep:]

        # A partial match left over at end of file was a false positive.
        target.write(pending)
|
|
|
|
# Write a patched copy of every input file next to the original, named
# "<name>_fixed<ext>".
for item in options['files']:
    # splitext() keeps the leading dot in the extension, so it is appended
    # as-is; adding another "." would produce names like "scan_fixed..pdf".
    base_name, extension = os.path.splitext(os.path.basename(item))

    # os.path.join() copes with an empty directory part (a bare file name),
    # where plain "%s/%s" formatting would point at the filesystem root.
    base_path = os.path.dirname(item)
    target_file = os.path.join(base_path, base_name + "_fixed" + extension)

    chunked_replace(item, target_file, 512 * 1024, b"ColorTransform 0", b"ColorTransform 1")
|