diff --git a/scripts/pdf_unlock.py b/scripts/pdf_unlock.py index cc3221d..ac514f4 100644 --- a/scripts/pdf_unlock.py +++ b/scripts/pdf_unlock.py @@ -1,3 +1,38 @@ +"""Remove security features from pdfs. + +This script removes security features of pdfs inside a specified folder, +including restrictions on: + +- copying text and images +- printing the document + +usage: pdf_unlock.py [-h] [-r] [-o] folder + +positional arguments: + folder name of input folder + + +optional arguments: + -h, --help show this help message and exit + -r, --recursive search for files recursively + -o, --overwrite overwrite original files + +Examples: + +Search for pdfs inside 'pdf_folder', then create new unlocked versions +of the pdfs with the suffix '-unlocked.pdf'. +> python pdf_unlock.py pdf_folder + +Search for pdfs inside 'pdf_folder', then unlock the pdfs and overwrite +the original versions. +> python pdf_unlock.py pdf_folder -o + +Search recursively for pdfs inside 'pdf_folder' and all subfolders, +then unlock the pdfs and overwrite the original versions. +> python pdf_unlock.py pdf_folder -o -r + +""" + __author__ = "D. 
Howe" __version__ = "0.1.0" __email__ = "d.howe@wrl.unsw.edu.au" @@ -7,45 +42,41 @@ import sys import glob import argparse import subprocess +import shutil -def pdf_unlock(): - # Get pdfs in same folder - fnames = [ - os.path.join(name) for name in os.listdir('.') if name.endswith('.pdf') - ] - - # Use pdf names from user (if provided) - if len(sys.argv[1:]) > 0: - fnames = sys.argv[1:] - - for fname in fnames: +def pdf_unlock(pdf_file, overwrite=False): + # Create suffix for unlocked pdfs + suffix = '-unlocked' - # Strip file extension - base_name = os.path.splitext(fname)[0] + # Strip file extension + input_name = os.path.splitext(pdf_file)[0] + output_name = input_name + suffix - # save as temporary file - cmd_str = ('gs ' - '-dNOPAUSE ' - '-dQUIET ' - '-dBATCH ' - '-sDEVICE=pdfwrite ' - '-sOutputFile="{output_pdf}" ' - '"{input_pdf}"').format(output_pdf, input_pdf) + # Skip file if it has already been processed + if input_name.endswith(suffix): + return - print('Processing ' + base_name + '.pdf ...') + # save as temporary file + cmd_str = ('gs ' + '-dSAFER ' + '-dNOPAUSE ' + '-dQUIET ' + '-dBATCH ' + '-sPDFPassword= ' + '-sDEVICE=pdfwrite ' + '-sOutputFile="{}.pdf" ' + '"{}.pdf"').format(output_name, input_name) - with subprocess.Popen( - cmd_str, - stdout=subprocess.PIPE, - bufsize=1, - universal_newlines=True) as p: - for line in p.stdout: - print(line, end='') + with subprocess.Popen( + cmd_str, stdout=subprocess.PIPE, bufsize=1, + universal_newlines=True) as p: + for line in p.stdout: + print(line, end='') - # Overwrite original file - os.remove(base_name + '.pdf') - os.rename(base_name + '_temp.pdf', base_name + '.pdf') + # Overwrite original file + if overwrite: + shutil.move(output_name + '.pdf', input_name + '.pdf') def main(): @@ -56,9 +87,23 @@ def main(): '--recursive', help='search for files recursively', action='store_true') + parser.add_argument( + '-o', + '--overwrite', + help='overwrite original files', + action='store_true') args = 
parser.parse_args() - print(args.recursive) + # Get pdf files + if args.recursive: + glob_str = args.folder + '/**/*.pdf' + else: + glob_str = args.folder + '/*.pdf' + + pdf_files = glob.glob(glob_str, recursive=args.recursive) + + for pdf_file in pdf_files: + pdf_unlock(pdf_file, overwrite=args.overwrite) if __name__ == '__main__':