"""Remove security features from pdfs. This script removes security features of pdfs inside a specified folder, including restrictions on: - copying text and images - printing the document usage: pdf_unlock.py [-h] [-r] [-o] folder positional arguments: folder name of input folder optional arguments: -h, --help show this help message and exit -r, --recursive search for files recursively -o, --overwrite overwrite original files Examples: Search for pdfs inside 'pdf_folder', then create new unlocked versions of the pdfs with the the suffix '-unlocked.pdf'. > python pdf_unlock.py pdf_folder Search for pdfs inside 'pdf_folder', then unlock the pdfs and overwrite the original versions. > python pdf_unlock.py pdf_folder -o Search recursively for pdfs inside 'pdf_folder' and all subfolders, then unlock the pdfs and overwrite the original versions. > python pdf_unlock.py pdf_folder -o -r """ __author__ = "D. Howe" __version__ = "0.1.0" __email__ = "d.howe@wrl.unsw.edu.au" import os import glob import shutil import argparse import subprocess from tqdm import tqdm def pdf_unlock(pdf_file, overwrite=False): # Create suffix for unlocked pdfs suffix = '-unlocked' # Strip file extension input_name = os.path.splitext(pdf_file)[0] output_name = input_name + suffix # Skip file if it has already been processed if input_name.endswith(suffix): return # save as temporary file cmd_str = ('gs ' '-dSAFER ' '-dNOPAUSE ' '-dQUIET ' '-dBATCH ' '-sPDFPassword= ' '-sDEVICE=pdfwrite ' '-sOutputFile="{}.pdf" ' '"{}.pdf"').format(output_name, input_name) with subprocess.Popen( cmd_str, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True) as p: for line in p.stdout: print(line, end='') # Overwrite original file if overwrite: shutil.move(output_name + '.pdf', input_name + '.pdf') def main(): parser = argparse.ArgumentParser() parser.add_argument('folder', help='name of input folder', default=None) parser.add_argument( '-r', '--recursive', help='search for files recursively', action='store_true') parser.add_argument( '-o', '--overwrite', help='overwrite original files', action='store_true') args = parser.parse_args() # Get pdf files if args.recursive: glob_str = args.folder + '/**/*.pdf' else: glob_str = args.folder + '/*.pdf' pdf_files = glob.glob(glob_str, recursive=args.recursive) pbar = tqdm(pdf_files) for pdf_file in pbar: pdf_file_shortname = os.path.split(pdf_file)[1] pbar.set_description('Processing {}'.format(pdf_file_shortname)) pdf_unlock(pdf_file, overwrite=args.overwrite) if __name__ == '__main__': main()