Add argument parser and documentation

master
Dan Howe 7 years ago
parent 0ce7d3d686
commit 4ef6b65087

@ -1,3 +1,38 @@
"""Remove security features from pdfs.
This script removes security features of pdfs inside a specified folder,
including restrictions on:
- copying text and images
- printing the document
usage: pdf_unlock.py [-h] [-r] [-o] folder
positional arguments:
folder name of input folder
optional arguments:
-h, --help show this help message and exit
-r, --recursive search for files recursively
-o, --overwrite overwrite original files
Examples:
Search for pdfs inside 'pdf_folder', then create new unlocked versions
of the pdfs with the suffix '-unlocked.pdf'.
> python pdf_unlock.py pdf_folder
Search for pdfs inside 'pdf_folder', then unlock the pdfs and overwrite
the original versions.
> python pdf_unlock.py pdf_folder -o
Search recursively for pdfs inside 'pdf_folder' and all subfolders,
then unlock the pdfs and overwrite the original versions.
> python pdf_unlock.py pdf_folder -o -r
"""
__author__ = "D. Howe" __author__ = "D. Howe"
__version__ = "0.1.0" __version__ = "0.1.0"
__email__ = "d.howe@wrl.unsw.edu.au" __email__ = "d.howe@wrl.unsw.edu.au"
@ -7,45 +42,41 @@ import sys
import glob import glob
import argparse import argparse
import subprocess import subprocess
import shutil
def pdf_unlock(): def pdf_unlock(pdf_file, overwrite=False):
# Get pdfs in same folder # Create suffix for unlocked pdfs
fnames = [ suffix = '-unlocked'
os.path.join(name) for name in os.listdir('.') if name.endswith('.pdf')
]
# Use pdf names from user (if provided)
if len(sys.argv[1:]) > 0:
fnames = sys.argv[1:]
for fname in fnames:
# Strip file extension # Strip file extension
base_name = os.path.splitext(fname)[0] input_name = os.path.splitext(pdf_file)[0]
output_name = input_name + suffix
# Skip file if it has already been processed
if input_name.endswith(suffix):
return
# save as temporary file # save as temporary file
cmd_str = ('gs ' cmd_str = ('gs '
'-dSAFER '
'-dNOPAUSE ' '-dNOPAUSE '
'-dQUIET ' '-dQUIET '
'-dBATCH ' '-dBATCH '
'-sPDFPassword= '
'-sDEVICE=pdfwrite ' '-sDEVICE=pdfwrite '
'-sOutputFile="{output_pdf}" ' '-sOutputFile="{}.pdf" '
'"{input_pdf}"').format(output_pdf, input_pdf) '"{}.pdf"').format(output_name, input_name)
print('Processing ' + base_name + '.pdf ...')
with subprocess.Popen( with subprocess.Popen(
cmd_str, cmd_str, stdout=subprocess.PIPE, bufsize=1,
stdout=subprocess.PIPE,
bufsize=1,
universal_newlines=True) as p: universal_newlines=True) as p:
for line in p.stdout: for line in p.stdout:
print(line, end='') print(line, end='')
# Overwrite original file # Overwrite original file
os.remove(base_name + '.pdf') if overwrite:
os.rename(base_name + '_temp.pdf', base_name + '.pdf') shutil.move(output_name + '.pdf', input_name + '.pdf')
def main(): def main():
@ -56,9 +87,23 @@ def main():
'--recursive', '--recursive',
help='search for files recursively', help='search for files recursively',
action='store_true') action='store_true')
parser.add_argument(
'-o',
'--overwrite',
help='overwrite original files',
action='store_true')
args = parser.parse_args() args = parser.parse_args()
print(args.recursive) # Get pdf files
if args.recursive:
glob_str = args.folder + '/**/*.pdf'
else:
glob_str = args.folder + '/*.pdf'
pdf_files = glob.glob(glob_str, recursive=args.recursive)
for pdf_file in pdf_files:
pdf_unlock(pdf_file, overwrite=args.overwrite)
if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save