|
|
|
@ -34,7 +34,7 @@ then unlock the pdfs and overwrite the original versions.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
# Module metadata. The version is assigned exactly once; the stale "0.1.0"
# assignment that was immediately overwritten has been removed.
__author__ = "D. Howe"
__version__ = "0.2.0"
__email__ = "d.howe@wrl.unsw.edu.au"
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
@ -44,7 +44,58 @@ import argparse
|
|
|
|
|
import subprocess
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def pdfinfo(pdf_name, *args):
    """Get pdf info using the poppler tool 'pdfinfo'.

    Args:
        pdf_name: path to pdf file
        args: optional command line arguments, each passed as a separate
            positional argument, e.g. pdfinfo(name, '-isodates', '-v')

    Returns:
        A dictionary built from the 'key: value' lines of stdout from
        calling 'pdfinfo'. Each key is the text before the first colon and
        each value is the remainder of the line with surrounding
        whitespace stripped. Lines that contain no colon are ignored.

    Notes:
        The exit status of 'pdfinfo' is not checked, and stderr is not
        captured, so a failed run simply yields whatever was written to
        stdout (possibly an empty dictionary).
    """
    command = ['pdfinfo', *args, pdf_name]
    result = subprocess.run(command, stdout=subprocess.PIPE)
    lines = result.stdout.decode('utf-8').split('\n')

    pdf_info = {}
    for line in lines:
        # Split on the first colon only, because values (e.g. dates or
        # permission flags) may themselves contain colons.
        key, sep, val = line.partition(':')
        if not sep:
            # Blank or free-form line: nothing to record. Unpacking
            # line.split(':', 1) here would raise ValueError.
            continue
        pdf_info[key] = val.strip()

    return pdf_info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def isencrypted(pdf_name):
    """Report whether 'pdfinfo' describes a pdf as encrypted.

    Args:
        pdf_name: path to pdf file

    Returns:
        True if the first space-separated word of the 'Encrypted' field
        reported by pdfinfo() is 'yes', otherwise False

    Raises:
        KeyError: if the pdfinfo() result has no 'Encrypted' entry
    """
    encrypted_field = pdfinfo(pdf_name)['Encrypted']

    # Only the leading token is compared; anything after the first space
    # in the field is ignored.
    flag = encrypted_field.split(' ')[0]
    return flag == 'yes'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def pdf_unlock(pdf_file, overwrite=False):
|
|
|
|
|
"""Rewrite pdf with Ghostscript, removing encryption.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
pdf_file: path to pdf file
|
|
|
|
|
overwrite: boolean flag
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
A dictionary containing the lines of stdout from calling 'pdfinfo'
|
|
|
|
|
"""
|
|
|
|
|
# Create suffix for unlocked pdfs
|
|
|
|
|
suffix = '-unlocked'
|
|
|
|
|
|
|
|
|
@ -53,7 +104,12 @@ def pdf_unlock(pdf_file, overwrite=False):
|
|
|
|
|
output_name = input_name + suffix
|
|
|
|
|
|
|
|
|
|
# Skip file if it has already been processed
|
|
|
|
|
if input_name.endswith(suffix):
|
|
|
|
|
if input_name.endswith(suffix) or os.path.exists(output_name + '.pdf'):
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Skip file if it is not encrypted
|
|
|
|
|
if not isencrypted(pdf_file):
|
|
|
|
|
print('file skipped')
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Get name of Ghostscript executable
|
|
|
|
|