Replace invalid characters in project and mod names

master
Dan Howe 6 years ago
parent a708b384c3
commit f419079116

@ -22,6 +22,18 @@ from tqdm import tqdm
from requests.exceptions import ConnectionError, InvalidURL from requests.exceptions import ConnectionError, InvalidURL
def make_safe(s):
    """Return `s` with characters that are invalid in a file path handled.

    A backslash, asterisk, double quote, less-than sign, greater-than sign
    or vertical bar is removed. A colon is replaced with ' -' and a forward
    slash is replaced with '-'.

    Args:
        s: Name (for example a project or modification name) to sanitise.

    Returns:
        The sanitised string.
    """
    # Use a character class in a raw string so each listed character is
    # matched on its own. An alternation ending in '>\|' only matches the
    # two-character sequence '>|', leaving a lone '>' or '|' in place.
    s_safe = re.sub(r'[\\*"<>|]', '', s)

    # Plain substitutions: ':' becomes ' -' and '/' becomes '-'
    s_safe = s_safe.replace(':', ' -')
    s_safe = s_safe.replace('/', '-')

    return s_safe
def mod_ids_from_search(search_results_url): def mod_ids_from_search(search_results_url):
"""Get modification job IDs from search results URL""" """Get modification job IDs from search results URL"""
@ -58,6 +70,10 @@ def get_document_list(mod_id, output_dir):
# Get list of document folders # Get list of document folders
folders = mod_tree.xpath('//div[@class="folder_row"]') folders = mod_tree.xpath('//div[@class="folder_row"]')
# Remove invalid characters before creating folders
project_name = make_safe(project_name)
mod_name = make_safe(mod_name)
# Create modification folder # Create modification folder
mod_dir = os.path.join(output_dir, project_name, mod_name) mod_dir = os.path.join(output_dir, project_name, mod_name)
os.makedirs(mod_dir, exist_ok=True) os.makedirs(mod_dir, exist_ok=True)
@ -94,15 +110,14 @@ def get_document_list(mod_id, output_dir):
def download_document(url, document_path): def download_document(url, document_path):
"""Download document from given url""" """Download document from given url"""
# Create output directories as required # Check if destination path is too long (Windows filename limitation)
try: try:
open(document_path, 'a').close()
except FileNotFoundError:
document_path = '\\\\?\\' + os.path.abspath(document_path)
# Create output directories as required
os.makedirs(os.path.dirname(document_path), exist_ok=True) os.makedirs(os.path.dirname(document_path), exist_ok=True)
except OSError:
logging.error(('Failed to download {4}\n'
' Project: {1}\n'
' Modification: {2}\n'
' Folder: {3}\n').format(*document_path.split(os.sep)))
return
# Check if file exists # Check if file exists
if os.path.isfile(document_path): if os.path.isfile(document_path):
@ -119,12 +134,6 @@ def download_document(url, document_path):
' Folder: {3}\n').format(*document_path.split(os.sep))) ' Folder: {3}\n').format(*document_path.split(os.sep)))
return return
# Check if destination path is too long (Windows filename limitation)
try:
open(document_path, 'a').close()
except FileNotFoundError:
document_path = '\\\\?\\' + os.path.abspath(document_path)
# Write file to disk # Write file to disk
with open(document_path, 'wb') as f: with open(document_path, 'wb') as f:
shutil.copyfileobj(r.raw, f) shutil.copyfileobj(r.raw, f)
@ -191,6 +200,7 @@ def main():
doc_pbar.set_description(doc_name) doc_pbar.set_description(doc_name)
# Download document # Download document
print(doc)
download_document(doc['url'], doc['document_path']) download_document(doc['url'], doc['document_path'])
# Tidy up console after tqdm # Tidy up console after tqdm

Loading…
Cancel
Save