Fix handling of incompletely downloaded '.part' files

master
Dan Howe 6 years ago
parent 6f020db6c6
commit 0fd79ac14e

@ -5,9 +5,10 @@ Download bore records from the WaterNSW data portal.
import os import os
import re import re
import time import time
import shutil
import logging import logging
import requests
import warnings import warnings
import requests
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from selenium import webdriver from selenium import webdriver
@ -252,9 +253,11 @@ def extract_records(input_dir, output_dir):
if re.search('\([0-9]+\)', zip_name): if re.search('\([0-9]+\)', zip_name):
continue continue
# Use '.part' file if zip was not correctly downloaded # Rename '.part' file if zip was not correctly downloaded
if os.path.getsize(os.path.join(input_dir, zip_name)) == 0: if os.path.getsize(os.path.join(input_dir, zip_name)) == 0:
zip_name += '.part' shutil.move(
os.path.join(input_dir, zip_name) + '.part',
os.path.join(input_dir, zip_name))
# Read csv file inside zip archive # Read csv file inside zip archive
df = pd.read_csv( df = pd.read_csv(
@ -304,8 +307,7 @@ def extract_records(input_dir, output_dir):
]] ]]
# Get csv name from zip archive # Get csv name from zip archive
zip_name = zip_name.replace('.part', '')
csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv')) csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv'))
# Export to csv # Export to csv
master.to_csv(csv_name, float_format='%0.3f') df.to_csv(csv_name, float_format='%0.3f')

Loading…
Cancel
Save