From 0fd79ac14e7c49ad339fd56c950dbfb391900a16 Mon Sep 17 00:00:00 2001 From: Dan Howe Date: Fri, 15 Feb 2019 17:04:12 +1100 Subject: [PATCH] Fix broken downloaded '.part' files --- waternsw_grabber/waternsw_grabber.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/waternsw_grabber/waternsw_grabber.py b/waternsw_grabber/waternsw_grabber.py index 2f0c1c9..c135f01 100644 --- a/waternsw_grabber/waternsw_grabber.py +++ b/waternsw_grabber/waternsw_grabber.py @@ -5,9 +5,10 @@ Download bore records from the WaterNSW data portal. import os import re import time +import shutil import logging -import requests import warnings +import requests import pandas as pd from tqdm import tqdm from selenium import webdriver @@ -252,9 +253,11 @@ def extract_records(input_dir, output_dir): if re.search('\([0-9]+\)', zip_name): continue - # Use '.part' file if zip was not correctly downloaded + # Rename '.part' file if zip was not correctly downloaded if os.path.getsize(os.path.join(input_dir, zip_name)) == 0: - zip_name += '.part' + shutil.move( + os.path.join(input_dir, zip_name) + '.part', + os.path.join(input_dir, zip_name)) # Read csv file inside zip archive df = pd.read_csv( @@ -304,8 +307,7 @@ def extract_records(input_dir, output_dir): ]] # Get csv name from zip archive - zip_name = zip_name.replace('.part', '') csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv')) # Export to csv - master.to_csv(csv_name, float_format='%0.3f') + df.to_csv(csv_name, float_format='%0.3f')