diff --git a/waternsw_grabber/waternsw_grabber.py b/waternsw_grabber/waternsw_grabber.py index 2f0c1c9..c135f01 100644 --- a/waternsw_grabber/waternsw_grabber.py +++ b/waternsw_grabber/waternsw_grabber.py @@ -5,9 +5,10 @@ Download bore records from the WaterNSW data portal. import os import re import time +import shutil import logging -import requests import warnings +import requests import pandas as pd from tqdm import tqdm from selenium import webdriver @@ -252,9 +253,11 @@ def extract_records(input_dir, output_dir): if re.search('\([0-9]+\)', zip_name): continue - # Use '.part' file if zip was not correctly downloaded + # Rename '.part' file if zip was not correctly downloaded if os.path.getsize(os.path.join(input_dir, zip_name)) == 0: - zip_name += '.part' + shutil.move( + os.path.join(input_dir, zip_name) + '.part', + os.path.join(input_dir, zip_name)) # Read csv file inside zip archive df = pd.read_csv( @@ -304,8 +307,7 @@ def extract_records(input_dir, output_dir): ]] # Get csv name from zip archive - zip_name = zip_name.replace('.part', '') csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv')) # Export to csv - master.to_csv(csv_name, float_format='%0.3f') + df.to_csv(csv_name, float_format='%0.3f')