|
|
@ -5,9 +5,10 @@ Download bore records from the WaterNSW data portal.
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
import time
|
|
|
|
import time
|
|
|
|
|
|
|
|
import shutil
|
|
|
|
import logging
|
|
|
|
import logging
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import warnings
|
|
|
|
import warnings
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import pandas as pd
|
|
|
|
import pandas as pd
|
|
|
|
from tqdm import tqdm
|
|
|
|
from tqdm import tqdm
|
|
|
|
from selenium import webdriver
|
|
|
|
from selenium import webdriver
|
|
|
@ -252,9 +253,11 @@ def extract_records(input_dir, output_dir):
|
|
|
|
if re.search('\([0-9]+\)', zip_name):
|
|
|
|
if re.search('\([0-9]+\)', zip_name):
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Use '.part' file if zip was not correctly downloaded
|
|
|
|
# Rename '.part' file if zip was not correctly downloaded
|
|
|
|
if os.path.getsize(os.path.join(input_dir, zip_name)) == 0:
|
|
|
|
if os.path.getsize(os.path.join(input_dir, zip_name)) == 0:
|
|
|
|
zip_name += '.part'
|
|
|
|
shutil.move(
|
|
|
|
|
|
|
|
os.path.join(input_dir, zip_name) + '.part',
|
|
|
|
|
|
|
|
os.path.join(input_dir, zip_name))
|
|
|
|
|
|
|
|
|
|
|
|
# Read csv file inside zip archive
|
|
|
|
# Read csv file inside zip archive
|
|
|
|
df = pd.read_csv(
|
|
|
|
df = pd.read_csv(
|
|
|
@ -304,8 +307,7 @@ def extract_records(input_dir, output_dir):
|
|
|
|
]]
|
|
|
|
]]
|
|
|
|
|
|
|
|
|
|
|
|
# Get csv name from zip archive
|
|
|
|
# Get csv name from zip archive
|
|
|
|
zip_name = zip_name.replace('.part', '')
|
|
|
|
|
|
|
|
csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv'))
|
|
|
|
csv_name = os.path.join(output_dir, zip_name.replace('.zip', 'csv'))
|
|
|
|
|
|
|
|
|
|
|
|
# Export to csv
|
|
|
|
# Export to csv
|
|
|
|
master.to_csv(csv_name, float_format='%0.3f')
|
|
|
|
df.to_csv(csv_name, float_format='%0.3f')
|
|
|
|