From a80b24a5d4717bcae32e849b34b69d36e09a923e Mon Sep 17 00:00:00 2001 From: Dan Howe Date: Fri, 15 Feb 2019 08:59:26 +1100 Subject: [PATCH] Handle invalid bore IDs gracefully --- waternsw_grabber/waternsw_grabber.py | 38 +++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/waternsw_grabber/waternsw_grabber.py b/waternsw_grabber/waternsw_grabber.py index 223770e..58c5d06 100644 --- a/waternsw_grabber/waternsw_grabber.py +++ b/waternsw_grabber/waternsw_grabber.py @@ -1,5 +1,6 @@ import os import re +import time import requests import warnings import pandas as pd @@ -7,9 +8,10 @@ from tqdm import tqdm from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys -from selenium.common.exceptions import TimeoutException from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.webdriver.support import expected_conditions as EC +from selenium.common.exceptions import ( + TimeoutException, StaleElementReferenceException, NoSuchElementException) def has_admin(): @@ -46,6 +48,9 @@ def wait_for_element(driver, by, x, timeout=30): def get_telemetered_bore(driver, bore_id, start_date, end_date): + url = 'https://realtimedata.waternsw.com.au/water.stm' + driver.get(url) + driver.switch_to.default_content() webhyd = driver.find_element_by_id('webhyd') driver.switch_to.frame(webhyd) @@ -57,6 +62,22 @@ def get_telemetered_bore(driver, bore_id, start_date, end_date): WebDriverWait( driver, timeout=30).until( EC.frame_to_be_available_and_switch_to_it('gwgwlf_org')) + + # Wait until body text of iframe has loaded + body_text = None + while not body_text: + try: + # Get contents of body text + body_text = driver.find_element_by_xpath('//*/body').text + except (StaleElementReferenceException, NoSuchElementException): + pass + time.sleep(0.5) + + # Detect if bore record does not exist + if body_text.startswith('No SITE record found for site'): + raise ValueError('No SITE record found for site {}'.format(bore_id)) + + # Wait for navigation tabs wait_for_element(driver, By.XPATH, '//*[@id="tabstext"]') # Activate outputs tab, and wait for 'Get Output' button @@ -111,9 +132,11 @@ def get_telemetered_bore(driver, bore_id, start_date, end_date): driver.execute_script("hide_object('confirm');co(level,tab,1)") # Close popup - wait_for_element(driver, By.XPATH, - "//div[contains(@class, 'lity-container')]", - timeout=30) + wait_for_element( + driver, + By.XPATH, + "//div[contains(@class, 'lity-container')]", + timeout=30) webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform() @@ -137,8 +160,6 @@ def open_browser(download_dir): # Open browser driver = webdriver.Firefox(firefox_profile=profile) - url = 'https://realtimedata.waternsw.com.au/water.stm' - driver.get(url) return driver @@ -150,6 +171,9 @@ def telemetered_bore_downloader(bore_ids, start_date, end_date, download_dir): pbar = tqdm(bore_ids) for bore_id in pbar: pbar.set_description(bore_id) - get_telemetered_bore(driver, bore_id, start_date, end_date) + try: + get_telemetered_bore(driver, bore_id, start_date, end_date) + except ValueError as e: + print(e) driver.quit()