diff --git a/.env b/.env
index de5e5c4..8a4468b 100644
--- a/.env
+++ b/.env
@@ -10,11 +10,9 @@ MATLAB_PATH="C:/Program Files/MATLAB/R2016b/bin/win64/MATLAB.exe"
 # total water level.
 MULTIPROCESS_THREADS=2
 
-# GDAL_DATA="C://Users//z5189959//Desktop//nsw-2016-storm-impact//.venv//Library//share//gdal"
-
 # The settings below should be left as is unless you know what you're doing.
 # Need to set pythonpath so that relative imports can be properly used in with pipenv
 # Refer to https://stackoverflow.com/q/52986500 and https://stackoverflow.com/a/49797761
-PYTHONPATH=${PWD}
\ No newline at end of file
+# PYTHONPATH=${PWD}
\ No newline at end of file
diff --git a/src/analysis/forecast_twl.py b/src/analysis/forecast_twl.py
index e15df6e..ddc93a6 100644
--- a/src/analysis/forecast_twl.py
+++ b/src/analysis/forecast_twl.py
@@ -7,8 +7,7 @@ import numpy.ma as ma
 import pandas as pd
 from scipy import stats
 
-
-from src.analysis import runup_models
+import runup_models
 
 logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
 logger = logging.getLogger(__name__)
@@ -266,6 +265,7 @@ def crossings(profile_x, profile_z, constant_z):
     z = np.subtract(profile_z, constant_z)
 
     # Find all indices right before any crossing.
+    # TODO Sometimes this can give a runtime warning https://stackoverflow.com/a/36489085
     indicies = np.where(z[:-1] * z[1:] < 0)[0]
 
     # Use linear interpolation to find intersample crossings.
diff --git a/src/analysis/observed_storm_impacts.py b/src/analysis/observed_storm_impacts.py
index 0aadbbe..80bdbda 100644
--- a/src/analysis/observed_storm_impacts.py
+++ b/src/analysis/observed_storm_impacts.py
@@ -48,6 +48,11 @@ def volume_change(df_profiles, df_profile_features, zone):
         if np.isnan(prestorm_dune_toe_x):
             prestorm_dune_toe_x = prestorm_dune_crest_x
 
+        # If there is no prestorm or poststorm profile, skip the site and continue
+        profile_lengths = [len(df_site.query("profile_type == '{}'".format(x))) for x in ['prestorm', 'poststorm']]
+        if any([length == 0 for length in profile_lengths]):
+            continue
+
         # Find last x coordinate where we have both prestorm and poststorm measurements. If we don't do this,
         # the prestorm and poststorm values are going to be calculated over different lengths.
         df_zone = df_site.dropna(subset=["z"])
@@ -128,25 +133,26 @@
     return df_observed_impacts
 
 
-if __name__ == "__main__":
-    logger.info("Importing existing data")
-    data_folder = "./data/interim"
-    df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
-    df_profile_features = pd.read_csv(os.path.join(data_folder, "profile_features.csv"), index_col=[0])
-
-    logger.info("Creating new dataframe for observed impacts")
-    df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
-
-    logger.info("Getting pre/post storm volumes")
-    df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
-    df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
-    df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
-
-    # Classify regime based on volume changes
-    df_observed_impacts = storm_regime(df_observed_impacts)
-
-    # Save dataframe to csv
-    df_observed_impacts.to_csv(os.path.join(data_folder, "impacts_observed.csv"))
+#
+# if __name__ == "__main__":
+#     logger.info("Importing existing data")
+#     data_folder = "./data/interim"
+#     df_profiles = pd.read_csv(os.path.join(data_folder, "profiles.csv"), index_col=[0, 1, 2])
+#     df_profile_features = pd.read_csv(os.path.join(data_folder, "profile_features.csv"), index_col=[0])
+#
+#     logger.info("Creating new dataframe for observed impacts")
+#     df_observed_impacts = pd.DataFrame(index=df_profile_features.index)
+#
+#     logger.info("Getting pre/post storm volumes")
+#     df_swash_vol_changes = volume_change(df_profiles, df_profile_features, zone="swash")
+#     df_dune_face_vol_changes = volume_change(df_profiles, df_profile_features, zone="dune_face")
+#     df_observed_impacts = df_observed_impacts.join([df_swash_vol_changes, df_dune_face_vol_changes])
+#
+#     # Classify regime based on volume changes
+#     df_observed_impacts = storm_regime(df_observed_impacts)
+#
+#     # Save dataframe to csv
+#     df_observed_impacts.to_csv(os.path.join(data_folder, "impacts_observed.csv"))
 
 
 @click.command()
diff --git a/src/data/__init__.py b/src/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data/csv_to_shp.py b/src/data/csv_to_shp.py
index 39525fa..a5438c2 100644
--- a/src/data/csv_to_shp.py
+++ b/src/data/csv_to_shp.py
@@ -2,15 +2,20 @@
 Converts .csv files to .shape files
 """
 
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import click
 import fiona
 import pandas as pd
 from fiona.crs import from_epsg
 from shapely.geometry import Point, mapping
-import logging.config
 
-logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
-logger = logging.getLogger(__name__)
+from utils import setup_logging
+
+logger = setup_logging()
 
 
 @click.command()
@@ -25,11 +30,11 @@ def sites_csv_to_shp(input_csv, output_shp):
     """
     logger.info("Converting %s to %s", input_csv, output_shp)
     df_sites = pd.read_csv(input_csv, index_col=[0])
-
+    logger.info(os.environ.get("GDAL_DATA", None))
     schema = {"geometry": "Point", "properties": {"beach": "str", "site_id": "str"}}
     with fiona.open(output_shp, "w", crs=from_epsg(4326), driver="ESRI Shapefile", schema=schema) as output:
         for index, row in df_sites.iterrows():
-            point = Point(row["lon"], row["lat"])
+            point = Point(row["x_200_lon"], row["x_200_lat"])
             prop = {"beach": row["beach"], "site_id": index}
             output.write({"geometry": mapping(point), "properties": prop})
     logger.info("Done!")
diff --git a/src/data/parse_mat.py b/src/data/parse_mat.py
index d6f7cb2..de17dfc 100644
--- a/src/data/parse_mat.py
+++ b/src/data/parse_mat.py
@@ -2,15 +2,25 @@
 Converts raw .mat files into a flattened .csv structure which can be imported into python pandas.
 """
 
-import logging.config
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 from datetime import datetime, timedelta
+import math
+
+
 import click
+import numpy as np
 import pandas as pd
 from mat4py import loadmat
-import numpy as np
+from shapely.geometry import Point
+
+from profile_features import convert_coord_systems
+from utils import setup_logging
 
-logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
-logger = logging.getLogger(__name__)
+logger = setup_logging()
 
 
 def parse_orientations(orientations_mat):
@@ -134,7 +144,7 @@ def parse_tides(tides_mat):
     return df
 
 
-def parse_profiles(profiles_mat):
+def parse_profiles_and_sites(profiles_mat):
     """
     Parses the raw profiles.mat file and returns a pandas dataframe
     :param tides_mat:
@@ -142,39 +152,108 @@
     """
     logger.info("Parsing %s", profiles_mat)
     mat_data = loadmat(profiles_mat)["data"]
-    rows = []
-    for i in range(0, len(mat_data["site"])):
-        for j in range(0, len(mat_data["pfx"][i])):
-            for profile_type in ["prestorm", "poststorm"]:
-
-                if profile_type == "prestorm":
-                    z = mat_data["pf1"][i][j][0]
-                if profile_type == "poststorm":
-                    z = mat_data["pf2"][i][j][0]
-
-                rows.append(
-                    {
-                        "beach": mat_data["site"][i],
-                        "lon": mat_data["lon"][i],
-                        "lat": mat_data["lat"][i],
-                        "profile_type": profile_type,
-                        "x": mat_data["pfx"][i][j][0],
-                        "z": z,
-                    }
-                )
+    profile_rows = []
+    site_rows = []
+    site_counter = 0
+
+    for i, site in enumerate(mat_data["site"]):
+
+        # Give each site a unique id
+        if len(site_rows) == 0 or site_rows[-1]["beach"] != site:
+            site_counter = 1
+        else:
+            site_counter += 1
+        site_id = "{}{:04d}".format(site, site_counter)
+
+        # Initialize location of x=200m latitude and longitude
+        x_200_lat = np.nan
+        x_200_lon = np.nan
+
+        # Want to calculate the orientation
+        orientation = {}
+
+        for x, lat, lon, z_prestorm, z_poststorm, easting, northing in zip(
+            mat_data["x"][i],
+            mat_data["lats"][i],
+            mat_data["lons"][i],
+            mat_data["Zpre"][i],
+            mat_data["Zpost"][i],
+            mat_data["eastings"][i],
+            mat_data["northings"][i],
+        ):
+
+            # Only extract pre and post storm profiles
+            for j, profile_type in enumerate(["prestorm", "poststorm"]):
+
+                if mat_data["isgood"][i][j] == 1:
+                    land_lim = mat_data["landlims"][i][j]
+                    survey_datetime = matlab_datenum_to_datetime(mat_data["surveydates"][i][j])
+
+                    if profile_type == "prestorm":
+                        z = z_prestorm
+                    else:
+                        z = z_poststorm
+
+                    # Keep a record of where the center of the profile is located, and the locations of the land
+                    # and sea
+
+                    # TODO: This code isn't very transferable. What if we don't have lat/lons at 200 m?
+                    # Relook at this
+                    if x[0] == 200:
+                        x_200_lat = lat[0]
+                        x_200_lon = lon[0]
+                    elif x[0] == 0:
+                        orientation["land_easting"] = easting[0]
+                        orientation["land_northing"] = northing[0]
+                    elif x[0] == 400:
+                        orientation["sea_easting"] = easting[0]
+                        orientation["sea_northing"] = northing[0]
+
+                    profile_rows.append(
+                        {
+                            "site_id": site_id,
+                            "lon": lon[0],
+                            "lat": lat[0],
+                            "profile_type": profile_type,
+                            "x": x[0],
+                            "z": z[0],
+                            "land_lim": land_lim,
+                            "survey_datetime": survey_datetime,
+                        }
+                    )
+
+        orientation = math.degrees(
+            math.atan2(
+                orientation["land_northing"] - orientation["sea_northing"],
+                orientation["land_easting"] - orientation["sea_easting"],
+            )
+        )
+        site_rows.append(
+            {
+                "site_id": site_id,
+                "beach": site,
+                "lat": x_200_lat,
+                "lon": x_200_lon,
+                "orientation": orientation,
+                "profile_x_lat_lon": 200,
+            }
+        )
 
-    df = pd.DataFrame(rows)
-    return df
+    df_profiles = pd.DataFrame(profile_rows)
+    df_sites = pd.DataFrame(site_rows)
+
+    logger.info("Parsed profiles and sites")
+    return df_profiles, df_sites
 
 
 def remove_zeros(df_profiles):
     """
     When parsing the pre/post storm profiles, the end of some profiles have constant values of zero. Let's change
     these to NaNs for consistancy. Didn't use pandas fillnan because 0 may still be a valid value.
-    :param df:
+    :param df_profiles:
     :return:
     """
+    logger.info("Removing zeros from end of profiles")
     df_profiles = df_profiles.sort_index()
     groups = df_profiles.groupby(level=["site_id", "profile_type"])
     for key, _ in groups:
@@ -185,6 +264,7 @@ def remove_zeros(df_profiles):
         df_profile = df_profiles[idx_site]
         x_last_ele = df_profile[df_profile.z != 0].index.get_level_values("x")[-1]
         df_profiles.loc[idx_site & (df_profiles.index.get_level_values("x") > x_last_ele), "z"] = np.nan
+    logger.info("Removed zeros from end of profiles")
 
     return df_profiles
 
@@ -198,31 +278,7 @@ def matlab_datenum_to_datetime(matlab_datenum):
     return datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum % 1) - timedelta(days=366)
 
 
-def get_unique_sites(dfs, cols=["beach", "lat", "lon"]):
-    """
-    Generates a dataframe of unique sites based on beach names, lats and lons. Creates a unique site ID for each.
-    :param dfs:
-    :param cols:
-    :return:
-    """
-
-    rows = []
-    df_all = pd.concat([df[cols] for df in dfs])
-    beach_groups = df_all.groupby(["beach"])
-    for beach_name, beach_group in beach_groups:
-        site_groups = beach_group.groupby(["lat", "lon"])
-        siteNo = 1
-        for site_name, site_group in site_groups:
-            site = "{}{:04d}".format(beach_name, siteNo)
-            rows.append({"site_id": site, "lat": site_name[0], "lon": site_name[1], "beach": beach_name})
-            siteNo += 1
-
-    df = pd.DataFrame(rows)
-
-    return df
-
-
-def replace_unique_sites(df, df_sites, cols=["lat", "lon"]):
+def replace_unique_sites(df, df_sites):
     """
     Replaces beach/lat/lon columns with the unique site_id
    :param dfs:
@@ -232,56 +288,37 @@
     # Make the sites index a column, so it can be merged into df
     df_sites["site_id"] = df_sites.index.get_level_values("site_id")
 
-    # Merging on a float can lead to subtle bugs.
-    # Lets convert lat/lons to integers and merge on that instead
-    precision = 8
-    df_sites["lat_int"] = np.round(df_sites["lat"] * 10 ** precision).astype(np.int64)
-    df_sites["lon_int"] = np.round(df_sites["lon"] * 10 ** precision).astype(np.int64)
-    df["lat_int"] = np.round(df["lat"] * 10 ** precision).astype(np.int64)
-    df["lon_int"] = np.round(df["lon"] * 10 ** precision).astype(np.int64)
+    # Create eastings and northings so we can calculate distances
+    site_points = [convert_coord_systems(Point(lon, lat)).xy for lon, lat in zip(df_sites["lon"], df_sites["lat"])]
+    df_sites["easting"] = [x[0][0] for x in site_points]
+    df_sites["northing"] = [x[1][0] for x in site_points]
 
-    df_merged = df.merge(df_sites, on=["lat_int", "lon_int"])
+    # Process each unique lat/lon combination in groups
+    groups = df.groupby(["lat", "lon"])
+    for (lat, lon), df_group in groups:
 
-    # Check that all our records have a unique site identifier
-    n_unmatched = len(df) - len(df_merged)
-    if n_unmatched > 0:
-        logger.warning("Not all records (%d of %d) matched with a unique site", n_unmatched, len(df))
-
-    df_merged = df_merged.drop(
-        columns=[
-            "lat_x",
-            "lon_x",
-            "lat_int",
-            "lon_int",
-            "beach_y",
-            "beach_x",
-            "lat_y",
-            "lon_y",
-            "orientation",
-            "profile_x_lat_lon",
-        ]
-    )
+        # Calculate distances from each point to each site and determine closest site
+        easting, northing = [x[0] for x in convert_coord_systems(Point(lon, lat)).xy]
+        distances_to_sites = np.sqrt((df_sites["easting"] - easting) ** 2 + (df_sites["northing"] - northing) ** 2)
+        min_distance = distances_to_sites.min()
+        closest_site = distances_to_sites.idxmin()
 
-    return df_merged
+        # Do some logging so we can check later.
+        if min_distance > 1:
+            logger.warning("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+        else:
+            logger.info("Closest site to (%.4f,%.4f) is %s (%.2f m away)", lat, lon, closest_site, min_distance)
+
+        # Assign site_id based on closest site
+        df.loc[df_group.index, "site_id"] = closest_site
 
-
-@click.command(short_help="create sites.csv")
-@click.option("--waves-mat", required=True, help=".mat file containing wave records")
-@click.option("--tides-mat", required=True, help=".mat file containing tide records")
-@click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--orientations-mat", required=True, help=".mat file containing orientation of beach profiles")
-@click.option("--output-file", required=True, help="where to save sites.csv")
-def create_sites_csv(waves_mat, tides_mat, profiles_mat, orientations_mat, output_file):
-    logger.info("Creating %s", output_file)
-    df_waves = parse_waves(waves_mat=waves_mat)
-    df_tides = parse_tides(tides_mat=tides_mat)
-    df_profiles = parse_profiles(profiles_mat=profiles_mat)
-    df_orientations = parse_orientations(orientations_mat=orientations_mat)
-    df_sites = get_unique_sites(dfs=[df_waves, df_tides, df_profiles])
-    df_sites = combine_sites_and_orientaions(df_sites, df_orientations)
-    df_sites = specify_lat_lon_profile_center(df_sites)
-    df_sites.set_index(["site_id"], inplace=True)
-    df_sites.to_csv(output_file)
-    logger.info("Created %s", output_file)
+    nan_count = df.site_id.isna().sum()
+    if nan_count > 0:
+        logger.warning("Not all records (%d of %d) matched with a unique site", nan_count, len(df))
+
+    df = df.drop(columns=["lat", "lon", "beach"])
+
+    return df
 
 
 @click.command(short_help="create waves.csv")
@@ -301,17 +338,22 @@ def create_waves_csv(waves_mat, sites_csv, output_file):
 @click.command(short_help="create profiles.csv")
 @click.option("--profiles-mat", required=True, help=".mat file containing beach profiles")
-@click.option("--sites-csv", required=True, help=".csv file description of cross section sites")
-@click.option("--output-file", required=True, help="where to save profiles.csv")
-def create_profiles_csv(profiles_mat, sites_csv, output_file):
-    logger.info("Creating %s", output_file)
-    df_profiles = parse_profiles(profiles_mat=profiles_mat)
-    df_sites = pd.read_csv(sites_csv, index_col=[0])
-    df_profiles = replace_unique_sites(df_profiles, df_sites)
+@click.option("--profiles-output-file", required=True, help="where to save profiles.csv")
+@click.option("--sites-output-file", required=True, help="where to save sites.csv")
+def create_sites_and_profiles_csv(profiles_mat, profiles_output_file, sites_output_file):
+    logger.info("Creating sites and profiles csvs")
+    df_profiles, df_sites = parse_profiles_and_sites(profiles_mat=profiles_mat)
     df_profiles.set_index(["site_id", "profile_type", "x"], inplace=True)
     df_profiles.sort_index(inplace=True)
-    df_profiles.to_csv(output_file)
-    logger.info("Created %s", output_file)
+    df_profiles = remove_zeros(df_profiles)
+
+    df_sites.set_index(["site_id"], inplace=True)
+    df_sites.sort_index(inplace=True)
+
+    df_profiles.to_csv(profiles_output_file)
+    logger.info("Created %s", profiles_output_file)
+    df_sites.to_csv(sites_output_file)
+    logger.info("Created %s", sites_output_file)
 
 
 @click.command(short_help="create profiles.csv")
@@ -335,8 +377,7 @@ def cli():
 
 
 if __name__ == "__main__":
-    cli.add_command(create_sites_csv)
     cli.add_command(create_waves_csv)
-    cli.add_command(create_profiles_csv)
+    cli.add_command(create_sites_and_profiles_csv)
     cli.add_command(create_tides_csv)
     cli()
diff --git a/src/data/profile_features.py b/src/data/profile_features.py
index 78ba406..cb63568 100644
--- a/src/data/profile_features.py
+++ b/src/data/profile_features.py
@@ -15,22 +15,6 @@ logging.config.fileConfig("./src/logging.conf", disable_existing_loggers=False)
 logger = logging.getLogger(__name__)
 
 
-def shapes_from_shp(shp_file):
-    """
-    Parses a shape file and returns a list of shapely shapes, ids and properties
-    :param shp_file:
-    :return:
-    """
-    shapes = []
-    ids = []
-    properties = []
-    for feat in fiona.open(shp_file, "r"):
-        shapes.append(shape(feat["geometry"]))
-        ids.append(feat["id"])
-        properties.append(feat["properties"])
-    return shapes, ids, properties
-
-
 def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPSG:28356"):
     """
     Converts coordinates from one coordinates system to another.
     Needed because shapefiles are usually defined in
@@ -50,6 +34,22 @@ def convert_coord_systems(g1, in_coord_system="EPSG:4326", out_coord_system="EPS
     return g2
 
 
+def shapes_from_shp(shp_file):
+    """
+    Parses a shape file and returns a list of shapely shapes, ids and properties
+    :param shp_file:
+    :return:
+    """
+    shapes = []
+    ids = []
+    properties = []
+    for feat in fiona.open(shp_file, "r"):
+        shapes.append(shape(feat["geometry"]))
+        ids.append(feat["id"])
+        properties.append(feat["properties"])
+    return shapes, ids, properties
+
+
 def distance_to_intersection(lat, lon, landward_orientation, beach, line_strings, line_properties):
     """
     Returns the distance at whjch a line drawn from a lat/lon at an orientation intersects a line stinrg
@@ -143,7 +143,7 @@ def parse_profile_features(df_sites, df_profiles, dune_crest_shp, dune_toe_shp):
         lambda row: beach_profile_elevation(row["{}_x".format(feat)], df_profiles, "prestorm", row.name), axis=1
     )
 
-    df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation"])
+    df_profile_features = df_profile_features.drop(columns=["beach", "lat", "lon", "orientation", "profile_x_lat_lon"])
     return df_profile_features
diff --git a/src/logging.yaml b/src/logging.yaml
new file mode 100644
index 0000000..fc82b31
--- /dev/null
+++ b/src/logging.yaml
@@ -0,0 +1,50 @@
+---
+version: 1
+disable_existing_loggers: False
+formatters:
+    simple:
+        format: "%(asctime)s - %(filename)s - %(levelname)s - %(message)s"
+
+handlers:
+    console:
+        class: logging.StreamHandler
+        level: DEBUG
+        formatter: simple
+        stream: ext://sys.stdout
+
+    info_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: INFO
+        formatter: simple
+        filename: info.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+    warning_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: WARNING
+        formatter: simple
+        filename: warning.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+    error_file_handler:
+        class: logging.handlers.RotatingFileHandler
+        level: ERROR
+        formatter: simple
+        filename: error.log
+        maxBytes: 10485760 # 10MB
+        backupCount: 3
+        encoding: utf8
+
+loggers:
+    my_module:
+        level: ERROR
+        handlers: [console]
+        propagate: no
+
+root:
+    level: INFO
+    handlers: [console, info_file_handler, error_file_handler, warning_file_handler]
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..0df1fc6
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,16 @@
+import logging.config
+import os
+import yaml
+
+
+def setup_logging(path="./src/logging.yaml", default_level=logging.INFO):
+    """
+    Setup logging configuration
+    """
+    if os.path.exists(path):
+        with open(path, "rt") as f:
+            config = yaml.safe_load(f.read())
+        logging.config.dictConfig(config)
+    else:
+        logging.basicConfig(level=default_level)
+    return logging.getLogger(__name__)
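Note (not part of the patch): a minimal sketch of how the new setup_logging() helper in src/utils.py is intended to be used, matching the pattern this diff applies in csv_to_shp.py and parse_mat.py. It assumes the script runs from the repository root (so the default "./src/logging.yaml" path resolves) and that src/ is on sys.path, as the modified scripts arrange via sys.path.append; the file path and message values below are illustrative only.

from utils import setup_logging

# Loads handlers from ./src/logging.yaml (console plus rotating info/warning/error
# log files); falls back to logging.basicConfig(level=logging.INFO) if the YAML
# config is missing.
logger = setup_logging()
logger.info("Parsing %s", "./data/raw/profiles.mat")  # illustrative path
logger.warning("Closest site is %.2f m away", 12.5)   # illustrative value, also captured in warning.log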