# NOTE(review): the five lines that previously stood here ("You cannot select
# more than 25 topics...", "292 lines", "13 KiB", "Python") were repository
# web-view banner text captured by the file extraction, not part of the script.

# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for processing RMA11 water quality results - using gridded approach instead of a chainage line (estuary longitudinal line)
#==========================================================#
#Load packages
#==========================================================#
import geopandas as gpd
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
#==========================================================#
#Input parameters
#==========================================================#
#Input parameters
#==========================================================#
#set scenario codes and beginning and end years and corresponding scenario code
# Scenario run codes (RMA11 output folder names) and their plain-English labels,
# in matching order: Hwq003 = present day, Hwq005 = 0.9 m sea-level rise.
fs=['Hwq003', 'Hwq005']
scenario_real_names = ['Present', '0.9m SLR']
# Water-quality variables to extract from the RMA11 CSV exports.
WQvariables = ['SALINITY']
variable = 'elev' #depth or vel
# NOTE(review): maxrowsincsv is defined but never read in this file — confirm
# before removing.
maxrowsincsv = 15000
#name of scenario
Plot_scenario = 'HD_grid_V1'
# y-axis limits; NOTE(review): not referenced by the plots below — TODO confirm.
ylims = [0,40]
#time frames for analysis
# Dates use the '%Y %m %d' format expected by the strptime calls further down.
startdate = '1999 01 01'
enddate = '2000 12 30'
tideanalysis_day = '1999 05 26'
#year=range(startyear, endyear+1)
#load and refine node vs mesh shapefile to store the results of the statistics
Node_mesh = gpd.read_file('H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/GIS_data/Mesh_Node_GIS_Setup/hcc002_fishnet_500m_V3_joinXY.shp')
# Index by the 'Field1' attribute so per-node statistics can be joined back on.
Node_mesh.index = Node_mesh['Field1']
# Keep only the last four columns and give them canonical names.
Node_mesh = Node_mesh.iloc[:,-4:]
Node_mesh.columns = ['Node', 'X', 'Y', 'geometry']
node = Node_mesh['Node'].values
# Node ids double as "chainages": they are used to relabel result columns below.
chainages = node
Node_mesh.plot()
#set directory path for output files
output_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/Postprocessed/' + Plot_scenario + '/Figures/'
#set input directories and data
#tidal boundary data
tides_csv='C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/Tide Generator/Tidal Simulation Python Script (BMM)/Tides_1990_2010.csv'
#CSV file of mesh nodes and corresponding chainages
nodes_csv = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Chainages/Hunter_nodes.csv'
#River flow boundary
Flow_csv = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/01_Input/BCGeneration/Scenarios/Calibration/Calibration_Greta.csv'
#River bathy
bathy_csv = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Chainages/Hunter_nodes_bathy.csv'
#input csv with species and their salinity optima and thresholds
# Column 2 of this CSV (species name, presumably) becomes the index — TODO confirm.
salinity_eco_csv = "H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Eco_thresholds/Salintiy_Hunter_physiology_V1.csv"
salinity_eco_df = pd.read_csv(salinity_eco_csv, index_col=2)
# Drop rows without a species entry (blank index values).
salinity_eco_df = salinity_eco_df[salinity_eco_df.index.notnull()]
#==========================================================#
#==========================================================#
#set plot parameters
#==========================================================#
# Global matplotlib styling for all figures produced below.
ALPHA_figs = 1  # figure alpha (1 = fully opaque); read by downstream plot code, if any
font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 14}
matplotlib.rc('font', **font)
#==========================================================#
#==========================================================#
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
# Create the output folder on first run.  (Indentation of this if-body was
# lost in the extracted source and is reconstructed here.)
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
#==========================================================#
#========================================================#
#automated part of the code doing the data extraction
#==========================================================
#Load boundary flow data
Flow_df = pd.read_csv(Flow_csv, parse_dates=True, dayfirst=True, index_col=0)['Q[ML/d]']
# NOTE(review): Flow_df is a Series after the column selection above, so this
# .columns assignment only sets a stray attribute — confirm intent.
Flow_df.columns = ['Q[ML/d]']
# Clip the flow record to the analysis window.
Flow_df = Flow_df[datetime.strptime(startdate , '%Y %m %d').date():datetime.strptime(enddate, '%Y %m %d').date()]
#Load tide boundary data
tides_dfm = pd.read_csv(tides_csv, parse_dates=True, index_col=0)
tides_dfm.columns = ['tide']
#Load bathymetry data
bathy_dfm = pd.read_csv(bathy_csv, index_col=0)
bathy_dfm.plot()  # quick visual sanity check
#read csv file with extracted RMA data
#RMA_df = pd.read_csv(RMA_data_csv, parse_dates=True, index_col=0)
#load RMA2 data
# Build one wide frame holding every node's time series for every variable and
# scenario.  (Loop indentation was lost in the extracted source and is
# reconstructed here.)
HD_Summary_df = pd.DataFrame()
for variable in WQvariables:
    Summary_df = pd.DataFrame()
    df = pd.DataFrame()
    for f in fs:
        #set input and output directories: one folder of extracted CSVs per scenario
        input_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/Raw/Output_gridded1/' + f
        # Set working direcotry (where postprocessed NARClIM data is located)
        os.chdir(input_directory)
        #==========================================================#
        #Load data file: first CSV whose name contains the variable
        Clim_Var_CSVs = glob.glob('*' + variable + '*')
        clim_var_csv_path = Clim_Var_CSVs[0]
        df = pd.read_csv(clim_var_csv_path, index_col=False)
        # Build a datetime index from the Year column plus fractional hours.
        df.index = pd.to_datetime(df.Year, format = '%Y') + pd.to_timedelta(df.Hour, unit='h')
        df= df.drop(columns=['Year', 'Hour'])
        #df.columns = [NODE+'_Sal'] #, NODE+'_Tem']
        # Tag every node column with the variable name and scenario code so the
        # scenarios can be separated again by regex below.
        df.columns = [s + variable + '_'+ f for s in df.columns]
        #cut down the df to start and end date
        df = df[datetime.strptime(startdate , '%Y %m %d').date():datetime.strptime(enddate, '%Y %m %d').date()]
        Summary_df = pd.concat([Summary_df, df], axis=1, join='outer')
    HD_Summary_df = pd.concat([HD_Summary_df , Summary_df], axis=1, join='outer')
#subset the input data frame: one frame per scenario, columns relabelled by node id
Present_df = HD_Summary_df.filter(regex='Hwq003')
Present_df.columns = chainages
Far_future_df = HD_Summary_df.filter(regex='Hwq005')
Far_future_df.columns = chainages
#do statistics to summarize the time series based on optimum, min and max thresholds
# For every species row in the ecology CSV, compute per-node salinity
# statistics for both scenarios:
#   - 5th percentile, median, 95th percentile of the full series;
#   - mean annual percentage of time above/below the species' optimal range
#     ('MaOp'/'MiOp') and hard thresholds ('Malim'/'LoLim').
# The /(365*24*2) scaling converts an annual exceedance count to a percentage,
# which assumes a 30-minute model timestep — TODO confirm against RMA output.
# (Loop indentation was lost in the extracted source and is reconstructed
# here; the Python-2 'print x' statements were normalized to print(...) to
# match the parenthesized prints used earlier in the file.)
salinity_eco_df.columns
variable = 'Sal'
for index, row in salinity_eco_df.iterrows():
    print(row["Minimum optimal"])
    print(row["Maximum optimal"])
    print(row['# species'])
    print(row['Lower treshold'])
    print(row['Upper treshold'])
    # --- present-day scenario ---
    Pres_Sal_median_df = pd.DataFrame(Present_df.median())
    Pres_Sal_median_df.columns = ['med']
    Present_df_95 = pd.DataFrame(Present_df.quantile(q=0.95, axis=0))
    Present_df_05 = pd.DataFrame(Present_df.quantile(q=0.05, axis=0))
    Present_stats = pd.concat([Present_df_05, Pres_Sal_median_df, Present_df_95], axis=1, join='outer')
    Present_stats.columns = ['0_' + str(s) for s in Present_stats.columns]
    # % of time above the species' maximum optimal salinity
    agg = ('>'+ str(row["Maximum optimal"]) + '_count', lambda x: x.gt(row["Maximum optimal"]).sum()),
    Present_df_gtTHS = pd.DataFrame(Present_df.resample('A').agg(agg)*100/(365*24*2))
    Present_df_gtTHS = pd.DataFrame(Present_df_gtTHS.transpose().mean(axis=1))
    Present_df_gtTHS.columns = [str(int(row['# species'])) + '_0>' + 'MaOp']
    Present_df_gtTHS.index = chainages
    # % of time below the species' minimum optimal salinity
    agg = ('>'+ str(row["Minimum optimal"]) + '_count', lambda x: x.lt(row["Minimum optimal"]).sum()),
    Present_df_gtTHS_miop = pd.DataFrame(Present_df.resample('A').agg(agg)*100/(365*24*2))
    Present_df_gtTHS_miop = pd.DataFrame(Present_df_gtTHS_miop.transpose().mean(axis=1))
    Present_df_gtTHS_miop.columns = [str(int(row['# species'])) + '_0<' + 'MiOp']
    Present_df_gtTHS_miop.index = chainages
    # % of time above the species' upper hard threshold
    agg = ('>'+ str(row["Upper treshold"]) + '_count', lambda x: x.gt(row["Upper treshold"]).sum()),
    Present_df_gtTHS_malim = pd.DataFrame(Present_df.resample('A').agg(agg)*100/(365*24*2))
    Present_df_gtTHS_malim = pd.DataFrame(Present_df_gtTHS_malim.transpose().mean(axis=1))
    Present_df_gtTHS_malim.columns = [str(int(row['# species'])) + '_0>' + 'Malim']
    Present_df_gtTHS_malim.index = chainages
    # % of time below the species' lower hard threshold
    agg = ('>'+ str(row["Lower treshold"]) + '_count', lambda x: x.lt(row["Lower treshold"]).sum()),
    Present_df_gtTHS_lolim = pd.DataFrame(Present_df.resample('A').agg(agg)*100/(365*24*2))
    Present_df_gtTHS_lolim = pd.DataFrame(Present_df_gtTHS_lolim.transpose().mean(axis=1))
    Present_df_gtTHS_lolim.columns = [str(int(row['# species'])) + '_0<' + 'LoLim']
    Present_df_gtTHS_lolim.index = chainages
    #future (0.9 m SLR) scenario — same statistics, '9_' column prefix
    Far_future_Sal_median_df = pd.DataFrame(Far_future_df.median())
    Far_future_Sal_median_df.columns = ['med']
    Far_future_df_95 = pd.DataFrame(Far_future_df.quantile(q=0.95, axis=0))
    Far_future_df_05 = pd.DataFrame(Far_future_df.quantile(q=0.05, axis=0))
    Far_future_stats = pd.concat([Far_future_df_05, Far_future_Sal_median_df, Far_future_df_95], axis=1, join='outer')
    Far_future_stats.columns = ['9_' + str(s) for s in Far_future_stats.columns]
    agg = ('>'+ str(row["Maximum optimal"]) + '_count', lambda x: x.gt(row["Maximum optimal"]).sum()),
    Far_future_df_gtTHS = pd.DataFrame(Far_future_df.resample('A').agg(agg)*100/(365*24*2))
    Far_future_df_gtTHS = pd.DataFrame(Far_future_df_gtTHS.transpose().mean(axis=1))
    Far_future_df_gtTHS.columns = [str(int(row['# species'])) + '_9>' + 'MaOp']
    Far_future_df_gtTHS.index = chainages
    agg = ('>'+ str(row["Minimum optimal"]) + '_count', lambda x: x.lt(row["Minimum optimal"]).sum()),
    Far_future_df_gtTHS_miop = pd.DataFrame(Far_future_df.resample('A').agg(agg)*100/(365*24*2))
    Far_future_df_gtTHS_miop = pd.DataFrame(Far_future_df_gtTHS_miop.transpose().mean(axis=1))
    Far_future_df_gtTHS_miop.columns = [str(int(row['# species'])) + '_9<' + 'MiOp']
    Far_future_df_gtTHS_miop.index = chainages
    agg = ('>'+ str(row["Upper treshold"]) + '_count', lambda x: x.gt(row["Upper treshold"]).sum()),
    Far_future_df_gtTHS_malim = pd.DataFrame(Far_future_df.resample('A').agg(agg)*100/(365*24*2))
    Far_future_df_gtTHS_malim = pd.DataFrame(Far_future_df_gtTHS_malim.transpose().mean(axis=1))
    Far_future_df_gtTHS_malim.columns = [str(int(row['# species'])) + '_9>' + 'Malim']
    Far_future_df_gtTHS_malim.index = chainages
    agg = ('>'+ str(row["Lower treshold"]) + '_count', lambda x: x.lt(row["Lower treshold"]).sum()),
    Far_future_df_gtTHS_lolim = pd.DataFrame(Far_future_df.resample('A').agg(agg)*100/(365*24*2))
    Far_future_df_gtTHS_lolim = pd.DataFrame(Far_future_df_gtTHS_lolim.transpose().mean(axis=1))
    Far_future_df_gtTHS_lolim.columns = [str(int(row['# species'])) + '_9<' + 'LoLim']
    Far_future_df_gtTHS_lolim.index = chainages
    #combine statistics into single df keyed on the mesh geometry
    Shapefile_df = pd.concat([Node_mesh, Present_stats, Present_df_gtTHS, Present_df_gtTHS_miop,
                              Present_df_gtTHS_malim, Present_df_gtTHS_lolim, Far_future_stats, Far_future_df_gtTHS,
                              Far_future_df_gtTHS_miop, Far_future_df_gtTHS_malim, Far_future_df_gtTHS_lolim], axis=1, join='outer')
    # Change (future minus present) in the % of time outside each limit.
    Shapefile_df['Ch%_MaOp'] = Shapefile_df[str(int(row['# species'])) + '_9>' + 'MaOp'] - Shapefile_df[str(int(row['# species'])) + '_0>' + 'MaOp']
    Shapefile_df['Ch%_MiOp'] = Shapefile_df[str(int(row['# species'])) + '_9<' + 'MiOp'] - Shapefile_df[str(int(row['# species'])) + '_0<' + 'MiOp']
    Shapefile_df['Ch%_Malim'] = Shapefile_df[str(int(row['# species'])) + '_9>' + 'Malim'] - Shapefile_df[str(int(row['# species'])) + '_0>' + 'Malim']
    Shapefile_df['Ch%_LoLim'] = Shapefile_df[str(int(row['# species'])) + '_9<' + 'LoLim'] - Shapefile_df[str(int(row['# species'])) + '_0<' + 'LoLim']
    #Shapefile_df.columns
    #Combined_df_gtTHS.columns = [s + '_gtTHS' for s in scenario_real_names]
    #Combined_df_gtTHS.plot(ylim=[0,500])
    # NOTE(review): the output path is constant, so each species iteration
    # overwrites the previous shapefile and only the last species survives —
    # confirm whether the path should include the species id.
    Shape_out_path = output_directory + 'Hunter_salinity_stats_V3.shp'
    Shapefile_df.to_file(Shape_out_path, driver='ESRI Shapefile')
#==========================================================#
#Plot salinity analysis
#==========================================================#
# Three-panel map figure: present-day median, change in median under SLR, and
# change in threshold-exceedance time under SLR.
# NOTE(review): despite the variable name, the file is written as a PDF —
# confirm the intended format.
png_out_name = output_directory + variable + 'sea level rise analysis_' + Plot_scenario + '.pdf'
fig = plt.figure(figsize=(40,20))
ax=plt.subplot(1,3,1)
plt.title('median '+ variable + ' present day')
# NOTE(review): 'present median' (and 'Dif in median' / 'Dif in % time >
# threshold' below) are not columns created above — the stats columns are
# named like '0_med' and 'Ch%_MaOp'.  Confirm the intended column names.
Shapefile_df.plot(cmap='viridis_r', column='present median' , legend=True,ax=ax)
#lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.legend()
ax=plt.subplot(1,3,2)
plt.title('Change in median of '+ variable + ' under 0.9m SLR')
Shapefile_df.plot(cmap='viridis_r', column='Dif in median' , legend=True,ax=ax)
#lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.legend()
#plot exceedance-time change
ax=plt.subplot(1,3,3)
# Fix: the original title referenced an undefined name 'Threshold'
# (NameError); the threshold is described generically instead.
plt.title('Change in time where '+ variable + ' > threshold under 0.9m SLR')
#max_min_extime_elev.plot(cmap='viridis_r', column='HigT Elev 0SLR' , legend=True, vmin=ylims[0], vmax=ylims[1],ax=ax)
Shapefile_df.plot(cmap='viridis_r', column='Dif in % time > threshold' , legend=True,ax=ax)
ax.legend()  # (duplicate ax.legend() call removed)
fig.tight_layout()
fig.savefig(png_out_name)
plt.close()