Compare commits

No commits in common. 'Development1' and 'master' have entirely different histories.

.gitignore

@@ -1,4 +0,0 @@
Data/
Manuscript/
NARCLIM/
Outputs/

@@ -1,79 +0,0 @@
#Code for preparing a text file with BASH commands for the batch download of NARCLIM data for the Hunter WQ modelling of
#future climate scenarios
#NARCLIM variables:
# 'evspsblmean' water evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
# 'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
# 'tasmean' mean near-surface temperature
# 'tasmax' maximum near-surface temperature
# 'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 (maximum 1-hour moving average of point values at a 60.0 second timestep)
# 'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 (maximum 1-hour moving average of point values at a 60.0 second timestep)
# 'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
# 'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
# 'sstmean' daily mean sea surface temperature
Clim_Var <- 'evspsblmean'
Datatype <- 'T_GCMS' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data?
Location <- 'catchment' #which set of location coordinates to use (here: catchment centroids)
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY2/'
Filename <- 'CASESTUDY2_NARCLIM_Point_Sites.csv'
#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T, fileEncoding="UTF-8-BOM"))
#create an empty vector for accumulating the command-line text (written to file below)
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df$Name))){
name <-as.character(Location.df$Name[i])
#name<-gsub('([[:punct:]])|\\s+','_',name)
# if(i<10){
# name<-paste('Catchment_0', as.character(i), sep="")
# }else{
# name<-paste('Catchment_', as.character(i), sep="")
# }
latitude=round(as.numeric(Location.df$Lat[i]),3)
longitude=round(as.numeric(Location.df$Long[i]),3)
if (Clim_Var == 'sstmean'){
latitude=round(as.numeric(Location.df$Ocean_Lat[i]),3)
longitude=round(as.numeric(Location.df$Ocean_Long[i]),3)
}
if (Location == 'catchment'){
latitude=round(as.numeric(Location.df$Catch_Lat[i]),3)
longitude=round(as.numeric(Location.df$Catch_Lon[i]),3)
}
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
paste("name='",name,"'", sep=""),
"python /srv/ccrc/data02/z5025317/Code_execution/\\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
if(i==10|i==20|i==31|i==length(Location.df$Name)){ #write out a batch file after sites 10, 20, 31 and after the last site
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste(Directory ,'/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), "_",Location, ".txt", sep="")
#open and fill text file
fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn)
close(fileConn)
#
if(i==10|i==20){
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
}
}
}
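For reference, each pass through the loop above appends one block per site and then writes a plain-text batch file; a sketch of what one generated file is expected to contain (assembled from the strings above, with an illustrative site name and the Hunter River coordinates from the site list):

module load python
Datatype='T_GCMS'
Bias_corrected='False'
ClimVarName='evspsblmean'
latitude=-32.843
longitude=151.706
name='Catchment_01'
python /srv/ccrc/data02/z5025317/Code_execution/\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected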

@@ -1,81 +0,0 @@
#Code for preparing a text file with BASH commands for the batch download of NARCLIM data for the Hunter WQ modelling of
#future climate scenarios
#NARCLIM variables:
# 'evspsblmean' water evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
# 'tasmean' mean near-surface temperature
# 'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
Clim_Var <- 'sstmean'
Datatype <- 'T_NNRP' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data? True or False python boolean
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BCGeneration/catchments/'
Filename <- 'Catchment_Prev_Hunter_Model_Centroids_VH_WGS84_attribute_Table.csv'
#NOTE: the two assignments below override the Hunter paths above
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/'
Filename <- 'CASESTUDY2_GAB_NARCLIM_Point_Sites_V4.csv'
#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T))
for(type in c('min','max')){
#create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df[,1]))){
#for (i in c(1,5,10)){ #use this loop instead if only a subset of catchments is needed, e.g. for the Met file
#name<-as.character(Location.df$Name[i])
#name<-gsub('([[:punct:]])|\\s+','_',name)
if(type == 'min'){
#latlongs <- as.character(Location.df[,9])
name <- as.character(Location.df[,1][i])
latitude <- as.character(Location.df[,2][i])
longitude <- as.character(Location.df[,3][i])
}else if(type == 'max'){
name <- as.character(Location.df[,6][i])
latitude<-as.character(Location.df[,7][i])
longitude<-as.character(Location.df[,8][i])
}
# print(latlongs[i])
# names <- as.character(Location.df[,17])
# if(i<10){
# name<-paste('Plant_0', as.character(i), sep="")
# }else{
# name<-paste('Plant_', as.character(i), sep="")
# }
#print(name)
#print(names[i])
# latmin <- substr(latlongs[i], 5, 10)
# lonmin <- substr(latlongs[i], 18, 23)
# latmax <- substr(latlongs[i], 32, 37)
# lonmax <- substr(latlongs[i], 45, 50)
# latitude=latmax
# longitude=lonmax
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
paste("name='",name,"'", sep=""),
"python /srv/ccrc/data02/z5025317/Code_execution/\\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd01' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
if(i==10|i==20|i==30|i==40|i==45){ #write out a batch file after sites 10, 20, 30, 40 and 45
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/DataDownloadCCRC/',Clim_Var, "_", Datatype, "_", Biasboolean,as.character(i), type, ".txt", sep="")
#open and fill text file
fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn)
close(fileConn)
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
}
}
}

@@ -1,526 +0,0 @@
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating future climate variability deviation plots for NARCLIM variables.
#Inputs: uses CSV files that contain the deltas of all 12 NARCLIM models for one grid cell at the site of interest, generated with P1_NARCLIM_plots_Windows.py
#This code is used only for the NARCLIM variables - a separate code is used for ocean variables etc. that are not part of the NARCLIM ensemble
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
import csv
matplotlib.style.use('ggplot')
# Load my own functions
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
#==========================================================#
#==========================================================#
# Set working directory (where the postprocessed NARCLIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#loop through case study estuaries
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['NADGEE','HUNTER', 'RICHMOND'] #NOTE: this subset overrides the full list above
for Est in Estuaries:
#==========================================================#
#set input parameters
#==========================================================#
Case_Study_Name = 'CASESTUDY2'
Version = "V4"
OutVersion = "V4"
Casestudy2_csv_path = 'Data/NARCLIM_Site_CSVs/' + Case_Study_Name + '/' + Case_Study_Name + '_NARCLIM_Point_Sites.csv'
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print(Estuary)
Base_period_start = '1950-01-01' #Start of interval for base period of climate variability
Base_period_DELTA_start = '1990-01-01' #Start of interval used for calculating mean and SD for base period (Delta base period)
Base_period_end = '2009-01-01' #use last day that's not included in period as < is used for subsetting
Clim_var_type = 'pracc' #Name of climate variable in NARCLIM models '*' will create pdf for all variables in folder
Stats = 'days_h_20' #statistic: e.g. 'maxdaily' takes the annual maximum instead of the sum; 'days_h_30' counts days above a threshold of 30
PD_Datasource = 'SILO' #Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
SILO_Clim_var = ['daily_rain'] #select the SILO clim variable to be used for base period. - see silo.py for detailed descriptions
force_sea_levels = 'no' #use Fort Denison sea levels to run the statistics
Location = 'Estuary' # pick location for extracting the SILO data; can be: Estuary, Catchment, or Ocean
Main_Plot = 'yes'
presentdaybar = False #include a bar for present day variability in the plots?
present_day_plot = 'no' #save a time series of present day
Allin1_delta_plot = 'yes'
Deltaxmin, Deltaymin = -20,40 #lower and upper y-axis limits for the zero-baseline delta plots
Median_markers = 'yes' #plot the median horizontal lines on top of the delta range in the allin1 plot?
Figure_headings = 'yes'
ALPHA_figs = 0 #Set alpha of figure background (0 makes everything around the plot panel transparent)
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14} #size of axis labels
matplotlib.rc('font', **font)
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + OutVersion + '/' + Estuary + '/Clim_Deviation_Plots/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
#==========================================================#
#==========================================================#
#read CSV file
#==========================================================#
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/' + Estuary + '_' + Clim_var_type + '_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
if Clim_var_type == 'rsdsmean':
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/' + Estuary + '/' + Estuary + '_rsdsmean_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df1 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/' + Estuary + '/' + Estuary + '_rldsmean_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df2 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
Ensemble_Delta_full_df = Ensemble_Delta_full_df2 + Ensemble_Delta_full_df1
#Ensemble_Delta_full_df = pd.to_numeric(Ensemble_Delta_full_df)
#==========================================================#
#==========================================================#
#load present day climate variable time series
#==========================================================#
if PD_Datasource == 'Station':
Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
if force_sea_levels == 'yes':
Estuary_Folder = "C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Tide_Levels"
Present_day_df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Daily_1900-2011.csv' , parse_dates=True, index_col=0)
Present_day_df = Present_day_df.interpolate()
Present_day_df = pd.DataFrame(Present_day_df.loc[(Present_day_df.index >= Base_period_start) & (Present_day_df.index <= Base_period_end)])
[minplotDelta, maxplotDelta]=[1, 1]
df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Sub_Daily_1900-2011.csv', parse_dates=[['Date', 'Time']])
df.index = pd.to_datetime(df.Date, format='%d/%m/%Y') + pd.to_timedelta(df.Hour, unit='h')
if PD_Datasource == 'SILO':
#read the CSV to extract the lat/long coordinates of the case study sites
with open(Casestudy2_csv_path, mode='r') as infile:
reader = csv.reader(infile)
next(reader, None)
with open('coors_new.csv', mode='w') as outfile:
writer = csv.writer(outfile)
if Location == 'Estuary':
mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
if Location == 'Ocean':
mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
if Location == 'Catchment':
mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
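#NOTE: mydict maps each site name to a [lat, lon] pair of strings, with the column
#pair selected by the Location switch above (Estuary, Ocean or Catchment);
#mydict[Estuary] is then used below to query SILO at that point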
if Clim_var_type == 'tasmean':
silo_df = sil.pointdata(["max_temp", "min_temp"], 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""),
None, mydict[Estuary][0], mydict[Estuary][1], False, None)
#take mean of daily min and max temp
Present_day_df = silo_df.iloc[:,[2,4]].mean(axis=1)
else:
silo_df = sil.pointdata(SILO_Clim_var, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""),
None, mydict[Estuary][0], mydict[Estuary][1], False, None)
Present_day_df = silo_df.iloc[:,[2]]
#set the x and y limit deltas - for plotting only
if Clim_var_type in ['evspsblmean', 'potevpmean']: #ET time series that we have is not in the same format as the other variables, hence the different treatment
[minplotDelta, maxplotDelta]=[50,50]
#for tasmean, observed min and max T need to be converted into mean T
elif Clim_var_type == 'tasmean':
[minplotDelta, maxplotDelta]=[0.2,1]
elif Clim_var_type == 'sstmean':
[minplotDelta, maxplotDelta]=[40,40]
elif Clim_var_type == 'tasmax':
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'wssmean' or Clim_var_type == 'wss1Hmaxtstep':
[minplotDelta, maxplotDelta]=[1, 1.5]
elif Clim_var_type == 'pracc':
[minplotDelta, maxplotDelta]=[50,100]
elif Clim_var_type in ['rsdsmean', 'rldsmean']:
[minplotDelta, maxplotDelta]=[100,100]
#[minplotDelta, maxplotDelta]=[1,1]
#==========================================================#
#subtract a constant from all values to convert from Kelvin to Celsius (temperature)
if Clim_var_type == 'sstmean':
#Present_day_df = Present_day_df.iloc[:,0:(len(Present_day_df)-1)]-273.15
Present_day_df.index = Present_day_df.index.to_period('D')
#==========================================================#
#create seasonal sums etc.
#==========================================================#
if Stats == 'maxdaily':
Present_day_df_annual = Present_day_df.resample('A').max()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif Stats == 'maxmonthly':
Present_day_df_annual = Present_day_df.resample('M').max().resample('A').mean()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('M').max().resample('Q-NOV').mean() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Present_day_df_annual = Present_day_df.resample('A').agg(agg)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').agg(agg) #seasonal means
else:
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,
'pr1Hmaxtstep' ,'wss1Hmaxtstep',
'rsdsmean', 'rldsmean']:
Present_day_df_annual = Present_day_df.resample('A').sum()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif Clim_var_type in ['tasmean' ,'tasmax' ,'sstmean']:
Present_day_df_annual = Present_day_df.resample('A').mean()
Present_day_df_monthly = Present_day_df.resample('M').mean()
Present_day_df_weekly = Present_day_df.resample('W').mean()
Fdf_Seas_means = Present_day_df.resample('Q-NOV').mean() #seasonal means
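#Illustration (hypothetical values): with Stats = 'days_h_20', the 'days' branch above
#parses Threshold = 20 and the lambda counts exceedance days per year/season, e.g.
#pd.Series([5, 25, 31], index=dates).resample('A').agg(agg) would count 2 days > 20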
#==========================================================#
#==========================================================#
#calculate linear trend in present day series
#==========================================================#
if Stats == 'maxdaily' or Stats[:4] =='days' or Stats == 'maxmonthly':
coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df_annual.index)),Present_day_df_annual.values,1,full=True)
#mse = residuals[0]/(len(selected.index))
#nrmse = np.sqrt(mse)/(selected.max() - selected.min())
trend = coefficients[0]
print('Slope ' + str(coefficients[0]))
print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,4)) + ' units/year')
elif Stats == 'mean':
coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df.index)),Present_day_df.values,1,full=True)
#mse = residuals[0]/(len(selected.index))
#nrmse = np.sqrt(mse)/(selected.max() - selected.min())
if force_sea_levels == 'yes':
trend = coefficients[0]*12
else:
trend = coefficients[0]*365
print('Slope ' + str(coefficients[0]))
print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,5)) + ' units/year')
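#NOTE: np.polyfit returns the slope per sample step: the annual/seasonal series above is
#already one value per year, whereas the 'mean' branch fits the raw series, hence the
#x365 (daily data) or x12 (monthly Fort Denison data) scaling to express the trend in units/year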
#==========================================================#
#==========================================================#
#Loop through annual and seasons and create a deviation plot for each.
#==========================================================#
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
fig = plt.figure(figsize=(14,8))
delta_all_df = pd.DataFrame()
Combined_Delta_df = pd.DataFrame()
Combined_Med_df = pd.DataFrame()
i=1
for temp in times:
#temp = 'annual'
#subset the ensemble dataframe for the period used:
if temp == 'annual':
Ensemble_Delta_df = Ensemble_Delta_full_df.iloc[:,range(0,2)]
Present_Day_ref_df = Present_day_df_annual
Column_names = ['Annual_near', 'Annual_far']
else:
Ensemble_Delta_df = Ensemble_Delta_full_df.filter(regex= temp)
Ensemble_Delta_df.columns = ['near', 'far']
if temp == 'DJF':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1]
Column_names = ['Summer_near', 'Summer_far']
if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2]
Column_names = ['Autumn_near', 'Autumn_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3]
Column_names = ['Winter_near', 'Winter_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4]
Column_names = ['Spring_near', 'Spring_far']
Present_Day_ref_df = Mean_df
#Subset to present day variability period
Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
#Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
Present_Day_SD = np.std(Present_Day_Delta_ref_df)
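#NOTE: despite its name, Present_Day_Mean is the 50th percentile (median) of the delta
#base period; together with Present_Day_SD it defines the present-day envelope
#(median, +/-1 SD, +/-2 SD) against which the future ensemble deltas are plotted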
#create data frame for floating stacked barplots
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['present','near future', 'far future']
Plot_in_df = pd.DataFrame(index=index, columns =columns)
#
Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][2])]
Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][2])]
#Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#
if presentdaybar == False:
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['near future', 'far future']
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#Plot_in_df2['present'] = [float(0),float(0), float(0),
# float(0), float(0)]
else:
Plot_in_df2['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_SD), float(Present_Day_SD),
float(Present_Day_SD), float(Present_Day_SD)]
Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Ensemble_Delta_df.near[-3:][1]-Ensemble_Delta_df.near[-3:][0]),
np.NaN, float(Ensemble_Delta_df.near[-3:][2]-Ensemble_Delta_df.near[-3:][1])]
Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Ensemble_Delta_df.far[-3:][1]-Ensemble_Delta_df.far[-3:][0]),
np.NaN, float(Ensemble_Delta_df.far[-3:][2]-Ensemble_Delta_df.far[-3:][1])]
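#NOTE: Plot_in_df2 stores increments rather than absolute values: the first segment
#(drawn with color 'none', i.e. invisible) lifts each bar to the lower end of the
#ensemble range, and the following segments stack the lower-to-median and
#median-to-upper ranges, so the bars appear to float at the correct absolute levels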
#transpose the data frame
Plot_in_df_tp = Plot_in_df2.transpose()
#do the individual plots
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
#define colour scheme
#likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
#plot the stacked barplot
if temp == 'annual':
ax=plt.subplot(2,4,3)
else:
ax=plt.subplot(2,4,i)
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)
df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
index2=['Med']
columns2 = ['near future', 'far future']
Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
Plot_in_df3['near future'] = df.iloc[1,0]
Plot_in_df3['far future'] = df.iloc[0,0]
Plot_in_df3 = Plot_in_df3.transpose()
# plt.plot(Plot_in_df3['Med'], linestyle="", markersize=45,
# marker="_", color='red', label="Median")
#Plot_in_df3['Med']
#Plot_in_df3['Med'].values
plt.plot(np.arange(2),Plot_in_df3['Med'].values[::-1], linestyle="", markersize=45,
marker="_", color='red', label="Median")
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(xmin, xmax)
#plt.ylim(10, 140)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' ' + temp)
ax.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plt.set_facecolor('g')
if temp == 'annual':
ax=plt.subplot(2,2,1)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
if Figure_headings == 'yes':
#plt.title(Clim_var_type)
#plt.title(Clim_var_type + ' ' + Stats + ' ' + Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
plt.title(Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
plt.ylim(xmin, xmax)
#plt.ylim(0, 40)
ax.grid(False)
#if temp == 'MAM':
i=i+4
else:
i=i+1
#create a data frame that contains all of the delta bars
Plot_in_df_tp.index = Column_names
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Present_Day_Mean
Combined_Delta_df = pd.concat([Combined_Delta_df, Plot_in_df_tp], axis=0)
if Allin1_delta_plot == 'yes':
ax=plt.subplot(2,4,4)
temp = Combined_Delta_df[0:2]
Plot_in_df_tp = temp
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)
else:
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
if Median_markers == 'yes':
plt.plot(np.arange(2),Plot_in_df_tp['Median'].values , linestyle="", markersize=45,
marker="_", color='red', label="Median")
z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(Deltaxmin, Deltaymin)
#plt.ylim(xmin, xmax)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' All Deltas ' + Stats)
#ax.grid(False)
ax.xaxis.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(10)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plt.show()
if Main_Plot == 'yes':
if presentdaybar == False:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + PD_Datasource + '_' + SILO_Clim_var[0] + '_' + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + '_' + ' trend_' + str(np.round(trend[0],4)) + '.png'
else:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + PD_Datasource + '_' + SILO_Clim_var[0] + '_' + Version + '_' + '2.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
if Allin1_delta_plot == 'yes':
fig = plt.figure(figsize=(14,8))
ax = fig.add_subplot(2, 3, 1)
temp = Combined_Delta_df
Plot_in_df_tp = temp
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)
else:
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
if Median_markers == 'yes':
# plt.bar(Plot_in_df_tp.index, Plot_in_df_tp['Median'], linestyle="", markersize=13,
# marker="_", color='red', label="Median")
plt.plot(np.arange(10), Plot_in_df_tp['Median'], linestyle="", markersize=13,
marker="_", color='red', label="Median")
z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(Deltaxmin, Deltaymin)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' All Deltas ' + Stats)
#ax.grid(False)
ax.xaxis.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(50)
plt.setp(ax.xaxis.get_majorticklabels(), ha='right')
#ax.set_xticklabels(xticklabels, rotation = 45, ha="right")
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
plt.show()
#export plot to png
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + '_All_Deltas_In1.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
##make barplot with alternating colours
#men_means, men_std = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
#women_means, women_std = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)
#
#ind = np.arange(len(men_means)) # the x locations for the groups
#width = 0.35 # the width of the bars
#
#fig, ax = plt.subplots()
#rects1 = ax.bar(ind - width/2, men_means, width, yerr=men_std,
# color='SkyBlue', label='Men')
#rects2 = ax.bar(ind + width/2, women_means, width, yerr=women_std,
# color='IndianRed', label='Women')
#
## Add some text for labels, title and custom x-axis tick labels, etc.
#ax.set_ylabel('Scores')
#ax.set_title('Scores by group and gender')
#ax.set_xticks(ind)
#ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
#ax.legend()
if present_day_plot == 'yes':
#print present day climate data
fig = plt.figure(figsize=(14,8))
for i1 in range(2):
if i1 ==0:
ax = fig.add_subplot(2, 3, 1)
else:
ax = fig.add_subplot(2, 1, 2)
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.patch.set_alpha(0)
#plt.ylim(0, 400)
plt.ylim(xmin, xmax)
#export plot to png
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + 'Present_Day_Period.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
# use transparent=True if you want the whole figure with a transparent background

@@ -1,230 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 28 12:15:17 2018
@author: z5025317
"""
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating future climate variability deviation plots for NARCLIM variables.
#This code is derived from P1_NARCliM_First_Pass_variab_deviation_plots.py to deal with variables for which we don't have NARCLIM deltas.
#These variables are Sea Level, Acidity and...
#It uses the CSIRO Climate Change in Australia CMIP5 deltas instead
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
import csv
matplotlib.style.use('ggplot')
# Load my own functions
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
import re
#==========================================================#
#==========================================================#
# Set working directory (where the postprocessed NARCLIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
#==========================================================#
Case_Study_Name = 'CASESTUDY2'
Casestudy2_csv_path = "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTDUY2_NARCLIM_Point_Sites.csv"
Estuary = 'HUNTER' # 'Belongil'
Base_period_start = '1970-01-01' #Start of interval for base period of climate variability
Base_period_DELTA_start = '1990-01-01' #Start of interval used for calculating mean and SD for base period (Delta base period)
Base_period_end = '2009-01-01' #use last day that's not included in period as < is used for subsetting
Clim_var_type = 'acidity' #Name of climate variable in NARCLIM models '*' will create pdf for all variables in folder
[minplotDelta, maxplotDelta]=[0.5,1]
#Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
Location = 'Estuary' # pick location for extracting the SILO data; can be: Estuary, Catchment, or Ocean
presentdaybar = False #include a bar for present day variability in the plots?
present_day_plot = 'yes' #save a time series of present day
Version = "V3"
Stats = 'mean' #statistic: 'maxdaily' takes the annual maximum instead of the sum
ALPHA_figs = 1 #Set alpha of figure background (0 makes everything around the plot panel transparent)
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name +'_'+ Version + '/' + Estuary + '/Clim_Deviation_Plots/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
#==========================================================#
#==========================================================#
#read ensemble change CSV file
#==========================================================#
clim_var_csv_path = './Data/Non_NARCLIM/AUZ_CC_CSIRO_Deltas/Brisbane_Ocean_Variables.csv'
df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
df = df.filter(regex= 'RCP 8.5|Variable')
df = df[df.Variable=='Ocean pH']
Ensemble_Delta_df = pd.DataFrame(index=['Median', '10th', '90th'], columns =['near', 'far'])
Ensemble_Delta_df['near'] = re.split(r'\(|\)| to ', df.iloc[0,1])[0:3]
Ensemble_Delta_df['far'] = re.split(r'\(|\)| to ', df.iloc[0,2])[0:3]
Ensemble_Delta_df = Ensemble_Delta_df.astype(float).fillna(0.0)
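#The CSIRO cells are assumed to be formatted as 'median (lower to upper)',
#e.g. '-0.07 (-0.08 to -0.06)'; re.split on '(', ')' and ' to ' then returns
#[median, lower, upper], which is mapped onto the ['Median', '10th', '90th'] index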
#==========================================================#
#==========================================================#
#load present day climate variable time series
#==========================================================#
#Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
Present_day_Var_CSV = glob.glob('./Data/Non_NARCLIM/Hunter_Lower_Acidity2.csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0],parse_dates=True, index_col=0)
Present_day_df.columns = ['PH']
df = Present_day_df
df.index = df.index.to_period('D')
Present_day_df = df
Present_day_df = df.groupby(df.index).mean().reset_index()
Present_day_df.columns = ['Date','PH']
Present_day_df.index = Present_day_df['Date']
Present_day_df= Present_day_df['PH']
#Present_day_df.index = pd.to_datetime(Present_day_df.index,format='%Y%m%d')
if Stats == 'mindaily':
Present_day_df_annual = Present_day_df.resample('A').min()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
if Stats == 'minmonthly':
Present_day_df_annual = Present_day_df.resample('M').min().resample('A').mean()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
if Stats == 'mean':
Present_day_df_annual = Present_day_df.resample('A').mean()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Present_Day_ref_df = Present_day_df_annual
#Subset to present day variability period
Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
#Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
Present_Day_SD = np.std(Present_Day_Delta_ref_df)
#create data frame for floating stacked barplots
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['present','near future', 'far future']
Plot_in_df = pd.DataFrame(index=index, columns =columns)
#
Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['Median']),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['90th'])]
Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['Median']),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['90th'])]
#Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['near future', 'far future']
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['90th']),np.NaN, float(Ensemble_Delta_df.near['Median']-Ensemble_Delta_df.near['90th']),
np.NaN, float(Ensemble_Delta_df.near['10th']-Ensemble_Delta_df.near['Median'])]
Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['90th']),np.NaN, float(Ensemble_Delta_df.far['Median']-Ensemble_Delta_df.far['90th']),
np.NaN, float(Ensemble_Delta_df.far['10th']-Ensemble_Delta_df.far['Median'])]
#transpose the data frame
Plot_in_df_tp = Plot_in_df2.transpose()
#do the individual plots
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
#define colour scheme
#likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
#plot the stacked barplot
fig = plt.figure(figsize=(14,8))
ax=plt.subplot(2,4,3)
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)
df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
index2=['Med']
columns2 = ['near future', 'far future']
Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
Plot_in_df3['near future'] = df.iloc[1,0]
Plot_in_df3['far future'] = df.iloc[0,0]
Plot_in_df3 = Plot_in_df3.transpose()
plt.plot(Plot_in_df3['Med'], linestyle="", markersize=52,
marker="_", color='red', label="Median")
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(xmin, xmax)
plt.title(Clim_var_type)
ax.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plot the present day time series
ax=plt.subplot(2,2,1)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
plt.title(Clim_var_type + ' ' + Stats + ' annual present day')
plt.ylim(xmin, xmax)
ax.grid(False)
if presentdaybar == False:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + Version + '_' + '_NPDB2.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)

@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from netCDF4 import *
 import numpy as np
-from numpy import *
 import os
 import pandas as pd
 import glob
@@ -23,9 +22,6 @@ if __name__ == "__main__":
 parser.add_argument("--varName", help="operation")
 parser.add_argument("--timestep", help="operation")
 parser.add_argument("--domain", help="operation")
-parser.add_argument("--LocationName", help="operation")
-parser.add_argument("--Datatype", help="operation")
-parser.add_argument("--BiasBool", help="operation")
 args = parser.parse_args()
 print(args.lat)
 print(args.lon)
@@ -35,159 +31,46 @@ if __name__ == "__main__":
 Clim_var_type = args.varName
 NC_Domain = args.domain
 Timestep = args.timestep
-Location = args.LocationName
-Data_Type = args.Datatype
-Bias_Correction_BOOL = args.BiasBool
-print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)
-lat_equal_len_string="%.3f" % abs(mylat)
-lon_equal_len_string= "%.3f" % mylon
-if Bias_Correction_BOOL == 'False':
-#set directory path for output files
-output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/'
-#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
-print '---------------------------------------------------------'
-if not os.path.exists(output_directory):
+print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep)
+#set directory path for output files
+output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out'
+#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
+if not os.path.exists(output_directory):
 os.makedirs(output_directory)
 print("output directory folder didn't exist and was generated here:")
 print(output_directory)
-print '---------------------------------------------------------'
-#time.sleep(10)
-#set up the loop variables for interrogating the entire NARCLIM raw data
-NC_Periods = ('1990-2009','2020-2039','2060-2079')
-if Data_Type == 'T_NNRP':
-NC_Periods = ('1950-2009','Stop')
-#
-#Define empty pandas data frames
-Full_df = pd.DataFrame()
-GCM_df = pd.DataFrame()
-R13_df = pd.DataFrame()
-MultiNC_df = pd.DataFrame()
-#
-#Loop through models and construct CSV per site
-for NC_Period in NC_Periods:
-if NC_Period != "Stop":
+#
+# time.sleep(10)
+#manual input via script
+#mylat= -33.9608578
+#mylon= 151.1339882
+#Clim_var_type = 'pr1Hmaxtstep'
+#NC_Domain = 'd02'
+#
+#set up the loop variables for interrogating the entire NARCLIM raw data
+NC_Periods = ('1950-2009','2020-2039','2060-2079')
+#
+#Define empty pandas data frames
+Full_df = pd.DataFrame()
+GCM_df = pd.DataFrame()
+R13_df = pd.DataFrame()
+MultiNC_df = pd.DataFrame()
+#
+#Loop through models and construct CSV per site
+for NC_Period in NC_Periods:
 Period_short = NC_Period[:4]
 GCMs = os.listdir('./'+ NC_Period)
 for GCM in GCMs:
-print GCM
 Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
 for Warf_run in Warf_runs:
 Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
 print Current_input_dir
 Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
 Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
-#print Climvar_NCs[1]
 #Climvar_NCs = Climvar_NCs[0:2]
 #print(Climvar_NCs)
 for netcdf in Climvar_NCs:
 f=Dataset(netcdf)
-# This section print on the screen information contained in the headings of the file
-#print '---------------------------------------------------------'
-#print f.ncattrs()
-#print f.title
-#print f.variables
-#print
-#for varname in f.variables:
-# print varname,' -> ',np.shape(f.variables[varname])
-#print '---------------------------------------------------------'
-# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
-dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
-dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
-dist=dist_x + dist_y
-latindex=np.where(dist_y==np.min(dist_y))
-lonindex=np.where(dist_x==np.min(dist_x))
-index=np.where(dist==np.min(dist))
-print '---------------------------------------------------------'
-print netcdf
-print 'Information on the nearest point'
-print 'Your desired lat,lon = ',mylat,mylon
-print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
-#print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
-#Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
-d={}
-#d["time"] = f.variables['time'][:]
-d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
-#if GCM == 'NNRP' and Warf_run == 'R1':
-# d['Period']= NC_Period
-timestamp = f.variables['time'][:]
-timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
-df1=pd.DataFrame(d, index=timestamp_dates)
-f.close()
-print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
-#print f
-print '---------------------------------------------------------'
-#append in time direction each new time series to the data frame
-MultiNC_df = MultiNC_df.append(df1)
-#append in columns direction individual GCM-RCM-123 run time series (along x axis)
-MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
-R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
-MultiNC_df =pd.DataFrame()
-#append blocks of R1 R2 and R3 in x axis direction
-R13_df = R13_df.sort_index(axis=0, ascending=True)
-GCM_df = pd.concat([GCM_df, R13_df], axis=1)
-R13_df = pd.DataFrame()
-#append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
-GCM_df = GCM_df.sort_index(axis=0, ascending=True)
-Full_df = pd.concat([Full_df, GCM_df], axis=1)
-GCM_df = pd.DataFrame()
-Full_df = Full_df.sort_index(axis=0, ascending=True)
-#adding a column with the NARCLIM decade
-Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
-Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
-Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
-Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
-#
-if Bias_Correction_BOOL == 'True':
-os.chdir('/srv/ccrc/data30/z3393020/NARCliM/Bias_corrected/')
-#set directory path for output files
-output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/Bias_corrected/'
-#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
-if not os.path.exists(output_directory):
-os.makedirs(output_directory)
-print("output directory folder didn't exist and was generated here:")
-print(output_directory)
-#time.sleep(10)
-#set up the loop variables for interrogating the entire NARCLIM raw data
-GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
-#set up the loop variables for interrogating the entire NARCLIM raw data
-#
-#Define empty pandas data frames
-Full_df = pd.DataFrame()
-GCM_df = pd.DataFrame()
-R13_df = pd.DataFrame()
-MultiNC_df = pd.DataFrame()
-#
-#Loop through models and construct CSV per site
-for GCM in GCMs:
-print GCM
-Warf_runs = os.listdir('./' + GCM + '/')
-for Warf_run in Warf_runs:
-NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
-for NC_Period in NC_Periods:
-Period_short = NC_Period[:4]
-Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
-print Current_input_dir
-Clim_var_type = Clim_var_type + '_bc.nc'
-Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type
-Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
-print Climvar_NCs[1]
-print Climvar_NCs[2]
-for netcdf in Climvar_NCs:
-#netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
-#print netcdf
-f=Dataset(netcdf)
-# This section print on the screen information contained in the headings of the file
-print '---------------------------------------------------------'
-print f.ncattrs()
-print f.title
-print f.variables
-print
-for varname in f.variables:
-print varname,' -> ',np.shape(f.variables[varname])
-print '---------------------------------------------------------'
 # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
 dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
 dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
@@ -212,7 +95,6 @@ if Bias_Correction_BOOL == 'True':
 df1=pd.DataFrame(d, index=timestamp_dates)
 f.close()
 print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
-#print f
 print '---------------------------------------------------------'
 #append in time direction each new time series to the data frame
 MultiNC_df = MultiNC_df.append(df1)
@@ -228,8 +110,12 @@ if Bias_Correction_BOOL == 'True':
 GCM_df = GCM_df.sort_index(axis=0, ascending=True)
 Full_df = pd.concat([Full_df, GCM_df], axis=1)
 GCM_df = pd.DataFrame()
 Full_df = Full_df.sort_index(axis=0, ascending=True)
+#adding a column with the NARCLIM decade
+Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
+Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
+Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
 #export the pandas data frame as a CSV file within the output directory
-out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '_NARCliM_summary.csv'
+out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
 out_path = output_directory +'/' + out_file_name
 Full_df.to_csv(out_path)
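The extraction script above selects the NARCLIM grid cell that minimises the sum of the absolute latitude and longitude differences to the requested point. A minimal standalone sketch of that lookup, with small hypothetical coordinate arrays standing in for the netCDF 'lat'/'lon' variables:

import numpy as np

# hypothetical 2-D coordinate fields, as would be read from f.variables['lat'] / f.variables['lon']
lat2d = np.array([[-33.00, -33.00], [-32.90, -32.90]])
lon2d = np.array([[151.70, 151.80], [151.70, 151.80]])
mylat, mylon = -32.91, 151.80  # requested point (Hunter River)

dist = np.abs(lat2d - mylat) + np.abs(lon2d - mylon)  # L1 distance in degrees
iy, ix = np.unravel_index(np.argmin(dist), dist.shape)  # indices of the nearest grid cell
print(iy, ix)  # -> (1, 1), i.e. the cell at (-32.90, 151.80)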

@@ -1,90 +0,0 @@
Variables available from NARCLIM (output):
'evspsblmean' water evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
'tasmean' mean near-surface temperature
'tasmax' maximum near-surface temperature
'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 (maximum 1-hour moving average of point values at a 60.0 second timestep)
'pr30maxtstep' Max. 30-minute time-window moving averaged precipitation rate units: kg m-2 s-1 (maximum 30-minute moving average of point values at a 60.0 second timestep)
'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 (maximum 1-hour moving average of point values at a 60.0 second timestep)
'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
'sstmean' daily mean sea surface temperature
Sites:
Northern NSW:
Tweed River: -28.17, 153.56
Cudgera Creek: -28.36, 153.58
Belongil Creek: -28.63, 153.59
Central NSW:
Port Stephens: -32.71, 152.20
Terrigal Lagoon: -33.4, 151.44
Hunter River: -32.91, 151.80
Hunter River near Mahmod's site: -32.843, 151.706
Southern NSW:
Batemans Bay: -35.76, 150.25
Towamba River: -37.1, 149.91
Nadgee Lake: -37.47, 149.97
Code Input Variables:
Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs
Execution of code in bash-Code for netcdf interrogation:
1st step: log into the storm servers via PuTTY (hurricane.ccrc.unsw.edu.au, typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au) using UNSW credentials (zID)
In BASH, copy and enter:
module load python
latitude=-32.91
longitude=151.80
name='HunterRiver'
Datatype='T_NNRP'
Biasboolean='False'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean
#1 The above code extracts time series from the full model ensemble over a single model grid cell (based on the lat/lon input) for the above variables of interest and stores them in CSV files.
Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv
#2 The "P1_NARCliM_plots_Windows" code takes these CSV files as input and creates a) a pdf with a number of time series and box plots and b) another CSV file containing the deltas between present day, near and far future
for each model in the ensemble. Output: C:\Users\z5025317\WRL_Postdoc\Projects\Paper#1\Output\Nadgee\Nadgee_tasmax_NARCliM_ensemble_changes.csv
#3 The "P1_NARCliM_First_Pass_variab_deviation_plots" code takes those delta CSV files as input and generates the future climate deviation plots that were originally developed by Duncan Rayner.
Run the code with different combinations of Estuary (study sites) and climate variables:
e.g. Clim_var_type = "tasmax*" # '*' will create a pdf for all variables in the folder
Present_Day_Clim_Var = 'MaxT' #MaxT, MinT, Rainfall (the name of the present day climate variable refers to the observed climate data that is used for the baseline period variability)
#!!!# Important!
#For the present day variability data, only the rainfall and temperature data actually correspond to the study sites. ET and wind are taken from the existing project folder and hence are for a Hunter weather station.
#e.g. Terrigal_Wind and Terrigal_ET are actually Hunter data. This is because we don't have ET for sites other than Hunter at this stage.
##PROBLEM: Without changing anything, the P1_NARCliM_NC_to_CSV_CCRC_SS.py script stopped working properly on the CCRC storm servers. It does not give an error, but loading the nc files with Dataset(nc) takes indefinitely long.
It used to take only a few seconds. NOT solved yet as of 7th of May 2018. ### This was solved for the /postprocessed folder at the end of May 2018, but the problem persists with the /bias_corrected/ data folder.
Running a simple netcdf info script:
python /srv/ccrc/data02/z5025317/Code_execution/P1_Basic_NETCDF_Interrogation.py
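To check whether opening a file is itself the bottleneck, a minimal timing sketch (netCDF4 package assumed; the path is a placeholder, not a real file):
import time
from netCDF4 import Dataset
start = time.time()
nc = Dataset('/path/to/some_NARCLIM_file.nc') #placeholder path
print('opened in ' + str(round(time.time() - start, 1)) + ' seconds')
print(nc.variables.keys()) #quick look at the available variables
nc.close()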
deactivate
conda env create --name EEenv --file C:\Users\z5025317\WRL_Postdoc\Software\EE\ee-jupyter-examples-master\kilian_env

@ -1,13 +1,12 @@
# -*- coding: utf-8 -*-
#==========================================================#
#####################################----------------------------------
#Last Updated - June 2018
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#####################################----------------------------------
#==========================================================#
#Load packages
#==========================================================#
#####################################----------------------------------
import numpy as np
import os
import pandas as pd
@ -17,486 +16,183 @@ import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
#
# import own modules
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/NARCLIM/')
import climdata_fcts as fct
#
import silo as sil
#
#####################################----------------------------------
ALPHA_figs = 0
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14}
matplotlib.rc('font', **font)
#==========================================================#
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Base_period_start = '1990-01-01'
Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
#Clim_var_type = 'tasmean' will create pdf for all variables in folder
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
#####################################----------------------------------
Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
#set directory path for output files
#Estuaries = ['SHOALHAVEN']
output_directory = 'Output'
for Est in Estuaries:
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
#Estuary = 'HUNTER' # 'Belongil'
if not os.path.exists(output_directory):
Estuary = Est # 'Belongil'
print Estuary
#Clim_var_type = 'potevpmean' # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Clim_var_types = ['evspsblmean']
for climvar in Clim_var_types:
Clim_var_type = climvar
#==========================================================#
#set input preferences
#==========================================================#
plot_pdf = 'no'
plot_pngs = 'no'
delta_csv = 'yes'
Stats = 'mean' # 'maxdaily', 'mean', 'days_h_25'
DoMonthly = True
Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
Location = 'Catchment'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#set directory path for individual png files
png_output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(png_output_directory):
os.makedirs(png_output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated") print("output directory folder didn't exist and was generated")
print('-------------------------------------------') Clim_Var_CSVs = glob.glob('./Site_CSVs/*')
#==========================================================# #Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
#==========================================================#
for clim_var_csv_path in Clim_Var_CSVs:
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
if Location == 'Catchment':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0]))
Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
#==========================================================#
#==========================================================#
#read CSV files and start analysis
#==========================================================#
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
Full_df = Full_df.drop(columns=['period'])
Ncols_df = len(Full_df)
#check data types of columns
#Full_df.dtypes
#==========================================================#
#subtract a constant from all values (temp)
if Clim_var_type == 'tasmean':
Full_df = Full_df.iloc[:,0:26]-273.15
#==========================================================#
#index the narclim periods if needed
#subtract a constant from all values to convert from kelvin to celsius (temp)
#Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
if Clim_var_type in ['tasmean','tasmax','sstmean']:
#Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
#Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
if Clim_var_type in ['pr1Hmaxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60
if Clim_var_type in ['rsdsmean','rldsmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000 #convert to unit used in SILO
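#NOTE (sketch): the factors above turn native NARCLIM flux units into daily
#totals, e.g. evspsblmean/potevpmean come as kg m-2 s-1 (equivalent to mm/s),
#so multiplying by 60*60*24 = 86400 gives mm/day; a raw value of 2.5e-5
#kg m-2 s-1 is thus about 2.2 mm/day.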
Fdf_1900_2080 = Full_df
#==========================================================#
#Subset the data to the minimum base period and above (used to set the length of the present day climate period)
#==========================================================#
Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)]
#Aggregate daily df to annual time series
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #seasonal means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats == 'maxmonthly'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #monthly means
#plot the mean of all model runs
print('-------------------------------------------')
print('mean of all models for climate variable: ' + Clim_var_type)
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#==========================================================#
Fdf_1900_2080_means.plot(kind='bar', ylim=(16,22)).figure
print('-------------------------------------------')
#==========================================================#
#Select the 3 most representative models (min med and max difference between far future and present)
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
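#NOTE (sketch): select_min_med_max_dif_model comes from climdata_fcts (not shown
#here); judging from its use, it ranks the models by the difference between far
#future and present day and returns the ensemble table plus the minimum, median
#and maximum difference models and their names.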
#==========================================================#
#==========================================================#
#create a dataframe that has 1 column for each of the three representative models
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#==========================================================#
#time series plots:
#Create Deltas of average change for annual and seasonal basis
#ranks = Fdf_1900_2080_means[3:].rank(axis=0)
#==========================================================#
df3 = Fdf_1900_2080_annual
delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1)
if DoMonthly ==True:
Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25]]
delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)
delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]
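#NOTE (sketch): per model, a delta is the future-period statistic relative to
#present day, either absolute (future - present) or in percent
#(100*(future - present)/present), depending on Perc_Vs_Abs.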
#==========================================================#
#write the key plots to a single pdf document
if delta_csv == 'yes':
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf'
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
pdf_out_path = output_directory +'/' + pdf_out_file_name
out_path = output_directory + '/' + out_file_name
#open pdf and add the plots
delta_all_df.to_csv(out_path)
if delta_csv == 'yes' and DoMonthly ==True:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
out_path = output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name
delta_all_df_monthly.to_csv(out_path)
#==========================================================#
#==========================================================#
#development
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
#I want to create a simple density plot for all values in present day, near future and far future
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
#this should be an easy indicator of change
#subset present day simulations
Full_current_df = Fdf_1900_2080.iloc[:,[0,1,2]]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,15)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(15,27)]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture']
#==========================================================#
#end of development
#==========================================================#
#generate colour schemes for plotting
#==========================================================#
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3': 'tomato',
'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
#==========================================================#
#==========================================================#
#output crucial summary plots into individual png files
#==========================================================#
if plot_pngs == 'yes':
#=========#
#Barplot of near and far future deltas
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#prepare data frame for plot
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
fig = plt.figure(figsize=(5,6))
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
plt.xticks([])
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
neardeltadf=delta_all_df['far']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
#ax.patch.set_alpha(ALPHA_figs)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
#full period density comparison
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - hist - full period - max delta model')
#prepare data
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
fig = plt.figure(figsize=(5,5))
ax=plt.subplot(2,1,1)
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
#ax.xaxis.grid(False)
ax.yaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
# time series plot annual ALL models
#=========#
png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - Time series - representative models')
#prepare data
# Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
# test = Fdf_1900_2080_annual
# Mod_Names = test.columns
# New_Mod_Name = []
# for i in range(0,len(Mod_Names)):
# if(Stats[:4] =='days'):
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i][0])
# else:
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
# test.columns = New_Mod_Name
# test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
# colnamest = test.columns
# test_sorted.columns = [w[3:-5] for w in colnamest]
#plot
fig = plt.figure(figsize=(8,7))
ax=plt.subplot(2,1,1)
#test_sorted.plot(ax=ax) #,color = plotcolours36)
Fdf_1900_2080_annual_b = Fdf_1900_2080_annual
New_Mod_Name = []
if(Stats[:4] =='days'):
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[0][:-5])
else:
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[:-5])
Fdf_1900_2080_annual_b.columns = New_Mod_Name
Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
ax.xaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#==========================================================#
#output some summary plot into pdf
#==========================================================#
if plot_pdf == 'yes':
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
ymin = min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
Fdf_1900_2080_means.plot(kind='bar')
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
# delta barplot for report 1#################################
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - model deltas - near-present')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.patch.set_alpha(ALPHA_figs)
plt.xticks([])
#ax.xaxis.set_ticklabels([])
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
#plt.close()
#
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - model deltas - far-present')
neardeltadf=delta_all_df['far']
fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
ax.patch.set_alpha(ALPHA_figs)
#fig.patch.set_alpha(ALPHA_figs)
#plt.show()
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.close()
# end delta barplot for report 1#################################
#
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
plt.title(Clim_var_type + ' - density comparison - full period')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
plt.title(Clim_var_type + ' - Annual means')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
#monthly box
plt.title(Clim_var_type + ' - Monthly means')
Fdf_1900_2080_monthly.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#weekly box
plt.title(Clim_var_type + ' - Weekly means')
Fdf_1900_2080_weekly.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')
plt.title(Clim_var_type + ' - Daily means')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
Fdf_1900_2080_annual.plot(legend=False)
test = Fdf_1900_2080_annual
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
fig = test_sorted.plot(legend=False, color = plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
# time series plot annual ALL models
#
plt.title(Clim_var_type + ' - Time series - representative models')
dfall_annual.plot(legend=False)
Fdf_annual_sorted_subset.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means/sums')
plt.title(Clim_var_type + ' - DJF Summer means')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums')
plt.title(Clim_var_type + ' - DJF Summer means')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
plt.title(Clim_var_type + ' - MAM Autumn means')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
plt.title(Clim_var_type + ' - MAM Autumn means')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
plt.title(Clim_var_type + ' - JJA Winter means')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
plt.title(Clim_var_type + ' - JJA Winter means')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
plt.title(Clim_var_type + ' - SON Spring means')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
plt.title(Clim_var_type + ' - SON Spring means')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#plots not used
#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)

@ -1,520 +0,0 @@
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
# import own modules
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
ALPHA_figs = 0
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14}
matplotlib.rc('font', **font)
#==========================================================#
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
#Estuaries = ['SHOALHAVEN']
for Est in Estuaries:
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print Estuary
#Clim_var_type = 'potevpmean' # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Clim_var_types = ['pracc']
for climvar in Clim_var_types:
Clim_var_type = climvar
#==========================================================#
#set input preferences
#==========================================================#
plot_pdf = 'no'
plot_pngs = 'no'
delta_csv = 'yes'
Stats = 'mean' # 'maxdaily', 'mean', 'days_h_25'
DoMonthly = True
Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
Location = 'Catchment'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#set directory path for individual png files
png_output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(png_output_directory):
os.makedirs(png_output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
if Location == 'Catchment':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0]))
Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
#==========================================================#
#==========================================================#
#read CSV files and start analysis
#==========================================================#
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
#Full_df = Full_df.drop(columns=['period']) #not part of all input csv files
Ncols_df = len(Full_df)
#check data types of columns
#Full_df.dtypes
#==========================================================#
#==========================================================#
#subtract a constant from all values to convert from kelvin to celsius (temp)
if Clim_var_type in ['tasmean','tasmax','sstmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
if Clim_var_type in ['pr1Hmaxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60
if Clim_var_type in ['rsdsmean','rldsmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000 #convert to unit used in SILO
Fdf_1900_2080 = Full_df
#==========================================================#
#==========================================================#
#Aggregate daily df to annual time series
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #seasonal means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats == 'maxmonthly'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #monthly means
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #seasonal means
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#==========================================================#
#==========================================================#
# construction of quantile scaling method
#==========================================================#
quantiles = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1]
Quantile_change_df = fct.quant_quant_scaling(Full_df, quantiles, False) #assumed to be provided by climdata_fcts (imported as fct)
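#NOTE (sketch of the idea, not the exact helper): quantile-quantile scaling
#compares empirical quantiles of the present day and future columns, e.g.:
#present_q = Fdf_1900_2080.iloc[:,0].quantile(quantiles)
#future_q = Fdf_1900_2080.iloc[:,6].quantile(quantiles)
#change = future_q / present_q #per-quantile scaling factor
#so that changes in the extremes are captured, not just changes in the mean.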
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#==========================================================#
#Select the 3 most representative models (min med and max difference between far future and present)
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
#==========================================================#
#==========================================================#
#create a dataframe that has 1 column for each of the three representative models
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#==========================================================#
#Create Deltas of average change for annual and seasonal basis
#==========================================================#
delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
if DoMonthly ==True:
delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)
delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]
#==========================================================#
if delta_csv == 'yes':
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
delta_all_df.to_csv(output_directory + '/' + out_file_name)
#quantile scaling csv
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_quant_quant_' + Coordinates + '.csv'
Quantile_change_df.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
if delta_csv == 'yes' and DoMonthly ==True:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
delta_all_df_monthly.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
#==========================================================#
#==========================================================#
pd.qcut(range(5), 4)
#==========================================================#
#==========================================================#
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#==========================================================#
#==========================================================#
#generate colour schemes for plotting
#==========================================================#
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3': 'tomato',
'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
#==========================================================#
#==========================================================#
#output crucial summary plots into individual png files
#==========================================================#
if plot_pngs == 'yes':
#=========#
#Barplot of near and far future deltas
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#prepare data frame for plot
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
fig = plt.figure(figsize=(5,6))
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
plt.xticks([])
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
neardeltadf=delta_all_df['far']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
#ax.patch.set_alpha(ALPHA_figs)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
#full period density comparison
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - hist - full period - max delta model')
#prepare data
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
fig = plt.figure(figsize=(5,5))
ax=plt.subplot(2,1,1)
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
#ax.xaxis.grid(False)
ax.yaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
# time series plot annual ALL models
#=========#
png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - Time series - representative models')
#prepare data
# Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
# test = Fdf_1900_2080_annual
# Mod_Names = test.columns
# New_Mod_Name = []
# for i in range(0,len(Mod_Names)):
# if(Stats[:4] =='days'):
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i][0])
# else:
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
# test.columns = New_Mod_Name
# test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
# colnamest = test.columns
# test_sorted.columns = [w[3:-5] for w in colnamest]
#plot
fig = plt.figure(figsize=(8,7))
ax=plt.subplot(2,1,1)
#test_sorted.plot(ax=ax) #,color = plotcolours36)
Fdf_1900_2080_annual_b = Fdf_1900_2080_annual
New_Mod_Name = []
if(Stats[:4] =='days'):
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[0][:-5])
else:
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[:-5])
Fdf_1900_2080_annual_b.columns = New_Mod_Name
Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
ax.xaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#==========================================================#
#output the key summary plots into a single pdf
#==========================================================#
if plot_pdf == 'yes':
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
ymin = min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 * min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
fardeltadf=delta_all_df['far']
ymin2 = min(fardeltadf) + 0.1 * min(fardeltadf)
ymax2 = max(fardeltadf) + 0.1 * max(fardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
# delta barplot for report 1#################################
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - model deltas - near-present')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.patch.set_alpha(ALPHA_figs)
plt.xticks([])
#ax.xaxis.set_ticklabels([])
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
#plt.close()
#
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - model deltas - far-present')
fardeltadf=delta_all_df['far']
fardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
ax.patch.set_alpha(ALPHA_figs)
#fig.patch.set_alpha(ALPHA_figs)
#plt.show()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)  #ylim is not a savefig argument; the axis limits are already set in the plot calls above
plt.close()
# end delta barplot for report 1#################################
#
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
ax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
ax.patch.set_alpha(ALPHA_figs)  #set the alpha before saving; setting it after pdf.savefig has no effect on the saved page
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
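# Mod_order prefixes each column name with (order value + 10) so that the lexicographic
# sort below arranges the 36 runs into the intended GCM/RCM grouping; the 3-character
# prefix and the 5-character period suffix are stripped again once the columns are sorted.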
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
test = Fdf_1900_2080_annual.copy()  #copy so the column renaming below does not mutate the original frame
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex(sorted(test.columns), axis=1)  #reindex_axis was removed from pandas
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
ax = test_sorted.plot(legend=False, color = plotcolours36)
ax.patch.set_alpha(ALPHA_figs)  #set the alpha before saving; setting it after pdf.savefig has no effect on the saved page
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - representative models')
dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()

@ -1,121 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 20 18:03:25 2018
@author: z5025317
"""
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
import csv
# import own modules
# Set working directory to the code folder so the own modules below can be imported
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
#import climdata_fcts as fct
import silo as sil
#==========================================================#
#==========================================================#
# Set working directory to the project root (where the postprocessed NARCliM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Casestudy2_csv_path = "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTUDY2_NARCLIM_Point_Sites.csv"  #filename spelling fixed (was 'CASESTDUY2') to match the site CSV used elsewhere
Silo_variables = ['daily_rain', "max_temp", "min_temp", 'et_short_crop', 'evap_syn']
Location = 'Estuary' #'Catchment'
startdate= '19600101'
enddate= '20180101'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Data/SILO/' + Case_Study_Name + '/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
#read the CSV to extract the lat/lon coordinates of the case study sites
with open(Casestudy2_csv_path, mode='r') as infile:
reader = csv.reader(infile)
next(reader, None)
with open('coors_new.csv', mode='w') as outfile:
writer = csv.writer(outfile)
if Location == 'Estuary':
mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
if Location == 'Ocean':
mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
if Location == 'Catchment':
mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
for Estuary in mydict:
print(Estuary, mydict[Estuary][0], mydict[Estuary][1])
#==========================================================#
#set directory path for output files
output_directory_internal = output_directory + Estuary + '/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory_internal):
os.makedirs(output_directory_internal)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
output_csv = output_directory_internal + 'SILO_climdata_' + Estuary +'_'+ Location +'_' + mydict[Estuary][0] + '_' + mydict[Estuary][1] + '.csv'
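#NOTE: the second argument to sil.pointdata below is a hardcoded SILO API key; treat it
#as a credential and substitute your own key rather than committing it to the repository.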
silo_df = sil.pointdata(Silo_variables, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh',startdate, enddate,
None, mydict[Estuary][0], mydict[Estuary][1], True, output_csv)
#==========================================================#

@ -1,315 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 16:32:01 2018
@author: z5025317
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import glob
def compare_images(im1, im2):
"""plots 2 images next to each other, sharing the axis"""
plt.figure()
ax1 = plt.subplot(121)
plt.imshow(im1, cmap='gray')
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(im2, cmap='gray')
plt.show()
def reject_outliers(data, m=2):
"rejects outliers in a numpy array"
return data[abs(data - np.mean(data)) < m * np.std(data)]
def duplicates_dict(lst):
"return duplicates and indices"
# nested function
def duplicates(lst, item):
return [i for i, x in enumerate(lst) if x == item]
return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
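# e.g. duplicates_dict(['a', 'b', 'a']) returns {'a': [0, 2]}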
def datenum2datetime(datenum):
"convert datenum to datetime"
#takes in datenum and outputs python datetime
time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum]
return time
def select_min_med_max_dif_model(NARCLIM_df):
#select the three most representative models (min, med and max difference between far future and present)
Fdf_1900_2080_sorted = NARCLIM_df.reindex(sorted(NARCLIM_df.columns), axis=1)  #reindex_axis was removed from pandas
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df.reset_index()
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
df = df.reset_index()
df= df[df.index % 2 != 0]
#get max difference model (difference between far future and present day)
a = df[df.index == df['C'].idxmax(skipna=True)]
Max_dif_mod_name = a.iloc[0]['index']
#get min difference model
a = df[df.index == df['C'].idxmin(skipna=True)]
Min_dif_mod_name = a.iloc[0]['index']
#get the model whose difference is closest to the median difference
df['D'] = abs(df['C']- df['C'].median())
a = df[df.index == df['D'].idxmin(skipna=True)]
Med_dif_mod_name = a.iloc[0]['index']
#data frame with the min, med and max difference models combined
df2 = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] )
dfall = df2.reindex(sorted(df2.columns), axis=1)
#data frames with the individual models
dfmin = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5])
dfmax = NARCLIM_df.filter(regex= Max_dif_mod_name[:-5])
dfmed = NARCLIM_df.filter(regex= Med_dif_mod_name[:-5])  #was filtering on the max model (copy-paste bug)
return dfall, dfmin, dfmed, dfmax, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name
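# Hedged usage sketch (Fdf_1900_2080 is the full NARCliM frame used elsewhere in this module):
# dfall, dfmin, dfmed, dfmax, Min_name, Med_name, Max_name = select_min_med_max_dif_model(Fdf_1900_2080)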
def calculate_deltas_NF_FF2(Annual_df, Seasonal_df, Stats, Perc_vs_Abs):
"""calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
delta_all_df = pd.DataFrame()
for temp in times:
if temp == 'annual':
Mean_df = Annual_df.mean()
Column_names = ['near', 'far']
if temp == 'DJF':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==1].mean()
Column_names = ['DJF_near', 'DJF_far']
if temp == 'MAM':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==2].mean()
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==3].mean()
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==4].mean()
Column_names = ['SON_near', 'SON_far']
if(Stats[:4] =='days'):
models = list(Seasonal_df.mean().index.get_level_values(0))
else:
models = list(Seasonal_df.mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
if Perc_vs_Abs == 'absolute':
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[0]
delta_FF_ensemble.append(delta_FF)
if Perc_vs_Abs == 'percent':
delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
delta_NF_ensemble.append(delta_NF)
delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if(Stats[:4] =='days'):
delta_all_df = delta_all_df.fillna(0.0).astype(int)  #fill NaNs before the integer cast (astype(int) fails on NaN)
return delta_all_df
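# Hedged usage sketch ('mean' and 'absolute' are example settings following the string
# conventions checked inside the function, not the only valid ones):
# delta_all_df = calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, 'mean', 'absolute')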
def calculate_deltas_monthly(Monthly_df, Stats, Perc_vs_Abs):
"""calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""
delta_all_df = pd.DataFrame()
for i in range(1, 13, 1):
Mean_df = Monthly_df[Monthly_df.index.month==i].mean()
Column_names = [str(i)+'_near', str(i)+'_far']
if(Stats[:4] =='days'):
models = list(Monthly_df.mean().index.get_level_values(0))
else:
models = list(Monthly_df.mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
if Perc_vs_Abs == 'absolute':
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[0]
delta_FF_ensemble.append(delta_FF)
if Perc_vs_Abs == 'percent':
delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
delta_NF_ensemble.append(delta_NF)
delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if(Stats[:4] =='days'):
delta_all_df = delta_all_df.fillna(0.0).astype(int)  #fill NaNs before the integer cast (astype(int) fails on NaN)
return delta_all_df
def import_present_day_climdata_csv(Estuary, Clim_var_type):
"""
this funciton imports the present day climate data used for
characterizing the present day climate varibility
If DataSource == 'Station', individual weather station data is used.
If DataSource == 'SILO' , SILO time series is used using the estuary centerpoint as reference locatoin for
selection of the grid cell
"""
#load present day climate data for the same variable
if Clim_var_type == 'evspsblmean': #the observed ET time series is not in the same format as the other variables, hence the different treatment
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' + 'ET' + '*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Date)
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,1]
Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
Present_day_df = pd.to_numeric(Present_day_df)
Present_day_df.index = Dates
[minplotDelta, maxplotDelta]=[50,50]
#for tasmean, observed min and max T need to be converted into mean T
elif Clim_var_type == 'tasmean':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_MaxT_df = Present_day_df.iloc[:,5]
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MinT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_MinT_df = Present_day_df.iloc[:,5]
Present_day_df = (Present_day_MaxT_df + Present_day_MinT_df)/2
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'tasmax':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,5]
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'wssmean' or Clim_var_type == 'wss1Hmaxtstep':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/Terrigal_Wind.csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Present_day_df.index = Present_day_df[['Year', 'Month', 'Day', 'Hour']].apply(lambda s : datetime(*s),axis = 1)
Present_day_df = Present_day_df.filter(regex= 'm/s')
Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
Present_day_df['Wind speed measured in m/s'] = pd.to_numeric(Present_day_df['Wind speed measured in m/s'], errors='coerce')  #convert_objects was removed from pandas
[minplotDelta, maxplotDelta]=[1, 1.5]
elif Clim_var_type == 'sstmean':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/CASESTUDY2/' + Estuary + '*' )
Present_day_Var_CSV = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '_NNRP*')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0], parse_dates=True, index_col=0)
Present_day_df = Present_day_df.filter(regex= 'NNRP_R1_1950')
Present_day_df['NNRP_R1_1950'] = pd.to_numeric(Present_day_df['NNRP_R1_1950'], errors='coerce')  #convert_objects was removed from pandas
[minplotDelta, maxplotDelta]=[1, 1]
else:
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' + 'Rainfall' + '*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,5]
[minplotDelta, maxplotDelta]=[50,100]
return Present_day_df, minplotDelta, maxplotDelta
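# Hedged usage sketch ('HUNTER' is an example estuary name used elsewhere in this project):
# Present_day_df, minplotDelta, maxplotDelta = import_present_day_climdata_csv('HUNTER', 'tasmean')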
def quant_quant_scaling(Full_df, quantiles, Plotbool):
"""
calculates the % "deltas" for each quantile in line with Climate Change in Australia recommendations provided here:
https://www.climatechangeinaustralia.gov.au/en/support-and-guidance/using-climate-projections/application-ready-data/scaling-methods/
"""
Periods = ['1990', '2020', '2060']
quantiles_strings = [str(x) for x in quantiles][:-1]
models = list(Full_df.resample('A').max().mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = list(set(newmodel))
#create empty df and loop through models and periods to derive the % change factors for all quantiles and models
Quant_diff_df_outer = pd.DataFrame()
for unique_model in unique_models:
dfdiff = Full_df.filter(regex= unique_model)
Quant_diff_df = pd.DataFrame()
for period in Periods:
x=dfdiff.filter(regex= period).dropna().values
x.sort(axis=0)
df=pd.DataFrame(x)
df.columns = ['A']
cut_df = pd.DataFrame(df.groupby(pd.qcut(df.rank(method='first').A, quantiles))['A'].mean().values)
cut_df.columns = [period]
Quant_diff_df = pd.concat([Quant_diff_df, cut_df], axis=1, join='outer')
if Plotbool:
Quant_diff_df.plot(x=Quant_diff_df.index, y=Periods, kind="bar", title = unique_model)
Quant_diff_df['NF_%change_'+ unique_model] = (Quant_diff_df['2020'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
Quant_diff_df['FF_%change_'+ unique_model] = (Quant_diff_df['2060'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
Quant_diff_df = Quant_diff_df.replace([np.inf, -np.inf], np.nan)
Quant_diff_df = Quant_diff_df.iloc[:,[3,4]].fillna(0)
Quant_diff_df_outer = pd.concat([Quant_diff_df_outer, Quant_diff_df], axis=1, join='outer')
Quant_diff_df_outer.index = quantiles_strings
if Plotbool:
Quant_diff_df_outer.plot(x=Quant_diff_df_outer.index, y=Quant_diff_df_outer.columns, kind="bar", legend = False, title='% intensity change per quantile')
#add new cols and rows with summary statistics
Quantile_change_NF_df = Quant_diff_df_outer.filter(regex= 'NF')
Quantile_change_FF_df = Quant_diff_df_outer.filter(regex= 'FF')
Quantile_change_stats_df = pd.DataFrame()
for loop_df in [Quantile_change_NF_df, Quantile_change_FF_df]:
Sum95_df = pd.DataFrame(loop_df.iloc[-5:,:].sum()).transpose()
Sum95_df.index = ['Sum>95perc']
Sum_df = pd.DataFrame(loop_df.sum()).transpose()
Sum_df.index = ['Sum']
Med_df = pd.DataFrame(loop_df.median()).transpose()
Med_df.index = ['Median']
loop_df = pd.concat([loop_df, Sum_df, Sum95_df, Med_df])  #DataFrame.append was removed from recent pandas
loop_df['Median'] = loop_df.median(axis=1)
loop_df['Maximum'] = loop_df.max(axis=1)
loop_df['Minimum'] = loop_df.min(axis=1)
Quantile_change_stats_df = pd.concat([Quantile_change_stats_df, loop_df], axis=1, join='outer')
return Quantile_change_stats_df
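# Hedged usage sketch: deciles plus extra tail quantiles, in the spirit of the CCiA scaling
# guidance linked in the docstring (the exact quantile list is an assumption):
# quantiles = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1]
# Quantile_change_stats_df = quant_quant_scaling(Fdf_1900_2080, quantiles, Plotbool=False)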

Binary file not shown.

@ -1,556 +0,0 @@
#R code for creating ggplots of time series with smooth (GAM) and linear trend terms
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #you need to install these packages first before you can load them here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
GRACE.monthly.av.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Mean.2002-2017 with border pixels.csv"))
GRACE.monthly.std.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Stdv.2002-2017 with border pixels.csv"))
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "HUNTER"
Riv.Gauge.loc <- "GRETA" #GRETA
Est.Gauge.loc <- "RAYMONDTERRACE" #"CORAKI" #"RAYMONDTERRACE" # "HEXHAMBRIDGE"
logtransformFlow <- TRUE
ggplotGAM.k <- 15
rivTempGAM.k <- 20
Version <- 'V3'
Fontsize <- 12
######################
######################
Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/Riv_', Riv.Gauge.loc,'_Est_',Est.Gauge.loc,'_GAMk', ggplotGAM.k, 'd/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
pattern = paste('SILO_climdata_', Estuary,'_Catchment*', sep="")
AirT_CSV_Path <- list.files(paste("./Data/SILO/",Case.Study, '/',sep=""), pattern, full.names=T, recursive=T)
pattern = paste(Estuary,'@', Riv.Gauge.loc, '.*.ALL.csv', sep="")
RivT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
pattern = paste(Estuary,'@', Est.Gauge.loc, '.*.ALL.csv', sep="")
EstT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
pattern = paste('sstmean_NNRP_', Estuary,'*', sep="")
SST_CSV_Path <- list.files(paste("./Data/NARCLIM_Site_CSVs/",Case.Study, '/', sep=""), pattern, full.names=T, recursive=T)
######################
######################
#Analyse
######################
############tasmean
#Load a daily (no gaps) time series as the baseline for the other time series used here
AirT.df <- data.frame(read.csv(AirT_CSV_Path))
AirT.full.TS <- zoo((AirT.df$max_temp_Celsius + AirT.df$min_temp_Celsius)/2, order.by= as.Date(AirT.df[,"date"], format = "%Y-%m-%d")) #daily mean air T as (Tmax + Tmin)/2; the original averaged max_temp with itself (min_temp_Celsius column name assumed from the SILO variable list)
AirT.TS <- window(AirT.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
AirT.full.df <- data.frame(AirT.full.TS)
AirT.df <- data.frame(AirT.TS)
colnames(AirT.df) <- 'tasmean'
colnames(AirT.full.df) <- 'tasmean'
############
AirT.df$Julday1 <- seq(1,length(AirT.df[,1]),1)
linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.df)
AirT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
AirT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############
AirT.full.df$Julday1 <- seq(1,length(AirT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.full.df)
AirT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
AirT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
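#The slope/p-value extraction above is repeated verbatim for every series below; a minimal
#helper sketch (lin.trend.stats is an assumed name, not part of the original script):
#lin.trend.stats <- function(df, varname, steps.per.year = 365) {
#  m <- summary(lm(as.formula(paste(varname, "~ Julday1")), df))
#  c(trend = m$coefficients[2,1] * steps.per.year, pval = m$coefficients[2,4])
#}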
############tasmean
############River temp
#Load the gauged river temperature series
#Here we use the raw DPI CSV format, which comes with a block of metadata rows
RivT.df <- data.frame(read.csv(RivT_CSV_Path))
char.df <- data.frame(lapply(RivT.df[2,], as.character), stringsAsFactors=FALSE)
#dat <- data.frame(lapply(RivT.df[(4:nrow(RivT.df)),(2:ncol(RivT.df))], as.numeric), stringsAsFactors=FALSE)
#dat <- RivT.df.num[!is.na(as.numeric(as.character(RivT.df.num))),]
dat <- RivT.df[(4:nrow(RivT.df)),]
colnames(dat) <- lapply(RivT.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
RivT.df <- dat
rm(dat,char.df)
RivT.full.TS <- zoo(as.numeric(as.character(RivT.df$Temp)), order.by= as.Date(RivT.df[,"Date"], format = "%d/%m/%Y")) #daily time series of river temperature
RivT.full.TS <- window(RivT.full.TS, start=as.Date("1995-07-01"), end=as.Date("2018-01-01"))
RivT.full.TS <- na.approx(RivT.full.TS)
RivT.TS <- RivT.full.TS
RivT.full.df <- data.frame(RivT.TS) ### This is only done because
RivT.df <- data.frame(RivT.TS)
colnames(RivT.df) <- 'rivTmean'
colnames(RivT.full.df) <- 'rivTmean'
############
RivT.df$Julday1 <- seq(1,length(RivT.df[,1]),1)
linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.df)
RivT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
RivT.full.df$Julday1 <- seq(1,length(RivT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.full.df)
RivT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############River temp
#export interpolated data as csv
#csv.name <- "C:/Users/z5025317/OneDrive - UNSW/CC_Estuaries_CASESTUDY2/Data/River_Gauge_Data/HUNTER@Greta_210064_Temp_interpolated.csv"
#write.csv(RivT.df, csv.name)
############River flow
#Load the gauged river flow series
#Here we use the raw DPI CSV format, which comes with a block of metadata rows
RivQ.df <- data.frame(read.csv(RivT_CSV_Path))
char.df <- data.frame(lapply(RivQ.df[2,], as.character), stringsAsFactors=FALSE)
dat <- RivQ.df[(4:nrow(RivQ.df)),]
colnames(dat) <- lapply(RivQ.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat), pattern="Discharge (ML/d)",replacement="Flow",fixed=T)
RivQ.df <- dat
rm(dat,char.df)
#RivQ.full.TS <- zoo(log10((as.numeric(as.character(RivQ.df$Flow))) +1) , order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
RivQ.full.TS <- zoo((as.numeric(as.character(RivQ.df$Flow)))/86.4, order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #daily river flow, converted from ML/day to m3/s (1 m3/s = 86.4 ML/day)
RivQ.TS <- window(RivQ.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
#RivQ.full.TS <- window(RivQ.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
RivQ.full.df <- data.frame(RivQ.full.TS) ### This is only done because
RivQ.df <- data.frame(RivQ.TS)
colnames(RivQ.df) <- 'RivQmean'
colnames(RivQ.full.df) <- 'RivQmean'
############trends
RivQ.df$Julday1 <- seq(1,length(RivQ.df[,1]),1)
linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.df)
RivQ.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivQ.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
RivQ.full.df$Julday1 <- seq(1,length(RivQ.full.df[,1]),1)
linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.full.df)
RivQ.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivQ.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############River Flow
############ SST
#Load the NNRP reanalysis sea surface temperature series
SST.df <- data.frame(read.csv(SST_CSV_Path))
SST.full.TS <- zoo(SST.df$NNRP_R1_1950-273.15, order.by= as.Date(SST.df[,"X"], format = "%Y-%m-%d")) #daily SST, converted from Kelvin to deg C
SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
#SST.full.TS <- window(SST.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
SST.full.df <- data.frame(SST.full.TS)
SST.df <- data.frame(SST.TS)
colnames(SST.df) <- 'SSTmean'
colnames(SST.full.df) <- 'SSTmean'
############
SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
SST.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all)$coefficients[2,4]
SST.full.lintrend <- summary(linear.trend.model_EC_all)$coefficients[2,1] * 365
SST.df$Julday1 <- seq(1,length(SST.df[,1]),1)
linear.trend.model_EC_all2 <- lm(SSTmean ~ Julday1, SST.df)
SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all2)$coefficients[2,4]
SST.lintrend <- summary(linear.trend.model_EC_all2)$coefficients[2,1] * 365
############ SST
############Estuary temp
#Load the gauged estuary temperature series
#Here we use the raw DPI CSV format, which comes with a block of metadata rows
EstT.df <- data.frame(read.csv(EstT_CSV_Path))
char.df <- data.frame(lapply(EstT.df[2,], as.character), stringsAsFactors=FALSE)
#dat <- data.frame(lapply(EstT.df[(4:nrow(EstT.df)),(2:ncol(EstT.df))], as.numeric), stringsAsFactors=FALSE)
#dat <- EstT.df.num[!is.na(as.numeric(as.character(EstT.df.num))),]
dat <- EstT.df[(4:nrow(EstT.df)),]
colnames(dat) <- lapply(EstT.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
EstT.df <- dat
rm(dat,char.df)
#replace negative values with NA
EstT.df$Temp <- replace(EstT.df$Temp, which(as.numeric(as.character(EstT.df$Temp)) < 11), NA)
EstT.full.TS <- zoo(as.numeric(as.character(EstT.df$Temp)), order.by= as.Date(EstT.df[,"Date"], format = "%d/%m/%Y")) #daily time series of estuary temperature
EstT.TS <- window(EstT.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
EstT.full.TS <- EstT.TS
EstT.full.df <- data.frame(EstT.TS) ### This is only done because of poor data at beginning
EstT.df <- data.frame(EstT.TS)
colnames(EstT.df) <- 'EstTmean'
colnames(EstT.full.df) <- 'EstTmean'
############
EstT.df$Julday1 <- seq(1,length(EstT.df[,1]),1)
linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.df)
EstT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
EstT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
EstT.full.df$Julday1 <- seq(1,length(EstT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.full.df)
EstT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
EstT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############Est temp
######################
#Plot
######################
##################################### Full Time Period
#Air temp Full period
p1air <- ggplot(AirT.full.df, aes(y=tasmean, x=index(AirT.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary," Catchment Centroid - Linear and smooth trends in catchment airT (SILO) lin trend was ",
round(AirT.full.lintrend,3), ' C°/year with p=', round(AirT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Air Temperature [C°]") + xlab("Time")
p1riv <- ggplot(RivT.full.df, aes(y=rivTmean, x=index(RivT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trends in gauged river temperature (@", Riv.Gauge.loc ,") - Linear trend was ",
round(RivT.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(RivT.full.pvalNCV_ECall,10)), sep="")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("River Temperature [C°]") + xlab("Time") + xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
if(logtransformFlow ==TRUE){
p1rivQ <- ggplot(RivQ.full.df, aes(y=log10(RivQmean+2), x=index(RivQ.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
round(RivQ.full.lintrend,3), ' m3/s per year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("log10(River Flow [ML/day] + 2)", sep=""))) + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
} else {
p1rivQ <- ggplot(RivQ.full.df, aes(y=RivQmean, x=index(RivQ.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
round(RivQ.full.lintrend,3), ' m3/s per year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("River Flow [ML/day]", sep=""))) + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
}
#Sea temp Full period
p1sst <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " offshore - Linear and smooth trends in sea surface temperature (NNRP NARCLIM reanalysis) linear trend was ",
round(SST.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(SST.full.pvalNCV_ECall,10)), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+ xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
p1Est <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
p1Est2 <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
#stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
geom_smooth(method = "lm", formula = y ~ splines::ns(x, 2)) + #natural-spline smooth; the original called binomial_smooth(), which is not defined anywhere in this script
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb3.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est2,ncol=1)
dev.off()
gA1 <- ggplotGrob(p1riv)
gA2 <- ggplotGrob(p1sst)
gA1$widths <- gA2$widths
#export to png
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1riv,p1riv,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_SSTmean_full_period4_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA1,gA2,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1rivQ,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1sst,p1sst,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"b.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est2,ncol=1)
dev.off()
##################################### Full Time Period
######################### 1990-present
combined.TS <- window(merge(AirT.full.TS, window(RivT.full.TS, start=as.Date("1995-01-01"), end=end(RivT.full.TS)), SST.full.TS,EstT.full.TS,RivQ.full.TS, all=T), start=as.Date("1990-01-01"), end=end(AirT.full.TS))
combined.df <- data.frame(combined.TS)
colnames(combined.df) <- c('tasmean','rivTmean', 'SSTmean', 'EstTmean', 'rivQmean')
#Air temp
p2air <- ggplot(combined.df, aes(y=tasmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary," Catchment centroid - Linear and smooth trend in catchment airT (SILO) linear trend was ",
round(AirT.lintrend,3), ' C°/year with p=', round(AirT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Air Temperature [C°]") + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Riv temp
p2riv <- ggplot(combined.df, aes(y=rivTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river temperature (GAUGE) linear trend was ",
round(RivT.lintrend,3), ' C°/year with p=', round(RivT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("River Temperature [C°]") + xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Riv flow
if(logtransformFlow ==TRUE){
p2rivQ <- ggplot(combined.df, aes(y=log10(rivQmean+2), x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
round(RivQ.lintrend,3), ' m3/s per year with p=', round(RivQ.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep=""))) + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
} else {
p2rivQ <- ggplot(combined.df, aes(y=rivQmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
round(RivQ.lintrend,3), ' m3/s per year with p=', round(RivQ.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("River flow [m"^"3" *"/s]", sep=""))) + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
}
#Sea temp
p2sst <- ggplot(combined.df, aes(y=SSTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,"NNRP(NARCLIM reanalysis) - Linear and smooth trend in sea surface temperature (NNRP) lin trend was ",
round(SST.lintrend,3), ' C°/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Estuary temp
p2Est <- ggplot(combined.df, aes(y=EstTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) linear trend was ",
round(EstT.lintrend,3), ' C°/year with p=', round(EstT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
#stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2air,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2riv,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivQmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2rivQ,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2sst,ncol=1)
dev.off()
#export an overview plot as png
gA <- ggplotGrob(p2air)
gB <- ggplotGrob(p2riv)
gC <- ggplotGrob(p2sst)
gD <- ggplotGrob(p2Est)
gE <- ggplotGrob(p2rivQ)
gA$widths <- gE$widths
gC$widths <- gE$widths
gD$widths <- gE$widths
gB$widths <- gE$widths
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_AirT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB ,gC,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gC, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB ,gE, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gD,gC,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gE,gB,gD,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_RivQ_1990-present_', Sys.Date(),"b.png", sep="")
png(file = png.name, width = 10.5, height = 11, units='in', res=500)
grid.arrange(gB, gA,gE,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivT_RivQ_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gE,ncol=1)
dev.off()
lm_eqn <- function(df){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
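#Hedged usage sketch for lm_eqn (it is defined but never called in this script; the x/y
#positions are placeholders): p + annotate("text", x = 10, y = 20, label = lm_eqn(df), parse = TRUE)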
Modeling <- 'yes'
if (Modeling == 'yes'){
source("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code/FUN_do_ggplot_Scatterplot_2vars.R")
AirT.full.TS.Monthly <- aggregate(AirT.full.TS, as.yearmon(time(AirT.full.TS)),FUN = mean)
RivT.full.TS.Monthly <- aggregate(RivT.full.TS, as.yearmon(time(RivT.full.TS)),FUN = mean)
RivQ.full.TS.Monthly <- aggregate(RivQ.full.TS, as.yearmon(time(RivQ.full.TS)),FUN = mean)
if(logtransformFlow ==TRUE){
RivQ.full.TS.Monthly <- log10(RivQ.full.TS.Monthly +2)
}
All.TS.zoo <- merge(AirT.full.TS.Monthly, RivT.full.TS.Monthly ,RivQ.full.TS.Monthly , all=F)
AirvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,1],"River temp. [°C]" , "Air temp. [°C]")
if(logtransformFlow ==TRUE){
QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]", expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep="")))
} else {
QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]", expression(paste("River flow [m"^"3" *"/s]", sep="")))
}
#export an overview plot as png
gA <- ggplotGrob(AirvsRivT)
gB <- ggplotGrob(QvsRivT)
gB$widths <- gA$widths
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Scatterplots_of_RivT_vs_predictors_1990-present_', Sys.Date(),"f.png", sep="")
png(file = png.name, width = 8, height = 4, units='in', res=500)
grid.arrange(gA,gB , ncol=2)
dev.off()
}

@ -1,184 +0,0 @@
#R code for creating ggplots of time series with smooth (GAM) and linear terms for the OEH buoy data on wave height and SST
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #you need to install these packages first before you can load them here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "GEORGES"
Climvar <- 'tasmean'
ggplotGAM.k <- 7
Fontsize <- 12 #used by the plot themes below but not set anywhere in the original script (assumed value)
######################
######################
Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
pattern = paste(Estuary, '.*.csv', sep="")
Buoy_CSV_Path <- list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=T)
Buoy.name <- substr(list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=F), nchar(Estuary)+2, nchar(Estuary)+7)
SST.df <- data.frame(read.csv(Buoy_CSV_Path, colClasses = "character"))
colnames(SST.df) <- as.character(SST.df[9,])
SST.df = SST.df[-c(1:9), ]
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Date/Time",replacement="Date",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern=" ",replacement="",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Hsig(m)",replacement="Hsig",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="SeaTemp(C)",replacement="SST",fixed=T)
SST.df$Date <- substr(SST.df$Date, 1, 10)
######################
#Hsig
######################
Hsig.full.TS <- zoo(as.numeric(SST.df$Hsig), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #time series of significant wave height
Hsig.full.TS <- aggregate(Hsig.full.TS, index(Hsig.full.TS) , FUN=mean)
Hsig.full.TS <- aggregate(Hsig.full.TS , as.yearmon(time(Hsig.full.TS )),FUN = mean)
Hsig.full.TS <- Hsig.full.TS[1:(length(Hsig.full.TS)-1)] #drop the last (incomplete) month
Hsig.TS <- window(Hsig.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
Hsig.full.df <- data.frame(Hsig.full.TS)
Hsig.df <- data.frame(Hsig.TS)
colnames(Hsig.df) <- 'Hsigmean'
colnames(Hsig.full.df) <- 'Hsigmean'
#rid of outliers
#Hsig.full.df$MSL <- replace(Hsig.full.df$MSL, which(Hsig.full.df$MSL > 4), NA)
Hsig.full.df$Julday1 <- seq(1,length(Hsig.full.df[,1]),1)
linear.trend.model_EC_all <- lm(Hsigmean ~ Julday1, Hsig.full.df)
Hsig.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
Hsig.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
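#the Hsig series is monthly (yearmon index), so the per-step slope is multiplied by 12
#to express the trend per year; the daily series elsewhere use 365 instead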
######################
######################
#SST
######################
SST.full.TS <- zoo(as.numeric(SST.df$SST), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #time series of buoy sea surface temperature
SST.full.TS <- na.omit(SST.full.TS)
SST.full.TS <- aggregate(SST.full.TS, index(SST.full.TS) , FUN=mean)
SST.full.TS <- SST.full.TS[1:(length(SST.full.TS)-1)] #drop the last (incomplete) day
SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
SST.full.df <- data.frame(SST.full.TS)
SST.df <- data.frame(SST.TS)
colnames(SST.df) <- 'SSTmean'
colnames(SST.full.df) <- 'SSTmean'
#rid of outliers
#SST.full.df$MSL <- replace(SST.full.df$MSL, which(SST.full.df$MSL > 4), NA)
SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
SST.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
#export interpolated data as csv
csv.name <- paste("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Waves_SST/Processed/",Estuary , "_SST_Daily.csv", sep="")
write.csv(SST.full.df, csv.name)
######################
######################
#Plot
######################
##################################### Full Time Period
p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in significant wave height (OEH Buoy:",Buoy.name, "| lin trend was ",
round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Significant Wave Height [m]") + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name, '_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
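#Optional refactor (sketch): the png()/grid.arrange()/dev.off() pattern repeats
#for every figure below; a small helper keeps the exports consistent.
#'save.plot.png' is a hypothetical name, not part of the original script.
save.plot.png <- function(p, file, width = 10.5, height = 7, res = 500) {
  png(file = file, width = width, height = height, units = 'in', res = res)
  grid.arrange(p, ncol = 1)
  dev.off()
}
#usage: save.plot.png(p1air, png.name)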
#multiple smooths
p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in significant wave height (OEH Buoy:",Buoy.name, "| lin trend was ",
round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
#stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, col="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Significant Wave Height [m]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name, '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
##################################### Full Time Period SST
p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in SST (OEH Buoy:",Buoy.name, "| lin trend was ",
round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name, '_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
#multiple smooths
p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in SST (OEH Buoy:",Buoy.name, "| lin trend was ",
round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
#stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, col="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name, '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()

@ -1,113 +0,0 @@
#R code for creating ggplots of time series with smooth (GAM) and linear trend terms
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #you need to install these packages first before you can load them here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "NADGEE"
Climvar <- 'tasmean'
ggplotGAM.k <- 7
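#Note: ggplotGAM.k presumably sets the GAM basis dimension for s(x, k=...); the
#stat_smooth() calls below use hard-coded k values and do not reference it.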
######################
######################
Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('Output folder already exists; it was not created again')
} else {
dir.create(file.path(Output.Directory))
print('Output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
SeaLev_TXT_Path <- paste("./Data/Ocean_Data/BOM_monthly_SL_", Estuary, "_Eden.txt", sep="") #BOM monthly mean sea level file
dat = readLines(SeaLev_TXT_Path)
dat = as.data.frame(do.call(rbind, strsplit(dat, split=" {2,10}")), stringsAsFactors=FALSE)
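#Note: splitting on runs of 2-10 spaces assumes the BOM file is space-padded;
#the row offsets dropped below are specific to each gauge's file layout.
#read.fwf() with explicit column widths would be a more robust alternative.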
colnames(dat) <- dat[3,]
if (Estuary=='NADGEE'){
dat2 = dat[-c(1:11), ]
dat2 = dat2[-(365:381),]
dat2 = dat2[,-1]
}
if (Estuary=='HUNTER'){
dat2 = dat[-c(1:3), ]
dat2 = dat2[-(720:726),]
dat2 = dat2[-(1:108),]
dat2 = dat2[,-1]
}
dat2$Date <- as.yearmon(dat2[,1], "%m %Y")
SeaLev.df <- dat2
head(SeaLev.df)
SeaLev.df$MSL <- as.numeric(SeaLev.df$Mean)
#rid of outliers
SeaLev.df$MSL <- replace(SeaLev.df$MSL, which(SeaLev.df$MSL > 4), NA)
SeaLev.df$Julday1 <- seq(1,length(SeaLev.df[,1]),1)
linear.trend.model_EC_all <- lm(MSL ~ Julday1, SeaLev.df)
SeaLev.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
SeaLev.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
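#Note: SeaLev.lintrend is in m per year (monthly slope x 12); the plot titles
#below multiply by 100 to report the trend in cm per year.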
######################
#Plot
######################
##################################### Full Time Period
p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Monthly Mean Sea Level [m]") + xlab("Time")
#export to png
png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
#multiple smooths
p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
#stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, col="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Monthly Mean Sea Level [m]") + xlab("Time")
#export to png
png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()