#added new code for also deriving quantile scaling factors from the NARCLIM data

#Sep 2018 version of the NARCLIM suite of codes for OEH tech guide
#added a few additional plotting options
15 changed files with 3501 additions and 264 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 Data/
 Manuscript/
 NARCLIM/
 Outputs/
--- a/Analysis/Code/NARCLIM_Download/R_Preparing_BASH_script_for_NARCLIM_batch_download.R
+++ b/Analysis/Code/NARCLIM_Download/R_Preparing_BASH_script_for_NARCLIM_batch_download.R
@ -0,0 +1,79 @@
 #code for preparing a text file with BASH code for batch download of NARCLIM data for the HUNTER WQ modeling of 
 #future climate scenarios 
 #NARCLIM Variables
 #evspsblmean   	water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
 #tasmean      mean near surface temperature
 #pracc 		precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip) 
 # 'evspsblmean'   	water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
 # 'potevpmean'    	potential ET water_potential_evaporation_flux kg m-2 s-1
 # 'tasmean'               mean near surface temperature
 # 'tasmax'		maximum near surface temperature
 # 'pracc' 		precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip) 
 # 'pr1Hmaxtstep'     	maximum 1 hour interval rainfall in a one day period
 # 'pr1Hmaxtstep' 		Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
 # 'wss1Hmaxtstep'		Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
 # 'wssmax' 		Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
 # 'wssmean'		Surface wind speed standard_name: air_velocity units: m s-1
 # 'sstmean'   Sea surface temperatuer daily mean
 Clim_Var <- 'evspsblmean'
 Datatype <- 'T_GCMS'  #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
 Biasboolean <- 'False' #use bias corrected data?
 Location <- 'catchment' #location of coordinate
 Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY2/'   
 Filename <- 'CASESTUDY2_NARCLIM_Point_Sites.csv'
 #Load CSV with location names and lat lon coordinates
 Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T, fileEncoding="UTF-8-BOM"))
 #create empty vector for storing the command line text and open file
 Vector.for.command.line.txt <- c()
 Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
 text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
           paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
 Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
 for (i in 1:(length(Location.df$Name))){
  name <-as.character(Location.df$Name[i])
  #name<-gsub('([[:punct:]])|\\s+','_',name)
  # if(i<10){
  #   name<-paste('Catchment_0', as.character(i), sep="")
  # }else{
  #   name<-paste('Catchment_', as.character(i), sep="")
  # }
  latitude=round(as.numeric(Location.df$Lat[i]),3)
  longitude=round(as.numeric(Location.df$Long[i]),3)
  if (Clim_Var == 'sstmean'){
    latitude=round(as.numeric(Location.df$Ocean_Lat[i]),3)
    longitude=round(as.numeric(Location.df$Ocean_Long[i]),3)
  }
  if (Location == 'catchment'){
    latitude=round(as.numeric(Location.df$Catch_Lat[i]),3)
    longitude=round(as.numeric(Location.df$Catch_Lon[i]),3)
    }
  text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
 paste("name='",name,"'", sep=""),
 "python /srv/ccrc/data02/z5025317/Code_execution/\\
 P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
  --lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
  'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
  Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
  if(i==10|i==20|i==31|i==length(Location.df$Name)){
    Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
    text.file.name <- paste(Directory ,'/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), "_",Location, ".txt", sep="")
    #open and fill text file
    fileConn <- file(text.file.name)
    writeLines(Vector.for.command.line.txt, fileConn)
    close(fileConn)
    #
    if(i==10|i==20){
      Vector.for.command.line.txt <- c()
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
      text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
                 paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
      }
    }
  }
--- a/Analysis/Code/NARCLIM_Download/R_Preparing_BASH_script_for_NARCLIM_batch_download_GAB.R
+++ b/Analysis/Code/NARCLIM_Download/R_Preparing_BASH_script_for_NARCLIM_batch_download_GAB.R
@ -0,0 +1,81 @@
 #code for preparing a text file with BASH code for batch download of NARCLIM data for the HUNTER WQ modeling of 
 #future climate scenarios 
 #NARCLIM Variables
 #evspsblmean   	water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
 #tasmean      mean near surface temperature
 #pracc 		precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip) 
 Clim_Var <- 'sstmean'
 Datatype <- 'T_NNRP'  #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
 Biasboolean <- 'False' #use bias corrected data? True or False python boolean
 Directory <- 'C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BCGeneration/catchments/'   
 Filename <- 'Catchment_Prev_Hunter_Model_Centroids_VH_WGS84_attribute_Table.csv'
 Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/'
 Filename <- 'CASESTUDY2_GAB_NARCLIM_Point_Sites_V4.csv'
 #Load CSV with location names and lat lon coordinates
 Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T))
 for(type in c('min','max')){
  #create empty vector for storing the command line text and open file
  Vector.for.command.line.txt <- c()
  Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
  text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
  paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
  Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
  for (i in 1:(length(Location.df[,1]))){
  #for (i in c(1,5,10)){ if only subset of catchments is needed like for Met file. 
    #name<-as.character(Location.df$Name[i])
    #name<-gsub('([[:punct:]])|\\s+','_',name)
    if(type == 'min'){
      #latlongs <- as.character(Location.df[,9])
      name <- as.character(Location.df[,1][i])
      latitude <- as.character(Location.df[,2][i])
      longitude <- as.character(Location.df[,3][i])
      }else if(type == 'max'){
      name <- as.character(Location.df[,6][i])
      latitude<-as.character(Location.df[,7][i])
      longitude<-as.character(Location.df[,8][i])
      }
    # print(latlongs[i])
    # names <- as.character(Location.df[,17])
    # if(i<10){
    #   name<-paste('Plant_0', as.character(i), sep="")
    # }else{
    #   name<-paste('Plant_', as.character(i), sep="")
    # }
    #print(name)
    #print(names[i])
    # latmin <- substr(latlongs[i], 5, 10)
    # lonmin <- substr(latlongs[i], 18, 23)
    # latmax <- substr(latlongs[i], 32, 37)
    # lonmax <- substr(latlongs[i], 45, 50)
    # latitude=latmax
    # longitude=lonmax
 text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
 paste("name='",name,"'", sep=""),
 "python /srv/ccrc/data02/z5025317/Code_execution/\\
 P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
 --lat $latitude --lon $longitude --varName $ClimVarName --domain 'd01' --timestep \\
 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
 Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
    if(i==10|i==20|i==30|i==40|i==45){
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
      text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/DataDownloadCCRC/',Clim_Var, "_", Datatype, "_", Biasboolean,as.character(i), type, ".txt", sep="")
      #open and fill text file
      fileConn <- file(text.file.name)
      writeLines(Vector.for.command.line.txt, fileConn)
      close(fileConn)
      Vector.for.command.line.txt <- c()
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
      text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
                 paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
      }
    }
 }
--- a/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py
+++ b/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py
@ -0,0 +1,526 @@
    # -*- coding: utf-8 -*-
 #==========================================================#
 #Last Updated - March 2018
 #@author: z5025317 Valentin Heimhuber
 #code for creating future climate variability deviation plots for NARCLIM variables. 
 #Inputs: Uses CSV files that containthe deltas of all 12 NARCLIM models for 1 grid cell at the site of interest, generated with P1_NARCLIM_plots_Windows.py
 #This code is used only for the NARCLIM variables - a separate code is used for ocean variables etc that are not part of the NARCLIM ensemble
 #==========================================================#
 #Load packages
 #==========================================================#
 import numpy as np
 import os
 import pandas as pd
 import glob
 import matplotlib
 import matplotlib.pyplot as plt
 from datetime import datetime
 from datetime import timedelta
 from matplotlib.backends.backend_pdf import PdfPages
 from ggplot import *
 import csv
 matplotlib.style.use('ggplot')
 # Load my own functions
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
 import climdata_fcts as fct
 import silo as sil
 #==========================================================#
 #==========================================================#
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
 #==========================================================#
 #loop through case study estuaries    
 Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
 Estuaries = ['NADGEE','HUNTER', 'RICHMOND']
 for Est in Estuaries:
    #==========================================================#
    #set input parameters
    #==========================================================#
    Case_Study_Name = 'CASESTUDY2' 
    Version = "V4"
    OutVersion = "V4"
    Casestudy2_csv_path =  'Data/NARCLIM_Site_CSVs/' + Case_Study_Name + '/' + Case_Study_Name + '_NARCLIM_Point_Sites.csv'
    #Estuary = 'HUNTER' # 'Belongil'
    Estuary = Est # 'Belongil'
    print Estuary
    Base_period_start = '1950-01-01'        #Start of interval for base period of climate variability
    Base_period_DELTA_start = '1990-01-01'   #Start of interval used for calculating mean and SD for base period (Delta base period)
    Base_period_end = '2009-01-01'          #use last day that's not included in period as < is used for subsetting
    Clim_var_type  = 'pracc'            #Name of climate variable in NARCLIM models  '*' will create pdf for all variables in folder
    Stats = 'days_h_20'                  #'maxdaily' #maximum takes the annual max Precipitation instead of the sum 'days_h_30'
    PD_Datasource = 'SILO'                  #Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
    SILO_Clim_var = ['daily_rain']         #select the SILO clim variable to be used for base period. - see silo.py for detailed descriptions
    force_sea_levels = 'no'        #use fort denison sea levels to run the statistics
    Location = 'Estuary'                 # pick locaiton for extracting the SILO data can be: Estuary, Catchment, or Ocean
    Main_Plot = 'yes'
    presentdaybar = False               #include a bar for present day variability in the plots?
    present_day_plot = 'no'             #save a time series of present day  
    Allin1_delta_plot = 'yes'
    Deltaxmin, Deltaymin = -20,40   #y limits for 0 baseline delta plots
    Median_markers = 'yes'              #plot the median horizontal lines on top of the delta range in the allin1 plot?      
    Figure_headings = 'yes'
    ALPHA_figs = 0                     #Set alpha of figure background (0 makes everything around the plot panel transparent)
    font = {'family' : 'sans-serif',        
        'weight' : 'normal',
        'size'   : 14}                  #size of axis labels
    matplotlib.rc('font', **font)       
    #==========================================================#
    #==========================================================#
    #set directory path for output files
    output_directory = 'Output/' + Case_Study_Name + '_' + OutVersion + '/' + Estuary + '/' + '/Clim_Deviation_Plots/'
    #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print('-------------------------------------------')
        print("output directory folder didn't exist and was generated")
        print('-------------------------------------------')
        print('-------------------')
    #==========================================================#
    #==========================================================#
    #read CSV file  
    #==========================================================#
    Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '_' + Version + '/'  + Estuary + '/' + Estuary + '_' + Clim_var_type + '_' + Stats + '*')
    clim_var_csv_path = Clim_Var_CSVs[0]
    Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
    Clim_var_type = Filename.split('_', 2)[1]
    print(Clim_var_type)
    Ensemble_Delta_full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
    if Clim_var_type == 'rsdsmean':
        Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/'  + Estuary + '/' + Estuary + '_rsdsmean_' + Stats + '*')
        clim_var_csv_path = Clim_Var_CSVs[0]
        Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
        Clim_var_type = Filename.split('_', 2)[1]
        print(Clim_var_type)
        Ensemble_Delta_full_df1 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
        Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/'  + Estuary + '/' + Estuary + '_rldsmean_' + Stats + '*')
        clim_var_csv_path = Clim_Var_CSVs[0]
        Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
        Clim_var_type = Filename.split('_', 2)[1]
        print(Clim_var_type)
        Ensemble_Delta_full_df2 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
        Ensemble_Delta_full_df = Ensemble_Delta_full_df2 + Ensemble_Delta_full_df1
    #Ensemble_Delta_full_df = pd.to_numeric(Ensemble_Delta_full_df)
    #==========================================================# 
    #==========================================================#
    #load present day climate variable time series
    #==========================================================#
    if PD_Datasource == 'Station':
        Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
        if force_sea_levels == 'yes':
            Estuary_Folder = "C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Tide_Levels"
            Present_day_df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Daily_1900-2011.csv' , parse_dates=True, index_col=0)
            Present_day_df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Daily_1900-2011.csv' , parse_dates=True, index_col=0)
            Present_day_df = Present_day_df.interpolate()
            Present_day_df = pd.DataFrame(Present_day_df.loc[(Present_day_df.index >= Base_period_start) & (Present_day_df.index <= Base_period_end)])
            [minplotDelta, maxplotDelta]=[1, 1]
            df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Sub_Daily_1900-2011.csv',  parse_dates=[['Date', 'Time']])
            df.index = pd.to_datetime(df.Date, format = 'dd/mm/Y') + pd.to_timedelta(df.Hour, unit='h')
    if PD_Datasource == 'SILO':
        #read the CSV to extract the lat long and case stuy sites
        with open(Casestudy2_csv_path, mode='r') as infile:
            reader = csv.reader(infile)
            next(reader, None) 
            with open('coors_new.csv', mode='w') as outfile:
                writer = csv.writer(outfile)
                if Location == 'Estuary':
                    mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
                if Location == 'Ocean':
                    mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
                if Location == 'Catchment':
                    mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
        if Clim_var_type == 'tasmean':
            silo_df = sil.pointdata(["max_temp", "min_temp"], 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""), 
                                        None, mydict[Estuary][0], mydict[Estuary][1], False, None)
            #take mean of daily min and max temp
            Present_day_df =     silo_df.iloc[:,[2,4]].mean(axis=1)
        else:
            silo_df = sil.pointdata(SILO_Clim_var, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""), 
                                        None, mydict[Estuary][0], mydict[Estuary][1], False, None)
            Present_day_df = silo_df.iloc[:,[2]]
        #set the x and y limit deltas -  for plotting only
        if Clim_var_type in ['evspsblmean', 'potevpmean']: #ET time series that we have is not in the same format as the other variables, hence the different treatment
            [minplotDelta, maxplotDelta]=[50,50]
        #for tasmean, observed min and max T need to be converted into mean T
        elif Clim_var_type == 'tasmean':
            [minplotDelta, maxplotDelta]=[0.2,1]
        elif Clim_var_type == 'sstmean':
            [minplotDelta, maxplotDelta]=[40,40]
        elif Clim_var_type == 'tasmax':
            [minplotDelta, maxplotDelta]=[1,2]  
        elif Clim_var_type == 'wssmean' or  Clim_var_type == 'wss1Hmaxtstep':
            [minplotDelta, maxplotDelta]=[1, 1.5]  
        elif Clim_var_type == 'pracc':   
            [minplotDelta, maxplotDelta]=[50,100]
        elif Clim_var_type in ['rsdsmean', 'rldsmean']:   
            [minplotDelta, maxplotDelta]=[100,100]
            #[minplotDelta, maxplotDelta]=[1,1]
    #==========================================================#
     #substract a constant from all values to convert from kelvin to celcius (temp)
    if Clim_var_type == 'sstmean':
        #Present_day_df = Present_day_df.iloc[:,0:(len(Present_day_df)-1)]-273.15
        Present_day_df.index = Present_day_df.index.to_period('D')
    #==========================================================#
    #create seasonal sums etc. 
    #==========================================================#
    if Stats == 'maxdaily':
        Present_day_df_annual = Present_day_df.resample('A').max()
        Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
        Fdf_Seas_means = Present_day_df.resample('Q-NOV').max() #seasonal means
        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
    elif Stats == 'maxmonthly':
        Present_day_df_annual = Present_day_df.resample('M').max().resample('A').mean()
        Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
        Fdf_Seas_means = Present_day_df.resample('M').max().resample('Q-NOV').mean() #seasonal means
        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
    elif(Stats[:4] =='days'):
        Threshold = int(Stats[-2:])
        agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
        Present_day_df_annual = Present_day_df.resample('A').agg(agg)                
        Fdf_Seas_means = Present_day_df.resample('Q-NOV').agg(agg)  #seasonal means
    else:
        if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,
                         'pr1Hmaxtstep' ,'wss1Hmaxtstep',
                         'rsdsmean', 'rldsmean']:
            Present_day_df_annual = Present_day_df.resample('A').sum()
            Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
            Fdf_Seas_means = Present_day_df.resample('Q-NOV').sum() #seasonal means
            Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
        elif Clim_var_type in ['tasmean' ,'tasmax' ,'sstmean']:    
            Present_day_df_annual = Present_day_df.resample('A').mean()
            Present_day_df_monthly = Present_day_df.resample('M').mean()
            Present_day_df_weekly = Present_day_df.resample('W').mean()
            Fdf_Seas_means = Present_day_df.resample('Q-NOV').mean() #seasonal means
    #==========================================================#
    #==========================================================# 
    #calculate linear trend in present day series
    #==========================================================#
    if Stats == 'maxdaily' or Stats[:4] =='days' or Stats == 'maxmonthly':
        coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df_annual.index)),Present_day_df_annual.values,1,full=True)
        #mse = residuals[0]/(len(selected.index))
        #nrmse = np.sqrt(mse)/(selected.max() - selected.min())
        trend = coefficients[0]
        print('Slope ' + str(coefficients[0]))
        print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,4)) + ' units/year')
    elif Stats == 'mean':
        coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df.index)),Present_day_df.values,1,full=True)
        #mse = residuals[0]/(len(selected.index))
        #nrmse = np.sqrt(mse)/(selected.max() - selected.min())
        if force_sea_levels == 'yes':
            trend = coefficients[0]*12
        else:
            trend = coefficients[0]*365
        print('Slope ' + str(coefficients[0]))
        print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,5)) + ' units/year')
    #==========================================================#
    #==========================================================#   
    #Loop through annual and seasons and create a deviation plot for each. 
    #==========================================================#
    times = ['annual', 'DJF', 'MAM', 'JJA','SON']
    fig = plt.figure(figsize=(14,8))
    delta_all_df = pd.DataFrame()
    Combined_Delta_df = pd.DataFrame()
    Combined_Med_df = pd.DataFrame()
    i=1
    for temp in times:
        #temp = 'annual'
        #subset the ensemble dataframe for the period used:
        if temp == 'annual':
            Ensemble_Delta_df = Ensemble_Delta_full_df.iloc[:,range(0,2)]
            Present_Day_ref_df = Present_day_df_annual
            Column_names = ['Annual_near', 'Annual_far']
        else:
            Ensemble_Delta_df = Ensemble_Delta_full_df.filter(regex= temp)
            Ensemble_Delta_df.columns = ['near', 'far']
            if temp == 'DJF':
                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1]
                Column_names = ['Summer_near', 'Summer_far']
            if temp == 'MAM':
                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2]
                Column_names = ['Autumn_near', 'Autumn_far']
            if temp == 'JJA':
                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3]
                Column_names = ['Winter_near', 'Winter_far']
            if temp == 'SON':
                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4]
                Column_names = ['Spring_near', 'Spring_far']
            Present_Day_ref_df = Mean_df  
        #Subset to present day variability period
        Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
        #Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
        Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
        Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
        Present_Day_SD = np.std(Present_Day_Delta_ref_df)
        #create data frame for floating stacked barplots 
        index=['-2std', '-1std', 'Med', '1std', '2std']
        columns = ['present','near future', 'far future']
        Plot_in_df = pd.DataFrame(index=index, columns =columns)
        #  
        Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
               float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
        Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][1]), 
               np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][2])]
        Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][1]), 
               np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][2])]
        #Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
        Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
        #
        if presentdaybar == False:
            index=['-2std', '-1std', 'Med', '1std', '2std']
            columns = ['near future', 'far future']
            Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
            #Plot_in_df2['present'] = [float(0),float(0), float(0),
            #              float(0), float(0)]  
        else:
            Plot_in_df2['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_SD), float(Present_Day_SD),
                    float(Present_Day_SD), float(Present_Day_SD)]
        Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Ensemble_Delta_df.near[-3:][1]-Ensemble_Delta_df.near[-3:][0]), 
              np.NaN, float(Ensemble_Delta_df.near[-3:][2]-Ensemble_Delta_df.near[-3:][1])]
        Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Ensemble_Delta_df.far[-3:][1]-Ensemble_Delta_df.far[-3:][0]), 
               np.NaN, float(Ensemble_Delta_df.far[-3:][2]-Ensemble_Delta_df.far[-3:][1])]
        #transpose the data frame
        Plot_in_df_tp = Plot_in_df2.transpose()
        #do the individual plots
        if temp == 'annual':
            xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
            xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
        else:
            xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
            xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
        #define colour scheme
        #likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
        likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
        uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
        #plot the stacked barplot
        if temp == 'annual':
            ax=plt.subplot(2,4,3) 
        else:
            ax=plt.subplot(2,4,i)  
        Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)  
        df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
        index2=['Med']
        columns2 = ['near future', 'far future']
        Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
        Plot_in_df3['near future'] = df.iloc[1,0]
        Plot_in_df3['far future'] = df.iloc[0,0]
        Plot_in_df3 = Plot_in_df3.transpose()
 #       plt.plot(Plot_in_df3['Med'], linestyle="", markersize=45, 
 #             marker="_", color='red', label="Median")
        #Plot_in_df3['Med']
        #Plot_in_df3['Med'].values
        plt.plot(np.arange(2),Plot_in_df3['Med'].values[::-1], linestyle="", markersize=45, 
             marker="_", color='red', label="Median")
        z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
        z.set_zorder(-1)
        z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
        z.set_zorder(-1)
        z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
        z.set_zorder(-1)
        z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
        z.set_zorder(-1)
        z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
        z.set_zorder(-1)
        plt.ylim(xmin, xmax)
        #plt.ylim(10, 140)
        if Figure_headings == 'yes':
            plt.title(Clim_var_type + ' ' + temp)
        ax.grid(False)
        for tick in ax.get_xticklabels():
            tick.set_rotation(0)
        fig.tight_layout()
        fig.patch.set_alpha(ALPHA_figs)
        #plt.set_facecolor('g')
        if temp == 'annual':
            ax=plt.subplot(2,2,1) 
            Present_Day_ref_df.plot(legend=False, ax=ax)
            z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
            z.set_zorder(-1)
            z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
            z.set_zorder(-1)
            z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
            z.set_zorder(-1)
            z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
            z.set_zorder(-1)
            z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
            z.set_zorder(-1)
            #fig.patch.set_facecolor('deepskyblue')
            fig.tight_layout()
            fig.patch.set_alpha(ALPHA_figs)
            if Figure_headings == 'yes':
                #plt.title(Clim_var_type)
                #plt.title(Clim_var_type + ' ' + Stats + ' ' + Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
                plt.title(Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
            plt.ylim(xmin, xmax)
            #plt.ylim(0, 40)
            ax.grid(False)
            #if temp == 'MAM':
            i=i+4
        else:
            i=i+1
        #create a data frame that contains all of the delta bars
        Plot_in_df_tp.index = Column_names
        Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Present_Day_Mean
        Combined_Delta_df = pd.concat([Combined_Delta_df, Plot_in_df_tp], axis=0)
        if Allin1_delta_plot == 'yes':
            ax=plt.subplot(2,4,4) 
            temp = Combined_Delta_df[0:2]
            Plot_in_df_tp = temp
            uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
            Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
            if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
                Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
                Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
                Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)  
            else:
                Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
            if Median_markers == 'yes':
                plt.plot(np.arange(2),Plot_in_df_tp['Median'].values , linestyle="", markersize=45, 
                     marker="_", color='red', label="Median") 
            z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
            z.set_zorder(-1)
            plt.ylim(Deltaxmin, Deltaymin)
            #plt.ylim(xmin, xmax)
            if Figure_headings == 'yes':
                plt.title(Clim_var_type + ' All Deltas ' + Stats)
            #ax.grid(False)
            ax.xaxis.grid(False)
            for tick in ax.get_xticklabels():
                tick.set_rotation(10)
            fig.tight_layout()
            fig.patch.set_alpha(ALPHA_figs)
    #plt.show()  
    if Main_Plot == 'yes': 
        if presentdaybar == False:
            out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_'  + PD_Datasource +  '_'  + SILO_Clim_var[0] +  '_' + '_'  + Base_period_start +  '_'  + Base_period_end +  '_' +  Version +  '_'  + ' trend_' + str(np.round(trend[0],4)) + '.png'
        else:
            out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_'  + PD_Datasource +  '_'  + SILO_Clim_var[0] +  '_' +  Version +  '_'  + '2.png'
        out_path = output_directory + '/' + out_file_name
        fig.savefig(out_path)
    plt.close(fig) 
    if Allin1_delta_plot == 'yes': 
        fig = plt.figure(figsize=(14,8))
        ax = fig.add_subplot(2, 3, 1)
        temp = Combined_Delta_df
        Plot_in_df_tp = temp
        uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
        Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
        if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
            Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
            Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
            Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)  
        else:
            Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
        if Median_markers == 'yes':
 #            plt.bar(Plot_in_df_tp.index, Plot_in_df_tp['Median'], linestyle="", markersize=13, 
 #                     marker="_", color='red', label="Median") 
            plt.plot(np.arange(10), Plot_in_df_tp['Median'], linestyle="", markersize=13, 
                     marker="_", color='red', label="Median")
        z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
        z.set_zorder(-1)
        plt.ylim(Deltaxmin, Deltaymin)
        if Figure_headings == 'yes':
            plt.title(Clim_var_type + ' All Delstas ' + Stats)
        #ax.grid(False)
        ax.xaxis.grid(False)
        for tick in ax.get_xticklabels():
            tick.set_rotation(50)
        plt.setp(ax.xaxis.get_majorticklabels(), ha='right')
        #ax.set_xticklabels(xticklabels, rotation = 45, ha="right")
        fig.tight_layout()
        fig.patch.set_alpha(ALPHA_figs)
        plt.show()
        #export plot to png
        out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_'  + Base_period_start +  '_'  + Base_period_end +  '_'  +  Version + '_All_Deltas_In1.png'
        out_path = output_directory + '/' + out_file_name
        fig.savefig(out_path)
        plt.close(fig) 
 ##make  barplot with alternating colours       
 #men_means, men_std = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
 #women_means, women_std = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)
 #
 #ind = np.arange(len(men_means))  # the x locations for the groups
 #width = 0.35                     # the width of the bars
 #
 #fig, ax = plt.subplots()
 #rects1 = ax.bar(ind - width/2, men_means, width, yerr=men_std,
 #                color='SkyBlue', label='Men')
 #rects2 = ax.bar(ind + width/2, women_means, width, yerr=women_std,
 #                color='IndianRed', label='Women')
 #
 ## Add some text for labels, title and custom x-axis tick labels, etc.
 #ax.set_ylabel('Scores')
 #ax.set_title('Scores by group and gender')
 #ax.set_xticks(ind)
 #ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
 #ax.legend()
    if present_day_plot == 'yes':  
        #print present day climate data
            fig = plt.figure(figsize=(14,8))
            for i1 in range(2):
                if i1 ==0:
                    ax = fig.add_subplot(2, 3, 1)
                else:
                    ax = fig.add_subplot(2, 1, 2)
                xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
                xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
                Present_Day_ref_df.plot(legend=False, ax=ax)
                z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
                z.set_zorder(-1)
                z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
                z.set_zorder(-1)
                z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
                z.set_zorder(-1)
                z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
                z.set_zorder(-1)
                z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
                z.set_zorder(-1)
                #fig.patch.set_facecolor('deepskyblue')
                fig.patch.set_alpha(0)
                #plt.ylim(0, 400)
                plt.ylim(xmin, xmax)
                #export plot to png
            out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_'  + Base_period_start +  '_'  + Base_period_end +  '_'  +  Version + 'Present_Day_Period.png'
            out_path = output_directory + '/' + out_file_name
            fig.savefig(out_path)
            plt.close(fig) 
            # use transparent=True if you want the whole figure with a transparent background
--- a/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots_nonNARCLIM.py
+++ b/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots_nonNARCLIM.py
@ -0,0 +1,230 @@
    # -*- coding: utf-8 -*-
 """
 Created on Thu Jun 28 12:15:17 2018
@author: z5025317
 """
 # -*- coding: utf-8 -*-
 #==========================================================#
 #Last Updated - March 2018
 #@author: z5025317 Valentin Heimhuber
 #code for creating future climate variability deviation plots for NARCLIM variables. 
 #This code is derived from P1_NARCliM_First_Pass_variab_deviation_plots.py to deal with variables for which we don't have NARCLIM deltas. 
 #These variables are Sea Level, Acidity and...
 # it uses CSIRO CC in Australia CMIP-5 Deltas instead 
 #==========================================================#
 #Load packages
 #==========================================================#
 import numpy as np
 import os
 import pandas as pd
 import glob
 import matplotlib
 import matplotlib.pyplot as plt
 from datetime import datetime
 from datetime import timedelta
 from matplotlib.backends.backend_pdf import PdfPages
 from ggplot import *
 import csv
 matplotlib.style.use('ggplot')
 # Load my own functions
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
 import climdata_fcts as fct
 import silo as sil
 import re
 #==========================================================#
 #==========================================================#
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
 #==========================================================#
 #==========================================================#
 #set input parameters
 #==========================================================#
 Case_Study_Name = 'CASESTUDY2' 
 Casestudy2_csv_path =  "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTDUY2_NARCLIM_Point_Sites.csv"
 Estuary = 'HUNTER' # 'Belongil'
 Base_period_start = '1970-01-01'        #Start of interval for base period of climate variability
 Base_period_DELTA_start = '1990-01-01'   #Start of interval used for calculating mean and SD for base period (Delta base period)
 Base_period_end = '2009-01-01'          #use last day that's not included in period as < is used for subsetting
 Clim_var_type  = 'acidity'            #Name of climate variable in NARCLIM models  '*' will create pdf for all variables in folder
 [minplotDelta, maxplotDelta]=[0.5,1]
 #Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
 Location = 'Estuary'                 # pick locaiton for extracting the SILO data can be: Estuary, Catchment, or Ocean
 presentdaybar = False               #include a bar for present day variability in the plots?
 present_day_plot = 'yes'             #save a time series of present day        
 Version = "V3"
 Stats = 'mean' #'maxdaily' #maximum takes the annual max Precipitation instead of the sum
 ALPHA_figs = 1 #Set alpha of figure background (0 makes everything around the plot panel transparent)
 #==========================================================#
 #==========================================================#
 #set directory path for output files
 output_directory = 'Output/' + Case_Study_Name +'_'+ Version + '/' + Estuary + '/' + '/Clim_Deviation_Plots/'
 #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
 if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
    print('-------------------')
 #==========================================================#
 #==========================================================#
 #read ensemble change CSV file  
 #==========================================================#
 clim_var_csv_path = './Data/Non_NARCLIM/AUZ_CC_CSIRO_Deltas/Brisbane_Ocean_Variables.csv'
 df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
 df =  df.filter(regex= 'RCP 8.5|Variable')
 df = df[df.Variable=='Ocean pH']
 Ensemble_Delta_df = pd.DataFrame(index=['Median', '10th',  '90th'], columns =['near', 'far'])
 Ensemble_Delta_df['near'] = re.split('\(|\)|\ to |', df.iloc[0,1])[0:3]
 Ensemble_Delta_df['far'] = re.split('\(|\)|\ to |', df.iloc[0,2])[0:3]    
 Ensemble_Delta_df = Ensemble_Delta_df.astype(float).fillna(0.0)
 #==========================================================# 
 #==========================================================#
 #load present day climate variable time series
 #==========================================================#
 #Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
 Present_day_Var_CSV = glob.glob('./Data/Non_NARCLIM/Hunter_Lower_Acidity2.csv')
 Present_day_df = pd.read_csv(Present_day_Var_CSV[0],parse_dates=True, index_col=0)
 Present_day_df.columns = ['PH']
 df = Present_day_df
 df.index = df.index.to_period('D')
 Present_day_df = df
 Present_day_df = df.groupby(df.index).mean().reset_index()
 Present_day_df.columns = ['Date','PH']
 Present_day_df.index = Present_day_df['Date']
 Present_day_df= Present_day_df['PH']
 #Present_day_df.index = pd.to_datetime(Present_day_df.index,format='%Y%m%d')
 if Stats == 'mindaily':
    Present_day_df_annual = Present_day_df.resample('A').min()
    Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
 if Stats == 'minmonthly':
    Present_day_df_annual = Present_day_df.resample('M').resample('A').min()
    Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
 if Stats == 'mean':
    Present_day_df_annual = Present_day_df.resample('A').mean()
    Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
 Present_Day_ref_df = Present_day_df_annual
 #Subset to present day variability period
 Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
 #Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
 Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
 Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
 Present_Day_SD = np.std(Present_Day_Delta_ref_df)
 #create data frame for floating stacked barplots 
 index=['-2std', '-1std', 'Med', '1std', '2std']
 columns = ['present','near future', 'far future']
 Plot_in_df = pd.DataFrame(index=index, columns =columns)
 #  
 Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
       float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
 Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['Median']), 
       np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['90th'])]
 Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['Median']), 
       np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['90th'])]
 #Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
 Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
 #
 index=['-2std', '-1std', 'Med', '1std', '2std']
 columns = ['near future', 'far future']
 Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
 Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['90th']),np.NaN, float(Ensemble_Delta_df.near['Median']-Ensemble_Delta_df.near['90th']), 
      np.NaN, float(Ensemble_Delta_df.near['10th']-Ensemble_Delta_df.near['Median'])]
 Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['90th']),np.NaN, float(Ensemble_Delta_df.far['Median']-Ensemble_Delta_df.far['90th']), 
       np.NaN, float(Ensemble_Delta_df.far['10th']-Ensemble_Delta_df.far['Median'])]
 #transpose the data frame
 Plot_in_df_tp = Plot_in_df2.transpose()
 #do the individual plots
 xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
 xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
 #define colour scheme
 #likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
 likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
 uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
 #plot the stacked barplot
 fig = plt.figure(figsize=(14,8))
 ax=plt.subplot(2,4,3) 
 Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)  
 df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
 index2=['Med']
 columns2 = ['near future', 'far future']
 Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
 Plot_in_df3['near future'] = df.iloc[1,0]
 Plot_in_df3['far future'] = df.iloc[0,0]
 Plot_in_df3 = Plot_in_df3.transpose()
 plt.plot(Plot_in_df3['Med'], linestyle="", markersize=52, 
     marker="_", color='red', label="Median")
 z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
 z.set_zorder(-1)
 plt.ylim(xmin, xmax)
 plt.title(Clim_var_type)
 ax.grid(False)
 for tick in ax.get_xticklabels():
    tick.set_rotation(0)
 fig.tight_layout()
 fig.patch.set_alpha(ALPHA_figs)
 #plot the present day time series      
 ax=plt.subplot(2,2,1) 
 Present_Day_ref_df.plot(legend=False, ax=ax)
 z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
 z.set_zorder(-1)
 z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
 z.set_zorder(-1)
 #fig.patch.set_facecolor('deepskyblue')
 fig.tight_layout()
 fig.patch.set_alpha(ALPHA_figs)
 plt.title(Clim_var_type + ' ' + Stats + ' annual present day')
 plt.ylim(xmin, xmax)
 ax.grid(False)
 if presentdaybar == False:
    out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_'  + Base_period_start +  '_'  + Base_period_end +  Version +  '_'  + '_NPDB2.png'
 out_path = output_directory + '/' + out_file_name
 fig.savefig(out_path)
--- a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py
+++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py
@ -1,6 +1,7 @@
-# -*- coding: utf-8 -*-
+`# -*- coding: utf-8 -*-
 from netCDF4 import *
 import numpy as np
 from numpy import *
 import os
 import pandas as pd
 import glob
@ -22,6 +23,9 @@ if __name__ == "__main__":
    parser.add_argument("--varName", help="operation")
    parser.add_argument("--timestep", help="operation")
    parser.add_argument("--domain", help="operation")
    parser.add_argument("--LocationName", help="operation")
    parser.add_argument("--Datatype", help="operation")
    parser.add_argument("--BiasBool", help="operation")
    args = parser.parse_args()
    print(args.lat)
    print(args.lon)
@ -31,91 +35,201 @@ if __name__ == "__main__":
    Clim_var_type = args.varName
    NC_Domain = args.domain
    Timestep = args.timestep
-    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep)
+    Location = args.LocationName
-#set directory path for output files
+    Data_Type = args.Datatype
-output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out'
+    Bias_Correction_BOOL = args.BiasBool
-#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
+    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)
-if not os.path.exists(output_directory):
+
-    os.makedirs(output_directory)
+lat_equal_len_string="%.3f" % abs(mylat)
-    print("output directory folder didn't exist and was generated here:")
+lon_equal_len_string= "%.3f" % mylon
-    print(output_directory)
+
-#
+if Bias_Correction_BOOL == 'False':
-time.sleep(10) 
+    #set directory path for output files
-#manual input via script
+    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' +  lat_equal_len_string + '_' + lon_equal_len_string + '/'
-#mylat= -33.9608578
+    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
-#mylon= 151.1339882
+    print '---------------------------------------------------------'
-#Clim_var_type = 'pr1Hmaxtstep'
+    if not os.path.exists(output_directory):
-#NC_Domain = 'd02'
+        os.makedirs(output_directory)
-#
+        print("output directory folder didn't exist and was generated here:")
-#set up the loop variables for interrogating the entire NARCLIM raw data
+        print(output_directory)
-NC_Periods = ('1950-2009','2020-2039','2060-2079')
+    print '---------------------------------------------------------'
-#
+    #
-#Define empty pandas data frames 
+    #time.sleep(10) 
-Full_df = pd.DataFrame()
+    #set up the loop variables for interrogating the entire NARCLIM raw data
-GCM_df = pd.DataFrame()
+    NC_Periods = ('1990-2009','2020-2039','2060-2079')
-R13_df = pd.DataFrame()
+    if Data_Type == 'T_NNRP':
-MultiNC_df = pd.DataFrame()
+        NC_Periods = ('1950-2009','Stop')
    #
    #Define empty pandas data frames 
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    #Loop through models and construct CSV per site
    for NC_Period in NC_Periods:
        if NC_Period != "Stop":
            Period_short = NC_Period[:4]
            GCMs = os.listdir('./'+ NC_Period)
            for GCM in GCMs:
                print GCM
                Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
                for Warf_run in Warf_runs:
                    Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
                    print Current_input_dir
                    Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
                    Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
                    #print Climvar_NCs[1]
                    #Climvar_NCs = Climvar_NCs[0:2]
                    #print(Climvar_NCs)
                    for netcdf in Climvar_NCs:
                        f=Dataset(netcdf)
                        # This section print on the screen information contained in the headings of the file
                        #print '---------------------------------------------------------'
                        #print f.ncattrs()
                        #print f.title
                        #print f.variables
                        #print
                        #for varname in f.variables:
                        #	print varname,' -> ',np.shape(f.variables[varname])
                        #print '---------------------------------------------------------'
                        # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
                        dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
                        dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
                        dist=dist_x + dist_y
                        latindex=np.where(dist_y==np.min(dist_y))
                        lonindex=np.where(dist_x==np.min(dist_x))
                        index=np.where(dist==np.min(dist))
                        print '---------------------------------------------------------'
                        print  netcdf
                        print 'Information on the nearest point'
                        print 'Your desired lat,lon = ',mylat,mylon
                        print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
                        #print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
                        #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
                        d={}
                        #d["time"] = f.variables['time'][:]
                        d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
                        #if GCM == 'NNRP' and Warf_run == 'R1':
                        #    d['Period']= NC_Period
                        timestamp = f.variables['time'][:]
                        timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
                        df1=pd.DataFrame(d, index=timestamp_dates)
                        f.close()
                        print 'closing  '+ os.path.basename(os.path.normpath(netcdf)) + '  moving to next netcdf file'
                        #print f
                        print '---------------------------------------------------------'
                        #append in time direction each new time series to the data frame
                        MultiNC_df = MultiNC_df.append(df1)
                    #append in columns direction individual GCM-RCM-123 run time series (along x axis) 
                    MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                    R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                    MultiNC_df =pd.DataFrame()
                #append blocks of R1 R2 and R3 in x axis direction 
                R13_df = R13_df.sort_index(axis=0, ascending=True)
                GCM_df = pd.concat([GCM_df, R13_df], axis=1)
                R13_df = pd.DataFrame()
            #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
            GCM_df = GCM_df.sort_index(axis=0, ascending=True)
            Full_df = pd.concat([Full_df, GCM_df], axis=1)
            GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)
    #adding a column with the NARCLIM decade
    Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
 #
-#Loop through models and construct CSV per site
+if Bias_Correction_BOOL == 'True':
-for NC_Period in NC_Periods:
+    os.chdir('/srv/ccrc/data30/z3393020/NARCliM/Bias_corrected/')
-    Period_short = NC_Period[:4]
+    #set directory path for output files
-    GCMs = os.listdir('./'+ NC_Period)
+    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' +  lat_equal_len_string + '_' + lon_equal_len_string + '/Bias_corrected/'
    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("output directory folder didn't exist and was generated here:")
        print(output_directory)
    #time.sleep(10) 
    #set up the loop variables for interrogating the entire NARCLIM raw data
    GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
        #set up the loop variables for interrogating the entire NARCLIM raw data
    #
    #Define empty pandas data frames 
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    #Loop through models and construct CSV per site
    for GCM in GCMs:
-        Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
+        print GCM
        Warf_runs = os.listdir('./' + GCM + '/')
        for Warf_run in Warf_runs:
-            Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
+            NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
-            print Current_input_dir
+            for NC_Period in NC_Periods:
-            Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
+                Period_short = NC_Period[:4]
-            Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
+                Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
-            #Climvar_NCs = Climvar_NCs[0:2]
+                print Current_input_dir
-            #print(Climvar_NCs)
+                Clim_var_type = Clim_var_type + '_bc.nc'
-            for netcdf in Climvar_NCs:
+                Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type 
-                f=Dataset(netcdf)
+                Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
-                # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
+                print Climvar_NCs[1]
-                dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
+                print Climvar_NCs[2]
-                dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
+                for netcdf in Climvar_NCs:
-                dist=dist_x + dist_y
+                    #netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
-                latindex=np.where(dist_y==np.min(dist_y))
+                    #print netcdf
-                lonindex=np.where(dist_x==np.min(dist_x))
+                    f=Dataset(netcdf)
-                index=np.where(dist==np.min(dist))
+                    # This section print on the screen information contained in the headings of the file
-                print '---------------------------------------------------------'
+                    print '---------------------------------------------------------'
-                print  netcdf
+                    print f.ncattrs()
-                print 'Information on the nearest point'
+                    print f.title
-                print 'Your desired lat,lon = ',mylat,mylon
+                    print f.variables
-                print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
+                    print
-                print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
+                    for varname in f.variables:
-                #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
+                    	print varname,' -> ',np.shape(f.variables[varname])
-                d={}
+                    print '---------------------------------------------------------'
-                #d["time"] = f.variables['time'][:]
+                    # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
-                d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
+                    dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
-                #if GCM == 'NNRP' and Warf_run == 'R1':
+                    dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
-                #    d['Period']= NC_Period
+                    dist=dist_x + dist_y
-                timestamp = f.variables['time'][:]
+                    latindex=np.where(dist_y==np.min(dist_y))
-                timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
+                    lonindex=np.where(dist_x==np.min(dist_x))
-                df1=pd.DataFrame(d, index=timestamp_dates)
+                    index=np.where(dist==np.min(dist))
-                f.close()
+                    print '---------------------------------------------------------'
-                print 'closing  '+ os.path.basename(os.path.normpath(netcdf)) + '  moving to next netcdf file'
+                    print  netcdf
-                print '---------------------------------------------------------'
+                    print 'Information on the nearest point'
-                #append in time direction each new time series to the data frame
+                    print 'Your desired lat,lon = ',mylat,mylon
-                MultiNC_df = MultiNC_df.append(df1)
+                    print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
-            #append in columns direction individual GCM-RCM-123 run time series (along x axis) 
+                    print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
-            MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
+                    #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
-            R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
+                    d={}
-            MultiNC_df =pd.DataFrame()
+                    #d["time"] = f.variables['time'][:]
-        #append blocks of R1 R2 and R3 in x axis direction 
+                    d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
-        R13_df = R13_df.sort_index(axis=0, ascending=True)
+                    #if GCM == 'NNRP' and Warf_run == 'R1':
-        GCM_df = pd.concat([GCM_df, R13_df], axis=1)
+                    #    d['Period']= NC_Period
-        R13_df = pd.DataFrame()
+                    timestamp = f.variables['time'][:]
-    #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
+                    timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
-    GCM_df = GCM_df.sort_index(axis=0, ascending=True)
+                    df1=pd.DataFrame(d, index=timestamp_dates)
-    Full_df = pd.concat([Full_df, GCM_df], axis=1)
+                    f.close()
-    GCM_df = pd.DataFrame()
+                    print 'closing  '+ os.path.basename(os.path.normpath(netcdf)) + '  moving to next netcdf file'
-Full_df = Full_df.sort_index(axis=0, ascending=True)
+                    #print f
-#adding a column with the NARCLIM decade
+                    print '---------------------------------------------------------'
-Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
+                    #append in time direction each new time series to the data frame
-Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
+                    MultiNC_df = MultiNC_df.append(df1)
-Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
+                #append in columns direction individual GCM-RCM-123 run time series (along x axis) 
                MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                MultiNC_df =pd.DataFrame()
            #append blocks of R1 R2 and R3 in x axis direction 
            R13_df = R13_df.sort_index(axis=0, ascending=True)
            GCM_df = pd.concat([GCM_df, R13_df], axis=1)
            R13_df = pd.DataFrame()
        #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
        GCM_df = GCM_df.sort_index(axis=0, ascending=True)
        Full_df = pd.concat([Full_df, GCM_df], axis=1)
        GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)
 #export the pandas data frame as a CSV file within the output directory
-out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
+out_file_name = Clim_var_type + '_'+  Data_Type[2:] + '_' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '_NARCliM_summary.csv'
 out_path = output_directory +'/' + out_file_name
 Full_df.to_csv(out_path)
--- a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt
+++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt
@ -0,0 +1,90 @@
 Variables available from NARCLIM (output):
 'evspsblmean'   	water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
 'potevpmean'    	potential ET water_potential_evaporation_flux kg m-2 s-1
 'tasmean’               mean near surface temperature
 ‘tasmax’		maximum near surface temperature
 ‘pracc’ 		precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip) 
 ‘pr1Hmaxtstep’     	maximum 1 hour interval rainfall in a one day period
 'pr1Hmaxtstep' 		Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
 'pr30maxtstep'		Max. 30min time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
 'wss1Hmaxtstep'		Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
 'wssmax' 		Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
 'wssmean'		Surface wind speed standard_name: air_velocity units: m s-1
 'sstmean' 		Daily mean sea surface temperature
 Sites:
 Northern NSW:
 Tweed River: -28.17, 153.56
 Cudgera Creek: -28.36, 153.58
 Belongil Creek: -28.63, 153.59
 Central NSW:
 Port Stevens: -32.71, 152.20
 Terrigal Lagoon: -33.4, 151.44
 Hunter River: -32.91, 151.80
 Hunter River near Mahmods site: -32.843, 151.706
 Southern NSW:
 Batemans Bay:  -35.76, 150.25
 Towamba River: -37.1, 149.91
 Nadgee Lake: -37.47, 149.97
 Code Input Variables:
 Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
 BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs
 Execution of code in bash-Code for netcdf interrogation:
 1st step: log into storm servers: Putty: hurricane.ccrc.unsw.edu.au or typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au + UNSW credentials (zID)
 In BASH copy and enter:
 module load python 
 latitude=-32.91
 longitude=151.80
 name='HunterRiver'
 Datatype='T_NNRP'
 Biasboolean='False'
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
 python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean
 #1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores into CSV files.
 Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv
 #2 The "P1_NARCliM_plots_Windows" code takes these CSV files as input and creates a) a pdf wiht a number of time series and box plots and b) another CSV file containing the Deltas between present day, near and far future
 for each model in the ensemble. Output: C:\Users\z5025317\WRL_Postdoc\Projects\Paper#1\Output\Nadgee\Nadgee_tasmax_NARCliM_ensemble_changes.csv
 #3 The "P1_NARCliM_First_Pass_variab_deviation_plots" code takes those Delta CSV files as input and generates the future climate deviation plots that were originally developed by Duncan Rayner. 
 Run the code with different constellations of Estuary (study sites) and climate variables: 
 e.g. Clim_var_type  = "tasmax*"   #  '*' will create pdf for all variables in folder  
 Present_Day_Clim_Var = 'MaxT'    #MaxT, MinT, Rainfall, (the name for present day clim var refers to the name of the observed climate data that's used for the baseline period variability. 
 #!!!# Important!
 #For present day variability data, only rainfall and temperature data actually correspond to the study sites. ET and Wind are taken from the existing project folder and hence, are for a Hunter weather station
 #e.g. Terrigal_Wind and Terrigal_ET are actually Hunter in reality. This is because we don't have ET for other sites than Hunter at this stage. 
 ##PROBLEM: Without changing anything, the P1_NARCliM_NC_to_CSV_CCRC_SS.py stopped working properly on the CCRC storm servers. It's not giving an error but loading the nc files with Dataset(nc) just takes unlimited time.
 It used to take only a few seconds. NOT solved yet as of 7th of May 2018.### This was solved for the /postprocessed folder at the end of May 2018 but the problem persits with the /bias_corrected/ data folder. 
 running a simple netcdf info script
 python /srv/ccrc/data02/z5025317/Code_execution/P1_Basic_NETCDF_Interrogation.py
 deactivate –
 conda env create --name EEenv -- file ‘C:\Users\z5025317\WRL_Postdoc\Software\EE\ee-jupyter-examples-master\kilian_env’
--- a/Analysis/Code/P1_NARCliM_plots_Windows.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows.py
@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
-#####################################----------------------------------
+#==========================================================#
-#Last Updated - March 2018
+#Last Updated - June 2018
 #@author: z5025317 Valentin Heimhuber
 #code for creating climate prioritization plots for NARCLIM variables. 
 #Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
-#####################################----------------------------------
+
 #==========================================================#
 #Load packages
-#####################################----------------------------------
+#==========================================================#
 import numpy as np
 import os
 import pandas as pd
@ -16,183 +17,486 @@ import matplotlib.pyplot as plt
 from datetime import datetime
 from datetime import timedelta
 from matplotlib.backends.backend_pdf import PdfPages
 from ggplot import *
 matplotlib.style.use('ggplot')
-#
+
 # import own modules
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
 import climdata_fcts as fct
 import silo as sil
 ALPHA_figs = 0 
 font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 14}
 matplotlib.rc('font', **font)
 #==========================================================#
 # Set working direcotry (where postprocessed NARClIM data is located)
-os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/NARCLIM/')
+os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
-#
+#==========================================================#
-#
+
-#####################################----------------------------------
+         
 #==========================================================#
 #set input parameters
-Base_period_start = '1990-01-01'
+Case_Study_Name = 'CASESTUDY2'
-Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
+Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
-#Clim_var_type  = 'tasmean' will create pdf for all variables in folder           
+
-#####################################----------------------------------
+Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
-#set directory path for output files
+Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
-output_directory = 'Output'
+#Estuaries = ['SHOALHAVEN']
-#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
+for Est in Estuaries:
-if not os.path.exists(output_directory):
+    #Estuary = 'HUNTER' # 'Belongil'
-    os.makedirs(output_directory)
+    Estuary = Est # 'Belongil'
-    print("output directory folder didn't exist and was generated")
+    print Estuary
-Clim_Var_CSVs = glob.glob('./Site_CSVs/*')
+    #Clim_var_type  = 'potevpmean'    #  '*' will create pdf for all variables in folder  "pracc*|tasmax*"   
-#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
+    Clim_var_types  = ['evspsblmean']
-#read CSV file
+    for climvar in Clim_var_types:
-for clim_var_csv_path in Clim_Var_CSVs:
+        Clim_var_type  = climvar 
-    Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
+        
-    Clim_var_type = Filename.split('_', 1)[0]
+        #==========================================================#
-    print(clim_var_csv_path)
+        #set input preferences
-    Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
+        #==========================================================#
-    #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
+        plot_pdf = 'no'
-    Full_df.index = Full_df.index.to_period('D')
+        plot_pngs = 'no'
-    #check data types of columns
+        delta_csv = 'yes'
-    #Full_df.dtypes
+        Stats = 'mean'   # 'maxdaily', 'mean', 'days_h_25'
-    
+        DoMonthly = True
-    #substract a constant from all values (temp)
+        Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
-    if Clim_var_type == 'tasmean':
+        Location = 'Catchment'
-        Full_df = Full_df.iloc[:,0:26]-273.15
+        #==========================================================# 
-    
+        
-    #index the narclim periods if needed
+        #==========================================================#
-    #Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
+        #set directory path for output files
-    #Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
+        output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
-    #Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
+        #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
-    
+        if not os.path.exists(output_directory):
-    #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
+            os.makedirs(output_directory)
-    Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)]
+            print('-------------------------------------------')
-    #Aggregate daily df to annual time series
+            print("output directory folder didn't exist and was generated")
-    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
+            print('-------------------------------------------')
-    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
+            
-        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
+        #set directory path for individual png files
-        Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
+        png_output_directory = 'Output/' + Case_Study_Name +  '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
-        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
+        #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
-        Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
+        if not os.path.exists(png_output_directory):
-        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
+            os.makedirs(png_output_directory)
-        Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
+            print('-------------------------------------------')
-        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
+            print("output directory folder didn't exist and was generated")
-        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
+            print('-------------------------------------------')
-    else:    
+        #==========================================================#    
-        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
+                
-        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
+        #==========================================================#
-        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
+        Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
-        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
+        if Location == 'Catchment':
-    #plot the mean of all model runs
+            Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
-    print('-------------------------------------------')
+        Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
-    print('mean of all models for climate variable:  ' + Clim_var_type)
+        Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0])) 
-    Fdf_1900_2080_means = Fdf_1900_2080.mean()
+        Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
-    Fdf_1900_2080_means.plot(kind='bar', ylim=(16,22)).figure
+        #==========================================================#        
-    print('-------------------------------------------')
+        
-    
+        #==========================================================#
-    #time series plots:
+        #read CSV files and start analysis
-    #ranks = Fdf_1900_2080_means[3:].rank(axis=0)
+        #==========================================================#
-    df3 = Fdf_1900_2080_annual
+        #for clim_var_csv_path in Clim_Var_CSVs:
-    Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1)
+        clim_var_csv_path = Clim_Var_CSVs[0]
-    Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25]]
+        Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
-    
+        Clim_var_type = Filename.split('_', 1)[0]
-    #write the key plots to a single pdf document
+        print(clim_var_csv_path)
-    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf'
+        Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
-    pdf_out_path = output_directory +'/' + pdf_out_file_name
+        #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
-    #open pdf and add the plots
+        Full_df.index = Full_df.index.to_period('D')
-    
+        Full_df = Full_df.drop(columns=['period'])
-    #developement
+        Ncols_df = len(Full_df)
-    #I want to create a simple density plot for all values in present day, near future and far future
+        #check data types of columns
-    #this should be an easy indicator of change
+        #Full_df.dtypes
-    #subset present day simulations
+        #==========================================================#
-    Full_current_df = Fdf_1900_2080.iloc[:,[0,1,2]]
+        
-    Full_current_df = Full_current_df.stack()
+        
-    #nearfuture
+        #==========================================================#
-    Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,15)]
+        #substract a constant from all values to convert from kelvin to celcius (temp)
-    Full_nearfuture_df = Full_nearfuture_df.stack()
+        if Clim_var_type in ['tasmean','tasmax','sstmean']:
-    #farfuture
+            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
-    Full_farfuture_df = Fdf_1900_2080.iloc[:,range(15,27)]
+        if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
-    Full_farfuture_df = Full_farfuture_df.stack()
+            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
-    Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
+        if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
-    Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
+            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
-    Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture']
+        if Clim_var_type in ['pr1Hmaxtstep']:
-    #end of developement
+            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60           
-    with PdfPages(pdf_out_path) as pdf:
+        if Clim_var_type in ['rsdsmean','rldsmean']:
-        #barplot of model means
+            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000  #convert to unit used in SILO
-        plt.title(Clim_var_type + ' -  model means - full period')
+        Fdf_1900_2080 = Full_df
-        ymin = min(Fdf_1900_2080_means) 
+        #==========================================================#
-        ymax = max(Fdf_1900_2080_means) 
+        
-        Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
+        
-        Fdf_1900_2080_means.plot(kind='bar')
+        #==========================================================#
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        #Aggregate daily df to annual time series
-        plt.close()
+        if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
-        #full period density comparison
+                             'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
-        plt.title(Clim_var_type + ' -  density comparison - full period')
+            if(Stats == 'maxdaily'):
-        Summarized_df.plot.kde()
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        plt.close()
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
-        #annual box
+                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        plt.title(Clim_var_type + ' -  Annual means')
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #seasonal means
-        Fdf_1900_2080_annual.boxplot(rot=90)
+                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            elif(Stats[:4] =='days'):
-        plt.close()
+                Threshold = int(Stats[-2:])
-        #monthly box
+                #agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
-        plt.title(Clim_var_type + ' -  Monthly means')
+                agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
-        Fdf_1900_2080_monthly.boxplot(rot=90)
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)                
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                #Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        plt.close()
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg)  #seasonal means
-        #weekly box
+                #Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        plt.title(Clim_var_type + ' -  Weekly means')
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
-        Fdf_1900_2080_weekly.boxplot(rot=90)
+            else:
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
-        plt.close()
+                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        #daily box
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
-        plt.title(Clim_var_type + ' -  Daily means')
+                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        Fdf_1900_2080.boxplot(rot=90)
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #Monthlyonal means
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
-        plt.close()
+        else:  
-        # time series plot annual ALL models
+            if(Stats == 'maxdaily'):
-        plt.title(Clim_var_type + ' -  Time series - all models')
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
-        Fdf_1900_2080_annual.plot(legend=False)
+                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
-        plt.close()
+                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        #
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #Monthlyonal means
-        plt.title(Clim_var_type + ' -  Time series - representative models')
+                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
-        Fdf_annual_sorted_subset.plot(legend=False)
+            elif(Stats == 'maxmonthly'):
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
-        plt.close()
+                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        # seasonal mean boxplots
+                Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #Monthlyonal means
-        plt.title(Clim_var_type + ' -  DJF Summer means')
+                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
+            elif(Stats[:4] =='days'):
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Threshold = int(Stats[-2:])
-        plt.close()  
+                #agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
-        plt.title(Clim_var_type + ' -  DJF Summer means')
+                agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)                
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                #Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        plt.close()
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg)  #seasonal means
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
+                #Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
-        plt.title(Clim_var_type + ' -  MAM Autumn means')
+            else:
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
+                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
-        plt.close() 
+                Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #seasonal means
-        plt.title(Clim_var_type + ' -  MAM Autumn means')
+        Fdf_1900_2080_means = Fdf_1900_2080.mean()
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
+        #==========================================================#
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        
-        plt.close()
+        
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
+        #==========================================================#
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
+        #Select the 3 most representative models (min med and max difference betwen far future and present)
-        plt.title(Clim_var_type + ' -  JJA Winter means')
+        dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name  = fct.select_min_med_max_dif_model(Fdf_1900_2080)
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
+        #==========================================================#
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        
-        plt.close() 
+        #==========================================================#
-        plt.title(Clim_var_type + ' -  JJA Winter means')
+        #create a dataframe that has 1 column for each of the three representative models
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
+        dfa = Fdf_1900_2080_annual.iloc[:,[0]]
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
-        plt.close()
+        dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
+        dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
+        dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-        plt.title(Clim_var_type + ' -  SON Spring means')
+        dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
+        dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        dfall_annual = dfa1.append(dfa2).append(dfa3)
-        plt.close() 
+        #==========================================================#
-        plt.title(Clim_var_type + ' -  SON Spring means')
+        
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
+        #==========================================================#
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        #Create Deltas of average change for annual and seasonal basis
-        plt.close()
+        #==========================================================#
-
+        delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
-#plots not used  
+        if DoMonthly ==True:
-#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)
+            delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)  
            delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]  
        #==========================================================#
        if delta_csv == 'yes':
            out_file_name = Estuary  + '_'  + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes' + Perc_Vs_Abs +  '_' + Location + '_' + Coordinates + '.csv'
            out_path = output_directory + '/' + out_file_name
            delta_all_df.to_csv(out_path)
        if delta_csv == 'yes' and DoMonthly ==True:
            out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
            out_path = output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name
            delta_all_df_monthly.to_csv(out_path)
        #==========================================================#
        #==========================================================#
        #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
        Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
        Full_current_df = Full_current_df.stack()
        #nearfuture
        Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
        Full_nearfuture_df = Full_nearfuture_df.stack()
        #farfuture
        Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
        Full_farfuture_df = Full_farfuture_df.stack()
        Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
        Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
        Summarized_df.columns = ['present', 'near', 'far']
        #==========================================================#
        #==========================================================#
        #generate colour schemes for plotting
        #==========================================================#
        plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
                 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
                 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
        plotcolours36b = ['tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  ,
                 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  ,
                 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple' ]
        plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
        plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
        color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
                  'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3':  'tomato',
                  'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
        #==========================================================#
        #==========================================================#
        #output crucial summary plots into individual png files
        #==========================================================#
        if plot_pngs == 'yes':
            #=========#
            #Barplot of near and far future deltas
            #=========#
            #out name
            png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            #prepare data frame for plot
            neardeltadf=delta_all_df['near']
            ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
            ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
            neardeltadf=delta_all_df['far']
            ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
            ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
            ymin = min(ymin, ymin2)
            if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
                ymin = 0
            ymax = max(ymax, ymax2)
            #
            fig = plt.figure(figsize=(5,6))
            ax=plt.subplot(2,1,1) 
            plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
            neardeltadf=delta_all_df['near']
            neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)  
            plt.xticks([])
            ax=plt.subplot(2,1,2) 
            plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
            neardeltadf=delta_all_df['far']
            neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
            ax.xaxis.grid(False)
            #ax.patch.set_alpha(ALPHA_figs)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
            #=========#
            #full period density comparison
            #=========#
            #out name
            png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            plt.title(Clim_var_type + ' - ' + Stats + ' -  hist - full period - max delta model')
            #prepare data
            xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) -  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
            xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) +  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
            fig = plt.figure(figsize=(5,5))
            ax=plt.subplot(2,1,1) 
            Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
            #ax.xaxis.grid(False)
            ax.yaxis.grid(False)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
            #=========#
            # time series plot annual ALL models
            #=========#
            png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            plt.title(Clim_var_type + ' - ' + Stats + ' -  Time series - representative models')
            #prepare data
 #            Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
 #            test = Fdf_1900_2080_annual
 #            Mod_Names = test.columns
 #            New_Mod_Name = []
 #            for i in range(0,len(Mod_Names)):
 #                if(Stats[:4] =='days'):
 #                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i][0])
 #                else:
 #                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i])
 #            test.columns = New_Mod_Name
 #            test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
 #            colnamest = test.columns
 #            test_sorted.columns = [w[3:-5] for w in colnamest]
            #plot
            fig = plt.figure(figsize=(8,7))
            ax=plt.subplot(2,1,1)
            #test_sorted.plot(ax=ax) #,color =  plotcolours36)
            Fdf_1900_2080_annual_b = Fdf_1900_2080_annual
            New_Mod_Name = []
            if(Stats[:4] =='days'):
                for each in Fdf_1900_2080_annual_b.columns:
                    New_Mod_Name.append(each[0][:-5])
            else:
                for each in Fdf_1900_2080_annual_b.columns:
                    New_Mod_Name.append(each[:-5])
            Fdf_1900_2080_annual_b.columns = New_Mod_Name
            Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
            ax.xaxis.grid(False)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
        #==========================================================#
        #output some summary plot into pdf
        #==========================================================#
        if plot_pdf == 'yes':
            #plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
            #write the key plots to a single pdf document
            pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
            pdf_out_path = output_directory +'/' + pdf_out_file_name
            #open pdf and add the plots
            with PdfPages(pdf_out_path) as pdf:
                #barplot of model means
                plt.title(Clim_var_type + ' -  model means - full period')
                ymin = min(Fdf_1900_2080_means) 
                ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
                Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #
                neardeltadf=delta_all_df['near']
                ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
                ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
                neardeltadf=delta_all_df['far']
                ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
                ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
                ymin = min(ymin, ymin2)
                if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
                    ymin = 0
                ymax = max(ymax, ymax2)
                #
                # delta barplot for report 1#################################
                ax=plt.subplot(2,1,1) 
                plt.title(Clim_var_type + ' -  model deltas - near-present')
                neardeltadf=delta_all_df['near']
                neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)  
                ax.patch.set_alpha(ALPHA_figs)
                plt.xticks([])
                #ax.xaxis.set_ticklabels([])
                #pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
                #plt.close()
                #
                ax=plt.subplot(2,1,2) 
                plt.title(Clim_var_type + ' -  model deltas - far-present')
                neardeltadf=delta_all_df['far']
                fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
                ax.xaxis.grid(False)
                ax.patch.set_alpha(ALPHA_figs)
                #fig.patch.set_alpha(ALPHA_figs)
                #plt.show()
                pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
                plt.close()
                # end delta barplot for report 1#################################
                #
                #full period density comparison
                plt.title(Clim_var_type + ' -  density comparison - full period - all models')
                Summarized_df.plot.kde()
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #full period density comparison
                plt.title(Clim_var_type + ' -  density comparison - full period - max delta model')
                xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) -  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
                xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) +  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
                Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax)) 
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                fig.patch.set_alpha(ALPHA_figs)
                plt.close()
                #annual box
                plt.title(Clim_var_type + ' -  Annual means/sums for max diff model')
                Fdf_1900_2080_annual.boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #
                #daily box
                plt.title(Clim_var_type + ' -  Daily means/sums')
                Fdf_1900_2080.boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                # time series plot annual ALL models
                plt.title(Clim_var_type + ' -  Time series - all models')
                Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
                test = Fdf_1900_2080_annual
                Mod_Names = test.columns
                New_Mod_Name = []
                for i in range(0,len(Mod_Names)):
                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i])
                test.columns = New_Mod_Name
                test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
                colnamest = test.columns
                test_sorted.columns = [w[3:-5] for w in colnamest]
                fig = test_sorted.plot(legend=False, color =  plotcolours36)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                fig.patch.set_alpha(ALPHA_figs)
                plt.close()
                # time series plot annual ALL models
                plt.title(Clim_var_type + ' -  Time series - representative models')
                dfall_annual.plot(legend=False)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                # seasonal mean boxplots
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
                plt.title(Clim_var_type + ' -  DJF Summer means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()  
                plt.title(Clim_var_type + ' -  DJF Summer means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
                plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
                plt.title(Clim_var_type + ' -  JJA Winter means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  JJA Winter means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
                plt.title(Clim_var_type + ' -  SON Spring means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  SON Spring means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
--- a/Analysis/Code/P1_NARCliM_plots_Windows_quantile_scaling_factors.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows_quantile_scaling_factors.py
@ -0,0 +1,520 @@
 # -*- coding: utf-8 -*-
 #==========================================================#
 #Last Updated - June 2018
 #@author: z5025317 Valentin Heimhuber
 #code for creating climate prioritization plots for NARCLIM variables. 
 #Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
 #==========================================================#
 #Load packages
 #==========================================================#
 import numpy as np
 import os
 import pandas as pd
 import glob
 import matplotlib
 import matplotlib.pyplot as plt
 from datetime import datetime
 from datetime import timedelta
 from matplotlib.backends.backend_pdf import PdfPages
 from ggplot import *
 matplotlib.style.use('ggplot')
 # import own modules
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
 import climdata_fcts as fct
 import silo as sil
 ALPHA_figs = 0 
 font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 14}
 matplotlib.rc('font', **font)
 #==========================================================#
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
 #==========================================================#
 #==========================================================#
 #set input parameters
 Case_Study_Name = 'CASESTUDY2'
 Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
 Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
 Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
 #Estuaries = ['SHOALHAVEN']
 for Est in Estuaries:
    #Estuary = 'HUNTER' # 'Belongil'
    Estuary = Est # 'Belongil'
    print Estuary
    #Clim_var_type  = 'potevpmean'    #  '*' will create pdf for all variables in folder  "pracc*|tasmax*"   
    Clim_var_types  = ['pracc']
    for climvar in Clim_var_types:
        Clim_var_type  = climvar 
        #==========================================================#
        #set input preferences
        #==========================================================#
        plot_pdf = 'no'
        plot_pngs = 'no'
        delta_csv = 'yes'
        Stats = 'mean'   # 'maxdaily', 'mean', 'days_h_25'
        DoMonthly = True
        Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
        Location = 'Catchment'
        #==========================================================# 
        #==========================================================#
        #set directory path for output files
        output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
        #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
            print('-------------------------------------------')
            print("output directory folder didn't exist and was generated")
            print('-------------------------------------------')
        #set directory path for individual png files
        png_output_directory = 'Output/' + Case_Study_Name +  '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
        #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
        if not os.path.exists(png_output_directory):
            os.makedirs(png_output_directory)
            print('-------------------------------------------')
            print("output directory folder didn't exist and was generated")
            print('-------------------------------------------')
        #==========================================================#    
        #==========================================================#
        Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
        if Location == 'Catchment':
            Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
        Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
        Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0])) 
        Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
        #==========================================================#        
        #==========================================================#
        #read CSV files and start analysis
        #==========================================================#
        #for clim_var_csv_path in Clim_Var_CSVs:
        clim_var_csv_path = Clim_Var_CSVs[0]
        Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
        Clim_var_type = Filename.split('_', 1)[0]
        print(clim_var_csv_path)
        Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
        #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
        Full_df.index = Full_df.index.to_period('D')
        #Full_df = Full_df.drop(columns=['period']) #not part of all input csv files
        Ncols_df = len(Full_df)
        #check data types of columns
        #Full_df.dtypes
        #==========================================================#
        #==========================================================#
        #substract a constant from all values to convert from kelvin to celcius (temp)
        if Clim_var_type in ['tasmean','tasmax','sstmean']:
            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
        if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
        if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
        if Clim_var_type in ['pr1Hmaxtstep']:
            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60           
        if Clim_var_type in ['rsdsmean','rldsmean']:
            Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000  #convert to unit used in SILO
        Fdf_1900_2080 = Full_df
        #==========================================================#
        #==========================================================#
        #Aggregate daily df to annual time series
        if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
                             'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
            if(Stats == 'maxdaily'):
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #seasonal means
                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
            elif(Stats[:4] =='days'):
                Threshold = int(Stats[-2:])
                #agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
                agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)                
                #Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg)  #seasonal means
                #Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
            else:
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #Monthlyonal means
                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
        else:  
            if(Stats == 'maxdaily'):
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #Monthlyonal means
                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
            elif(Stats == 'maxmonthly'):
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
                Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means
                Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #Monthlyonal means
                Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
            elif(Stats[:4] =='days'):
                Threshold = int(Stats[-2:])
                #agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
                agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)                
                #Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg)  #seasonal means
                #Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
            else:
                Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
                Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
                Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #seasonal means
        Fdf_1900_2080_means = Fdf_1900_2080.mean()
        #==========================================================#
        #==========================================================#
        # construction of quantile scaling method
        #==========================================================#
        quantiles = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1]
        Quantile_change_df = quant_quant_scaling(Full_df, quantiles, False)
        png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
        png_out_path = png_output_directory + '/' + png_out_file_name
        #==========================================================#
        #Select the 3 most representative models (min med and max difference betwen far future and present)
        dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name  = fct.select_min_med_max_dif_model(Fdf_1900_2080)
        #==========================================================#
        #==========================================================#
        #create a dataframe that has 1 column for each of the three representative models
        dfa = Fdf_1900_2080_annual.iloc[:,[0]]
        dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
        dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
        dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
        dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
        dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
        dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
        dfall_annual = dfa1.append(dfa2).append(dfa3)
        #==========================================================#
        #==========================================================#
        #Create Deltas of average change for annual and seasonal basis
        #==========================================================#
        delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
        if DoMonthly ==True:
            delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)  
            delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]  
        #==========================================================#
        if delta_csv == 'yes':
            out_file_name = Estuary  + '_'  + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_' + Perc_Vs_Abs +  '_' + Location + '_' + Coordinates + '.csv'
            delta_all_df.to_csv(output_directory + '/' + out_file_name)
            #quantile scaling csv
            out_file_name = Estuary  + '_'  + Clim_var_type + '_' + Stats + '_NARCliM_quant_quant_' + Coordinates + '.csv'
            Quantile_change_df.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
        if delta_csv == 'yes' and DoMonthly ==True:
            out_file_name = Estuary  + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
            delta_all_df_monthly.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
        #==========================================================#
        #==========================================================#       
        pd.qcut(range(5), 4)
        #==========================================================#        
        #==========================================================#
        #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
        Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
        Full_current_df = Full_current_df.stack()
        #nearfuture
        Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
        Full_nearfuture_df = Full_nearfuture_df.stack()
        #farfuture
        Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
        Full_farfuture_df = Full_farfuture_df.stack()
        Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
        Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
        Summarized_df.columns = ['present', 'near', 'far']
        #==========================================================#
        #==========================================================#
        #generate colour schemes for plotting
        #==========================================================#
        plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
                 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
                 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
        plotcolours36b = ['tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  ,
                 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  ,
                 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple'  , 'tomato', 'royalblue', 'mediumpurple' ]
        plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
        plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen',  'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
        color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
                  'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3':  'tomato',
                  'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
        #==========================================================#
        #==========================================================#
        #output crucial summary plots into individual png files
        #==========================================================#
        if plot_pngs == 'yes':
            #=========#
            #Barplot of near and far future deltas
            #=========#
            #out name
            png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            #prepare data frame for plot
            neardeltadf=delta_all_df['near']
            ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
            ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
            neardeltadf=delta_all_df['far']
            ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
            ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
            ymin = min(ymin, ymin2)
            if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
                ymin = 0
            ymax = max(ymax, ymax2)
            #
            fig = plt.figure(figsize=(5,6))
            ax=plt.subplot(2,1,1) 
            plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
            neardeltadf=delta_all_df['near']
            neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)  
            plt.xticks([])
            ax=plt.subplot(2,1,2) 
            plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
            neardeltadf=delta_all_df['far']
            neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
            ax.xaxis.grid(False)
            #ax.patch.set_alpha(ALPHA_figs)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
            #=========#
            #full period density comparison
            #=========#
            #out name
            png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            plt.title(Clim_var_type + ' - ' + Stats + ' -  hist - full period - max delta model')
            #prepare data
            xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) -  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
            xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) +  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
            fig = plt.figure(figsize=(5,5))
            ax=plt.subplot(2,1,1) 
            Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
            #ax.xaxis.grid(False)
            ax.yaxis.grid(False)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
            #=========#
            # time series plot annual ALL models
            #=========#
            png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
            png_out_path = png_output_directory + '/' + png_out_file_name
            plt.title(Clim_var_type + ' - ' + Stats + ' -  Time series - representative models')
            #prepare data
 #            Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
 #            test = Fdf_1900_2080_annual
 #            Mod_Names = test.columns
 #            New_Mod_Name = []
 #            for i in range(0,len(Mod_Names)):
 #                if(Stats[:4] =='days'):
 #                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i][0])
 #                else:
 #                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i])
 #            test.columns = New_Mod_Name
 #            test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
 #            colnamest = test.columns
 #            test_sorted.columns = [w[3:-5] for w in colnamest]
            #plot
            fig = plt.figure(figsize=(8,7))
            ax=plt.subplot(2,1,1)
            #test_sorted.plot(ax=ax) #,color =  plotcolours36)
            Fdf_1900_2080_annual_b = Fdf_1900_2080_annual
            New_Mod_Name = []
            if(Stats[:4] =='days'):
                for each in Fdf_1900_2080_annual_b.columns:
                    New_Mod_Name.append(each[0][:-5])
            else:
                for each in Fdf_1900_2080_annual_b.columns:
                    New_Mod_Name.append(each[:-5])
            Fdf_1900_2080_annual_b.columns = New_Mod_Name
            Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
            ax.xaxis.grid(False)
            fig.patch.set_alpha(ALPHA_figs)
            fig.tight_layout()
            fig.savefig(png_out_path)
            plt.close()
            #=========#
        #==========================================================#
        #output some summary plot into pdf
        #==========================================================#
        if plot_pdf == 'yes':
            #plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
            #write the key plots to a single pdf document
            pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
            pdf_out_path = output_directory +'/' + pdf_out_file_name
            #open pdf and add the plots
            with PdfPages(pdf_out_path) as pdf:
                #barplot of model means
                plt.title(Clim_var_type + ' -  model means - full period')
                ymin = min(Fdf_1900_2080_means) 
                ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
                Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #
                neardeltadf=delta_all_df['near']
                ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
                ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
                neardeltadf=delta_all_df['far']
                ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
                ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
                ymin = min(ymin, ymin2)
                if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
                    ymin = 0
                ymax = max(ymax, ymax2)
                #
                # delta barplot for report 1#################################
                ax=plt.subplot(2,1,1) 
                plt.title(Clim_var_type + ' -  model deltas - near-present')
                neardeltadf=delta_all_df['near']
                neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)  
                ax.patch.set_alpha(ALPHA_figs)
                plt.xticks([])
                #ax.xaxis.set_ticklabels([])
                #pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
                #plt.close()
                #
                ax=plt.subplot(2,1,2) 
                plt.title(Clim_var_type + ' -  model deltas - far-present')
                neardeltadf=delta_all_df['far']
                fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
                ax.xaxis.grid(False)
                ax.patch.set_alpha(ALPHA_figs)
                #fig.patch.set_alpha(ALPHA_figs)
                #plt.show()
                pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
                plt.close()
                # end delta barplot for report 1#################################
                #
                #full period density comparison
                plt.title(Clim_var_type + ' -  density comparison - full period - all models')
                Summarized_df.plot.kde()
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #full period density comparison
                plt.title(Clim_var_type + ' -  density comparison - full period - max delta model')
                xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) -  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
                xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) +  4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
                Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax)) 
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                fig.patch.set_alpha(ALPHA_figs)
                plt.close()
                #annual box
                plt.title(Clim_var_type + ' -  Annual means/sums for max diff model')
                Fdf_1900_2080_annual.boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                #
                #daily box
                plt.title(Clim_var_type + ' -  Daily means/sums')
                Fdf_1900_2080.boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                # time series plot annual ALL models
                plt.title(Clim_var_type + ' -  Time series - all models')
                Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
                test = Fdf_1900_2080_annual
                Mod_Names = test.columns
                New_Mod_Name = []
                for i in range(0,len(Mod_Names)):
                    New_Mod_Name.append(str(Mod_order[i]+10) + '_' +  Mod_Names[i])
                test.columns = New_Mod_Name
                test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
                colnamest = test.columns
                test_sorted.columns = [w[3:-5] for w in colnamest]
                fig = test_sorted.plot(legend=False, color =  plotcolours36)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                fig.patch.set_alpha(ALPHA_figs)
                plt.close()
                # time series plot annual ALL models
                plt.title(Clim_var_type + ' -  Time series - representative models')
                dfall_annual.plot(legend=False)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                # seasonal mean boxplots
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
                plt.title(Clim_var_type + ' -  DJF Summer means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()  
                plt.title(Clim_var_type + ' -  DJF Summer means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
                plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
                plt.title(Clim_var_type + ' -  JJA Winter means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  JJA Winter means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
                ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
                ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
                plt.title(Clim_var_type + ' -  SON Spring means/sums')
                pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close() 
                plt.title(Clim_var_type + ' -  SON Spring means/sums')
                Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
                pdf.savefig(bbox_inches='tight', pad_inches=0.4)
                plt.close()
--- a/Analysis/Code/P1_SILO_Batch_Download.py
+++ b/Analysis/Code/P1_SILO_Batch_Download.py
@ -0,0 +1,121 @@
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jun 20 18:03:25 2018
@author: z5025317
 """
 #==========================================================#
 #Load packages
 #==========================================================#
 import numpy as np
 import os
 import pandas as pd
 import glob
 import matplotlib
 import matplotlib.pyplot as plt
 from datetime import datetime
 from datetime import timedelta
 from matplotlib.backends.backend_pdf import PdfPages
 from ggplot import *
 matplotlib.style.use('ggplot')
 import csv
 # import own modules
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
 #import climdata_fcts as fct
 import silo as sil
 #==========================================================#
 #==========================================================#
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
 #==========================================================#
 #==========================================================#
 #set input parameters
 Case_Study_Name = 'CASESTUDY2' 
 Casestudy2_csv_path =  "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTDUY2_NARCLIM_Point_Sites.csv"
 Silo_variables = ['daily_rain', "max_temp", "min_temp", 'et_short_crop', 'evap_syn'] 
 Location  = 'Estuary' #'Catchment'  
 startdate= '19600101'
 enddate= '20180101' 
 #==========================================================#
 #==========================================================#
 #set directory path for output files
 output_directory = 'Data/SILO/' + Case_Study_Name + '/'
 #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
 if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
 #==========================================================#  
 #==========================================================# 
 #read the CSV to extract the lat long and case stuy sites
 with open(Casestudy2_csv_path, mode='r') as infile:
    reader = csv.reader(infile)
    next(reader, None) 
    with open('coors_new.csv', mode='w') as outfile:
        writer = csv.writer(outfile)
        if Location == 'Estuary':
            mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
        if Location == 'Ocean':
            mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
        if Location == 'Catchment':
            mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
 for Estuary in mydict:
    print Estuary, mydict[Estuary][0], mydict[Estuary][1]
    #==========================================================#
    #set directory path for output files
    output_directory_internal = output_directory  + Estuary + '/'
    #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
    if not os.path.exists(output_directory_internal):
        os.makedirs(output_directory_internal)
        print('-------------------------------------------')
        print("output directory folder didn't exist and was generated")
        print('-------------------------------------------')
    #==========================================================#
    output_csv = output_directory_internal + 'SILO_climdata_' + Estuary +'_'+ Location +'_' + mydict[Estuary][0] + '_' +  mydict[Estuary][1] + '.csv'
    silo_df = sil.pointdata(Silo_variables, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh',startdate, enddate, 
                            None, mydict[Estuary][0], mydict[Estuary][1], True, output_csv)
 #==========================================================# 
--- a/Analysis/Code/climdata_fcts.py
+++ b/Analysis/Code/climdata_fcts.py
@ -0,0 +1,315 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Jun 14 16:32:01 2018
@author: z5025317
 """
 import matplotlib.pyplot as plt
 from datetime import datetime, timedelta
 import numpy as np
 import pandas as pd
 import glob
 def compare_images(im1, im2):
    """plots 2 images next to each other, sharing the axis"""
    plt.figure()
    ax1 = plt.subplot(121)
    plt.imshow(im1, cmap='gray')
    ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
    plt.imshow(im2, cmap='gray')
    plt.show()  
 def reject_outliers(data, m=2):
    "rejects outliers in a numpy array"
    return data[abs(data - np.mean(data)) < m * np.std(data)]
 def duplicates_dict(lst):
    "return duplicates and indices"
    # nested function
    def duplicates(lst, item):
            return [i for i, x in enumerate(lst) if x == item]
    return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
 def datenum2datetime(datenum):
    "convert datenum to datetime"
    #takes in datenum and outputs python datetime
    time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum]
    return time
 def select_min_med_max_dif_model(NARCLIM_df):
    #Select the 3 most representative models (min med and max difference betwen far future and present)
    Fdf_1900_2080_sorted = NARCLIM_df.reindex_axis(sorted(NARCLIM_df.columns), axis=1)
    Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
    df = Fdf_1900_2080_sorted_means
    #add a simple increasing integer index 
    df = df.reset_index()
    df= df[df.index % 3 != 1]
    df['C'] = df[0].diff()
    df = df.reset_index()
    df= df[df.index % 2 != 0]
    #get max difference model (difference between far future and prsent day)
    a = df[df.index == df['C'].argmax(skipna=True)]
    Max_dif_mod_name = a.iloc[0]['index']
    #get min difference model
    a = df[df.index == df['C'].argmin(skipna=True)]
    Min_dif_mod_name = a.iloc[0]['index']
    #get the model which difference is closest to the median difference
    df['D'] = abs(df['C']- df['C'].median())
    a = df[df.index == df['D'].argmin(skipna=True)]
    Med_dif_mod_name = a.iloc[0]['index']
    #data frame with min med and max difference model
    df2 = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
    dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
    #data frame with individual models
    dfmin = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5])
    dfmax = NARCLIM_df.filter(regex= Max_dif_mod_name[:-5])
    dfmed = NARCLIM_df.filter(regex= Max_dif_mod_name[:-5])
    return dfall , dfmin, dfmed, dfmax, Min_dif_mod_name,Med_dif_mod_name, Max_dif_mod_name
 def calculate_deltas_NF_FF2(Annual_df, Seasonal_df, Stats, Perc_vs_Abs):
    """calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""
    times = ['annual', 'DJF', 'MAM', 'JJA','SON']
    delta_all_df = pd.DataFrame()
    for temp in times:
        if temp == 'annual':
            Mean_df = Annual_df.mean()
            Column_names = ['near', 'far']
        if temp == 'DJF':
            Mean_df = Seasonal_df[Seasonal_df.index.quarter==1].mean()
            Column_names = ['DJF_near', 'DJF_far']
        if temp == 'MAM':
            Mean_df = Seasonal_df[Seasonal_df.index.quarter==2].mean()
            Column_names = ['MAM_near', 'MAM_far']
        if temp == 'JJA':
            Mean_df = Seasonal_df[Seasonal_df.index.quarter==3].mean()
            Column_names = ['JJA_near', 'JJA_far']
        if temp == 'SON':
            Mean_df = Seasonal_df[Seasonal_df.index.quarter==4].mean()
            Column_names = ['SON_near', 'SON_far']
        if(Stats[:4] =='days'):
            models = list(Seasonal_df.mean().index.get_level_values(0))
        else:
            models = list(Seasonal_df.mean().index)
        newmodel = []
        for each in models:
            newmodel.append(each[:-5])
        unique_models = set(newmodel)
        # calculate diff for each unique model
        delta_NF_ensemble = []
        delta_FF_ensemble = []
        for unique_model in unique_models:
            dfdiff = Mean_df.filter(regex= unique_model)
            type(dfdiff)
            if Perc_vs_Abs == 'absolute':
                delta_NF = dfdiff[1] - dfdiff[0]
                delta_NF_ensemble.append(delta_NF)
                delta_FF = dfdiff[2] - dfdiff[0]
                delta_FF_ensemble.append(delta_FF)
            if Perc_vs_Abs == 'percent':
                delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
                delta_NF_ensemble.append(delta_NF)
                delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
                delta_FF_ensemble.append(delta_FF)
        delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
        delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
        delta_df= pd.concat([delta_df1, delta_df2], axis=1)
        #rename columns
        delta_df.columns = Column_names
        #add a row with medians and 10 and 90th percentiles
        delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
        delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
        delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
        #append df to overall df
        delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
        if(Stats[:4] =='days'):
            delta_all_df  = delta_all_df .astype(int).fillna(0.0)
    return delta_all_df
 def calculate_deltas_monthly(Monthly_df, Stats, Perc_vs_Abs):  
    """calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""   
    delta_all_df = pd.DataFrame()
    for i in range(1, 13, 1):
        Mean_df = Monthly_df[Monthly_df.index.month==i].mean()
        Column_names = [str(i)+'_near', str(i)+'_far']
        if(Stats[:4] =='days'):
            models = list(Monthly_df.mean().index.get_level_values(0))
        else:
            models = list(Monthly_df.mean().index)
        newmodel = []
        for each in models:
            newmodel.append(each[:-5])
        unique_models = set(newmodel)
        # calculate diff for each unique model
        delta_NF_ensemble = []
        delta_FF_ensemble = []
        for unique_model in unique_models:
            dfdiff = Mean_df.filter(regex= unique_model)
            type(dfdiff)
            if Perc_vs_Abs == 'absolute':
                delta_NF = dfdiff[1] - dfdiff[0]
                delta_NF_ensemble.append(delta_NF)
                delta_FF = dfdiff[2] - dfdiff[0]
                delta_FF_ensemble.append(delta_FF)
            if Perc_vs_Abs == 'percent':
                delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
                delta_NF_ensemble.append(delta_NF)
                delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
                delta_FF_ensemble.append(delta_FF)
        delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
        delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
        delta_df= pd.concat([delta_df1, delta_df2], axis=1)
        #rename columns
        delta_df.columns = Column_names
        #add a row with medians and 10 and 90th percentiles
        delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
        delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
        delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
        #append df to overall df
        delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
        if(Stats[:4] =='days'):
            delta_all_df  = delta_all_df .astype(int).fillna(0.0)
    return delta_all_df
 def import_present_day_climdata_csv(Estuary, Clim_var_type):
    """
    this funciton imports the present day climate data used for 
    characterizing the present day climate varibility
    If DataSource == 'Station', individual weather station data is used. 
    If DataSource == 'SILO' , SILO time series is used using the estuary centerpoint as reference locatoin for 
    selection of the grid cell
    """ 
    #load present day climate data for the same variable
    if Clim_var_type == 'evspsblmean': #ET time series that we have is not in the same format as the other variables, hence the different treatment
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' +  'ET' + '*csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Dates = pd.to_datetime(Present_day_df.Date)
        Present_day_df.index = Dates
        Present_day_df = Present_day_df.iloc[:,1]
        Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
        Present_day_df = pd.to_numeric(Present_day_df)
        Present_day_df.index = Dates
        [minplotDelta, maxplotDelta]=[50,50]
    #for tasmean, observed min and max T need to be converted into mean T
    elif Clim_var_type == 'tasmean':
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
        Present_day_df.index = Dates
        Present_day_MaxT_df = Present_day_df.iloc[:,5]
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MinT*csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
        Present_day_df.index = Dates
        Present_day_MinT_df = Present_day_df.iloc[:,5]
        Present_day_df = (Present_day_MaxT_df + Present_day_MinT_df)/2
        [minplotDelta, maxplotDelta]=[1,2]
    elif Clim_var_type == 'tasmax':
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
        Present_day_df.index = Dates
        Present_day_df = Present_day_df.iloc[:,5]
        [minplotDelta, maxplotDelta]=[1,2]  
    elif Clim_var_type == 'wssmean' or  Clim_var_type == 'wss1Hmaxtstep':
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/Terrigal_Wind.csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Present_day_df.index = Present_day_df[['Year', 'Month', 'Day', 'Hour']].apply(lambda s : datetime(*s),axis = 1)
        Present_day_df = Present_day_df.filter(regex= 'm/s') 
        Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
        Present_day_df['Wind speed measured in m/s'] = Present_day_df['Wind speed measured in m/s'].convert_objects(convert_numeric=True)
        [minplotDelta, maxplotDelta]=[1, 1.5]  
    elif Clim_var_type == 'sstmean':
        Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/CASESTUDY2/' + Estuary + '*' )
        Present_day_Var_CSV = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '_NNRP*')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0], parse_dates=True, index_col=0)
        Present_day_df = Present_day_df.filter(regex= 'NNRP_R1_1950') 
        Present_day_df['NNRP_R1_1950'] = Present_day_df['NNRP_R1_1950'].convert_objects(convert_numeric=True)
        [minplotDelta, maxplotDelta]=[1, 1]
    else:   
        Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' +  'Rainfall' + '*csv')
        Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
        Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
        Present_day_df.index = Dates
        Present_day_df = Present_day_df.iloc[:,5]
        [minplotDelta, maxplotDelta]=[50,100]    
    return Present_day_df, minplotDelta, maxplotDelta
 def quant_quant_scaling(Full_df, quantiles, Plotbool):
    """
    calculates the % "deltas" for each quantile in line with Climate Change in Australia recommendations provided here:
    https://www.climatechangeinaustralia.gov.au/en/support-and-guidance/using-climate-projections/application-ready-data/scaling-methods/    
    """
    Periods = ['1990', '2020', '2060']
    quantiles_srings = [str(x) for x in quantiles][:-1]
    models = list(Full_df.resample('A').max().mean().index)
    newmodel = []
    for each in models:
        newmodel.append(each[:-5])
    unique_models = list(set(newmodel))
    #create empty df and loop through models and periods to derive the % change factors for all quantiles and models
    Quant_diff_df_outer = pd.DataFrame()
    for unique_model in unique_models:
        dfdiff = Full_df.filter(regex= unique_model)
        Quant_diff_df = pd.DataFrame() 
        for period in Periods:
            x=dfdiff.filter(regex= period).dropna().values
            x.sort(axis=0)
            df=pd.DataFrame(x)
            df.columns = ['A']
            cut_df = pd.DataFrame(df.groupby(pd.qcut(df.rank(method='first').A, quantiles))['A'].mean().values)
            cut_df.columns = [period]
            Quant_diff_df = pd.concat([Quant_diff_df, cut_df], axis=1, join='outer')  
        if Plotbool:
            Quant_diff_df.plot(x=Quant_diff_df.index, y=Periods, kind="bar", title = unique_model)
        Quant_diff_df['NF_%change_'+ unique_model] =  (Quant_diff_df['2020'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
        Quant_diff_df['FF_%change_'+ unique_model] =  (Quant_diff_df['2060'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
        Quant_diff_df = Quant_diff_df.replace([np.inf, -np.inf], np.nan)
        Quant_diff_df = Quant_diff_df.iloc[:,[3,4]].fillna(0)
        Quant_diff_df_outer = pd.concat([Quant_diff_df_outer, Quant_diff_df], axis=1, join='outer')
    Quant_diff_df_outer.index = quantiles_srings 
    if Plotbool:
        Quant_diff_df_outer.plot(x=Quant_diff_df_outer.index, y=Quant_diff_df_outer.columns, kind="bar", legend = False, title='% intensity change per quantile')
    #add new cols and rows with summary statistics
    Quantile_change_NF_df = Quant_diff_df_outer.filter(regex= 'NF')
    Quantile_change_FF_df = Quant_diff_df_outer.filter(regex= 'FF')
    Quantile_change_stats_df = pd.DataFrame()
    for loop_df in [Quantile_change_NF_df, Quantile_change_FF_df]:
        Sum95_df = pd.DataFrame(loop_df.iloc[-5:,:].sum()).transpose()
        Sum95_df.index = ['Sum>95perc']
        Sum_df = pd.DataFrame(loop_df.sum()).transpose()
        Sum_df.index = ['Sum']
        Med_df = pd.DataFrame(loop_df.median()).transpose()
        Med_df.index = ['Median']
        loop_df = loop_df.append(Sum_df)
        loop_df = loop_df.append(Sum95_df)
        loop_df = loop_df.append(Med_df)
        loop_df['Median'] = loop_df.median(axis=1)
        loop_df['Maximum'] = loop_df.max(axis=1)
        loop_df['Minimum'] = loop_df.min(axis=1)
        Quantile_change_stats_df = pd.concat([Quantile_change_stats_df, loop_df], axis=1, join='outer')
    return Quantile_change_stats_df
--- a/Analysis/Code/climdata_fcts.pyc
+++ b/Analysis/Code/climdata_fcts.pyc
--- a/Analysis/Code/ggplot_time_series_with_trends.R
+++ b/Analysis/Code/ggplot_time_series_with_trends.R
@ -0,0 +1,556 @@
 #R code for creating ggplots of time series with smooth (GAM) and linear term
 ######################
 #Import Libraries and set working directory
 ######################
 library(zoo)
 library(hydroTSM) #you need to install these packages first before you can load them here
 library(lubridate)
 library(mgcv)
 library(ggplot2)
 library(gridExtra)
 library(scales)
 options(scipen=999)
 setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
 ######################
 GRACE.monthly.av.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Mean.2002-2017 with border pixels.csv"))
 GRACE.monthly.std.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Stdv.2002-2017 with border pixels.csv"))
 ######################
 #Set inputs
 ######################
 Case.Study <- "CASESTUDY2"
 Estuary <- "HUNTER"
 Riv.Gauge.loc <- "GRETA"     #GRETA 
 Est.Gauge.loc <-  "RAYMONDTERRACE"   #"CORAKI"          #"RAYMONDTERRACE" # "HEXHAMBRIDGE"
 logtransformFlow <- TRUE
 ggplotGAM.k <- 15
 rivTempGAM.k <- 20
 Version <- 'V3'
 Fontsize <- 12
 ######################
 ######################
 Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/', sep="")
 if (file.exists(Output.Directory)){
  print('output folder already existed and was not created again')
  } else {
  dir.create(file.path(Output.Directory))
  print('output folder did not exist and was created')
  }
 Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/Riv_', Riv.Gauge.loc,'_Est_',Est.Gauge.loc,'_GAMk', ggplotGAM.k, 'd/', sep="")
 if (file.exists(Output.Directory)){
  print('output folder already existed and was not created again')
 } else {
  dir.create(file.path(Output.Directory))
  print('output folder did not exist and was created')
 }
 ######################
 ######################
 #Set input file paths
 ######################
 pattern = paste('SILO_climdata_', Estuary,'_Catchment*', sep="")
 AirT_CSV_Path <- list.files(paste("./Data/SILO/",Case.Study, '/',sep=""), pattern, full.names=T, recursive=T)
 pattern = paste(Estuary,'@', Riv.Gauge.loc, '.*.ALL.csv', sep="")
 RivT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
 pattern = paste(Estuary,'@', Est.Gauge.loc, '.*.ALL.csv', sep="")
 EstT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
 pattern = paste('sstmean_NNRP_', Estuary,'*', sep="")
 SST_CSV_Path <- list.files(paste("./Data/NARCLIM_Site_CSVs/",Case.Study, '/', sep=""), pattern, full.names=T, recursive=T)
 ######################
 ######################
 #Analyse
 ######################
 ############tasmean
 #Load a daily (no gaps) time series as a time serie baseline for other time series used here
 AirT.df <- data.frame(read.csv(AirT_CSV_Path))
 AirT.full.TS <- zoo((AirT.df$max_temp_Celsius + AirT.df$max_temp_Celsius)/2, order.by= as.Date(AirT.df[,"date"], format = "%Y-%m-%d")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 AirT.TS <- window(AirT.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
 AirT.full.df <- data.frame(AirT.full.TS)
 AirT.df <- data.frame(AirT.TS)
 colnames(AirT.df) <- 'tasmean'
 colnames(AirT.full.df) <- 'tasmean'
 ############
 AirT.df$Julday1 <- seq(1,length(AirT.df[,1]),1)
 linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.df)
 AirT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 AirT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 ############
 AirT.full.df$Julday1 <- seq(1,length(AirT.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.full.df)
 AirT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 AirT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 ############tasmean
 ############River temp
 #Load a daily (no gaps) time series as a time serie baseline for other time series used here
 #Here we use the raw DPI CSV format that comes with a bunch of metadata
 RivT.df <- data.frame(read.csv(RivT_CSV_Path))
 char.df <- data.frame(lapply(RivT.df[2,], as.character), stringsAsFactors=FALSE)
 #dat <- data.frame(lapply(RivT.df[(4:nrow(RivT.df)),(2:ncol(RivT.df))], as.numeric), stringsAsFactors=FALSE)
 #dat <- RivT.df.num[!is.na(as.numeric(as.character(RivT.df.num))),]
 dat <- RivT.df[(4:nrow(RivT.df)),]
 colnames(dat) <- lapply(RivT.df[2,], as.character)
 dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
 colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
 RivT.df <- dat
 rm(dat,char.df)
 RivT.full.TS <- zoo(as.numeric(as.character(RivT.df$Temp)), order.by= as.Date(RivT.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 RivT.full.TS <- window(RivT.full.TS, start=as.Date("1995-07-01"), end=as.Date("2018-01-01"))
 RivT.full.TS <- na.approx(RivT.full.TS)
 RivT.TS <- RivT.full.TS
 RivT.full.df <- data.frame(RivT.TS)   ### This is only done because 
 RivT.df <- data.frame(RivT.TS)
 colnames(RivT.df) <- 'rivTmean'
 colnames(RivT.full.df) <- 'rivTmean'
 ############
 RivT.df$Julday1 <- seq(1,length(RivT.df[,1]),1)
 linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.df)
 RivT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 RivT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 RivT.full.df$Julday1 <- seq(1,length(RivT.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.full.df)
 RivT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 RivT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 ############River temp
 #export interpolated data as csv
 #csv.name <- "C:/Users/z5025317/OneDrive - UNSW/CC_Estuaries_CASESTUDY2/Data/River_Gauge_Data/HUNTER@Greta_210064_Temp_interpolated.csv"
 #write.csv(RivT.df, csv.name)
 ############River flow
 #Load a daily (no gaps) time series as a time serie baseline for other time series used here
 #Here we use the raw DPI CSV format that comes with a bunch of metadata
 RivQ.df <- data.frame(read.csv(RivT_CSV_Path))
 char.df <- data.frame(lapply(RivQ.df[2,], as.character), stringsAsFactors=FALSE)
 dat <- RivQ.df[(4:nrow(RivQ.df)),]
 colnames(dat) <- lapply(RivQ.df[2,], as.character)
 dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
 colnames(dat) <- gsub(x=colnames(dat), pattern="Discharge (ML/d)",replacement="Flow",fixed=T)
 RivQ.df <- dat
 rm(dat,char.df)
 #RivQ.full.TS <- zoo(log10((as.numeric(as.character(RivQ.df$Flow))) +1) , order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 RivQ.full.TS <- zoo((as.numeric(as.character(RivQ.df$Flow)))/86.4, order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 RivQ.TS <- window(RivQ.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
 #RivQ.full.TS <- window(RivQ.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
 RivQ.full.df <- data.frame(RivQ.full.TS)   ### This is only done because 
 RivQ.df <- data.frame(RivQ.TS)
 colnames(RivQ.df) <- 'RivQmean'
 colnames(RivQ.full.df) <- 'RivQmean'
 ############trends
 RivQ.df$Julday1 <- seq(1,length(RivQ.df[,1]),1)
 linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.df)
 RivQ.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 RivQ.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 RivQ.full.df$Julday1 <- seq(1,length(RivQ.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.full.df)
 RivQ.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 RivQ.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 ############River Flow
 ############ SST
 #Load a daily (no gaps) time series as a time serie baseline for other time series used here
 SST.df <- data.frame(read.csv(SST_CSV_Path))
 SST.full.TS <- zoo(SST.df$NNRP_R1_1950-273.15, order.by= as.Date(SST.df[,"X"], format = "%Y-%m-%d")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
 #SST.full.TS <- window(SST.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
 SST.full.df <- data.frame(SST.full.TS)
 SST.df <- data.frame(SST.TS)
 colnames(SST.df) <- 'SSTmean'
 colnames(SST.full.df) <- 'SSTmean'
 ############
 SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
 SST.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all)$coefficients[2,4] 
 SST.full.lintrend <- summary(linear.trend.model_EC_all)$coefficients[2,1] * 365
 SST.df$Julday1 <- seq(1,length(SST.df[,1]),1)
 linear.trend.model_EC_all2 <- lm(SSTmean ~ Julday1, SST.df)
 SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all2)$coefficients[2,4] 
 SST.lintrend <- summary(linear.trend.model_EC_all2)$coefficients[2,1] * 365
 ############ SST
 ############Estuary temp
 #Load a daily (no gaps) time series as a time serie baseline for other time series used here
 #Here we use the raw DPI CSV format that comes with a bunch of metadata
 EstT.df <- data.frame(read.csv(EstT_CSV_Path))
 char.df <- data.frame(lapply(EstT.df[2,], as.character), stringsAsFactors=FALSE)
 #dat <- data.frame(lapply(EstT.df[(4:nrow(EstT.df)),(2:ncol(EstT.df))], as.numeric), stringsAsFactors=FALSE)
 #dat <- EstT.df.num[!is.na(as.numeric(as.character(EstT.df.num))),]
 dat <- EstT.df[(4:nrow(EstT.df)),]
 colnames(dat) <- lapply(EstT.df[2,], as.character)
 dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
 colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
 EstT.df <- dat
 rm(dat,char.df)
 #replace negative values with NA
 EstT.df$Temp <- replace(EstT.df$Temp, which(as.numeric(as.character(EstT.df$Temp)) < 11), NA)
 EstT.full.TS <- zoo(as.numeric(as.character(EstT.df$Temp)), order.by= as.Date(EstT.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 EstT.TS <- window(EstT.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
 EstT.full.TS <- EstT.TS
 EstT.full.df <- data.frame(EstT.TS)   ### This is only done because of poor data at beginning
 EstT.df <- data.frame(EstT.TS)
 colnames(EstT.df) <- 'EstTmean'
 colnames(EstT.full.df) <- 'EstTmean'
 ############
 EstT.df$Julday1 <- seq(1,length(EstT.df[,1]),1)
 linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.df)
 EstT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 EstT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 EstT.full.df$Julday1 <- seq(1,length(EstT.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.full.df)
 EstT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 EstT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 ############Est temp
 ######################
 #Plot
 ######################
 ##################################### Full Time Period
 #Air temp Full period 
 p1air <- ggplot(AirT.full.df, aes(y=tasmean, x=index(AirT.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary," Catchment Centroid - Linear and smooth trends in catchment airT (SILO) lin trend was ",
                round(AirT.full.lintrend,3), ' C°/year with p=', round(AirT.full.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Air Temperature [C°]") + xlab("Time")
 p1riv <- ggplot(RivT.full.df, aes(y=rivTmean, x=index(RivT.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trends in gauged river temperature (@", Riv.Gauge.loc  ,") - Linear trend was ",
                round(RivT.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(RivT.full.pvalNCV_ECall,10)), sep="")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("River Temperature [C°]") + xlab("Time") + xlab(NULL) +  
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 if(logtransformFlow ==TRUE){
 p1rivQ <- ggplot(RivQ.full.df, aes(y=log10(RivQmean+2), x=index(RivQ.full.TS))) + geom_line(alpha=0.5) + 
    ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
                  round(RivQ.full.lintrend,3), ' ML/day /year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
    theme(plot.title=element_text(face="bold", size=9)) +
    geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
    stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
    ylab(expression(paste("log10(River Flow [ML/day] + 2)", sep=""))) + xlab("Time") +
    theme(axis.text=element_text(size=Fontsize)) +
    theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
  } else {
  p1rivQ <- ggplot(RivQ.full.df, aes(y=RivQmean, x=index(RivQ.full.TS))) + geom_line(alpha=0.5) + 
    ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
                  round(RivQ.full.lintrend,3), ' ML/day with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
    theme(plot.title=element_text(face="bold", size=9)) +
    geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
    stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
    ylab(expression(paste("River Flow [ML/day]", sep=""))) + xlab("Time") +
    theme(axis.text=element_text(size=Fontsize)) +    
    theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
    }
 #Sea temp Full period 
 p1sst <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " offshore - Linear and smooth trends in sea surface temperature (NNRP NARCLIM reanalysis) linear trend was ",
                round(SST.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(SST.full.pvalNCV_ECall,10)), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Sea Surface Temperature [C°]") + xlab("Time")+ xlab(NULL) +
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 p1Est <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
                round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Estuary Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 p1Est2 <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
                round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  #stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  binomial_smooth(formula = y ~ splines::ns(x, 2)) +
  ylab("Estuary Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb3.png", sep="")
 png(file = png.name, width = 10.5, height = 4, units='in', res=500)
 grid.arrange(p1Est2,ncol=1)
 dev.off()
 gA1 <- ggplotGrob(p1riv)
 gA2 <- ggplotGrob(p1sst)
 gA1$widths <- gA2$widths
 #export to png
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1riv,p1riv,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_SSTmean_full_period4_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gA1,gA2,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQmean_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1rivQ,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1sst,p1sst,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"b.png", sep="")
 png(file = png.name, width = 10.5, height = 4, units='in', res=500)
 grid.arrange(p1Est,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb.png", sep="")
 png(file = png.name, width = 10.5, height = 4, units='in', res=500)
 grid.arrange(p1Est2,ncol=1)
 dev.off()
 ##################################### Full Time Period
 ######################### 1990-present
 combined.TS <- window(merge(AirT.full.TS, window(RivT.full.TS, start=as.Date("1995-01-01"), end=end(RivT.full.TS)), SST.full.TS,EstT.full.TS,RivQ.full.TS,  all=T), start=as.Date("1990-01-01"), end=end(AirT.full.TS))
 combined.df <- data.frame(combined.TS)
 colnames(combined.df) <- c('tasmean','rivTmean', 'SSTmean', 'EstTmean', 'rivQmean')
 #Air temp 
 p2air <- ggplot(combined.df, aes(y=tasmean, x=index(combined.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary," Catchment centroid - Linear and smooth trend in catchment airT (SILO) linear trend was ",
                round(AirT.lintrend,3), ' C°/year with p=', round(AirT.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Air Temperature [C°]") + xlab(NULL)+
  theme(axis.text=element_text(size=Fontsize)) + 
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #Riv temp 
 p2riv <- ggplot(combined.df, aes(y=rivTmean, x=index(combined.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river temperature (GAUGE) linear trend was ",
                round(RivT.lintrend,3), ' C°/year with p=', round(RivT.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("River Temperature [C°]") + xlab(NULL) +  
  theme(axis.text=element_text(size=Fontsize)) + 
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #Riv flow
 if(logtransformFlow ==TRUE){
  p2rivQ <- ggplot(combined.df, aes(y=log10(rivQmean+2), x=index(combined.TS))) + geom_line(alpha=0.5) + 
    ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
                  round(RivQ.full.lintrend,3),  'cubic-meter/year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
    theme(plot.title=element_text(face="bold", size=9)) +
    geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
    stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
    ylab(expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep=""))) + xlab(NULL)+
    theme(axis.text=element_text(size=Fontsize)) + 
    theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
  } else {
  p2rivQ <- ggplot(combined.df, aes(y=rivQmean, x=index(combined.TS))) + geom_line(alpha=0.5) + 
    ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
                  round(RivT.lintrend,3), 'cubic-meter/year with p=', round(RivT.pvalNCV_ECall,10), sep=" ")) +
    theme(plot.title=element_text(face="bold", size=9)) +
    geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
    stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
    ylab(expression(paste("River flow [m"^"3" *"/s]", sep=""))) + xlab(NULL)+
    theme(axis.text=element_text(size=Fontsize)) + 
    theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
    }
 #Sea temp 
 p2sst <- ggplot(combined.df, aes(y=SSTmean, x=index(combined.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary,"NNRP(NARCLIM reanalysis) - Linear and smooth trend in sea surface temperature (NNRP) lin trend was ",
                round(SST.lintrend,3), ' C°/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Sea Surface Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) + 
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #sst temp 
 p2Est <- ggplot(combined.df, aes(y=EstTmean, x=index(combined.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) linear trend was ",
                round(EstT.lintrend,3), ' C°/year with p=', round(EstT.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  #stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
  ylab("Estuary Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +  
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #export to png
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p2air,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p2riv,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivQmean_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p2rivQ,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p2sst,ncol=1)
 dev.off()
 #export an overview plot as png
 gA <- ggplotGrob(p2air)
 gB <- ggplotGrob(p2riv)
 gC <- ggplotGrob(p2sst)
 gD <- ggplotGrob(p2Est)
 gE <- ggplotGrob(p2rivQ)
 gA$widths <- gE$widths
 gC$widths <- gE$widths
 gD$widths <- gE$widths
 gB$widths <- gE$widths
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_AirT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gA,gB ,gC,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gB,gC, ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gA,gB, ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gB ,gE, ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gB,gD,gC,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gE,gB,gD,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_RivQ_1990-present_', Sys.Date(),"b.png", sep="")
 png(file = png.name, width = 10.5, height = 11, units='in', res=500)
 grid.arrange(gB, gA,gE,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gA,gB,ncol=1)
 dev.off()
 png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivT_RivQ_1990-present_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(gB,gE,ncol=1)
 dev.off()
 lm_eqn <- function(df){
  m <- lm(y ~ x, df);
  eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
                   list(a = format(coef(m)[1], digits = 2), 
                        b = format(coef(m)[2], digits = 2), 
                        r2 = format(summary(m)$r.squared, digits = 3)))
  as.character(as.expression(eq));  
 }
 Modeling <- 'yes'
 if (Modeling == 'yes'){
  source("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code/FUN_do_ggplot_Scatterplot_2vars.R")
  AirT.full.TS.Monthly <- aggregate(AirT.full.TS, as.yearmon(time(AirT.full.TS)),FUN = mean)
  RivT.full.TS.Monthly <- aggregate(RivT.full.TS, as.yearmon(time(RivT.full.TS)),FUN = mean)
  RivQ.full.TS.Monthly <- aggregate(RivQ.full.TS, as.yearmon(time(RivQ.full.TS)),FUN = mean)
  if(logtransformFlow ==TRUE){
    RivQ.full.TS.Monthly <- log10(RivQ.full.TS.Monthly +2) 
  }
  All.TS.zoo <- merge(AirT.full.TS.Monthly, RivT.full.TS.Monthly ,RivQ.full.TS.Monthly , all=F)
  AirvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,1],"River temp. [°C]" , "Air temp. [°C]")
  if(logtransformFlow ==TRUE){
    QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]",  expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep="")))
  } else {
    QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]",  expression(paste("River flow [m"^"3" *"/s]", sep="")))
  }
  #export an overview plot as png
  gA <- ggplotGrob(AirvsRivT)
  gB <- ggplotGrob(QvsRivT)
  gB$widths <- gA$widths
  png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Scatterplots_of_RivT_vs_predictors_1990-present_', Sys.Date(),"f.png", sep="")
  png(file = png.name, width = 8, height = 4, units='in', res=500)
  grid.arrange(gA,gB , ncol=2)
  dev.off()
 }
--- a/Analysis/Code/ggplot_time_series_with_trends_Waves_SST_OEH.R
+++ b/Analysis/Code/ggplot_time_series_with_trends_Waves_SST_OEH.R
@ -0,0 +1,184 @@
 #R code for creating ggplots of time series with smooth (GAM) and linear term for the OEH buoy data on wave heigth and SST
 ######################
 #Import Libraries and set working directory
 ######################
 library(zoo)
 library(hydroTSM) #you need to install these packages first before you can load them here
 library(lubridate)
 library(mgcv)
 library(ggplot2)
 library(gridExtra)
 library(scales)
 options(scipen=999)
 setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
 ######################
 ######################
 #Set inputs
 ######################
 Case.Study <- "CASESTUDY2"
 Estuary <- "GEORGES"
 Climvar <- 'tasmean'
 ggplotGAM.k <- 7
 ######################
 ######################
 Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
 if (file.exists(Output.Directory)){
  print('output folder already existed and was not created again')
 } else {
  dir.create(file.path(Output.Directory))
  print('output folder did not exist and was created')
 }
 ######################
 ######################
 #Set input file paths
 ######################
 pattern = paste(Estuary, '.*.csv', sep="")
 Buoy_CSV_Path <- list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=T)
 Buoy.name <- substr(list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=F), nchar(Estuary)+2, nchar(Estuary)+7)
 SST.df <- data.frame(read.csv(Buoy_CSV_Path, colClasses = "character"))
 colnames(SST.df) <- as.character(SST.df[9,])
 SST.df = SST.df[-c(1:9), ] 
 colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Date/Time",replacement="Date",fixed=T)
 colnames(SST.df) <- gsub(x=colnames(SST.df), pattern=" ",replacement="",fixed=T)
 colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Hsig(m)",replacement="Hsig",fixed=T)
 colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="SeaTemp(C)",replacement="SST",fixed=T)
 SST.df$Date <- substr(SST.df$Date, 1, 10)
 ######################
 #Hsig
 ######################
 Hsig.full.TS <- zoo(as.numeric(SST.df$Hsig), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 Hsig.full.TS <- aggregate(Hsig.full.TS, index(Hsig.full.TS) , FUN=mean)
 Hsig.full.TS <-  aggregate(Hsig.full.TS , as.yearmon(time(Hsig.full.TS )),FUN = mean)
 Hsig.full.TS <- Hsig.full.TS [1:length(Hsig.full.TS)-1]
 Hsig.TS <- window(Hsig.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
 Hsig.full.df <- data.frame(Hsig.full.TS)
 Hsig.df <- data.frame(Hsig.TS)
 str(Hsig.df)
 colnames(Hsig.df) <- 'Hsigmean'
 colnames(Hsig.full.df) <- 'Hsigmean'
 #rid of outliers
 #Hsig.full.df$MSL <- replace(Hsig.full.df$MSL, which(Hsig.full.df$MSL > 4), NA)
 Hsig.full.df$Julday1 <- seq(1,length(Hsig.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(Hsigmean ~ Julday1, Hsig.full.df)
 Hsig.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 Hsig.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
 ######################
 ######################
 #SST
 ######################
 SST.full.TS <- zoo(as.numeric(SST.df$SST), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
 SST.full.TS  <- na.omit(SST.full.TS)
 SST.full.TS <- aggregate(SST.full.TS, index(SST.full.TS) , FUN=mean)
 SST.full.TS <- SST.full.TS[1:length(SST.full.TS)-1]
 SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
 SST.full.df <- data.frame(SST.full.TS)
 SST.df <- data.frame(SST.TS)
 str(SST.df)
 tail(SST.full.TS)
 colnames(SST.df) <- 'SSTmean'
 colnames(SST.full.df) <- 'SSTmean'
 #rid of outliers
 #SST.full.df$MSL <- replace(SST.full.df$MSL, which(SST.full.df$MSL > 4), NA)
 SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
 linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
 SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 SST.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
 #export interpolated data as csv
 csv.name <- paste("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Waves_SST/Processed/",Estuary , "_SST_Daily.csv", sep="")
 write.csv(SST.full.df, csv.name)
 ######################
 ######################
 #Plot
 ######################
 ##################################### Full Time Period
 p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in significant wave height (OEH Buoy:",Buoy.name,  "| lin trend was ",
                round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x,  k=4), se=T, size=0.5, col="red") +
  ylab("Significant Wave Height [m]") + xlab("Time")  +
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #export to png
 png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name,  '_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
 #multiple smooths
 p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (OEH Buoy:",Buoy.name,  "| lin trend was ",
                round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  stat_smooth(method=gam, formula=y~s(x,  k=13), se=T, size=0.5, col="red") +
  #stat_smooth(method=gam, formula=y~s(x,  k=8), se=T, size=0.5, cor="blue") +
  stat_smooth(method=gam, formula=y~s(x,  k=5), se=T, size=0.5, col="green") +
  ylab("Significant Wave Height [m]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #export to png
 png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name,  '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
 ##################################### Full Time Period SST
 p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in SST (OEH Buoy:",Buoy.name,  "| lin trend was ",
                round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x,  k=4), se=T, size=0.5, col="red") +
  ylab("Sea Surface Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #export to png
 png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name,  '_full_period_',  Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
 #multiple smooths
 p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (OEH Buoy:",Buoy.name,  "| lin trend was ",
                round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  stat_smooth(method=gam, formula=y~s(x,  k=13), se=T, size=0.5, col="red") +
  #stat_smooth(method=gam, formula=y~s(x,  k=8), se=T, size=0.5, cor="blue") +
  stat_smooth(method=gam, formula=y~s(x,  k=5), se=T, size=0.5, col="green") +
  ylab("Sea Surface Temperature [C°]") + xlab("Time")+
  theme(axis.text=element_text(size=Fontsize)) +
  theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
 #export to png
 png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name, '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
--- a/Analysis/Code/ggplot_time_series_with_trends_sea_level.R
+++ b/Analysis/Code/ggplot_time_series_with_trends_sea_level.R
@ -0,0 +1,113 @@
 #R code for creating ggplots of time series with smooth (GAM) and linear term
 ######################
 #Import Libraries and set working directory
 ######################
 library(zoo)
 library(hydroTSM) #you need to install these packages first before you can load them here
 library(lubridate)
 library(mgcv)
 library(ggplot2)
 library(gridExtra)
 library(scales)
 options(scipen=999)
 setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
 ######################
 ######################
 #Set inputs
 ######################
 Case.Study <- "CASESTUDY2"
 Estuary <- "NADGEE"
 Climvar <- 'tasmean'
 ggplotGAM.k <- 7
 ######################
 ######################
 Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
 if (file.exists(Output.Directory)){
  print('output folder already existed and was not created again')
 } else {
  dir.create(file.path(Output.Directory))
  print('output folder did not exist and was created')
 }
 ######################
 ######################
 #Set input file paths
 ######################
 AirT_CSV_Path <- paste("./Data/Ocean_Data/BOM_monthly_SL_",Estuary,  "_Eden.txt", sep="")
 dat = readLines(AirT_CSV_Path)
 dat = as.data.frame(do.call(rbind, strsplit(dat, split=" {2,10}")), stringsAsFactors=FALSE)
 colnames(dat) <-dat[3,]
 if (Estuary=='NADGEE'){
  dat2 = dat[-c(1:11), ] 
  dat2 = dat2[-(365:381),]
  dat2 = dat2[,-1]
  }
 if (Estuary=='HUNTER'){
  dat2 = dat[-c(1:3), ]  
  dat2 = dat2[-(720:726),]
  dat2 = dat2[-(1:108),]
  dat2 = dat2[,-1]
  }
 dat2$Date <-  as.yearmon(dat2[,1], "%m %Y")
 SeaLev.df <- dat2
 head(SeaLev.df)
 SeaLev.df$MSL <- as.numeric(SeaLev.df$Mean)
 #rid of outliers
 SeaLev.df$MSL <- replace(SeaLev.df$MSL, which(SeaLev.df$MSL > 4), NA)
 SeaLev.df$Julday1 <- seq(1,length(SeaLev.df[,1]),1)
 linear.trend.model_EC_all <- lm(MSL ~ Julday1, SeaLev.df)
 SeaLev.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4] 
 SeaLev.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
 ######################
 #Plot
 ######################
 ##################################### Full Time Period
 p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
                round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
  stat_smooth(method=gam, formula=y~s(x,  k=4), se=T, size=0.5, col="red") +
  ylab("Monthly Mean Sea Level [m]") + xlab("Time")
 #export to png
 png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
 #multiple smooths
 p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) + 
  ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
                round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
  theme(plot.title=element_text(face="bold", size=9)) +
  stat_smooth(method=gam, formula=y~s(x,  k=13), se=T, size=0.5, col="red") +
  #stat_smooth(method=gam, formula=y~s(x,  k=8), se=T, size=0.5, cor="blue") +
  stat_smooth(method=gam, formula=y~s(x,  k=5), se=T, size=0.5, col="green") +
  ylab("Monthly Mean Sea Level [m]") + xlab("Time")
 #export to png
 png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_MultiGAM_full_period_', Sys.Date(),".png", sep="")
 png(file = png.name, width = 10.5, height = 7, units='in', res=500)
 grid.arrange(p1air,ncol=1)
 dev.off()
Author	SHA1	Message	Date
tinoheimhuber	ddf2ee38a3	#added new code for also deriving quantile scaling factors from the NARCLIM data	7 years ago
tinoheimhuber	0cd9a32a19	#Sep 2018 version of the NARCLIM suite of codes for OEH tech guide	8 years ago
Valentin Heimhuber	0a8d32769e	#added a few additional plotting options #a bug remains for the plotting of the medians on top of the NARCLIM delta plots	8 years ago
Valentin Heimhuber	00e63c6c29	#added a couple of plotting abilities to the existing code and all is working. the only problem remaining is that currently, the present day plot for extreme indices isn't working properly it's showing the annual sums or averages	8 years ago
Valentin Heimhuber	78fd10e93b	#added better png plotting for the tech guide plots	8 years ago
Valentin Heimhuber	9eda19058e	#several major imporvements on all fronts	8 years ago
Valentin Heimhuber	7302cf843c	#added code for dealing with OEH buoy data on SST and Hsig	8 years ago
Valentin Heimhuber	c5f2f4f743	#several improvements	8 years ago
tinoheimhuber	6baeb9c237	#just backup	8 years ago
tinoheimhuber	d56262cd14	#a new srcipt for silo and backup for the other paper#1 codes	8 years ago
Valentin Heimhuber	42ab0af07a	#cleaned out the code and separated big functions from the analysis code the functions are in climdata_fcts.py	8 years ago
Valentin Heimhuber	1154e4eb1c	#added a few additions to the NARCLIM NC code and added an R code for generating BASH code for automating the download for a number of sites and clim_vars	8 years ago
Valentin Heimhuber	2c102d4594	#added a couple of function input parameters and the ability to create bias corrected data csv files as well as NNRP for both bias corrected and normal	8 years ago
tinoheimhuber	adad360de9	#incorporated the option for a stats type such as daily max per year or season #also fixed a bug with the server script	8 years ago
tinoheimhuber	d907d640f8	#This commit comprises a major update to the NARCLIM interrogation set of scripts. #Many of the plots have been updated to to be used in the OEH technical guide. #Several improvements have been made.	8 years ago
tinoheimhuber	8a771b65b6	#fully working set of codes for generating the NARCLIM Delta CSVs to then generate the climate deviation plots by combining with observational time seris	8 years ago
tinoheimhuber	b2fc7135d9	#next step for NACRLIM cc deviation plots	8 years ago
tinoheimhuber	b2dd285534	#First successfull recreation of the existing method after first meeting with Alejandro di Luca. We are not comparing present day variability with near and far future ensemble model deltas (changes in 20-year average climate). These changes are added to the present day median climate for the plots.	8 years ago
tinoheimhuber	fd19552a5f	#refined the NARCLIM plotting	8 years ago
tinoheimhuber	bbec9f1891	#started with more sophisticated NARCLIM plots	8 years ago
tinoheimhuber	3ca11831d8	#proper gitignore file and updated linux bash script for NARCLIM interrogation	8 years ago