Compare commits

...

21 Commits

Author SHA1 Message Date
tinoheimhuber ddf2ee38a3 #added new code for also deriving quantile scaling factors from the NARCLIM data 6 years ago
tinoheimhuber 0cd9a32a19 #Sep 2018 version of the NARCLIM suite of codes for OEH tech guide 6 years ago
Valentin Heimhuber 0a8d32769e #added a few additional plotting options
#a bug remains for the plotting of the medians on top of the NARCLIM delta plots
6 years ago
Valentin Heimhuber 00e63c6c29 #added a couple of plotting abilities to the existing code and all is working.
the only problem remaining is that currently the present day plot for extreme indices isn't working properly:
it's showing the annual sums or averages
6 years ago
Valentin Heimhuber 78fd10e93b #added better png plotting for the tech guide plots 6 years ago
Valentin Heimhuber 9eda19058e #several major improvements on all fronts 7 years ago
Valentin Heimhuber 7302cf843c #added code for dealing with OEH buoy data on SST and Hsig 7 years ago
Valentin Heimhuber c5f2f4f743 #several improvements 7 years ago
tinoheimhuber 6baeb9c237 #just backup 7 years ago
tinoheimhuber d56262cd14 #a new script for silo and backup for the other paper#1 codes 7 years ago
Valentin Heimhuber 42ab0af07a #cleaned out the code and separated big functions from the analysis code
the functions are in climdata_fcts.py
7 years ago
Valentin Heimhuber 1154e4eb1c #made a few additions to the NARCLIM NC code and added an R script for
generating BASH code for automating the download
for a number of sites and clim_vars
7 years ago
Valentin Heimhuber 2c102d4594 #added a couple of function input parameters
and the ability to create bias corrected data csv files
as well as NNRP for both bias corrected and normal
7 years ago
tinoheimhuber adad360de9 #incorporated the option for a stats type such as daily max per year or season
#also fixed a bug with the server script
7 years ago
tinoheimhuber d907d640f8 #This commit comprises a major update to the
NARCLIM interrogation set of scripts.
#Many of the plots have been updated
to be used in the OEH technical guide.
#Several improvements have been made.
7 years ago
tinoheimhuber 8a771b65b6 #fully working set of codes for generating the NARCLIM
Delta CSVs to then generate the climate deviation plots
by combining with observational time series
7 years ago
tinoheimhuber b2fc7135d9 #next step for NARCLIM cc deviation plots 7 years ago
tinoheimhuber b2dd285534 #First successful recreation of the existing method
after first meeting with Alejandro di Luca. We
are now comparing present day variability with
near and far future ensemble model deltas (changes
in 20-year average climate). These changes are added
to the present day median climate for the plots.
7 years ago
tinoheimhuber fd19552a5f #refined the NARCLIM plotting 7 years ago
tinoheimhuber bbec9f1891 #started with more sophisticated NARCLIM plots 7 years ago
tinoheimhuber 3ca11831d8 #proper gitignore file and updated linux bash script
for NARCLIM interrogation
7 years ago

4
.gitignore vendored

@@ -0,0 +1,4 @@
Data/
Manuscript/
NARCLIM/
Outputs/

@@ -0,0 +1,79 @@
#code for preparing a text file with BASH code for batch download of NARCLIM data for the HUNTER WQ modeling of
#future climate scenarios
#NARCLIM Variables
#evspsblmean water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
#tasmean mean near surface temperature
#pracc precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'evspsblmean' water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
# 'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
# 'tasmean' mean near surface temperature
# 'tasmax' maximum near surface temperature
# 'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'pr1Hmaxtstep' maximum 1 hour interval rainfall in a one day period
# 'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
# 'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
# 'sstmean' Sea surface temperature daily mean
Clim_Var <- 'evspsblmean'
Datatype <- 'T_GCMS' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data?
Location <- 'catchment' #location of coordinate
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY2/'
Filename <- 'CASESTUDY2_NARCLIM_Point_Sites.csv'
#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T, fileEncoding="UTF-8-BOM"))
#create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df$Name))){
name <-as.character(Location.df$Name[i])
#name<-gsub('([[:punct:]])|\\s+','_',name)
# if(i<10){
# name<-paste('Catchment_0', as.character(i), sep="")
# }else{
# name<-paste('Catchment_', as.character(i), sep="")
# }
latitude=round(as.numeric(Location.df$Lat[i]),3)
longitude=round(as.numeric(Location.df$Long[i]),3)
if (Clim_Var == 'sstmean'){
latitude=round(as.numeric(Location.df$Ocean_Lat[i]),3)
longitude=round(as.numeric(Location.df$Ocean_Long[i]),3)
}
if (Location == 'catchment'){
latitude=round(as.numeric(Location.df$Catch_Lat[i]),3)
longitude=round(as.numeric(Location.df$Catch_Lon[i]),3)
}
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
paste("name='",name,"'", sep=""),
"python /srv/ccrc/data02/z5025317/Code_execution/\\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
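#each generated .txt file ends up holding blocks of shell commands along these lines (values illustrative):
# module load python
# Datatype='T_GCMS'
# Bias_corrected='False'
# ClimVarName='evspsblmean'
# latitude=-32.91
# longitude=151.8
# name='HunterRiver'
# python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected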
if(i==10|i==20|i==31|i==length(Location.df$Name)){
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste(Directory ,'/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), "_",Location, ".txt", sep="")
#open and fill text file
fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn)
close(fileConn)
#
if(i==10|i==20){
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
}
}
}

@@ -0,0 +1,81 @@
#code for preparing a text file with BASH code for batch download of NARCLIM data for the HUNTER WQ modeling of
#future climate scenarios
#NARCLIM Variables
#evspsblmean water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
#tasmean mean near surface temperature
#pracc precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
Clim_Var <- 'sstmean'
Datatype <- 'T_NNRP' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data? True or False python boolean
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BCGeneration/catchments/'
Filename <- 'Catchment_Prev_Hunter_Model_Centroids_VH_WGS84_attribute_Table.csv'
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/'
Filename <- 'CASESTUDY2_GAB_NARCLIM_Point_Sites_V4.csv'
#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T))
for(type in c('min','max')){
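#'min' and 'max' appear to select two alternative coordinate sets from the CSV
#(columns 1-3 versus columns 6-8); the exact meaning depends on the input table layout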
#create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df[,1]))){
#for (i in c(1,5,10)){ if only subset of catchments is needed like for Met file.
#name<-as.character(Location.df$Name[i])
#name<-gsub('([[:punct:]])|\\s+','_',name)
if(type == 'min'){
#latlongs <- as.character(Location.df[,9])
name <- as.character(Location.df[,1][i])
latitude <- as.character(Location.df[,2][i])
longitude <- as.character(Location.df[,3][i])
}else if(type == 'max'){
name <- as.character(Location.df[,6][i])
latitude<-as.character(Location.df[,7][i])
longitude<-as.character(Location.df[,8][i])
}
# print(latlongs[i])
# names <- as.character(Location.df[,17])
# if(i<10){
# name<-paste('Plant_0', as.character(i), sep="")
# }else{
# name<-paste('Plant_', as.character(i), sep="")
# }
#print(name)
#print(names[i])
# latmin <- substr(latlongs[i], 5, 10)
# lonmin <- substr(latlongs[i], 18, 23)
# latmax <- substr(latlongs[i], 32, 37)
# lonmax <- substr(latlongs[i], 45, 50)
# latitude=latmax
# longitude=lonmax
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
paste("name='",name,"'", sep=""),
"python /srv/ccrc/data02/z5025317/Code_execution/\\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd01' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
if(i==10|i==20|i==30|i==40|i==45){
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/CASESTUDY_GAB/DataDownloadCCRC/',Clim_Var, "_", Datatype, "_", Biasboolean,as.character(i), type, ".txt", sep="")
#open and fill text file
fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn)
close(fileConn)
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
}
}
}

@@ -0,0 +1,526 @@
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating future climate variability deviation plots for NARCLIM variables.
#Inputs: Uses CSV files that contain the deltas of all 12 NARCLIM models for 1 grid cell at the site of interest, generated with P1_NARCLIM_plots_Windows.py
#This code is used only for the NARCLIM variables - a separate code is used for ocean variables etc that are not part of the NARCLIM ensemble
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
import csv
matplotlib.style.use('ggplot')
# Load my own functions
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
#==========================================================#
#==========================================================#
# Set working directory (where postprocessed NARCLIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#loop through case study estuaries
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['NADGEE','HUNTER', 'RICHMOND']
for Est in Estuaries:
#==========================================================#
#set input parameters
#==========================================================#
Case_Study_Name = 'CASESTUDY2'
Version = "V4"
OutVersion = "V4"
Casestudy2_csv_path = 'Data/NARCLIM_Site_CSVs/' + Case_Study_Name + '/' + Case_Study_Name + '_NARCLIM_Point_Sites.csv'
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print(Estuary)
Base_period_start = '1950-01-01' #Start of interval for base period of climate variability
Base_period_DELTA_start = '1990-01-01' #Start of interval used for calculating mean and SD for base period (Delta base period)
Base_period_end = '2009-01-01' #use last day that's not included in period as < is used for subsetting
Clim_var_type = 'pracc' #Name of climate variable in NARCLIM models '*' will create pdf for all variables in folder
Stats = 'days_h_20' #statistic to compute; 'maxdaily' takes the annual max precipitation instead of the sum, while 'days_h_30'-style names count days above the given threshold
PD_Datasource = 'SILO' #Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
SILO_Clim_var = ['daily_rain'] #select the SILO clim variable to be used for base period. - see silo.py for detailed descriptions
force_sea_levels = 'no' #use fort denison sea levels to run the statistics
Location = 'Estuary' # pick location for extracting the SILO data; can be: Estuary, Catchment, or Ocean
Main_Plot = 'yes'
presentdaybar = False #include a bar for present day variability in the plots?
present_day_plot = 'no' #save a time series of present day
Allin1_delta_plot = 'yes'
Deltaxmin, Deltaymin = -20,40 #y limits for 0 baseline delta plots
Median_markers = 'yes' #plot the median horizontal lines on top of the delta range in the allin1 plot?
Figure_headings = 'yes'
ALPHA_figs = 0 #Set alpha of figure background (0 makes everything around the plot panel transparent)
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14} #size of axis labels
matplotlib.rc('font', **font)
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + OutVersion + '/' + Estuary + '/Clim_Deviation_Plots/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
#==========================================================#
#==========================================================#
#read CSV file
#==========================================================#
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/' + Estuary + '_' + Clim_var_type + '_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
if Clim_var_type == 'rsdsmean':
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/' + Estuary + '/' + Estuary + '_rsdsmean_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df1 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
Clim_Var_CSVs = glob.glob('./Output/' + Case_Study_Name + '/' + Estuary + '/' + Estuary + '_rldsmean_' + Stats + '*')
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df2 = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
Ensemble_Delta_full_df = Ensemble_Delta_full_df2 + Ensemble_Delta_full_df1
#Ensemble_Delta_full_df = pd.to_numeric(Ensemble_Delta_full_df)
#==========================================================#
#==========================================================#
#load present day climate variable time series
#==========================================================#
if PD_Datasource == 'Station':
Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
if force_sea_levels == 'yes':
Estuary_Folder = "C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Tide_Levels"
Present_day_df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Daily_1900-2011.csv' , parse_dates=True, index_col=0)
Present_day_df = Present_day_df.interpolate()
Present_day_df = pd.DataFrame(Present_day_df.loc[(Present_day_df.index >= Base_period_start) & (Present_day_df.index <= Base_period_end)])
[minplotDelta, maxplotDelta]=[1, 1]
df = pd.read_csv(Estuary_Folder + '/Fort_Denison_Sub_Daily_1900-2011.csv', parse_dates=[['Date', 'Time']])
df.index = df['Date_Time'] #parse_dates=[['Date', 'Time']] already combines the two columns into a single datetime column
if PD_Datasource == 'SILO':
#read the CSV to extract the lat long and case stuy sites
with open(Casestudy2_csv_path, mode='r') as infile:
reader = csv.reader(infile)
next(reader, None)
with open('coors_new.csv', mode='w') as outfile:
writer = csv.writer(outfile)
if Location == 'Estuary':
mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
if Location == 'Ocean':
mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
if Location == 'Catchment':
mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
if Clim_var_type == 'tasmean':
silo_df = sil.pointdata(["max_temp", "min_temp"], 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""),
None, mydict[Estuary][0], mydict[Estuary][1], False, None)
#take mean of daily min and max temp
Present_day_df = silo_df.iloc[:,[2,4]].mean(axis=1)
else:
silo_df = sil.pointdata(SILO_Clim_var, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh', Base_period_start.replace("-", ""), Base_period_end.replace("-", ""),
None, mydict[Estuary][0], mydict[Estuary][1], False, None)
Present_day_df = silo_df.iloc[:,[2]]
#set the x and y limit deltas - for plotting only
if Clim_var_type in ['evspsblmean', 'potevpmean']: #ET time series that we have is not in the same format as the other variables, hence the different treatment
[minplotDelta, maxplotDelta]=[50,50]
#for tasmean, observed min and max T need to be converted into mean T
elif Clim_var_type == 'tasmean':
[minplotDelta, maxplotDelta]=[0.2,1]
elif Clim_var_type == 'sstmean':
[minplotDelta, maxplotDelta]=[40,40]
elif Clim_var_type == 'tasmax':
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'wssmean' or Clim_var_type == 'wss1Hmaxtstep':
[minplotDelta, maxplotDelta]=[1, 1.5]
elif Clim_var_type == 'pracc':
[minplotDelta, maxplotDelta]=[50,100]
elif Clim_var_type in ['rsdsmean', 'rldsmean']:
[minplotDelta, maxplotDelta]=[100,100]
#[minplotDelta, maxplotDelta]=[1,1]
#==========================================================#
#subtract a constant from all values to convert from Kelvin to Celsius (temp)
if Clim_var_type == 'sstmean':
#Present_day_df = Present_day_df.iloc[:,0:(len(Present_day_df)-1)]-273.15
Present_day_df.index = Present_day_df.index.to_period('D')
#==========================================================#
#create seasonal sums etc.
#==========================================================#
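#note: resampling with 'Q-NOV' yields quarters ending in Feb/May/Aug/Nov,
#i.e. the austral seasons DJF, MAM, JJA and SON used by the plotting loop below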
if Stats == 'maxdaily':
Present_day_df_annual = Present_day_df.resample('A').max()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif Stats == 'maxmonthly':
Present_day_df_annual = Present_day_df.resample('M').max().resample('A').mean()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('M').max().resample('Q-NOV').mean() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
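#the trailing comma wraps the (label, function) pair in a tuple so that legacy
#pandas .agg() names the output column (e.g. '>20_count') while counting days above the threshold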
Present_day_df_annual = Present_day_df.resample('A').agg(agg)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').agg(agg) #seasonal means
else:
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,
'pr1Hmaxtstep' ,'wss1Hmaxtstep',
'rsdsmean', 'rldsmean']:
Present_day_df_annual = Present_day_df.resample('A').sum()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
elif Clim_var_type in ['tasmean' ,'tasmax' ,'sstmean']:
Present_day_df_annual = Present_day_df.resample('A').mean()
Present_day_df_monthly = Present_day_df.resample('M').mean()
Present_day_df_weekly = Present_day_df.resample('W').mean()
Fdf_Seas_means = Present_day_df.resample('Q-NOV').mean() #seasonal means
#==========================================================#
#==========================================================#
#calculate linear trend in present day series
#==========================================================#
if Stats == 'maxdaily' or Stats[:4] =='days' or Stats == 'maxmonthly':
coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df_annual.index)),Present_day_df_annual.values,1,full=True)
#mse = residuals[0]/(len(selected.index))
#nrmse = np.sqrt(mse)/(selected.max() - selected.min())
trend = coefficients[0]
print('Slope ' + str(coefficients[0]))
print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,4)) + ' units/year')
elif Stats == 'mean':
coefficients, residuals, _, _, _ = np.polyfit(range(len(Present_day_df.index)),Present_day_df.values,1,full=True)
#mse = residuals[0]/(len(selected.index))
#nrmse = np.sqrt(mse)/(selected.max() - selected.min())
if force_sea_levels == 'yes':
trend = coefficients[0]*12
else:
trend = coefficients[0]*365
print('Slope ' + str(coefficients[0]))
print(Base_period_start[:4]+ '-' + Base_period_end[:4] + ' trend was: ' + str(np.round(trend,5)) + ' units/year')
#==========================================================#
#==========================================================#
#Loop through annual and seasons and create a deviation plot for each.
#==========================================================#
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
fig = plt.figure(figsize=(14,8))
delta_all_df = pd.DataFrame()
Combined_Delta_df = pd.DataFrame()
Combined_Med_df = pd.DataFrame()
i=1
for temp in times:
#temp = 'annual'
#subset the ensemble dataframe for the period used:
if temp == 'annual':
Ensemble_Delta_df = Ensemble_Delta_full_df.iloc[:,range(0,2)]
Present_Day_ref_df = Present_day_df_annual
Column_names = ['Annual_near', 'Annual_far']
else:
Ensemble_Delta_df = Ensemble_Delta_full_df.filter(regex= temp)
Ensemble_Delta_df.columns = ['near', 'far']
if temp == 'DJF':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1]
Column_names = ['Summer_near', 'Summer_far']
if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2]
Column_names = ['Autumn_near', 'Autumn_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3]
Column_names = ['Winter_near', 'Winter_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4]
Column_names = ['Spring_near', 'Spring_far']
Present_Day_ref_df = Mean_df
#Subset to present day variability period
Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
#Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
Present_Day_SD = np.std(Present_Day_Delta_ref_df)
#create data frame for floating stacked barplots
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['present','near future', 'far future']
Plot_in_df = pd.DataFrame(index=index, columns =columns)
#
Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][2])]
Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][2])]
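#Ensemble_Delta_df.near[-3:] appears to hold the 10th percentile, median and 90th
#percentile ensemble deltas; adding them to the present-day median puts the future
#bars on the same absolute scale as the present-day climatology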
#Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#
if presentdaybar == False:
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['near future', 'far future']
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#Plot_in_df2['present'] = [float(0),float(0), float(0),
# float(0), float(0)]
else:
Plot_in_df2['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_SD), float(Present_Day_SD),
float(Present_Day_SD), float(Present_Day_SD)]
Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Ensemble_Delta_df.near[-3:][1]-Ensemble_Delta_df.near[-3:][0]),
np.NaN, float(Ensemble_Delta_df.near[-3:][2]-Ensemble_Delta_df.near[-3:][1])]
Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Ensemble_Delta_df.far[-3:][1]-Ensemble_Delta_df.far[-3:][0]),
np.NaN, float(Ensemble_Delta_df.far[-3:][2]-Ensemble_Delta_df.far[-3:][1])]
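#Plot_in_df2 re-expresses the same bounds as stacked increments; the first stacked
#segment is later drawn with colour 'none' so the visible bar floats between the percentile levels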
#transpose the data frame
Plot_in_df_tp = Plot_in_df2.transpose()
#do the individual plots
if temp == 'annual':
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
else:
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
#define colour scheme
#likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
#plot the stacked barplot
if temp == 'annual':
ax=plt.subplot(2,4,3)
else:
ax=plt.subplot(2,4,i)
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)
df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
index2=['Med']
columns2 = ['near future', 'far future']
Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
Plot_in_df3['near future'] = df.iloc[1,0]
Plot_in_df3['far future'] = df.iloc[0,0]
Plot_in_df3 = Plot_in_df3.transpose()
# plt.plot(Plot_in_df3['Med'], linestyle="", markersize=45,
# marker="_", color='red', label="Median")
#Plot_in_df3['Med']
#Plot_in_df3['Med'].values
plt.plot(np.arange(2),Plot_in_df3['Med'].values[::-1], linestyle="", markersize=45,
marker="_", color='red', label="Median")
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(xmin, xmax)
#plt.ylim(10, 140)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' ' + temp)
ax.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plt.set_facecolor('g')
if temp == 'annual':
ax=plt.subplot(2,2,1)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
if Figure_headings == 'yes':
#plt.title(Clim_var_type)
#plt.title(Clim_var_type + ' ' + Stats + ' ' + Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
plt.title(Base_period_start[:4] + '-' + Base_period_end[:4] + ' trend:' + str(np.round(trend,4)) + '/year')
plt.ylim(xmin, xmax)
#plt.ylim(0, 40)
ax.grid(False)
#if temp == 'MAM':
i=i+4
else:
i=i+1
#create a data frame that contains all of the delta bars
Plot_in_df_tp.index = Column_names
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Present_Day_Mean
Combined_Delta_df = pd.concat([Combined_Delta_df, Plot_in_df_tp], axis=0)
if Allin1_delta_plot == 'yes':
ax=plt.subplot(2,4,4)
temp = Combined_Delta_df[0:2]
Plot_in_df_tp = temp
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)
else:
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
if Median_markers == 'yes':
plt.plot(np.arange(2),Plot_in_df_tp['Median'].values , linestyle="", markersize=45,
marker="_", color='red', label="Median")
z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(Deltaxmin, Deltaymin)
#plt.ylim(xmin, xmax)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' All Deltas ' + Stats)
#ax.grid(False)
ax.xaxis.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(10)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plt.show()
if Main_Plot == 'yes':
if presentdaybar == False:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + PD_Datasource + '_' + SILO_Clim_var[0] + '_' + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + '_' + ' trend_' + str(np.round(trend[0],4)) + '.png'
else:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + PD_Datasource + '_' + SILO_Clim_var[0] + '_' + Version + '_' + '2.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
if Allin1_delta_plot == 'yes':
fig = plt.figure(figsize=(14,8))
ax = fig.add_subplot(2, 3, 1)
temp = Combined_Delta_df
Plot_in_df_tp = temp
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
Plot_in_df_tp['Median'] = pd.DataFrame(Plot_in_df_tp['-2std'] + Plot_in_df_tp['Med'])
if int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) < 0:
Bottom = int(Plot_in_df_tp.stack().min(axis=None,skipna=True)) -1
Plot_in_df_tp['-2std'] = Plot_in_df_tp['-2std'] - Bottom
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax, bottom = Bottom)
else:
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, width=0.5,ax=ax)
if Median_markers == 'yes':
# plt.bar(Plot_in_df_tp.index, Plot_in_df_tp['Median'], linestyle="", markersize=13,
# marker="_", color='red', label="Median")
plt.plot(np.arange(10), Plot_in_df_tp['Median'], linestyle="", markersize=13,
marker="_", color='red', label="Median")
z = plt.axhline(float(0), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(Deltaxmin, Deltaymin)
if Figure_headings == 'yes':
plt.title(Clim_var_type + ' All Deltas ' + Stats)
#ax.grid(False)
ax.xaxis.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(50)
plt.setp(ax.xaxis.get_majorticklabels(), ha='right')
#ax.set_xticklabels(xticklabels, rotation = 45, ha="right")
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
plt.show()
#export plot to png
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + '_All_Deltas_In1.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
##make barplot with alternating colours
#men_means, men_std = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
#women_means, women_std = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)
#
#ind = np.arange(len(men_means)) # the x locations for the groups
#width = 0.35 # the width of the bars
#
#fig, ax = plt.subplots()
#rects1 = ax.bar(ind - width/2, men_means, width, yerr=men_std,
# color='SkyBlue', label='Men')
#rects2 = ax.bar(ind + width/2, women_means, width, yerr=women_std,
# color='IndianRed', label='Women')
#
## Add some text for labels, title and custom x-axis tick labels, etc.
#ax.set_ylabel('Scores')
#ax.set_title('Scores by group and gender')
#ax.set_xticks(ind)
#ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
#ax.legend()
if present_day_plot == 'yes':
#print present day climate data
fig = plt.figure(figsize=(14,8))
for i1 in range(2):
if i1 ==0:
ax = fig.add_subplot(2, 3, 1)
else:
ax = fig.add_subplot(2, 1, 2)
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.patch.set_alpha(0)
#plt.ylim(0, 400)
plt.ylim(xmin, xmax)
#export plot to png
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + '_' + Version + 'Present_Day_Period.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)
plt.close(fig)
# use transparent=True if you want the whole figure with a transparent background

@@ -0,0 +1,230 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 28 12:15:17 2018
@author: z5025317
"""
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating future climate variability deviation plots for NARCLIM variables.
#This code is derived from P1_NARCliM_First_Pass_variab_deviation_plots.py to deal with variables for which we don't have NARCLIM deltas.
#These variables are Sea Level, Acidity and...
# it uses CSIRO CC in Australia CMIP-5 Deltas instead
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
import csv
matplotlib.style.use('ggplot')
# Load my own functions
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
import re
#==========================================================#
#==========================================================#
# Set working directory (where postprocessed NARCLIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
#==========================================================#
Case_Study_Name = 'CASESTUDY2'
Casestudy2_csv_path = "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTUDY2_NARCLIM_Point_Sites.csv"
Estuary = 'HUNTER' # 'Belongil'
Base_period_start = '1970-01-01' #Start of interval for base period of climate variability
Base_period_DELTA_start = '1990-01-01' #Start of interval used for calculating mean and SD for base period (Delta base period)
Base_period_end = '2009-01-01' #use last day that's not included in period as < is used for subsetting
Clim_var_type = 'acidity' #Name of climate variable in NARCLIM models '*' will create pdf for all variables in folder
[minplotDelta, maxplotDelta]=[0.5,1]
#Source for present day climate data (historic time series) can be either: 'Station' or 'SILO'
Location = 'Estuary' # pick location for extracting the SILO data; can be: Estuary, Catchment, or Ocean
presentdaybar = False #include a bar for present day variability in the plots?
present_day_plot = 'yes' #save a time series of present day
Version = "V3"
Stats = 'mean' #statistic to compute; 'mindaily', 'minmonthly' and 'mean' are handled below
ALPHA_figs = 1 #Set alpha of figure background (0 makes everything around the plot panel transparent)
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name +'_'+ Version + '/' + Estuary + '/Clim_Deviation_Plots/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
#==========================================================#
#==========================================================#
#read ensemble change CSV file
#==========================================================#
clim_var_csv_path = './Data/Non_NARCLIM/AUZ_CC_CSIRO_Deltas/Brisbane_Ocean_Variables.csv'
df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
df = df.filter(regex= 'RCP 8.5|Variable')
df = df[df.Variable=='Ocean pH']
Ensemble_Delta_df = pd.DataFrame(index=['Median', '10th', '90th'], columns =['near', 'far'])
Ensemble_Delta_df['near'] = re.split('\(|\)| to ', df.iloc[0,1])[0:3]
Ensemble_Delta_df['far'] = re.split('\(|\)| to ', df.iloc[0,2])[0:3]
Ensemble_Delta_df = Ensemble_Delta_df.astype(float).fillna(0.0)
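#the CSIRO CSV cells are presumably of the form 'median (10th to 90th)', so splitting
#on '(', ')' and ' to ' yields the three numbers assigned to Median/10th/90th above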
#==========================================================#
#==========================================================#
#load present day climate variable time series
#==========================================================#
#Present_day_df,minplotDelta, maxplotDelta = fct.import_present_day_climdata_csv(Estuary, Clim_var_type)
Present_day_Var_CSV = glob.glob('./Data/Non_NARCLIM/Hunter_Lower_Acidity2.csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0],parse_dates=True, index_col=0)
Present_day_df.columns = ['PH']
df = Present_day_df
df.index = df.index.to_period('D')
Present_day_df = df
Present_day_df = df.groupby(df.index).mean().reset_index()
Present_day_df.columns = ['Date','PH']
Present_day_df.index = Present_day_df['Date']
Present_day_df= Present_day_df['PH']
#Present_day_df.index = pd.to_datetime(Present_day_df.index,format='%Y%m%d')
if Stats == 'mindaily':
Present_day_df_annual = Present_day_df.resample('A').min()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
if Stats == 'minmonthly':
Present_day_df_annual = Present_day_df.resample('M').min().resample('A').mean() #monthly minima averaged per year, mirroring the 'maxmonthly' branch of the NARCLIM script
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
if Stats == 'mean':
Present_day_df_annual = Present_day_df.resample('A').mean()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Present_Day_ref_df = Present_day_df_annual
#Subset to present day variability period
Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
#Subset to present day variability delta base period (this is the statistical baseline used for present day conditions)
Present_Day_Delta_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_DELTA_start) & (Present_Day_ref_df.index <= Base_period_end)])
Present_Day_Mean = np.percentile(Present_Day_Delta_ref_df, 50)
Present_Day_SD = np.std(Present_Day_Delta_ref_df)
#create data frame for floating stacked barplots
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['present','near future', 'far future']
Plot_in_df = pd.DataFrame(index=index, columns =columns)
#
Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['Median']),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near['90th'])]
Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['10th']),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['Median']),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far['90th'])]
#Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
#
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['near future', 'far future']
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near['90th']),np.NaN, float(Ensemble_Delta_df.near['Median']-Ensemble_Delta_df.near['90th']),
np.NaN, float(Ensemble_Delta_df.near['10th']-Ensemble_Delta_df.near['Median'])]
Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far['90th']),np.NaN, float(Ensemble_Delta_df.far['Median']-Ensemble_Delta_df.far['90th']),
np.NaN, float(Ensemble_Delta_df.far['10th']-Ensemble_Delta_df.far['Median'])]
#transpose the data frame
Plot_in_df_tp = Plot_in_df2.transpose()
#do the individual plots
xmin = int(min(Plot_in_df.min(axis=1))-minplotDelta)
xmax = int(max(Plot_in_df.max(axis=1))+maxplotDelta)
#define colour scheme
#likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
uni_colors = ['none', 'cornflowerblue', 'cornflowerblue','cornflowerblue','cornflowerblue']
#plot the stacked barplot
fig = plt.figure(figsize=(14,8))
ax=plt.subplot(2,4,3)
Plot_in_df_tp.plot.bar(stacked=True, color=uni_colors, edgecolor='none', legend=False, ax=ax, width=0.5)
df = pd.DataFrame(Plot_in_df.iloc[2,[1,2]])
index2=['Med']
columns2 = ['near future', 'far future']
Plot_in_df3 = pd.DataFrame(index=index2, columns =columns2)
Plot_in_df3['near future'] = df.iloc[1,0]
Plot_in_df3['far future'] = df.iloc[0,0]
Plot_in_df3 = Plot_in_df3.transpose()
plt.plot(Plot_in_df3['Med'], linestyle="", markersize=52,
marker="_", color='red', label="Median")
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
plt.ylim(xmin, xmax)
plt.title(Clim_var_type)
ax.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
#plot the present day time series
ax=plt.subplot(2,2,1)
Present_Day_ref_df.plot(legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean), linestyle='--', color='red', alpha=.5)
z.set_zorder(-1)
#fig.patch.set_facecolor('deepskyblue')
fig.tight_layout()
fig.patch.set_alpha(ALPHA_figs)
plt.title(Clim_var_type + ' ' + Stats + ' annual present day')
plt.ylim(xmin, xmax)
ax.grid(False)
if presentdaybar == False:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_' + Base_period_start + '_' + Base_period_end + Version + '_' + '_NPDB2.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from netCDF4 import *
import numpy as np
from numpy import *
import os
import pandas as pd
import glob
@@ -22,6 +23,9 @@ if __name__ == "__main__":
parser.add_argument("--varName", help="operation")
parser.add_argument("--timestep", help="operation")
parser.add_argument("--domain", help="operation")
parser.add_argument("--LocationName", help="operation")
parser.add_argument("--Datatype", help="operation")
parser.add_argument("--BiasBool", help="operation")
args = parser.parse_args()
print(args.lat)
print(args.lon)
@@ -31,24 +35,30 @@ if __name__ == "__main__":
Clim_var_type = args.varName
NC_Domain = args.domain
Timestep = args.timestep
print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep)
Location = args.LocationName
Data_Type = args.Datatype
Bias_Correction_BOOL = args.BiasBool
print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)
lat_equal_len_string="%.3f" % abs(mylat)
lon_equal_len_string= "%.3f" % mylon
if Bias_Correction_BOOL == 'False':
#set directory path for output files
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out'
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/'
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
print '---------------------------------------------------------'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print("output directory folder didn't exist and was generated here:")
print(output_directory)
print '---------------------------------------------------------'
#
time.sleep(10)
#manual input via script
#mylat= -33.9608578
#mylon= 151.1339882
#Clim_var_type = 'pr1Hmaxtstep'
#NC_Domain = 'd02'
#
#time.sleep(10)
#set up the loop variables for interrogating the entire NARCLIM raw data
NC_Periods = ('1950-2009','2020-2039','2060-2079')
NC_Periods = ('1990-2009','2020-2039','2060-2079')
if Data_Type == 'T_NNRP':
NC_Periods = ('1950-2009','Stop')
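#reanalysis (NNRP) output only spans 1950-2009, so 'Stop' acts as a sentinel that the
#period loop below checks in order to skip the future GCM periods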
#
#Define empty pandas data frames
Full_df = pd.DataFrame()
@@ -58,19 +68,31 @@ MultiNC_df = pd.DataFrame()
#
#Loop through models and construct CSV per site
for NC_Period in NC_Periods:
if NC_Period != "Stop":
Period_short = NC_Period[:4]
GCMs = os.listdir('./'+ NC_Period)
for GCM in GCMs:
print GCM
Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
for Warf_run in Warf_runs:
Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
print Current_input_dir
Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
#print Climvar_NCs[1]
#Climvar_NCs = Climvar_NCs[0:2]
#print(Climvar_NCs)
for netcdf in Climvar_NCs:
f=Dataset(netcdf)
# This section prints on screen the information contained in the headings of the file
#print '---------------------------------------------------------'
#print f.ncattrs()
#print f.title
#print f.variables
#print
#for varname in f.variables:
# print varname,' -> ',np.shape(f.variables[varname])
#print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
@@ -83,7 +105,7 @@ for NC_Period in NC_Periods:
print 'Information on the nearest point'
print 'Your desired lat,lon = ',mylat,mylon
print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
#print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
#Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
d={}
#d["time"] = f.variables['time'][:]
@@ -95,6 +117,7 @@ for NC_Period in NC_Periods:
df1=pd.DataFrame(d, index=timestamp_dates)
f.close()
print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
#print f
print '---------------------------------------------------------'
#append in time direction each new time series to the data frame
MultiNC_df = MultiNC_df.append(df1)
@@ -112,10 +135,101 @@ for NC_Period in NC_Periods:
GCM_df = pd.DataFrame()
Full_df = Full_df.sort_index(axis=0, ascending=True)
#adding a column with the NARCLIM decade
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
#
if Bias_Correction_BOOL == 'True':
os.chdir('/srv/ccrc/data30/z3393020/NARCliM/Bias_corrected/')
#set directory path for output files
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/Bias_corrected/'
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print("output directory folder didn't exist and was generated here:")
print(output_directory)
#time.sleep(10)
#set up the loop variables for interrogating the entire NARCLIM raw data
GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
#set up the loop variables for interrogating the entire NARCLIM raw data
#
#Define empty pandas data frames
Full_df = pd.DataFrame()
GCM_df = pd.DataFrame()
R13_df = pd.DataFrame()
MultiNC_df = pd.DataFrame()
#
#Loop through models and construct CSV per site
for GCM in GCMs:
print GCM
Warf_runs = os.listdir('./' + GCM + '/')
for Warf_run in Warf_runs:
NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
for NC_Period in NC_Periods:
Period_short = NC_Period[:4]
Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
print Current_input_dir
Clim_var_type = Clim_var_type if Clim_var_type.endswith('_bc.nc') else Clim_var_type + '_bc.nc' #append the suffix only once; an unconditional append would accumulate '_bc.nc' on every pass of the period loop
Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type
Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
print Climvar_NCs[1]
print Climvar_NCs[2]
for netcdf in Climvar_NCs:
#netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
#print netcdf
f=Dataset(netcdf)
# This section prints on screen the information contained in the headings of the file
print '---------------------------------------------------------'
print f.ncattrs()
print f.title
print f.variables
print
for varname in f.variables:
print varname,' -> ',np.shape(f.variables[varname])
print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
dist=dist_x + dist_y
latindex=np.where(dist_y==np.min(dist_y))
lonindex=np.where(dist_x==np.min(dist_x))
index=np.where(dist==np.min(dist))
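#summing the absolute lat and lon differences is effectively a Manhattan-distance search
#over the curvilinear coordinate grids; the index minimising it is taken as the nearest cell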
print '---------------------------------------------------------'
print netcdf
print 'Information on the nearest point'
print 'Your desired lat,lon = ',mylat,mylon
print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
#Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
d={}
#d["time"] = f.variables['time'][:]
d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
#if GCM == 'NNRP' and Warf_run == 'R1':
# d['Period']= NC_Period
timestamp = f.variables['time'][:]
timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
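#NARCLIM time stamps are stored as hours since 1949-12-01, hence the origin used here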
df1=pd.DataFrame(d, index=timestamp_dates)
f.close()
print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
#print f
print '---------------------------------------------------------'
#append in time direction each new time series to the data frame
MultiNC_df = MultiNC_df.append(df1)
#append in columns direction individual GCM-RCM-123 run time series (along x axis)
MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
MultiNC_df =pd.DataFrame()
#append blocks of R1 R2 and R3 in x axis direction
R13_df = R13_df.sort_index(axis=0, ascending=True)
GCM_df = pd.concat([GCM_df, R13_df], axis=1)
R13_df = pd.DataFrame()
#append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
GCM_df = GCM_df.sort_index(axis=0, ascending=True)
Full_df = pd.concat([Full_df, GCM_df], axis=1)
GCM_df = pd.DataFrame()
Full_df = Full_df.sort_index(axis=0, ascending=True)
#export the pandas data frame as a CSV file within the output directory
out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '_NARCliM_summary.csv'
out_path = output_directory +'/' + out_file_name
Full_df.to_csv(out_path)

@@ -0,0 +1,90 @@
Variables available from NARCLIM (output):
'evspsblmean' water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
'tasmean' mean near surface temperature
'tasmax' maximum near surface temperature
'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
'pr30maxtstep' Max. 30min time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 30-minute time-window moving averaged values from point values 60.0 second
'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
'sstmean' Daily mean sea surface temperature
Sites:
Northern NSW:
Tweed River: -28.17, 153.56
Cudgera Creek: -28.36, 153.58
Belongil Creek: -28.63, 153.59
Central NSW:
Port Stephens: -32.71, 152.20
Terrigal Lagoon: -33.4, 151.44
Hunter River: -32.91, 151.80
Hunter River near Mahmod's site: -32.843, 151.706
Southern NSW:
Batemans Bay: -35.76, 150.25
Towamba River: -37.1, 149.91
Nadgee Lake: -37.47, 149.97
Code Input Variables:
Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs
Execution of the code in BASH for netcdf interrogation:
1st step: log into the storm servers via PuTTY (hurricane.ccrc.unsw.edu.au, typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au) using UNSW credentials (zID)
In BASH copy and enter:
module load python
latitude=-32.91
longitude=151.80
name='HunterRiver'
Datatype='T_NNRP'
Biasboolean='False'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean
#1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat/lon input) for the above variables of interest and stores them in CSV files (a minimal loading sketch is shown after this list).
Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv
#2 The "P1_NARCliM_plots_Windows" code takes these CSV files as input and creates a) a pdf wiht a number of time series and box plots and b) another CSV file containing the Deltas between present day, near and far future
for each model in the ensemble. Output: C:\Users\z5025317\WRL_Postdoc\Projects\Paper#1\Output\Nadgee\Nadgee_tasmax_NARCliM_ensemble_changes.csv
#3 The "P1_NARCliM_First_Pass_variab_deviation_plots" code takes those Delta CSV files as input and generates the future climate deviation plots that were originally developed by Duncan Rayner.
Run the code with different combinations of Estuary (study sites) and climate variables:
e.g. Clim_var_type = "tasmax*" # '*' will create pdf for all variables in folder
Present_Day_Clim_Var = 'MaxT' #MaxT, MinT, Rainfall (the name of the present day climate variable refers to the observed climate data that is used for the baseline period variability)
#!!!# Important!
#For present day variability data, only the rainfall and temperature data actually correspond to the study sites. ET and wind are taken from the existing project folder and hence correspond to a Hunter weather station
#e.g. Terrigal_Wind and Terrigal_ET are actually Hunter data. This is because we don't have ET for sites other than Hunter at this stage.
##PROBLEM: Without changing anything, the P1_NARCliM_NC_to_CSV_CCRC_SS.py stopped working properly on the CCRC storm servers. It does not throw an error, but loading the nc files with Dataset(nc) takes indefinitely long.
It used to take only a few seconds. NOT solved yet as of 7th of May 2018. ### This was solved for the /postprocessed folder at the end of May 2018 but the problem persists with the /bias_corrected/ data folder.
Running a simple netcdf info script:
python /srv/ccrc/data02/z5025317/Code_execution/P1_Basic_NETCDF_Interrogation.py
deactivate
conda env create --name EEenv --file C:\Users\z5025317\WRL_Postdoc\Software\EE\ee-jupyter-examples-master\kilian_env

@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
@ -16,183 +17,486 @@ import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
#
# import own modules
# Set working directory to where the own python modules are located so they can be imported
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
ALPHA_figs = 0
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14}
matplotlib.rc('font', **font)
#==========================================================#
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
#Clim_var_type = 'tasmean' # '*' will create pdf for all variables in folder
#==========================================================#
Case_Study_Name = 'CASESTUDY2'
Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
#Estuaries = ['SHOALHAVEN']
for Est in Estuaries:
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print Estuary
#Clim_var_type = 'potevpmean' # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Clim_var_types = ['evspsblmean']
for climvar in Clim_var_types:
Clim_var_type = climvar
#==========================================================#
#set input preferences
#==========================================================#
plot_pdf = 'no'
plot_pngs = 'no'
delta_csv = 'yes'
Stats = 'mean' # 'maxdaily', 'mean', 'days_h_25'
DoMonthly = True
Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
Location = 'Catchment'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#set directory path for individual png files
png_output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(png_output_directory):
os.makedirs(png_output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
if Location == 'Catchment':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0]))
Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
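#e.g. an input folder named 'HUNTER_-32.91_151.80_catch' (illustrative name) would yield Coordinates = '-32.91_151.80'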
#==========================================================#
#==========================================================#
#read CSV files and start analysis
#==========================================================#
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
Full_df = Full_df.drop(columns=['period'])
Ncols_df = len(Full_df) #caution: len() on a dataframe returns the number of rows, not columns
#check data types of columns
#Full_df.dtypes
#==========================================================#
#==========================================================#
#subtract a constant from all values to convert from Kelvin to Celsius (temperature variables)
if Clim_var_type in ['tasmean','tasmax','sstmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
#convert evaporation from kg m-2 s-1 to mm per day
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
#convert precipitation rate from kg m-2 s-1 to mm per 30-minute window
if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
#convert precipitation rate from kg m-2 s-1 to mm per 1-hour window
if Clim_var_type in ['pr1Hmaxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60
#convert radiation from W m-2 to MJ m-2 per day (the unit used in SILO)
if Clim_var_type in ['rsdsmean','rldsmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000
#==========================================================#
#Subset the data to the minimum base period and above (used to set the length of the present day climate period)
Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)]
#==========================================================#
#Aggregate daily df to annual time series
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('above_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()), #counts the days above the threshold in each period
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal sums
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #monthly sums
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats == 'maxmonthly'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means of monthly maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('above_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #monthly means
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#==========================================================#
#==========================================================#
#Select the 3 most representative models (min, med and max difference between far future and present)
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
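#A hedged sketch (assumption): select_min_med_max_dif_model is provided by climdata_fcts and its
#implementation is not shown here. Conceptually, for each model the difference between the mean of
#its far future run and the mean of its present day run is computed, the models are ranked by that
#difference, and the smallest, median and largest change models are returned, e.g. (hypothetical column names):
#difs = {mod: Fdf_1900_2080[mod + '_2060'].mean() - Fdf_1900_2080[mod + '_1990'].mean() for mod in model_names}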
#==========================================================#
#==========================================================#
#create a dataframe that has 1 column for each of the three representative models
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#==========================================================#
#Create Deltas of average change for annual and seasonal basis
#==========================================================#
delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
if DoMonthly ==True:
delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)
delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]
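#A hedged sketch (assumption): calculate_deltas_NF_FF2 is provided by climdata_fcts and its
#implementation is not shown here. For a single model, a 'percent' delta of the near future
#versus the present day period could be derived along these lines:
#present_mean = Fdf_1900_2080_annual.iloc[:,0].mean() #1990-2009 run of one model
#near_mean = Fdf_1900_2080_annual.iloc[:,1].mean() #2020-2039 run of the same model
#near_delta = (near_mean - present_mean) / present_mean * 100 #percent change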
#==========================================================#
if delta_csv == 'yes':
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
out_path = output_directory + '/' + out_file_name
delta_all_df.to_csv(out_path)
if delta_csv == 'yes' and DoMonthly ==True:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
out_path = output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name
delta_all_df_monthly.to_csv(out_path)
#==========================================================#
#==========================================================#
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#==========================================================#
#==========================================================#
#generate colour schemes for plotting
#==========================================================#
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3': 'tomato',
'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
#==========================================================#
#==========================================================#
#output crucial summary plots into individual png files
#==========================================================#
if plot_pngs == 'yes':
#=========#
#Barplot of near and far future deltas
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#prepare data frame for plot
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
fig = plt.figure(figsize=(5,6))
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
plt.xticks([])
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
neardeltadf=delta_all_df['far']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
#ax.patch.set_alpha(ALPHA_figs)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
#full period density comparison
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - hist - full period - max delta model')
#prepare data
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
fig = plt.figure(figsize=(5,5))
ax=plt.subplot(2,1,1)
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
#ax.xaxis.grid(False)
ax.yaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
# time series plot annual ALL models
#=========#
png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - Time series - representative models')
#prepare data
# Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
# test = Fdf_1900_2080_annual
# Mod_Names = test.columns
# New_Mod_Name = []
# for i in range(0,len(Mod_Names)):
# if(Stats[:4] =='days'):
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i][0])
# else:
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
# test.columns = New_Mod_Name
# test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
# colnamest = test.columns
# test_sorted.columns = [w[3:-5] for w in colnamest]
#plot
fig = plt.figure(figsize=(8,7))
ax=plt.subplot(2,1,1)
#test_sorted.plot(ax=ax) #,color = plotcolours36)
Fdf_1900_2080_annual_b = Fdf_1900_2080_annual.copy() #copy so that renaming the columns below does not alter the original frame
New_Mod_Name = []
if(Stats[:4] =='days'):
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[0][:-5])
else:
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[:-5])
Fdf_1900_2080_annual_b.columns = New_Mod_Name
Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
ax.xaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#==========================================================#
#output some summary plot into pdf
#==========================================================#
if plot_pdf == 'yes':
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
ymin = min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
# delta barplot for report 1#################################
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - model deltas - near-present')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.patch.set_alpha(ALPHA_figs)
plt.xticks([])
#ax.xaxis.set_ticklabels([])
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
#plt.close()
#
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - model deltas - far-present')
neardeltadf=delta_all_df['far']
fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
ax.patch.set_alpha(ALPHA_figs)
#fig.patch.set_alpha(ALPHA_figs)
#plt.show()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# end delta barplot for report 1#################################
#
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
test = Fdf_1900_2080_annual
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
fig = test_sorted.plot(legend=False, color = plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
#
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - representative models')
dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#plots not used
#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)

@ -0,0 +1,520 @@
# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
# import own modules
# Set working directory to where the own python modules are located so they can be imported
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
ALPHA_figs = 0
font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 14}
matplotlib.rc('font', **font)
#==========================================================#
# Set working directory (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Version = 'V5' #V4 is latest for all model runs #V5 is just for the Hunter catchment climatology
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['HUNTER'] #,'HUNTER','RICHMOND']
#Estuaries = ['SHOALHAVEN']
for Est in Estuaries:
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print Estuary
#Clim_var_type = 'potevpmean' # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Clim_var_types = ['pracc']
for climvar in Clim_var_types:
Clim_var_type = climvar
#==========================================================#
#set input preferences
#==========================================================#
plot_pdf = 'no'
plot_pngs = 'no'
delta_csv = 'yes'
Stats = 'mean' # 'maxdaily', 'mean', 'days_h_25'
DoMonthly = True
Perc_Vs_Abs = 'percent' #'percent' vs. 'absolute' deltas for near and far future
Location = 'Catchment'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#set directory path for individual png files
png_output_directory = 'Output/' + Case_Study_Name + '_' + Version + '/' + Estuary + '/NARCLIM_Key_Figs'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(png_output_directory):
os.makedirs(png_output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
if Location == 'Catchment':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*catch' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
Coordinates_foldername = os.path.basename(os.path.normpath(Estuary_Folder[0]))
Coordinates = Coordinates_foldername.split('_', 3)[1] + '_' +Coordinates_foldername.split('_', 3)[2]
#==========================================================#
#==========================================================#
#read CSV files and start analysis
#==========================================================#
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
#Full_df = Full_df.drop(columns=['period']) #not part of all input csv files
Ncols_df = len(Full_df) #caution: len() on a dataframe returns the number of rows, not columns
#check data types of columns
#Full_df.dtypes
#==========================================================#
#==========================================================#
#subtract a constant from all values to convert from Kelvin to Celsius (temperature variables)
if Clim_var_type in ['tasmean','tasmax','sstmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
#convert evaporation from kg m-2 s-1 to mm per day
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
#convert precipitation rate from kg m-2 s-1 to mm per 30-minute window
if Clim_var_type in ['pr30maxtstep','pr10maxtstep','pr20maxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*30 #int(Clim_var_type[2:4])
#convert precipitation rate from kg m-2 s-1 to mm per 1-hour window
if Clim_var_type in ['pr1Hmaxtstep']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60
#convert radiation from W m-2 to MJ m-2 per day (the unit used in SILO)
if Clim_var_type in ['rsdsmean','rldsmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000
Fdf_1900_2080 = Full_df
#==========================================================#
#==========================================================#
#Aggregate daily df to annual time series
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('above_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal sums
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').sum() #monthly sums
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats == 'maxmonthly'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('M').max().resample('A').mean()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('M').max().resample('Q-NOV').mean() #seasonal means of monthly maxima
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').max() #monthly maxima
Fdf_Monthly_means = Fdf_Monthly_means.replace(0, np.nan)
elif(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('above_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_Monthly_means = Fdf_1900_2080.resample('M').agg(agg)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_Monthly_means = Fdf_1900_2080.resample('M').mean() #monthly means
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#==========================================================#
#==========================================================#
# construction of quantile scaling method
#==========================================================#
quantiles = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1]
Quantile_change_df = fct.quant_quant_scaling(Full_df, quantiles, False) #quant_quant_scaling is provided by climdata_fcts (imported as fct)
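#A hedged sketch (assumption): the implementation of quant_quant_scaling in climdata_fcts is not
#shown in this diff. The quantile-quantile scaling idea is to compare the empirical quantiles of a
#future run against the present day run of the same model and derive one scaling factor per
#quantile, along these lines (hypothetical column names):
#present_q = Full_df['CCCMA3.1_R1_1990'].quantile(quantiles) #empirical quantiles, present day
#future_q = Full_df['CCCMA3.1_R1_2060'].quantile(quantiles) #empirical quantiles, far future
#scaling_factors = future_q.values / present_q.values #one factor per quantile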
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#==========================================================#
#Select the 3 most representative models (min, med and max difference between far future and present)
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
#==========================================================#
#==========================================================#
#create a dataframe that has 1 column for each of the three representative models
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#==========================================================#
#Create Deltas of average change for annual and seasonal basis
#==========================================================#
delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats, Perc_Vs_Abs)
if DoMonthly ==True:
delta_all_df_monthly = fct.calculate_deltas_monthly(Fdf_Monthly_means, Stats, Perc_Vs_Abs)
delta_all_df_monthly = pd.concat([delta_all_df_monthly.filter(regex= 'near'),delta_all_df_monthly.filter(regex= 'far')], axis=1)[-3:]
#==========================================================#
if delta_csv == 'yes':
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
delta_all_df.to_csv(output_directory + '/' + out_file_name)
#quantile scaling csv
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_quant_quant_' + Coordinates + '.csv'
Quantile_change_df.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
if delta_csv == 'yes' and DoMonthly ==True:
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes_monthly_' + Perc_Vs_Abs + '_' + Location + '_' + Coordinates + '.csv'
delta_all_df_monthly.to_csv(output_directory + '/NARCLIM_Changes_Hunter/' + out_file_name)
#==========================================================#
#==========================================================#
#pd.qcut(range(5), 4) #exploratory leftover, result not used
#==========================================================#
#==========================================================#
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#==========================================================#
#==========================================================#
#generate colour schemes for plotting
#==========================================================#
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
color_dict = {'CCCMA3.1_R1': 'darkolivegreen', 'CCCMA3.1_R2': 'turquoise', 'CCCMA3.1_R3': 'lightgreen', 'CSIRO-MK3.0_R2': 'darkgreen',
'CSIRO-MK3.0_R1':'lightpink', 'CSIRO-MK3.0_R3':'slateblue','ECHAM5_R1':'slategray', 'ECHAM5_R2': 'orange', 'ECHAM5_R3': 'tomato',
'MIROC3.2_R1': 'peru', 'MIROC3.2_R2': 'navy' ,'MIROC3.2_R3': 'teal', '10th':'lightgreen','median':'lightpink','90th':'slateblue'}
#==========================================================#
#==========================================================#
#output crucial summary plots into individual png files
#==========================================================#
if plot_pngs == 'yes':
#=========#
#Barplot of near and far future deltas
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#prepare data frame for plot
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
fig = plt.figure(figsize=(5,6))
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
plt.xticks([])
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
neardeltadf=delta_all_df['far']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
#ax.patch.set_alpha(ALPHA_figs)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
#full period density comparison
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - hist - full period - max delta model')
#prepare data
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
fig = plt.figure(figsize=(5,5))
ax=plt.subplot(2,1,1)
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
#ax.xaxis.grid(False)
ax.yaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#=========#
# time series plot annual ALL models
#=========#
png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '3.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - Time series - representative models')
#prepare data
# Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
# test = Fdf_1900_2080_annual
# Mod_Names = test.columns
# New_Mod_Name = []
# for i in range(0,len(Mod_Names)):
# if(Stats[:4] =='days'):
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i][0])
# else:
# New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
# test.columns = New_Mod_Name
# test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
# colnamest = test.columns
# test_sorted.columns = [w[3:-5] for w in colnamest]
#plot
fig = plt.figure(figsize=(8,7))
ax=plt.subplot(2,1,1)
#test_sorted.plot(ax=ax) #,color = plotcolours36)
Fdf_1900_2080_annual_b = Fdf_1900_2080_annual.copy() #copy so that renaming the columns below does not alter the original frame
New_Mod_Name = []
if(Stats[:4] =='days'):
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[0][:-5])
else:
for each in Fdf_1900_2080_annual_b.columns:
New_Mod_Name.append(each[:-5])
Fdf_1900_2080_annual_b.columns = New_Mod_Name
Fdf_1900_2080_annual_b.plot(ax=ax, color=[color_dict[x] for x in Fdf_1900_2080_annual_b.columns])
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
ax.xaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
plt.close()
#=========#
#==========================================================#
#output some summary plot into pdf
#==========================================================#
if plot_pdf == 'yes':
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
ymin = min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
# delta barplot for report 1#################################
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - model deltas - near-present')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.patch.set_alpha(ALPHA_figs)
plt.xticks([])
#ax.xaxis.set_ticklabels([])
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
#plt.close()
#
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - model deltas - far-present')
neardeltadf=delta_all_df['far']
fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
ax.patch.set_alpha(ALPHA_figs)
#fig.patch.set_alpha(ALPHA_figs)
#plt.show()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# end delta barplot for report 1#################################
#
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
test = Fdf_1900_2080_annual
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex(sorted(test.columns), axis=1)
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
fig = test_sorted.plot(legend=False, color = plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
# time series plot annual representative models only
plt.title(Clim_var_type + ' - Time series - representative models')
dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
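#==========================================================#
# For reference: the PdfPages idiom used throughout this block, as a minimal
# self-contained sketch (hypothetical 'example.pdf' path) - draw onto the
# current figure, save it as a page, close, repeat for each page:
#with PdfPages('example.pdf') as pdf:
#    plt.plot([0, 1], [0, 1])
#    plt.title('page one')
#    pdf.savefig(bbox_inches='tight') #appends the current figure as a pdf page
#    plt.close()
#==========================================================#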

@ -0,0 +1,121 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 20 18:03:25 2018
@author: z5025317
"""
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
import csv
# import own modules
# Set working directory to the code folder (so that own modules can be imported)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
#import climdata_fcts as fct
import silo as sil
#==========================================================#
#==========================================================#
# Set working directory (where postprocessed NARCLIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Casestudy2_csv_path = "Data/NARCLIM_Site_CSVs/CASESTUDY2/CASESTDUY2_NARCLIM_Point_Sites.csv"
Silo_variables = ['daily_rain', "max_temp", "min_temp", 'et_short_crop', 'evap_syn']
Location = 'Estuary' #'Catchment'
startdate= '19600101'
enddate= '20180101'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Data/SILO/' + Case_Study_Name + '/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
#read the CSV to extract the lat/lon coordinates of the case study sites
with open(Casestudy2_csv_path, mode='r') as infile:
reader = csv.reader(infile)
next(reader, None)
with open('coors_new.csv', mode='w') as outfile:
writer = csv.writer(outfile)
if Location == 'Estuary':
mydict = dict((rows[0],[rows[1],rows[2]]) for rows in reader)
if Location == 'Ocean':
mydict = dict((rows[0],[rows[3],rows[4]]) for rows in reader)
if Location == 'Catchment':
mydict = dict((rows[0],[rows[5],rows[6]]) for rows in reader)
for Estuary in mydict:
print(Estuary, mydict[Estuary][0], mydict[Estuary][1])
#==========================================================#
#set directory path for output files
output_directory_internal = output_directory + Estuary + '/'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory_internal):
os.makedirs(output_directory_internal)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
output_csv = output_directory_internal + 'SILO_climdata_' + Estuary +'_'+ Location +'_' + mydict[Estuary][0] + '_' + mydict[Estuary][1] + '.csv'
silo_df = sil.pointdata(Silo_variables, 'Okl9EDxgS2uzjLWtVNIBM5YqwvVcCxOmpd3nCzJh',startdate, enddate,
None, mydict[Estuary][0], mydict[Estuary][1], True, output_csv)
#==========================================================#
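# Aside: the site table above could equally be read with pandas instead of the
# csv module; a minimal sketch, assuming the same column layout as the
# 'Estuary' case (column 0 = site name, columns 1-2 = lat/lon):
#sites = pd.read_csv(Casestudy2_csv_path)
#mydict = {r[0]: [str(r[1]), str(r[2])] for r in sites.itertuples(index=False)}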

@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 16:32:01 2018
@author: z5025317
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import glob
def compare_images(im1, im2):
"""plots 2 images next to each other, sharing the axis"""
plt.figure()
ax1 = plt.subplot(121)
plt.imshow(im1, cmap='gray')
ax2 = plt.subplot(122, sharex=ax1, sharey=ax1)
plt.imshow(im2, cmap='gray')
plt.show()
def reject_outliers(data, m=2):
"rejects outliers in a numpy array"
return data[abs(data - np.mean(data)) < m * np.std(data)]
def duplicates_dict(lst):
"return duplicates and indices"
# nested function
def duplicates(lst, item):
return [i for i, x in enumerate(lst) if x == item]
return dict((x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)
def datenum2datetime(datenum):
"convert datenum to datetime"
#takes in datenum and outputs python datetime
time = [datetime.fromordinal(int(dn)) + timedelta(days=float(dn)%1) - timedelta(days = 366) for dn in datenum]
return time
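# worked example: 730486.5 is the MATLAB datenum for 2000-01-01 12:00, so
# datenum2datetime([730486.5]) returns [datetime.datetime(2000, 1, 1, 12, 0)]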
def select_min_med_max_dif_model(NARCLIM_df):
#Select the 3 most representative models (min, med and max difference between far future and present)
Fdf_1900_2080_sorted = NARCLIM_df.reindex(sorted(NARCLIM_df.columns), axis=1)
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df.reset_index()
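#columns come sorted in triplets per model (present, near future, far future);
#dropping every middle row and differencing each remaining pair leaves the
#far-future minus present change per model in column 'C'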
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
df = df.reset_index()
df= df[df.index % 2 != 0]
#get max difference model (difference between far future and present day)
a = df[df.index == df['C'].idxmax()]
Max_dif_mod_name = a.iloc[0]['index']
#get min difference model
a = df[df.index == df['C'].idxmin()]
Min_dif_mod_name = a.iloc[0]['index']
#get the model which difference is closest to the median difference
df['D'] = abs(df['C']- df['C'].median())
a = df[df.index == df['D'].idxmin()]
Med_dif_mod_name = a.iloc[0]['index']
#data frame with min med and max difference model
df2 = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] )
dfall = df2.reindex(sorted(df2.columns), axis=1)
#data frame with individual models
dfmin = NARCLIM_df.filter(regex= Min_dif_mod_name[:-5])
dfmax = NARCLIM_df.filter(regex= Max_dif_mod_name[:-5])
dfmed = NARCLIM_df.filter(regex= Med_dif_mod_name[:-5])
return dfall , dfmin, dfmed, dfmax, Min_dif_mod_name,Med_dif_mod_name, Max_dif_mod_name
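# typical use, e.g.:
# dfall, dfmin, dfmed, dfmax, mod_min, mod_med, mod_max = select_min_med_max_dif_model(Fdf_1900_2080)
# (column names are assumed to end in a 5-character period suffix, which [:-5] strips)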
def calculate_deltas_NF_FF2(Annual_df, Seasonal_df, Stats, Perc_vs_Abs):
"""calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
delta_all_df = pd.DataFrame()
for temp in times:
if temp == 'annual':
Mean_df = Annual_df.mean()
Column_names = ['near', 'far']
if temp == 'DJF':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==1].mean()
Column_names = ['DJF_near', 'DJF_far']
if temp == 'MAM':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==2].mean()
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==3].mean()
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Seasonal_df[Seasonal_df.index.quarter==4].mean()
Column_names = ['SON_near', 'SON_far']
if(Stats[:4] =='days'):
models = list(Seasonal_df.mean().index.get_level_values(0))
else:
models = list(Seasonal_df.mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
if Perc_vs_Abs == 'absolute':
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[0]
delta_FF_ensemble.append(delta_FF)
if Perc_vs_Abs == 'percent':
delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
delta_NF_ensemble.append(delta_NF)
delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if(Stats[:4] =='days'):
delta_all_df = delta_all_df.fillna(0.0).astype(int)
return delta_all_df
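# the delta arithmetic, spelled out for one hypothetical model with period
# means of 10 (present), 12 (near future) and 15 (far future):
#   absolute: near = 12 - 10 = 2,          far = 15 - 10 = 5
#   percent:  near = (12-10)/10*100 = 20%, far = (15-10)/10*100 = 50%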
def calculate_deltas_monthly(Monthly_df, Stats, Perc_vs_Abs):
"""calculates the "deltas" between nearfuture and present day for annual or seasonal climate data in pandas TS format"""
delta_all_df = pd.DataFrame()
for i in range(1, 13, 1):
Mean_df = Monthly_df[Monthly_df.index.month==i].mean()
Column_names = [str(i)+'_near', str(i)+'_far']
if(Stats[:4] =='days'):
models = list(Monthly_df.mean().index.get_level_values(0))
else:
models = list(Monthly_df.mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
if Perc_vs_Abs == 'absolute':
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[0]
delta_FF_ensemble.append(delta_FF)
if Perc_vs_Abs == 'percent':
delta_NF = ((dfdiff[1] - dfdiff[0])/dfdiff[0])*100
delta_NF_ensemble.append(delta_NF)
delta_FF = ((dfdiff[2] - dfdiff[0])/dfdiff[0])*100
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if(Stats[:4] =='days'):
delta_all_df = delta_all_df.fillna(0.0).astype(int)
return delta_all_df
def import_present_day_climdata_csv(Estuary, Clim_var_type):
"""
this function imports the present day climate data used for
characterizing the present day climate variability.
If DataSource == 'Station', individual weather station data is used.
If DataSource == 'SILO', the SILO time series for the grid cell nearest the
estuary centerpoint is used.
"""
#load present day climate data for the same variable
if Clim_var_type == 'evspsblmean': #ET time series that we have is not in the same format as the other variables, hence the different treatment
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' + 'ET' + '*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Date)
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,1]
Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
Present_day_df = pd.to_numeric(Present_day_df)
Present_day_df.index = Dates
[minplotDelta, maxplotDelta]=[50,50]
#for tasmean, observed min and max T need to be converted into mean T
elif Clim_var_type == 'tasmean':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_MaxT_df = Present_day_df.iloc[:,5]
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MinT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_MinT_df = Present_day_df.iloc[:,5]
Present_day_df = (Present_day_MaxT_df + Present_day_MinT_df)/2
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'tasmax':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_MaxT*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,5]
[minplotDelta, maxplotDelta]=[1,2]
elif Clim_var_type == 'wssmean' or Clim_var_type == 'wss1Hmaxtstep':
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/Terrigal_Wind.csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Present_day_df.index = Present_day_df[['Year', 'Month', 'Day', 'Hour']].apply(lambda s : datetime(*s),axis = 1)
Present_day_df = Present_day_df.filter(regex= 'm/s')
Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
Present_day_df['Wind speed measured in m/s'] = pd.to_numeric(Present_day_df['Wind speed measured in m/s'], errors='coerce')
[minplotDelta, maxplotDelta]=[1, 1.5]
elif Clim_var_type == 'sstmean':
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/CASESTUDY2/' + Estuary + '*' )
Present_day_Var_CSV = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '_NNRP*')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0], parse_dates=True, index_col=0)
Present_day_df = Present_day_df.filter(regex= 'NNRP_R1_1950')
Present_day_df['NNRP_R1_1950'] = pd.to_numeric(Present_day_df['NNRP_R1_1950'], errors='coerce')
[minplotDelta, maxplotDelta]=[1, 1]
else:
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' + 'Rainfall' + '*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,5]
[minplotDelta, maxplotDelta]=[50,100]
return Present_day_df, minplotDelta, maxplotDelta
def quant_quant_scaling(Full_df, quantiles, Plotbool):
"""
calculates the % "deltas" for each quantile in line with Climate Change in Australia recommendations provided here:
https://www.climatechangeinaustralia.gov.au/en/support-and-guidance/using-climate-projections/application-ready-data/scaling-methods/
"""
Periods = ['1990', '2020', '2060']
quantiles_strings = [str(x) for x in quantiles][:-1]
models = list(Full_df.resample('A').max().mean().index)
newmodel = []
for each in models:
newmodel.append(each[:-5])
unique_models = list(set(newmodel))
#create empty df and loop through models and periods to derive the % change factors for all quantiles and models
Quant_diff_df_outer = pd.DataFrame()
for unique_model in unique_models:
dfdiff = Full_df.filter(regex= unique_model)
Quant_diff_df = pd.DataFrame()
for period in Periods:
x=dfdiff.filter(regex= period).dropna().values
x.sort(axis=0)
df=pd.DataFrame(x)
df.columns = ['A']
cut_df = pd.DataFrame(df.groupby(pd.qcut(df.rank(method='first').A, quantiles))['A'].mean().values)
cut_df.columns = [period]
Quant_diff_df = pd.concat([Quant_diff_df, cut_df], axis=1, join='outer')
if Plotbool:
Quant_diff_df.plot(x=Quant_diff_df.index, y=Periods, kind="bar", title = unique_model)
Quant_diff_df['NF_%change_'+ unique_model] = (Quant_diff_df['2020'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
Quant_diff_df['FF_%change_'+ unique_model] = (Quant_diff_df['2060'] - Quant_diff_df['1990'])/Quant_diff_df['1990']*100
Quant_diff_df = Quant_diff_df.replace([np.inf, -np.inf], np.nan)
Quant_diff_df = Quant_diff_df.iloc[:,[3,4]].fillna(0)
Quant_diff_df_outer = pd.concat([Quant_diff_df_outer, Quant_diff_df], axis=1, join='outer')
Quant_diff_df_outer.index = quantiles_strings
if Plotbool:
Quant_diff_df_outer.plot(x=Quant_diff_df_outer.index, y=Quant_diff_df_outer.columns, kind="bar", legend = False, title='% intensity change per quantile')
#add new cols and rows with summary statistics
Quantile_change_NF_df = Quant_diff_df_outer.filter(regex= 'NF')
Quantile_change_FF_df = Quant_diff_df_outer.filter(regex= 'FF')
Quantile_change_stats_df = pd.DataFrame()
for loop_df in [Quantile_change_NF_df, Quantile_change_FF_df]:
Sum95_df = pd.DataFrame(loop_df.iloc[-5:,:].sum()).transpose()
Sum95_df.index = ['Sum>95perc']
Sum_df = pd.DataFrame(loop_df.sum()).transpose()
Sum_df.index = ['Sum']
Med_df = pd.DataFrame(loop_df.median()).transpose()
Med_df.index = ['Median']
loop_df = loop_df.append(Sum_df)
loop_df = loop_df.append(Sum95_df)
loop_df = loop_df.append(Med_df)
loop_df['Median'] = loop_df.median(axis=1)
loop_df['Maximum'] = loop_df.max(axis=1)
loop_df['Minimum'] = loop_df.min(axis=1)
Quantile_change_stats_df = pd.concat([Quantile_change_stats_df, loop_df], axis=1, join='outer')
return Quantile_change_stats_df
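# usage sketch (decile bin edges assumed; the final edge is dropped when
# labelling the rows), with Full_df holding the daily NARCLIM series:
# quantiles = [round(0.1 * i, 1) for i in range(11)] #0.0, 0.1, ..., 1.0
# scaling_df = quant_quant_scaling(Full_df, quantiles, Plotbool=False)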

Binary file not shown.

@ -0,0 +1,556 @@
#R code for creating ggplots of time series with smooth (GAM) and linear term
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #you need to install these packages first before you can load them here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
GRACE.monthly.av.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Mean.2002-2017 with border pixels.csv"))
GRACE.monthly.std.df <- data.frame(read.csv("./Data/GRACE/Zunyi/CSV/With_border_pixels/MDB.monthly.TWSA.Stdv.2002-2017 with border pixels.csv"))
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "HUNTER"
Riv.Gauge.loc <- "GRETA" #GRETA
Est.Gauge.loc <- "RAYMONDTERRACE" #"CORAKI" #"RAYMONDTERRACE" # "HEXHAMBRIDGE"
logtransformFlow <- TRUE
ggplotGAM.k <- 15
rivTempGAM.k <- 20
Version <- 'V3'
Fontsize <- 12
######################
######################
Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
Output.Directory <- paste('./Output/CASESTUDY2_V4/', Estuary,'/Recent_Trends/Riv_', Riv.Gauge.loc,'_Est_',Est.Gauge.loc,'_GAMk', ggplotGAM.k, 'd/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
pattern = paste('SILO_climdata_', Estuary,'_Catchment*', sep="")
AirT_CSV_Path <- list.files(paste("./Data/SILO/",Case.Study, '/',sep=""), pattern, full.names=T, recursive=T)
pattern = paste(Estuary,'@', Riv.Gauge.loc, '.*.ALL.csv', sep="")
RivT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
pattern = paste(Estuary,'@', Est.Gauge.loc, '.*.ALL.csv', sep="")
EstT_CSV_Path <- list.files("./Data/River_Gauge_Data/", pattern, full.names=T)
pattern = paste('sstmean_NNRP_', Estuary,'*', sep="")
SST_CSV_Path <- list.files(paste("./Data/NARCLIM_Site_CSVs/",Case.Study, '/', sep=""), pattern, full.names=T, recursive=T)
######################
######################
#Analyse
######################
############tasmean
#Load a daily (no gaps) time series as a time series baseline for other time series used here
AirT.df <- data.frame(read.csv(AirT_CSV_Path))
AirT.full.TS <- zoo((AirT.df$max_temp_Celsius + AirT.df$min_temp_Celsius)/2, order.by= as.Date(AirT.df[,"date"], format = "%Y-%m-%d")) #=daily mean air temperature (mean of daily max and min; a min_temp_Celsius column is assumed), used as the clean daily baseline series
AirT.TS <- window(AirT.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
AirT.full.df <- data.frame(AirT.full.TS)
AirT.df <- data.frame(AirT.TS)
colnames(AirT.df) <- 'tasmean'
colnames(AirT.full.df) <- 'tasmean'
############
AirT.df$Julday1 <- seq(1,length(AirT.df[,1]),1)
linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.df)
AirT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
AirT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############
AirT.full.df$Julday1 <- seq(1,length(AirT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(tasmean ~ Julday1, AirT.full.df)
AirT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
AirT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############tasmean
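############
#the lm-on-Julian-day block above recurs for every series in this script; a
#small helper could factor it out (a sketch only, not wired in below):
trend.stats <- function(df, var, per.year = 365){
  df$Julday1 <- seq_len(nrow(df))
  m <- summary(lm(reformulate("Julday1", response = var), data = df))
  c(slope.per.year = m$coefficients[2,1] * per.year, p.value = m$coefficients[2,4])
}
#e.g. trend.stats(AirT.df, "tasmean") would return the annual trend and its p-value
############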
############River temp
#Load a daily (no gaps) time series as a time series baseline for other time series used here
#Here we use the raw DPI CSV format that comes with a bunch of metadata
RivT.df <- data.frame(read.csv(RivT_CSV_Path))
char.df <- data.frame(lapply(RivT.df[2,], as.character), stringsAsFactors=FALSE)
#dat <- data.frame(lapply(RivT.df[(4:nrow(RivT.df)),(2:ncol(RivT.df))], as.numeric), stringsAsFactors=FALSE)
#dat <- RivT.df.num[!is.na(as.numeric(as.character(RivT.df.num))),]
dat <- RivT.df[(4:nrow(RivT.df)),]
colnames(dat) <- lapply(RivT.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
RivT.df <- dat
rm(dat,char.df)
RivT.full.TS <- zoo(as.numeric(as.character(RivT.df$Temp)), order.by= as.Date(RivT.df[,"Date"], format = "%d/%m/%Y")) #=daily river temperature time series
RivT.full.TS <- window(RivT.full.TS, start=as.Date("1995-07-01"), end=as.Date("2018-01-01"))
RivT.full.TS <- na.approx(RivT.full.TS)
RivT.TS <- RivT.full.TS
RivT.full.df <- data.frame(RivT.TS)
RivT.df <- data.frame(RivT.TS)
colnames(RivT.df) <- 'rivTmean'
colnames(RivT.full.df) <- 'rivTmean'
############
RivT.df$Julday1 <- seq(1,length(RivT.df[,1]),1)
linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.df)
RivT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
RivT.full.df$Julday1 <- seq(1,length(RivT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(rivTmean ~ Julday1, RivT.full.df)
RivT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############River temp
#export interpolated data as csv
#csv.name <- "C:/Users/z5025317/OneDrive - UNSW/CC_Estuaries_CASESTUDY2/Data/River_Gauge_Data/HUNTER@Greta_210064_Temp_interpolated.csv"
#write.csv(RivT.df, csv.name)
############River flow
#Load a daily (no gaps) time series as a time series baseline for other time series used here
#Here we use the raw DPI CSV format that comes with a bunch of metadata
RivQ.df <- data.frame(read.csv(RivT_CSV_Path))
char.df <- data.frame(lapply(RivQ.df[2,], as.character), stringsAsFactors=FALSE)
dat <- RivQ.df[(4:nrow(RivQ.df)),]
colnames(dat) <- lapply(RivQ.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat), pattern="Discharge (ML/d)",replacement="Flow",fixed=T)
RivQ.df <- dat
rm(dat,char.df)
#RivQ.full.TS <- zoo(log10((as.numeric(as.character(RivQ.df$Flow))) +1) , order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #=daily time series of rainfall for creation of clean, daily TS of ET and Q
RivQ.full.TS <- zoo((as.numeric(as.character(RivQ.df$Flow)))/86.4, order.by= as.Date(RivQ.df[,"Date"], format = "%d/%m/%Y")) #=daily river flow time series, converted from ML/day to m3/s (1 m3/s = 86.4 ML/day)
RivQ.TS <- window(RivQ.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
#RivQ.full.TS <- window(RivQ.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
RivQ.full.df <- data.frame(RivQ.full.TS)
RivQ.df <- data.frame(RivQ.TS)
colnames(RivQ.df) <- 'RivQmean'
colnames(RivQ.full.df) <- 'RivQmean'
############trends
RivQ.df$Julday1 <- seq(1,length(RivQ.df[,1]),1)
linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.df)
RivQ.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivQ.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
RivQ.full.df$Julday1 <- seq(1,length(RivQ.full.df[,1]),1)
linear.trend.model_EC_all <- lm(RivQmean ~ Julday1, RivQ.full.df)
RivQ.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
RivQ.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############River Flow
############ SST
#Load a daily (no gaps) time series as a time series baseline for other time series used here
SST.df <- data.frame(read.csv(SST_CSV_Path))
SST.full.TS <- zoo(SST.df$NNRP_R1_1950-273.15, order.by= as.Date(SST.df[,"X"], format = "%Y-%m-%d")) #=daily sea surface temperature time series (NNRP, converted from Kelvin to deg C)
SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
#SST.full.TS <- window(SST.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
SST.full.df <- data.frame(SST.full.TS)
SST.df <- data.frame(SST.TS)
colnames(SST.df) <- 'SSTmean'
colnames(SST.full.df) <- 'SSTmean'
############
SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
SST.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all)$coefficients[2,4]
SST.full.lintrend <- summary(linear.trend.model_EC_all)$coefficients[2,1] * 365
SST.df$Julday1 <- seq(1,length(SST.df[,1]),1)
linear.trend.model_EC_all2 <- lm(SSTmean ~ Julday1, SST.df)
SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all2)$coefficients[2,4]
SST.lintrend <- summary(linear.trend.model_EC_all2)$coefficients[2,1] * 365
############ SST
############Estuary temp
#Load a daily (no gaps) time series as a time series baseline for other time series used here
#Here we use the raw DPI CSV format that comes with a bunch of metadata
EstT.df <- data.frame(read.csv(EstT_CSV_Path))
char.df <- data.frame(lapply(EstT.df[2,], as.character), stringsAsFactors=FALSE)
#dat <- data.frame(lapply(EstT.df[(4:nrow(EstT.df)),(2:ncol(EstT.df))], as.numeric), stringsAsFactors=FALSE)
#dat <- EstT.df.num[!is.na(as.numeric(as.character(EstT.df.num))),]
dat <- EstT.df[(4:nrow(EstT.df)),]
colnames(dat) <- lapply(EstT.df[2,], as.character)
dat$Date <- gsub(x=dat$Date,pattern="00:00:00",replacement="",fixed=T)
colnames(dat) <- gsub(x=colnames(dat),pattern="Water Temp(C)",replacement="Temp",fixed=T)
EstT.df <- dat
rm(dat,char.df)
#replace implausibly low values (< 11 deg C) with NA
EstT.df$Temp <- replace(EstT.df$Temp, which(as.numeric(as.character(EstT.df$Temp)) < 11), NA)
EstT.full.TS <- zoo(as.numeric(as.character(EstT.df$Temp)), order.by= as.Date(EstT.df[,"Date"], format = "%d/%m/%Y")) #=daily estuary temperature time series
EstT.TS <- window(EstT.full.TS, start=as.Date("2013-06-01"), end=as.Date("2018-06-01"))
EstT.full.TS <- EstT.TS
EstT.full.df <- data.frame(EstT.TS) ### This is only done because of poor data at beginning
EstT.df <- data.frame(EstT.TS)
colnames(EstT.df) <- 'EstTmean'
colnames(EstT.full.df) <- 'EstTmean'
############
EstT.df$Julday1 <- seq(1,length(EstT.df[,1]),1)
linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.df)
EstT.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
EstT.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
EstT.full.df$Julday1 <- seq(1,length(EstT.full.df[,1]),1)
linear.trend.model_EC_all <- lm(EstTmean ~ Julday1, EstT.full.df)
EstT.full.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
EstT.full.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
############Est temp
######################
#Plot
######################
##################################### Full Time Period
#Air temp Full period
p1air <- ggplot(AirT.full.df, aes(y=tasmean, x=index(AirT.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary," Catchment Centroid - Linear and smooth trends in catchment airT (SILO) lin trend was ",
round(AirT.full.lintrend,3), ' C°/year with p=', round(AirT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Air Temperature [C°]") + xlab("Time")
p1riv <- ggplot(RivT.full.df, aes(y=rivTmean, x=index(RivT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trends in gauged river temperature (@", Riv.Gauge.loc ,") - Linear trend was ",
round(RivT.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(RivT.full.pvalNCV_ECall,10)), sep="")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("River Temperature [C°]") + xlab("Time") + xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
if(logtransformFlow ==TRUE){
p1rivQ <- ggplot(RivQ.full.df, aes(y=log10(RivQmean+2), x=index(RivQ.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
round(RivQ.full.lintrend,3), ' m3/s per year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("log10(River Flow [ML/day] + 2)", sep=""))) + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
} else {
p1rivQ <- ggplot(RivQ.full.df, aes(y=RivQmean, x=index(RivQ.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) lin trend was ",
round(RivQ.full.lintrend,3), ' m3/s per year with p=', round(RivQ.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("River Flow [ML/day]", sep=""))) + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
}
#Sea temp Full period
p1sst <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " offshore - Linear and smooth trends in sea surface temperature (NNRP NARCLIM reanalysis) linear trend was ",
round(SST.full.lintrend,3), 'C°/year with p=', sprintf("%.5f",round(SST.full.pvalNCV_ECall,10)), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+ xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
p1Est <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
p1Est2 <- ggplot(EstT.full.df, aes(y=EstTmean, x=index(EstT.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) lin trend was ",
round(EstT.full.lintrend,3), 'C°/year with p=', round(EstT.full.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
#stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
geom_smooth(method='lm', formula=y ~ splines::ns(x, 2), se=T, size=0.5) + #low-order natural-spline smooth (no binomial_smooth helper is defined in this script)
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb3.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est2,ncol=1)
dev.off()
gA1 <- ggplotGrob(p1riv)
gA2 <- ggplotGrob(p1sst)
gA1$widths <- gA2$widths
#export to png
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1riv,p1riv,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_SSTmean_full_period4_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA1,gA2,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1rivQ,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1sst,p1sst,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"b.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Est.Gauge.loc, '_Trends_estTmean_full_period_', Sys.Date(),"nosmoothb.png", sep="")
png(file = png.name, width = 10.5, height = 4, units='in', res=500)
grid.arrange(p1Est2,ncol=1)
dev.off()
##################################### Full Time Period
######################### 1990-present
combined.TS <- window(merge(AirT.full.TS, window(RivT.full.TS, start=as.Date("1995-01-01"), end=end(RivT.full.TS)), SST.full.TS,EstT.full.TS,RivQ.full.TS, all=T), start=as.Date("1990-01-01"), end=end(AirT.full.TS))
combined.df <- data.frame(combined.TS)
colnames(combined.df) <- c('tasmean','rivTmean', 'SSTmean', 'EstTmean', 'rivQmean')
#Air temp
p2air <- ggplot(combined.df, aes(y=tasmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary," Catchment centroid - Linear and smooth trend in catchment airT (SILO) linear trend was ",
round(AirT.lintrend,3), ' C°/year with p=', round(AirT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Air Temperature [C°]") + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Riv temp
p2riv <- ggplot(combined.df, aes(y=rivTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river temperature (GAUGE) linear trend was ",
round(RivT.lintrend,3), ' C°/year with p=', round(RivT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("River Temperature [C°]") + xlab(NULL) +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Riv flow
if(logtransformFlow ==TRUE){
p2rivQ <- ggplot(combined.df, aes(y=log10(rivQmean+2), x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
round(RivQ.lintrend,3), ' m3/s per year with p=', round(RivQ.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep=""))) + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
} else {
p2rivQ <- ggplot(combined.df, aes(y=rivQmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Riv.Gauge.loc, " - Linear and smooth trend in river flow (GAUGE) linear trend was ",
round(RivQ.lintrend,3), ' m3/s per year with p=', round(RivQ.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab(expression(paste("River flow [m"^"3" *"/s]", sep=""))) + xlab(NULL)+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
}
#Sea temp
p2sst <- ggplot(combined.df, aes(y=SSTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,"NNRP(NARCLIM reanalysis) - Linear and smooth trend in sea surface temperature (NNRP) lin trend was ",
round(SST.lintrend,3), ' C°/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#Estuary temp
p2Est <- ggplot(combined.df, aes(y=EstTmean, x=index(combined.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary,'@', Est.Gauge.loc, " - Linear and smooth trend in Estuary temperature (GAUGE) linear trend was ",
round(EstT.lintrend,3), ' C°/year with p=', round(EstT.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
#stat_smooth(method=gam, formula=y~s(x, k=ggplotGAM.k), se=T, size=0.5) +
ylab("Estuary Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_tasmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2air,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivTmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2riv,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_rivQmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2rivQ,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SSTmean_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p2sst,ncol=1)
dev.off()
#export an overview plot as png
gA <- ggplotGrob(p2air)
gB <- ggplotGrob(p2riv)
gC <- ggplotGrob(p2sst)
gD <- ggplotGrob(p2Est)
gE <- ggplotGrob(p2rivQ)
gA$widths <- gE$widths
gC$widths <- gE$widths
gD$widths <- gE$widths
gB$widths <- gE$widths
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_AirT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB ,gC,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gC, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB ,gE, ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_SST_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gD,gC,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivQ_RivT_EstT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gE,gB,gD,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_RivQ_1990-present_', Sys.Date(),"b.png", sep="")
png(file = png.name, width = 10.5, height = 11, units='in', res=500)
grid.arrange(gB, gA,gE,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_AirT_RivT_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gA,gB,ncol=1)
dev.off()
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Trends_RivT_RivQ_1990-present_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(gB,gE,ncol=1)
dev.off()
lm_eqn <- function(df){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
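#lm_eqn() builds a plotmath label for annotating a scatterplot; typical use
#(hypothetical data frame df with columns x and y):
#ggplot(df, aes(x, y)) + geom_point() +
#  annotate("text", x = min(df$x), y = max(df$y), label = lm_eqn(df), parse = TRUE)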
Modeling <- 'yes'
if (Modeling == 'yes'){
source("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code/FUN_do_ggplot_Scatterplot_2vars.R")
AirT.full.TS.Monthly <- aggregate(AirT.full.TS, as.yearmon(time(AirT.full.TS)),FUN = mean)
RivT.full.TS.Monthly <- aggregate(RivT.full.TS, as.yearmon(time(RivT.full.TS)),FUN = mean)
RivQ.full.TS.Monthly <- aggregate(RivQ.full.TS, as.yearmon(time(RivQ.full.TS)),FUN = mean)
if(logtransformFlow ==TRUE){
RivQ.full.TS.Monthly <- log10(RivQ.full.TS.Monthly +2)
}
All.TS.zoo <- merge(AirT.full.TS.Monthly, RivT.full.TS.Monthly ,RivQ.full.TS.Monthly , all=F)
AirvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,1],"River temp. [°C]" , "Air temp. [°C]")
if(logtransformFlow ==TRUE){
QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]", expression(paste("log10(River flow [m"^"3" *"/s] + 2)", sep="")))
} else {
QvsRivT <- Generate.ggplot.scatterplot(All.TS.zoo[,2], All.TS.zoo[,3], "River temp. [°C]", expression(paste("River flow [m"^"3" *"/s]", sep="")))
}
#export an overview plot as png
gA <- ggplotGrob(AirvsRivT)
gB <- ggplotGrob(QvsRivT)
gB$widths <- gA$widths
png.name <- paste(Output.Directory, Estuary, '@', Riv.Gauge.loc, '_Scatterplots_of_RivT_vs_predictors_1990-present_', Sys.Date(),"f.png", sep="")
png(file = png.name, width = 8, height = 4, units='in', res=500)
grid.arrange(gA,gB , ncol=2)
dev.off()
}

@ -0,0 +1,184 @@
#R code for creating ggplots of time series with smooth (GAM) and linear term for the OEH buoy data on wave height and SST
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #you need to install these packages first before you can load them here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "GEORGES"
Climvar <- 'tasmean'
ggplotGAM.k <- 7
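Fontsize <- 12 #axis-text size used in the plot themes below; value assumed to match the other trend scripts (it was not set in this one)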
######################
######################
Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
pattern = paste(Estuary, '.*.csv', sep="")
Buoy_CSV_Path <- list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=T)
Buoy.name <- substr(list.files("./Data/Ocean_Data/Waves_SST/", pattern, full.names=F), nchar(Estuary)+2, nchar(Estuary)+7)
SST.df <- data.frame(read.csv(Buoy_CSV_Path, colClasses = "character"))
colnames(SST.df) <- as.character(SST.df[9,])
SST.df = SST.df[-c(1:9), ]
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Date/Time",replacement="Date",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern=" ",replacement="",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="Hsig(m)",replacement="Hsig",fixed=T)
colnames(SST.df) <- gsub(x=colnames(SST.df), pattern="SeaTemp(C)",replacement="SST",fixed=T)
SST.df$Date <- substr(SST.df$Date, 1, 10)
######################
#Hsig
######################
Hsig.full.TS <- zoo(as.numeric(SST.df$Hsig), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #=time series of significant wave height from the buoy record
Hsig.full.TS <- aggregate(Hsig.full.TS, index(Hsig.full.TS) , FUN=mean)
Hsig.full.TS <- aggregate(Hsig.full.TS , as.yearmon(time(Hsig.full.TS )),FUN = mean)
Hsig.full.TS <- Hsig.full.TS[1:(length(Hsig.full.TS)-1)]
Hsig.TS <- window(Hsig.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
Hsig.full.df <- data.frame(Hsig.full.TS)
Hsig.df <- data.frame(Hsig.TS)
str(Hsig.df)
colnames(Hsig.df) <- 'Hsigmean'
colnames(Hsig.full.df) <- 'Hsigmean'
#rid of outliers
#Hsig.full.df$MSL <- replace(Hsig.full.df$MSL, which(Hsig.full.df$MSL > 4), NA)
Hsig.full.df$Julday1 <- seq(1,length(Hsig.full.df[,1]),1)
linear.trend.model_EC_all <- lm(Hsigmean ~ Julday1, Hsig.full.df)
Hsig.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
Hsig.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
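#(monthly series, hence slope x 12 for a per-year trend; the daily SST fit below scales by 365)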
######################
######################
#SST
######################
SST.full.TS <- zoo(as.numeric(SST.df$SST), order.by= as.Date(SST.df$Date, format = "%d/%m/%Y")) #=time series of sea surface temperature from the buoy record
SST.full.TS <- na.omit(SST.full.TS)
SST.full.TS <- aggregate(SST.full.TS, index(SST.full.TS) , FUN=mean)
SST.full.TS <- SST.full.TS[1:(length(SST.full.TS)-1)]
SST.TS <- window(SST.full.TS, start=as.Date("1990-01-01"), end=as.Date("2018-01-01"))
SST.full.df <- data.frame(SST.full.TS)
SST.df <- data.frame(SST.TS)
str(SST.df)
tail(SST.full.TS)
colnames(SST.df) <- 'SSTmean'
colnames(SST.full.df) <- 'SSTmean'
#rid of outliers
#SST.full.df$MSL <- replace(SST.full.df$MSL, which(SST.full.df$MSL > 4), NA)
SST.full.df$Julday1 <- seq(1,length(SST.full.df[,1]),1)
linear.trend.model_EC_all <- lm(SSTmean ~ Julday1, SST.full.df)
SST.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
SST.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 365
#export interpolated data as csv
csv.name <- paste("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/Ocean_Data/Waves_SST/Processed/",Estuary , "_SST_Daily.csv", sep="")
write.csv(SST.full.df, csv.name)
######################
######################
#Plot
######################
##################################### Full Time Period
p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in significant wave height (OEH Buoy:",Buoy.name, "| lin trend was ",
round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Significant Wave Height [m]") + xlab("Time") +
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name, '_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
#multiple smooths
p1air <- ggplot(Hsig.full.df, aes(y=Hsigmean, x=index(Hsig.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in significant wave height (OEH Buoy:",Buoy.name, "| lin trend was ",
round(Hsig.lintrend,3), ' m/year with p=', round(Hsig.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
#stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, cor="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Significant Wave Height [m]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_Significant_Wave_Height_',Buoy.name, '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
##################################### Full Time Period SST
p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in SST (OEH Buoy:",Buoy.name, "| lin trend was ",
round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name, '_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
#multiple smooths
p1air <- ggplot(SST.full.df, aes(y=SSTmean, x=index(SST.full.TS))) + geom_line(alpha=0.5) +
  ggtitle(paste(Estuary, " - Linear and smooth trend in SST (OEH Buoy:",Buoy.name, "| lin trend was ",
round(SST.lintrend,3), ' deg C/year with p=', round(SST.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
  #stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, col="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Sea Surface Temperature [C°]") + xlab("Time")+
theme(axis.text=element_text(size=Fontsize)) +
theme(panel.grid.major.x = element_blank() ,panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line( size=.1, color="white" ))
#export to png
png.name <- paste(Output.Directory, '/Trends_SST_',Buoy.name, '_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()

@ -0,0 +1,113 @@
#R code for creating ggplots of time series with smooth (GAM) and linear trend terms
######################
#Import Libraries and set working directory
######################
library(zoo)
library(hydroTSM) #these packages must be installed (e.g. via install.packages) before they can be loaded here
library(lubridate)
library(mgcv)
library(ggplot2)
library(gridExtra)
library(scales)
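#discourage scientific notation when numbers are formatted for printing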
options(scipen=999)
setwd("C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/")
######################
######################
#Set inputs
######################
Case.Study <- "CASESTUDY2"
Estuary <- "NADGEE"
Climvar <- 'tasmean'
ggplotGAM.k <- 7
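#note: ggplotGAM.k is defined here but the smooths below specify their own k values directly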
######################
######################
Output.Directory <- paste('./Output/', Case.Study, '/', Estuary,'/Recent_Trends/', sep="")
if (file.exists(Output.Directory)){
print('output folder already existed and was not created again')
} else {
dir.create(file.path(Output.Directory))
print('output folder did not exist and was created')
}
######################
######################
#Set input file paths
######################
SeaLev_File_Path <- paste("./Data/Ocean_Data/BOM_monthly_SL_",Estuary, "_Eden.txt", sep="")
dat = readLines(SeaLev_File_Path)
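#each line is a fixed-width record; splitting on runs of 2-10 spaces recovers the columns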
dat = as.data.frame(do.call(rbind, strsplit(dat, split=" {2,10}")), stringsAsFactors=FALSE)
colnames(dat) <-dat[3,]
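#trim the station-specific preamble and footer rows; the indices depend on each gauge's record length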
if (Estuary=='NADGEE'){
dat2 = dat[-c(1:11), ]
dat2 = dat2[-(365:381),]
dat2 = dat2[,-1]
}
if (Estuary=='HUNTER'){
dat2 = dat[-c(1:3), ]
dat2 = dat2[-(720:726),]
dat2 = dat2[-(1:108),]
dat2 = dat2[,-1]
}
dat2$Date <- as.yearmon(dat2[,1], "%m %Y")
SeaLev.df <- dat2
head(SeaLev.df)
SeaLev.df$MSL <- as.numeric(SeaLev.df$Mean)
#remove outliers: monthly mean sea levels above 4 m are implausible and set to NA
SeaLev.df$MSL <- replace(SeaLev.df$MSL, which(SeaLev.df$MSL > 4), NA)
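#sequential time index (one step per monthly record) used as the predictor in the linear trend model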
SeaLev.df$Julday1 <- seq(1,length(SeaLev.df[,1]),1)
linear.trend.model_EC_all <- lm(MSL ~ Julday1, SeaLev.df)
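#extract the slope p-value; the slope is per month, so multiplying by 12 below gives the trend in metres per year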
SeaLev.pvalNCV_ECall <- summary(linear.trend.model_EC_all )$coefficients[2,4]
SeaLev.lintrend <- summary(linear.trend.model_EC_all )$coefficients[2,1] * 12
######################
#Plot
######################
##################################### Full Time Period
p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
geom_smooth(method='lm',fill="green", formula=y~x, colour="darkgreen", size = 0.5) +
stat_smooth(method=gam, formula=y~s(x, k=4), se=T, size=0.5, col="red") +
ylab("Monthly Mean Sea Level [m]") + xlab("Time")
#export to png
png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
#multiple smooths
p1air <- ggplot(SeaLev.df, aes(y=MSL, x=Date)) + geom_line(alpha=0.5) +
ggtitle(paste(Estuary, " - Linear and smooth trend in monthly mean sea level (BOM Gauge) | lin trend was ",
round(100*SeaLev.lintrend,3), ' cm/year with p=', round(SeaLev.pvalNCV_ECall,10), sep=" ")) +
theme(plot.title=element_text(face="bold", size=9)) +
stat_smooth(method=gam, formula=y~s(x, k=13), se=T, size=0.5, col="red") +
#stat_smooth(method=gam, formula=y~s(x, k=8), se=T, size=0.5, col="blue") +
stat_smooth(method=gam, formula=y~s(x, k=5), se=T, size=0.5, col="green") +
ylab("Monthly Mean Sea Level [m]") + xlab("Time")
#export to png
png.name <- paste(Output.Directory, '/Trends_MonthlyMeanSeaLevel_MultiGAM_full_period_', Sys.Date(),".png", sep="")
png(file = png.name, width = 10.5, height = 7, units='in', res=500)
grid.arrange(p1air,ncol=1)
dev.off()
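#The linear + GAM trend panel above is rebuilt four times with only the data frame,
#y-variable and labels changing. A possible refactor is sketched below; plot_trend and
#its arguments are hypothetical (not part of the original scripts) and assume ggplot2
#and mgcv are loaded as above.
plot_trend <- function(df, yvar, xvar, title, ylab_text, k = 4) {
  #standard trend panel: raw series, linear fit (green) and GAM smooth (red)
  ggplot(df, aes_string(y = yvar, x = xvar)) + geom_line(alpha = 0.5) +
    ggtitle(title) +
    theme(plot.title = element_text(face = "bold", size = 9)) +
    geom_smooth(method = 'lm', fill = "green", formula = y ~ x, colour = "darkgreen", size = 0.5) +
    stat_smooth(method = gam, formula = y ~ s(x, k = k), se = TRUE, size = 0.5, col = "red") +
    ylab(ylab_text) + xlab("Time")
}
#hypothetical usage:
#p1air <- plot_trend(SeaLev.df, "MSL", "Date", "NADGEE - MSL trend", "Monthly Mean Sea Level [m]")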