You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
CC_Est_NARC/Analysis/Code/P1_NARCliM_plots_Windows.py

435 lines
24 KiB
Python

# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
# import own modules
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
import silo as sil
ALPHA_figs = 0
#==========================================================#
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#==========================================================#
#==========================================================#
#set input parameters
Case_Study_Name = 'CASESTUDY2'
Estuaries = ['HUNTER', 'RICHMOND', 'NADGEE', 'SHOALHAVEN', 'GEORGES','CATHIE']
Estuaries = ['HUNTER']
for Est in Estuaries:
#Estuary = 'HUNTER' # 'Belongil'
Estuary = Est # 'Belongil'
print Estuary
#Clim_var_type = 'potevpmean' # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Clim_var_types = ['tasmean']
for climvar in Clim_var_types:
Clim_var_type = climvar
plot_pdf = 'yes'
plot_pngs = 'yes'
delta_csv = 'yes'
Stats = 'mean' # 'maxdaily', 'mean'
Version = 'V1'
#==========================================================#
#==========================================================#
#set directory path for output files
output_directory = 'Output/' + Case_Study_Name + '/' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#set directory path for individual png files
png_output_directory = 'Output/' + Case_Study_Name + '/' + Estuary + '/NARCLIM_Key_Figs'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(png_output_directory):
os.makedirs(png_output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
#==========================================================#
#==========================================================#
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/'+ Case_Study_Name + '/' + Estuary + '*' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
#==========================================================#
#==========================================================#
#read CSV files and start analysis
#==========================================================#
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
Full_df = Full_df.drop(columns=['period'])
Ncols_df = len(Full_df)
#check data types of columns
#Full_df.dtypes
#==========================================================#
#==========================================================#
#substract a constant from all values to convert from kelvin to celcius (temp)
if Clim_var_type in ['tasmean','tasmax','sstmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
Fdf_1900_2080 = Full_df
if Clim_var_type in ['rsdsmean','rldsmean']:
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24/1000000
#==========================================================#
#==========================================================#
#Aggregate daily df to annual time series
if Clim_var_type in ['pracc' ,'evspsblmean' ,'potevpmean' ,'pr1Hmaxtstep' ,
'wss1Hmaxtstep', 'rsdsmean', 'rldsmean']:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
if(Stats[:4] =='days'):
Threshold = int(Stats[-2:])
#agg = ('abobe_27_count', lambda x: x.gt(27).sum()), ('average', 'mean')
agg = ('>'+ str(Threshold) + '_count', lambda x: x.gt(Threshold).sum()),
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').agg(agg)
#Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').agg(agg) #seasonal means
#Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#==========================================================#
#==========================================================#
#Select the 3 most representative models (min med and max difference betwen far future and present)
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
#==========================================================#
#==========================================================#
#create a dataframe that has 1 column for each of the three representative models
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#==========================================================#
#Create Deltas of average change for annual and seasonal basis
#==========================================================#
delta_all_df = fct.calculate_deltas_NF_FF2(Fdf_1900_2080_annual, Fdf_Seas_means, Stats)
#==========================================================#
if delta_csv == 'yes':
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes.csv'
out_path = output_directory + '/' + out_file_name
delta_all_df.to_csv(out_path)
#==========================================================#
#==========================================================#
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#==========================================================#
#==========================================================#
#generate colour schemes for plotting
#==========================================================#
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
#==========================================================#
#==========================================================#
#output crucial summary plots into individual png files
#==========================================================#
if plot_pngs == 'yes':
#=========#
#Barplot of near and far future deltas
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_Deltas_Barplot_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
#prepare data frame for plot
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
fig = plt.figure(figsize=(5,6))
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - near')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
plt.xticks([])
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - ' + Stats + ' - deltas - far')
neardeltadf=delta_all_df['far']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
#ax.patch.set_alpha(ALPHA_figs)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
#=========#
#=========#
#full period density comparison
#=========#
#out name
png_out_file_name = Clim_var_type + '_' + Stats + '_MaxDeltaMod_Histogram_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - hist - full period - max delta model')
#prepare data
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
fig = plt.figure(figsize=(5,5))
ax=plt.subplot(2,1,1)
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax), ax=ax)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.3))
#ax.xaxis.grid(False)
ax.yaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
#=========#
#=========#
# time series plot annual ALL models
#=========#
png_out_file_name = Clim_var_type + '_' + Stats + '_TimeSeries_AllMods_' + Version + '.png'
png_out_path = png_output_directory + '/' + png_out_file_name
plt.title(Clim_var_type + ' - ' + Stats + ' - Time series - representative models')
#prepare data
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
test = Fdf_1900_2080_annual
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
#plot
fig = plt.figure(figsize=(8,7))
ax=plt.subplot(2,1,1)
test_sorted.plot(ax=ax,color = plotcolours36)
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
ax.xaxis.grid(False)
fig.patch.set_alpha(ALPHA_figs)
fig.tight_layout()
fig.savefig(png_out_path)
#=========#
#==========================================================#
#output some summary plot into pdf
#==========================================================#
if plot_pdf == 'yes':
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_' + Stats + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
ymin = min(Fdf_1900_2080_means)
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
neardeltadf=delta_all_df['near']
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
neardeltadf=delta_all_df['far']
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
#
# delta barplot for report 1#################################
ax=plt.subplot(2,1,1)
plt.title(Clim_var_type + ' - model deltas - near-present')
neardeltadf=delta_all_df['near']
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.patch.set_alpha(ALPHA_figs)
plt.xticks([])
#ax.xaxis.set_ticklabels([])
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
#plt.close()
#
ax=plt.subplot(2,1,2)
plt.title(Clim_var_type + ' - model deltas - far-present')
neardeltadf=delta_all_df['far']
fig = neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
ax.xaxis.grid(False)
ax.patch.set_alpha(ALPHA_figs)
#fig.patch.set_alpha(ALPHA_figs)
#plt.show()
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.close()
# end delta barplot for report 1#################################
#
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
test = Fdf_1900_2080_annual
Mod_Names = test.columns
New_Mod_Name = []
for i in range(0,len(Mod_Names)):
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
test.columns = New_Mod_Name
test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
colnamest = test.columns
test_sorted.columns = [w[3:-5] for w in colnamest]
fig = test_sorted.plot(legend=False, color = plotcolours36)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
fig.patch.set_alpha(ALPHA_figs)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - representative models')
dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()