#next step for NACRLIM cc deviation plots

Development1
tinoheimhuber 7 years ago
parent b2dd285534
commit b2fc7135d9

@ -0,0 +1,187 @@
# -*- coding: utf-8 -*-
#####################################----------------------------------
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#####################################----------------------------------
#Load packages
#####################################----------------------------------
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
#plt.rcParams.update(plt.rcParamsDefault)
#
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
#
#####################################----------------------------------
#set input parameters
Base_period_start = '1986-01-01'
Base_period_end = '2005-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Tweed' # 'Belongil'
Clim_var_type = "*" # '*' will create pdf for all variables in folder
Clim_var_type = "tasmax*" # '*' will create pdf for all variables in folder
Present_Day_Clim_Var = 'Rainfall'
#####################################----------------------------------
#set directory path for output files
output_directory = 'Output/Clim_Deviation_Plots/'+ Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('-------------------------------------------')
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
Clim_Var_CSVs = glob.glob('./Output/' + Estuary + '/' + Estuary + '_' + Clim_var_type + '*')
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs:
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 2)[1]
print(Clim_var_type)
Ensemble_Delta_full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#Ensemble_Delta_full_df = pd.to_numeric(Ensemble_Delta_full_df)
#load present day climate data for the same variable
Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' + Present_Day_Clim_Var + '*csv')
Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
if Clim_var_type == 'evspsblmean':
Dates = pd.to_datetime(Present_day_df.Date)
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,1]
Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
Present_day_df = pd.to_numeric(Present_day_df)
else:
Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
Present_day_df.index = Dates
Present_day_df = Present_day_df.iloc[:,5]
#create seasonal sums etc.
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
Present_day_df_annual = Present_day_df.resample('A').sum()
Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
Present_day_df_monthly = Present_day_df.resample('M').sum()
Present_day_df_monthly = Present_day_df_monthly.replace(0, np.nan)
Present_day_df_weekly = Present_day_df.resample('W').sum()
Present_day_df_weekly = Present_day_df_weekly.replace(0, np.nan)
Fdf_Seas_means = Present_day_df.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Present_day_df_annual = Present_day_df.resample('A').mean()
Present_day_df_monthly = Present_day_df.resample('M').mean()
Present_day_df_weekly = Present_day_df.resample('W').mean()
Fdf_Seas_means = Present_day_df.resample('Q-NOV').mean() #seasonal means
#Loop through annual and seasons and create a deviation plot for each.
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
fig = plt.figure(figsize=(14,8))
delta_all_df = pd.DataFrame()
i=1
for temp in times:
#subset the ensemble dataframe for the period used:
if temp == 'annual':
Ensemble_Delta_df = Ensemble_Delta_full_df.iloc[:,range(0,2)]
#
Present_Day_ref_df = Present_day_df_annual
else:
Ensemble_Delta_df = Ensemble_Delta_full_df.filter(regex= temp)
Ensemble_Delta_df.columns = ['near', 'far']
if temp == 'DJF':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1]
Column_names = ['DJF_near', 'DJF_far']
if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2]
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3]
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4]
Present_Day_ref_df = Mean_df
print(Ensemble_Delta_df.columns.values)
#Subset to present day variability period
Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
Present_Day_Mean = np.percentile(Present_Day_ref_df, 50)
Present_Day_SD = np.std(Present_Day_ref_df)
#create data frame for floating stacked barplots
index=['-2std', '-1std', 'Med', '1std', '2std']
columns = ['present','near future', 'far future']
Plot_in_df = pd.DataFrame(index=index, columns =columns)
#
Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][2])]
Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][1]),
np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][2])]
#Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
Plot_in_df2['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_SD), float(Present_Day_SD),
float(Present_Day_SD), float(Present_Day_SD)]
Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Ensemble_Delta_df.near[-3:][1]-Ensemble_Delta_df.near[-3:][0]),
np.NaN, float(Ensemble_Delta_df.near[-3:][2]-Ensemble_Delta_df.near[-3:][1])]
Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Ensemble_Delta_df.far[-3:][1]-Ensemble_Delta_df.far[-3:][0]),
np.NaN, float(Ensemble_Delta_df.far[-3:][2]-Ensemble_Delta_df.far[-3:][1])]
#transpose the data frame
Plot_in_df_tp = Plot_in_df2.transpose()
#do the individual plots
if temp == 'annual':
xmin = int(min(Plot_in_df.min(axis=1))-1)
xmax = int(max(Plot_in_df.max(axis=1))+2)
else:
xmin = int(min(Plot_in_df.min(axis=1))-1)
xmax = int(max(Plot_in_df.max(axis=1))+2)
#define colour scheme
#likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
#plot the stacked barplot
ax=plt.subplot(2,3,i)
Plot_in_df_tp.plot.bar(stacked=True, color=likert_colors, edgecolor='none', legend=False, ax=ax)
z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
plt.ylim(xmin, xmax)
plt.title(Clim_var_type + ' ' + temp )
ax.grid(False)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
fig.tight_layout()
#reset i to i+1 for next step
if temp == 'MAM':
i=i+2
else:
i=i+1
print(i)
plt.show()
out_file_name = Estuary + '_' + Clim_var_type + '_CC_prio_plot.png'
out_path = output_directory + '/' + out_file_name
fig.savefig(out_path)

@ -28,9 +28,10 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
#set input parameters
Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Bateman' # 'Belongil'
Clim_var_type = "*" #will create pdf for all variables in folder
subset_ensemble = 'no' # is yes, only the model with the lowest, median and max difference between present day and far future are selected
Estuary = 'Terrigal' # 'Belongil'
Clim_var_type = "pracc*|tasmax*" # '*' will create pdf for all variables in folder
subset_ensemble = 'yes' # is yes, only the model with the lowest, median and max difference between present day and far future are selected
plot_pdf = 'no'
#####################################----------------------------------
#set directory path for output files
@ -46,7 +47,7 @@ Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs:
#clim_var_csv_path = Clim_Var_CSVs[3]
#clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
@ -66,6 +67,30 @@ for clim_var_csv_path in Clim_Var_CSVs:
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
#Aggregate daily df to annual time series
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
#plot the mean of all model runs
print('-------------------------------------------')
print('mean of all models for climate variable: ' + Clim_var_type)
Fdf_1900_2080_means = Fdf_1900_2080.mean()
Fdf_1900_2080_means.plot(kind='bar').figure
print('-------------------------------------------')
if subset_ensemble == 'yes':
#Select the 3 most representative models (min med and max difference betwen far future and present)
Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
@ -96,33 +121,38 @@ for clim_var_csv_path in Clim_Var_CSVs:
dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
# use only the 3 representative models for the analysis
Fdf_1900_2080_all_mods = Fdf_1900_2080
Fdf_1900_2080 = dfall
#Aggregate daily df to annual time series
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
#plot the mean of all model runs
print('-------------------------------------------')
print('mean of all models for climate variable: ' + Clim_var_type)
Fdf_1900_2080_means = Fdf_1900_2080.mean()
Fdf_1900_2080_means.plot(kind='bar').figure
print('-------------------------------------------')
#create a dataframe that has 1 column for each of the three representative models
# Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
# Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
# Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#Create Deltas of average change
#Create Deltas of average change for annual and seasonal basis
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
delta_all_df = pd.DataFrame()
for temp in times:
if temp == 'annual':
Mean_df = Fdf_1900_2080_annual.mean()
Column_names = ['near', 'far']
if temp == 'DJF':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()
Column_names = ['DJF_near', 'DJF_far']
if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()
Column_names = ['SON_near', 'SON_far']
models = list(Fdf_1900_2080_means.index)
newmodel = []
type(newmodel)
@ -133,29 +163,29 @@ for clim_var_csv_path in Clim_Var_CSVs:
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Fdf_1900_2080_means.filter(regex= unique_model)
dfdiff = Mean_df.filter(regex= unique_model)
type(dfdiff)
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[1]
delta_FF_ensemble.append(delta_FF)
np.percentile(delta_NF, 50)
delta_df
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
delta_df.plot(kind='box').figure
pd.DataFrame()
concat([Full_df, GCM_df], axis=1)
delta_df ensemble.plot(kind='bar')
dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
out_file_name = Estuary + '_' + Clim_var_type + '_NARCliM_ensemble_changes.csv'
out_path = output_directory + '/' + out_file_name
delta_all_df.to_csv(out_path)
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
len(Fdf_1900_2080.columns)
@ -169,26 +199,13 @@ for clim_var_csv_path in Clim_Var_CSVs:
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#create a dataframe that has 1 column for each of the three representative models
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall = dfa1.append(dfa2).append(dfa3)
#output some summary plot into pdf
if plot_pdf == 'yes':
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_B.pdf'
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_3.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
@ -240,7 +257,7 @@ for clim_var_csv_path in Clim_Var_CSVs:
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - representative models')
dfall.plot(legend=False)
dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
@ -285,6 +302,3 @@ for clim_var_csv_path in Clim_Var_CSVs:
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#plots not used
#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)

Loading…
Cancel
Save