# -*- coding: utf-8 -*- #####################################---------------------------------- #Last Updated - March 2018 #@author: z5025317 Valentin Heimhuber #code for creating climate prioritization plots for NARCLIM variables. #Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py #####################################---------------------------------- #Load packages #####################################---------------------------------- import numpy as np import os import pandas as pd import glob import matplotlib import matplotlib.pyplot as plt from datetime import datetime from datetime import timedelta from matplotlib.backends.backend_pdf import PdfPages from ggplot import * matplotlib.style.use('ggplot') # # Set working direcotry (where postprocessed NARClIM data is located) os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/') # #####################################---------------------------------- #set input parameters Base_period_start = '1990-01-01' Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting Estuary = 'Terrigal' # 'Belongil' Clim_var_type = "wssmean*" # '*' will create pdf for all variables in folder "pracc*|tasmax*" subset_ensemble = 'yes' # is yes, only the model with the lowest, median and max difference between present day and far future are selected plot_pdf = 'no' #####################################---------------------------------- # #set directory path for output files output_directory = 'Output/'+ Estuary #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted' if not os.path.exists(output_directory): os.makedirs(output_directory) print('-------------------------------------------') print("output directory folder didn't exist and was generated") print('-------------------------------------------') print('-------------------') Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type) #Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' ) #read CSV file for clim_var_csv_path in Clim_Var_CSVs: #clim_var_csv_path = Clim_Var_CSVs[0] Filename = os.path.basename(os.path.normpath(clim_var_csv_path)) Clim_var_type = Filename.split('_', 1)[0] print(clim_var_csv_path) Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True) #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough)) Full_df.index = Full_df.index.to_period('D') Full_df = Full_df.drop(columns=['period']) Ncols_df = len(Full_df) #check data types of columns #Full_df.dtypes #substract a constant from all values to convert from kelvin to celcius (temp) if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax': Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15 if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean': Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24 Fdf_1900_2080 = Full_df #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period) #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period #Aggregate daily df to annual time series if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'): Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum() Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan) Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum() Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan) Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum() Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan) Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan) else: Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean() Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean() Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean() Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means #plot the mean of all model runs print('-------------------------------------------') print('mean of all models for climate variable: ' + Clim_var_type) Fdf_1900_2080_means = Fdf_1900_2080.mean() Fdf_1900_2080_means.plot(kind='bar').figure print('-------------------------------------------') if subset_ensemble == 'yes': #Select the 3 most representative models (min med and max difference betwen far future and present) Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1) Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean()) df = Fdf_1900_2080_sorted_means #add a simple increasing integer index df = df.reset_index() df= df[df.index % 3 != 1] df['C'] = df[0].diff() df = df.reset_index() df= df[df.index % 2 != 0] #get max difference model (difference between far future and prsent day) a = df[df.index == df['C'].argmax(skipna=True)] Max_dif_mod_name = a.iloc[0]['index'] #get min difference model a = df[df.index == df['C'].argmin(skipna=True)] Min_dif_mod_name = a.iloc[0]['index'] #get the model which difference is closest to the median difference df['D'] = abs(df['C']- df['C'].median()) a = df[df.index == df['D'].argmin(skipna=True)] Med_dif_mod_name = a.iloc[0]['index'] #data frame with min med and max difference model df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] ) dfall = df2.reindex_axis(sorted(df2.columns), axis=1) #data frame with individual models dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5]) dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]) dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]) # use only the 3 representative models for the analysis Fdf_1900_2080_all_mods = Fdf_1900_2080 #create a dataframe that has 1 column for each of the three representative models # Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' # Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' # Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' dfa = Fdf_1900_2080_annual.iloc[:,[0]] dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')] dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')] dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')] dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] dfall_annual = dfa1.append(dfa2).append(dfa3) #Create Deltas of average change for annual and seasonal basis times = ['annual', 'DJF', 'MAM', 'JJA','SON'] delta_all_df = pd.DataFrame() for temp in times: if temp == 'annual': Mean_df = Fdf_1900_2080_annual.mean() Column_names = ['near', 'far'] if temp == 'DJF': Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean() Column_names = ['DJF_near', 'DJF_far'] if temp == 'MAM': Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean() Column_names = ['MAM_near', 'MAM_far'] if temp == 'JJA': Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean() Column_names = ['JJA_near', 'JJA_far'] if temp == 'SON': Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean() Column_names = ['SON_near', 'SON_far'] models = list(Fdf_1900_2080_means.index) newmodel = [] type(newmodel) for each in models: newmodel.append(each[:-5]) unique_models = set(newmodel) # calculate diff for each unique model delta_NF_ensemble = [] delta_FF_ensemble = [] for unique_model in unique_models: dfdiff = Mean_df.filter(regex= unique_model) type(dfdiff) delta_NF = dfdiff[1] - dfdiff[0] delta_NF_ensemble.append(delta_NF) delta_FF = dfdiff[2] - dfdiff[1] delta_FF_ensemble.append(delta_FF) delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models) delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models) delta_df= pd.concat([delta_df1, delta_df2], axis=1) #rename columns delta_df.columns = Column_names #add a row with medians and 10 and 90th percentiles delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)}) delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)}) delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)}) #append df to overall df delta_all_df = pd.concat([delta_all_df, delta_df], axis=1) out_file_name = Estuary + '_' + Clim_var_type + '_NARCliM_ensemble_changes.csv' out_path = output_directory + '/' + out_file_name delta_all_df.to_csv(out_path) #create a dataframe that has a single column for present day, near and far future for the (3 selected models) len(Fdf_1900_2080.columns) Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)] Full_current_df = Full_current_df.stack() #nearfuture Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)] Full_nearfuture_df = Full_nearfuture_df.stack() #farfuture Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))] Full_farfuture_df = Full_farfuture_df.stack() Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True) Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True) Summarized_df.columns = ['present', 'near', 'far'] #output some summary plot into pdf if plot_pdf == 'yes': #write the key plots to a single pdf document pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_3.pdf' pdf_out_path = output_directory +'/' + pdf_out_file_name #open pdf and add the plots with PdfPages(pdf_out_path) as pdf: #barplot of model means plt.title(Clim_var_type + ' - model means - full period') ymin = min(Fdf_1900_2080_means) ymax = max(Fdf_1900_2080_means) Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #full period density comparison plt.title(Clim_var_type + ' - density comparison - full period - all models') Summarized_df.plot.kde() pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box plt.title(Clim_var_type + ' - Annual means/sums for max diff model') Fdf_1900_2080_annual.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #monthly box plt.title(Clim_var_type + ' - Monthly means/sums') Fdf_1900_2080_monthly.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box plt.title(Clim_var_type + ' - Monthly means/sums for min diff model') Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box plt.title(Clim_var_type + ' - Monthly means/sums for median diff model') Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box plt.title(Clim_var_type + ' - Monthly means/sums for max diff model') Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #weekly box plt.title(Clim_var_type + ' - Weekly means/sums') Fdf_1900_2080_weekly.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #daily box plt.title(Clim_var_type + ' - Daily means/sums') Fdf_1900_2080.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() # time series plot annual ALL models plt.title(Clim_var_type + ' - Time series - representative models') dfall_annual.plot(legend=False) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() # seasonal mean boxplots ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) plt.title(Clim_var_type + ' - DJF Summer means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() plt.title(Clim_var_type + ' - DJF Summer means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) plt.title(Clim_var_type + ' - MAM Autumn means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() plt.title(Clim_var_type + ' - MAM Autumn means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) plt.title(Clim_var_type + ' - JJA Winter means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() plt.title(Clim_var_type + ' - JJA Winter means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) plt.title(Clim_var_type + ' - SON Spring means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() plt.title(Clim_var_type + ' - SON Spring means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close()