From fd19552a5f152c3d16376ac1fa41bf64c48f3657 Mon Sep 17 00:00:00 2001 From: tinoheimhuber Date: Wed, 18 Apr 2018 18:35:14 +1000 Subject: [PATCH] #refined the NARCLIM plotting --- Analysis/Code/P1_NARCliM_plots_Windows.py | 172 ++++++++++++---------- 1 file changed, 92 insertions(+), 80 deletions(-) diff --git a/Analysis/Code/P1_NARCliM_plots_Windows.py b/Analysis/Code/P1_NARCliM_plots_Windows.py index c9e852f..8d2432f 100644 --- a/Analysis/Code/P1_NARCliM_plots_Windows.py +++ b/Analysis/Code/P1_NARCliM_plots_Windows.py @@ -16,7 +16,9 @@ import matplotlib.pyplot as plt from datetime import datetime from datetime import timedelta from matplotlib.backends.backend_pdf import PdfPages +from ggplot import * matplotlib.style.use('ggplot') + # # Set working direcotry (where postprocessed NARClIM data is located) os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/') @@ -27,7 +29,7 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/') Base_period_start = '1990-01-01' Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting Estuary = 'Bateman' # 'Belongil' -#Clim_var_type = 'tasmean' will create pdf for all variables in folder +Clim_var_type = "*" #will create pdf for all variables in folder #####################################---------------------------------- #set directory path for output files output_directory = 'Output/'+ Estuary @@ -38,33 +40,62 @@ if not os.path.exists(output_directory): print("output directory folder didn't exist and was generated") print('-------------------------------------------') print('-------------------') -Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/*') +Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type) #Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' ) #read CSV file for clim_var_csv_path in Clim_Var_CSVs: - #clim_var_csv_path = Clim_Var_CSVs[0] + #clim_var_csv_path = Clim_Var_CSVs[3] Filename = os.path.basename(os.path.normpath(clim_var_csv_path)) Clim_var_type = Filename.split('_', 1)[0] print(clim_var_csv_path) Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True) #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough)) Full_df.index = Full_df.index.to_period('D') + Full_df = Full_df.drop(columns=['period']) + Ncols_df = len(Full_df) #check data types of columns #Full_df.dtypes - #substract a constant from all values (temp) + #substract a constant from all values to convert from kelvin to celcius (temp) if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax': Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15 - - #index the narclim periods if needed - #Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009' - #Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039' - #Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079' + Fdf_1900_2080 = Full_df #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period) #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period - Fdf_1900_2080 = Full_df.drop(columns=['period']) - Ncols_df = len(Fdf_1900_2080) + + #Select the 3 most representative models (min med and max difference betwen far future and present) + Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1) + Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean()) + df = Fdf_1900_2080_sorted_means + #add a simple increasing integer index + df = df.reset_index() + df= df[df.index % 3 != 1] + df['C'] = df[0].diff() + df = df.reset_index() + df= df[df.index % 2 != 0] + #get max difference model (difference between far future and prsent day) + a = df[df.index == df['C'].argmax(skipna=True)] + Max_dif_mod_name = a.iloc[0]['index'] + #get min difference model + a = df[df.index == df['C'].argmin(skipna=True)] + Min_dif_mod_name = a.iloc[0]['index'] + #get the model which difference is closest to the median difference + df['D'] = abs(df['C']- df['C'].median()) + a = df[df.index == df['D'].argmin(skipna=True)] + Med_dif_mod_name = a.iloc[0]['index'] + #data frame with min med and max difference model + df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] ) + dfall = df2.reindex_axis(sorted(df2.columns), axis=1) + #data frame with individual models + dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5]) + dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]) + dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]) + + # use only the 3 representative models for the analysis + Fdf_1900_2080_all_mods = Fdf_1900_2080 + Fdf_1900_2080 = dfall + #Aggregate daily df to annual time series if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'): @@ -81,6 +112,7 @@ for clim_var_csv_path in Clim_Var_CSVs: Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean() Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean() Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means + #plot the mean of all model runs print('-------------------------------------------') print('mean of all models for climate variable: ' + Clim_var_type) @@ -88,60 +120,39 @@ for clim_var_csv_path in Clim_Var_CSVs: Fdf_1900_2080_means.plot(kind='bar').figure print('-------------------------------------------') - #time series plots: - #ranks = Fdf_1900_2080_means[3:].rank(axis=0) - df3 = Fdf_1900_2080_annual - Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1) - Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25,30,33]] - - #write the key plots to a single pdf document - pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf' - pdf_out_path = output_directory +'/' + pdf_out_file_name - #open pdf and add the plots - - #developement - #I want to create a simple density plot for all values in present day, near future and far future - #this should be an easy indicator of change - #subset present day simulations + #create a dataframe that has a single column for present day, near and far future for the (3 selected models) len(Fdf_1900_2080.columns) - Full_current_df = Fdf_1900_2080.iloc[:,range(0,12)] + Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)] Full_current_df = Full_current_df.stack() #nearfuture - Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(12,24)] + Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)] Full_nearfuture_df = Full_nearfuture_df.stack() #farfuture - Full_farfuture_df = Fdf_1900_2080.iloc[:,range(24,len(Fdf_1900_2080.columns))] + Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))] Full_farfuture_df = Full_farfuture_df.stack() Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True) Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True) - Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture'] - plt.title(Clim_var_type + ' - density comparison - full period') - Summarized_df.plot.kde() - Summarized_df.boxplot(rot=90) - - - #getting to more refined and meaningful plots - Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1) - Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean()) - df = Fdf_1900_2080_sorted_means - df = df.reset_index() - df= df[df.index % 3 != 1] - df['C'] = df[0].diff() - a= df[df.index == df['C'].argmax(skipna=True)] - a['index'] - df.iloc[[df['C'].argmax(skipna=True)]] - df['C'].argmax(skipna=True) - df['C'].argmin(skipna=True) - df['C'].argmean(skipna=True) - df[df['C']==df['C'].median()] - max(df['C']) + #create a dataframe that has 1 column for each of the three representative models + Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' + Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' + Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' + + dfa = Fdf_1900_2080_annual.iloc[:,[0]] + dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')] + dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] + dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')] + dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] + dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')] + dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]] + dfall = dfa1.append(dfa2).append(dfa3) - Fdf_1900_2080_sorted_means.plot(kind='bar').figure - MIROC_R2_df = Fdf_1900_2080.iloc[:,[1,13,28]] - #end of developement + #write the key plots to a single pdf document + pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary2.pdf' + pdf_out_path = output_directory +'/' + pdf_out_file_name + #open pdf and add the plots with PdfPages(pdf_out_path) as pdf: #barplot of model means plt.title(Clim_var_type + ' - model means - full period') @@ -151,88 +162,89 @@ for clim_var_csv_path in Clim_Var_CSVs: pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #full period density comparison - plt.title(Clim_var_type + ' - density comparison - full period - one model') - MIROC_R2_df.plot.kde() + plt.title(Clim_var_type + ' - density comparison - full period - all models') + Summarized_df.plot.kde() pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box - plt.title(Clim_var_type + ' - Annual means for one model') - Fdf_1900_2080_annual.iloc[:,[1,13,28]].boxplot(rot=90) + plt.title(Clim_var_type + ' - Annual means/sums for max diff model') + Fdf_1900_2080_annual.boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #monthly box + plt.title(Clim_var_type + ' - Monthly means/sums') + Fdf_1900_2080_monthly.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box - plt.title(Clim_var_type + ' - Annual means for one model B') - Fdf_1900_2080_annual.iloc[:,[3,18,30]].boxplot(rot=90) + plt.title(Clim_var_type + ' - Monthly means/sums for min diff model') + Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #annual box - plt.title(Clim_var_type + ' - Annual means for one model C') - Fdf_1900_2080_annual.iloc[:,[8,17,26]].boxplot(rot=90) + plt.title(Clim_var_type + ' - Monthly means/sums for median diff model') + Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() - #monthly box - plt.title(Clim_var_type + ' - Monthly means') - Fdf_1900_2080_monthly.boxplot(rot=90) + #annual box + plt.title(Clim_var_type + ' - Monthly means/sums for max diff model') + Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #weekly box - plt.title(Clim_var_type + ' - Weekly means') + plt.title(Clim_var_type + ' - Weekly means/sums') Fdf_1900_2080_weekly.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() #daily box - plt.title(Clim_var_type + ' - Daily means') + plt.title(Clim_var_type + ' - Daily means/sums') Fdf_1900_2080.boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() # time series plot annual ALL models - plt.title(Clim_var_type + ' - Time series - all models') - Fdf_1900_2080_annual.plot(legend=False) - pdf.savefig(bbox_inches='tight', pad_inches=0.4) - plt.close() - # plt.title(Clim_var_type + ' - Time series - representative models') - Fdf_annual_sorted_subset.plot(legend=False) + dfall.plot(legend=False) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() + # seasonal mean boxplots ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) - plt.title(Clim_var_type + ' - DJF Summer means') + plt.title(Clim_var_type + ' - DJF Summer means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() - plt.title(Clim_var_type + ' - DJF Summer means') + plt.title(Clim_var_type + ' - DJF Summer means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) - plt.title(Clim_var_type + ' - MAM Autumn means') + plt.title(Clim_var_type + ' - MAM Autumn means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() - plt.title(Clim_var_type + ' - MAM Autumn means') + plt.title(Clim_var_type + ' - MAM Autumn means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) - plt.title(Clim_var_type + ' - JJA Winter means') + plt.title(Clim_var_type + ' - JJA Winter means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() - plt.title(Clim_var_type + ' - JJA Winter means') + plt.title(Clim_var_type + ' - JJA Winter means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) - plt.title(Clim_var_type + ' - SON Spring means') + plt.title(Clim_var_type + ' - SON Spring means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax)) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close() - plt.title(Clim_var_type + ' - SON Spring means') + plt.title(Clim_var_type + ' - SON Spring means/sums') Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90) pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.close()