#refined the NARCLIM plotting

Development1
tinoheimhuber 7 years ago
parent bbec9f1891
commit fd19552a5f

@ -16,7 +16,9 @@ import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')
#
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
@ -27,7 +29,7 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Bateman' # 'Belongil'
#Clim_var_type = 'tasmean' will create pdf for all variables in folder
Clim_var_type = "*" #will create pdf for all variables in folder
#####################################----------------------------------
#set directory path for output files
output_directory = 'Output/'+ Estuary
@ -38,33 +40,62 @@ if not os.path.exists(output_directory):
print("output directory folder didn't exist and was generated")
print('-------------------------------------------')
print('-------------------')
Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/*')
Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type)
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs:
#clim_var_csv_path = Clim_Var_CSVs[0]
#clim_var_csv_path = Clim_Var_CSVs[3]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
print(clim_var_csv_path)
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df.index = Full_df.index.to_period('D')
Full_df = Full_df.drop(columns=['period'])
Ncols_df = len(Full_df)
#check data types of columns
#Full_df.dtypes
#substract a constant from all values (temp)
#substract a constant from all values to convert from kelvin to celcius (temp)
if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
#index the narclim periods if needed
#Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
#Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
#Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
Fdf_1900_2080 = Full_df
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
Fdf_1900_2080 = Full_df.drop(columns=['period'])
Ncols_df = len(Fdf_1900_2080)
#Select the 3 most representative models (min med and max difference betwen far future and present)
Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df.reset_index()
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
df = df.reset_index()
df= df[df.index % 2 != 0]
#get max difference model (difference between far future and prsent day)
a = df[df.index == df['C'].argmax(skipna=True)]
Max_dif_mod_name = a.iloc[0]['index']
#get min difference model
a = df[df.index == df['C'].argmin(skipna=True)]
Min_dif_mod_name = a.iloc[0]['index']
#get the model which difference is closest to the median difference
df['D'] = abs(df['C']- df['C'].median())
a = df[df.index == df['D'].argmin(skipna=True)]
Med_dif_mod_name = a.iloc[0]['index']
#data frame with min med and max difference model
df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] )
dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
#data frame with individual models
dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
# use only the 3 representative models for the analysis
Fdf_1900_2080_all_mods = Fdf_1900_2080
Fdf_1900_2080 = dfall
#Aggregate daily df to annual time series
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
@ -81,6 +112,7 @@ for clim_var_csv_path in Clim_Var_CSVs:
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
#plot the mean of all model runs
print('-------------------------------------------')
print('mean of all models for climate variable: ' + Clim_var_type)
@ -88,60 +120,39 @@ for clim_var_csv_path in Clim_Var_CSVs:
Fdf_1900_2080_means.plot(kind='bar').figure
print('-------------------------------------------')
#time series plots:
#ranks = Fdf_1900_2080_means[3:].rank(axis=0)
df3 = Fdf_1900_2080_annual
Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1)
Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25,30,33]]
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
#developement
#I want to create a simple density plot for all values in present day, near future and far future
#this should be an easy indicator of change
#subset present day simulations
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
len(Fdf_1900_2080.columns)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,12)]
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
#nearfuture
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(12,24)]
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
Full_nearfuture_df = Full_nearfuture_df.stack()
#farfuture
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(24,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
Full_farfuture_df = Full_farfuture_df.stack()
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture']
plt.title(Clim_var_type + ' - density comparison - full period')
Summarized_df.plot.kde()
Summarized_df.boxplot(rot=90)
#getting to more refined and meaningful plots
Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#create a dataframe that has 1 column for each of the three representative models
df = df.reset_index()
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
a= df[df.index == df['C'].argmax(skipna=True)]
a['index']
df.iloc[[df['C'].argmax(skipna=True)]]
df['C'].argmax(skipna=True)
df['C'].argmin(skipna=True)
df['C'].argmean(skipna=True)
df[df['C']==df['C'].median()]
max(df['C'])
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall = dfa1.append(dfa2).append(dfa3)
Fdf_1900_2080_sorted_means.plot(kind='bar').figure
MIROC_R2_df = Fdf_1900_2080.iloc[:,[1,13,28]]
#end of developement
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary2.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
#barplot of model means
plt.title(Clim_var_type + ' - model means - full period')
@ -151,88 +162,89 @@ for clim_var_csv_path in Clim_Var_CSVs:
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - one model')
MIROC_R2_df.plot.kde()
plt.title(Clim_var_type + ' - density comparison - full period - all models')
Summarized_df.plot.kde()
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means for one model')
Fdf_1900_2080_annual.iloc[:,[1,13,28]].boxplot(rot=90)
plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
Fdf_1900_2080_annual.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#monthly box
plt.title(Clim_var_type + ' - Monthly means/sums')
Fdf_1900_2080_monthly.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means for one model B')
Fdf_1900_2080_annual.iloc[:,[3,18,30]].boxplot(rot=90)
plt.title(Clim_var_type + ' - Monthly means/sums for min diff model')
Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#annual box
plt.title(Clim_var_type + ' - Annual means for one model C')
Fdf_1900_2080_annual.iloc[:,[8,17,26]].boxplot(rot=90)
plt.title(Clim_var_type + ' - Monthly means/sums for median diff model')
Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#monthly box
plt.title(Clim_var_type + ' - Monthly means')
Fdf_1900_2080_monthly.boxplot(rot=90)
#annual box
plt.title(Clim_var_type + ' - Monthly means/sums for max diff model')
Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#weekly box
plt.title(Clim_var_type + ' - Weekly means')
plt.title(Clim_var_type + ' - Weekly means/sums')
Fdf_1900_2080_weekly.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#daily box
plt.title(Clim_var_type + ' - Daily means')
plt.title(Clim_var_type + ' - Daily means/sums')
Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# time series plot annual ALL models
plt.title(Clim_var_type + ' - Time series - all models')
Fdf_1900_2080_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
#
plt.title(Clim_var_type + ' - Time series - representative models')
Fdf_annual_sorted_subset.plot(legend=False)
dfall.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
# seasonal mean boxplots
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.title(Clim_var_type + ' - DJF Summer means')
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - DJF Summer means')
plt.title(Clim_var_type + ' - DJF Summer means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.title(Clim_var_type + ' - MAM Autumn means')
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means')
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means')
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means')
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means')
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means')
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()

Loading…
Cancel
Save