From b2fc7135d9ccbabb94c94fdda0f4b5bce7487981 Mon Sep 17 00:00:00 2001
From: tinoheimhuber <tinoheimhuber@gmail.com>
Date: Thu, 3 May 2018 18:16:26 +1000
Subject: [PATCH] #next step for NACRLIM cc deviation plots

---
 ...RCliM_First_Pass_variab_deviation_plots.py | 187 +++++++++
 Analysis/Code/P1_NARCliM_plots_Windows.py     | 378 +++++++++---------
 2 files changed, 383 insertions(+), 182 deletions(-)
 create mode 100644 Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py

diff --git a/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py b/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py
new file mode 100644
index 0000000..a451f73
--- /dev/null
+++ b/Analysis/Code/P1_NARCliM_First_Pass_variab_deviation_plots.py
@@ -0,0 +1,187 @@
+# -*- coding: utf-8 -*-
+#####################################----------------------------------
+#Last Updated - March 2018
+#@author: z5025317 Valentin Heimhuber
+#code for creating climate prioritization plots for NARCLIM variables. 
+#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
+#####################################----------------------------------
+#Load packages
+#####################################----------------------------------
+import numpy as np
+import os
+import pandas as pd
+import glob
+import matplotlib
+import matplotlib.pyplot as plt
+from datetime import datetime
+from datetime import timedelta
+from matplotlib.backends.backend_pdf import PdfPages
+from ggplot import *
+matplotlib.style.use('ggplot')
+#plt.rcParams.update(plt.rcParamsDefault)
+#
+# Set working direcotry (where postprocessed NARClIM data is located)
+os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
+#
+#####################################----------------------------------
+#set input parameters
+Base_period_start = '1986-01-01'
+Base_period_end = '2005-01-01' #use last day that's not included in period as < is used for subsetting
+Estuary = 'Tweed' # 'Belongil'
+Clim_var_type  = "*"   #  '*' will create pdf for all variables in folder  
+Clim_var_type  = "tasmax*"   #  '*' will create pdf for all variables in folder  
+Present_Day_Clim_Var = 'Rainfall'    
+#####################################----------------------------------
+
+#set directory path for output files
+output_directory = 'Output/Clim_Deviation_Plots/'+ Estuary
+#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
+if not os.path.exists(output_directory):
+    os.makedirs(output_directory)
+    print('-------------------------------------------')
+    print("output directory folder didn't exist and was generated")
+    print('-------------------------------------------')
+    print('-------------------')
+Clim_Var_CSVs = glob.glob('./Output/' + Estuary + '/' + Estuary + '_' + Clim_var_type + '*')
+#read CSV file
+for clim_var_csv_path in Clim_Var_CSVs:
+    Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
+    Clim_var_type = Filename.split('_', 2)[1]
+    print(Clim_var_type)
+    Ensemble_Delta_full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
+    #Ensemble_Delta_full_df = pd.to_numeric(Ensemble_Delta_full_df)
+    
+    #load present day climate data for the same variable
+    Present_day_Var_CSV = glob.glob('./Data/Wheather_Station_Data/**/' + Estuary + '_' +  Present_Day_Clim_Var + '*csv')
+    Present_day_df = pd.read_csv(Present_day_Var_CSV[0])
+    if Clim_var_type == 'evspsblmean':
+        Dates = pd.to_datetime(Present_day_df.Date)
+        Present_day_df.index = Dates
+        Present_day_df = Present_day_df.iloc[:,1]
+        Present_day_df = Present_day_df.replace(r'\s+', np.nan, regex=True)
+        Present_day_df = pd.to_numeric(Present_day_df)
+        
+    else:   
+        Dates = pd.to_datetime(Present_day_df.Year*10000+Present_day_df.Month*100+Present_day_df.Day,format='%Y%m%d')
+        Present_day_df.index = Dates
+        Present_day_df = Present_day_df.iloc[:,5]
+    #create seasonal sums etc. 
+    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
+    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
+        Present_day_df_annual = Present_day_df.resample('A').sum()
+        Present_day_df_annual = Present_day_df_annual.replace(0, np.nan)
+        Present_day_df_monthly = Present_day_df.resample('M').sum()
+        Present_day_df_monthly = Present_day_df_monthly.replace(0, np.nan)
+        Present_day_df_weekly = Present_day_df.resample('W').sum()
+        Present_day_df_weekly = Present_day_df_weekly.replace(0, np.nan)
+        Fdf_Seas_means = Present_day_df.resample('Q-NOV').sum() #seasonal means
+        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
+    else:    
+        Present_day_df_annual = Present_day_df.resample('A').mean()
+        Present_day_df_monthly = Present_day_df.resample('M').mean()
+        Present_day_df_weekly = Present_day_df.resample('W').mean()
+        Fdf_Seas_means = Present_day_df.resample('Q-NOV').mean() #seasonal means
+    
+    #Loop through annual and seasons and create a deviation plot for each. 
+    times = ['annual', 'DJF', 'MAM', 'JJA','SON']
+    fig = plt.figure(figsize=(14,8))
+    delta_all_df = pd.DataFrame()
+    i=1
+    for temp in times:
+        #subset the ensemble dataframe for the period used:
+        if temp == 'annual':
+            Ensemble_Delta_df = Ensemble_Delta_full_df.iloc[:,range(0,2)]
+            #
+            Present_Day_ref_df = Present_day_df_annual
+        else:
+            Ensemble_Delta_df = Ensemble_Delta_full_df.filter(regex= temp)
+            Ensemble_Delta_df.columns = ['near', 'far']
+            if temp == 'DJF':
+                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1]
+            Column_names = ['DJF_near', 'DJF_far']
+            if temp == 'MAM':
+                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2]
+            Column_names = ['MAM_near', 'MAM_far']
+            if temp == 'JJA':
+                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3]
+            Column_names = ['JJA_near', 'JJA_far']
+            if temp == 'SON':
+                Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4]
+            Present_Day_ref_df = Mean_df
+        print(Ensemble_Delta_df.columns.values)
+        #Subset to present day variability period
+        Present_Day_ref_df = pd.DataFrame(Present_Day_ref_df.loc[(Present_Day_ref_df.index >= Base_period_start) & (Present_Day_ref_df.index <= Base_period_end)])
+        Present_Day_Mean = np.percentile(Present_Day_ref_df, 50)
+        Present_Day_SD = np.std(Present_Day_ref_df)
+        #create data frame for floating stacked barplots 
+        index=['-2std', '-1std', 'Med', '1std', '2std']
+        columns = ['present','near future', 'far future']
+        Plot_in_df = pd.DataFrame(index=index, columns =columns)
+        #
+        Plot_in_df['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_Mean-Present_Day_SD), float(Present_Day_Mean),
+                float(Present_Day_Mean+Present_Day_SD), float(Present_Day_Mean+2*Present_Day_SD)]
+        Plot_in_df['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][1]), 
+               np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][2])]
+        Plot_in_df['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][1]), 
+               np.NaN, float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][2])]
+        #Create a second data frame that has the values in a way that they can be stacked up in bars with the correct absolute values
+        Plot_in_df2 = pd.DataFrame(index=index, columns =columns )
+        
+        Plot_in_df2['present'] = [float(Present_Day_Mean-2*Present_Day_SD),float(Present_Day_SD), float(Present_Day_SD),
+                float(Present_Day_SD), float(Present_Day_SD)]
+        Plot_in_df2['near future'] = [float(Present_Day_Mean + Ensemble_Delta_df.near[-3:][0]),np.NaN, float(Ensemble_Delta_df.near[-3:][1]-Ensemble_Delta_df.near[-3:][0]), 
+              np.NaN, float(Ensemble_Delta_df.near[-3:][2]-Ensemble_Delta_df.near[-3:][1])]
+        Plot_in_df2['far future'] = [float(Present_Day_Mean + Ensemble_Delta_df.far[-3:][0]),np.NaN, float(Ensemble_Delta_df.far[-3:][1]-Ensemble_Delta_df.far[-3:][0]), 
+               np.NaN, float(Ensemble_Delta_df.far[-3:][2]-Ensemble_Delta_df.far[-3:][1])]
+        #transpose the data frame
+        Plot_in_df_tp = Plot_in_df2.transpose()
+        #do the individual plots
+        if temp == 'annual':
+            xmin = int(min(Plot_in_df.min(axis=1))-1)
+            xmax = int(max(Plot_in_df.max(axis=1))+2)
+        else:
+            xmin = int(min(Plot_in_df.min(axis=1))-1)
+            xmax = int(max(Plot_in_df.max(axis=1))+2)
+        #define colour scheme
+        #likert_colors = ['none', 'firebrick','firebrick','lightcoral','lightcoral']
+        likert_colors = ['none', 'darkblue', 'darkblue','cornflowerblue','cornflowerblue']
+        #plot the stacked barplot
+        ax=plt.subplot(2,3,i) 
+        Plot_in_df_tp.plot.bar(stacked=True, color=likert_colors, edgecolor='none', legend=False, ax=ax)
+        z = plt.axhline(float(Present_Day_Mean-2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
+        z.set_zorder(-1)
+        z = plt.axhline(float(Present_Day_Mean+2*Present_Day_SD), linestyle='-', color='black', alpha=.5)
+        z.set_zorder(-1)
+        z = plt.axhline(float(Present_Day_Mean-Present_Day_SD), linestyle='--', color='black', alpha=.5)
+        z.set_zorder(-1)
+        z = plt.axhline(float(Present_Day_Mean+Present_Day_SD), linestyle='--', color='black', alpha=.5)
+        z.set_zorder(-1)
+        plt.ylim(xmin, xmax)
+        plt.title(Clim_var_type + ' ' + temp )
+        ax.grid(False)
+        for tick in ax.get_xticklabels():
+            tick.set_rotation(0)
+        fig.tight_layout()
+        #reset i to i+1 for next step
+        if temp == 'MAM':
+            i=i+2
+        else:
+            i=i+1
+        print(i)
+    plt.show()    
+    out_file_name = Estuary  + '_' + Clim_var_type + '_CC_prio_plot.png'
+    out_path = output_directory + '/' + out_file_name
+    fig.savefig(out_path)
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
\ No newline at end of file
diff --git a/Analysis/Code/P1_NARCliM_plots_Windows.py b/Analysis/Code/P1_NARCliM_plots_Windows.py
index 7530c1e..40f8638 100644
--- a/Analysis/Code/P1_NARCliM_plots_Windows.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows.py
@@ -28,9 +28,10 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
 #set input parameters
 Base_period_start = '1990-01-01'
 Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
-Estuary = 'Bateman' # 'Belongil'
-Clim_var_type  = "*"   #will create pdf for all variables in folder 
-subset_ensemble = 'no' # is yes, only the model with the lowest, median and max difference between present day and far future are selected         
+Estuary = 'Terrigal' # 'Belongil'
+Clim_var_type  = "pracc*|tasmax*"   #  '*' will create pdf for all variables in folder 
+subset_ensemble = 'yes' # is yes, only the model with the lowest, median and max difference between present day and far future are selected         
+plot_pdf = 'no'
 #####################################----------------------------------
 
 #set directory path for output files
@@ -46,7 +47,7 @@ Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var
 #Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
 #read CSV file
 for clim_var_csv_path in Clim_Var_CSVs:
-    #clim_var_csv_path = Clim_Var_CSVs[3]
+    #clim_var_csv_path = Clim_Var_CSVs[0]
     Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
     Clim_var_type = Filename.split('_', 1)[0]
     print(clim_var_csv_path)
@@ -62,10 +63,34 @@ for clim_var_csv_path in Clim_Var_CSVs:
     if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax':
         Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
     Fdf_1900_2080 = Full_df
-    
+
     #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
     #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
-
+    
+    #Aggregate daily df to annual time series
+    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
+    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
+        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
+        Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
+        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
+        Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
+        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
+        Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
+        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
+        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
+    else:    
+        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
+        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
+        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
+        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
+    
+    #plot the mean of all model runs
+    print('-------------------------------------------')
+    print('mean of all models for climate variable:  ' + Clim_var_type)
+    Fdf_1900_2080_means = Fdf_1900_2080.mean()
+    Fdf_1900_2080_means.plot(kind='bar').figure
+    print('-------------------------------------------')
+    
     if subset_ensemble == 'yes':
         #Select the 3 most representative models (min med and max difference betwen far future and present)
         Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
@@ -96,67 +121,72 @@ for clim_var_csv_path in Clim_Var_CSVs:
         dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
         # use only the 3 representative models for the analysis
         Fdf_1900_2080_all_mods = Fdf_1900_2080
-        Fdf_1900_2080 = dfall
-    
-    #Aggregate daily df to annual time series
-    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
-    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
-        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
-        Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
-        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
-        Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
-        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
-        Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
-        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
-        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
-    else:    
-        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
-        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
-        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
-        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
-    
-    #plot the mean of all model runs
-    print('-------------------------------------------')
-    print('mean of all models for climate variable:  ' + Clim_var_type)
-    Fdf_1900_2080_means = Fdf_1900_2080.mean()
-    Fdf_1900_2080_means.plot(kind='bar').figure
-    print('-------------------------------------------')
-    
-    #Create Deltas of average change
-    models = list(Fdf_1900_2080_means.index)
-    newmodel = []
-    type(newmodel)
-    for each in models:
-        newmodel.append(each[:-5])
-    unique_models = set(newmodel)
-    # calculate diff for each unique model
-    delta_NF_ensemble = []
-    delta_FF_ensemble = []
-    for unique_model in unique_models:
-        dfdiff = Fdf_1900_2080_means.filter(regex= unique_model)
-        type(dfdiff)
-        delta_NF = dfdiff[1] - dfdiff[0]
-        delta_NF_ensemble.append(delta_NF)
-        delta_FF = dfdiff[2] - dfdiff[1]
-        delta_FF_ensemble.append(delta_FF)
-     
-    np.percentile(delta_NF, 50)
-    delta_df
-    delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
-    delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
-    delta_df=pd.concat([delta_df1, delta_df2], axis=1)
-    
-    delta_df.plot(kind='box').figure
-    
-    pd.DataFrame()
-    concat([Full_df, GCM_df], axis=1)
+        #create a dataframe that has 1 column for each of the three representative models
+#        Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
+#        Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
+#        Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
+        dfa = Fdf_1900_2080_annual.iloc[:,[0]]
+        dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
+        dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
+        dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
+        dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
+        dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
+        dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
+        dfall_annual = dfa1.append(dfa2).append(dfa3)
         
-     delta_df ensemble.plot(kind='bar')  
+    #Create Deltas of average change for annual and seasonal basis
+    times = ['annual', 'DJF', 'MAM', 'JJA','SON']
+    delta_all_df = pd.DataFrame()
+    for temp in times:
+        if temp == 'annual':
+            Mean_df = Fdf_1900_2080_annual.mean()
+            Column_names = ['near', 'far']
+        if temp == 'DJF':
+            Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()
+            Column_names = ['DJF_near', 'DJF_far']
+        if temp == 'MAM':
+            Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()
+            Column_names = ['MAM_near', 'MAM_far']
+        if temp == 'JJA':
+            Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()
+            Column_names = ['JJA_near', 'JJA_far']
+        if temp == 'SON':
+            Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()
+            Column_names = ['SON_near', 'SON_far']
+        models = list(Fdf_1900_2080_means.index)
+        newmodel = []
+        type(newmodel)
+        for each in models:
+            newmodel.append(each[:-5])
+        unique_models = set(newmodel)
+        # calculate diff for each unique model
+        delta_NF_ensemble = []
+        delta_FF_ensemble = []
+        for unique_model in unique_models:
+            dfdiff = Mean_df.filter(regex= unique_model)
+            type(dfdiff)
+            delta_NF = dfdiff[1] - dfdiff[0]
+            delta_NF_ensemble.append(delta_NF)
+            delta_FF = dfdiff[2] - dfdiff[1]
+            delta_FF_ensemble.append(delta_FF)
+    
+        delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
+        delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
+        delta_df= pd.concat([delta_df1, delta_df2], axis=1)
         
-        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
-        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        #rename columns
+        delta_df.columns = Column_names
+        #add a row with medians and 10 and 90th percentiles
+        delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
+        delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
+        delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
+        #append df to overall df
+        delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
+
+    out_file_name = Estuary  + '_' + Clim_var_type + '_NARCliM_ensemble_changes.csv'
+    out_path = output_directory + '/' + out_file_name
+    delta_all_df.to_csv(out_path)
 
-    
     #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
     len(Fdf_1900_2080.columns)
     Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
@@ -169,122 +199,106 @@ for clim_var_csv_path in Clim_Var_CSVs:
     Full_farfuture_df = Full_farfuture_df.stack()
     Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
     Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
+    Summarized_df.columns = ['present', 'near', 'far']
     
-    #create a dataframe that has 1 column for each of the three representative models
-    
-    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
-    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
-    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
-
-    dfa = Fdf_1900_2080_annual.iloc[:,[0]]
-    dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
-    dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-    dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
-    dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-    dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
-    dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
-    dfall = dfa1.append(dfa2).append(dfa3)
-    
-    #write the key plots to a single pdf document
-    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_B.pdf'
-    pdf_out_path = output_directory +'/' + pdf_out_file_name
-    
-    #open pdf and add the plots
-    with PdfPages(pdf_out_path) as pdf:
-        #barplot of model means
-        plt.title(Clim_var_type + ' -  model means - full period')
-        ymin = min(Fdf_1900_2080_means) 
-        ymax = max(Fdf_1900_2080_means) 
-        Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #full period density comparison
-        plt.title(Clim_var_type + ' -  density comparison - full period - all models')
-        Summarized_df.plot.kde()
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #annual box
-        plt.title(Clim_var_type + ' -  Annual means/sums for max diff model')
-        Fdf_1900_2080_annual.boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #monthly box
-        plt.title(Clim_var_type + ' -  Monthly means/sums')
-        Fdf_1900_2080_monthly.boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #annual box
-        plt.title(Clim_var_type + ' -  Monthly means/sums for min diff model')
-        Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #annual box
-        plt.title(Clim_var_type + ' -  Monthly means/sums for median diff model')
-        Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #annual box
-        plt.title(Clim_var_type + ' -  Monthly means/sums for max diff model')
-        Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #weekly box
-        plt.title(Clim_var_type + ' -  Weekly means/sums')
-        Fdf_1900_2080_weekly.boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        #daily box
-        plt.title(Clim_var_type + ' -  Daily means/sums')
-        Fdf_1900_2080.boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        # time series plot annual ALL models
-        plt.title(Clim_var_type + ' -  Time series - representative models')
-        dfall.plot(legend=False)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        
-        # seasonal mean boxplots
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
-        plt.title(Clim_var_type + ' -  DJF Summer means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()  
-        plt.title(Clim_var_type + ' -  DJF Summer means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
-        plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close() 
-        plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
-        plt.title(Clim_var_type + ' -  JJA Winter means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close() 
-        plt.title(Clim_var_type + ' -  JJA Winter means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
-        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
-        plt.title(Clim_var_type + ' -  SON Spring means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close() 
-        plt.title(Clim_var_type + ' -  SON Spring means/sums')
-        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
-        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
-        plt.close()
-
-#plots not used  
-#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)
+    #output some summary plot into pdf
+    if plot_pdf == 'yes':
+        #write the key plots to a single pdf document
+        pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_3.pdf'
+        pdf_out_path = output_directory +'/' + pdf_out_file_name
+        #open pdf and add the plots
+        with PdfPages(pdf_out_path) as pdf:
+            #barplot of model means
+            plt.title(Clim_var_type + ' -  model means - full period')
+            ymin = min(Fdf_1900_2080_means) 
+            ymax = max(Fdf_1900_2080_means) 
+            Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #full period density comparison
+            plt.title(Clim_var_type + ' -  density comparison - full period - all models')
+            Summarized_df.plot.kde()
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #annual box
+            plt.title(Clim_var_type + ' -  Annual means/sums for max diff model')
+            Fdf_1900_2080_annual.boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #monthly box
+            plt.title(Clim_var_type + ' -  Monthly means/sums')
+            Fdf_1900_2080_monthly.boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #annual box
+            plt.title(Clim_var_type + ' -  Monthly means/sums for min diff model')
+            Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #annual box
+            plt.title(Clim_var_type + ' -  Monthly means/sums for median diff model')
+            Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #annual box
+            plt.title(Clim_var_type + ' -  Monthly means/sums for max diff model')
+            Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #weekly box
+            plt.title(Clim_var_type + ' -  Weekly means/sums')
+            Fdf_1900_2080_weekly.boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            #daily box
+            plt.title(Clim_var_type + ' -  Daily means/sums')
+            Fdf_1900_2080.boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            # time series plot annual ALL models
+            plt.title(Clim_var_type + ' -  Time series - representative models')
+            dfall_annual.plot(legend=False)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            
+            # seasonal mean boxplots
+            ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+            ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+            plt.title(Clim_var_type + ' -  DJF Summer means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()  
+            plt.title(Clim_var_type + ' -  DJF Summer means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
+            ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
+            plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close() 
+            plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
+            ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
+            plt.title(Clim_var_type + ' -  JJA Winter means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close() 
+            plt.title(Clim_var_type + ' -  JJA Winter means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
+            ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
+            ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
+            plt.title(Clim_var_type + ' -  SON Spring means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close() 
+            plt.title(Clim_var_type + ' -  SON Spring means/sums')
+            Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
+            pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+            plt.close()
\ No newline at end of file