From b2dd2855349ec5d1f494a324d1a4b7feb8af19ee Mon Sep 17 00:00:00 2001
From: tinoheimhuber <tinoheimhuber@gmail.com>
Date: Mon, 23 Apr 2018 18:29:36 +1000
Subject: [PATCH] #First successful recreation of the existing method after
 first meeting with Alejandro di Luca. We are now comparing present day
 variability with near and far future ensemble model deltas (changes in
 20-year average climate). These changes are added to the present day median
 climate for the plots.

---
 Analysis/Code/P1_NARCliM_plots_Windows.py | 103 +++++++++++++++-------
 1 file changed, 70 insertions(+), 33 deletions(-)

diff --git a/Analysis/Code/P1_NARCliM_plots_Windows.py b/Analysis/Code/P1_NARCliM_plots_Windows.py
index 8d2432f..7530c1e 100644
--- a/Analysis/Code/P1_NARCliM_plots_Windows.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows.py
@@ -29,8 +29,10 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
 Base_period_start = '1990-01-01'
 Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
 Estuary = 'Bateman' # 'Belongil'
-Clim_var_type  = "*"   #will create pdf for all variables in folder           
+Clim_var_type  = "*"   #will create pdf for all variables in folder 
+subset_ensemble = 'no' # if 'yes', only the models with the lowest, median and max difference between present day and far future are selected
 #####################################----------------------------------
+
 #set directory path for output files
 output_directory = 'Output/'+ Estuary
 #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
@@ -64,37 +66,37 @@ for clim_var_csv_path in Clim_Var_CSVs:
     #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
     #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
 
-    #Select the 3 most representative models (min med and max difference betwen far future and present)
-    Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
-    Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
-    df = Fdf_1900_2080_sorted_means
-    #add a simple increasing integer index 
-    df = df.reset_index()
-    df= df[df.index % 3 != 1]
-    df['C'] = df[0].diff()
-    df = df.reset_index()
-    df= df[df.index % 2 != 0]
-    #get max difference model (difference between far future and prsent day)
-    a = df[df.index == df['C'].argmax(skipna=True)]
-    Max_dif_mod_name = a.iloc[0]['index']
-    #get min difference model
-    a = df[df.index == df['C'].argmin(skipna=True)]
-    Min_dif_mod_name = a.iloc[0]['index']
-    #get the model which difference is closest to the median difference
-    df['D'] = abs(df['C']- df['C'].median())
-    a = df[df.index == df['D'].argmin(skipna=True)]
-    Med_dif_mod_name = a.iloc[0]['index']
-    #data frame with min med and max difference model
-    df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
-    dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
-    #data frame with individual models
-    dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
-    dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
-    dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
-    
-    # use only the 3 representative models for the analysis
-    Fdf_1900_2080_all_mods = Fdf_1900_2080
-    Fdf_1900_2080 = dfall
+    if subset_ensemble == 'yes':
+        #Select the 3 most representative models (min med and max difference between far future and present)
+        Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
+        Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
+        df = Fdf_1900_2080_sorted_means
+        #add a simple increasing integer index 
+        df = df.reset_index()
+        df= df[df.index % 3 != 1]
+        df['C'] = df[0].diff()
+        df = df.reset_index()
+        df= df[df.index % 2 != 0]
+        #get max difference model (difference between far future and present day)
+        a = df[df.index == df['C'].argmax(skipna=True)]
+        Max_dif_mod_name = a.iloc[0]['index']
+        #get min difference model
+        a = df[df.index == df['C'].argmin(skipna=True)]
+        Min_dif_mod_name = a.iloc[0]['index']
+        #get the model whose difference is closest to the median difference
+        df['D'] = abs(df['C']- df['C'].median())
+        a = df[df.index == df['D'].argmin(skipna=True)]
+        Med_dif_mod_name = a.iloc[0]['index']
+        #data frame with min med and max difference model
+        df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
+        dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
+        #data frame with individual models
+        dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
+        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        # use only the 3 representative models for the analysis
+        Fdf_1900_2080_all_mods = Fdf_1900_2080
+        Fdf_1900_2080 = dfall
     
     #Aggregate daily df to annual time series
     if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
@@ -120,6 +122,41 @@ for clim_var_csv_path in Clim_Var_CSVs:
     Fdf_1900_2080_means.plot(kind='bar').figure
     print('-------------------------------------------')
     
+    #Create Deltas of average change
+    models = list(Fdf_1900_2080_means.index)
+    newmodel = []
+    type(newmodel)
+    for each in models:
+        newmodel.append(each[:-5])
+    unique_models = set(newmodel)
+    # calculate diff for each unique model
+    delta_NF_ensemble = []
+    delta_FF_ensemble = []
+    for unique_model in unique_models:
+        dfdiff = Fdf_1900_2080_means.filter(regex= unique_model)
+        type(dfdiff)
+        delta_NF = dfdiff[1] - dfdiff[0]
+        delta_NF_ensemble.append(delta_NF)
+        delta_FF = dfdiff[2] - dfdiff[1]
+        delta_FF_ensemble.append(delta_FF)
+     
+    np.percentile(delta_NF, 50)
+    delta_df
+    delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
+    delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
+    delta_df=pd.concat([delta_df1, delta_df2], axis=1)
+    
+    delta_df.plot(kind='box').figure
+    
+    pd.DataFrame()
+    concat([Full_df, GCM_df], axis=1)
+        
+     delta_df ensemble.plot(kind='bar')  
+        
+        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+
+    
     #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
     len(Fdf_1900_2080.columns)
     Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
@@ -149,7 +186,7 @@ for clim_var_csv_path in Clim_Var_CSVs:
     dfall = dfa1.append(dfa2).append(dfa3)
     
     #write the key plots to a single pdf document
-    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary2.pdf'
+    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_B.pdf'
     pdf_out_path = output_directory +'/' + pdf_out_file_name
     
     #open pdf and add the plots