#First successful recreation of the existing method

After first meeting with Alejandro di Luca. We are not comparing present-day variability with near- and far-future ensemble model deltas (changes in 20-year average climate). These changes are added to the present-day median climate for the plots.
8 years ago · b2dd285534
parent fd19552a5f
commit b2dd285534
1 changed files with 70 additions and 33 deletions
--- a/Analysis/Code/P1_NARCliM_plots_Windows.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows.py
@ -29,8 +29,10 @@ os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
 Base_period_start = '1990-01-01'
 Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
 Estuary = 'Bateman' # 'Belongil'
-Clim_var_type  = "*"   #will create pdf for all variables in folder           
+Clim_var_type  = "*"   #will create pdf for all variables in folder 
+subset_ensemble = 'no' # is yes, only the model with the lowest, median and max difference between present day and far future are selected         
 #####################################----------------------------------
+
 #set directory path for output files
 output_directory = 'Output/'+ Estuary
 #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
@ -64,37 +66,37 @@ for clim_var_csv_path in Clim_Var_CSVs:
    #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
    #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period

-    #Select the 3 most representative models (min med and max difference betwen far future and present)
-    Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
-    Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
-    df = Fdf_1900_2080_sorted_means
-    #add a simple increasing integer index 
-    df = df.reset_index()
-    df= df[df.index % 3 != 1]
-    df['C'] = df[0].diff()
-    df = df.reset_index()
-    df= df[df.index % 2 != 0]
-    #get max difference model (difference between far future and prsent day)
-    a = df[df.index == df['C'].argmax(skipna=True)]
-    Max_dif_mod_name = a.iloc[0]['index']
-    #get min difference model
-    a = df[df.index == df['C'].argmin(skipna=True)]
-    Min_dif_mod_name = a.iloc[0]['index']
-    #get the model which difference is closest to the median difference
-    df['D'] = abs(df['C']- df['C'].median())
-    a = df[df.index == df['D'].argmin(skipna=True)]
-    Med_dif_mod_name = a.iloc[0]['index']
-    #data frame with min med and max difference model
-    df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
-    dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
-    #data frame with individual models
-    dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
-    dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
-    dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
-    
-    # use only the 3 representative models for the analysis
-    Fdf_1900_2080_all_mods = Fdf_1900_2080
-    Fdf_1900_2080 = dfall
+    if subset_ensemble == 'yes':
+        #Select the 3 most representative models (min med and max difference betwen far future and present)
+        Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
+        Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
+        df = Fdf_1900_2080_sorted_means
+        #add a simple increasing integer index 
+        df = df.reset_index()
+        df= df[df.index % 3 != 1]
+        df['C'] = df[0].diff()
+        df = df.reset_index()
+        df= df[df.index % 2 != 0]
+        #get max difference model (difference between far future and prsent day)
+        a = df[df.index == df['C'].argmax(skipna=True)]
+        Max_dif_mod_name = a.iloc[0]['index']
+        #get min difference model
+        a = df[df.index == df['C'].argmin(skipna=True)]
+        Min_dif_mod_name = a.iloc[0]['index']
+        #get the model which difference is closest to the median difference
+        df['D'] = abs(df['C']- df['C'].median())
+        a = df[df.index == df['D'].argmin(skipna=True)]
+        Med_dif_mod_name = a.iloc[0]['index']
+        #data frame with min med and max difference model
+        df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
+        dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
+        #data frame with individual models
+        dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
+        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        # use only the 3 representative models for the analysis
+        Fdf_1900_2080_all_mods = Fdf_1900_2080
+        Fdf_1900_2080 = dfall
    
    #Aggregate daily df to annual time series
    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
@ -120,6 +122,41 @@ for clim_var_csv_path in Clim_Var_CSVs:
    Fdf_1900_2080_means.plot(kind='bar').figure
    print('-------------------------------------------')
    
+    #Create Deltas of average change
+    models = list(Fdf_1900_2080_means.index)
+    newmodel = []
+    type(newmodel)
+    for each in models:
+        newmodel.append(each[:-5])
+    unique_models = set(newmodel)
+    # calculate diff for each unique model
+    delta_NF_ensemble = []
+    delta_FF_ensemble = []
+    for unique_model in unique_models:
+        dfdiff = Fdf_1900_2080_means.filter(regex= unique_model)
+        type(dfdiff)
+        delta_NF = dfdiff[1] - dfdiff[0]
+        delta_NF_ensemble.append(delta_NF)
+        delta_FF = dfdiff[2] - dfdiff[1]
+        delta_FF_ensemble.append(delta_FF)
+     
+    np.percentile(delta_NF, 50)
+    delta_df
+    delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
+    delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
+    delta_df=pd.concat([delta_df1, delta_df2], axis=1)
+    
+    delta_df.plot(kind='box').figure
+    
+    pd.DataFrame()
+    concat([Full_df, GCM_df], axis=1)
+        
+     delta_df ensemble.plot(kind='bar')  
+        
+        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
+
+    
    #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
    len(Fdf_1900_2080.columns)
    Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
@ -149,7 +186,7 @@ for clim_var_csv_path in Clim_Var_CSVs:
    dfall = dfa1.append(dfa2).append(dfa3)
    
    #write the key plots to a single pdf document
-    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary2.pdf'
+    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_B.pdf'
    pdf_out_path = output_directory +'/' + pdf_out_file_name
    
    #open pdf and add the plots