#cleaned out the code and separated big functions from the analysis code

the functions are in climdata_fcts.py
Development1
Valentin Heimhuber 7 years ago
parent 1154e4eb1c
commit 42ab0af07a

@ -5,16 +5,27 @@
#evspsblmean water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1 #evspsblmean water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
#tasmean mean near surface temperature #tasmean mean near surface temperature
#pracc precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip) #pracc precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'evspsblmean' water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
# 'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
# 'tasmean' mean near surface temperature
# 'tasmax' maximum near surface temperature
# 'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'pr1Hmaxtstep' maximum 1 hour interval rainfall in a one day period
# 'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
# 'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
Clim_Var <- 'evspsblmean'
Clim_Var <- 'wssmax'
Datatype <- 'T_GCMS' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009) Datatype <- 'T_GCMS' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data? Biasboolean <- 'False' #use bias corrected data?
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs' Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/'
Filename <- 'NARCLIM_Point_Sites.csv' Filename <- 'NARCLIM_Point_Sites.csv'
#Load CSV with location names and lat lon coordinates #Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T)) Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T, fileEncoding="UTF-8-BOM"))
#create empty vector for storing the command line text and open file #create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c() Vector.for.command.line.txt <- c()
@ -23,13 +34,13 @@ text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep="")) paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1) Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df$Name))){ for (i in 1:(length(Location.df$Name))){
#name<-as.character(Location.df$Name[i]) name<-as.character(Location.df$Name[i])
#name<-gsub('([[:punct:]])|\\s+','_',name) #name<-gsub('([[:punct:]])|\\s+','_',name)
if(i<10){ # if(i<10){
name<-paste('Catchment_0', as.character(i), sep="") # name<-paste('Catchment_0', as.character(i), sep="")
}else{ # }else{
name<-paste('Catchment_', as.character(i), sep="") # name<-paste('Catchment_', as.character(i), sep="")
} # }
latitude=round(as.numeric(Location.df$Lat[i]),3) latitude=round(as.numeric(Location.df$Lat[i]),3)
longitude=round(as.numeric(Location.df$Long[i]),3) longitude=round(as.numeric(Location.df$Long[i]),3)
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""), text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
@ -39,9 +50,9 @@ P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\ --lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected") 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text) Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
if(i==10|i==20|i==31){ if(i==10|i==20|i==31|i==length(Location.df$Name)){
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ") Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BC_Generation/Code/NARCLIM_Download_and_Processing/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), ".txt", sep="") text.file.name <- paste(Directory ,'/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), "_", ".txt", sep="")
#open and fill text file #open and fill text file
fileConn <- file(text.file.name) fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn) writeLines(Vector.for.command.line.txt, fileConn)

@ -1,12 +1,13 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#####################################---------------------------------- #==========================================================#
#Last Updated - March 2018 #Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber #@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables. #code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py #Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#####################################----------------------------------
#==========================================================#
#Load packages #Load packages
#####################################---------------------------------- #==========================================================#
import numpy as np import numpy as np
import os import os
import pandas as pd import pandas as pd
@ -18,346 +19,334 @@ from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages from matplotlib.backends.backend_pdf import PdfPages
from ggplot import * from ggplot import *
matplotlib.style.use('ggplot') matplotlib.style.use('ggplot')
#
# import own modules
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
#==========================================================#
# Set working direcotry (where postprocessed NARClIM data is located) # Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/') os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
# #==========================================================#
#####################################----------------------------------
#==========================================================#
#set input parameters #set input parameters
Base_period_start = '1990-01-01' Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Nadgee' # 'Belongil' Estuary = 'HUNTER' # 'Belongil'
Clim_var_type = "pracc*" # '*' will create pdf for all variables in folder "pracc*|tasmax*" Clim_var_type = "pracc" # '*' will create pdf for all variables in folder "pracc*|tasmax*"
plot_pdf = 'yes' plot_pdf = 'yes'
delta_csv = 'yes' delta_csv = 'yes'
Stats = 'dailymax' Stats = 'dailymax'
#####################################---------------------------------- Version = 'V4'
# #==========================================================#
#==========================================================#
#set directory path for output files #set directory path for output files
output_directory = 'Output/'+ Estuary output_directory = 'Output/Case_Study_1/'+ Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted' #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory): if not os.path.exists(output_directory):
os.makedirs(output_directory) os.makedirs(output_directory)
print('-------------------------------------------') print('-------------------------------------------')
print("output directory folder didn't exist and was generated") print("output directory folder didn't exist and was generated")
print('-------------------------------------------') print('-------------------------------------------')
print('-------------------') #==========================================================#
Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type)
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' ) #==========================================================#
#read CSV file Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '*' )
for clim_var_csv_path in Clim_Var_CSVs: Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
#clim_var_csv_path = Clim_Var_CSVs[0] #==========================================================#
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0] #==========================================================#
print(clim_var_csv_path) #read CSV files and start analysis
Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True) #==========================================================#
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough)) #for clim_var_csv_path in Clim_Var_CSVs:
Full_df.index = Full_df.index.to_period('D') clim_var_csv_path = Clim_Var_CSVs[0]
Full_df = Full_df.drop(columns=['period']) Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Ncols_df = len(Full_df) Clim_var_type = Filename.split('_', 1)[0]
#check data types of columns print(clim_var_csv_path)
#Full_df.dtypes Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
#substract a constant from all values to convert from kelvin to celcius (temp) Full_df.index = Full_df.index.to_period('D')
if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax': Full_df = Full_df.drop(columns=['period'])
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15 Ncols_df = len(Full_df)
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean': #check data types of columns
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24 #Full_df.dtypes
Fdf_1900_2080 = Full_df
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period) #==========================================================#
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period #substract a constant from all values to convert from kelvin to celcius (temp)
if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax':
#Aggregate daily df to annual time series Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'): Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
if(Stats == 'maxdaily'): Fdf_1900_2080 = Full_df
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max() #==========================================================#
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').max() #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan) #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').max()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan) #==========================================================#
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means #Aggregate daily df to annual time series
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan) if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
else: or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum() if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan) Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum() Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan) Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum() Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan) else:
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan) Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
else: Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
else:
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean() Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#plot the mean of all model runs #==========================================================#
print('-------------------------------------------')
print('mean of all models for climate variable: ' + Clim_var_type)
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#Fdf_1900_2080_means.columns = ['Mean'] #==========================================================#
#Fdf_1900_2080_means.plot(kind='bar').figure #Select the 3 most representative models (min med and max difference betwen far future and present)
print('-------------------------------------------') dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
#==========================================================#
#Select the 3 most representative models (min med and max difference betwen far future and present)
Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df.reset_index()
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
df = df.reset_index()
df= df[df.index % 2 != 0]
#get max difference model (difference between far future and prsent day)
a = df[df.index == df['C'].argmax(skipna=True)]
Max_dif_mod_name = a.iloc[0]['index']
#get min difference model
a = df[df.index == df['C'].argmin(skipna=True)]
Min_dif_mod_name = a.iloc[0]['index']
#get the model which difference is closest to the median difference
df['D'] = abs(df['C']- df['C'].median())
a = df[df.index == df['D'].argmin(skipna=True)]
Med_dif_mod_name = a.iloc[0]['index']
#data frame with min med and max difference model
df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] )
dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
#data frame with individual models
dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
# use only the 3 representative models for the analysis
Fdf_1900_2080_all_mods = Fdf_1900_2080
#create a dataframe that has 1 column for each of the three representative models
# Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
# Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
# Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#Create Deltas of average change for annual and seasonal basis
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
delta_all_df = pd.DataFrame()
for temp in times:
if temp == 'annual':
Mean_df = Fdf_1900_2080_annual.mean()
Column_names = ['near', 'far']
if temp == 'DJF':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()
Column_names = ['DJF_near', 'DJF_far']
if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()
Column_names = ['SON_near', 'SON_far']
models = list(Fdf_1900_2080_means.index)
newmodel = []
type(newmodel)
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
type(dfdiff)
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[1]
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#rename columns
delta_df.columns = Column_names
#add a row with medians and 10 and 90th percentiles
delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if delta_csv == 'yes': #==========================================================#
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes.csv' #create a dataframe that has 1 column for each of the three representative models
out_path = output_directory + '/' + out_file_name dfa = Fdf_1900_2080_annual.iloc[:,[0]]
delta_all_df.to_csv(out_path) dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#==========================================================#
#create a dataframe that has a single column for present day, near and far future for the (3 selected models) #==========================================================#
len(Fdf_1900_2080.columns) #Create Deltas of average change for annual and seasonal basis
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)] #==========================================================#
Full_current_df = Full_current_df.stack() times = ['annual', 'DJF', 'MAM', 'JJA','SON']
#nearfuture delta_all_df = pd.DataFrame()
Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)] for temp in times:
Full_nearfuture_df = Full_nearfuture_df.stack() if temp == 'annual':
#farfuture Mean_df = Fdf_1900_2080_annual.mean()
Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))] Column_names = ['near', 'far']
Full_farfuture_df = Full_farfuture_df.stack() if temp == 'DJF':
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True) Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True) Column_names = ['DJF_near', 'DJF_far']
Summarized_df.columns = ['present', 'near', 'far'] if temp == 'MAM':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()
Column_names = ['MAM_near', 'MAM_far']
if temp == 'JJA':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()
Column_names = ['JJA_near', 'JJA_far']
if temp == 'SON':
Mean_df = Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()
Column_names = ['SON_near', 'SON_far']
models = list(Fdf_1900_2080_means.index)
newmodel = []
type(newmodel)
for each in models:
newmodel.append(each[:-5])
unique_models = set(newmodel)
# calculate diff for each unique model
delta_NF_ensemble = []
delta_FF_ensemble = []
for unique_model in unique_models:
dfdiff = Mean_df.filter(regex= unique_model)
type(dfdiff)
delta_NF = dfdiff[1] - dfdiff[0]
delta_NF_ensemble.append(delta_NF)
delta_FF = dfdiff[2] - dfdiff[1]
delta_FF_ensemble.append(delta_FF)
delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
delta_df= pd.concat([delta_df1, delta_df2], axis=1)
#output some summary plot into pdf #rename columns
if plot_pdf == 'yes': delta_df.columns = Column_names
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', #add a row with medians and 10 and 90th percentiles
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', delta_df.loc['10th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 10), Column_names[1]:np.percentile(delta_df[Column_names[1]], 10)})
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal'] delta_df.loc['median'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 50), Column_names[1]:np.percentile(delta_df[Column_names[1]], 50)})
plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , delta_df.loc['90th'] = pd.Series({Column_names[0]:np.percentile(delta_df[Column_names[0]], 90), Column_names[1]:np.percentile(delta_df[Column_names[1]], 90)})
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , #append df to overall df
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ] delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
#==========================================================#
#plt.cm.Paired(np.arange(len(Fdf_1900_2080_means))) if delta_csv == 'yes':
#write the key plots to a single pdf document out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes.csv'
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_10.pdf' out_path = output_directory + '/' + out_file_name
pdf_out_path = output_directory +'/' + pdf_out_file_name delta_all_df.to_csv(out_path)
#open pdf and add the plots #==========================================================#
with PdfPages(pdf_out_path) as pdf:
#barplot of model means #==========================================================#
plt.title(Clim_var_type + ' - model means - full period') #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
ymin = min(Fdf_1900_2080_means) Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means) Full_current_df = Full_current_df.stack()
Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36) #nearfuture
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
plt.close() Full_nearfuture_df = Full_nearfuture_df.stack()
# #farfuture
plt.title(Clim_var_type + ' - model deltas - far-present') Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
neardeltadf=delta_all_df['far'] Full_farfuture_df = Full_farfuture_df.stack()
ymin = 0 #min(neardeltadf) - 0.008 *min(neardeltadf) Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
ymax = max(neardeltadf) + 0.008 * max(neardeltadf) Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax)) Summarized_df.columns = ['present', 'near', 'far']
#fig.patch.set_alpha(0) #==========================================================#
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.close()
#
plt.title(Clim_var_type + ' - model deltas - near-present') #==========================================================#
neardeltadf=delta_all_df['near'] #output some summary plot into pdf
#ymin = 0 #min(neardeltadf) - 0.008 *min(neardeltadf) #==========================================================#
#ymax = max(neardeltadf) + 0.008 *max(neardeltadf) if plot_pdf == 'yes':
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax)) plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4) 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
plt.close() 'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
#full period density comparison plotcolours36b = ['tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
plt.title(Clim_var_type + ' - density comparison - full period - all models') 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ,
Summarized_df.plot.kde() 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
pdf.savefig(bbox_inches='tight', pad_inches=0.4) plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plt.close() plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
#full period density comparison #plt.cm.Paired(np.arange(len(Fdf_1900_2080_means)))
plt.title(Clim_var_type + ' - density comparison - full period - max delta model') #write the key plots to a single pdf document
xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])))) pdf_out_file_name = Clim_var_type + '_' + Stats + '_start_' + Base_period_start + '_NARCliM_summary_' + Version + '.pdf'
xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])))) pdf_out_path = output_directory +'/' + pdf_out_file_name
Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax)) #open pdf and add the plots
pdf.savefig(bbox_inches='tight', pad_inches=0.4) with PdfPages(pdf_out_path) as pdf:
plt.close() #barplot of model means
#annual box plt.title(Clim_var_type + ' - model means - full period')
plt.title(Clim_var_type + ' - Annual means/sums for max diff model') ymin = min(Fdf_1900_2080_means)
Fdf_1900_2080_annual.boxplot(rot=90) ymax = max(Fdf_1900_2080_means) + 0.008 *min(Fdf_1900_2080_means)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax), color=plotcolours36)
plt.close() pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#monthly box plt.close()
plt.title(Clim_var_type + ' - Monthly means/sums') #
Fdf_1900_2080_monthly.boxplot(rot=90) neardeltadf=delta_all_df['near']
pdf.savefig(bbox_inches='tight', pad_inches=0.4) ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
plt.close() ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
#annual box neardeltadf=delta_all_df['far']
plt.title(Clim_var_type + ' - Monthly means/sums for min diff model') ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90) ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) ymin = min(ymin, ymin2)
plt.close() if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
#annual box ymin = 0
plt.title(Clim_var_type + ' - Monthly means/sums for median diff model') ymax = max(ymax, ymax2)
Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90) #
pdf.savefig(bbox_inches='tight', pad_inches=0.4) # delta barplot for report 1#################################
plt.close() ax=plt.subplot(2,1,1)
#annual box plt.title(Clim_var_type + ' - model deltas - near-present')
plt.title(Clim_var_type + ' - Monthly means/sums for max diff model') neardeltadf=delta_all_df['near']
Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90) neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) plt.xticks([])
plt.close() #ax.xaxis.set_ticklabels([])
#weekly box #pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.title(Clim_var_type + ' - Weekly means/sums') #plt.close()
Fdf_1900_2080_weekly.boxplot(rot=90) #
pdf.savefig(bbox_inches='tight', pad_inches=0.4) ax=plt.subplot(2,1,2)
plt.close() plt.title(Clim_var_type + ' - model deltas - far-present')
#daily box neardeltadf=delta_all_df['far']
plt.title(Clim_var_type + ' - Daily means/sums') neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
Fdf_1900_2080.boxplot(rot=90) ax.xaxis.grid(False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) #fig.patch.set_alpha(0)
plt.close() #plt.show()
# time series plot annual ALL models pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.title(Clim_var_type + ' - Time series - all models') plt.close()
Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36] # end delta barplot for report 1#################################
test = Fdf_1900_2080_annual #
Mod_Names = test.columns #full period density comparison
New_Mod_Name = [] plt.title(Clim_var_type + ' - density comparison - full period - all models')
for i in range(0,len(Mod_Names)): Summarized_df.plot.kde()
New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i]) pdf.savefig(bbox_inches='tight', pad_inches=0.4)
test.columns = New_Mod_Name plt.close()
test_sorted = test.reindex_axis(sorted(test.columns), axis=1) #full period density comparison
colnamest = test.columns plt.title(Clim_var_type + ' - density comparison - full period - max delta model')
test_sorted.columns = [w[3:-5] for w in colnamest] xmin = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) - 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
test_sorted.plot(legend=False, color = plotcolours36) xmax = float(max(np.nanpercentile(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]),50) + 4 * np.std(Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]))))
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5]).plot.kde(xlim=(xmin,xmax))
plt.close() pdf.savefig(bbox_inches='tight', pad_inches=0.4)
# time series plot annual ALL models plt.close()
plt.title(Clim_var_type + ' - Time series - representative models') #annual box
dfall_annual.plot(legend=False) plt.title(Clim_var_type + ' - Annual means/sums for max diff model')
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Fdf_1900_2080_annual.boxplot(rot=90)
plt.close() pdf.savefig(bbox_inches='tight', pad_inches=0.4)
# seasonal mean boxplots plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) #
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) #daily box
plt.title(Clim_var_type + ' - DJF Summer means/sums') plt.title(Clim_var_type + ' - Daily means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax)) Fdf_1900_2080.boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close() plt.close()
plt.title(Clim_var_type + ' - DJF Summer means/sums') # time series plot annual ALL models
Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90) plt.title(Clim_var_type + ' - Time series - all models')
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Mod_order = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,16,17,18,22,23,24,31,32,33,25,26,27,28,29,30,34,35,36]
plt.close() test = Fdf_1900_2080_annual
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) Mod_Names = test.columns
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) New_Mod_Name = []
plt.title(Clim_var_type + ' - MAM Autumn means/sums') for i in range(0,len(Mod_Names)):
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax)) New_Mod_Name.append(str(Mod_order[i]+10) + '_' + Mod_Names[i])
pdf.savefig(bbox_inches='tight', pad_inches=0.4) test.columns = New_Mod_Name
plt.close() test_sorted = test.reindex_axis(sorted(test.columns), axis=1)
plt.title(Clim_var_type + ' - MAM Autumn means/sums') colnamest = test.columns
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90) test_sorted.columns = [w[3:-5] for w in colnamest]
pdf.savefig(bbox_inches='tight', pad_inches=0.4) test_sorted.plot(legend=False, color = plotcolours36)
plt.close() pdf.savefig(bbox_inches='tight', pad_inches=0.4)
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) plt.close()
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) # time series plot annual ALL models
plt.title(Clim_var_type + ' - JJA Winter means/sums') plt.title(Clim_var_type + ' - Time series - representative models')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax)) dfall_annual.plot(legend=False)
pdf.savefig(bbox_inches='tight', pad_inches=0.4) pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close() plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums') # seasonal mean boxplots
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90) ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
pdf.savefig(bbox_inches='tight', pad_inches=0.4) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
plt.close() plt.title(Clim_var_type + ' - DJF Summer means/sums')
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()).plot(kind='bar', ylim=(ymin,ymax))
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.title(Clim_var_type + ' - SON Spring means/sums') plt.close()
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax)) plt.title(Clim_var_type + ' - DJF Summer means/sums')
pdf.savefig(bbox_inches='tight', pad_inches=0.4) Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
plt.close() pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.title(Clim_var_type + ' - SON Spring means/sums') plt.close()
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90) ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
pdf.savefig(bbox_inches='tight', pad_inches=0.4) ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
plt.close() plt.title(Clim_var_type + ' - MAM Autumn means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - MAM Autumn means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
plt.title(Clim_var_type + ' - JJA Winter means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - JJA Winter means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
plt.title(Clim_var_type + ' - SON Spring means/sums')
pd.DataFrame(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()).plot(kind='bar', ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()
plt.title(Clim_var_type + ' - SON Spring means/sums')
Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.close()

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 16:32:01 2018
@author: z5025317
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
def compare_images(im1, im2):
    """Display two images side by side in grayscale on linked axes.

    The two subplots share both x and y axes, so zooming/panning one
    pans the other — convenient for visually diffing the images.
    """
    plt.figure()
    left_axes = plt.subplot(1, 2, 1)
    plt.imshow(im1, cmap='gray')
    plt.subplot(1, 2, 2, sharex=left_axes, sharey=left_axes)
    plt.imshow(im2, cmap='gray')
    plt.show()
def reject_outliers(data, m=2):
    """Return the elements of a numpy array within m standard deviations of the mean.

    Elements whose absolute deviation from the mean is >= m * std are dropped.
    """
    centre = np.mean(data)
    spread = np.std(data)
    keep_mask = np.abs(data - centre) < m * spread
    return data[keep_mask]
def duplicates_dict(lst):
    """Return a dict mapping each duplicated item in lst to all its indices.

    Items that occur only once are omitted from the result.

    Single pass over the list (O(n)); the original implementation called
    lst.count(x) for every distinct item and re-scanned the list per
    duplicate, which is O(n**2).
    """
    indices = {}
    for position, item in enumerate(lst):
        indices.setdefault(item, []).append(position)
    # keep only items that appear more than once
    return {item: pos for item, pos in indices.items() if len(pos) > 1}
def datenum2datetime(datenum):
    """Convert an iterable of MATLAB datenums to a list of python datetimes.

    MATLAB day 1 is 0000-01-01 while python ordinal 1 is 0001-01-01, hence
    the 366-day correction; the fractional part of each datenum carries the
    time of day.
    """
    converted = []
    for dn in datenum:
        whole_days = datetime.fromordinal(int(dn))
        time_of_day = timedelta(days=float(dn) % 1)
        converted.append(whole_days + time_of_day - timedelta(days=366))
    return converted
def select_min_med_max_dif_model(NARCLIM_df):
    """Select the three most representative models from a NARCliM ensemble.

    Columns of NARCLIM_df are assumed to come in groups of three per model
    once sorted alphabetically (e.g. present / near-future / far-future,
    distinguished by a trailing 5-character period tag — hence the [:-5]
    slicing below; TODO confirm suffix length against the data loader).
    For each model the difference between the far-future mean (third column
    of the triple) and the present-day mean (first column) is computed, and
    the models with the minimum, median and maximum difference are selected.

    Returns
    -------
    dfall : DataFrame with the columns of all three selected models, sorted
    dfmin, dfmed, dfmax : DataFrames with the columns of each selected model
    Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name : str
        far-future column name of each selected model

    Fixes over the previous version:
    - dfmed previously filtered on Max_dif_mod_name (copy-paste bug), so the
      "median" frame was a duplicate of the max frame.
    - reindex_axis and label-returning Series.argmax/argmin were removed from
      pandas; replaced with reindex(..., axis=1) and idxmax/idxmin, which
      return the row *label* that the df.index == ... comparisons rely on.
    """
    # sort columns so each model's three period-columns are adjacent
    Fdf_1900_2080_sorted = NARCLIM_df.reindex(sorted(NARCLIM_df.columns), axis=1)
    Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
    df = Fdf_1900_2080_sorted_means
    # add a simple increasing integer index (column names move to 'index')
    df = df.reset_index()
    # drop the middle (second) period of each model triple
    df = df[df.index % 3 != 1]
    # consecutive differences; every second value is far-future minus present
    df['C'] = df[0].diff()
    df = df.reset_index()
    # keep only the within-model (far - present) differences
    df = df[df.index % 2 != 0]
    # model with the maximum difference between far future and present
    a = df[df.index == df['C'].idxmax(skipna=True)]
    Max_dif_mod_name = a.iloc[0]['index']
    # model with the minimum difference
    a = df[df.index == df['C'].idxmin(skipna=True)]
    Min_dif_mod_name = a.iloc[0]['index']
    # model whose difference is closest to the ensemble median difference
    df['D'] = abs(df['C'] - df['C'].median())
    a = df[df.index == df['D'].idxmin(skipna=True)]
    Med_dif_mod_name = a.iloc[0]['index']
    # data frame with the min, med and max difference models combined
    # NOTE(review): model names are used as regex patterns; names containing
    # regex metacharacters would need escaping.
    df2 = NARCLIM_df.filter(regex=Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5])
    dfall = df2.reindex(sorted(df2.columns), axis=1)
    # data frames with the individual models
    dfmin = NARCLIM_df.filter(regex=Min_dif_mod_name[:-5])
    dfmax = NARCLIM_df.filter(regex=Max_dif_mod_name[:-5])
    dfmed = NARCLIM_df.filter(regex=Med_dif_mod_name[:-5])
    return dfall, dfmin, dfmed, dfmax, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name
Loading…
Cancel
Save