CC_Est_NARC/Analysis/Code/P1_NARCliM_plots_Windows.py

# -*- coding: utf-8 -*-
#####################################----------------------------------
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#####################################----------------------------------
#Load packages
#####################################----------------------------------
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
matplotlib.style.use('ggplot')

#
# Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/')
#
#
#####################################----------------------------------
#set input parameters
Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Bateman' # 'Belongil'
Clim_var_type  = "*"   #will create pdf for all variables in folder
subset_ensemble = 'no' # is yes, only the model with the lowest, median and max difference between present day and far future are selected
#####################################----------------------------------

#set directory path for output files
output_directory = 'Output/'+ Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
    print('-------------------')
Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type)
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs:
    #clim_var_csv_path = Clim_Var_CSVs[3]
    Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
    Clim_var_type = Filename.split('_', 1)[0]
    print(clim_var_csv_path)
    Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
    #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
    Full_df.index = Full_df.index.to_period('D')
    Full_df = Full_df.drop(columns=['period'])
    Ncols_df = len(Full_df)
    #check data types of columns
    #Full_df.dtypes

    #substract a constant from all values to convert from kelvin to celcius (temp)
    if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax':
        Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
    Fdf_1900_2080 = Full_df

    #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
    #Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period

    if subset_ensemble == 'yes':
        #Select the 3 most representative models (min med and max difference betwen far future and present)
        Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
        Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
        df = Fdf_1900_2080_sorted_means
        #add a simple increasing integer index
        df = df.reset_index()
        df= df[df.index % 3 != 1]
        df['C'] = df[0].diff()
        df = df.reset_index()
        df= df[df.index % 2 != 0]
        #get max difference model (difference between far future and prsent day)
        a = df[df.index == df['C'].argmax(skipna=True)]
        Max_dif_mod_name = a.iloc[0]['index']
        #get min difference model
        a = df[df.index == df['C'].argmin(skipna=True)]
        Min_dif_mod_name = a.iloc[0]['index']
        #get the model which difference is closest to the median difference
        df['D'] = abs(df['C']- df['C'].median())
        a = df[df.index == df['D'].argmin(skipna=True)]
        Med_dif_mod_name = a.iloc[0]['index']
        #data frame with min med and max difference model
        df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' +  Med_dif_mod_name[:-5] + '|' +  Max_dif_mod_name[:-5] )
        dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
        #data frame with individual models
        dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
        # use only the 3 representative models for the analysis
        Fdf_1900_2080_all_mods = Fdf_1900_2080
        Fdf_1900_2080 = dfall

    #Aggregate daily df to annual time series
    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
        Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
        Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
        Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
    else:
        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means

    #plot the mean of all model runs
    print('-------------------------------------------')
    print('mean of all models for climate variable:  ' + Clim_var_type)
    Fdf_1900_2080_means = Fdf_1900_2080.mean()
    Fdf_1900_2080_means.plot(kind='bar').figure
    print('-------------------------------------------')

    #Create Deltas of average change
    models = list(Fdf_1900_2080_means.index)
    newmodel = []
    type(newmodel)
    for each in models:
        newmodel.append(each[:-5])
    unique_models = set(newmodel)
    # calculate diff for each unique model
    delta_NF_ensemble = []
    delta_FF_ensemble = []
    for unique_model in unique_models:
        dfdiff = Fdf_1900_2080_means.filter(regex= unique_model)
        type(dfdiff)
        delta_NF = dfdiff[1] - dfdiff[0]
        delta_NF_ensemble.append(delta_NF)
        delta_FF = dfdiff[2] - dfdiff[1]
        delta_FF_ensemble.append(delta_FF)

    np.percentile(delta_NF, 50)
    delta_df
    delta_df1=pd.DataFrame(delta_NF_ensemble, index=unique_models)
    delta_df2=pd.DataFrame(delta_FF_ensemble, index=unique_models)
    delta_df=pd.concat([delta_df1, delta_df2], axis=1)

    delta_df.plot(kind='box').figure

    pd.DataFrame()
    concat([Full_df, GCM_df], axis=1)

     delta_df ensemble.plot(kind='bar')

        dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
        dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])


    #create a dataframe that has a single column for present day, near and far future for the (3 selected models)
    len(Fdf_1900_2080.columns)
    Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
    Full_current_df = Full_current_df.stack()
    #nearfuture
    Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,6)]
    Full_nearfuture_df = Full_nearfuture_df.stack()
    #farfuture
    Full_farfuture_df = Fdf_1900_2080.iloc[:,range(6,len(Fdf_1900_2080.columns))]
    Full_farfuture_df = Full_farfuture_df.stack()
    Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
    Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)

    #create a dataframe that has 1 column for each of the three representative models

    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'

    dfa = Fdf_1900_2080_annual.iloc[:,[0]]
    dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
    dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
    dfa2 = Fdf_1900_2080_annual.iloc[:,[1,4,7]].loc[(Fdf_1900_2080_annual.index >= '2020') & (Fdf_1900_2080_annual.index <= '2039')]
    dfa2.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
    dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
    dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
    dfall = dfa1.append(dfa2).append(dfa3)

    #write the key plots to a single pdf document
    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_B.pdf'
    pdf_out_path = output_directory +'/' + pdf_out_file_name

    #open pdf and add the plots
    with PdfPages(pdf_out_path) as pdf:
        #barplot of model means
        plt.title(Clim_var_type + ' -  model means - full period')
        ymin = min(Fdf_1900_2080_means)
        ymax = max(Fdf_1900_2080_means)
        Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #full period density comparison
        plt.title(Clim_var_type + ' -  density comparison - full period - all models')
        Summarized_df.plot.kde()
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #annual box
        plt.title(Clim_var_type + ' -  Annual means/sums for max diff model')
        Fdf_1900_2080_annual.boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #monthly box
        plt.title(Clim_var_type + ' -  Monthly means/sums')
        Fdf_1900_2080_monthly.boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #annual box
        plt.title(Clim_var_type + ' -  Monthly means/sums for min diff model')
        Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #annual box
        plt.title(Clim_var_type + ' -  Monthly means/sums for median diff model')
        Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #annual box
        plt.title(Clim_var_type + ' -  Monthly means/sums for max diff model')
        Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #weekly box
        plt.title(Clim_var_type + ' -  Weekly means/sums')
        Fdf_1900_2080_weekly.boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        #daily box
        plt.title(Clim_var_type + ' -  Daily means/sums')
        Fdf_1900_2080.boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        # time series plot annual ALL models
        plt.title(Clim_var_type + ' -  Time series - representative models')
        dfall.plot(legend=False)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()

        # seasonal mean boxplots
        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())
        plt.title(Clim_var_type + ' -  DJF Summer means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        plt.title(Clim_var_type + ' -  DJF Summer means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
        plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        plt.title(Clim_var_type + ' -  MAM Autumn means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
        plt.title(Clim_var_type + ' -  JJA Winter means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        plt.title(Clim_var_type + ' -  JJA Winter means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
        plt.title(Clim_var_type + ' -  SON Spring means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()
        plt.title(Clim_var_type + ' -  SON Spring means/sums')
        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
        plt.close()

#plots not used
#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)