# -*- coding: utf-8 -*-
#####################################----------------------------------
#Last Updated - March 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#####################################----------------------------------
#Load packages
#####################################----------------------------------
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib . pyplot as plt
from datetime import datetime
from datetime import timedelta
from matplotlib . backends . backend_pdf import PdfPages
from ggplot import *
matplotlib . style . use ( ' ggplot ' )
#
# Set working direcotry (where postprocessed NARClIM data is located)
os . chdir ( ' C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/ ' )
#
#
#####################################----------------------------------
#set input parameters
Base_period_start = ' 1990-01-01 '
Base_period_end = ' 2080-01-01 ' #use last day that's not included in period as < is used for subsetting
Estuary = ' Bateman ' # 'Belongil'
Clim_var_type = " * " #will create pdf for all variables in folder
#####################################----------------------------------
#set directory path for output files
output_directory = ' Output/ ' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os . path . exists ( output_directory ) :
os . makedirs ( output_directory )
print ( ' ------------------------------------------- ' )
print ( " output directory folder didn ' t exist and was generated " )
print ( ' ------------------------------------------- ' )
print ( ' ------------------- ' )
Clim_Var_CSVs = glob . glob ( ' ./Data/NARCLIM_Site_CSVs/ ' + Estuary + ' / ' + Clim_var_type )
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs :
#clim_var_csv_path = Clim_Var_CSVs[3]
Filename = os . path . basename ( os . path . normpath ( clim_var_csv_path ) )
Clim_var_type = Filename . split ( ' _ ' , 1 ) [ 0 ]
print ( clim_var_csv_path )
Full_df = pd . read_csv ( clim_var_csv_path , index_col = 0 , parse_dates = True )
#pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
Full_df . index = Full_df . index . to_period ( ' D ' )
Full_df = Full_df . drop ( columns = [ ' period ' ] )
Ncols_df = len ( Full_df )
#check data types of columns
#Full_df.dtypes
#substract a constant from all values to convert from kelvin to celcius (temp)
if Clim_var_type == ' tasmean ' or Clim_var_type == ' tasmax ' :
Full_df = Full_df . iloc [ : , 0 : ( Ncols_df - 1 ) ] - 273.15
Fdf_1900_2080 = Full_df
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
#Select the 3 most representative models (min med and max difference betwen far future and present)
Fdf_1900_2080_sorted = Fdf_1900_2080 . reindex_axis ( sorted ( Fdf_1900_2080 . columns ) , axis = 1 )
Fdf_1900_2080_sorted_means = pd . DataFrame ( Fdf_1900_2080_sorted . mean ( ) )
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df . reset_index ( )
df = df [ df . index % 3 != 1 ]
df [ ' C ' ] = df [ 0 ] . diff ( )
df = df . reset_index ( )
df = df [ df . index % 2 != 0 ]
#get max difference model (difference between far future and prsent day)
a = df [ df . index == df [ ' C ' ] . argmax ( skipna = True ) ]
Max_dif_mod_name = a . iloc [ 0 ] [ ' index ' ]
#get min difference model
a = df [ df . index == df [ ' C ' ] . argmin ( skipna = True ) ]
Min_dif_mod_name = a . iloc [ 0 ] [ ' index ' ]
#get the model which difference is closest to the median difference
df [ ' D ' ] = abs ( df [ ' C ' ] - df [ ' C ' ] . median ( ) )
a = df [ df . index == df [ ' D ' ] . argmin ( skipna = True ) ]
Med_dif_mod_name = a . iloc [ 0 ] [ ' index ' ]
#data frame with min med and max difference model
df2 = Fdf_1900_2080 . filter ( regex = Min_dif_mod_name [ : - 5 ] + ' | ' + Med_dif_mod_name [ : - 5 ] + ' | ' + Max_dif_mod_name [ : - 5 ] )
dfall = df2 . reindex_axis ( sorted ( df2 . columns ) , axis = 1 )
#data frame with individual models
dfmin = Fdf_1900_2080 . filter ( regex = Min_dif_mod_name [ : - 5 ] )
dfmax = Fdf_1900_2080 . filter ( regex = Max_dif_mod_name [ : - 5 ] )
dfmed = Fdf_1900_2080 . filter ( regex = Max_dif_mod_name [ : - 5 ] )
# use only the 3 representative models for the analysis
Fdf_1900_2080_all_mods = Fdf_1900_2080
Fdf_1900_2080 = dfall
#Aggregate daily df to annual time series
if ( Clim_var_type == ' pracc ' or Clim_var_type == ' evspsblmean ' or Clim_var_type == ' potevpmean '
or Clim_var_type == ' pr1Hmaxtstep ' or Clim_var_type == ' wss1Hmaxtstep ' ) :
Fdf_1900_2080_annual = Fdf_1900_2080 . resample ( ' A ' ) . sum ( )
Fdf_1900_2080_annual = Fdf_1900_2080_annual . replace ( 0 , np . nan )
Fdf_1900_2080_monthly = Fdf_1900_2080 . resample ( ' M ' ) . sum ( )
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly . replace ( 0 , np . nan )
Fdf_1900_2080_weekly = Fdf_1900_2080 . resample ( ' W ' ) . sum ( )
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly . replace ( 0 , np . nan )
Fdf_Seas_means = Fdf_1900_2080 . resample ( ' Q-NOV ' ) . sum ( ) #seasonal means
Fdf_Seas_means = Fdf_Seas_means . replace ( 0 , np . nan )
else :
Fdf_1900_2080_annual = Fdf_1900_2080 . resample ( ' A ' ) . mean ( )
Fdf_1900_2080_monthly = Fdf_1900_2080 . resample ( ' M ' ) . mean ( )
Fdf_1900_2080_weekly = Fdf_1900_2080 . resample ( ' W ' ) . mean ( )
Fdf_Seas_means = Fdf_1900_2080 . resample ( ' Q-NOV ' ) . mean ( ) #seasonal means
#plot the mean of all model runs
print ( ' ------------------------------------------- ' )
print ( ' mean of all models for climate variable: ' + Clim_var_type )
Fdf_1900_2080_means = Fdf_1900_2080 . mean ( )
Fdf_1900_2080_means . plot ( kind = ' bar ' ) . figure
print ( ' ------------------------------------------- ' )
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
len ( Fdf_1900_2080 . columns )
Full_current_df = Fdf_1900_2080 . iloc [ : , range ( 0 , 3 ) ]
Full_current_df = Full_current_df . stack ( )
#nearfuture
Full_nearfuture_df = Fdf_1900_2080 . iloc [ : , range ( 3 , 6 ) ]
Full_nearfuture_df = Full_nearfuture_df . stack ( )
#farfuture
Full_farfuture_df = Fdf_1900_2080 . iloc [ : , range ( 6 , len ( Fdf_1900_2080 . columns ) ) ]
Full_farfuture_df = Full_farfuture_df . stack ( )
Summarized_df = pd . concat ( [ Full_current_df , Full_nearfuture_df ] , axis = 1 , ignore_index = True )
Summarized_df = pd . concat ( [ Summarized_df , Full_farfuture_df ] , axis = 1 , ignore_index = True )
#create a dataframe that has 1 column for each of the three representative models
Full_df . loc [ ( Full_df . index > ' 1990-01-01 ' ) & ( Full_df . index < ' 2009-01-01 ' ) , ' period ' ] = ' 1990-2009 '
Full_df . loc [ ( Full_df . index > ' 2020-01-01 ' ) & ( Full_df . index < ' 2039-01-01 ' ) , ' period ' ] = ' 2020-2039 '
Full_df . loc [ ( Full_df . index > ' 2060-01-01 ' ) & ( Full_df . index < ' 2079-01-01 ' ) , ' period ' ] = ' 2060-2079 '
dfa = Fdf_1900_2080_annual . iloc [ : , [ 0 ] ]
dfa1 = Fdf_1900_2080_annual . iloc [ : , [ 0 , 3 , 6 ] ] . loc [ ( Fdf_1900_2080_annual . index > = ' 1990 ' ) & ( Fdf_1900_2080_annual . index < = ' 2009 ' ) ]
dfa1 . columns = [ Min_dif_mod_name [ : - 5 ] , Med_dif_mod_name [ : - 5 ] , Max_dif_mod_name [ : - 5 ] ]
dfa2 = Fdf_1900_2080_annual . iloc [ : , [ 1 , 4 , 7 ] ] . loc [ ( Fdf_1900_2080_annual . index > = ' 2020 ' ) & ( Fdf_1900_2080_annual . index < = ' 2039 ' ) ]
dfa2 . columns = [ Min_dif_mod_name [ : - 5 ] , Med_dif_mod_name [ : - 5 ] , Max_dif_mod_name [ : - 5 ] ]
dfa3 = Fdf_1900_2080_annual . iloc [ : , [ 2 , 5 , 8 ] ] . loc [ ( Fdf_1900_2080_annual . index > = ' 2060 ' ) & ( Fdf_1900_2080_annual . index < = ' 2079 ' ) ]
dfa3 . columns = [ Min_dif_mod_name [ : - 5 ] , Med_dif_mod_name [ : - 5 ] , Max_dif_mod_name [ : - 5 ] ]
dfall = dfa1 . append ( dfa2 ) . append ( dfa3 )
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + ' _start_ ' + Base_period_start + ' _NARCliM_summary2.pdf '
pdf_out_path = output_directory + ' / ' + pdf_out_file_name
#open pdf and add the plots
with PdfPages ( pdf_out_path ) as pdf :
#barplot of model means
plt . title ( Clim_var_type + ' - model means - full period ' )
ymin = min ( Fdf_1900_2080_means )
ymax = max ( Fdf_1900_2080_means )
Fdf_1900_2080_means . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#full period density comparison
plt . title ( Clim_var_type + ' - density comparison - full period - all models ' )
Summarized_df . plot . kde ( )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Annual means/sums for max diff model ' )
Fdf_1900_2080_annual . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#monthly box
plt . title ( Clim_var_type + ' - Monthly means/sums ' )
Fdf_1900_2080_monthly . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Monthly means/sums for min diff model ' )
Fdf_1900_2080_monthly . filter ( regex = Min_dif_mod_name [ : - 5 ] ) . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Monthly means/sums for median diff model ' )
Fdf_1900_2080_monthly . filter ( regex = Med_dif_mod_name [ : - 5 ] ) . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Monthly means/sums for max diff model ' )
Fdf_1900_2080_monthly . filter ( regex = Max_dif_mod_name [ : - 5 ] ) . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#weekly box
plt . title ( Clim_var_type + ' - Weekly means/sums ' )
Fdf_1900_2080_weekly . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#daily box
plt . title ( Clim_var_type + ' - Daily means/sums ' )
Fdf_1900_2080 . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
# time series plot annual ALL models
plt . title ( Clim_var_type + ' - Time series - representative models ' )
dfall . plot ( legend = False )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
# seasonal mean boxplots
ymin = min ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 1 ] . mean ( ) )
ymax = max ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 1 ] . mean ( ) )
plt . title ( Clim_var_type + ' - DJF Summer means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 1 ] . mean ( ) . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . title ( Clim_var_type + ' - DJF Summer means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 1 ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
ymin = min ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 2 ] . mean ( ) )
ymax = max ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 2 ] . mean ( ) )
plt . title ( Clim_var_type + ' - MAM Autumn means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 2 ] . mean ( ) . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . title ( Clim_var_type + ' - MAM Autumn means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 2 ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
ymin = min ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 3 ] . mean ( ) )
ymax = max ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 3 ] . mean ( ) )
plt . title ( Clim_var_type + ' - JJA Winter means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 3 ] . mean ( ) . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . title ( Clim_var_type + ' - JJA Winter means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 3 ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
ymin = min ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 4 ] . mean ( ) )
ymax = max ( Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 4 ] . mean ( ) )
plt . title ( Clim_var_type + ' - SON Spring means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 4 ] . mean ( ) . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . title ( Clim_var_type + ' - SON Spring means/sums ' )
Fdf_Seas_means [ Fdf_Seas_means . index . quarter == 4 ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#plots not used
#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)