#cleaned out the code and separated big functions from the analysis code

the functions are in climdata_fcts.py
Valentin Heimhuber 7 years ago
parent 1154e4eb1c
commit 42ab0af07a

@ -5,16 +5,27 @@
#evspsblmean water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
#tasmean mean near surface temperature
#pracc precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'evspsblmean' water_evaporation flux (actual ET) long_name: Surface evaporation standard_name: water_evaporation_flux units: kg m-2 s-1
# 'potevpmean' potential ET water_potential_evaporation_flux kg m-2 s-1
# 'tasmean' mean near surface temperature
# 'tasmax' maximum near surface temperature
# 'pracc' precipitation daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)
# 'pr1Hmaxtstep' maximum 1 hour interval rainfall in a one day period
# 'pr1Hmaxtstep' Max. 1-hour time-window moving averaged precipitation rate units: kg m-2 s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wss1Hmaxtstep' Max. 1-hour time-window moving averaged surface wind speed units: m s-1 maximum 1-hour time-window moving averaged values from point values 60.0 second
# 'wssmax' Surface wind speed standard_name: air_velocity units: m s-1 height: 10 m
# 'wssmean' Surface wind speed standard_name: air_velocity units: m s-1
Clim_Var <- 'evspsblmean'
Clim_Var <- 'wssmax'
Datatype <- 'T_GCMS' #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'False' #use bias corrected data?
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs'
Directory <- 'C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Data/NARCLIM_Site_CSVs/'
Filename <- 'NARCLIM_Point_Sites.csv'
#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T))
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T, fileEncoding="UTF-8-BOM"))
#create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c()
@ -23,13 +34,13 @@ text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df$Name))){
name<-paste('Catchment_0', as.character(i), sep="")
name<-paste('Catchment_', as.character(i), sep="")
# if(i<10){
# name<-paste('Catchment_0', as.character(i), sep="")
# }else{
# name<-paste('Catchment_', as.character(i), sep="")
# }
text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
@ -39,9 +50,9 @@ P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BC_Generation/Code/NARCLIM_Download_and_Processing/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), ".txt", sep="")
text.file.name <- paste(Directory ,'/',Clim_Var, "_", Datatype, "_", Biasboolean,substring(as.character(i), 1,1), "_", ".txt", sep="")
#open and fill text file
fileConn <- file(text.file.name)
writeLines(Vector.for.command.line.txt, fileConn)

@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
#Last Updated - March 2018
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for creating climate prioritization plots for NARCLIM variables.
#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
#Load packages
import numpy as np
import os
import pandas as pd
@ -18,35 +19,51 @@ from datetime import timedelta
from matplotlib.backends.backend_pdf import PdfPages
from ggplot import *
# import own modules
# Set working directory (where postprocessed NARCliM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/Analysis/Code')
import climdata_fcts as fct
# Set working directory (where postprocessed NARCliM data is located)
os.chdir('C:/Users/z5025317/OneDrive - UNSW/WRL_Postdoc_Manual_Backup/WRL_Postdoc/Projects/Paper#1/')
#set input parameters
Base_period_start = '1990-01-01'
Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
Estuary = 'Nadgee' # 'Belongil'
Clim_var_type = "pracc*" # '*' will create pdf for all variables in folder "pracc*|tasmax*"
Estuary = 'HUNTER' # 'Belongil'
Clim_var_type = "pracc" # '*' will create pdf for all variables in folder "pracc*|tasmax*"
plot_pdf = 'yes'
delta_csv = 'yes'
Stats = 'dailymax'
Version = 'V4'
#set directory path for output files
output_directory = 'Output/'+ Estuary
output_directory = 'Output/Case_Study_1/'+ Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
print("output directory folder didn't exist and was generated")
Clim_Var_CSVs = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '/' + Clim_var_type)
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs:
#clim_var_csv_path = Clim_Var_CSVs[0]
Estuary_Folder = glob.glob('./Data/NARCLIM_Site_CSVs/' + Estuary + '*' )
Clim_Var_CSVs = glob.glob(Estuary_Folder[0] + '/' + Clim_var_type + '*')
#read CSV files and start analysis
#for clim_var_csv_path in Clim_Var_CSVs:
clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
Clim_var_type = Filename.split('_', 1)[0]
@ -58,83 +75,53 @@ for clim_var_csv_path in Clim_Var_CSVs:
#check data types of columns
#subtract a constant from all values to convert from Kelvin to Celsius (temp)
if Clim_var_type == 'tasmean' or Clim_var_type == 'tasmax':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]-273.15
if Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean':
Full_df = Full_df.iloc[:,0:(Ncols_df-1)]*60*60*24
Fdf_1900_2080 = Full_df
#Subset the data to the minimum base period and above (used to set the length of the present day climate period)
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
#Aggregate daily df to annual time series
if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'
or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').max()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').max()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
if(Stats == 'maxdaily'):
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').max()
Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').max() #seasonal means
Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
#plot the mean of all model runs
print('mean of all models for climate variable: ' + Clim_var_type)
Fdf_1900_2080_means = Fdf_1900_2080.mean()
#Fdf_1900_2080_means.columns = ['Mean']
#Select the 3 most representative models (min, med and max difference between far future and present)
Fdf_1900_2080_sorted = Fdf_1900_2080.reindex_axis(sorted(Fdf_1900_2080.columns), axis=1)
Fdf_1900_2080_sorted_means = pd.DataFrame(Fdf_1900_2080_sorted.mean())
df = Fdf_1900_2080_sorted_means
#add a simple increasing integer index
df = df.reset_index()
df= df[df.index % 3 != 1]
df['C'] = df[0].diff()
df = df.reset_index()
df= df[df.index % 2 != 0]
#get max difference model (difference between far future and present day)
a = df[df.index == df['C'].argmax(skipna=True)]
Max_dif_mod_name = a.iloc[0]['index']
#get min difference model
a = df[df.index == df['C'].argmin(skipna=True)]
Min_dif_mod_name = a.iloc[0]['index']
#get the model which difference is closest to the median difference
df['D'] = abs(df['C']- df['C'].median())
a = df[df.index == df['D'].argmin(skipna=True)]
Med_dif_mod_name = a.iloc[0]['index']
#data frame with min med and max difference model
df2 = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5] )
dfall = df2.reindex_axis(sorted(df2.columns), axis=1)
#data frame with individual models
dfmin = Fdf_1900_2080.filter(regex= Min_dif_mod_name[:-5])
dfmax = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
dfmed = Fdf_1900_2080.filter(regex= Max_dif_mod_name[:-5])
# use only the 3 representative models for the analysis
Fdf_1900_2080_all_mods = Fdf_1900_2080
dfall, dfmin, dfmax, dfmed, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name = fct.select_min_med_max_dif_model(Fdf_1900_2080)
#create a dataframe that has 1 column for each of the three representative models
# Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
# Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
# Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
dfa = Fdf_1900_2080_annual.iloc[:,[0]]
dfa1 = Fdf_1900_2080_annual.iloc[:,[0,3,6]].loc[(Fdf_1900_2080_annual.index >= '1990') & (Fdf_1900_2080_annual.index <= '2009')]
dfa1.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
@ -143,8 +130,11 @@ for clim_var_csv_path in Clim_Var_CSVs:
dfa3 = Fdf_1900_2080_annual.iloc[:,[2,5,8]].loc[(Fdf_1900_2080_annual.index >= '2060') & (Fdf_1900_2080_annual.index <= '2079')]
dfa3.columns = [Min_dif_mod_name[:-5], Med_dif_mod_name[:-5], Max_dif_mod_name[:-5]]
dfall_annual = dfa1.append(dfa2).append(dfa3)
#Create Deltas of average change for annual and seasonal basis
times = ['annual', 'DJF', 'MAM', 'JJA','SON']
delta_all_df = pd.DataFrame()
for temp in times:
@ -193,13 +183,16 @@ for clim_var_csv_path in Clim_Var_CSVs:
#append df to overall df
delta_all_df = pd.concat([delta_all_df, delta_df], axis=1)
if delta_csv == 'yes':
out_file_name = Estuary + '_' + Clim_var_type + '_' + Stats + '_NARCliM_ensemble_changes.csv'
out_path = output_directory + '/' + out_file_name
#create a dataframe that has a single column for present day, near and far future for the (3 selected models)
Full_current_df = Fdf_1900_2080.iloc[:,range(0,3)]
Full_current_df = Full_current_df.stack()
@ -211,8 +204,13 @@ for clim_var_csv_path in Clim_Var_CSVs:
Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
Summarized_df.columns = ['present', 'near', 'far']
#output some summary plot into pdf
if plot_pdf == 'yes':
plotcolours36 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
'darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal',
@ -222,10 +220,9 @@ for clim_var_csv_path in Clim_Var_CSVs:
'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' , 'tomato', 'royalblue', 'mediumpurple' ]
plotcolours12 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal']
plotcolours15 = ['darkolivegreen','turquoise', 'lightgreen', 'darkgreen', 'lightpink','slateblue', 'slategray', 'orange', 'tomato', 'peru', 'navy', 'teal', 'lightgreen','lightpink','slateblue']
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary_10.pdf'
pdf_out_file_name = Clim_var_type + '_' + Stats + '_start_' + Base_period_start + '_NARCliM_summary_' + Version + '.pdf'
pdf_out_path = output_directory +'/' + pdf_out_file_name
#open pdf and add the plots
with PdfPages(pdf_out_path) as pdf:
@ -237,22 +234,38 @@ for clim_var_csv_path in Clim_Var_CSVs:
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
plt.title(Clim_var_type + ' - model deltas - far-present')
ymin = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = 0 #min(neardeltadf) - 0.008 *min(neardeltadf)
ymax = max(neardeltadf) + 0.008 * max(neardeltadf)
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax))
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
ymin2 = min(neardeltadf) + 0.1 *min(neardeltadf)
ymax2 = max(neardeltadf) + 0.1 * max(neardeltadf)
ymin = min(ymin, ymin2)
if (Clim_var_type == 'tasmax' or Clim_var_type == 'tasmean'):
ymin = 0
ymax = max(ymax, ymax2)
# delta barplot for report 1#################################
plt.title(Clim_var_type + ' - model deltas - near-present')
#ymin = 0 #min(neardeltadf) - 0.008 *min(neardeltadf)
#ymax = max(neardeltadf) + 0.008 *max(neardeltadf)
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax))
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
#pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
plt.title(Clim_var_type + ' - model deltas - far-present')
neardeltadf.plot(kind='bar', color=plotcolours15, ylim=(ymin,ymax), ax=ax)
pdf.savefig(bbox_inches='tight', ylim=(ymin,ymax), pad_inches=0.4)
# end delta barplot for report 1#################################
#full period density comparison
plt.title(Clim_var_type + ' - density comparison - full period - all models')
@ -270,31 +283,7 @@ for clim_var_csv_path in Clim_Var_CSVs:
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#monthly box
plt.title(Clim_var_type + ' - Monthly means/sums')
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#annual box
plt.title(Clim_var_type + ' - Monthly means/sums for min diff model')
Fdf_1900_2080_monthly.filter(regex= Min_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#annual box
plt.title(Clim_var_type + ' - Monthly means/sums for median diff model')
Fdf_1900_2080_monthly.filter(regex= Med_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#annual box
plt.title(Clim_var_type + ' - Monthly means/sums for max diff model')
Fdf_1900_2080_monthly.filter(regex= Max_dif_mod_name[:-5]).boxplot(rot=90)
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#weekly box
plt.title(Clim_var_type + ' - Weekly means/sums')
pdf.savefig(bbox_inches='tight', pad_inches=0.4)
#daily box
plt.title(Clim_var_type + ' - Daily means/sums')

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 16:32:01 2018

@author: z5025317
"""
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
def compare_images(im1, im2):
    """Show two images side by side in a 1x2 grid with shared x and y axes.

    Both images are drawn with the 'gray' colormap on the current
    matplotlib figure; nothing is returned.
    """
    # left panel: its axes become the reference the right panel shares
    left_axis = plt.subplot(1, 2, 1)
    plt.imshow(im1, cmap='gray')
    # right panel: tied to the left axes so zooming/panning stays in sync
    plt.subplot(1, 2, 2, sharex=left_axis, sharey=left_axis)
    plt.imshow(im2, cmap='gray')
def reject_outliers(data, m=2):
    """Return the values of *data* lying within *m* standard deviations of the mean.

    Parameters
    ----------
    data : array-like
        Numeric values; converted with ``np.asarray`` so lists are accepted.
    m : float, optional
        Width of the acceptance band in standard deviations (default 2).

    Returns
    -------
    numpy.ndarray
        The elements whose absolute deviation from the mean is strictly
        smaller than ``m * std``. Empty input yields an empty array.

    Notes
    -----
    If every value is identical the standard deviation is 0 and the strict
    comparison would discard everything; in that case (and for ``m > 0``)
    all values are kept, since none of them deviates from the mean.
    """
    data = np.asarray(data)
    if data.size == 0:
        # avoid the mean-of-empty-slice warning; nothing to filter anyway
        return data.copy()
    deviation = np.abs(data - np.mean(data))
    spread = np.std(data)
    if spread == 0 and m > 0:
        # constant data: no element is an outlier
        keep = np.ones(data.shape, dtype=bool)
    else:
        keep = deviation < m * spread
    return data[keep]
def duplicates_dict(lst):
    """Return the duplicated items of *lst* together with their indices.

    Parameters
    ----------
    lst : iterable of hashable items

    Returns
    -------
    dict
        Maps every item that occurs more than once to the ascending list of
        positions at which it occurs. Items occurring once are omitted, so
        an input without duplicates (or an empty input) gives ``{}``.
    """
    # single pass: collect the positions of every item ...
    positions = {}
    for index, item in enumerate(lst):
        positions.setdefault(item, []).append(index)
    # ... then keep only the items seen at least twice
    return {item: found for item, found in positions.items() if len(found) > 1}
def datenum2datetime(datenum):
    """Convert an iterable of datenums to a list of Python datetimes.

    Each datenum is split into its integer part (used as a proleptic
    Gregorian ordinal) and its fractional part (added as a fraction of a
    day); 366 days are then subtracted.
    NOTE(review): the 366-day shift presumably maps MATLAB datenums onto
    Python ordinals - confirm against the data source.
    """
    epoch_shift = timedelta(days=366)
    converted = []
    for dn in datenum:
        whole_day = datetime.fromordinal(int(dn))
        day_fraction = timedelta(days=float(dn) % 1)
        converted.append(whole_day + day_fraction - epoch_shift)
    return converted
def select_min_med_max_dif_model(NARCLIM_df):
    """Select the three representative models (min, median and max change).

    The column means of *NARCLIM_df* are compared between the first and the
    third column of every model, and the models with the smallest, the
    closest-to-median and the largest difference are picked.

    Assumptions (TODO confirm against the input CSVs):
    after sorting the column names alphabetically, every model occupies
    three consecutive columns (present day, near future, far future), and
    dropping the last five characters of a column name gives a pattern that
    matches all columns of that model.

    Parameters
    ----------
    NARCLIM_df : pandas.DataFrame
        One numeric column per model run and period.

    Returns
    -------
    tuple
        ``(dfall, dfmin, dfmed, dfmax, Min_dif_mod_name, Med_dif_mod_name,
        Max_dif_mod_name)`` - in exactly this order. ``dfall`` holds the
        columns of all three selected models (sorted by name); ``dfmin``,
        ``dfmed`` and ``dfmax`` hold the columns of the individual models;
        the names are the column names of the third-period column of each
        selected model. Callers that unpack the frames as min, max, med
        will get the last two swapped.
    """
    # sort columns by name so the three periods of a model sit side by side
    sorted_df = NARCLIM_df.reindex(columns=sorted(NARCLIM_df.columns))
    # one row per column: 'index' holds the column name, 0 holds its mean
    df = pd.DataFrame(sorted_df.mean()).reset_index()
    # drop the middle (near future) row of every group of three
    df = df[df.index % 3 != 1].copy()
    # difference to the previous row; meaningful on the far-future rows only
    df['C'] = df[0].diff()
    # renumber, then keep every second row, i.e. the far-future rows
    df = df.reset_index()
    df = df[df.index % 2 != 0].copy()
    # get max difference model (difference between far future and present day)
    Max_dif_mod_name = df.loc[df['C'].idxmax(skipna=True), 'index']
    # get min difference model
    Min_dif_mod_name = df.loc[df['C'].idxmin(skipna=True), 'index']
    # get the model whose difference is closest to the median difference
    df['D'] = abs(df['C'] - df['C'].median())
    Med_dif_mod_name = df.loc[df['D'].idxmin(skipna=True), 'index']
    # data frame with min, med and max difference model
    df2 = NARCLIM_df.filter(regex=Min_dif_mod_name[:-5] + '|' + Med_dif_mod_name[:-5] + '|' + Max_dif_mod_name[:-5])
    dfall = df2.reindex(columns=sorted(df2.columns))
    # data frames with the individual models
    dfmin = NARCLIM_df.filter(regex=Min_dif_mod_name[:-5])
    dfmax = NARCLIM_df.filter(regex=Max_dif_mod_name[:-5])
    dfmed = NARCLIM_df.filter(regex=Med_dif_mod_name[:-5])
    return dfall, dfmin, dfmed, dfmax, Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name