From 4bd8d40c6d777ea673f690e96ce7050392b4e641 Mon Sep 17 00:00:00 2001 From: tinoheimhuber Date: Mon, 16 Apr 2018 09:08:24 +1000 Subject: [PATCH] #principal python codes for a) NARCLIM interrogation on CCRC STORM servers (Linux) where we go from netcdf to a csv file for single locations. and b) b) code for creating some variability shift plots from the CSV files which is done locally --- Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py | 121 +++++++++++ Analysis/Code/P1_NARCliM_plots_Windows.py | 198 ++++++++++++++++++ 2 files changed, 319 insertions(+) create mode 100644 Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py create mode 100644 Analysis/Code/P1_NARCliM_plots_Windows.py diff --git a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py new file mode 100644 index 0000000..493b555 --- /dev/null +++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +from netCDF4 import * +import numpy as np +import os +import pandas as pd +import glob +import matplotlib +import matplotlib.pyplot as plt +from datetime import datetime +from datetime import timedelta +import argparse +import time +# +# Set working direcotry (where postprocessed NARClIM data is located) +os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/') +# +#User input for location and variable type - from command line +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--lat", help="first number") + parser.add_argument("--lon", help="second number") + parser.add_argument("--varName", help="operation") + parser.add_argument("--timestep", help="operation") + parser.add_argument("--domain", help="operation") + args = parser.parse_args() + print(args.lat) + print(args.lon) + print(args.varName) + mylat= float(args.lat) + mylon= float(args.lon) + Clim_var_type = args.varName + NC_Domain = args.domain + Timestep = args.timestep + print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep) +#set directory path for output files +output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out' +#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' +if not os.path.exists(output_directory): + os.makedirs(output_directory) + print("output directory folder didn't exist and was generated here:") + print(output_directory) +# +time.sleep(10) +#manual input via script +#mylat= -33.9608578 +#mylon= 151.1339882 +#Clim_var_type = 'pr1Hmaxtstep' +#NC_Domain = 'd02' +# +#set up the loop variables for interrogating the entire NARCLIM raw data +NC_Periods = ('1950-2009','2020-2039','2060-2079') +# +#Define empty pandas data frames +Full_df = pd.DataFrame() +GCM_df = pd.DataFrame() +R13_df = pd.DataFrame() +MultiNC_df = pd.DataFrame() +# +#Loop through models and construct CSV per site +for NC_Period in NC_Periods: + Period_short = NC_Period[:4] + GCMs = os.listdir('./'+ NC_Period) + for GCM in GCMs: + Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/') + for Warf_run in Warf_runs: + Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/' + print Current_input_dir + Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc' + Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) + #Climvar_NCs = Climvar_NCs[0:2] + #print(Climvar_NCs) + for netcdf in Climvar_NCs: + f=Dataset(netcdf) + # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file + dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) + dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) + dist=dist_x + dist_y + latindex=np.where(dist_y==np.min(dist_y)) + lonindex=np.where(dist_x==np.min(dist_x)) + index=np.where(dist==np.min(dist)) + print '---------------------------------------------------------' + print netcdf + print 'Information on the nearest point' + print 'Your desired lat,lon = ',mylat,mylon + print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]] + print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1] + #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns) + d={} + #d["time"] = f.variables['time'][:] + d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])] + #if GCM == 'NNRP' and Warf_run == 'R1': + # d['Period']= NC_Period + timestamp = f.variables['time'][:] + timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01')) + df1=pd.DataFrame(d, index=timestamp_dates) + f.close() + print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file' + print '---------------------------------------------------------' + #append in time direction each new time series to the data frame + MultiNC_df = MultiNC_df.append(df1) + #append in columns direction individual GCM-RCM-123 run time series (along x axis) + MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True) + R13_df = pd.concat([R13_df, MultiNC_df], axis=1) + MultiNC_df =pd.DataFrame() + #append blocks of R1 R2 and R3 in x axis direction + R13_df = R13_df.sort_index(axis=0, ascending=True) + GCM_df = pd.concat([GCM_df, R13_df], axis=1) + R13_df = pd.DataFrame() + #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column) + GCM_df = GCM_df.sort_index(axis=0, ascending=True) + Full_df = pd.concat([Full_df, GCM_df], axis=1) + GCM_df = pd.DataFrame() +Full_df = Full_df.sort_index(axis=0, ascending=True) +#adding a column with the NARCLIM decade +Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' +Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' +Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' +#export the pandas data frame as a CSV file within the output directory +out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' +out_path = output_directory +'/' + out_file_name +Full_df.to_csv(out_path) \ No newline at end of file diff --git a/Analysis/Code/P1_NARCliM_plots_Windows.py b/Analysis/Code/P1_NARCliM_plots_Windows.py new file mode 100644 index 0000000..e71abf1 --- /dev/null +++ b/Analysis/Code/P1_NARCliM_plots_Windows.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +#####################################---------------------------------- +#Last Updated - March 2018 +#@author: z5025317 Valentin Heimhuber +#code for creating climate prioritization plots for NARCLIM variables. +#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py +#####################################---------------------------------- +#Load packages +#####################################---------------------------------- +import numpy as np +import os +import pandas as pd +import glob +import matplotlib +import matplotlib.pyplot as plt +from datetime import datetime +from datetime import timedelta +from matplotlib.backends.backend_pdf import PdfPages +matplotlib.style.use('ggplot') +# +# Set working direcotry (where postprocessed NARClIM data is located) +os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/NARCLIM/') +# +# +#####################################---------------------------------- +#set input parameters +Base_period_start = '1990-01-01' +Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting +#Clim_var_type = 'tasmean' will create pdf for all variables in folder +#####################################---------------------------------- +#set directory path for output files +output_directory = 'Output' +#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted' +if not os.path.exists(output_directory): + os.makedirs(output_directory) + print("output directory folder didn't exist and was generated") +Clim_Var_CSVs = glob.glob('./Site_CSVs/*') +#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' ) +#read CSV file +for clim_var_csv_path in Clim_Var_CSVs: + Filename = os.path.basename(os.path.normpath(clim_var_csv_path)) + Clim_var_type = Filename.split('_', 1)[0] + print(clim_var_csv_path) + Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True) + #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough)) + Full_df.index = Full_df.index.to_period('D') + #check data types of columns + #Full_df.dtypes + + #substract a constant from all values (temp) + if Clim_var_type == 'tasmean': + Full_df = Full_df.iloc[:,0:26]-273.15 + + #index the narclim periods if needed + #Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009' + #Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039' + #Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079' + + #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period) + Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] + #Aggregate daily df to annual time series + if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' + or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'): + Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum() + Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan) + Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum() + Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan) + Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum() + Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan) + Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means + Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan) + else: + Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean() + Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean() + Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean() + Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means + #plot the mean of all model runs + print('-------------------------------------------') + print('mean of all models for climate variable: ' + Clim_var_type) + Fdf_1900_2080_means = Fdf_1900_2080.mean() + Fdf_1900_2080_means.plot(kind='bar', ylim=(16,22)).figure + print('-------------------------------------------') + + #time series plots: + #ranks = Fdf_1900_2080_means[3:].rank(axis=0) + df3 = Fdf_1900_2080_annual + Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1) + Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25]] + + #write the key plots to a single pdf document + pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf' + pdf_out_path = output_directory +'/' + pdf_out_file_name + #open pdf and add the plots + + #developement + #I want to create a simple density plot for all values in present day, near future and far future + #this should be an easy indicator of change + #subset present day simulations + Full_current_df = Fdf_1900_2080.iloc[:,[0,1,2]] + Full_current_df = Full_current_df.stack() + #nearfuture + Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,15)] + Full_nearfuture_df = Full_nearfuture_df.stack() + #farfuture + Full_farfuture_df = Fdf_1900_2080.iloc[:,range(15,27)] + Full_farfuture_df = Full_farfuture_df.stack() + Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True) + Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True) + Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture'] + #end of developement + with PdfPages(pdf_out_path) as pdf: + #barplot of model means + plt.title(Clim_var_type + ' - model means - full period') + ymin = min(Fdf_1900_2080_means) + ymax = max(Fdf_1900_2080_means) + Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax)) + Fdf_1900_2080_means.plot(kind='bar') + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #full period density comparison + plt.title(Clim_var_type + ' - density comparison - full period') + Summarized_df.plot.kde() + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #annual box + plt.title(Clim_var_type + ' - Annual means') + Fdf_1900_2080_annual.boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #monthly box + plt.title(Clim_var_type + ' - Monthly means') + Fdf_1900_2080_monthly.boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #weekly box + plt.title(Clim_var_type + ' - Weekly means') + Fdf_1900_2080_weekly.boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + #daily box + plt.title(Clim_var_type + ' - Daily means') + Fdf_1900_2080.boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + # time series plot annual ALL models + plt.title(Clim_var_type + ' - Time series - all models') + Fdf_1900_2080_annual.plot(legend=False) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + # + plt.title(Clim_var_type + ' - Time series - representative models') + Fdf_annual_sorted_subset.plot(legend=False) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + # seasonal mean boxplots + ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) + ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) + plt.title(Clim_var_type + ' - DJF Summer means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax)) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + plt.title(Clim_var_type + ' - DJF Summer means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) + ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) + plt.title(Clim_var_type + ' - MAM Autumn means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax)) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + plt.title(Clim_var_type + ' - MAM Autumn means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) + ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) + plt.title(Clim_var_type + ' - JJA Winter means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax)) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + plt.title(Clim_var_type + ' - JJA Winter means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) + ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) + plt.title(Clim_var_type + ' - SON Spring means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax)) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + plt.title(Clim_var_type + ' - SON Spring means') + Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90) + pdf.savefig(bbox_inches='tight', pad_inches=0.4) + plt.close() + +#plots not used +#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)