#principal python codes for a) NARCLIM interrogation on CCRC STORM servers (Linux) where we go from netcdf to a csv file for single locations. and b)
b) code for creating some variability shift plots from the CSV files which is done locallymaster
						commit
						4bd8d40c6d
					
				| @ -0,0 +1,121 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from netCDF4 import * | ||||
| import numpy as np | ||||
| import os | ||||
| import pandas as pd | ||||
| import glob | ||||
| import matplotlib | ||||
| import matplotlib.pyplot as plt | ||||
| from datetime import datetime | ||||
| from datetime import timedelta | ||||
| import argparse | ||||
| import time | ||||
| # | ||||
| # Set working direcotry (where postprocessed NARClIM data is located) | ||||
| os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/') | ||||
| # | ||||
| #User input for location and variable type - from command line | ||||
| if __name__ == "__main__": | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument("--lat", help="first number") | ||||
|     parser.add_argument("--lon", help="second number") | ||||
|     parser.add_argument("--varName", help="operation") | ||||
|     parser.add_argument("--timestep", help="operation") | ||||
|     parser.add_argument("--domain", help="operation") | ||||
|     args = parser.parse_args() | ||||
|     print(args.lat) | ||||
|     print(args.lon) | ||||
|     print(args.varName) | ||||
|     mylat= float(args.lat) | ||||
|     mylon= float(args.lon) | ||||
|     Clim_var_type = args.varName | ||||
|     NC_Domain = args.domain | ||||
|     Timestep = args.timestep | ||||
|     print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep) | ||||
| #set directory path for output files | ||||
| output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out' | ||||
| #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' | ||||
| if not os.path.exists(output_directory): | ||||
|     os.makedirs(output_directory) | ||||
|     print("output directory folder didn't exist and was generated here:") | ||||
|     print(output_directory) | ||||
| # | ||||
| time.sleep(10)  | ||||
| #manual input via script | ||||
| #mylat= -33.9608578 | ||||
| #mylon= 151.1339882 | ||||
| #Clim_var_type = 'pr1Hmaxtstep' | ||||
| #NC_Domain = 'd02' | ||||
| # | ||||
| #set up the loop variables for interrogating the entire NARCLIM raw data | ||||
| NC_Periods = ('1950-2009','2020-2039','2060-2079') | ||||
| # | ||||
| #Define empty pandas data frames  | ||||
| Full_df = pd.DataFrame() | ||||
| GCM_df = pd.DataFrame() | ||||
| R13_df = pd.DataFrame() | ||||
| MultiNC_df = pd.DataFrame() | ||||
| # | ||||
| #Loop through models and construct CSV per site | ||||
| for NC_Period in NC_Periods: | ||||
|     Period_short = NC_Period[:4] | ||||
|     GCMs = os.listdir('./'+ NC_Period) | ||||
|     for GCM in GCMs: | ||||
|         Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/') | ||||
|         for Warf_run in Warf_runs: | ||||
|             Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/' | ||||
|             print Current_input_dir | ||||
|             Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc' | ||||
|             Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) | ||||
|             #Climvar_NCs = Climvar_NCs[0:2] | ||||
|             #print(Climvar_NCs) | ||||
|             for netcdf in Climvar_NCs: | ||||
|                 f=Dataset(netcdf) | ||||
|                 # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file | ||||
|                 dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) | ||||
|                 dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) | ||||
|                 dist=dist_x + dist_y | ||||
|                 latindex=np.where(dist_y==np.min(dist_y)) | ||||
|                 lonindex=np.where(dist_x==np.min(dist_x)) | ||||
|                 index=np.where(dist==np.min(dist)) | ||||
|                 print '---------------------------------------------------------' | ||||
|                 print  netcdf | ||||
|                 print 'Information on the nearest point' | ||||
|                 print 'Your desired lat,lon = ',mylat,mylon | ||||
|                 print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]] | ||||
|                 print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1] | ||||
|                 #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns) | ||||
|                 d={} | ||||
|                 #d["time"] = f.variables['time'][:] | ||||
|                 d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])] | ||||
|                 #if GCM == 'NNRP' and Warf_run == 'R1': | ||||
|                 #    d['Period']= NC_Period | ||||
|                 timestamp = f.variables['time'][:] | ||||
|                 timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01')) | ||||
|                 df1=pd.DataFrame(d, index=timestamp_dates) | ||||
|                 f.close() | ||||
|                 print 'closing  '+ os.path.basename(os.path.normpath(netcdf)) + '  moving to next netcdf file' | ||||
|                 print '---------------------------------------------------------' | ||||
|                 #append in time direction each new time series to the data frame | ||||
|                 MultiNC_df = MultiNC_df.append(df1) | ||||
|             #append in columns direction individual GCM-RCM-123 run time series (along x axis)  | ||||
|             MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True) | ||||
|             R13_df = pd.concat([R13_df, MultiNC_df], axis=1) | ||||
|             MultiNC_df =pd.DataFrame() | ||||
|         #append blocks of R1 R2 and R3 in x axis direction  | ||||
|         R13_df = R13_df.sort_index(axis=0, ascending=True) | ||||
|         GCM_df = pd.concat([GCM_df, R13_df], axis=1) | ||||
|         R13_df = pd.DataFrame() | ||||
|     #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column) | ||||
|     GCM_df = GCM_df.sort_index(axis=0, ascending=True) | ||||
|     Full_df = pd.concat([Full_df, GCM_df], axis=1) | ||||
|     GCM_df = pd.DataFrame() | ||||
| Full_df = Full_df.sort_index(axis=0, ascending=True) | ||||
| #adding a column with the NARCLIM decade | ||||
| Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' | ||||
| Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' | ||||
| Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' | ||||
| #export the pandas data frame as a CSV file within the output directory | ||||
| out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' | ||||
| out_path = output_directory +'/' + out_file_name | ||||
| Full_df.to_csv(out_path) | ||||
| @ -0,0 +1,198 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| #####################################---------------------------------- | ||||
| #Last Updated - March 2018 | ||||
| #@author: z5025317 Valentin Heimhuber | ||||
| #code for creating climate prioritization plots for NARCLIM variables.  | ||||
| #Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py | ||||
| #####################################---------------------------------- | ||||
| #Load packages | ||||
| #####################################---------------------------------- | ||||
| import numpy as np | ||||
| import os | ||||
| import pandas as pd | ||||
| import glob | ||||
| import matplotlib | ||||
| import matplotlib.pyplot as plt | ||||
| from datetime import datetime | ||||
| from datetime import timedelta | ||||
| from matplotlib.backends.backend_pdf import PdfPages | ||||
| matplotlib.style.use('ggplot') | ||||
| # | ||||
| # Set working direcotry (where postprocessed NARClIM data is located) | ||||
| os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/NARCLIM/') | ||||
| # | ||||
| # | ||||
| #####################################---------------------------------- | ||||
| #set input parameters | ||||
| Base_period_start = '1990-01-01' | ||||
| Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting | ||||
| #Clim_var_type  = 'tasmean' will create pdf for all variables in folder            | ||||
| #####################################---------------------------------- | ||||
| #set directory path for output files | ||||
| output_directory = 'Output' | ||||
| #output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted' | ||||
| if not os.path.exists(output_directory): | ||||
|     os.makedirs(output_directory) | ||||
|     print("output directory folder didn't exist and was generated") | ||||
| Clim_Var_CSVs = glob.glob('./Site_CSVs/*') | ||||
| #Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' ) | ||||
| #read CSV file | ||||
| for clim_var_csv_path in Clim_Var_CSVs: | ||||
|     Filename = os.path.basename(os.path.normpath(clim_var_csv_path)) | ||||
|     Clim_var_type = Filename.split('_', 1)[0] | ||||
|     print(clim_var_csv_path) | ||||
|     Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True) | ||||
|     #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough)) | ||||
|     Full_df.index = Full_df.index.to_period('D') | ||||
|     #check data types of columns | ||||
|     #Full_df.dtypes | ||||
|      | ||||
|     #substract a constant from all values (temp) | ||||
|     if Clim_var_type == 'tasmean': | ||||
|         Full_df = Full_df.iloc[:,0:26]-273.15 | ||||
|      | ||||
|     #index the narclim periods if needed | ||||
|     #Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009' | ||||
|     #Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039' | ||||
|     #Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079' | ||||
|      | ||||
|     #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period) | ||||
|     Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] | ||||
|     #Aggregate daily df to annual time series | ||||
|     if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean'  | ||||
|     or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'): | ||||
|         Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum() | ||||
|         Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan) | ||||
|         Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum() | ||||
|         Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan) | ||||
|         Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum() | ||||
|         Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan) | ||||
|         Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means | ||||
|         Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan) | ||||
|     else:     | ||||
|         Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean() | ||||
|         Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean() | ||||
|         Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean() | ||||
|         Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means | ||||
|     #plot the mean of all model runs | ||||
|     print('-------------------------------------------') | ||||
|     print('mean of all models for climate variable:  ' + Clim_var_type) | ||||
|     Fdf_1900_2080_means = Fdf_1900_2080.mean() | ||||
|     Fdf_1900_2080_means.plot(kind='bar', ylim=(16,22)).figure | ||||
|     print('-------------------------------------------') | ||||
|      | ||||
|     #time series plots: | ||||
|     #ranks = Fdf_1900_2080_means[3:].rank(axis=0) | ||||
|     df3 = Fdf_1900_2080_annual | ||||
|     Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1) | ||||
|     Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25]] | ||||
|      | ||||
|     #write the key plots to a single pdf document | ||||
|     pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf' | ||||
|     pdf_out_path = output_directory +'/' + pdf_out_file_name | ||||
|     #open pdf and add the plots | ||||
|      | ||||
|     #developement | ||||
|     #I want to create a simple density plot for all values in present day, near future and far future | ||||
|     #this should be an easy indicator of change | ||||
|     #subset present day simulations | ||||
|     Full_current_df = Fdf_1900_2080.iloc[:,[0,1,2]] | ||||
|     Full_current_df = Full_current_df.stack() | ||||
|     #nearfuture | ||||
|     Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,15)] | ||||
|     Full_nearfuture_df = Full_nearfuture_df.stack() | ||||
|     #farfuture | ||||
|     Full_farfuture_df = Fdf_1900_2080.iloc[:,range(15,27)] | ||||
|     Full_farfuture_df = Full_farfuture_df.stack() | ||||
|     Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True) | ||||
|     Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True) | ||||
|     Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture'] | ||||
|     #end of developement | ||||
|     with PdfPages(pdf_out_path) as pdf: | ||||
|         #barplot of model means | ||||
|         plt.title(Clim_var_type + ' -  model means - full period') | ||||
|         ymin = min(Fdf_1900_2080_means)  | ||||
|         ymax = max(Fdf_1900_2080_means)  | ||||
|         Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax)) | ||||
|         Fdf_1900_2080_means.plot(kind='bar') | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         #full period density comparison | ||||
|         plt.title(Clim_var_type + ' -  density comparison - full period') | ||||
|         Summarized_df.plot.kde() | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         #annual box | ||||
|         plt.title(Clim_var_type + ' -  Annual means') | ||||
|         Fdf_1900_2080_annual.boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         #monthly box | ||||
|         plt.title(Clim_var_type + ' -  Monthly means') | ||||
|         Fdf_1900_2080_monthly.boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         #weekly box | ||||
|         plt.title(Clim_var_type + ' -  Weekly means') | ||||
|         Fdf_1900_2080_weekly.boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         #daily box | ||||
|         plt.title(Clim_var_type + ' -  Daily means') | ||||
|         Fdf_1900_2080.boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         # time series plot annual ALL models | ||||
|         plt.title(Clim_var_type + ' -  Time series - all models') | ||||
|         Fdf_1900_2080_annual.plot(legend=False) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         # | ||||
|         plt.title(Clim_var_type + ' -  Time series - representative models') | ||||
|         Fdf_annual_sorted_subset.plot(legend=False) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         # seasonal mean boxplots | ||||
|         ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())  | ||||
|         ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean())  | ||||
|         plt.title(Clim_var_type + ' -  DJF Summer means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax)) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close()   | ||||
|         plt.title(Clim_var_type + ' -  DJF Summer means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())  | ||||
|         ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) | ||||
|         plt.title(Clim_var_type + ' -  MAM Autumn means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax)) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close()  | ||||
|         plt.title(Clim_var_type + ' -  MAM Autumn means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())  | ||||
|         ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) | ||||
|         plt.title(Clim_var_type + ' -  JJA Winter means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax)) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close()  | ||||
|         plt.title(Clim_var_type + ' -  JJA Winter means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
|         ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())  | ||||
|         ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) | ||||
|         plt.title(Clim_var_type + ' -  SON Spring means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax)) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close()  | ||||
|         plt.title(Clim_var_type + ' -  SON Spring means') | ||||
|         Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90) | ||||
|         pdf.savefig(bbox_inches='tight', pad_inches=0.4) | ||||
|         plt.close() | ||||
| 
 | ||||
| #plots not used   | ||||
| #Fdf_annual_sorted_subset.plot(legend=False, subplots=True) | ||||
					Loading…
					
					
				
		Reference in New Issue