#principal python codes for a) NARCLIM interrogation on CCRC STORM servers (Linux) where we go from netcdf to a csv file for single locations. and b)

b) code for creating some variability shift plots from the CSV files which is done locally
8 years ago · 4bd8d40c6d
commit 4bd8d40c6d
2 changed files with 319 additions and 0 deletions
--- a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py
+++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py
@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+from netCDF4 import *
+import numpy as np
+import os
+import pandas as pd
+import glob
+import matplotlib
+import matplotlib.pyplot as plt
+from datetime import datetime
+from datetime import timedelta
+import argparse
+import time
+#
+# Set working direcotry (where postprocessed NARClIM data is located)
+os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/')
+#
+#User input for location and variable type - from command line
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--lat", help="first number")
+    parser.add_argument("--lon", help="second number")
+    parser.add_argument("--varName", help="operation")
+    parser.add_argument("--timestep", help="operation")
+    parser.add_argument("--domain", help="operation")
+    args = parser.parse_args()
+    print(args.lat)
+    print(args.lon)
+    print(args.varName)
+    mylat= float(args.lat)
+    mylon= float(args.lon)
+    Clim_var_type = args.varName
+    NC_Domain = args.domain
+    Timestep = args.timestep
+    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep)
+#set directory path for output files
+output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out'
+#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
+if not os.path.exists(output_directory):
+    os.makedirs(output_directory)
+    print("output directory folder didn't exist and was generated here:")
+    print(output_directory)
+#
+time.sleep(10) 
+#manual input via script
+#mylat= -33.9608578
+#mylon= 151.1339882
+#Clim_var_type = 'pr1Hmaxtstep'
+#NC_Domain = 'd02'
+#
+#set up the loop variables for interrogating the entire NARCLIM raw data
+NC_Periods = ('1950-2009','2020-2039','2060-2079')
+#
+#Define empty pandas data frames 
+Full_df = pd.DataFrame()
+GCM_df = pd.DataFrame()
+R13_df = pd.DataFrame()
+MultiNC_df = pd.DataFrame()
+#
+#Loop through models and construct CSV per site
+for NC_Period in NC_Periods:
+    Period_short = NC_Period[:4]
+    GCMs = os.listdir('./'+ NC_Period)
+    for GCM in GCMs:
+        Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
+        for Warf_run in Warf_runs:
+            Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
+            print Current_input_dir
+            Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
+            Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
+            #Climvar_NCs = Climvar_NCs[0:2]
+            #print(Climvar_NCs)
+            for netcdf in Climvar_NCs:
+                f=Dataset(netcdf)
+                # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
+                dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
+                dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
+                dist=dist_x + dist_y
+                latindex=np.where(dist_y==np.min(dist_y))
+                lonindex=np.where(dist_x==np.min(dist_x))
+                index=np.where(dist==np.min(dist))
+                print '---------------------------------------------------------'
+                print  netcdf
+                print 'Information on the nearest point'
+                print 'Your desired lat,lon = ',mylat,mylon
+                print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
+                print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
+                #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
+                d={}
+                #d["time"] = f.variables['time'][:]
+                d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
+                #if GCM == 'NNRP' and Warf_run == 'R1':
+                #    d['Period']= NC_Period
+                timestamp = f.variables['time'][:]
+                timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
+                df1=pd.DataFrame(d, index=timestamp_dates)
+                f.close()
+                print 'closing  '+ os.path.basename(os.path.normpath(netcdf)) + '  moving to next netcdf file'
+                print '---------------------------------------------------------'
+                #append in time direction each new time series to the data frame
+                MultiNC_df = MultiNC_df.append(df1)
+            #append in columns direction individual GCM-RCM-123 run time series (along x axis) 
+            MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
+            R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
+            MultiNC_df =pd.DataFrame()
+        #append blocks of R1 R2 and R3 in x axis direction 
+        R13_df = R13_df.sort_index(axis=0, ascending=True)
+        GCM_df = pd.concat([GCM_df, R13_df], axis=1)
+        R13_df = pd.DataFrame()
+    #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
+    GCM_df = GCM_df.sort_index(axis=0, ascending=True)
+    Full_df = pd.concat([Full_df, GCM_df], axis=1)
+    GCM_df = pd.DataFrame()
+Full_df = Full_df.sort_index(axis=0, ascending=True)
+#adding a column with the NARCLIM decade
+Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
+Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
+Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
+#export the pandas data frame as a CSV file within the output directory
+out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
+out_path = output_directory +'/' + out_file_name
+Full_df.to_csv(out_path)
--- a/Analysis/Code/P1_NARCliM_plots_Windows.py
+++ b/Analysis/Code/P1_NARCliM_plots_Windows.py
@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+#####################################----------------------------------
+#Last Updated - March 2018
+#@author: z5025317 Valentin Heimhuber
+#code for creating climate prioritization plots for NARCLIM variables. 
+#Inputs: Uses CSV files that contain all 12 NARCLIM model runs time series for 1 grid cell created with: P1_NARCliM_NC_to_CSV_CCRC_SS.py
+#####################################----------------------------------
+#Load packages
+#####################################----------------------------------
+import numpy as np
+import os
+import pandas as pd
+import glob
+import matplotlib
+import matplotlib.pyplot as plt
+from datetime import datetime
+from datetime import timedelta
+from matplotlib.backends.backend_pdf import PdfPages
+matplotlib.style.use('ggplot')
+#
+# Set working direcotry (where postprocessed NARClIM data is located)
+os.chdir('C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/NARCLIM/')
+#
+#
+#####################################----------------------------------
+#set input parameters
+Base_period_start = '1990-01-01'
+Base_period_end = '2080-01-01' #use last day that's not included in period as < is used for subsetting
+#Clim_var_type  = 'tasmean' will create pdf for all variables in folder           
+#####################################----------------------------------
+#set directory path for output files
+output_directory = 'Output'
+#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
+if not os.path.exists(output_directory):
+    os.makedirs(output_directory)
+    print("output directory folder didn't exist and was generated")
+Clim_Var_CSVs = glob.glob('./Site_CSVs/*')
+#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
+#read CSV file
+for clim_var_csv_path in Clim_Var_CSVs:
+    Filename = os.path.basename(os.path.normpath(clim_var_csv_path))
+    Clim_var_type = Filename.split('_', 1)[0]
+    print(clim_var_csv_path)
+    Full_df = pd.read_csv(clim_var_csv_path, index_col=0, parse_dates = True)
+    #pandas datestamp index to period (we don't need the 12 pm info in the index (daily periods are enough))
+    Full_df.index = Full_df.index.to_period('D')
+    #check data types of columns
+    #Full_df.dtypes
+    
+    #substract a constant from all values (temp)
+    if Clim_var_type == 'tasmean':
+        Full_df = Full_df.iloc[:,0:26]-273.15
+    
+    #index the narclim periods if needed
+    #Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
+    #Full_df.loc[(Full_df.index >= '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
+    #Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
+    
+    #Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
+    Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)]
+    #Aggregate daily df to annual time series
+    if (Clim_var_type == 'pracc' or Clim_var_type == 'evspsblmean' or Clim_var_type == 'potevpmean' 
+    or Clim_var_type == 'pr1Hmaxtstep' or Clim_var_type == 'wss1Hmaxtstep'):
+        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').sum()
+        Fdf_1900_2080_annual = Fdf_1900_2080_annual.replace(0, np.nan)
+        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').sum()
+        Fdf_1900_2080_monthly = Fdf_1900_2080_monthly.replace(0, np.nan)
+        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').sum()
+        Fdf_1900_2080_weekly = Fdf_1900_2080_weekly.replace(0, np.nan)
+        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').sum() #seasonal means
+        Fdf_Seas_means = Fdf_Seas_means.replace(0, np.nan)
+    else:    
+        Fdf_1900_2080_annual = Fdf_1900_2080.resample('A').mean()
+        Fdf_1900_2080_monthly = Fdf_1900_2080.resample('M').mean()
+        Fdf_1900_2080_weekly = Fdf_1900_2080.resample('W').mean()
+        Fdf_Seas_means = Fdf_1900_2080.resample('Q-NOV').mean() #seasonal means
+    #plot the mean of all model runs
+    print('-------------------------------------------')
+    print('mean of all models for climate variable:  ' + Clim_var_type)
+    Fdf_1900_2080_means = Fdf_1900_2080.mean()
+    Fdf_1900_2080_means.plot(kind='bar', ylim=(16,22)).figure
+    print('-------------------------------------------')
+    
+    #time series plots:
+    #ranks = Fdf_1900_2080_means[3:].rank(axis=0)
+    df3 = Fdf_1900_2080_annual
+    Fdf_annual_sorted = df3.reindex_axis(df3.mean().sort_values().index, axis=1)
+    Fdf_annual_sorted_subset = Fdf_annual_sorted.iloc[:,[0,1,2,3,5,7,13,15,18, 22, 24,25]]
+    
+    #write the key plots to a single pdf document
+    pdf_out_file_name = Clim_var_type + '_start_' + Base_period_start + '_NARCliM_summary4.pdf'
+    pdf_out_path = output_directory +'/' + pdf_out_file_name
+    #open pdf and add the plots
+    
+    #developement
+    #I want to create a simple density plot for all values in present day, near future and far future
+    #this should be an easy indicator of change
+    #subset present day simulations
+    Full_current_df = Fdf_1900_2080.iloc[:,[0,1,2]]
+    Full_current_df = Full_current_df.stack()
+    #nearfuture
+    Full_nearfuture_df = Fdf_1900_2080.iloc[:,range(3,15)]
+    Full_nearfuture_df = Full_nearfuture_df.stack()
+    #farfuture
+    Full_farfuture_df = Fdf_1900_2080.iloc[:,range(15,27)]
+    Full_farfuture_df = Full_farfuture_df.stack()
+    Summarized_df = pd.concat([Full_current_df, Full_nearfuture_df], axis=1, ignore_index=True)
+    Summarized_df = pd.concat([Summarized_df, Full_farfuture_df], axis=1, ignore_index=True)
+    Summarized_df.columns = ['presentday', 'nearfuture', 'farfuture']
+    #end of developement
+    with PdfPages(pdf_out_path) as pdf:
+        #barplot of model means
+        plt.title(Clim_var_type + ' -  model means - full period')
+        ymin = min(Fdf_1900_2080_means) 
+        ymax = max(Fdf_1900_2080_means) 
+        Fdf_1900_2080_means.plot(kind='bar', ylim=(ymin,ymax))
+        Fdf_1900_2080_means.plot(kind='bar')
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #full period density comparison
+        plt.title(Clim_var_type + ' -  density comparison - full period')
+        Summarized_df.plot.kde()
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #annual box
+        plt.title(Clim_var_type + ' -  Annual means')
+        Fdf_1900_2080_annual.boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #monthly box
+        plt.title(Clim_var_type + ' -  Monthly means')
+        Fdf_1900_2080_monthly.boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #weekly box
+        plt.title(Clim_var_type + ' -  Weekly means')
+        Fdf_1900_2080_weekly.boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #daily box
+        plt.title(Clim_var_type + ' -  Daily means')
+        Fdf_1900_2080.boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        # time series plot annual ALL models
+        plt.title(Clim_var_type + ' -  Time series - all models')
+        Fdf_1900_2080_annual.plot(legend=False)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        #
+        plt.title(Clim_var_type + ' -  Time series - representative models')
+        Fdf_annual_sorted_subset.plot(legend=False)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        # seasonal mean boxplots
+        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean()) 
+        plt.title(Clim_var_type + ' -  DJF Summer means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].mean().plot(kind='bar', ylim=(ymin,ymax))
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()  
+        plt.title(Clim_var_type + ' -  DJF Summer means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==1].boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean()) 
+        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean())
+        plt.title(Clim_var_type + ' -  MAM Autumn means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].mean().plot(kind='bar', ylim=(ymin,ymax))
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close() 
+        plt.title(Clim_var_type + ' -  MAM Autumn means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==2].boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean()) 
+        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean())
+        plt.title(Clim_var_type + ' -  JJA Winter means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].mean().plot(kind='bar', ylim=(ymin,ymax))
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close() 
+        plt.title(Clim_var_type + ' -  JJA Winter means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==3].boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+        ymin = min(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean()) 
+        ymax = max(Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean())
+        plt.title(Clim_var_type + ' -  SON Spring means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].mean().plot(kind='bar', ylim=(ymin,ymax))
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close() 
+        plt.title(Clim_var_type + ' -  SON Spring means')
+        Fdf_Seas_means[Fdf_Seas_means.index.quarter==4].boxplot(rot=90)
+        pdf.savefig(bbox_inches='tight', pad_inches=0.4)
+        plt.close()
+
+#plots not used  
+#Fdf_annual_sorted_subset.plot(legend=False, subplots=True)