diff --git a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py index d55690b..5914cbe 100644 --- a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py +++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS.py @@ -15,6 +15,9 @@ import time # Set working direcotry (where postprocessed NARClIM data is located) os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/') # +#Choose Type of NARCLIM data ('T_NNRP', 'T_GCMS') +Data_Type = 'T_NNRP' +# #User input for location and variable type - from command line if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -23,6 +26,9 @@ if __name__ == "__main__": parser.add_argument("--varName", help="operation") parser.add_argument("--timestep", help="operation") parser.add_argument("--domain", help="operation") + parser.add_argument("--LocationName", help="operation") + parser.add_argument("--Datatype", help="operation") + parser.add_argument("--BiasBool", help="operation") args = parser.parse_args() print(args.lat) print(args.lon) @@ -32,99 +38,201 @@ if __name__ == "__main__": Clim_var_type = args.varName NC_Domain = args.domain Timestep = args.timestep - print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep) -#set directory path for output files -output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/' -#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' -if not os.path.exists(output_directory): - os.makedirs(output_directory) - print("output directory folder didn't exist and was generated here:") - print(output_directory) -# -#time.sleep(10) -#set up the loop variables for interrogating the entire NARCLIM raw data -NC_Periods = ('1990-2009','2020-2039','2060-2079') -# -#Define empty pandas data frames -Full_df = pd.DataFrame() -GCM_df = pd.DataFrame() -R13_df = pd.DataFrame() -MultiNC_df = pd.DataFrame() + Location = args.LocationName + Data_Type = args.Datatype + Bias_Correction_BOOL = args.BiasBool + print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL) + +if Bias_Correction_BOOL == 'False': + #set directory path for output files + output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/' + #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' + if not os.path.exists(output_directory): + os.makedirs(output_directory) + print("output directory folder didn't exist and was generated here:") + print(output_directory) + + #time.sleep(10) + #set up the loop variables for interrogating the entire NARCLIM raw data + NC_Periods = ('1990-2009','2020-2039','2060-2079') + if Data_Type == 'T_NNRP': + NC_Periods = ('1950-2009','Stop') + # + #Define empty pandas data frames + Full_df = pd.DataFrame() + GCM_df = pd.DataFrame() + R13_df = pd.DataFrame() + MultiNC_df = pd.DataFrame() + # + #Loop through models and construct CSV per site + for NC_Period in NC_Periods: + if NC_Period != "Stop": + Period_short = NC_Period[:4] + GCMs = os.listdir('./'+ NC_Period) + for GCM in GCMs: + print GCM + Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/') + for Warf_run in Warf_runs: + Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/' + print Current_input_dir + Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc' + Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) + #print Climvar_NCs[1] + #Climvar_NCs = Climvar_NCs[0:2] + #print(Climvar_NCs) + for netcdf in Climvar_NCs: + f=Dataset(netcdf) + # This section print on the screen information contained in the headings of the file + #print '---------------------------------------------------------' + #print f.ncattrs() + #print f.title + #print f.variables + #print + #for varname in f.variables: + # print varname,' -> ',np.shape(f.variables[varname]) + #print '---------------------------------------------------------' + # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file + dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) + dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) + dist=dist_x + dist_y + latindex=np.where(dist_y==np.min(dist_y)) + lonindex=np.where(dist_x==np.min(dist_x)) + index=np.where(dist==np.min(dist)) + print '---------------------------------------------------------' + print netcdf + print 'Information on the nearest point' + print 'Your desired lat,lon = ',mylat,mylon + print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]] + #print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1] + #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns) + d={} + #d["time"] = f.variables['time'][:] + d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])] + #if GCM == 'NNRP' and Warf_run == 'R1': + # d['Period']= NC_Period + timestamp = f.variables['time'][:] + timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01')) + df1=pd.DataFrame(d, index=timestamp_dates) + f.close() + print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file' + #print f + print '---------------------------------------------------------' + #append in time direction each new time series to the data frame + MultiNC_df = MultiNC_df.append(df1) + #append in columns direction individual GCM-RCM-123 run time series (along x axis) + MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True) + R13_df = pd.concat([R13_df, MultiNC_df], axis=1) + MultiNC_df =pd.DataFrame() + #append blocks of R1 R2 and R3 in x axis direction + R13_df = R13_df.sort_index(axis=0, ascending=True) + GCM_df = pd.concat([GCM_df, R13_df], axis=1) + R13_df = pd.DataFrame() + #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column) + GCM_df = GCM_df.sort_index(axis=0, ascending=True) + Full_df = pd.concat([Full_df, GCM_df], axis=1) + GCM_df = pd.DataFrame() + Full_df = Full_df.sort_index(axis=0, ascending=True) + #adding a column with the NARCLIM decade + Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009' + Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009' + Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039' + Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079' + #export the pandas data frame as a CSV file within the output directory + out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' + out_path = output_directory +'/' + out_file_name + Full_df.to_csv(out_path) # -#Loop through models and construct CSV per site -for NC_Period in NC_Periods: - Period_short = NC_Period[:4] - GCMs = os.listdir('./'+ NC_Period) +if Bias_Correction_BOOL == 'True': + #set directory path for output files + output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/Bias_corrected/' + #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' + if not os.path.exists(output_directory): + os.makedirs(output_directory) + print("output directory folder didn't exist and was generated here:") + print(output_directory) + #time.sleep(10) + #set up the loop variables for interrogating the entire NARCLIM raw data + GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP') + # + #Define empty pandas data frames + Full_df = pd.DataFrame() + GCM_df = pd.DataFrame() + R13_df = pd.DataFrame() + MultiNC_df = pd.DataFrame() + # + #Loop through models and construct CSV per site for GCM in GCMs: print GCM - Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/') + Warf_runs = os.listdir('./' + GCM + '/') for Warf_run in Warf_runs: - Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/' - print Current_input_dir - Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc' - Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) - print "test" - print Climvar_NCs[1] - #Climvar_NCs = Climvar_NCs[0:2] - #print(Climvar_NCs) - for netcdf in Climvar_NCs: - print "test2" - f=Dataset(netcdf) - # This section print on the screen information contained in the headings of the file - print '---------------------------------------------------------' - print f.ncattrs() - print f.title - print f.variables - print - for varname in f.variables: - print varname,' -> ',np.shape(f.variables[varname]) - print '---------------------------------------------------------' - # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file - dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) - dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) - dist=dist_x + dist_y - latindex=np.where(dist_y==np.min(dist_y)) - lonindex=np.where(dist_x==np.min(dist_x)) - index=np.where(dist==np.min(dist)) - print '---------------------------------------------------------' - print netcdf - print 'Information on the nearest point' - print 'Your desired lat,lon = ',mylat,mylon - print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]] - print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1] - #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns) - d={} - #d["time"] = f.variables['time'][:] - d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])] - #if GCM == 'NNRP' and Warf_run == 'R1': - # d['Period']= NC_Period - timestamp = f.variables['time'][:] - timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01')) - df1=pd.DataFrame(d, index=timestamp_dates) - f.close() - print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file' - #print f - print '---------------------------------------------------------' - #append in time direction each new time series to the data frame - MultiNC_df = MultiNC_df.append(df1) - #append in columns direction individual GCM-RCM-123 run time series (along x axis) - MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True) - R13_df = pd.concat([R13_df, MultiNC_df], axis=1) - MultiNC_df =pd.DataFrame() - #append blocks of R1 R2 and R3 in x axis direction - R13_df = R13_df.sort_index(axis=0, ascending=True) - GCM_df = pd.concat([GCM_df, R13_df], axis=1) - R13_df = pd.DataFrame() - #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column) - GCM_df = GCM_df.sort_index(axis=0, ascending=True) - Full_df = pd.concat([Full_df, GCM_df], axis=1) - GCM_df = pd.DataFrame() -Full_df = Full_df.sort_index(axis=0, ascending=True) -#adding a column with the NARCLIM decade -Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' -Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' -Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' -#export the pandas data frame as a CSV file within the output directory -out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' -out_path = output_directory +'/' + out_file_name -Full_df.to_csv(out_path) \ No newline at end of file + NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/') + for NC_Period in NC_Periods: + Period_short = NC_Period[:4] + Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/' + print Current_input_dir + Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '_bc.nc' + Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) + print Climvar_NCs[1] + print Climvar_NCs[2] + for netcdf in Climvar_NCs: + #netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:] + #print netcdf + f=Dataset(netcdf) + # This section print on the screen information contained in the headings of the file +# print '---------------------------------------------------------' +# print f.ncattrs() +# print f.title +# print f.variables +# print +# for varname in f.variables: +# print varname,' -> ',np.shape(f.variables[varname]) +# print '---------------------------------------------------------' + # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file + dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) + dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) + dist=dist_x + dist_y + latindex=np.where(dist_y==np.min(dist_y)) + lonindex=np.where(dist_x==np.min(dist_x)) + index=np.where(dist==np.min(dist)) + print '---------------------------------------------------------' + print netcdf + print 'Information on the nearest point' + print 'Your desired lat,lon = ',mylat,mylon + print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]] + print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1] + #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns) + d={} + #d["time"] = f.variables['time'][:] + d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])] + #if GCM == 'NNRP' and Warf_run == 'R1': + # d['Period']= NC_Period + timestamp = f.variables['time'][:] + timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01')) + df1=pd.DataFrame(d, index=timestamp_dates) + f.close() + print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file' + #print f + print '---------------------------------------------------------' + #append in time direction each new time series to the data frame + MultiNC_df = MultiNC_df.append(df1) + #append in columns direction individual GCM-RCM-123 run time series (along x axis) + MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True) + R13_df = pd.concat([R13_df, MultiNC_df], axis=1) + MultiNC_df =pd.DataFrame() + #append blocks of R1 R2 and R3 in x axis direction + R13_df = R13_df.sort_index(axis=0, ascending=True) + GCM_df = pd.concat([GCM_df, R13_df], axis=1) + R13_df = pd.DataFrame() + #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column) + GCM_df = GCM_df.sort_index(axis=0, ascending=True) + Full_df = pd.concat([Full_df, GCM_df], axis=1) + GCM_df = pd.DataFrame() + Full_df = Full_df.sort_index(axis=0, ascending=True) + #adding a column with the NARCLIM decade + #Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' + #Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' + #Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' + #export the pandas data frame as a CSV file within the output directory + out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' + out_path = output_directory +'/' + out_file_name + Full_df.to_csv(out_path) \ No newline at end of file diff --git a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt index d58d6e2..0f2e9e8 100644 --- a/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt +++ b/Analysis/Code/P1_NARCliM_NC_to_CSV_CCRC_SS_BASH_script_readme.txt @@ -27,20 +27,35 @@ Batemans Bay: -35.76, 150.25 Towamba River: -37.1, 149.91 Nadgee Lake: -37.47, 149.97 -Bash-Code for netcdf interrogation: +Code Input Variables: +Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data +BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs + + +Execution of code in bash-Code for netcdf interrogation: 1st step: log into storm servers: Putty: hurricane.ccrc.unsw.edu.au or typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au + UNSW credentials (zID) -In BASH: + +In BASH copy and enter: + module load python -latitude=-28.17 -longitude=153.56 -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY'; -python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' +latitude=-32.91 +longitude=151.80 +name='HunterRiver' +Datatype='T_NNRP' +Biasboolean='False' +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; + +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean; +python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean + + + #1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores into CSV files. Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv