Added a couple of function input parameters

and the ability to create bias-corrected data CSV files,
as well as NNRP extraction for both bias-corrected and normal data.
Development1
Valentin Heimhuber
parent adad360de9
commit 2c102d4594

@@ -15,6 +15,9 @@ import time
 # Set working direcotry (where postprocessed NARClIM data is located)
 os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/')
 #
+#Choose Type of NARCLIM data ('T_NNRP', 'T_GCMS')
+Data_Type = 'T_NNRP'
+#
 #User input for location and variable type - from command line
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -23,6 +26,9 @@ if __name__ == "__main__":
     parser.add_argument("--varName", help="operation")
     parser.add_argument("--timestep", help="operation")
     parser.add_argument("--domain", help="operation")
+    parser.add_argument("--LocationName", help="operation")
+    parser.add_argument("--Datatype", help="operation")
+    parser.add_argument("--BiasBool", help="operation")
     args = parser.parse_args()
     print(args.lat)
     print(args.lon)
@@ -32,99 +38,201 @@ if __name__ == "__main__":
     Clim_var_type = args.varName
     NC_Domain = args.domain
     Timestep = args.timestep
-    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep)
-#set directory path for output files
-output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/'
-#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
-if not os.path.exists(output_directory):
-    os.makedirs(output_directory)
-    print("output directory folder didn't exist and was generated here:")
-    print(output_directory)
-#
-#time.sleep(10)
-#set up the loop variables for interrogating the entire NARCLIM raw data
-NC_Periods = ('1990-2009','2020-2039','2060-2079')
-#
-#Define empty pandas data frames
-Full_df = pd.DataFrame()
-GCM_df = pd.DataFrame()
-R13_df = pd.DataFrame()
-MultiNC_df = pd.DataFrame()
+    Location = args.LocationName
+    Data_Type = args.Datatype
+    Bias_Correction_BOOL = args.BiasBool
+    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)
+
+if Bias_Correction_BOOL == 'False':
+    #set directory path for output files
+    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/'
+    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
+    if not os.path.exists(output_directory):
+        os.makedirs(output_directory)
+        print("output directory folder didn't exist and was generated here:")
+        print(output_directory)
+
+    #time.sleep(10)
+    #set up the loop variables for interrogating the entire NARCLIM raw data
+    NC_Periods = ('1990-2009','2020-2039','2060-2079')
+    if Data_Type == 'T_NNRP':
+        NC_Periods = ('1950-2009','Stop')
+    #
+    #Define empty pandas data frames
+    Full_df = pd.DataFrame()
+    GCM_df = pd.DataFrame()
+    R13_df = pd.DataFrame()
+    MultiNC_df = pd.DataFrame()
+    #
+    #Loop through models and construct CSV per site
+    for NC_Period in NC_Periods:
+        if NC_Period != "Stop":
+            Period_short = NC_Period[:4]
+            GCMs = os.listdir('./'+ NC_Period)
+            for GCM in GCMs:
+                print GCM
+                Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
+                for Warf_run in Warf_runs:
+                    Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
+                    print Current_input_dir
+                    Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
+                    Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
+                    #print Climvar_NCs[1]
+                    #Climvar_NCs = Climvar_NCs[0:2]
+                    #print(Climvar_NCs)
+                    for netcdf in Climvar_NCs:
+                        f=Dataset(netcdf)
+                        # This section print on the screen information contained in the headings of the file
+                        #print '---------------------------------------------------------'
+                        #print f.ncattrs()
+                        #print f.title
+                        #print f.variables
+                        #print
+                        #for varname in f.variables:
+                        #    print varname,' -> ',np.shape(f.variables[varname])
+                        #print '---------------------------------------------------------'
+                        # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
+                        dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
+                        dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
+                        dist=dist_x + dist_y
+                        latindex=np.where(dist_y==np.min(dist_y))
+                        lonindex=np.where(dist_x==np.min(dist_x))
+                        index=np.where(dist==np.min(dist))
+                        print '---------------------------------------------------------'
+                        print netcdf
+                        print 'Information on the nearest point'
+                        print 'Your desired lat,lon = ',mylat,mylon
+                        print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
+                        #print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
+                        #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
+                        d={}
+                        #d["time"] = f.variables['time'][:]
+                        d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
+                        #if GCM == 'NNRP' and Warf_run == 'R1':
+                        #    d['Period']= NC_Period
+                        timestamp = f.variables['time'][:]
+                        timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
+                        df1=pd.DataFrame(d, index=timestamp_dates)
+                        f.close()
+                        print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
+                        #print f
+                        print '---------------------------------------------------------'
+                        #append in time direction each new time series to the data frame
+                        MultiNC_df = MultiNC_df.append(df1)
+                    #append in columns direction individual GCM-RCM-123 run time series (along x axis)
+                    MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
+                    R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
+                    MultiNC_df =pd.DataFrame()
+                #append blocks of R1 R2 and R3 in x axis direction
+                R13_df = R13_df.sort_index(axis=0, ascending=True)
+                GCM_df = pd.concat([GCM_df, R13_df], axis=1)
+                R13_df = pd.DataFrame()
+            #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
+            GCM_df = GCM_df.sort_index(axis=0, ascending=True)
+            Full_df = pd.concat([Full_df, GCM_df], axis=1)
+            GCM_df = pd.DataFrame()
+    Full_df = Full_df.sort_index(axis=0, ascending=True)
+    #adding a column with the NARCLIM decade
+    Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
+    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
+    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
+    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
+    #export the pandas data frame as a CSV file within the output directory
+    out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
+    out_path = output_directory +'/' + out_file_name
+    Full_df.to_csv(out_path)
 #
-#Loop through models and construct CSV per site
-for NC_Period in NC_Periods:
-    Period_short = NC_Period[:4]
-    GCMs = os.listdir('./'+ NC_Period)
-    for GCM in GCMs:
-        print GCM
-        Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
-        for Warf_run in Warf_runs:
-            Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
-            print Current_input_dir
-            Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
-            Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
-            print "test"
-            print Climvar_NCs[1]
-            #Climvar_NCs = Climvar_NCs[0:2]
-            #print(Climvar_NCs)
-            for netcdf in Climvar_NCs:
-                print "test2"
-                f=Dataset(netcdf)
-                # This section print on the screen information contained in the headings of the file
-                print '---------------------------------------------------------'
-                print f.ncattrs()
-                print f.title
-                print f.variables
-                print
-                for varname in f.variables:
-                    print varname,' -> ',np.shape(f.variables[varname])
-                print '---------------------------------------------------------'
-                # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
-                dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
-                dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
-                dist=dist_x + dist_y
-                latindex=np.where(dist_y==np.min(dist_y))
-                lonindex=np.where(dist_x==np.min(dist_x))
-                index=np.where(dist==np.min(dist))
-                print '---------------------------------------------------------'
-                print netcdf
-                print 'Information on the nearest point'
-                print 'Your desired lat,lon = ',mylat,mylon
-                print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
-                print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
-                #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
-                d={}
-                #d["time"] = f.variables['time'][:]
-                d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
-                #if GCM == 'NNRP' and Warf_run == 'R1':
-                #    d['Period']= NC_Period
-                timestamp = f.variables['time'][:]
-                timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
-                df1=pd.DataFrame(d, index=timestamp_dates)
-                f.close()
-                print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
-                #print f
-                print '---------------------------------------------------------'
-                #append in time direction each new time series to the data frame
-                MultiNC_df = MultiNC_df.append(df1)
-            #append in columns direction individual GCM-RCM-123 run time series (along x axis)
-            MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
-            R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
-            MultiNC_df =pd.DataFrame()
-        #append blocks of R1 R2 and R3 in x axis direction
-        R13_df = R13_df.sort_index(axis=0, ascending=True)
-        GCM_df = pd.concat([GCM_df, R13_df], axis=1)
-        R13_df = pd.DataFrame()
-    #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
-    GCM_df = GCM_df.sort_index(axis=0, ascending=True)
-    Full_df = pd.concat([Full_df, GCM_df], axis=1)
-    GCM_df = pd.DataFrame()
-Full_df = Full_df.sort_index(axis=0, ascending=True)
-#adding a column with the NARCLIM decade
-Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
-Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
-Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
-#export the pandas data frame as a CSV file within the output directory
-out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
-out_path = output_directory +'/' + out_file_name
-Full_df.to_csv(out_path)
+if Bias_Correction_BOOL == 'True':
+    #set directory path for output files
+    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/Bias_corrected/'
+    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
+    if not os.path.exists(output_directory):
+        os.makedirs(output_directory)
+        print("output directory folder didn't exist and was generated here:")
+        print(output_directory)
+    #time.sleep(10)
+    #set up the loop variables for interrogating the entire NARCLIM raw data
+    GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
+    #
+    #Define empty pandas data frames
+    Full_df = pd.DataFrame()
+    GCM_df = pd.DataFrame()
+    R13_df = pd.DataFrame()
+    MultiNC_df = pd.DataFrame()
+    #
+    #Loop through models and construct CSV per site
+    for GCM in GCMs:
+        print GCM
+        Warf_runs = os.listdir('./' + GCM + '/')
+        for Warf_run in Warf_runs:
+            NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
+            for NC_Period in NC_Periods:
+                Period_short = NC_Period[:4]
+                Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
+                print Current_input_dir
+                Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '_bc.nc'
+                Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
+                print Climvar_NCs[1]
+                print Climvar_NCs[2]
+                for netcdf in Climvar_NCs:
+                    #netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
+                    #print netcdf
+                    f=Dataset(netcdf)
+                    # This section print on the screen information contained in the headings of the file
+                    # print '---------------------------------------------------------'
+                    # print f.ncattrs()
+                    # print f.title
+                    # print f.variables
+                    # print
+                    # for varname in f.variables:
+                    #     print varname,' -> ',np.shape(f.variables[varname])
+                    # print '---------------------------------------------------------'
+                    # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
+                    dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
+                    dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
+                    dist=dist_x + dist_y
+                    latindex=np.where(dist_y==np.min(dist_y))
+                    lonindex=np.where(dist_x==np.min(dist_x))
+                    index=np.where(dist==np.min(dist))
+                    print '---------------------------------------------------------'
+                    print netcdf
+                    print 'Information on the nearest point'
+                    print 'Your desired lat,lon = ',mylat,mylon
+                    print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
+                    print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
+                    #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
+                    d={}
+                    #d["time"] = f.variables['time'][:]
+                    d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
+                    #if GCM == 'NNRP' and Warf_run == 'R1':
+                    #    d['Period']= NC_Period
+                    timestamp = f.variables['time'][:]
+                    timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
+                    df1=pd.DataFrame(d, index=timestamp_dates)
+                    f.close()
+                    print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
+                    #print f
+                    print '---------------------------------------------------------'
+                    #append in time direction each new time series to the data frame
+                    MultiNC_df = MultiNC_df.append(df1)
+                #append in columns direction individual GCM-RCM-123 run time series (along x axis)
+                MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
+                R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
+                MultiNC_df =pd.DataFrame()
+            #append blocks of R1 R2 and R3 in x axis direction
+            R13_df = R13_df.sort_index(axis=0, ascending=True)
+            GCM_df = pd.concat([GCM_df, R13_df], axis=1)
+            R13_df = pd.DataFrame()
+        #append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
+        GCM_df = GCM_df.sort_index(axis=0, ascending=True)
+        Full_df = pd.concat([Full_df, GCM_df], axis=1)
+        GCM_df = pd.DataFrame()
+    Full_df = Full_df.sort_index(axis=0, ascending=True)
+    #adding a column with the NARCLIM decade
+    #Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
+    #Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
+    #Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
+    #export the pandas data frame as a CSV file within the output directory
+    out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
+    out_path = output_directory +'/' + out_file_name
+    Full_df.to_csv(out_path)
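
Note on the grid-cell selection used in both branches above: the script picks the model grid cell closest to the requested coordinate by minimising the sum of the absolute latitude and longitude offsets on NARCliM's curvilinear grid. A minimal standalone sketch of that lookup, for reference outside the diff (the file path and target coordinates below are placeholders, not values from the commit):

import numpy as np
from netCDF4 import Dataset

def nearest_cell_index(nc_path, target_lat, target_lon):
    # Open a NARCliM-style file with 2-D 'lat'/'lon' coordinate variables
    f = Dataset(nc_path)
    lat = f.variables['lat'][:, :]
    lon = f.variables['lon'][:, :]
    # Same criterion as the script: minimise |dlat| + |dlon| in degrees
    dist = np.abs(lat - target_lat) + np.abs(lon - target_lon)
    yx = np.unravel_index(np.argmin(dist), dist.shape)
    f.close()
    return yx  # (row, column) index into the curvilinear grid

# e.g. iy, ix = nearest_cell_index('example_DAY_pracc.nc', -32.91, 151.80)

np.unravel_index(np.argmin(...)) collapses the two np.where calls used in the script into a single index pair; the degree-based distance is only an approximation of true distance, but it is adequate for picking the neighbouring cell.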

@@ -27,20 +27,35 @@ Batemans Bay: -35.76, 150.25
 Towamba River: -37.1, 149.91
 Nadgee Lake: -37.47, 149.97
-Bash-Code for netcdf interrogation:
+Code Input Variables:
+Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
+BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs
+
+Execution of code in bash-Code for netcdf interrogation:
 1st step: log into storm servers: Putty: hurricane.ccrc.unsw.edu.au or typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au + UNSW credentials (zID)
-In BASH:
+In BASH copy and enter:
 module load python
-latitude=-28.17
-longitude=153.56
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY';
-python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY'
+latitude=-32.91
+longitude=151.80
+name='HunterRiver'
+Datatype='T_NNRP'
+Biasboolean='False'
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
+python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean
 #1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores into CSV files.
 Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv
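
The summary CSVs can be read straight back into pandas for analysis: the script writes the time stamps as the index, one column per GCM, R-run and period combination, and a 'period' column in the non-bias-corrected output only. A short sketch of loading one, assuming the example file name above (the path is a placeholder):

import pandas as pd

# placeholder path; substitute the output_directory the script printed
csv_path = 'evspsblmean_35.76_150.25_NARCliM_summary.csv'

# the first CSV column is the timestamp index written by DataFrame.to_csv
df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
data = df.drop(columns=['period'], errors='ignore')  # 'period' only exists in some outputs
print(data.columns)                      # e.g. CCCMA3.1_R1_1990, CSIRO-MK3.0_R2_2060, ...
print(data.resample('A').mean().head())  # annual means per ensemble member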
