added a couple of function input parameters

and the ability to create bias-corrected data CSV files,
as well as NNRP data for both bias-corrected and normal model outputs
Development1
Valentin Heimhuber 7 years ago
parent adad360de9
commit 2c102d4594

@@ -15,6 +15,9 @@ import time
# Set working directory (where postprocessed NARCliM data is located)
os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/')
#
#Choose type of NARCliM data ('T_NNRP', 'T_GCMS')
Data_Type = 'T_NNRP'
#
#User input for location and variable type - from command line
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
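The new module-level Data_Type default interacts with the period folders scanned later in the script: for 'T_NNRP' (reanalysis) only a single 1950-2009 block exists, with 'Stop' acting as a sentinel, while 'T_GCMS' keeps the three GCM periods. Note that this default is overridden further down by the new --Datatype command-line argument. A minimal sketch of that selection logic; the tuple values are taken from the diff, the helper name is illustrative only:

import narclim_config  # placeholder; no extra dependencies are actually needed

def narclim_periods(data_type):
    """Return the NARCliM period folders scanned for a given data type (illustrative helper)."""
    if data_type == 'T_NNRP':
        # the reanalysis run is stored as one 1950-2009 block; 'Stop' is a sentinel value
        return ('1950-2009', 'Stop')
    return ('1990-2009', '2020-2039', '2060-2079')

print(narclim_periods('T_NNRP'))   # ('1950-2009', 'Stop')
print(narclim_periods('T_GCMS'))   # ('1990-2009', '2020-2039', '2060-2079')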
@@ -23,6 +26,9 @@ if __name__ == "__main__":
    parser.add_argument("--varName", help="operation")
    parser.add_argument("--timestep", help="operation")
    parser.add_argument("--domain", help="operation")
    parser.add_argument("--LocationName", help="operation")
    parser.add_argument("--Datatype", help="operation")
    parser.add_argument("--BiasBool", help="operation")
    args = parser.parse_args()
    print(args.lat)
    print(args.lon)
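The three new arguments are parsed as plain strings; in particular, --BiasBool is later compared against the literals 'True'/'False' rather than being converted to a boolean. A hedged, stand-alone sketch of the extended interface follows (Python 3 assumed); the help texts and the hard-coded example argument list are illustrative and not part of the repository script:

import argparse

# Stand-alone parser mirroring the arguments added in this commit (illustrative help texts)
parser = argparse.ArgumentParser()
parser.add_argument("--lat", help="latitude of the point of interest")
parser.add_argument("--lon", help="longitude of the point of interest")
parser.add_argument("--varName", help="NARCliM variable, e.g. 'pracc' or 'tasmean'")
parser.add_argument("--timestep", help="e.g. 'DAY'")
parser.add_argument("--domain", help="e.g. 'd02'")
parser.add_argument("--LocationName", help="site label used in the output file name")
parser.add_argument("--Datatype", help="'T_NNRP' (reanalysis) or 'T_GCMS' (GCM forcing)")
parser.add_argument("--BiasBool", help="'True' for bias-corrected data, 'False' otherwise")

# Example argument list (assumed values matching the usage notes further below)
args = parser.parse_args(['--lat', '-32.91', '--lon', '151.80',
                          '--varName', 'pracc', '--timestep', 'DAY', '--domain', 'd02',
                          '--LocationName', 'HunterRiver', '--Datatype', 'T_NNRP',
                          '--BiasBool', 'False'])
print(args.LocationName, args.Datatype, args.BiasBool)  # HunterRiver T_NNRP False (all strings)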
@@ -32,99 +38,201 @@ if __name__ == "__main__":
    Clim_var_type = args.varName
    NC_Domain = args.domain
    Timestep = args.timestep
    Location = args.LocationName
    Data_Type = args.Datatype
    Bias_Correction_BOOL = args.BiasBool
    print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)

if Bias_Correction_BOOL == 'False':
    #set directory path for output files
    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/'
    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("output directory folder didn't exist and was generated here:")
        print(output_directory)
    #time.sleep(10)
    #set up the loop variables for interrogating the entire NARCLIM raw data
    NC_Periods = ('1990-2009','2020-2039','2060-2079')
    if Data_Type == 'T_NNRP':
        NC_Periods = ('1950-2009','Stop')
    #
    #Define empty pandas data frames
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    #Loop through models and construct CSV per site
    for NC_Period in NC_Periods:
        if NC_Period != "Stop":
            Period_short = NC_Period[:4]
            GCMs = os.listdir('./'+ NC_Period)
            for GCM in GCMs:
                print GCM
                Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
                for Warf_run in Warf_runs:
                    Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
                    print Current_input_dir
                    Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
                    Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
                    #print Climvar_NCs[1]
                    #Climvar_NCs = Climvar_NCs[0:2]
                    #print(Climvar_NCs)
                    for netcdf in Climvar_NCs:
                        f=Dataset(netcdf)
                        # This section prints to the screen information contained in the headings of the file
                        #print '---------------------------------------------------------'
                        #print f.ncattrs()
                        #print f.title
                        #print f.variables
                        #print
                        #for varname in f.variables:
                        #    print varname,' -> ',np.shape(f.variables[varname])
                        #print '---------------------------------------------------------'
                        # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
                        dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
                        dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
                        dist=dist_x + dist_y
                        latindex=np.where(dist_y==np.min(dist_y))
                        lonindex=np.where(dist_x==np.min(dist_x))
                        index=np.where(dist==np.min(dist))
                        print '---------------------------------------------------------'
                        print netcdf
                        print 'Information on the nearest point'
                        print 'Your desired lat,lon = ',mylat,mylon
                        print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
                        #print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
                        #Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
                        d={}
                        #d["time"] = f.variables['time'][:]
                        d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
                        #if GCM == 'NNRP' and Warf_run == 'R1':
                        #    d['Period']= NC_Period
                        timestamp = f.variables['time'][:]
                        timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
                        df1=pd.DataFrame(d, index=timestamp_dates)
                        f.close()
                        print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
                        #print f
                        print '---------------------------------------------------------'
                        #append in time direction each new time series to the data frame
                        MultiNC_df = MultiNC_df.append(df1)
                    #append in columns direction individual GCM-RCM-123 run time series (along x axis)
                    MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                    R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                    MultiNC_df = pd.DataFrame()
                #append blocks of R1 R2 and R3 in x axis direction
                R13_df = R13_df.sort_index(axis=0, ascending=True)
                GCM_df = pd.concat([GCM_df, R13_df], axis=1)
                R13_df = pd.DataFrame()
            #append time periods in x axis direction (change axis=1 to axis=0 if periods for the same model should be added to the same model R123 column)
            GCM_df = GCM_df.sort_index(axis=0, ascending=True)
            Full_df = pd.concat([Full_df, GCM_df], axis=1)
            GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)
    #adding a column with the NARCLIM decade
    Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
    #export the pandas data frame as a CSV file within the output directory
    out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
    out_path = output_directory +'/' + out_file_name
    Full_df.to_csv(out_path)
#
if Bias_Correction_BOOL == 'True':
    #set directory path for output files
    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/Bias_corrected/'
    #output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("output directory folder didn't exist and was generated here:")
        print(output_directory)
    #time.sleep(10)
    #set up the loop variables for interrogating the entire NARCLIM raw data
    GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
    #
    #Define empty pandas data frames
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    #Loop through models and construct CSV per site
    for GCM in GCMs:
        print GCM
        Warf_runs = os.listdir('./' + GCM + '/')
        for Warf_run in Warf_runs:
            NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
            for NC_Period in NC_Periods:
                Period_short = NC_Period[:4]
                Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
                print Current_input_dir
                Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '_bc.nc'
                Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
                print Climvar_NCs[1]
                print Climvar_NCs[2]
                for netcdf in Climvar_NCs:
                    #netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
                    #print netcdf
                    f=Dataset(netcdf)
                    # This section prints to the screen information contained in the headings of the file
                    # print '---------------------------------------------------------'
                    # print f.ncattrs()
                    # print f.title
                    # print f.variables
                    # print
                    # for varname in f.variables:
                    #     print varname,' -> ',np.shape(f.variables[varname])
                    # print '---------------------------------------------------------'
                    # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
                    dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
                    dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
                    dist=dist_x + dist_y
                    latindex=np.where(dist_y==np.min(dist_y))
                    lonindex=np.where(dist_x==np.min(dist_x))
                    index=np.where(dist==np.min(dist))
                    print '---------------------------------------------------------'
                    print netcdf
                    print 'Information on the nearest point'
                    print 'Your desired lat,lon = ',mylat,mylon
                    print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
                    print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
                    #Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
                    d={}
                    #d["time"] = f.variables['time'][:]
                    d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
                    #if GCM == 'NNRP' and Warf_run == 'R1':
                    #    d['Period']= NC_Period
                    timestamp = f.variables['time'][:]
                    timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
                    df1=pd.DataFrame(d, index=timestamp_dates)
                    f.close()
                    print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
                    #print f
                    print '---------------------------------------------------------'
                    #append in time direction each new time series to the data frame
                    MultiNC_df = MultiNC_df.append(df1)
                #append in columns direction individual GCM-RCM-123 run time series (along x axis)
                MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                MultiNC_df = pd.DataFrame()
            #append blocks of R1 R2 and R3 in x axis direction
            R13_df = R13_df.sort_index(axis=0, ascending=True)
            GCM_df = pd.concat([GCM_df, R13_df], axis=1)
            R13_df = pd.DataFrame()
        #append time periods in x axis direction (change axis=1 to axis=0 if periods for the same model should be added to the same model R123 column)
        GCM_df = GCM_df.sort_index(axis=0, ascending=True)
        Full_df = pd.concat([Full_df, GCM_df], axis=1)
        GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)
    #adding a column with the NARCLIM decade
    #Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
    #Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
    #Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
    #export the pandas data frame as a CSV file within the output directory
    out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
    out_path = output_directory +'/' + out_file_name
    Full_df.to_csv(out_path)
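Both branches repeat the same core recipe for every netCDF file: locate the grid cell nearest to the requested coordinate on the 2-D NARCliM lat/lon arrays, convert the time axis (hours since 1949-12-01) to a pandas DatetimeIndex, build a one-column DataFrame named GCM_run_period, then join runs column-wise with pd.concat and stack periods row-wise with DataFrame.append. The sketch below reproduces that recipe on synthetic arrays so it runs without the CCRC file system (Python 3 and a current pandas assumed); the toy grid, hour offsets and column names are assumptions, np.argmin/np.unravel_index stands in for the script's np.where-on-the-minimum (equivalent when the minimum is unique), and pd.concat(axis=0) replaces DataFrame.append, which was removed in pandas 2.0:

import numpy as np
import pandas as pd

mylat, mylon = -32.91, 151.80

# toy stand-ins for f.variables['lat'][:,:] and f.variables['lon'][:,:] (2-D curvilinear grid)
lats = np.linspace(-38.0, -28.0, 101)
lons = np.linspace(148.0, 154.0, 61)
lon2d, lat2d = np.meshgrid(lons, lats)

# nearest grid cell: minimise the summed absolute lat/lon distance, as in the script
dist = np.abs(lat2d - mylat) + np.abs(lon2d - mylon)
iy, ix = np.unravel_index(np.argmin(dist), dist.shape)
print('nearest lat,lon =', lat2d[iy, ix], lon2d[iy, ix])

# time axis: hours since 1949-12-01, converted exactly as in the script
hours = 368184.0 + 24.0 * np.arange(5)          # five consecutive daily steps (assumed offsets)
dates = pd.to_datetime(hours, unit='h', origin=pd.Timestamp('1949-12-01'))

def one_file(column, index):
    """One-column DataFrame per netCDF file, e.g. column='CCCMA3.1_R1_1990' (dummy values)."""
    return pd.DataFrame({column: np.random.rand(len(index))}, index=index)

# stack periods of the same run in the time direction
# (the script uses MultiNC_df.append(df1); pd.concat along axis=0 is the modern equivalent)
r1 = pd.concat([one_file('CCCMA3.1_R1_1990', dates),
                one_file('CCCMA3.1_R1_1990', dates + pd.Timedelta(days=5))], axis=0)

# join runs (R1/R2/R3) and then GCMs side by side, as the R13_df/GCM_df/Full_df concats do
r2 = one_file('CCCMA3.1_R2_1990', r1.index)
full = pd.concat([r1, r2], axis=1).sort_index()

# label the NARCliM decade, mirroring the .loc assignments in the non-bias-corrected branch
full.loc[(full.index > '1990-01-01') & (full.index < '2010-01-01'), 'period'] = '1990-2009'
print(full.head())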

@@ -27,20 +27,35 @@ Batemans Bay: -35.76, 150.25
Towamba River: -37.1, 149.91
Nadgee Lake: -37.47, 149.97

Code Input Variables:
Datatype: choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
BiasBool: choose 'True' for bias-corrected data, 'False' for normal model outputs

Execution of the code in bash (netcdf interrogation):
1st step: log into the storm servers (hurricane.ccrc.unsw.edu.au, typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au) via PuTTY with your UNSW credentials (zID)

In BASH, copy and enter:
module load python
latitude=-32.91
longitude=151.80
name='HunterRiver'
Datatype='T_NNRP'
Biasboolean='False'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean

#1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores them in CSV files.
Example of output name = evspsblmean_NNRP_HunterRiver_32.91_151.8_NARCliM_summary.csv
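With the new arguments, the output file name embeds the data type and location as well as the coordinates, i.e. <varName>_<Datatype[2:]>_<LocationName>_<lat>_<lon>_NARCliM_summary.csv. A hedged sketch of loading such a file back into pandas for analysis; the full path below is an assumption based on the output_directory pattern in the script, and Python 3 with pandas is assumed:

import pandas as pd

# assumed example path, following the naming pattern produced by the script
csv_path = ('/srv/ccrc/data02/z5025317/NARCliM_out/HunterRiver_32.91_151.8/'
            'evspsblmean_NNRP_HunterRiver_32.91_151.8_NARCliM_summary.csv')

# the first column written by Full_df.to_csv() is the datetime index
df = pd.read_csv(csv_path, index_col=0, parse_dates=True)

print(df.columns.tolist())                # one column per GCM_R#_period, plus 'period'
print(df['period'].dropna().unique())     # which NARCliM decades are present
print(df.loc['1990':'1999'].describe())   # summary statistics for one decade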
