#added a couple of function input parameters

and the ability to create bias corrected data csv files
as well as NNRP for both bias corrected and normal
Development1
Valentin Heimhuber 7 years ago
parent adad360de9
commit 2c102d4594

@ -15,6 +15,9 @@ import time
# Set working direcotry (where postprocessed NARClIM data is located) # Set working direcotry (where postprocessed NARClIM data is located)
os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/') os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/')
# #
#Choose Type of NARCLIM data ('T_NNRP', 'T_GCMS')
Data_Type = 'T_NNRP'
#
#User input for location and variable type - from command line #User input for location and variable type - from command line
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -23,6 +26,9 @@ if __name__ == "__main__":
parser.add_argument("--varName", help="operation") parser.add_argument("--varName", help="operation")
parser.add_argument("--timestep", help="operation") parser.add_argument("--timestep", help="operation")
parser.add_argument("--domain", help="operation") parser.add_argument("--domain", help="operation")
parser.add_argument("--LocationName", help="operation")
parser.add_argument("--Datatype", help="operation")
parser.add_argument("--BiasBool", help="operation")
args = parser.parse_args() args = parser.parse_args()
print(args.lat) print(args.lat)
print(args.lon) print(args.lon)
@ -32,27 +38,35 @@ if __name__ == "__main__":
Clim_var_type = args.varName Clim_var_type = args.varName
NC_Domain = args.domain NC_Domain = args.domain
Timestep = args.timestep Timestep = args.timestep
print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, "domain", NC_Domain, "timestep ", Timestep) Location = args.LocationName
#set directory path for output files Data_Type = args.Datatype
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/' Bias_Correction_BOOL = args.BiasBool
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted' print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location, "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)
if not os.path.exists(output_directory):
if Bias_Correction_BOOL == 'False':
#set directory path for output files
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/'
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory) os.makedirs(output_directory)
print("output directory folder didn't exist and was generated here:") print("output directory folder didn't exist and was generated here:")
print(output_directory) print(output_directory)
#
#time.sleep(10) #time.sleep(10)
#set up the loop variables for interrogating the entire NARCLIM raw data #set up the loop variables for interrogating the entire NARCLIM raw data
NC_Periods = ('1990-2009','2020-2039','2060-2079') NC_Periods = ('1990-2009','2020-2039','2060-2079')
# if Data_Type == 'T_NNRP':
#Define empty pandas data frames NC_Periods = ('1950-2009','Stop')
Full_df = pd.DataFrame() #
GCM_df = pd.DataFrame() #Define empty pandas data frames
R13_df = pd.DataFrame() Full_df = pd.DataFrame()
MultiNC_df = pd.DataFrame() GCM_df = pd.DataFrame()
# R13_df = pd.DataFrame()
#Loop through models and construct CSV per site MultiNC_df = pd.DataFrame()
for NC_Period in NC_Periods: #
#Loop through models and construct CSV per site
for NC_Period in NC_Periods:
if NC_Period != "Stop":
Period_short = NC_Period[:4] Period_short = NC_Period[:4]
GCMs = os.listdir('./'+ NC_Period) GCMs = os.listdir('./'+ NC_Period)
for GCM in GCMs: for GCM in GCMs:
@ -63,22 +77,116 @@ for NC_Period in NC_Periods:
print Current_input_dir print Current_input_dir
Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc' Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn) Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
print "test" #print Climvar_NCs[1]
print Climvar_NCs[1]
#Climvar_NCs = Climvar_NCs[0:2] #Climvar_NCs = Climvar_NCs[0:2]
#print(Climvar_NCs) #print(Climvar_NCs)
for netcdf in Climvar_NCs: for netcdf in Climvar_NCs:
print "test2"
f=Dataset(netcdf) f=Dataset(netcdf)
# This section print on the screen information contained in the headings of the file # This section print on the screen information contained in the headings of the file
#print '---------------------------------------------------------'
#print f.ncattrs()
#print f.title
#print f.variables
#print
#for varname in f.variables:
# print varname,' -> ',np.shape(f.variables[varname])
#print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
dist=dist_x + dist_y
latindex=np.where(dist_y==np.min(dist_y))
lonindex=np.where(dist_x==np.min(dist_x))
index=np.where(dist==np.min(dist))
print '---------------------------------------------------------' print '---------------------------------------------------------'
print f.ncattrs() print netcdf
print f.title print 'Information on the nearest point'
print f.variables print 'Your desired lat,lon = ',mylat,mylon
print print 'The nearest lat,lon = ', f.variables['lat'][latindex[0],latindex[1]], f.variables['lon'][lonindex[0],lonindex[1]]
for varname in f.variables: #print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
print varname,' -> ',np.shape(f.variables[varname]) #Here we constract a pandas data frame, having the "time"/day as an index and a numer of variables (i.e. Clim_var_type, pracc, as columns)
d={}
#d["time"] = f.variables['time'][:]
d[ GCM +'_'+ Warf_run +'_'+ Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
#if GCM == 'NNRP' and Warf_run == 'R1':
# d['Period']= NC_Period
timestamp = f.variables['time'][:]
timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
df1=pd.DataFrame(d, index=timestamp_dates)
f.close()
print 'closing '+ os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file'
#print f
print '---------------------------------------------------------' print '---------------------------------------------------------'
#append in time direction each new time series to the data frame
MultiNC_df = MultiNC_df.append(df1)
#append in columns direction individual GCM-RCM-123 run time series (along x axis)
MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
MultiNC_df =pd.DataFrame()
#append blocks of R1 R2 and R3 in x axis direction
R13_df = R13_df.sort_index(axis=0, ascending=True)
GCM_df = pd.concat([GCM_df, R13_df], axis=1)
R13_df = pd.DataFrame()
#append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
GCM_df = GCM_df.sort_index(axis=0, ascending=True)
Full_df = pd.concat([Full_df, GCM_df], axis=1)
GCM_df = pd.DataFrame()
Full_df = Full_df.sort_index(axis=0, ascending=True)
#adding a column with the NARCLIM decade
Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
#export the pandas data frame as a CSV file within the output directory
out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
out_path = output_directory +'/' + out_file_name
Full_df.to_csv(out_path)
#
if Bias_Correction_BOOL == 'True':
#set directory path for output files
output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/'+ Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '/Bias_corrected/'
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print("output directory folder didn't exist and was generated here:")
print(output_directory)
#time.sleep(10)
#set up the loop variables for interrogating the entire NARCLIM raw data
GCMs = ('CCCMA3.1','CSIRO-MK3.0','ECHAM5', 'MIROC3.2', 'NNRP')
#
#Define empty pandas data frames
Full_df = pd.DataFrame()
GCM_df = pd.DataFrame()
R13_df = pd.DataFrame()
MultiNC_df = pd.DataFrame()
#
#Loop through models and construct CSV per site
for GCM in GCMs:
print GCM
Warf_runs = os.listdir('./' + GCM + '/')
for Warf_run in Warf_runs:
NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
for NC_Period in NC_Periods:
Period_short = NC_Period[:4]
Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
print Current_input_dir
Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '_bc.nc'
Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
print Climvar_NCs[1]
print Climvar_NCs[2]
for netcdf in Climvar_NCs:
#netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
#print netcdf
f=Dataset(netcdf)
# This section print on the screen information contained in the headings of the file
# print '---------------------------------------------------------'
# print f.ncattrs()
# print f.title
# print f.variables
# print
# for varname in f.variables:
# print varname,' -> ',np.shape(f.variables[varname])
# print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
dist_x=np.abs(f.variables['lon'][:,:]-float(mylon)) dist_x=np.abs(f.variables['lon'][:,:]-float(mylon))
dist_y=np.abs(f.variables['lat'][:,:]-float(mylat)) dist_y=np.abs(f.variables['lat'][:,:]-float(mylat))
@ -119,12 +227,12 @@ for NC_Period in NC_Periods:
GCM_df = GCM_df.sort_index(axis=0, ascending=True) GCM_df = GCM_df.sort_index(axis=0, ascending=True)
Full_df = pd.concat([Full_df, GCM_df], axis=1) Full_df = pd.concat([Full_df, GCM_df], axis=1)
GCM_df = pd.DataFrame() GCM_df = pd.DataFrame()
Full_df = Full_df.sort_index(axis=0, ascending=True) Full_df = Full_df.sort_index(axis=0, ascending=True)
#adding a column with the NARCLIM decade #adding a column with the NARCLIM decade
Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009' #Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009'
Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039' #Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039'
Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079' #Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079'
#export the pandas data frame as a CSV file within the output directory #export the pandas data frame as a CSV file within the output directory
out_file_name = Clim_var_type + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv' out_file_name = Clim_var_type + '_'+ Data_Type[2:] + '_' + Location + '_' + str(abs(round(mylat,3))) + '_' + str(round(mylon, 3)) + '_NARCliM_summary.csv'
out_path = output_directory +'/' + out_file_name out_path = output_directory +'/' + out_file_name
Full_df.to_csv(out_path) Full_df.to_csv(out_path)

@ -27,20 +27,35 @@ Batemans Bay: -35.76, 150.25
Towamba River: -37.1, 149.91 Towamba River: -37.1, 149.91
Nadgee Lake: -37.47, 149.97 Nadgee Lake: -37.47, 149.97
Bash-Code for netcdf interrogation: Code Input Variables:
Datatype: Choose 'T_NNRP' for reanalysis or 'T_GCMS' for GCM forcing data
BiasBool: Choose 'True' for bias corrected data, 'False' for normal model outputs
Execution of code in bash-Code for netcdf interrogation:
1st step: log into storm servers: Putty: hurricane.ccrc.unsw.edu.au or typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au + UNSW credentials (zID) 1st step: log into storm servers: Putty: hurricane.ccrc.unsw.edu.au or typhoon.ccrc.unsw.edu.au or cyclone.ccrc.unsw.edu.au + UNSW credentials (zID)
In BASH:
In BASH copy and enter:
module load python module load python
latitude=-28.17 latitude=-32.91
longitude=153.56 longitude=151.80
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY'; name='HunterRiver'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY'; Datatype='T_NNRP'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY'; Biasboolean='False'
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY'; python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY';
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY'; python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY'; python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'tasmax' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pr1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wssmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'pracc' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'wss1Hmaxtstep' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'evspsblmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean;
python /srv/ccrc/data02/z5025317/Code_execution/P1_NARCliM_NC_to_CSV_CCRC_SS.py --lat $latitude --lon $longitude --varName 'potevpmean' --domain 'd02' --timestep 'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Biasboolean
#1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores into CSV files. #1 The above code extracts time series from the full model ensemble over a single model grid cell (based on lat lon input) for the above variables of interest and stores into CSV files.
Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv Example of output name = evspsblmean_35.76_150.25_NARCliM_summary.csv

Loading…
Cancel
Save