@ -15,6 +15,9 @@ import time
# Set working directory (where postprocessed NARCliM data is located).
# The relative './...' directory listings used later in this script resolve against this directory.
os . chdir ( ' /srv/ccrc/data30/z3393020/NARCliM/postprocess/ ' )
#
# Choose type of NARCliM data ('T_NNRP', 'T_GCMS').
# NOTE(review): this module-level default is reassigned from args.Datatype further down - confirm it is still needed.
Data_Type = ' T_NNRP '
#
# User input for location and variable type - from command line.
# NOTE(review): as written, the literal compared with __name__ has surrounding spaces, so it cannot
# equal '__main__' - presumably a formatting artifact of this excerpt; confirm against the real file.
if __name__ == " __main__ " :
# Parser that collects the command-line options read via `args` below.
parser = argparse . ArgumentParser ( )
@ -23,6 +26,9 @@ if __name__ == "__main__":
# Register command-line options. No type= is passed, so argparse hands every value back as a string
# (or None when the option is omitted).
# NOTE(review): every option carries the same placeholder help text "operation".
parser . add_argument ( " --varName " , help = " operation " )
parser . add_argument ( " --timestep " , help = " operation " )
parser . add_argument ( " --domain " , help = " operation " )
parser . add_argument ( " --LocationName " , help = " operation " )
parser . add_argument ( " --Datatype " , help = " operation " )
# --BiasBool stays a string; later code compares it against string literals instead of converting to bool.
parser . add_argument ( " --BiasBool " , help = " operation " )
args = parser . parse_args ( )
# Echo the requested coordinates (the lat/lon options are presumably registered above this excerpt - confirm).
print ( args . lat )
print ( args . lon )
@ -32,18 +38,25 @@ if __name__ == "__main__":
# Copy the parsed command-line values into the names used by the rest of the script.
# mylat / mylon are assigned outside this excerpt; they must be numeric here because round() is applied to them below.
Clim_var_type = args . varName
NC_Domain = args . domain
Timestep = args . timestep
# NOTE(review): this summary print is followed by a longer one a few lines down that reports the same
# values plus Location / Data_Type / bias flag - one of the two is presumably a leftover.
print ( " Extracting all NARCLIM time series for variable: " , Clim_var_type , " for lat lon: " , mylat , mylon , " domain " , NC_Domain , " timestep " , Timestep )
Location = args . LocationName
# Overrides the module-level Data_Type default with the command-line value.
Data_Type = args . Datatype
# Kept as a string; the branches below test it against string literals.
Bias_Correction_BOOL = args . BiasBool
print ( " Extracting all NARCLIM time series for variable: " , Clim_var_type , " for lat lon: " , mylat , mylon , Location , " domain " , NC_Domain , " timestep " , Timestep , " Datatype " , Data_Type , " biascorrected? " , Bias_Correction_BOOL )
# Branch for the non-bias-corrected (raw) NARCliM data.
if Bias_Correction_BOOL == ' False ' :
# Set directory path for output files.
# The folder name uses abs() of the rounded latitude but the signed rounded longitude.
# NOTE(review): output_directory is assigned twice in a row; the second assignment (with Location) is the one that takes effect.
output_directory = ' /srv/ccrc/data02/z5025317/NARCliM_out/ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' / '
output_directory = ' /srv/ccrc/data02/z5025317/NARCliM_out/ ' + Location + ' _ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' / '
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
# Create the output folder (including missing parents) on first use and report where it was created.
if not os . path . exists ( output_directory ) :
os . makedirs ( output_directory )
print ( " output directory folder didn ' t exist and was generated here: " )
print ( output_directory )
#
#time.sleep(10)
# Set up the loop variables for interrogating the entire NARCliM raw data.
# Each entry is later used as a sub-directory name of the working directory.
NC_Periods = ( ' 1990-2009 ' , ' 2020-2039 ' , ' 2060-2079 ' )
# For the T_NNRP data type a single period is used; the extra 'Stop' entry is skipped by the period loop below.
if Data_Type == ' T_NNRP ' :
NC_Periods = ( ' 1950-2009 ' , ' Stop ' )
#
# Define empty pandas data frames (accumulators filled by the loops below).
Full_df = pd . DataFrame ( )
@ -53,6 +66,7 @@ MultiNC_df = pd.DataFrame()
#
# Loop through models and construct CSV per site.
# Outer loop: one pass per period label in NC_Periods.
for NC_Period in NC_Periods :
# Skip the 'Stop' placeholder entry that the T_NNRP configuration puts into NC_Periods.
if NC_Period != " Stop " :
# First four characters of the period label (the start year for labels such as '1990-2009'); used in column names.
Period_short = NC_Period [ : 4 ]
# Model names are whatever entries exist in the period's directory (relative to the working directory).
GCMs = os . listdir ( ' ./ ' + NC_Period )
for GCM in GCMs :
@ -63,22 +77,116 @@ for NC_Period in NC_Periods:
# Extract the time series at the grid cell nearest to (mylat, mylon) from every matching NetCDF file,
# assemble them into one wide data frame and export it as CSV.
# Current_input_dir and Warf_run are assigned outside this excerpt.
# NOTE(review): print statements here use Python 2 syntax.
print Current_input_dir
# File-name pattern: anything containing the timestep, followed later by the variable name and the .nc extension.
Climvar_ptrn = ' * ' + Timestep + ' _* ' + Clim_var_type + ' .nc '
Climvar_NCs = glob . glob ( Current_input_dir + Climvar_ptrn )
print " test "
# NOTE(review): indexing element 1 raises IndexError when fewer than two files match the pattern;
# this looks like a debug print (a commented-out copy follows) - confirm whether it should remain.
print Climvar_NCs [ 1 ]
#print Climvar_NCs[1]
#Climvar_NCs = Climvar_NCs[0:2]
#print(Climvar_NCs)
for netcdf in Climvar_NCs :
print " test2 "
# Dataset is imported outside this excerpt (presumably netCDF4.Dataset - confirm).
f = Dataset ( netcdf )
# This section prints on the screen information contained in the headers of the file
#print '---------------------------------------------------------'
#print f.ncattrs()
#print f.title
#print f.variables
#print
#for varname in f.variables:
# print varname,' -> ',np.shape(f.variables[varname])
#print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file.
# 'lon' and 'lat' are read as 2-D arrays; dist is the sum of the absolute lon and lat differences
# (not a great-circle distance).
dist_x = np . abs ( f . variables [ ' lon ' ] [ : , : ] - float ( mylon ) )
dist_y = np . abs ( f . variables [ ' lat ' ] [ : , : ] - float ( mylat ) )
dist = dist_x + dist_y
# latindex / lonindex minimise the lat and lon differences independently and are only used in the
# diagnostic print below; the data extraction uses `index` (minimum of the combined distance).
# NOTE(review): the independently chosen lat and lon cells need not be the same cell as `index`,
# so the printed "nearest lat,lon" may not describe the extracted point - confirm.
latindex = np . where ( dist_y == np . min ( dist_y ) )
lonindex = np . where ( dist_x == np . min ( dist_x ) )
index = np . where ( dist == np . min ( dist ) )
print ' --------------------------------------------------------- '
print f . ncattrs ( )
print f . title
print f . variables
print
for varname in f . variables :
print varname , ' -> ' , np . shape ( f . variables [ varname ] )
print netcdf
print ' Information on the nearest point '
print ' Your desired lat,lon = ' , mylat , mylon
print ' The nearest lat,lon = ' , f . variables [ ' lat ' ] [ latindex [ 0 ] , latindex [ 1 ] ] , f . variables [ ' lon ' ] [ lonindex [ 0 ] , lonindex [ 1 ] ]
#print 'The index of the nearest lat,lon (x,y) = ',index[0], index[1]
# Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns.
d = { }
#d["time"] = f.variables['time'][:]
# Column name is GCM_run_startyear; the values are the full first axis of the variable at the selected cell.
# NOTE(review): int(index[0]) assumes np.where found exactly one minimum - presumably fails on ties; confirm.
d [ GCM + ' _ ' + Warf_run + ' _ ' + Period_short ] = f . variables [ Clim_var_type ] [ : , int ( index [ 0 ] ) , int ( index [ 1 ] ) ]
#if GCM == 'NNRP' and Warf_run == 'R1':
# d['Period']= NC_Period
# The 'time' values are converted as hour offsets from 1949-12-01 and become the frame's index.
timestamp = f . variables [ ' time ' ] [ : ]
timestamp_dates = pd . to_datetime ( timestamp , unit = ' h ' , origin = pd . Timestamp ( ' 1949-12-01 ' ) )
df1 = pd . DataFrame ( d , index = timestamp_dates )
f . close ( )
print ' closing ' + os . path . basename ( os . path . normpath ( netcdf ) ) + ' moving to next netcdf file '
#print f
print ' --------------------------------------------------------- '
# Append in time direction each new time series to the data frame.
# NOTE(review): DataFrame.append was removed in pandas 2.0; this requires an older pandas.
MultiNC_df = MultiNC_df . append ( df1 )
# Append in columns direction individual GCM-RCM-123 run time series (along x axis);
# the per-run accumulator is reset afterwards.
MultiNC_df = MultiNC_df . sort_index ( axis = 0 , ascending = True )
R13_df = pd . concat ( [ R13_df , MultiNC_df ] , axis = 1 )
MultiNC_df = pd . DataFrame ( )
# Append blocks of R1 R2 and R3 in x axis direction, then reset the run-block accumulator.
R13_df = R13_df . sort_index ( axis = 0 , ascending = True )
GCM_df = pd . concat ( [ GCM_df , R13_df ] , axis = 1 )
R13_df = pd . DataFrame ( )
# Append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column).
GCM_df = GCM_df . sort_index ( axis = 0 , ascending = True )
Full_df = pd . concat ( [ Full_df , GCM_df ] , axis = 1 )
GCM_df = pd . DataFrame ( )
Full_df = Full_df . sort_index ( axis = 0 , ascending = True )
# Adding a column with the NARCliM period label. Both bounds are strict comparisons.
# NOTE(review): the first assignment labels everything between 1950-01-01 and 2010-01-01 as '1990-2009',
# which makes the following 1990-2010 assignment redundant - confirm the intended label for 1950-1989.
Full_df . loc [ ( Full_df . index > ' 1950-01-01 ' ) & ( Full_df . index < ' 2010-01-01 ' ) , ' period ' ] = ' 1990-2009 '
Full_df . loc [ ( Full_df . index > ' 1990-01-01 ' ) & ( Full_df . index < ' 2010-01-01 ' ) , ' period ' ] = ' 1990-2009 '
Full_df . loc [ ( Full_df . index > ' 2020-01-01 ' ) & ( Full_df . index < ' 2040-01-01 ' ) , ' period ' ] = ' 2020-2039 '
Full_df . loc [ ( Full_df . index > ' 2060-01-01 ' ) & ( Full_df . index < ' 2080-01-01 ' ) , ' period ' ] = ' 2060-2079 '
# Export the pandas data frame as a CSV file within the output directory.
# Data_Type[2:] drops the first two characters of the data type string for the file name.
out_file_name = Clim_var_type + ' _ ' + Data_Type [ 2 : ] + ' _ ' + Location + ' _ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' _NARCliM_summary.csv '
# output_directory already ends with a slash, so the joined path contains a doubled separator.
out_path = output_directory + ' / ' + out_file_name
Full_df . to_csv ( out_path )
#
#
# Branch for the bias-corrected NARCliM data; the flag is compared as a string.
if Bias_Correction_BOOL == ' True ' :
# Set directory path for output files: same location folder as the raw branch plus a Bias_corrected sub-folder.
output_directory = ' /srv/ccrc/data02/z5025317/NARCliM_out/ ' + Location + ' _ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' /Bias_corrected/ '
#output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
# Create the output folder (including missing parents) on first use and report where it was created.
if not os . path . exists ( output_directory ) :
os . makedirs ( output_directory )
print ( " output directory folder didn ' t exist and was generated here: " )
print ( output_directory )
#time.sleep(10)
# Set up the loop variables for interrogating the entire NARCliM raw data.
# Unlike the raw branch, the model names are a fixed tuple rather than a directory listing.
GCMs = ( ' CCCMA3.1 ' , ' CSIRO-MK3.0 ' , ' ECHAM5 ' , ' MIROC3.2 ' , ' NNRP ' )
#
# Define empty pandas data frames (accumulators for this branch).
Full_df = pd . DataFrame ( )
GCM_df = pd . DataFrame ( )
R13_df = pd . DataFrame ( )
MultiNC_df = pd . DataFrame ( )
#
# Loop through models and construct CSV per site.
# Directory layout walked here is ./GCM/run/period/domain/, relative to the working directory.
# NOTE(review): the commented-out path inside the file loop points at a different root
# (.../NARCliM/Bias_corrected/) - confirm the working directory is correct for this branch.
for GCM in GCMs :
print GCM
Warf_runs = os . listdir ( ' ./ ' + GCM + ' / ' )
for Warf_run in Warf_runs :
NC_Periods = os . listdir ( ' ./ ' + GCM + ' / ' + Warf_run + ' / ' )
for NC_Period in NC_Periods :
# First four characters of the period directory name.
Period_short = NC_Period [ : 4 ]
Current_input_dir = ' ./ ' + GCM + ' / ' + Warf_run + ' / ' + NC_Period + ' / ' + NC_Domain + ' / '
print Current_input_dir
# Same pattern as the raw branch, but file names end in _bc.nc.
Climvar_ptrn = ' * ' + Timestep + ' _* ' + Clim_var_type + ' _bc.nc '
Climvar_NCs = glob . glob ( Current_input_dir + Climvar_ptrn )
# NOTE(review): these two prints raise IndexError when fewer than three files match the pattern.
print Climvar_NCs [ 1 ]
print Climvar_NCs [ 2 ]
for netcdf in Climvar_NCs :
#netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
#print netcdf
# Dataset is imported outside this excerpt (presumably netCDF4.Dataset - confirm).
f = Dataset ( netcdf )
# This section prints on the screen information contained in the headers of the file
# print '---------------------------------------------------------'
# print f.ncattrs()
# print f.title
# print f.variables
# print
# for varname in f.variables:
# print varname,' -> ',np.shape(f.variables[varname])
# print '---------------------------------------------------------'
# Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file.
# Absolute lon / lat differences between every grid cell (2-D arrays) and the requested point.
dist_x = np . abs ( f . variables [ ' lon ' ] [ : , : ] - float ( mylon ) )
dist_y = np . abs ( f . variables [ ' lat ' ] [ : , : ] - float ( mylat ) )
@ -121,10 +229,10 @@ for NC_Period in NC_Periods:
# Reset the per-model accumulator, then put the combined frame in chronological order.
GCM_df = pd . DataFrame ( )
Full_df = Full_df . sort_index ( axis = 0 , ascending = True )
# Adding a column with the NARCliM period label. Both bounds are strict comparisons.
# NOTE(review): the upper bounds are the first day of the final year (e.g. '2009-01-01'), so rows from
# that date onwards receive no label here. The same three statements also appear commented out directly
# below - confirm which version is intended to be live.
Full_df . loc [ ( Full_df . index > ' 1990-01-01 ' ) & ( Full_df . index < ' 2009-01-01 ' ) , ' period ' ] = ' 1990-2009 '
Full_df . loc [ ( Full_df . index > ' 2020-01-01 ' ) & ( Full_df . index < ' 2039-01-01 ' ) , ' period ' ] = ' 2020-2039 '
Full_df . loc [ ( Full_df . index > ' 2060-01-01 ' ) & ( Full_df . index < ' 2079-01-01 ' ) , ' period ' ] = ' 2060-2079 '
#Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2009-01-01'), 'period']= '1990-2009 '
#Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2039-01-01'), 'period']= '2020-2039 '
#Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2079-01-01'), 'period']= '2060-2079 '
# Export the pandas data frame as a CSV file within the output directory.
# NOTE(review): out_file_name is assigned twice in a row; the second assignment (with Data_Type[2:] and Location) takes effect.
out_file_name = Clim_var_type + ' _ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' _NARCliM_summary.csv '
out_file_name = Clim_var_type + ' _ ' + Data_Type [ 2 : ] + ' _ ' + Location + ' _ ' + str ( abs ( round ( mylat , 3 ) ) ) + ' _ ' + str ( round ( mylon , 3 ) ) + ' _NARCliM_summary.csv '
# The output directories built earlier in this script end with a slash, so the joined path contains a doubled separator.
out_path = output_directory + ' / ' + out_file_name
Full_df . to_csv ( out_path )