Comprises a fully working version of the major BC Generation R and Python codes
The project is still being set up, so most of the code will still undergo significant changes.
parent 1f4a0bd6fb
commit 46ade05763
@ -0,0 +1,3 @@
(
echo HWQ027.s
) | C:\Users\z3509544\AppData\Local\Continuum\Anaconda3\python hunter_rma_preprocessing.py
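Note: hunter_rma_preprocessing.py itself is not part of this commit, so how it consumes the piped value is not shown here. A minimal sketch, assuming the script simply reads the run identifier (here "HWQ027.s") from standard input:

# Hypothetical sketch only -- the real hunter_rma_preprocessing.py is not included in this diff.
import sys

def main():
    run_id = sys.stdin.readline().strip()  # e.g. "HWQ027.s" piped in by the batch snippet above
    print('preprocessing run:', run_id)

if __name__ == '__main__':
    main()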
@ -0,0 +1,233 @@
# -*- coding: utf-8 -*-
from netCDF4 import Dataset
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import argparse
import time
#
# Set working directory (where the postprocessed NARCliM data is located)
os.chdir('/srv/ccrc/data30/z3393020/NARCliM/postprocess/')
#
# User input for location and variable type - from command line
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--lat", help="latitude of the point to extract")
    parser.add_argument("--lon", help="longitude of the point to extract")
    parser.add_argument("--varName", help="climate variable name (e.g. pracc, tasmean)")
    parser.add_argument("--timestep", help="timestep of the postprocessed data (e.g. DAY)")
    parser.add_argument("--domain", help="NARCliM domain (e.g. d02)")
    parser.add_argument("--LocationName", help="location name used for the output files")
    parser.add_argument("--Datatype", help="T_GCM for GCM forcing or T_NNRP for reanalysis")
    parser.add_argument("--BiasBool", help="use bias-corrected data? 'True' or 'False'")
    args = parser.parse_args()
    print(args.lat)
    print(args.lon)
    print(args.varName)
mylat = float(args.lat)
mylon = float(args.lon)
Clim_var_type = args.varName
NC_Domain = args.domain
Timestep = args.timestep
Location = args.LocationName
Data_Type = args.Datatype
Bias_Correction_BOOL = args.BiasBool
print("Extracting all NARCLIM time series for variable: ", Clim_var_type, " for lat lon: ", mylat, mylon, Location,
      "domain", NC_Domain, " timestep ", Timestep, " Datatype ", Data_Type, " biascorrected? ", Bias_Correction_BOOL)

lat_equal_len_string = "%.3f" % abs(mylat)
lon_equal_len_string = "%.3f" % mylon

if Bias_Correction_BOOL == 'False':
    # set directory path for output files
    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/'
    # output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
    print('---------------------------------------------------------')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("output directory folder didn't exist and was generated here:")
        print(output_directory)
    print('---------------------------------------------------------')
    #
    # time.sleep(10)
    # set up the loop variables for interrogating the entire NARCliM raw data
    NC_Periods = ('1990-2009', '2020-2039', '2060-2079')
    if Data_Type == 'T_NNRP':
        NC_Periods = ('1950-2009', 'Stop')
    #
    # Define empty pandas data frames
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    # Loop through models and construct CSV per site
    for NC_Period in NC_Periods:
        if NC_Period != "Stop":
            Period_short = NC_Period[:4]
            GCMs = os.listdir('./' + NC_Period)
            for GCM in GCMs:
                print(GCM)
                Warf_runs = os.listdir('./' + NC_Period + '/' + GCM + '/')
                for Warf_run in Warf_runs:
                    Current_input_dir = './' + NC_Period + '/' + GCM + '/' + Warf_run + '/' + NC_Domain + '/'
                    print(Current_input_dir)
                    Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '.nc'
                    Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
                    # print(Climvar_NCs[1])
                    # Climvar_NCs = Climvar_NCs[0:2]
                    # print(Climvar_NCs)
                    for netcdf in Climvar_NCs:
                        f = Dataset(netcdf)
                        # This section prints to the screen information contained in the headings of the file
                        # print('---------------------------------------------------------')
                        # print(f.ncattrs())
                        # print(f.title)
                        # print(f.variables)
                        # print()
                        # for varname in f.variables:
                        #     print(varname, ' -> ', np.shape(f.variables[varname]))
                        # print('---------------------------------------------------------')
                        # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
                        dist_x = np.abs(f.variables['lon'][:, :] - float(mylon))
                        dist_y = np.abs(f.variables['lat'][:, :] - float(mylat))
                        dist = dist_x + dist_y
                        latindex = np.where(dist_y == np.min(dist_y))
                        lonindex = np.where(dist_x == np.min(dist_x))
                        index = np.where(dist == np.min(dist))
                        print('---------------------------------------------------------')
                        print(netcdf)
                        print('Information on the nearest point')
                        print('Your desired lat,lon = ', mylat, mylon)
                        print('The nearest lat,lon = ', f.variables['lat'][latindex[0], latindex[1]], f.variables['lon'][lonindex[0], lonindex[1]])
                        # print('The index of the nearest lat,lon (x,y) = ', index[0], index[1])
                        # Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
                        d = {}
                        # d["time"] = f.variables['time'][:]
                        d[GCM + '_' + Warf_run + '_' + Period_short] = f.variables[Clim_var_type][:, int(index[0]), int(index[1])]
                        # if GCM == 'NNRP' and Warf_run == 'R1':
                        #     d['Period'] = NC_Period
                        timestamp = f.variables['time'][:]
                        timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
                        df1 = pd.DataFrame(d, index=timestamp_dates)
                        f.close()
                        print('closing ' + os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file')
                        # print(f)
                        print('---------------------------------------------------------')
                        # append in time direction each new time series to the data frame
                        MultiNC_df = MultiNC_df.append(df1)
                    # append in columns direction individual GCM-RCM-123 run time series (along x axis)
                    MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                    R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                    MultiNC_df = pd.DataFrame()
                # append blocks of R1 R2 and R3 in x axis direction
                R13_df = R13_df.sort_index(axis=0, ascending=True)
                GCM_df = pd.concat([GCM_df, R13_df], axis=1)
                R13_df = pd.DataFrame()
            # append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
            GCM_df = GCM_df.sort_index(axis=0, ascending=True)
            Full_df = pd.concat([Full_df, GCM_df], axis=1)
            GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)
    # add a column with the NARCliM decade
    Full_df.loc[(Full_df.index > '1950-01-01') & (Full_df.index < '2010-01-01'), 'period'] = '1990-2009'
    Full_df.loc[(Full_df.index > '1990-01-01') & (Full_df.index < '2010-01-01'), 'period'] = '1990-2009'
    Full_df.loc[(Full_df.index > '2020-01-01') & (Full_df.index < '2040-01-01'), 'period'] = '2020-2039'
    Full_df.loc[(Full_df.index > '2060-01-01') & (Full_df.index < '2080-01-01'), 'period'] = '2060-2079'
#
if Bias_Correction_BOOL == 'True':
    os.chdir('/srv/ccrc/data30/z3393020/NARCliM/Bias_corrected/')
    # set directory path for output files
    output_directory = '/srv/ccrc/data02/z5025317/NARCliM_out/' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '/Bias_corrected/'
    # output_directory = 'J:\Project wrl2016032\NARCLIM_Raw_Data\Extracted'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("output directory folder didn't exist and was generated here:")
        print(output_directory)
    # time.sleep(10)
    # set up the loop variables for interrogating the entire NARCliM raw data
    GCMs = ('CCCMA3.1', 'CSIRO-MK3.0', 'ECHAM5', 'MIROC3.2', 'NNRP')
    #
    # Define empty pandas data frames
    Full_df = pd.DataFrame()
    GCM_df = pd.DataFrame()
    R13_df = pd.DataFrame()
    MultiNC_df = pd.DataFrame()
    #
    # Loop through models and construct CSV per site
    for GCM in GCMs:
        print(GCM)
        Warf_runs = os.listdir('./' + GCM + '/')
        for Warf_run in Warf_runs:
            NC_Periods = os.listdir('./' + GCM + '/' + Warf_run + '/')
            for NC_Period in NC_Periods:
                Period_short = NC_Period[:4]
                Current_input_dir = './' + GCM + '/' + Warf_run + '/' + NC_Period + '/' + NC_Domain + '/'
                print(Current_input_dir)
                Climvar_ptrn = '*' + Timestep + '_*' + Clim_var_type + '_bc.nc'
                Climvar_NCs = glob.glob(Current_input_dir + Climvar_ptrn)
                print(Climvar_NCs[1])
                print(Climvar_NCs[2])
                for netcdf in Climvar_NCs:
                    # netcdf = '/srv/ccrc/data31/z3393020/NARCliM/Bias_corrected/' + netcdf[2:]
                    # print(netcdf)
                    f = Dataset(netcdf)
                    # This section prints to the screen information contained in the headings of the file
                    print('---------------------------------------------------------')
                    print(f.ncattrs())
                    print(f.title)
                    print(f.variables)
                    print()
                    for varname in f.variables:
                        print(varname, ' -> ', np.shape(f.variables[varname]))
                    print('---------------------------------------------------------')
                    # Based on the desired inputs, this finds the nearest grid centerpoint index (x,y) in the *.nc file
                    dist_x = np.abs(f.variables['lon'][:, :] - float(mylon))
                    dist_y = np.abs(f.variables['lat'][:, :] - float(mylat))
                    dist = dist_x + dist_y
                    latindex = np.where(dist_y == np.min(dist_y))
                    lonindex = np.where(dist_x == np.min(dist_x))
                    index = np.where(dist == np.min(dist))
                    print('---------------------------------------------------------')
                    print(netcdf)
                    print('Information on the nearest point')
                    print('Your desired lat,lon = ', mylat, mylon)
                    print('The nearest lat,lon = ', f.variables['lat'][latindex[0], latindex[1]], f.variables['lon'][lonindex[0], lonindex[1]])
                    print('The index of the nearest lat,lon (x,y) = ', index[0], index[1])
                    # Here we construct a pandas data frame, having the "time"/day as an index and a number of variables (i.e. Clim_var_type, pracc) as columns
                    d = {}
                    # d["time"] = f.variables['time'][:]
                    d[GCM + '_' + Warf_run + '_' + Period_short] = f.variables[Clim_var_type + '_bc'][:, int(index[0]), int(index[1])]
                    # if GCM == 'NNRP' and Warf_run == 'R1':
                    #     d['Period'] = NC_Period
                    timestamp = f.variables['time'][:]
                    timestamp_dates = pd.to_datetime(timestamp, unit='h', origin=pd.Timestamp('1949-12-01'))
                    df1 = pd.DataFrame(d, index=timestamp_dates)
                    f.close()
                    print('closing ' + os.path.basename(os.path.normpath(netcdf)) + ' moving to next netcdf file')
                    # print(f)
                    print('---------------------------------------------------------')
                    # append in time direction each new time series to the data frame
                    MultiNC_df = MultiNC_df.append(df1)
                # append in columns direction individual GCM-RCM-123 run time series (along x axis)
                MultiNC_df = MultiNC_df.sort_index(axis=0, ascending=True)
                R13_df = pd.concat([R13_df, MultiNC_df], axis=1)
                MultiNC_df = pd.DataFrame()
            # append blocks of R1 R2 and R3 in x axis direction
            R13_df = R13_df.sort_index(axis=0, ascending=True)
            GCM_df = pd.concat([GCM_df, R13_df], axis=1)
            R13_df = pd.DataFrame()
        # append time periods in x axis direction (change axis=1 to =0 if periods for same model should be added to same model R123 column)
        GCM_df = GCM_df.sort_index(axis=0, ascending=True)
        Full_df = pd.concat([Full_df, GCM_df], axis=1)
        GCM_df = pd.DataFrame()
    Full_df = Full_df.sort_index(axis=0, ascending=True)

# export the pandas data frame as a CSV file within the output directory
out_file_name = Clim_var_type + '_' + Data_Type[2:] + '_' + Location + '_' + lat_equal_len_string + '_' + lon_equal_len_string + '_NARCliM_summary.csv'
out_path = output_directory + '/' + out_file_name
Full_df.to_csv(out_path)
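For reference, the nearest-grid-cell selection at the heart of both extraction loops above can be illustrated in isolation. The small 2-D lat/lon arrays and the target point below are made-up placeholders (the real arrays come from each NetCDF file); only the indexing logic mirrors the script:

# Standalone sketch of the nearest-cell lookup (synthetic data, not NARCliM output)
import numpy as np

lat = np.array([[-33.0, -33.0], [-32.5, -32.5]])  # hypothetical 2-D latitude field
lon = np.array([[151.0, 151.5], [151.0, 151.5]])  # hypothetical 2-D longitude field
mylat, mylon = -32.6, 151.4                       # hypothetical target point

dist = np.abs(lat - mylat) + np.abs(lon - mylon)  # same Manhattan-style degree distance as the script
index = np.where(dist == np.min(dist))            # (row, col) arrays pointing at the nearest cell

print('nearest cell (y, x):', int(index[0]), int(index[1]))
print('nearest lat,lon:', lat[index][0], lon[index][0])
# the script then slices f.variables[Clim_var_type][:, int(index[0]), int(index[1])]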
@ -0,0 +1,59 @@
# Code for preparing a text file with BASH code for batch download of NARCLIM data for the HUNTER WQ modeling of
# future climate scenarios

# NARCLIM Variables
# evspsblmean   water evaporation flux (actual ET); long_name: Surface evaporation; standard_name: water_evaporation_flux; units: kg m-2 s-1
# tasmean       mean near-surface temperature
# pracc         daily precipitation sum (sum of convective prcacc and stratiform prncacc precip)

Clim_Var <- 'pracc'
Datatype <- 'T_GCM'      #T_GCMS for GCM forcing, T_NNRP for reanalysis (only 1950-2009)
Biasboolean <- 'True'    #use bias corrected data? True or False python boolean

Directory <- 'C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BC_Generation/catchments/'
Filename <- 'Catchment_Prev_Hunter_Model_Centroids_VH_WGS84_attribute_Table.csv'

#Load CSV with location names and lat lon coordinates
Location.df <- data.frame(read.csv(paste(Directory, Filename, sep=""), header=T))
#create empty vector for storing the command line text and open file
Vector.for.command.line.txt <- c()
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
           paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
for (i in 1:(length(Location.df$Name))){
  #name<-as.character(Location.df$Name[i])
  #name<-gsub('([[:punct:]])|\\s+','_',name)
  if(i<10){
    name<-paste('Catchment_0', as.character(i), sep="")
  }else{
    name<-paste('Catchment_', as.character(i), sep="")
  }
  latitude=round(as.numeric(Location.df$Lat[i]),3)
  longitude=round(as.numeric(Location.df$Long[i]),3)
  text <- c(paste("latitude=",latitude,"", sep=""), paste("longitude=",longitude,"", sep=""),
            paste("name='",name,"'", sep=""),
            "python /srv/ccrc/data02/z5025317/Code_execution/\\
P1_NARCliM_NC_to_CSV_CCRC_SS.py \\
--lat $latitude --lon $longitude --varName $ClimVarName --domain 'd02' --timestep \\
'DAY' --LocationName $name --Datatype $Datatype --BiasBool $Bias_corrected")
  Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text)
  if(i==10|i==20|i==31){
    Vector.for.command.line.txt <- c(Vector.for.command.line.txt, " ")
    text.file.name <- paste('C:/Users/z5025317/OneDrive - UNSW/Hunter_CC_Modeling/07_Modelling/01_Input/BC_Generation/Code/NARCLIM_Download_and_Processing/',
                            Clim_Var, "_", Datatype, "_", Biasboolean, substring(as.character(i), 1,1), ".txt", sep="")
    #open and fill text file
    fileConn <- file(text.file.name)
    writeLines(Vector.for.command.line.txt, fileConn)
    close(fileConn)
    #
    if(i==10|i==20){
      Vector.for.command.line.txt <- c()
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, "module load python")
      text1 <- c(paste("Datatype='",Datatype,"'", sep=""),
                 paste("Bias_corrected='",Biasboolean,"'", sep=""), paste("ClimVarName='",Clim_Var,"'", sep=""))
      Vector.for.command.line.txt <- c(Vector.for.command.line.txt, text1)
    }
  }
}
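The Python extraction script above writes one summary CSV per site, indexed by date with one column per GCM_run_period combination (plus a 'period' column in the non-bias-corrected branch). A minimal sketch of reading such a file back with pandas; the file name and column label below are illustrative only, following the naming pattern built in the script:

# Sketch only: file name and column label are placeholders, not actual outputs of this commit.
import pandas as pd

df = pd.read_csv('pracc_GCM_Catchment_01_32.500_151.000_NARCliM_summary.csv',
                 index_col=0, parse_dates=True)
print(df.columns)                         # e.g. 'CCCMA3.1_R1_1990', ..., 'period'
print(df.loc['1990-01-01':'1990-01-07'])  # daily values for the first week of 1990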