You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
6.9 KiB
Python
160 lines
6.9 KiB
Python
5 years ago
|
# -*- coding: utf-8 -*-
#==========================================================#
# Last Updated - June 2018
# @author: z5025317 Valentin Heimhuber
# Code for combining all extracted RMA2 and RMA11 results into a single
# data frame and saving it to CSV.
#==========================================================#

#==========================================================#
# Load packages
|
#==========================================================#
import glob
import os
from datetime import datetime, timedelta

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#==========================================================#
|
||
|
|
||
|
|
||
|
#==========================================================#
# Input parameters and directories
#==========================================================#
# Model run codes whose extracted results will be combined.
fs = ['Hwq003', 'Hwq005']
# RMA2 (hydrodynamic) and RMA11 (water quality) variables to process.
HDvariables = ['depth', 'elev', 'vel']
WQvariables = ['sal']
# Optionally cut the combined data frame down to a common period.
Subset_years = False
startyear = 1999  # years need to be adjusted based on the time period of the model runs
endyear = 2004
#year = range(startyear, endyear + 1)

# Set directory path for output files.
output_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/Postprocessed/Compound_data/'
# CSV file with nodes and chainages to extract data from.
nodes_csv = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Chainages/Hunter_nodes.csv'

# Read the nodes/chainages file ONCE and pull both columns from the same
# frame (the original parsed the same CSV twice).
nodes_df = pd.read_csv(nodes_csv)
node = nodes_df['Hunter'].values
chainages = nodes_df['x_km'].values
#==========================================================#

#==========================================================#
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
# Create the output directory if it does not exist yet.
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
#==========================================================#
|
||
|
|
||
|
|
||
|
#==========================================================#
# Data extraction for RMA11 (water quality) variables
#==========================================================#
# One column per node/variable/model-run combination, indexed by datetime.
WQ_Summary_df = pd.DataFrame()

for variable in WQvariables:
    for f in fs:
        # BUG FIX: the original re-assigned f = 'Hwq003' here (leftover
        # debug code), which made every iteration process the same model
        # run instead of each entry in fs.
        input_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output2/' + f
        # Set working directory (where the extracted per-node CSVs are located)
        os.chdir(input_directory)
        Summary_df = pd.DataFrame()
        df = pd.DataFrame()
        for NODE in node:
            NODE = str(NODE)
            #==========================================================#
            # Load the water-quality data file for this node.
            Clim_Var_CSVs = glob.glob('*_' + NODE + '_*WQ*')
            # BUG FIX: these were Python 2 `print x` statements, a
            # SyntaxError under Python 3 and inconsistent with the
            # print(...) calls used elsewhere in this file.
            print(Clim_Var_CSVs)
            print(NODE)
            clim_var_csv_path = Clim_Var_CSVs[0]
            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
            # Build a datetime index from the Year and (decimal) Hour columns.
            df.index = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.Hour, unit='h')
            df = df.drop(columns=['Year', 'Hour'])
            #df.columns = [NODE+'_Sal'] #, NODE+'_Tem']
            df.columns = [NODE + '_' + variable + '_' + f]
            #df = df.loc[~df.index.duplicated(keep='first')]
            Summary_df = pd.concat([Summary_df, df], axis=1)
        out_path = input_directory + '/' + f + '_' + variable + '4.csv'
        print('writing ' + out_path)
        Summary_df.to_csv(out_path)

        # Optionally cut down the summary df to common years.
        if Subset_years:
            # BUG FIX: the original used ' 06 31' — June has only 30 days,
            # so strptime raised ValueError whenever Subset_years was True.
            # End of window is 30 June of endyear (mid-year cutoff) —
            # NOTE(review): confirm 30 June (vs 31 Dec) is the intended end.
            Summary_df = Summary_df[datetime.strptime(str(startyear) + ' 01 01', '%Y %m %d').date():
                                    datetime.strptime(str(endyear) + ' 06 30', '%Y %m %d').date()]
        WQ_Summary_df = pd.concat([WQ_Summary_df, Summary_df], axis=1, join='outer')
#==========================================================#
|
||
|
|
||
|
|
||
|
|
||
|
##==========================================================#
##data extraction for RMA2 variables
##==========================================================#
#HD_Summary_df = pd.DataFrame()
#for variable in HDvariables:
#    Summary_df = pd.DataFrame()
#    df = pd.DataFrame()
#    for f in fs:
#        #set input and output directories
#        input_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/' + f
#        # Set working directory (where postprocessed NARClIM data is located)
#        os.chdir(input_directory)
#        #==========================================================#
#        #Load data file
#        if variable == 'depth' or variable == 'elev':
#            Clim_Var_CSVs = glob.glob('*' + variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format = '%Y') + pd.to_timedelta(df.Hour, unit='h')
#            df= df.drop(columns=['Year', 'Hour'])
#            a=len(df.columns)-1
#            df=df.iloc[:,:a]
#        if variable == 'vel':
#            #x velocity
#            Clim_Var_CSVs = glob.glob('*' +'x'+ variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format = '%Y') + pd.to_timedelta(df.Hour, unit='h')
#            dfx= df.drop(columns=['Year', 'Hour','1'])
#            #y velocity
#            Clim_Var_CSVs = glob.glob('*' +'y'+ variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format = '%Y') + pd.to_timedelta(df.Hour, unit='h')
#            dfy= df.drop(columns=['Year', 'Hour','1'])
#            df = np.sqrt(dfx*dfx + dfy*dfy)
#
#        df.columns = df.columns + '_'+ variable + '_'+ f
#        Summary_df = pd.concat([Summary_df, df], axis=1, join='outer')
#    #Optionally cut down the summary df to common years
#    if Subset_years:
#        Summary_df = Summary_df[datetime.strptime(str(startyear) + ' 01 01', '%Y %m %d').date():datetime.strptime(str(endyear) + ' 06 31', '%Y %m %d').date()]
#    HD_Summary_df = pd.concat([HD_Summary_df , Summary_df], axis=1, join='outer')
##==========================================================#
#
#
#
#
##==========================================================#
##generate and save the final data frame as csv
##==========================================================#
#Compound_df = pd.concat([WQ_Summary_df , HD_Summary_df], axis=1, join='outer')
#var = 'Scn_'
#for f in fs:
#    var = var+ '_' + f
#WQvars = 'WQ'
#for variabs in WQvariables:
#    WQvars = WQvars + '_' + variabs
#out_path = output_directory + var + '_' + WQvars + '_' + str(startyear) + '_' + str(endyear) + '_compound.csv'
#Compound_df.to_csv(out_path)
##==========================================================#
#
#
|