# -*- coding: utf-8 -*-
#==========================================================#
#Last Updated - June 2018
#@author: z5025317 Valentin Heimhuber
#code for combining all extracted RMA2 and RMA11 results into a single data frame and saving it to CSV
#==========================================================#
#Load packages
#==========================================================#
import numpy as np
import os
import pandas as pd
import glob
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
#==========================================================#
#==========================================================#
#Input parameters and directories
#==========================================================#
#set the scenario (model run) codes and the RMA2/RMA11 variables to extract
#set beginning and end years for optionally subsetting the time series to a common period
fs=['Hwq003', 'Hwq005']
HDvariables = ['depth', 'elev','vel']
WQvariables = ['sal']
Subset_years = False
startyear=1999 #years need to be adjusted based on the time period of the model runs
endyear=2004
#year=range(startyear, endyear+1)
#set directory path for output files
output_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/Postprocessed/Compound_data/'
nodes_csv = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Chainages/Hunter_nodes.csv'
#read csv file with nodes and chainages to extract data from
nodes_df = pd.read_csv(nodes_csv)
node = nodes_df['Hunter'].values
chainages = nodes_df['x_km'].values
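#note: the nodes csv is assumed to contain a 'Hunter' column with the RMA node IDs
#to extract and an 'x_km' column with the corresponding chainages (in km)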
#==========================================================#
#==========================================================#
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print('-------------------------------------------')
    print("output directory folder didn't exist and was generated")
    print('-------------------------------------------')
#==========================================================#
#==========================================================#
#data extraction for RMA11 Variables
#==========================================================#
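#The per-node RMA11 extraction files are assumed to follow the '*_<node>_*WQ*' naming
#pattern matched by the glob below, with space-separated 'Year' and 'Hour' columns
#plus one value column per water quality constituent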
WQ_Summary_df = pd.DataFrame()
for variable in WQvariables:
    for f in fs:
        #f = 'Hwq003' #debugging override of the scenario loop - disabled so that all runs in fs are processed
        input_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output2/' + f
        # Set working directory (where the extracted RMA11 results for this run are located)
        os.chdir(input_directory)
        Summary_df = pd.DataFrame()
        df = pd.DataFrame()
        for NODE in node:
            NODE = str(NODE)
            #set input and output directories
            #==========================================================#
            #Load data file
            Clim_Var_CSVs = glob.glob('*_' + NODE + '_*WQ*')
            print(Clim_Var_CSVs)
            print(NODE)
            clim_var_csv_path = Clim_Var_CSVs[0]
            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
            df.index = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.Hour, unit='h')
            df = df.drop(columns=['Year', 'Hour'])
            #df.columns = [NODE+'_Sal'] #, NODE+'_Tem']
            df.columns = [NODE + '_' + variable + '_' + f]
            #df = df.loc[~df.index.duplicated(keep='first')]
            Summary_df = pd.concat([Summary_df, df], axis=1)
        out_path = input_directory + '/' + f + '_' + variable + '4.csv'
        print('writing ' + out_path)
        Summary_df.to_csv(out_path)
        #Optionally cut down the summary df to common years
        if Subset_years:
            Summary_df = Summary_df[datetime.strptime(str(startyear) + ' 01 01', '%Y %m %d').date():datetime.strptime(str(endyear) + ' 06 30', '%Y %m %d').date()]
        WQ_Summary_df = pd.concat([WQ_Summary_df, Summary_df], axis=1, join='outer')
#==========================================================#
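#==========================================================#
#Optional quick check of the combined water quality data frame. This is an
#illustrative sketch only (not part of the original workflow): it plots the
#first extracted node/variable/run column, using the column naming convention
#applied above, and saves the figure to the compound output directory.
#Uncomment to use.
#==========================================================#
#check_col = str(node[0]) + '_' + WQvariables[0] + '_' + fs[0]
#if check_col in WQ_Summary_df.columns:
#    fig, ax = plt.subplots(figsize=(10, 4))
#    WQ_Summary_df[check_col].plot(ax=ax)
#    ax.set_ylabel(WQvariables[0])
#    ax.set_title(check_col)
#    fig.savefig(output_directory + check_col + '_check.png', dpi=150)
#    plt.close(fig)
#==========================================================#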
##==========================================================#
##data extraction for RMA2 variables
##==========================================================#
#HD_Summary_df = pd.DataFrame()
#for variable in HDvariables:
#    Summary_df = pd.DataFrame()
#    df = pd.DataFrame()
#    for f in fs:
#        #set input and output directories
#        input_directory = 'H:/WRL_Projects/Hunter_CC_Modeling/Module_6/03_Results/Output/' + f
#        # Set working directory (where the extracted RMA2 results for this run are located)
#        os.chdir(input_directory)
#        #==========================================================#
#        #Load data file
#        if variable == 'depth' or variable == 'elev':
#            Clim_Var_CSVs = glob.glob('*' + variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.Hour, unit='h')
#            df = df.drop(columns=['Year', 'Hour'])
#            a = len(df.columns) - 1
#            df = df.iloc[:, :a]
#        if variable == 'vel':
#            #x velocity
#            Clim_Var_CSVs = glob.glob('*' + 'x' + variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.Hour, unit='h')
#            dfx = df.drop(columns=['Year', 'Hour', '1'])
#            #y velocity
#            Clim_Var_CSVs = glob.glob('*' + 'y' + variable + '*')
#            clim_var_csv_path = Clim_Var_CSVs[0]
#            df = pd.read_csv(clim_var_csv_path, index_col=False, sep=' ')
#            df.index = pd.to_datetime(df.Year, format='%Y') + pd.to_timedelta(df.Hour, unit='h')
#            dfy = df.drop(columns=['Year', 'Hour', '1'])
#            df = np.sqrt(dfx*dfx + dfy*dfy)
#
#        df.columns = df.columns + '_' + variable + '_' + f
#        Summary_df = pd.concat([Summary_df, df], axis=1, join='outer')
#    #Optionally cut down the summary df to common years
#    if Subset_years:
#        Summary_df = Summary_df[datetime.strptime(str(startyear) + ' 01 01', '%Y %m %d').date():datetime.strptime(str(endyear) + ' 06 30', '%Y %m %d').date()]
#    HD_Summary_df = pd.concat([HD_Summary_df, Summary_df], axis=1, join='outer')
##==========================================================#
#
#
#
#
##==========================================================#
##generate and save the final data frame as csv
##==========================================================#
#Compound_df = pd.concat([WQ_Summary_df, HD_Summary_df], axis=1, join='outer')
#var = 'Scn_'
#for f in fs:
#    var = var + '_' + f
#WQvars = 'WQ'
#for variabs in WQvariables:
#    WQvars = WQvars + '_' + variabs
#out_path = output_directory + var + '_' + WQvars + '_' + str(startyear) + '_' + str(endyear) + '_compound.csv'
#Compound_df.to_csv(out_path)
# #==========================================================#
#
#
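#==========================================================#
#Optional WQ-only export (illustrative sketch, not part of the original workflow):
#while the compound RMA2+RMA11 export above is commented out, the combined water
#quality frame can be written to the compound output directory on its own.
#Uncomment to use.
#==========================================================#
#Scens = 'Scn_' + '_'.join(fs)
#WQvars = 'WQ_' + '_'.join(WQvariables)
#out_path = output_directory + Scens + '_' + WQvars + '_' + str(startyear) + '_' + str(endyear) + '_WQ_only.csv'
#print('writing ' + out_path)
#WQ_Summary_df.to_csv(out_path)
#==========================================================#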