@ -19,25 +19,30 @@ from matplotlib.backends.backend_pdf import PdfPages
matplotlib . style . use ( ' ggplot ' )
matplotlib . style . use ( ' ggplot ' )
#
#
# Set working direcotry (where postprocessed NARClIM data is located)
# Set working direcotry (where postprocessed NARClIM data is located)
os . chdir ( ' C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/ NARCLIM/ ' )
os . chdir ( ' C:/Users/z5025317/WRL_Postdoc/Projects/Paper#1/ ' )
#
#
#
#
#####################################----------------------------------
#####################################----------------------------------
#set input parameters
#set input parameters
Base_period_start = ' 1990-01-01 '
Base_period_start = ' 1990-01-01 '
Base_period_end = ' 2080-01-01 ' #use last day that's not included in period as < is used for subsetting
Base_period_end = ' 2080-01-01 ' #use last day that's not included in period as < is used for subsetting
Estuary = ' Bateman ' # 'Belongil'
#Clim_var_type = 'tasmean' will create pdf for all variables in folder
#Clim_var_type = 'tasmean' will create pdf for all variables in folder
#####################################----------------------------------
#####################################----------------------------------
#set directory path for output files
#set directory path for output files
output_directory = ' Output '
output_directory = ' Output / ' + Estuary
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
#output_directory = 'J:/Project wrl2016032/NARCLIM_Raw_Data/Extracted'
if not os . path . exists ( output_directory ) :
if not os . path . exists ( output_directory ) :
os . makedirs ( output_directory )
os . makedirs ( output_directory )
print ( ' ------------------------------------------- ' )
print ( " output directory folder didn ' t exist and was generated " )
print ( " output directory folder didn ' t exist and was generated " )
Clim_Var_CSVs = glob . glob ( ' ./Site_CSVs/* ' )
print ( ' ------------------------------------------- ' )
print ( ' ------------------- ' )
Clim_Var_CSVs = glob . glob ( ' ./Data/NARCLIM_Site_CSVs/ ' + Estuary + ' /* ' )
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#Clim_Var_CSV = glob.glob('./Site_CSVs/' + Clim_var_type + '*' )
#read CSV file
#read CSV file
for clim_var_csv_path in Clim_Var_CSVs :
for clim_var_csv_path in Clim_Var_CSVs :
#clim_var_csv_path = Clim_Var_CSVs[0]
Filename = os . path . basename ( os . path . normpath ( clim_var_csv_path ) )
Filename = os . path . basename ( os . path . normpath ( clim_var_csv_path ) )
Clim_var_type = Filename . split ( ' _ ' , 1 ) [ 0 ]
Clim_var_type = Filename . split ( ' _ ' , 1 ) [ 0 ]
print ( clim_var_csv_path )
print ( clim_var_csv_path )
@ -48,8 +53,8 @@ for clim_var_csv_path in Clim_Var_CSVs:
#Full_df.dtypes
#Full_df.dtypes
#substract a constant from all values (temp)
#substract a constant from all values (temp)
if Clim_var_type == ' tasmean ' :
if Clim_var_type == ' tasmean ' or Clim_var_type == ' tasmax ' :
Full_df = Full_df . iloc [ : , 0 : 26 ] - 273.15
Full_df = Full_df . iloc [ : , 0 : ( Ncols_df - 1 ) ] - 273.15
#index the narclim periods if needed
#index the narclim periods if needed
#Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
#Full_df.loc[(Full_df.index >= '1990-01-01') & (Full_df.index < '2010-01-01'), 'period']= '1990-2009'
@ -57,7 +62,9 @@ for clim_var_csv_path in Clim_Var_CSVs:
#Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
#Full_df.loc[(Full_df.index >= '2060-01-01') & (Full_df.index < '2080-01-01'), 'period']= '2060-2079'
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
#Subset the data to the minimum base period and above (used to set the lenght of the present day climate period)
Fdf_1900_2080 = Full_df . loc [ ( Full_df . index > = Base_period_start ) & ( Full_df . index < Base_period_end ) ]
#Fdf_1900_2080 = Full_df.loc[(Full_df.index >= Base_period_start) & (Full_df.index < Base_period_end)] # not necessary if not using reanalysis models for base period
Fdf_1900_2080 = Full_df . drop ( columns = [ ' period ' ] )
Ncols_df = len ( Fdf_1900_2080 )
#Aggregate daily df to annual time series
#Aggregate daily df to annual time series
if ( Clim_var_type == ' pracc ' or Clim_var_type == ' evspsblmean ' or Clim_var_type == ' potevpmean '
if ( Clim_var_type == ' pracc ' or Clim_var_type == ' evspsblmean ' or Clim_var_type == ' potevpmean '
or Clim_var_type == ' pr1Hmaxtstep ' or Clim_var_type == ' wss1Hmaxtstep ' ) :
or Clim_var_type == ' pr1Hmaxtstep ' or Clim_var_type == ' wss1Hmaxtstep ' ) :
@ -78,14 +85,14 @@ for clim_var_csv_path in Clim_Var_CSVs:
print ( ' ------------------------------------------- ' )
print ( ' ------------------------------------------- ' )
print ( ' mean of all models for climate variable: ' + Clim_var_type )
print ( ' mean of all models for climate variable: ' + Clim_var_type )
Fdf_1900_2080_means = Fdf_1900_2080 . mean ( )
Fdf_1900_2080_means = Fdf_1900_2080 . mean ( )
Fdf_1900_2080_means . plot ( kind = ' bar ' , ylim = ( 16 , 22 ) ). figure
Fdf_1900_2080_means . plot ( kind = ' bar ' ). figure
print ( ' ------------------------------------------- ' )
print ( ' ------------------------------------------- ' )
#time series plots:
#time series plots:
#ranks = Fdf_1900_2080_means[3:].rank(axis=0)
#ranks = Fdf_1900_2080_means[3:].rank(axis=0)
df3 = Fdf_1900_2080_annual
df3 = Fdf_1900_2080_annual
Fdf_annual_sorted = df3 . reindex_axis ( df3 . mean ( ) . sort_values ( ) . index , axis = 1 )
Fdf_annual_sorted = df3 . reindex_axis ( df3 . mean ( ) . sort_values ( ) . index , axis = 1 )
Fdf_annual_sorted_subset = Fdf_annual_sorted . iloc [ : , [ 0 , 1 , 2 , 3 , 5 , 7 , 13 , 15 , 18 , 22 , 24 , 25 ]]
Fdf_annual_sorted_subset = Fdf_annual_sorted . iloc [ : , [ 0 , 1 , 2 , 3 , 5 , 7 , 13 , 15 , 18 , 22 , 24 , 25 ,30 , 33 ]]
#write the key plots to a single pdf document
#write the key plots to a single pdf document
pdf_out_file_name = Clim_var_type + ' _start_ ' + Base_period_start + ' _NARCliM_summary4.pdf '
pdf_out_file_name = Clim_var_type + ' _start_ ' + Base_period_start + ' _NARCliM_summary4.pdf '
@ -96,35 +103,71 @@ for clim_var_csv_path in Clim_Var_CSVs:
#I want to create a simple density plot for all values in present day, near future and far future
#I want to create a simple density plot for all values in present day, near future and far future
#this should be an easy indicator of change
#this should be an easy indicator of change
#subset present day simulations
#subset present day simulations
Full_current_df = Fdf_1900_2080 . iloc [ : , [ 0 , 1 , 2 ] ]
len ( Fdf_1900_2080 . columns )
Full_current_df = Fdf_1900_2080 . iloc [ : , range ( 0 , 12 ) ]
Full_current_df = Full_current_df . stack ( )
Full_current_df = Full_current_df . stack ( )
#nearfuture
#nearfuture
Full_nearfuture_df = Fdf_1900_2080 . iloc [ : , range ( 3, 15 ) ]
Full_nearfuture_df = Fdf_1900_2080 . iloc [ : , range ( 12, 24 ) ]
Full_nearfuture_df = Full_nearfuture_df . stack ( )
Full_nearfuture_df = Full_nearfuture_df . stack ( )
#farfuture
#farfuture
Full_farfuture_df = Fdf_1900_2080 . iloc [ : , range ( 15, 27 ) ]
Full_farfuture_df = Fdf_1900_2080 . iloc [ : , range ( 24, len ( Fdf_1900_2080 . columns ) ) ]
Full_farfuture_df = Full_farfuture_df . stack ( )
Full_farfuture_df = Full_farfuture_df . stack ( )
Summarized_df = pd . concat ( [ Full_current_df , Full_nearfuture_df ] , axis = 1 , ignore_index = True )
Summarized_df = pd . concat ( [ Full_current_df , Full_nearfuture_df ] , axis = 1 , ignore_index = True )
Summarized_df = pd . concat ( [ Summarized_df , Full_farfuture_df ] , axis = 1 , ignore_index = True )
Summarized_df = pd . concat ( [ Summarized_df , Full_farfuture_df ] , axis = 1 , ignore_index = True )
Summarized_df . columns = [ ' presentday ' , ' nearfuture ' , ' farfuture ' ]
Summarized_df . columns = [ ' presentday ' , ' nearfuture ' , ' farfuture ' ]
plt . title ( Clim_var_type + ' - density comparison - full period ' )
Summarized_df . plot . kde ( )
Summarized_df . boxplot ( rot = 90 )
#getting to more refined and meaningful plots
Fdf_1900_2080_sorted = Fdf_1900_2080 . reindex_axis ( sorted ( Fdf_1900_2080 . columns ) , axis = 1 )
Fdf_1900_2080_sorted_means = pd . DataFrame ( Fdf_1900_2080_sorted . mean ( ) )
df = Fdf_1900_2080_sorted_means
df = df . reset_index ( )
df = df [ df . index % 3 != 1 ]
df [ ' C ' ] = df [ 0 ] . diff ( )
a = df [ df . index == df [ ' C ' ] . argmax ( skipna = True ) ]
a [ ' index ' ]
df . iloc [ [ df [ ' C ' ] . argmax ( skipna = True ) ] ]
df [ ' C ' ] . argmax ( skipna = True )
df [ ' C ' ] . argmin ( skipna = True )
df [ ' C ' ] . argmean ( skipna = True )
df [ df [ ' C ' ] == df [ ' C ' ] . median ( ) ]
max ( df [ ' C ' ] )
Fdf_1900_2080_sorted_means . plot ( kind = ' bar ' ) . figure
MIROC_R2_df = Fdf_1900_2080 . iloc [ : , [ 1 , 13 , 28 ] ]
#end of developement
#end of developement
with PdfPages ( pdf_out_path ) as pdf :
with PdfPages ( pdf_out_path ) as pdf :
#barplot of model means
#barplot of model means
plt . title ( Clim_var_type + ' - model means - full period ' )
plt . title ( Clim_var_type + ' - model means - full period ' )
ymin = min ( Fdf_1900_2080_means )
ymin = min ( Fdf_1900_2080_means )
ymax = max ( Fdf_1900_2080_means )
ymax = max ( Fdf_1900_2080_means )
Fdf_1900_2080_means . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
Fdf_1900_2080_means . plot ( kind = ' bar ' , ylim = ( ymin , ymax ) )
Fdf_1900_2080_means . plot ( kind = ' bar ' )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . close ( )
#full period density comparison
#full period density comparison
plt . title ( Clim_var_type + ' - density comparison - full period ' )
plt . title ( Clim_var_type + ' - density comparison - full period - one model ' )
Summarized_df . plot . kde ( )
MIROC_R2_df . plot . kde ( )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Annual means for one model ' )
Fdf_1900_2080_annual . iloc [ : , [ 1 , 13 , 28 ] ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
#annual box
plt . title ( Clim_var_type + ' - Annual means for one model B ' )
Fdf_1900_2080_annual . iloc [ : , [ 3 , 18 , 30 ] ] . boxplot ( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . close ( )
#annual box
#annual box
plt . title ( Clim_var_type + ' - Annual means ' )
plt . title ( Clim_var_type + ' - Annual means for one model C ' )
Fdf_1900_2080_annual . boxplot ( rot = 90 )
Fdf_1900_2080_annual . iloc[ : , [ 8 , 17 , 26 ] ] . boxplot( rot = 90 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
pdf . savefig ( bbox_inches = ' tight ' , pad_inches = 0.4 )
plt . close ( )
plt . close ( )
#monthly box
#monthly box