# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 16:32:01 2018

@author: z5025317

Small helper functions for image comparison, outlier rejection, duplicate
detection, datenum conversion and climate-model ensemble statistics.
"""

import re
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only compare_images() plots; keep the other helpers importable on
    # systems without matplotlib.
    plt = None


def compare_images(im1, im2):
    """Plot two images next to each other with shared x and y axes.

    Both images are drawn with the 'gray' colour map and the figure is shown
    with ``plt.show()``.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required for compare_images")
    plt.figure()
    ax1 = plt.subplot(121)
    plt.imshow(im1, cmap='gray')
    # Sharing both axes with the first panel keeps zoom/pan in sync.
    plt.subplot(122, sharex=ax1, sharey=ax1)
    plt.imshow(im2, cmap='gray')
    plt.show()


def reject_outliers(data, m=2):
    """Return the entries of a numpy array that are not outliers.

    An entry is kept when its absolute distance from the mean of ``data`` is
    strictly smaller than ``m`` standard deviations of ``data``.
    """
    return data[abs(data - np.mean(data)) < m * np.std(data)]


def duplicates_dict(lst):
    """Return the duplicated items of ``lst`` and their indices.

    The result maps every item that occurs more than once to the ascending
    list of positions at which it occurs. Items must be hashable.
    """
    positions = {}
    # Single pass instead of counting and rescanning the list per item.
    for i, item in enumerate(lst):
        positions.setdefault(item, []).append(i)
    return {item: idx for item, idx in positions.items() if len(idx) > 1}


def datenum2datetime(datenum):
    """Convert an iterable of datenum values to a list of python datetimes.

    For each value the integer part is used as a proleptic Gregorian ordinal,
    the fractional part is added as a fraction of a day, and 366 days are
    subtracted.
    """
    # presumably MATLAB datenums (day count offset by 366 from python
    # ordinals) - confirm against the caller
    return [
        datetime.fromordinal(int(dn))
        + timedelta(days=float(dn) % 1)
        - timedelta(days=366)
        for dn in datenum
    ]


def select_min_med_max_dif_model(NARCLIM_df):
    """Select the three most representative models of an ensemble.

    The column means are taken with the columns in sorted order and read in
    consecutive groups of three columns per model. A model's change is the
    mean of the last column of its group minus the mean of the first one; the
    middle column is ignored. (Presumably the three columns are present day,
    near future and far future - confirm against the caller.) The models with
    the smallest change, the largest change and the change closest to the
    ensemble median are selected.

    Column names are assumed to be strings ending in a five character period
    suffix; the name without that suffix identifies the model.

    Returns:
        tuple ``(dfall, dfmin, dfmed, dfmax, Min_dif_mod_name,
        Med_dif_mod_name, Max_dif_mod_name)``: the columns of all three
        selected models (sorted by name), the columns of the min, median and
        max model individually, and the name of the last column of the min,
        median and max model.

    Raises:
        ValueError: if there are fewer than three columns with a mean.
    """
    sorted_cols = sorted(NARCLIM_df.columns)
    col_means = NARCLIM_df.reindex(columns=sorted_cols).mean()

    # A trailing incomplete group of columns is ignored.
    n_models = len(col_means) // 3
    if n_models == 0:
        raise ValueError("NARCLIM_df needs at least three columns per model")
    stop = 3 * n_models
    first_means = col_means.values[0:stop:3]
    last_means = col_means.values[2:stop:3]
    change = pd.Series(last_means - first_means,
                       index=col_means.index[2:stop:3])

    # idxmax/idxmin return labels; ties resolve to the first occurrence.
    Max_dif_mod_name = change.idxmax(skipna=True)
    Min_dif_mod_name = change.idxmin(skipna=True)
    # model whose change is closest to the median change
    Med_dif_mod_name = (change - change.median()).abs().idxmin(skipna=True)

    def model_pattern(column_name):
        # Strip the period suffix and escape the rest so that characters
        # such as '.' in a model name are matched literally.
        return re.escape(column_name[:-5])

    # data frame with min, med and max difference model
    df2 = NARCLIM_df.filter(regex='|'.join([model_pattern(Min_dif_mod_name),
                                            model_pattern(Med_dif_mod_name),
                                            model_pattern(Max_dif_mod_name)]))
    dfall = df2.reindex(columns=sorted(df2.columns))

    # data frames with the individual models
    dfmin = NARCLIM_df.filter(regex=model_pattern(Min_dif_mod_name))
    dfmed = NARCLIM_df.filter(regex=model_pattern(Med_dif_mod_name))
    dfmax = NARCLIM_df.filter(regex=model_pattern(Max_dif_mod_name))
    return (dfall, dfmin, dfmed, dfmax,
            Min_dif_mod_name, Med_dif_mod_name, Max_dif_mod_name)


def calculate_deltas_NF_FF2(Annual_df, Seasonal_df):
    """Calculate the "deltas" of annual and seasonal climate data.

    For the annual data and for each of the four seasons, the column means
    are computed and, for every model, two deltas are derived from the three
    columns of that model (taken in the column order of the input frame):
    the second column minus the first, and the third column minus the second.

    Seasons are selected by the quarter of the ``Seasonal_df`` datetime
    index: DJF=1, MAM=2, JJA=3, SON=4. Model names are the ``Seasonal_df``
    column names without their last five characters.

    NOTE(review): the "far" delta is taken relative to the second column, not
    the first - confirm this is intended.

    Returns:
        DataFrame with one row per model (sorted by name) followed by the
        rows '10th', 'median' and '90th' (percentiles over the models only),
        and the columns 'near', 'far', 'DJF_near', 'DJF_far', ...,
        'SON_near', 'SON_far'.

    Raises:
        ValueError: if ``Seasonal_df`` has no columns to derive models from.
    """
    # (quarter of the seasonal index or None for annual, output column names)
    periods = [
        (None, ['near', 'far']),
        (1, ['DJF_near', 'DJF_far']),
        (2, ['MAM_near', 'MAM_far']),
        (3, ['JJA_near', 'JJA_far']),
        (4, ['SON_near', 'SON_far']),
    ]

    # Sorted so that the row order does not depend on set iteration order.
    unique_models = sorted({name[:-5] for name in Seasonal_df.mean().index})
    if not unique_models:
        raise ValueError("Seasonal_df has no columns to derive models from")

    period_frames = []
    for quarter, Column_names in periods:
        if quarter is None:
            Mean_df = Annual_df.mean()
        else:
            Mean_df = Seasonal_df[Seasonal_df.index.quarter == quarter].mean()

        # calculate both deltas for each unique model
        delta_NF_ensemble = []
        delta_FF_ensemble = []
        for unique_model in unique_models:
            dfdiff = Mean_df.filter(regex=re.escape(unique_model))
            # positional access: 0, 1, 2 are the model's three columns
            delta_NF_ensemble.append(dfdiff.iloc[1] - dfdiff.iloc[0])
            delta_FF_ensemble.append(dfdiff.iloc[2] - dfdiff.iloc[1])

        delta_df = pd.DataFrame({Column_names[0]: delta_NF_ensemble,
                                 Column_names[1]: delta_FF_ensemble},
                                index=unique_models, columns=Column_names)

        # Percentiles are computed from the model rows only, before any
        # summary row is appended, so they cannot contaminate each other.
        summary_df = pd.DataFrame(
            {name: np.percentile(delta_df[name].values, [10, 50, 90])
             for name in Column_names},
            index=['10th', 'median', '90th'], columns=Column_names)
        period_frames.append(pd.concat([delta_df, summary_df]))

    return pd.concat(period_frames, axis=1)