Simplified codebase

master
Per A Brodtkorb 8 years ago
parent 1bbf993e69
commit 2c5fdeda83

@ -143,6 +143,23 @@ def edfcnd(x, c=None, method=2):
return F return F
def _check_nmin(nmin, n):
nmin = max(nmin, 1)
if 2 * nmin > n:
warnings.warn('nmin possibly too large!')
return nmin
def _check_umin_umax(data, umin, umax, nmin):
sd = np.sort(data)
sdmax, sdmin = sd[-nmin], sd[0]
umax = sdmax if umax is None else min(umax, sdmax)
umin = sdmin if umin is None else max(umin, sdmin)
return umin, umax
def _check_nu(nu, nmin, n):
if nu is None:
nu = min(n - nmin, 100)
return nu
def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05, def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
plotflag=False): plotflag=False):
@ -198,27 +215,17 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
fitgenparrange, disprsnidx fitgenparrange, disprsnidx
""" """
if u is None: if u is None:
sd = np.sort(data)
n = len(data) n = len(data)
nmin = _check_nmin(nmin, n)
nmin = max(nmin, 0) umin, umax = _check_umin_umax(data, umin, umax, nmin)
if 2 * nmin > n: nu = _check_nu(nu, nmin, n)
warnings.warn('nmin possibly too large!')
sdmax, sdmin = sd[-nmin], sd[0]
umax = sdmax if umax is None else min(umax, sdmax)
umin = sdmin if umin is None else max(umin, sdmin)
if nu is None:
nu = min(n - nmin, 100)
u = linspace(umin, umax, nu) u = linspace(umin, umax, nu)
nu = len(u) nu = len(u)
#mrl1 = valarray(nu) # mrl1 = valarray(nu)
#srl = valarray(nu) # srl = valarray(nu)
#num = valarray(nu) # num = valarray(nu)
mean_and_std = lambda data1: (data1.mean(), data1.std(), data1.size) mean_and_std = lambda data1: (data1.mean(), data1.std(), data1.size)
dat = arr(data) dat = arr(data)
@ -229,13 +236,13 @@ def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
alpha2 = alpha / 2 alpha2 = alpha / 2
# Approximate P% confidence interval # Approximate P% confidence interval
#%Za = -invnorm(alpha2); % known mean # Za = -invnorm(alpha2); % known mean
Za = -_invt(alpha2, num - 1) # unknown mean Za = -_invt(alpha2, num - 1) # unknown mean
mrlu = mrl + Za * srl / sqrt(num) mrlu = mrl + Za * srl / sqrt(num)
mrll = mrl - Za * srl / sqrt(num) mrll = mrl - Za * srl / sqrt(num)
#options.CI = [mrll,mrlu]; # options.CI = [mrll,mrlu];
#options.numdata = num; # options.numdata = num;
titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%') titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%')
res = PlotData(mrl, u, xlab='Threshold', res = PlotData(mrl, u, xlab='Threshold',
ylab='Mean Excess', title=titleTxt) ylab='Mean Excess', title=titleTxt)
@ -330,6 +337,16 @@ def dispersion_idx(
partial duration series model. Water Resource Research, 15\bold{(2)} partial duration series model. Water Resource Research, 15\bold{(2)}
:489--494.} :489--494.}
""" """
def _find_appropriate_threshold(u, di, di_low, di_up):
k1, = np.where((di_low < di) & (di < di_up))
if len(k1) > 0:
ok_u = u[k1]
b_di = di[k1].mean() < di[k1]
k = b_di.argmax()
b_u = ok_u[k]
else:
b_u = ok_u = None
return b_u, ok_u
n = len(data) n = len(data)
if t is None: if t is None:
@ -341,19 +358,9 @@ def dispersion_idx(
t1[:] = np.floor(ti / tb) t1[:] = np.floor(ti / tb)
if u is None: if u is None:
sd = np.sort(data) nmin = _check_nmin(nmin, n)
umin, umax = _check_umin_umax(data, umin, umax, nmin)
nmin = max(nmin, 0) nu = _check_nu(nu, nmin, n)
if 2 * nmin > n:
warnings.warn('nmin possibly too large!')
sdmax, sdmin = sd[-nmin], sd[0]
umax = sdmax if umax is None else min(umax, sdmax)
umin = sdmin if umin is None else max(umin, sdmin)
if nu is None:
nu = min(n - nmin, 100)
u = linspace(umin, umax, nu) u = linspace(umin, umax, nu)
nu = len(u) nu = len(u)
@ -362,12 +369,12 @@ def dispersion_idx(
d = arr(data) d = arr(data)
mint = int(min(t1)) # ; % mint should be 0. mint = int(min(t1)) # should be 0.
maxt = int(max(t1)) maxt = int(max(t1))
M = maxt - mint + 1 M = maxt - mint + 1
occ = np.zeros(M) occ = np.zeros(M)
for ix, tresh in enumerate(u.tolist()): for ix, tresh in enumerate(u):
excess = (d > tresh) excess = (d > tresh)
lambda_ = excess.sum() / M lambda_ = excess.sum() / M
for block in range(M): for block in range(M):
@ -375,31 +382,23 @@ def dispersion_idx(
di[ix] = occ.var() / lambda_ di[ix] = occ.var() / lambda_
p = 1 - alpha p = 1.0 - alpha
diLo = _invchi2(1 - alpha / 2, M - 1) / (M - 1) di_low = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
diUp = _invchi2(alpha / 2, M - 1) / (M - 1) di_up = _invchi2(alpha / 2, M - 1) / (M - 1)
# Find appropriate threshold b_u, ok_u = _find_appropriate_threshold(u, di, di_low, di_up)
k1, = np.where((diLo < di) & (di < diUp))
if len(k1) > 0:
ok_u = u[k1]
b_di = (di[k1].mean() < di[k1])
k = b_di.argmax()
b_u = ok_u[k]
else:
b_u = ok_u = None
CItxt = '%d%s CI' % (100 * p, '%') ci_txt = '{0:d}{1} CI'.format(100 * p, '%')
titleTxt = 'Dispersion Index plot' titleTxt = 'Dispersion Index plot'
res = PlotData(di, u, title=titleTxt, res = PlotData(di, u, title=titleTxt,
labx='Threshold', laby='Dispersion Index') labx='Threshold', laby='Dispersion Index')
#'caption',CItxt); #'caption',ci_txt);
res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha) res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
res.children = [ res.children = [
PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u, PlotData(vstack([di_low * ones(nu), di_up * ones(nu)]).T, u,
xlab='Threshold', title=CItxt)] xlab='Threshold', title=ci_txt)]
res.plot_args_children = ['--r'] res.plot_args_children = ['--r']
if plotflag: if plotflag:
res.plot(di) res.plot(di)
@ -449,6 +448,27 @@ def decluster(data, t=None, thresh=None, tmin=1):
return data[i], t[i] return data[i], t[i]
def _remove_index_to_data_too_close_to_each_other(ix_e, is_too_small, di_e, ti_e, tmin):
is_too_close = np.hstack((is_too_small[0], is_too_small[:-1] | is_too_small[1:],
is_too_small[-1]))
# Find opening (no) and closing (nc) index for data beeing to close:
iy = findextrema(np.hstack([0, 0, is_too_small, 0]))
no = iy[:2] - 1
nc = iy[1::2]
for start, stop in zip(no, nc):
iz = slice(start, stop)
i_ok = _find_ok_peaks(di_e[iz], ti_e[iz], tmin)
if len(i_ok):
is_too_close[start + i_ok] = 0
# Remove data which is too close to other data.
if is_too_close.any():
i_ok, = where(1 - is_too_close)
ix_e = ix_e[i_ok]
return ix_e
def findpot(data, t=None, thresh=None, tmin=1): def findpot(data, t=None, thresh=None, tmin=1):
""" """
Retrun indices to Peaks over threshold values Retrun indices to Peaks over threshold values
@ -464,7 +484,7 @@ def findpot(data, t=None, thresh=None, tmin=1):
Returns Returns
------- -------
Ie : ndarray ix_e : ndarray
indices to extreme values, i.e., all data > tresh which are at least indices to extreme values, i.e., all data > tresh which are at least
tmin distance apart. tmin distance apart.
@ -479,10 +499,10 @@ def findpot(data, t=None, thresh=None, tmin=1):
>>> ytc, ttc = data[itc], t[itc] >>> ytc, ttc = data[itc], t[itc]
>>> ymin = 2*data.std() >>> ymin = 2*data.std()
>>> tmin = 10 # sec >>> tmin = 10 # sec
>>> I = findpot(data, t, ymin, tmin) >>> i = findpot(data, t, ymin, tmin)
>>> yp, tp = data[I], t[I] >>> yp, tp = data[i], t[i]
>>> Ie = findpot(yp, tp, ymin,tmin) >>> ix_e = findpot(yp, tp, ymin,tmin)
>>> ye, te = yp[Ie], tp[Ie] >>> ye, te = yp[ix_e], tp[ix_e]
>>> h = pylab.plot(t,data,ttc,ytc,'ro', >>> h = pylab.plot(t,data,ttc,ytc,'ro',
... t,zeros(len(t)),':', ... t,zeros(len(t)),':',
... te, ye,'k.',tp,yp,'+') ... te, ye,'k.',tp,yp,'+')
@ -491,51 +511,33 @@ def findpot(data, t=None, thresh=None, tmin=1):
-------- --------
fitgenpar, decluster, extremalidx fitgenpar, decluster, extremalidx
""" """
Data = arr(data) data = arr(data)
if t is None: if t is None:
ti = np.arange(len(Data)) t = np.arange(len(data))
else: else:
ti = arr(t) t = arr(t)
Ie, = where(Data > thresh)
Ye = Data[Ie]
Te = ti[Ie]
if len(Ye) <= 1:
return Ie
dT = np.diff(Te)
notSorted = np.any(dT < 0)
if notSorted:
I = np.argsort(Te)
Te = Te[I]
Ie = Ie[I]
Ye = Ye[I]
dT = np.diff(Te)
isTooSmall = (dT <= tmin) ix_e, = where(data > thresh)
di_e = data[ix_e]
ti_e = t[ix_e]
if len(di_e) <= 1:
return ix_e
if np.any(isTooSmall): dt = np.diff(ti_e)
isTooClose = np.hstack( not_sorted = np.any(dt < 0)
(isTooSmall[0], isTooSmall[:-1] | isTooSmall[1:], isTooSmall[-1])) if not_sorted:
i = np.argsort(ti_e)
ti_e = ti_e[i]
ix_e = ix_e[i]
di_e = di_e[i]
dt = np.diff(ti_e)
# Find opening (NO) and closing (NC) index for data beeing to close: is_too_small = (dt <= tmin)
iy = findextrema(np.hstack([0, 0, isTooSmall, 0]))
NO = iy[::2] - 1 if np.any(is_too_small):
NC = iy[1::2] ix_e = _remove_index_to_data_too_close_to_each_other(ix_e, is_too_small, di_e, ti_e, tmin)
for no, nc in zip(NO, NC): return ix_e
iz = slice(no, nc)
iOK = _find_ok_peaks(Ye[iz], Te[iz], tmin)
if len(iOK):
isTooClose[no + iOK] = 0
# Remove data which is too close to other data.
if isTooClose.any():
# len(tooClose)>0:
iOK, = where(1 - isTooClose)
Ie = Ie[iOK]
return Ie
def _find_ok_peaks(y, t, t_min): def _find_ok_peaks(y, t, t_min):
@ -874,9 +876,9 @@ class RegLogit(object):
(y * 0 + 1) * np.arange(ymin + 1, ymax + 1)) (y * 0 + 1) * np.arange(ymin + 1, ymax + 1))
z = z[:, np.flatnonzero(z.any(axis=0))] z = z[:, np.flatnonzero(z.any(axis=0))]
z1 = z1[:, np.flatnonzero(z1.any(axis=0))] z1 = z1[:, np.flatnonzero(z1.any(axis=0))]
[_mz, nz] = z.shape _mz, nz = z.shape
[_mx, nx] = X.shape _mx, nx = X.shape
[my, _ny] = y.shape my, _ny = y.shape
g = (z.sum(axis=0).cumsum() / my).reshape(-1, 1) g = (z.sum(axis=0).cumsum() / my).reshape(-1, 1)
theta00 = np.log(g / (1 - g)).ravel() theta00 = np.log(g / (1 - g)).ravel()
@ -979,7 +981,7 @@ class RegLogit(object):
self.params_std = se self.params_std = se
self.params_cov = pcov self.params_cov = pcov
self.params_tstat = (self.params / self.params_std) self.params_tstat = (self.params / self.params_std)
# % options.estdispersn %dispersion_parameter=='mean_deviance' # options.estdispersn dispersion_parameter=='mean_deviance'
if False: if False:
self.params_pvalue = 2. * _cdft(-abs(self.params_tstat), self.df) self.params_pvalue = 2. * _cdft(-abs(self.params_tstat), self.df)
bcrit = -se * _invt(self.alpha / 2, self.df) bcrit = -se * _invt(self.alpha / 2, self.df)
@ -1281,7 +1283,6 @@ def _test_dispersion_idx():
di, _u, _ok_u = dispersion_idx(data[Ie], t[Ie], tb=100) di, _u, _ok_u = dispersion_idx(data[Ie], t[Ie], tb=100)
di.plot() # a threshold around 1 seems appropriate. di.plot() # a threshold around 1 seems appropriate.
di.show() di.show()
pass
def _test_findpot(): def _test_findpot():

Loading…
Cancel
Save