'''
Contains the FitDistribution and Profile classes, which are important
classes for fitting data to various continuous and discrete probability
distributions.

Author: Per A. Brodtkorb 2008
'''
from __future__ import division, absolute_import
import warnings

from ..plotbackend import plotbackend
from ..misc import ecross, findcross

import numdifftools  # @UnresolvedImport
from scipy import special
from scipy.linalg import pinv2
from scipy import optimize

import numpy
import numpy as np
from numpy import alltrue, arange, ravel, sum, zeros, log, sqrt, exp
from numpy import (
    atleast_1d, any, asarray, nan, pi,  # reshape, #repeat, product, ndarray,
    isfinite)
from numpy import flatnonzero as nonzero


__all__ = ['Profile', 'FitDistribution']

floatinfo = np.finfo(float)

arr = asarray
all = alltrue  # @ReservedAssignment


def chi2isf(p, df):
    return special.chdtri(df, p)


def chi2sf(x, df):
    return special.chdtrc(df, x)


def norm_ppf(q):
    return special.ndtri(q)
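# chi2isf, chi2sf and norm_ppf are thin wrappers around scipy.special
# (chdtri, chdtrc and ndtri).  Profile uses chi2isf(alpha, 1) for the
# likelihood-ratio cut-off: for alpha = 0.05, chi2isf(0.05, 1) ~ 3.84, so the
# profile curve is cut 0.5 * 3.84 ~ 1.92 units below its maximum; norm_ppf
# supplies the normal quantile used for the delta-method parameter intervals.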

# Frozen RV class
class rv_frozen(object):

    ''' Frozen continuous or discrete 1D Random Variable object (RV)

    Methods
    -------
    rvs(size=1)
        Random variates.
    pdf(x)
        Probability density function.
    cdf(x)
        Cumulative distribution function.
    sf(x)
        Survival function (1-cdf --- sometimes more accurate).
    ppf(q)
        Percent point function (inverse of cdf --- percentiles).
    isf(q)
        Inverse survival function (inverse of sf).
    stats(moments='mv')
        Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
    entropy()
        (Differential) entropy of the RV.
    '''

    def __init__(self, dist, *args, **kwds):
        self.dist = dist
        args, loc, scale = dist._parse_args(*args, **kwds)
        if len(args) == dist.numargs - 2:  #
            # if isinstance(dist, rv_continuous):
            self.par = args + (loc, scale)
        else:  # rv_discrete
            self.par = args + (loc,)

    def pdf(self, x):
        ''' Probability density function at x of the given RV.'''
        return self.dist.pdf(x, *self.par)

    def cdf(self, x):
        '''Cumulative distribution function at x of the given RV.'''
        return self.dist.cdf(x, *self.par)

    def ppf(self, q):
        '''Percent point function (inverse of cdf) at q of the given RV.'''
        return self.dist.ppf(q, *self.par)

    def isf(self, q):
        '''Inverse survival function at q of the given RV.'''
        return self.dist.isf(q, *self.par)

    def rvs(self, size=None):
        '''Random variates of given type.'''
        kwds = dict(size=size)
        return self.dist.rvs(*self.par, **kwds)

    def sf(self, x):
        '''Survival function (1-cdf) at x of the given RV.'''
        return self.dist.sf(x, *self.par)

    def stats(self, moments='mv'):
        ''' Some statistics of the given RV'''
        kwds = dict(moments=moments)
        return self.dist.stats(*self.par, **kwds)

    def median(self):
        return self.dist.median(*self.par)

    def mean(self):
        return self.dist.mean(*self.par)

    def var(self):
        return self.dist.var(*self.par)

    def std(self):
        return self.dist.std(*self.par)

    def moment(self, n):
        par1 = self.par[:self.dist.numargs]
        return self.dist.moment(n, *par1)

    def entropy(self):
        return self.dist.entropy(*self.par)

    def pmf(self, k):
        '''Probability mass function at k of the given RV'''
        return self.dist.pmf(k, *self.par)

    def interval(self, alpha):
        return self.dist.interval(alpha, *self.par)

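# A minimal usage sketch (assuming `dist` is a wafo.stats / scipy-compatible
# distribution object); rv_frozen simply stores the parsed parameters and
# delegates every call to the underlying distribution:
#
#   frozen = rv_frozen(dist, *shape_args, loc=0.0, scale=1.0)
#   frozen.cdf(x)        # same as dist.cdf(x, *frozen.par)
#   frozen.rvs(size=10)  # ten variates drawn with the stored parameters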

# Internal class to profile parameters of a given distribution
class Profile(object):

    ''' Profile Log-likelihood or Product Spacing function, which can be used
    for constructing confidence intervals for either phat[i], the probability
    or the quantile.

    Parameters
    ----------
    fit_dist : FitDistribution object
        with ML or MPS estimated distribution parameters.
    **kwds : named arguments with keys
        i : scalar integer
            defining which distribution parameter to keep fixed in the
            profiling process (default first non-fixed parameter)
        pmin, pmax : real scalars
            Interval for either the parameter, phat(i), prb, or x, used in the
            optimization of the profile function (default is based on the
            100*(1-alpha)% confidence interval computed with the delta method.)
        N : scalar integer
            Max number of points used in Lp (default 100)
        x : real scalar
            Quantile (return value) (default None)
        logSF : real scalar
            log survival probability, i.e., SF = Prob(X>x;phat) (default None)
        link : function connecting the x-quantile and the survival probability
            (SF) with the fixed distribution parameter, i.e.:
            self.par[i] = link(x, logSF, self.par, i), where
            logSF = log(Prob(X>x;phat)).
            This means that if:
                1) x is not None then x is profiled
                2) logSF is not None then logSF is profiled
                3) x and logSF are None then self.par[i] is profiled (default)
        alpha : real scalar
            confidence coefficient (default 0.05)

    Returns
    -------
    Lp : Profile log-likelihood function with parameters phat given
        the data, phat(i), probability (prb) and quantile (x) (if given), i.e.,
            Lp = max(log(f(phat|data, phat(i)))),
        or
            Lp = max(log(f(phat|data, phat(i), x, prb)))

    Member methods
    --------------
    plot() : Plot profile function with 100(1-alpha)% confidence interval
    get_bounds() : Return 100(1-alpha)% confidence interval

    Member variables
    ----------------
    fit_dist : FitDistribution data object.
    data : profile function values
    args : profile function arguments
    alpha : confidence coefficient
    Lmax : Maximum value of profile function
    alpha_cross_level :

    PROFILE is a utility function for making inferences either on a particular
    component of the vector phat or the quantile, x, or the probability, SF.
    This is usually more accurate than using the delta method assuming
    asymptotic normality of the ML estimator or the MPS estimator.

    Examples
    --------
    # MLE
    >>> import wafo.stats as ws
    >>> R = ws.weibull_min.rvs(1,size=100);
    >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0)

    # Better CI for phat.par[i=0]
    >>> Lp = Profile(phat, i=0)
    >>> Lp.plot()
    >>> phat_ci = Lp.get_bounds(alpha=0.1)

    >>> SF = 1./990
    >>> x = phat.isf(SF)

    # CI for x
    >>> Lx = phat.profile(i=0, x=x, link=phat.dist.link)
    >>> Lx.plot()
    >>> x_ci = Lx.get_bounds(alpha=0.2)

    # CI for logSF=log(SF)
    >>> Lsf = phat.profile(i=0, logSF=log(SF), link=phat.dist.link)
    >>> Lsf.plot()
    >>> sf_ci = Lsf.get_bounds(alpha=0.2)
    '''

    def __init__(self, fit_dist, **kwds):

        try:
            i0 = (1 - numpy.isfinite(fit_dist.par_fix)).argmax()
        except Exception:
            i0 = 0
        self.fit_dist = fit_dist
        self.data = None
        self.args = None
        self.title = ''
        self.xlabel = ''
        self.ylabel = 'Profile log'
        (self.i_fixed, self.N, self.alpha, self.pmin, self.pmax, self.x,
         self.logSF, self.link) = map(
            kwds.get,
            ['i', 'N', 'alpha', 'pmin', 'pmax', 'x', 'logSF', 'link'],
            [i0, 100, 0.05, None, None, None, None, None])

        self.title = '%g%s CI' % (100 * (1.0 - self.alpha), '%')
        if fit_dist.method.startswith('ml'):
            self.ylabel = self.ylabel + 'likelihood'
            Lmax = fit_dist.LLmax
        elif fit_dist.method.startswith('mps'):
            self.ylabel = self.ylabel + ' product spacing'
            Lmax = fit_dist.LPSmax
        else:
            raise ValueError(
                "PROFILE is only valid for ML- or MPS- estimators")

        if fit_dist.par_fix is None:
            isnotfixed = np.ones(fit_dist.par.shape, dtype=bool)
        else:
            isnotfixed = 1 - numpy.isfinite(fit_dist.par_fix)

        self.i_notfixed = nonzero(isnotfixed)

        self.i_fixed = atleast_1d(self.i_fixed)

        if 1 - isnotfixed[self.i_fixed]:
            raise IndexError(
                "Index i must be equal to an index to one of the free " +
                "parameters.")

        isfree = isnotfixed
        isfree[self.i_fixed] = False
        self.i_free = nonzero(isfree)

        self.Lmax = Lmax
        self.alpha_Lrange = 0.5 * chi2isf(self.alpha, 1)
        self.alpha_cross_level = Lmax - self.alpha_Lrange
        # lowLevel = self.alpha_cross_level - self.alpha_Lrange / 7.0
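        # The 100*(1-alpha)% profile confidence region is the set of values
        # whose profile stays above alpha_cross_level, i.e.
        # {p : Lp(p) >= Lmax - 0.5 * chi2isf(alpha, 1)}; get_bounds() locates
        # the two crossings of this level.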

        phatv = fit_dist.par.copy()
        self._par = phatv.copy()

        # Set up variable to profile and _local_link function
        self.profile_x = self.x is not None
        self.profile_logSF = not (self.logSF is None or self.profile_x)
        self.profile_par = not (self.profile_x or self.profile_logSF)

        if self.link is None:
            self.link = self.fit_dist.dist.link
        if self.profile_par:
            self._local_link = self._par_link
            self.xlabel = 'phat(%d)' % self.i_fixed
            p_opt = self._par[self.i_fixed]
        elif self.profile_x:
            self.logSF = fit_dist.logsf(self.x)
            self._local_link = self._x_link
            self.xlabel = 'x'
            p_opt = self.x
        elif self.profile_logSF:
            p_opt = self.logSF
            self.x = fit_dist.isf(exp(p_opt))
            self._local_link = self._logSF_link
            self.xlabel = 'log(SF)'
        else:
            raise ValueError(
                "You must supply a non-empty quantile (x) or probability " +
                "(logSF) in order to profile it!")

        self.xlabel = self.xlabel + ' (' + fit_dist.dist.name + ')'

        phatfree = phatv[self.i_free].copy()
        self._set_profile(phatfree, p_opt)

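    # The _*_link helpers below implement _local_link: they map the profiled
    # quantity (a distribution parameter, a quantile x, or logSF) back to the
    # fixed distribution parameter self.par[i_fixed] via the user-supplied
    # link function.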
    def _par_link(self, fix_par, par):
        return fix_par

    def _x_link(self, fix_par, par):
        return self.link(fix_par, self.logSF, par, self.i_fixed)

    def _logSF_link(self, fix_par, par):
        return self.link(self.x, fix_par, par, self.i_fixed)

    def _correct_Lmax(self, Lmax):
        if Lmax > self.Lmax:  # foundNewphat = True
            warnings.warn(
                'The fitted parameters do not provide the optimum fit. ' +
                'Something is wrong with the fit.')
            dL = self.Lmax - Lmax
            self.alpha_cross_level -= dL
            self.Lmax = Lmax

    def _profile_optimum(self, phatfree0, p_opt):
        phatfree = optimize.fmin(
            self._profile_fun, phatfree0, args=(p_opt,), disp=0)
        Lmax = -self._profile_fun(phatfree, p_opt)
        self._correct_Lmax(Lmax)
        return Lmax, phatfree

    def _set_profile(self, phatfree0, p_opt):
        pvec = self._get_pvec(phatfree0, p_opt)

        self.data = numpy.ones_like(pvec) * nan
        k1 = (pvec >= p_opt).argmax()

        for size, step in ((-1, -1), (pvec.size, 1)):
            phatfree = phatfree0.copy()
            for ix in xrange(k1, size, step):
                Lmax, phatfree = self._profile_optimum(phatfree, pvec[ix])
                self.data[ix] = Lmax
                if self.data[ix] < self.alpha_cross_level:
                    break
        np.putmask(pvec, np.isnan(self.data), nan)
        self.args = pvec

        self._prettify_profile()

    def _prettify_profile(self):
        pvec = self.args
        ix = nonzero(numpy.isfinite(pvec))
        self.data = self.data[ix]
        self.args = pvec[ix]
        cond = self.data == -numpy.inf
        if any(cond):
            ind, = cond.nonzero()
            self.data.put(ind, floatinfo.min / 2.0)
            ind1 = numpy.where(ind == 0, ind, ind - 1)
            cl = self.alpha_cross_level - self.alpha_Lrange / 2.0
            t0 = ecross(self.args, self.data, ind1, cl)
            self.data.put(ind, cl)
            self.args.put(ind, t0)

    def _get_variance(self):
        if self.profile_par:
            pvar = self.fit_dist.par_cov[self.i_fixed, :][:, self.i_fixed]
        else:
            i_notfixed = self.i_notfixed
            phatv = self._par

            if self.profile_x:
                gradfun = numdifftools.Gradient(self._myinvfun)
            else:
                gradfun = numdifftools.Gradient(self._myprbfun)
            drl = gradfun(phatv[self.i_notfixed])

            pcov = self.fit_dist.par_cov[i_notfixed, :][:, i_notfixed]
            pvar = sum(numpy.dot(drl, pcov) * drl)
        return pvar

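    # _get_pvec builds the grid of values to profile over.  When pmin/pmax are
    # not supplied, they are found by searching outwards from the optimum until
    # the profile drops below the confidence cut-off; the grid is then made
    # denser near the optimum (about half of the N points) than in the tails.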
    def _get_pvec(self, phatfree0, p_opt):
        ''' return proper interval for the variable to profile
        '''

        linspace = numpy.linspace
        if self.pmin is None or self.pmax is None:

            pvar = self._get_variance()

            if pvar <= 1e-5 or numpy.isnan(pvar):
                pvar = max(abs(p_opt) * 0.5, 0.5)

            p_crit = (-norm_ppf(self.alpha / 2.0) *
                      sqrt(numpy.ravel(pvar)) * 1.5)
            if self.pmin is None:
                self.pmin = self._search_pmin(phatfree0,
                                              p_opt - 5.0 * p_crit, p_opt)
            p_crit_low = (p_opt - self.pmin) / 5

            if self.pmax is None:
                self.pmax = self._search_pmax(phatfree0,
                                              p_opt + 5.0 * p_crit, p_opt)
            p_crit_up = (self.pmax - p_opt) / 5

            N4 = numpy.floor(self.N / 4.0)

            pvec1 = linspace(self.pmin, p_opt - p_crit_low, N4 + 1)
            pvec2 = linspace(
                p_opt - p_crit_low, p_opt + p_crit_up, self.N - 2 * N4)
            pvec3 = linspace(p_opt + p_crit_up, self.pmax, N4 + 1)
            pvec = numpy.unique(numpy.hstack((pvec1, p_opt, pvec2, pvec3)))

        else:
            pvec = linspace(self.pmin, self.pmax, self.N)
        return pvec

    def _search_pmin(self, phatfree0, p_min0, p_opt):
        phatfree = phatfree0.copy()

        dp = p_opt - p_min0
        if dp < 1e-2:
            dp = 0.1
        p_min_opt = p_min0
        Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
        for _j in range(50):
            p_min = p_opt - dp
            Lmax, phatfree = self._profile_optimum(phatfree, p_min)
            if np.isnan(Lmax):
                dp *= 0.33
            elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2:
                p_min_opt = p_min
                dp *= 0.33
            elif Lmax < self.alpha_cross_level:
                p_min_opt = p_min
                break
            else:
                dp *= 1.67
        return p_min_opt

    def _search_pmax(self, phatfree0, p_max0, p_opt):
        phatfree = phatfree0.copy()

        dp = p_max0 - p_opt
        if dp < 1e-2:
            dp = 0.1
        p_max_opt = p_max0
        Lmax, phatfree = self._profile_optimum(phatfree, p_opt)
        for _j in range(50):
            p_max = p_opt + dp
            Lmax, phatfree = self._profile_optimum(phatfree, p_max)
            if np.isnan(Lmax):
                dp *= 0.33
            elif Lmax < self.alpha_cross_level - self.alpha_Lrange * 2:
                p_max_opt = p_max
                dp *= 0.33
            elif Lmax < self.alpha_cross_level:
                p_max_opt = p_max
                break
            else:
                dp *= 1.67
        return p_max_opt

    def _myinvfun(self, phatnotfixed):
        mphat = self._par.copy()
        mphat[self.i_notfixed] = phatnotfixed
        prb = exp(self.logSF)
        return self.fit_dist.dist.isf(prb, *mphat)

    def _myprbfun(self, phatnotfixed):
        mphat = self._par.copy()
        mphat[self.i_notfixed] = phatnotfixed
        logSF = self.fit_dist.dist.logsf(self.x, *mphat)
        return np.where(np.isfinite(logSF), logSF, np.nan)

    def _profile_fun(self, free_par, fix_par):
        ''' Return negative of loglike or logps function

        free_par - vector of free parameters
        fix_par  - fixed parameter, i.e., either quantile (return level),
                   probability (return period) or distribution parameter
        '''
        par = self._par.copy()
        par[self.i_free] = free_par
        # _local_link: connects fixed quantile or probability with fixed
        # distribution parameter
        par[self.i_fixed] = self._local_link(fix_par, par)
        return self.fit_dist.fitfun(par)

    def get_bounds(self, alpha=0.05):
        '''Return confidence interval for profiled parameter
        '''
        if alpha < self.alpha:
            warnings.warn(
                'Might not be able to return CI with alpha less than %g' %
                self.alpha)
        cross_level = self.Lmax - 0.5 * chi2isf(alpha, 1)
        ind = findcross(self.data, cross_level)
        N = len(ind)
        if N == 0:
            warnings.warn('''Number of crossings is zero, i.e.,
            upper and lower bounds are not found!''')
            CI = (self.pmin, self.pmax)

        elif N == 1:
            x0 = ecross(self.args, self.data, ind, cross_level)
            isUpcrossing = self.data[ind] > self.data[ind + 1]
            if isUpcrossing:
                CI = (x0, self.pmax)
                warnings.warn('Upper bound is larger')
            else:
                CI = (self.pmin, x0)
                warnings.warn('Lower bound is smaller')

        elif N == 2:
            CI = ecross(self.args, self.data, ind, cross_level)
        else:
            warnings.warn('Number of crossings too large! Something is wrong!')
            CI = ecross(self.args, self.data, ind[[0, -1]], cross_level)
        return CI

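    # plot() draws the profile curve (args vs. data) together with two
    # horizontal dashed lines at Lmax and at alpha_cross_level, and vertical
    # dashed lines at the confidence bounds returned by get_bounds().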
    def plot(self, axis=None):
        ''' Plot profile function with 100(1-alpha)% CI
        '''
        if axis is None:
            axis = plotbackend.gca()

        p_ci = self.get_bounds(self.alpha)
        axis.plot(
            self.args, self.data,
            self.args[[0, -1]], [self.Lmax, ] * 2, 'r--',
            self.args[[0, -1]], [self.alpha_cross_level, ] * 2, 'r--')
        axis.vlines(p_ci, ymin=axis.get_ylim()[0],
                    ymax=self.Lmax,  # self.alpha_cross_level,
                    color='r', linestyles='--')
        axis.set_title(self.title)
        axis.set_ylabel(self.ylabel)
        axis.set_xlabel(self.xlabel)


class FitDistribution(rv_frozen):

    '''
    Return estimators of shape, location, and scale from data

    Starting points for the fit are given by the input arguments.  For any
    arguments not given starting points, dist._fitstart(data) is called
    to get the starting estimates.

    You can hold some parameters fixed to specific values by passing in
    keyword arguments f0..fn for shape parameters and floc, fscale for
    location and scale parameters.

    Parameters
    ----------
    dist : scipy distribution object
        distribution to fit to data
    data : array-like
        Data to use in calculating the ML or MPS estimators
    args : optional
        Starting values for any shape arguments (those not specified
        will be determined by dist._fitstart(data))
    kwds : loc, scale
        Starting values for the location and scale parameters
        Special keyword arguments are recognized as holding certain
        parameters fixed:
            f0..fn : hold respective shape parameters fixed
            floc : hold location parameter fixed to specified value
            fscale : hold scale parameter fixed to specified value
        method : of estimation. Options are
            'ml' : Maximum Likelihood method (default)
            'mps': Maximum Product Spacing method
        alpha : scalar, optional
            Confidence coefficient (default=0.05)
        search : bool
            If true search for best estimator (default),
            otherwise return object with initial distribution parameters
        copydata : bool
            If true copydata (default)
        optimizer : The optimizer to use.  The optimizer must take func,
            and starting position as the first two arguments,
            plus args (for extra arguments to pass to the
            function to be optimized) and disp=0 to suppress
            output as keyword arguments.

    Returns
    -------
    phat : FitDistribution object
        Fitted distribution object with the following member variables:
        LLmax  : loglikelihood function evaluated using par
        LPSmax : log product spacing function evaluated using par
        pvalue : p-value for the fit
        par : distribution parameters (fixed and fitted)
        par_cov : covariance of distribution parameters
        par_fix : fixed distribution parameters
        par_lower : lower (1-alpha)% confidence bound for the parameters
        par_upper : upper (1-alpha)% confidence bound for the parameters

    Note
    ----
    `data` is sorted by this function, so if `copydata`==False the data
    in your namespace will be sorted as well.

    Examples
    --------
    Estimate distribution parameters for weibull_min distribution.
    >>> import wafo.stats as ws
    >>> R = ws.weibull_min.rvs(1,size=100);
    >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0)

    # Plot various diagnostic plots to assess quality of fit.
    >>> phat.plotfitsummary()

    # phat.par holds the estimated parameters
    # phat.par_upper upper CI for parameters
    # phat.par_lower lower CI for parameters

    # Better CI for phat.par[0]
    >>> Lp = phat.profile(i=0)
    >>> Lp.plot()
    >>> p_ci = Lp.get_bounds(alpha=0.1)

    >>> SF = 1./990
    >>> x = phat.isf(SF)

    # CI for x
    >>> Lx = phat.profile(i=0,x=x,link=phat.dist.link)
    >>> Lx.plot()
    >>> x_ci = Lx.get_bounds(alpha=0.2)

    # CI for logSF=log(SF)
    >>> Lsf = phat.profile(i=0, logSF=log(SF), link=phat.dist.link)
    >>> Lsf.plot()
    >>> sf_ci = Lsf.get_bounds(alpha=0.2)
    '''

    def __init__(self, dist, data, *args, **kwds):
        extradoc = '''
    plotfitsummary()
        Plot various diagnostic plots to assess quality of fit.
    plotecdf()
        Plot Empirical and fitted Cumulative Distribution Function
    plotesf()
        Plot Empirical and fitted Survival Function
    plotepdf()
        Plot Empirical and fitted Probability Distribution Function
    plotresq()
        Displays a residual quantile plot.
    plotresprb()
        Displays a residual probability plot.

    profile()
        Return Profile Log-likelihood or Product Spacing function.

    Parameters
    ----------
    x : array-like
        quantiles
    q : array-like
        lower or upper tail probability
    size : int or tuple of ints, optional
        shape of random variates (default computed from input arguments)
    moments : str, optional
        composed of letters ['mvsk'] specifying which moments to compute where
        'm' = mean, 'v' = variance, 's' = (Fisher's) skew and
        'k' = (Fisher's) kurtosis. (default='mv')
    '''
        # Member variables
        # ----------------
        # data - data used in fitting
        # alpha - confidence coefficient
        # method - method used
        # LLmax - loglikelihood function evaluated using par
        # LPSmax - log product spacing function evaluated using par
        # pvalue - p-value for the fit
        # search - True if search for distribution parameters (default)
        # copydata - True if copy input data (default)
        #
        # par - parameters (fixed and fitted)
        # par_cov - covariance of parameters
        # par_fix - fixed parameters
        # par_lower - lower (1-alpha)% confidence bound for the parameters
        # par_upper - upper (1-alpha)% confidence bound for the parameters
        #
        # '''
        self.__doc__ = rv_frozen.__doc__ + extradoc
        self.dist = dist
        numargs = dist.numargs

        self.method = self.alpha = self.par_fix = self.search = None
        self.copydata = None
        m_variables = ['method', 'alpha', 'par_fix', 'search', 'copydata']
        m_defaults = ['ml', 0.05, None, True, True]
        for (name, val) in zip(m_variables, m_defaults):
            setattr(self, name, kwds.get(name, val))

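        # Choose the objective that is minimized: the MPS method minimizes the
        # negative log product-spacing function (dist.nlogps), while the
        # default ML method minimizes the negative log-likelihood (dist.nnlf).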
        if self.method.lower()[:].startswith('mps'):
            self._fitfun = dist.nlogps
        else:
            self._fitfun = dist.nnlf

        self.data = ravel(data)
        if self.copydata:
            self.data = self.data.copy()
        self.data.sort()

        par, fixedn = self._fit(*args, **kwds)
        self.par = arr(par)
        somefixed = len(fixedn) > 0
        if somefixed:
            self.par_fix = [nan, ] * len(self.par)
            for i in fixedn:
                self.par_fix[i] = self.par[i]

            self.i_notfixed = nonzero(1 - isfinite(self.par_fix))
            self.i_fixed = nonzero(isfinite(self.par_fix))

        numpar = numargs + 2
        self.par_cov = zeros((numpar, numpar))
        self._compute_cov()

        # Set confidence interval for parameters
        pvar = numpy.diag(self.par_cov)
        zcrit = -norm_ppf(self.alpha / 2.0)
        self.par_lower = self.par - zcrit * sqrt(pvar)
        self.par_upper = self.par + zcrit * sqrt(pvar)

        self.LLmax = -dist.nnlf(self.par, self.data)
        self.LPSmax = -dist.nlogps(self.par, self.data)
        self.pvalue = self._pvalue(self.par, self.data, unknown_numpar=numpar)

    def __repr__(self):
        params = ['alpha', 'method', 'LLmax', 'LPSmax', 'pvalue',
                  'par', 'par_lower', 'par_upper', 'par_fix', 'par_cov']
        t = ['%s:\n' % self.__class__.__name__]
        for par in params:
            t.append('%s = %s\n' % (par, str(getattr(self, par))))
        return ''.join(t)

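    # _reduce_func strips the parameters fixed via f0..fn/floc/fscale out of
    # the start vector x0 and wraps the objective so that the optimizer only
    # varies the free parameters; restore() re-inserts the fixed values before
    # each objective evaluation.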
    def _reduce_func(self, args, kwds):
        args = list(args)
        Nargs = len(args)
        fixedn = []
        index = range(Nargs)
        names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
        x0 = args[:]
        for n, key in zip(index[::-1], names[::-1]):
            if key in kwds:
                fixedn.append(n)
                args[n] = kwds[key]
                del x0[n]

        fitfun = self._fitfun

        if len(fixedn) == 0:
            func = fitfun
            restore = None
        else:
            if len(fixedn) == len(index):
                raise ValueError("All parameters fixed. " +
                                 "There is nothing to optimize.")

            def restore(args, theta):
                # Replace with theta for all numbers not in fixedn
                # This allows the non-fixed values to vary, but
                # we still call self.nnlf with all parameters.
                i = 0
                for n in range(Nargs):
                    if n not in fixedn:
                        args[n] = theta[i]
                        i += 1
                return args

            def func(theta, x):
                newtheta = restore(args[:], theta)
                return fitfun(newtheta, x)

        return x0, func, restore, args, fixedn

    def _fit(self, *args, **kwds):

        dist = self.dist
        data = self.data

        Narg = len(args)
        if Narg > dist.numargs:
            raise ValueError("Too many input arguments.")
        start = [None] * 2
        if (Narg < dist.numargs) or not ('loc' in kwds and 'scale' in kwds):
            # get distribution specific starting locations
            start = dist._fitstart(data)
            args += start[Narg:-2]
        loc = kwds.get('loc', start[-2])
        scale = kwds.get('scale', start[-1])
        args += (loc, scale)
        x0, func, restore, args, fixedn = self._reduce_func(args, kwds)
        if self.search:
            optimizer = kwds.get('optimizer', optimize.fmin)
            # convert string to function in scipy.optimize
            if (not callable(optimizer) and
                    isinstance(optimizer, (str, unicode))):
                if not optimizer.startswith('fmin_'):
                    optimizer = "fmin_" + optimizer
                if optimizer == 'fmin_':
                    optimizer = 'fmin'
                try:
                    optimizer = getattr(optimize, optimizer)
                except AttributeError:
                    raise ValueError("%s is not a valid optimizer" % optimizer)

            vals = optimizer(func, x0, args=(ravel(data),), disp=0)
            vals = tuple(vals)
        else:
            vals = tuple(x0)
        if restore is not None:
            vals = restore(args, vals)
        return vals, fixedn

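    # _compute_cov estimates the parameter covariance as the negative
    # (pseudo-)inverse of the Hessian returned by dist.hessian_nlogps
    # (an observed-information-style estimate based on the log product-spacing
    # function); rows/columns belonging to fixed parameters are left at zero.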
    def _compute_cov(self):
        '''Compute covariance
        '''
        somefixed = (self.par_fix is not None) and any(isfinite(self.par_fix))
        # H1 = numpy.asmatrix(self.dist.hessian_nnlf(self.par, self.data))
        H = numpy.asmatrix(self.dist.hessian_nlogps(self.par, self.data))
        self.H = H
        try:
            if somefixed:
                allfixed = all(isfinite(self.par_fix))
                if allfixed:
                    self.par_cov[:, :] = 0
                else:
                    pcov = -pinv2(H[self.i_notfixed, :][..., self.i_notfixed])
                    for row, ix in enumerate(list(self.i_notfixed)):
                        self.par_cov[ix, self.i_notfixed] = pcov[row, :]
            else:
                self.par_cov = -pinv2(H)
        except Exception:
            self.par_cov[:, :] = nan

    def fitfun(self, phat):
        return self._fitfun(phat, self.data)

    def profile(self, **kwds):
        ''' Profile Log-likelihood or Log Product Spacing function,
        which can be used for constructing confidence intervals for
        either phat(i), the probability or the quantile.

        Parameters
        ----------
        **kwds : named arguments with keys
        i : scalar integer
            defining which distribution parameter to profile, i.e. which
            parameter to keep fixed (default first non-fixed parameter)
        pmin, pmax : real scalars
            Interval for either the parameter, phat(i), prb, or x, used in the
            optimization of the profile function (default is based on the
            100*(1-alpha)% confidence interval computed with the delta method.)
        N : scalar integer
            Max number of points used in Lp (default 100)
        x : real scalar
            Quantile (return value) (default None)
        logSF : real scalar
            log survival probability, i.e., SF = Prob(X>x;phat) (default None)
        link : function connecting the x-quantile and the survival probability
            (SF) with the fixed distribution parameter, i.e.:
            self.par[i] = link(x,logSF,self.par,i), where
            logSF = log(Prob(X>x;phat)).
            This means that if:
                1) x is not None then x is profiled
                2) logSF is not None then logSF is profiled
                3) x and logSF are None then self.par[i] is profiled (default)
        alpha : real scalar
            confidence coefficient (default 0.05)

        Returns
        -------
        Lp : Profile log-likelihood function with parameters phat given
            the data, phat(i), probability (prb) and quantile (x), i.e.,
                Lp = max(log(f(phat|data,phat(i)))),
            or
                Lp = max(log(f(phat|data,phat(i),x,prb)))

        Member methods
        --------------
        plot() : Plot profile function with 100(1-alpha)% confidence interval
        get_bounds() : Return 100(1-alpha)% confidence interval

        Member variables
        ----------------
        fit_dist : FitDistribution data object.
        data : profile function values
        args : profile function arguments
        alpha : confidence coefficient
        Lmax : Maximum value of profile function
        alpha_cross_level :

        PROFILE is a utility function for making inferences either on a
        particular component of the vector phat or the quantile, x, or the
        probability, SF. This is usually more accurate than using the delta
        method assuming asymptotic normality of the ML estimator or the MPS
        estimator.

        Examples
        --------
        # MLE
        >>> import wafo.stats as ws
        >>> R = ws.weibull_min.rvs(1,size=100);
        >>> phat = FitDistribution(ws.weibull_min, R, 1, scale=1, floc=0.0)

        # Better CI for phat.par[i=0]
        >>> Lp = Profile(phat, i=0)
        >>> Lp.plot()
        >>> phat_ci = Lp.get_bounds(alpha=0.1)

        >>> SF = 1./990
        >>> x = phat.isf(SF)

        # CI for x
        >>> Lx = phat.profile(i=0, x=x, link=phat.dist.link)
        >>> Lx.plot()
        >>> x_ci = Lx.get_bounds(alpha=0.2)

        # CI for logSF=log(SF)
        >>> Lsf = phat.profile(i=0, logSF=log(SF), link=phat.dist.link)
        >>> Lsf.plot()
        >>> sf_ci = Lsf.get_bounds(alpha=0.2)

        See also
        --------
        Profile
        '''
        return Profile(self, **kwds)

    def plotfitsummary(self):
        ''' Plot various diagnostic plots to assess the quality of the fit.

        PLOTFITSUMMARY displays a probability plot, a density plot, a residual
        quantile plot and a residual probability plot.
        The purpose of these plots is to graphically assess whether the data
        could come from the fitted distribution. If so, the empirical CDF and
        PDF should follow the model and the residual plots will be linear.
        Other distribution types will introduce curvature in the residual
        plots.
        '''
        plotbackend.subplot(2, 2, 1)
        # self.plotecdf()
        self.plotesf()
        plotbackend.subplot(2, 2, 2)
        self.plotepdf()
        plotbackend.subplot(2, 2, 3)
        self.plotresq()
        plotbackend.subplot(2, 2, 4)
        self.plotresprb()

        fixstr = ''
        if self.par_fix is not None:
            numfix = len(self.i_fixed)
            if numfix > 0:
                format0 = ', '.join(['%d'] * numfix)
                format1 = ', '.join(['%g'] * numfix)
                phatistr = format0 % tuple(self.i_fixed)
                phatvstr = format1 % tuple(self.par[self.i_fixed])
                fixstr = 'Fixed: phat[%s] = %s ' % (phatistr, phatvstr)

        infostr = 'Fit method: %s, Fit p-value: %2.2f %s' % (
            self.method, self.pvalue, fixstr)
        try:
            plotbackend.figtext(0.05, 0.01, infostr)
        except Exception:
            pass

    def plotesf(self, symb1='r-', symb2='b.'):
        ''' Plot Empirical and fitted Survival Function

        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution.
        If so, the empirical SF should resemble the model SF.
        Other distribution types will introduce deviations in the plot.
        '''
        n = len(self.data)
        SF = (arange(n, 0, -1)) / n
        plotbackend.semilogy(
            self.data, SF, symb2, self.data, self.sf(self.data), symb1)
        # plotbackend.plot(self.data,SF,'b.',self.data,self.sf(self.data),'r-')
        plotbackend.xlabel('x')
        plotbackend.ylabel('F(x) (%s)' % self.dist.name)
        plotbackend.title('Empirical SF plot')

    def plotecdf(self, symb1='r-', symb2='b.'):
        ''' Plot Empirical and fitted Cumulative Distribution Function

        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution.
        If so, the empirical CDF should resemble the model CDF.
        Other distribution types will introduce deviations in the plot.
        '''
        n = len(self.data)
        F = (arange(1, n + 1)) / n
        plotbackend.plot(self.data, F, symb2,
                         self.data, self.cdf(self.data), symb1)
        plotbackend.xlabel('x')
        plotbackend.ylabel('F(x) (%s)' % self.dist.name)
        plotbackend.title('Empirical CDF plot')

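    # _get_grid picks 'nice' histogram bin edges (roughly 4*n**(1/4) bins,
    # rounded to a 1-2-5 step size), and _staircase turns bin edges and bin
    # heights into x/y arrays that trace the histogram outline, which
    # plotepdf() draws next to the fitted density.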
    def _get_grid(self, odd=False):
        x = np.atleast_1d(self.data)
        n = np.ceil(4 * np.sqrt(np.sqrt(len(x))))
        mn = x.min()
        mx = x.max()
        d = (mx - mn) / n * 2
        e = np.floor(np.log(d) / np.log(10))
        m = np.floor(d / 10 ** e)
        if m > 5:
            m = 5
        elif m > 2:
            m = 2
        d = m * 10 ** e
        mn = (np.floor(mn / d) - 1) * d - odd * d / 2
        mx = (np.ceil(mx / d) + 1) * d + odd * d / 2
        limits = np.arange(mn, mx, d)
        return limits

    def _staircase(self, x, y):
        xx = x.reshape(-1, 1).repeat(3, axis=1).ravel()[1:-1]
        yy = y.reshape(-1, 1).repeat(3, axis=1)
        # yy[0,0] = 0.0 # pdf
        yy[:, 0] = 0.0  # histogram
        yy.shape = (-1,)
        yy = numpy.hstack((yy, 0.0))
        return xx, yy

    def _get_empirical_pdf(self):
        limits = self._get_grid()
        pdf, x = np.histogram(self.data, bins=limits, normed=True)
        return self._staircase(x, pdf)

    def plotepdf(self, symb1='r-', symb2='b-'):
        '''Plot Empirical and fitted Probability Density Function

        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution.
        If so, the histogram should resemble the model density.
        Other distribution types will introduce deviations in the plot.
        '''
        x, pdf = self._get_empirical_pdf()
        ymax = pdf.max()
        # plotbackend.hist(self.data,normed=True,fill=False)
        plotbackend.plot(self.data, self.pdf(self.data), symb1,
                         x, pdf, symb2)
        ax = list(plotbackend.axis())
        ax[3] = min(ymax * 1.3, ax[3])
        plotbackend.axis(ax)
        plotbackend.xlabel('x')
        plotbackend.ylabel('f(x) (%s)' % self.dist.name)
        plotbackend.title('Density plot')

    def plotresq(self, symb1='r-', symb2='b.'):
        '''PLOTRESQ displays a residual quantile plot.

        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution. If so, the
        plot will be linear. Other distribution types will introduce
        curvature in the plot.
        '''
        n = len(self.data)
        eprob = (arange(1, n + 1) - 0.5) / n
        y = self.ppf(eprob)
        y1 = self.data[[0, -1]]
        plotbackend.plot(self.data, y, symb2, y1, y1, symb1)
        plotbackend.xlabel('Empirical')
        plotbackend.ylabel('Model (%s)' % self.dist.name)
        plotbackend.title('Residual Quantile Plot')
        plotbackend.axis('tight')
        plotbackend.axis('equal')

    def plotresprb(self, symb1='r-', symb2='b.'):
        ''' PLOTRESPRB displays a residual probability plot.

        The purpose of the plot is to graphically assess whether
        the data could come from the fitted distribution. If so, the
        plot will be linear. Other distribution types will introduce curvature
        in the plot.
        '''
        n = len(self.data)
        # ecdf = (0.5:n-0.5)/n;
        ecdf = arange(1, n + 1) / (n + 1)
        mcdf = self.cdf(self.data)
        p1 = [0, 1]
        plotbackend.plot(ecdf, mcdf, symb2,
                         p1, p1, symb1)
        plotbackend.xlabel('Empirical')
        plotbackend.ylabel('Model (%s)' % self.dist.name)
        plotbackend.title('Residual Probability Plot')
        plotbackend.axis('equal')
        plotbackend.axis([0, 1, 0, 1])

    def _pvalue(self, theta, x, unknown_numpar=None):
        ''' Return P-value for the fit using Moran's negative log Product
        Spacings statistic

        where theta are the parameters (including loc and scale)

        Note: the data in x must be sorted
        '''
        dx = numpy.diff(x, axis=0)
        tie = (dx == 0)
        if any(tie):
            warnings.warn(
                'P-value is on the conservative side (i.e. too large) due to' +
                ' ties in the data!')

        T = self.dist.nlogps(theta, x)

        n = len(x)
        np1 = n + 1
        if unknown_numpar is None:
            k = len(theta)
        else:
            k = unknown_numpar

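        # m and v below approximate the mean and variance of Moran's
        # log-spacing statistic (0.57722 is the Euler-Mascheroni constant);
        # the standardized statistic Tn, with a 0.5*k correction for the k
        # estimated parameters, is then referred to a chi-square distribution
        # with n degrees of freedom (cf. Cheng & Stephens, 1989).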
        isParUnKnown = True
        m = (np1) * (log(np1) + 0.57722) - 0.5 - 1.0 / (12. * (np1))
        v = (np1) * (pi ** 2. / 6.0 - 1.0) - 0.5 - 1.0 / (6. * (np1))
        C1 = m - sqrt(0.5 * n * v)
        C2 = sqrt(v / (2.0 * n))
        # chi2 with n degrees of freedom
        Tn = (T + 0.5 * k * isParUnKnown - C1) / C2
        pvalue = chi2sf(Tn, n)  # _WAFODIST.chi2.sf(Tn, n)
        return pvalue


def test_docstrings():
    import doctest
    doctest.testmod()


def test1():
    import wafo.stats as ws
    dist = ws.weibull_min
    # dist = ws.bradford
    R = dist.rvs(0.3, size=1000)
    phat = FitDistribution(dist, R, method='ml')

    # Better CI for phat.par[i=0]
    Lp1 = Profile(phat, i=0)  # @UnusedVariable
    # Lp2 = Profile(phat, i=2)
    # SF = 1./990
    # x = phat.isf(SF)
    #
    # CI for x
    # Lx = Profile(phat, i=0,x=x,link=phat.dist.link)
    # Lx.plot()
    # x_ci = Lx.get_bounds(alpha=0.2)
    #
    # CI for logSF=log(SF)
    # Lsf = phat.profile(i=0, logSF=log(SF), link=phat.dist.link)
    # Lsf.plot()
    # sf_ci = Lsf.get_bounds(alpha=0.2)
    # pass


# _WAFODIST = ppimport('wafo.stats.distributions')
# nbinom(10, 0.75).rvs(3)
# import matplotlib
# matplotlib.interactive(True)
# t = _WAFODIST.bernoulli(0.75).rvs(3)
# x = np.r_[5, 10]
# npr = np.r_[9, 9]
# t2 = _WAFODIST.bd0(x, npr)
# Examples MLE and better CI for phat.par[0]
# R = _WAFODIST.weibull_min.rvs(1, size=100);
# phat = _WAFODIST.weibull_min.fit(R, 1, 1, par_fix=[nan, 0, nan])
# Lp = phat.profile(i=0)
# Lp.plot()
# Lp.get_bounds(alpha=0.1)
# R = 1. / 990
# x = phat.isf(R)
#
# CI for x
# Lx = phat.profile(i=0, x=x)
# Lx.plot()
# Lx.get_bounds(alpha=0.2)
#
# CI for logSF=log(SF)
# Lpr = phat.profile(i=0, logSF=log(R), link=phat.dist.link)
# Lpr.plot()
# Lpr.get_bounds(alpha=0.075)
#
# _WAFODIST.dlaplace.stats(0.8, loc=0)
# pass
# t = _WAFODIST.planck(0.51000000000000001)
# t.ppf(0.5)
# t = _WAFODIST.zipf(2)
# t.ppf(0.5)
# import pylab as plb
# _WAFODIST.rice.rvs(1)
# x = plb.linspace(-5, 5)
# y = _WAFODIST.genpareto.cdf(x, 0)
# plb.plot(x,y)
# plb.show()
#
#
# on = ones((2, 3))
# r = _WAFODIST.genpareto.rvs(0, size=100)
# pht = _WAFODIST.genpareto.fit(r, 1, par_fix=[0, 0, nan])
# lp = pht.profile()


if __name__ == '__main__':
    test1()
    # test_docstrings()