You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
217 lines
7.3 KiB
Python
217 lines
7.3 KiB
Python
"""
|
|
Generalized additive models
|
|
|
|
"""
|
|
|
|
# JP:
|
|
# changes: use PolySmoother instead of crashing bspline
|
|
# TODO: check/catalogue required interface of a smoother
|
|
# TODO: replace default smoother by corresponding function to initialize
|
|
# other smoothers
|
|
# TODO: fix iteration, don't define class with iterator methods, use looping;
|
|
# add maximum iteration and other optional stop criteria
|
|
# fixed some of the dimension problems in PolySmoother,
|
|
# now graph for example looks good
|
|
# NOTE: example script is now in examples folder
|
|
|
|
import numpy as np
|
|
|
|
from scikits.statsmodels.genmod.families import family
|
|
from smoothers import PolySmoother as SmoothingSpline # relative import
|
|
from scikits.statsmodels.genmod.glm import GLM
|
|
|
|
def default_smoother(x):
|
|
_x = x.copy()
|
|
_x.sort()
|
|
n = x.shape[0]
|
|
# taken form smooth.spline in R
|
|
|
|
if n < 50:
|
|
nknots = n
|
|
else:
|
|
a1 = np.log(50) / np.log(2)
|
|
a2 = np.log(100) / np.log(2)
|
|
a3 = np.log(140) / np.log(2)
|
|
a4 = np.log(200) / np.log(2)
|
|
if n < 200:
|
|
nknots = 2**(a1 + (a2 - a1) * (n - 50)/150.)
|
|
elif n < 800:
|
|
nknots = 2**(a2 + (a3 - a2) * (n - 200)/600.)
|
|
elif n < 3200:
|
|
nknots = 2**(a3 + (a4 - a3) * (n - 800)/2400.)
|
|
else:
|
|
nknots = 200 + (n - 3200.)**0.2
|
|
knots = _x[np.linspace(0, n-1, nknots).astype(np.int32)]
|
|
|
|
s = SmoothingSpline(knots, x=x.copy())
|
|
s.gram(d=2)
|
|
s.target_df = 5
|
|
return s
|
|
|
|
class Offset(object):
|
|
|
|
def __init__(self, fn, offset):
|
|
self.fn = fn
|
|
self.offset = offset
|
|
|
|
def __call__(self, *args, **kw):
|
|
return self.fn(*args, **kw) + self.offset
|
|
|
|
class Results(object):
|
|
|
|
def __init__(self, Y, alpha, design, smoothers, family, offset):
|
|
self.Y = Y
|
|
self.alpha = alpha
|
|
self.smoothers = smoothers
|
|
self.offset = offset
|
|
self.family = family
|
|
self.design = design
|
|
self.offset = offset
|
|
self.mu = self(design)
|
|
|
|
def __call__(self, design):
|
|
return self.family.link.inverse(self.predict(design))
|
|
|
|
def predict(self, design):
|
|
return np.sum(self.smoothed(design), axis=0) + self.alpha
|
|
|
|
def smoothed(self, design):
|
|
return np.array([self.smoothers[i]() + self.offset[i] for i in range(design.shape[1])])
|
|
|
|
class AdditiveModel(object):
|
|
|
|
def __init__(self, design, smoothers=None, weights=None):
|
|
self.design = design
|
|
if not weights is None:
|
|
self.weights = weights
|
|
else:
|
|
self.weights = np.ones(self.design.shape[0])
|
|
|
|
self.smoothers = smoothers or [default_smoother(design[:,i]) for i in range(design.shape[1])]
|
|
for i in range(design.shape[1]):
|
|
self.smoothers[i].df = 10
|
|
self.family = family.Gaussian()
|
|
|
|
def __iter__(self):
|
|
self.iter = 0
|
|
self.dev = np.inf
|
|
return self
|
|
|
|
def next(self):
|
|
_results = self.results; Y = self.results.Y
|
|
mu = _results.predict(self.design)
|
|
offset = np.zeros(self.design.shape[1], np.float64)
|
|
alpha = (Y * self.weights).sum() / self.weights.sum()
|
|
for i in range(self.design.shape[1]):
|
|
tmp = self.smoothers[i]()
|
|
self.smoothers[i].smooth(Y - alpha - mu + tmp,
|
|
weights=self.weights)
|
|
tmp2 = self.smoothers[i]()
|
|
offset[i] = -(tmp2*self.weights).sum() / self.weights.sum()
|
|
mu += tmp2 - tmp
|
|
|
|
#print self.iter
|
|
#self.iter += 1 #missing incrementing of iter counter NOT
|
|
return Results(Y, alpha, self.design, self.smoothers, self.family, offset)
|
|
|
|
def cont(self, tol=1.0e-04):
|
|
self.iter += 1 #moved here to always count, not necessary
|
|
print self.iter,
|
|
curdev = (((self.results.Y - self.results.predict(self.design))**2) * self.weights).sum()
|
|
|
|
if self.iter > 30: #kill it, no max iterationoption
|
|
return False
|
|
if np.fabs((self.dev - curdev) / curdev) < tol:
|
|
self.dev = curdev
|
|
return False
|
|
|
|
#self.iter += 1
|
|
self.dev = curdev
|
|
return True
|
|
|
|
def df_resid(self):
|
|
return self.results.Y.shape[0] - np.array([self.smoothers[i].df_fit() for i in range(self.design.shape[1])]).sum()
|
|
|
|
def estimate_scale(self):
|
|
return ((self.results.Y - self.results(self.design))**2).sum() / self.df_resid()
|
|
|
|
def fit(self, Y):
|
|
iter(self) # what does this do? anything?
|
|
mu = 0
|
|
alpha = (Y * self.weights).sum() / self.weights.sum()
|
|
|
|
offset = np.zeros(self.design.shape[1], np.float64)
|
|
|
|
for i in range(self.design.shape[1]):
|
|
self.smoothers[i].smooth(Y - alpha - mu,
|
|
weights=self.weights)
|
|
tmp = self.smoothers[i]()
|
|
offset[i] = (tmp * self.weights).sum() / self.weights.sum()
|
|
tmp -= tmp.sum()
|
|
mu += tmp
|
|
|
|
self.results = Results(Y, alpha, self.design, self.smoothers, self.family, offset)
|
|
|
|
while self.cont():
|
|
self.results = self.next()
|
|
|
|
return self.results
|
|
|
|
class Model(GLM, AdditiveModel):
|
|
|
|
niter = 2
|
|
|
|
# def __init__(self, design, smoothers=None, family=family.Gaussian()):
|
|
# GLM.__init__(self, design, family=family)
|
|
# AdditiveModel.__init__(self, design, smoothers=smoothers)
|
|
# self.family = family
|
|
def __init__(self, endog, exog, smoothers=None, family=family.Gaussian()):
|
|
GLM.__init__(self, endog, exog, family=family)
|
|
AdditiveModel.__init__(self, exog, smoothers=smoothers)
|
|
|
|
def next(self):
|
|
_results = self.results; Y = _results.Y
|
|
if np.isnan(self.weights).all(): print "nanweights1"
|
|
_results.mu = self.family.link.inverse(_results.predict(self.design))
|
|
weights = self.family.weights(_results.mu)
|
|
if np.isnan(weights).all():
|
|
self.weights = weights
|
|
print "nanweights2"
|
|
Z = _results.predict(self.design) + self.family.link.deriv(_results.mu) * (Y - _results.mu)
|
|
m = AdditiveModel(self.design, smoothers=self.smoothers, weights=self.weights)
|
|
_results = m.fit(Z)
|
|
_results.Y = Y
|
|
_results.mu = self.family.link.inverse(_results.predict(self.design))
|
|
self.iter += 1
|
|
self.results = _results
|
|
|
|
return _results
|
|
|
|
def estimate_scale(self, Y=None):
|
|
"""
|
|
Return Pearson\'s X^2 estimate of scale.
|
|
"""
|
|
|
|
if Y is None:
|
|
Y = self.Y
|
|
resid = Y - self.results.mu
|
|
return (np.power(resid, 2) / self.family.variance(self.results.mu)).sum() / AdditiveModel.df_resid(self)
|
|
|
|
def fit(self, Y):
|
|
self.Y = np.asarray(Y, np.float64)
|
|
|
|
iter(self)
|
|
alpha = self.Y.mean()
|
|
Z = self.family.link(alpha) + self.family.link.deriv(alpha) * (Y - alpha)
|
|
m = AdditiveModel(self.design, smoothers=self.smoothers)
|
|
self.results = m.fit(Z)
|
|
self.results.mu = self.family.link.inverse(self.results.predict(self.design))
|
|
self.results.Y = Y
|
|
|
|
while self.cont():
|
|
self.results = self.next()
|
|
self.scale = self.results.scale = self.estimate_scale()
|
|
|
|
|
|
return self.results
|