"""Grunfeld (1950) Investment Data"""

# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext'

# Dataset metadata strings.
COPYRIGHT = """This is public domain."""
# The title is the module docstring, so the docstring must stay the first
# statement in the file.
TITLE = __doc__
SOURCE = """This is the Grunfeld (1950) Investment Data.

The source for the data was the original 11-firm data set from Grunfeld's Ph.D.
thesis recreated by Kleiber and Zeileis (2008) "The Grunfeld Data at 50".
The data can be found here.
http://statmath.wu-wien.ac.at/~zeileis/grunfeld/

For a note on the many versions of the Grunfeld data circulating see:
http://www.stanford.edu/~clint/bench/grunfeld.htm
"""

DESCRSHORT = """Grunfeld (1950) Investment Data for 11 U.S. Firms."""

# No separate long description is provided; it reuses the short one.
DESCRLONG = DESCRSHORT

# The variable list follows a reST ``::`` marker, so it is indented to form a
# literal block.
NOTE = """Number of observations - 220 (20 years for 11 firms)

Number of variables - 5

Variables name definitions::

    invest - Gross investment in 1947 dollars
    value - Market value as of Dec. 31 in 1947 dollars
    capital - Stock of plant and equipment in 1947 dollars
    firm - General Motors, US Steel, General Electric, Chrysler,
           Atlantic Refining, IBM, Union Oil, Westinghouse, Goodyear,
           Diamond Match, American Steel
    year - 1935 - 1954

Note that raw_data has firm expanded to dummy variables, since it is a
string categorical variable.
"""

from os.path import dirname, abspath, join

from numpy import recfromtxt, column_stack, array

from scikits.statsmodels.tools import categorical
import scikits.statsmodels.tools.datautils as du


def load():
    """
    Load the Grunfeld data and return it as a Dataset instance.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    raw_data has the firm variable expanded to dummy variables for each
    firm (ie., there is no reference dummy)
    """
    records = _get_data()
    # Expand the string-valued 'firm' column before building the Dataset;
    # the expanded array is attached to the result as ``raw_data``.
    firm_expanded = categorical(records, col='firm', drop=True)
    dataset = du.process_recarray(records, endog_idx=0, stack=False)
    dataset.raw_data = firm_expanded
    return dataset


def load_pandas():
    """
    Load the Grunfeld data and return a Dataset built with pandas.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    raw_data is a pandas DataFrame that has the firm variable expanded to
    dummy variables for each firm (ie., there is no reference dummy)
    """
    # Imported here so that pandas is only required when this loader is used.
    from pandas import DataFrame

    records = _get_data()
    firm_expanded = categorical(records, col='firm', drop=True)
    dataset = du.process_recarray_pandas(records, endog_idx=0)
    dataset.raw_data = DataFrame(firm_expanded)
    return dataset


def _get_data():
    """
    Read ``grunfeld.csv`` from this module's directory into a record array.

    The first row of the file supplies the field names (``names=True``).
    The five columns are parsed as float, float, float, 17-byte string and
    float; presumably these are invest, value, capital, firm and year --
    confirm against the CSV header.

    Returns
    -------
    data : record array
        The array returned by ``numpy.recfromtxt``, one record per CSV row.
    """
    csv_path = join(dirname(abspath(__file__)), 'grunfeld.csv')
    # The whole file is parsed inside the ``with`` block, so the handle is
    # closed as soon as the array has been built instead of being leaked.
    with open(csv_path, 'rb') as csv_file:
        # 'S17' is the same fixed-width bytes dtype as the older 'a17' alias.
        data = recfromtxt(csv_file, delimiter=",", names=True,
                          dtype="f8,f8,f8,S17,f8")
    return data