some new features
This commit is contained in:
@ -0,0 +1,60 @@
|
||||
"""Yearly sunspots data 1700-2008"""
|
||||
from statsmodels.datasets import utils as du
|
||||
|
||||
# Markup format declared for the docstrings in this module.
__docformat__ = 'restructuredtext'
|
||||
|
||||
# Licensing statement for the bundled data.
COPYRIGHT = """This data is public domain."""
|
||||
# The dataset title reuses the module docstring.
TITLE = __doc__
|
||||
# Provenance of the data: the upstream URL and the raw source file names.
SOURCE = """
|
||||
http://www.ngdc.noaa.gov/stp/solar/solarda3.html
|
||||
|
||||
The original dataset contains monthly data on sunspot activity in the file
|
||||
./src/sunspots_yearly.dat. There is also sunspots_monthly.dat.
|
||||
"""
|
||||
|
||||
# Short description of the dataset.
DESCRSHORT = """Yearly (1700-2008) data on sunspots from the National
|
||||
Geophysical Data Center."""
|
||||
|
||||
# No separate long description is provided; it aliases the short one.
DESCRLONG = DESCRSHORT
|
||||
|
||||
# Free-form notes: observation/variable counts and variable definitions.
NOTE = """::
|
||||
|
||||
Number of Observations - 309 (Annual 1700 - 2008)
|
||||
Number of Variables - 1
|
||||
Variable name definitions::
|
||||
|
||||
SUNACTIVITY - Number of sunspots for each year
|
||||
|
||||
The data file contains a 'YEAR' variable that is not returned by load.
|
||||
"""
|
||||
|
||||
|
||||
def load_pandas():
    """
    Load the yearly sunspot data as a pandas-backed Dataset.

    Returns
    -------
    Dataset
        Built with ``data`` set to the full frame returned by ``_get_data``
        (its column labels are passed as ``names``) and ``endog`` set to the
        SUNACTIVITY column indexed by the YEAR column.
    """
    data = _get_data()
    # TODO: time series
    endog = data.set_index(data.YEAR).SUNACTIVITY
    # Label the endog with the column it actually holds; the previous
    # 'volume' label did not match any variable in this dataset.
    dataset = du.Dataset(data=data, names=list(data.columns),
                         endog=endog, endog_name='SUNACTIVITY')
    return dataset
|
||||
|
||||
|
||||
def load():
    """
    Return the yearly sunspot dataset.

    Returns
    -------
    Dataset
        The object produced by ``load_pandas``.
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    This function delegates entirely to ``load_pandas``, which supplies an
    endog but no exog when it builds the Dataset.
    """
    sunspots = load_pandas()
    return sunspots
|
||||
|
||||
|
||||
def _get_data():
    """Read ``sunspots.csv`` through ``du.load_csv`` and cast it to float."""
    raw = du.load_csv(__file__, 'sunspots.csv')
    return raw.astype(float)
|
||||
Reference in New Issue
Block a user