some new features
This commit is contained in:
@ -0,0 +1,5 @@
|
||||
__all__ = ["load", "load_pandas",
|
||||
"COPYRIGHT", "TITLE", "SOURCE", "DESCRSHORT", "DESCRLONG", "NOTE"]
|
||||
from .data import (
|
||||
load, load_pandas,
|
||||
COPYRIGHT, TITLE, SOURCE, DESCRSHORT, DESCRLONG, NOTE)
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,21 @@
|
||||
"COMMITTEE","BILLS104","SIZE","SUBS","STAFF","PRESTIGE","BILLS103"
|
||||
"Appropriations",6,58,13,109,1,9
|
||||
"Budget",23,42,0,39,1,101
|
||||
"Rules",44,13,2,25,1,54
|
||||
"Ways_and_Means",355,39,5,23,1,542
|
||||
"Banking",125,51,5,61,0,101
|
||||
"Economic_Educ_Oppor",131,43,5,69,0,158
|
||||
"Commerce",271,49,4,79,0,196
|
||||
"International_Relations",63,44,3,68,0,40
|
||||
"Government_Reform",149,51,7,99,0,72
|
||||
"Judiciary",253,35,5,56,0,168
|
||||
"Agriculture",81,49,5,46,0,60
|
||||
"National_Security",89,55,7,48,0,75
|
||||
"Resources",142,44,5,58,0,98
|
||||
"TransInfrastructure",155,61,6,74,0,69
|
||||
"Science",27,50,4,58,0,25
|
||||
"Small_Business",8,43,4,29,0,9
|
||||
"Veterans_Affairs",28,33,3,36,0,41
|
||||
"House_Oversight",68,12,0,24,0,233
|
||||
"Stds_of_Conduct",1,10,0,9,0,0
|
||||
"Intelligence",4,16,2,24,0,2
|
||||
|
@ -0,0 +1,69 @@
|
||||
"""First 100 days of the US House of Representatives 1995"""
|
||||
from statsmodels.datasets import utils as du
|
||||
|
||||
__docformat__ = 'restructuredtext'
|
||||
|
||||
COPYRIGHT = """Used with express permission from the original author,
|
||||
who retains all rights."""
|
||||
TITLE = __doc__
|
||||
SOURCE = """
|
||||
Jeff Gill's `Generalized Linear Models: A Unified Approach`
|
||||
|
||||
http://jgill.wustl.edu/research/books.html
|
||||
"""
|
||||
|
||||
DESCRSHORT = """Number of bill assignments in the 104th House in 1995"""
|
||||
|
||||
DESCRLONG = """The example in Gill seeks to explain the number of bill
|
||||
assignments in the first 100 days of the US' 104th House of Representatives.
|
||||
The response variable is the number of bill assignments in the first 100 days
|
||||
over 20 Committees. The explanatory variables in the example are the number of
|
||||
assignments in the first 100 days of the 103rd House, the number of members on
|
||||
the committee, the number of subcommittees, the log of the number of staff
|
||||
assigned to the committee, a dummy variable indicating whether
|
||||
the committee is a high prestige committee, and an interaction term between
|
||||
the number of subcommittees and the log of the staff size.
|
||||
|
||||
The data returned by load are not cleaned to represent the above example.
|
||||
"""
|
||||
|
||||
NOTE = """::
|
||||
|
||||
Number of Observations - 20
|
||||
Number of Variables - 6
|
||||
Variable name definitions::
|
||||
|
||||
BILLS104 - Number of bill assignments in the first 100 days of the
|
||||
104th House of Representatives.
|
||||
SIZE - Number of members on the committee.
|
||||
SUBS - Number of subcommittees.
|
||||
STAFF - Number of staff members assigned to the committee.
|
||||
PRESTIGE - PRESTIGE == 1 is a high prestige committee.
|
||||
BILLS103 - Number of bill assignments in the first 100 days of the
|
||||
103rd House of Representatives.
|
||||
|
||||
Committee names are included as a variable in the data file though not
|
||||
returned by load.
|
||||
"""
|
||||
|
||||
|
||||
def load_pandas():
    """Load the committee data via the pandas processing helper.

    Returns
    -------
    Dataset
        Whatever ``du.process_pandas`` builds from the frame returned by
        ``_get_data``, called with ``endog_idx=0`` (the first column of
        that frame).
    """
    return du.process_pandas(_get_data(), endog_idx=0)
|
||||
|
||||
|
||||
def load():
    """Load the committee data and return a data class.

    This is a thin alias: it simply delegates to ``load_pandas``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    dataset = load_pandas()
    return dataset
|
||||
|
||||
|
||||
def _get_data():
    """Read ``committee.csv`` and return its numeric columns as floats.

    The positional slice ``1:7`` keeps columns 1 through 6 and drops
    column 0, so the leading (committee name) column is not returned.
    """
    # NOTE(review): load_csv presumably resolves the file name relative to
    # this module's location (it is given __file__) -- confirm in utils.
    raw = du.load_csv(__file__, 'committee.csv')
    numeric = raw.iloc[:, 1:7]
    return numeric.astype(float)
|
||||
Reference in New Issue
Block a user