some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/datasets/anes96/data.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/datasets/anes96/data.py
@@ -0,0 +1,119 @@
+"""American National Election Survey 1996"""
+from numpy import log
+
+from statsmodels.datasets import utils as du
+
+__docformat__ = "restructuredtext"
+
+COPYRIGHT = "This is public domain."
+TITLE = __doc__  # the title mirrors the module docstring
+SOURCE = (
+    "\nhttp://www.electionstudies.org/\n"
+    "\nThe American National Election Studies.\n"
+)
+
+# No separate long description exists, so both names share one string.
+DESCRLONG = DESCRSHORT = (
+    "This data is a subset of the American National Election Studies of 1996."
+)
+
+NOTE = """::
+
+    Number of observations - 944
+    Number of variables - 10
+
+    Variable name definitions::
+
+            popul - Census place population in 1000s
+            TVnews - Number of times per week that respondent watches TV news.
+            PID - Party identification of respondent.
+                0 - Strong Democrat
+                1 - Weak Democrat
+                2 - Independent-Democrat
+                3 - Independent-Independent
+                4 - Independent-Republican
+                5 - Weak Republican
+                6 - Strong Republican
+            age - Age of respondent.
+            educ - Education level of respondent
+                1 - 1-8 grades
+                2 - Some high school
+                3 - High school graduate
+                4 - Some college
+                5 - College degree
+                6 - Master's degree
+                7 - PhD
+            income - Income of household
+                1  - None or less than $2,999
+                2  - $3,000-$4,999
+                3  - $5,000-$6,999
+                4  - $7,000-$8,999
+                5  - $9,000-$9,999
+                6  - $10,000-$10,999
+                7  - $11,000-$11,999
+                8  - $12,000-$12,999
+                9  - $13,000-$13,999
+                10 - $14,000-$14,999
+                11 - $15,000-$16,999
+                12 - $17,000-$19,999
+                13 - $20,000-$21,999
+                14 - $22,000-$24,999
+                15 - $25,000-$29,999
+                16 - $30,000-$34,999
+                17 - $35,000-$39,999
+                18 - $40,000-$44,999
+                19 - $45,000-$49,999
+                20 - $50,000-$59,999
+                21 - $60,000-$74,999
+                22 - $75,000-$89,999
+                23 - $90,000-$104,999
+                24 - $105,000 and over
+            vote - Expected vote
+                0 - Clinton
+                1 - Dole
+            The following 3 variables all take the values:
+                1 - Extremely liberal
+                2 - Liberal
+                3 - Slightly liberal
+                4 - Moderate
+                5 - Slightly conservative
+                6 - Conservative
+                7 - Extremely conservative
+            selfLR - Respondent's self-reported political leanings from "Left"
+                to "Right".
+            ClinLR - Respondent's impression of Bill Clinton's political
+                leanings from "Left" to "Right".
+            DoleLR - Respondent's impression of Bob Dole's political leanings
+                from "Left" to "Right".
+            logpopul - log(popul + .1)
+"""
+
+
+def load_pandas():
+    """Return the anes96 data as a Dataset with column 5 as endog.
+
+    Returns
+    -------
+    Dataset
+        See DATASET_PROPOSAL.txt for more information.
+    """
+    frame = _get_data()
+    return du.process_pandas(frame, exog_idx=[10, 2, 6, 7, 8], endog_idx=5)
+
+
+def load():
+    """Return the anes96 Dataset by delegating to ``load_pandas``.
+
+    Returns
+    -------
+    Dataset
+        See DATASET_PROPOSAL.txt for more information.
+    """
+    return load_pandas()
+
+
+def _get_data():
+    """Read anes96.csv, add ``logpopul`` = log(popul + .1), cast to float."""
+    df = du.strip_column_names(du.load_csv(__file__, 'anes96.csv', sep=r'\s'))
+    df['logpopul'] = log(df['popul'] + 0.1)
+    return df.astype(float)