reconnect moved files to git repo
This commit is contained in:
@ -0,0 +1,995 @@
|
||||
"""Treatment effect estimators
|
||||
|
||||
follows largely Stata's teffects in Stata 13 manual
|
||||
|
||||
Created on Tue Jun 9 22:45:23 2015
|
||||
|
||||
Author: Josef Perktold
|
||||
License: BSD-3
|
||||
|
||||
currently available
|
||||
|
||||
ATE POM_0 POM_1
|
||||
res_ipw 230.688598 3172.774059 3403.462658
|
||||
res_aipw -230.989201 3403.355253 3172.366052
|
||||
res_aipw_wls -227.195618 3403.250651 3176.055033
|
||||
res_ra -239.639211 3403.242272 3163.603060
|
||||
res_ipwra -229.967078 3403.335639 3173.368561
|
||||
|
||||
|
||||
Lots of todos, just the beginning, but most effects are available but not
|
||||
standard errors, and no code structure that has a useful pattern
|
||||
|
||||
see https://github.com/statsmodels/statsmodels/issues/2443
|
||||
|
||||
Note: script requires cattaneo2 data file from Stata 14, hardcoded file path
|
||||
could be loaded with webuse
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from statsmodels.compat.pandas import Substitution
|
||||
from scipy.linalg import block_diag
|
||||
from statsmodels.regression.linear_model import WLS
|
||||
from statsmodels.sandbox.regression.gmm import GMM
|
||||
from statsmodels.stats.contrast import ContrastResults
|
||||
from statsmodels.tools.docstring import indent
|
||||
|
||||
|
||||
def _mom_ate(params, endog, tind, prob, weighted=True):
|
||||
"""moment condition for average treatment effect
|
||||
|
||||
This does not include a moment condition for potential outcome mean (POM).
|
||||
|
||||
"""
|
||||
w1 = (tind / prob)
|
||||
w0 = (1. - tind) / (1. - prob)
|
||||
if weighted:
|
||||
w0 /= w0.mean()
|
||||
w1 /= w1.mean()
|
||||
|
||||
wdiff = w1 - w0
|
||||
|
||||
return endog * wdiff - params
|
||||
|
||||
|
||||
def _mom_atm(params, endog, tind, prob, weighted=True):
|
||||
"""moment conditions for average treatment means (POM)
|
||||
|
||||
moment conditions are POM0 and POM1
|
||||
"""
|
||||
w1 = (tind / prob)
|
||||
w0 = (1. - tind) / (1. - prob)
|
||||
if weighted:
|
||||
w1 /= w1.mean()
|
||||
w0 /= w0.mean()
|
||||
|
||||
return np.column_stack((endog * w0 - params[0], endog * w1 - params[1]))
|
||||
|
||||
|
||||
def _mom_ols(params, endog, tind, prob, weighted=True):
|
||||
"""
|
||||
moment condition for average treatment mean based on OLS dummy regression
|
||||
|
||||
moment conditions are POM0 and POM1
|
||||
|
||||
"""
|
||||
w = tind / prob + (1-tind) / (1 - prob)
|
||||
|
||||
treat_ind = np.column_stack((1 - tind, tind))
|
||||
mom = (w * (endog - treat_ind.dot(params)))[:, None] * treat_ind
|
||||
|
||||
return mom
|
||||
|
||||
|
||||
def _mom_ols_te(tm, endog, tind, prob, weighted=True):
|
||||
"""
|
||||
moment condition for average treatment mean based on OLS dummy regression
|
||||
|
||||
first moment is ATE
|
||||
second moment is POM0 (control)
|
||||
|
||||
"""
|
||||
w = tind / prob + (1-tind) / (1 - prob)
|
||||
|
||||
treat_ind = np.column_stack((tind, np.ones(len(tind))))
|
||||
mom = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind
|
||||
|
||||
return mom
|
||||
|
||||
|
||||
def _mom_olsex(params, model=None, exog=None, scale=None):
|
||||
exog = exog if exog is not None else model.exog
|
||||
fitted = model.predict(params, exog)
|
||||
resid = model.endog - fitted
|
||||
if scale is not None:
|
||||
resid /= scale
|
||||
mom = resid[:, None] * exog
|
||||
return mom
|
||||
|
||||
|
||||
def ate_ipw(endog, tind, prob, weighted=True, probt=None):
    """Average treatment effect from basic inverse propensity weighting.

    Parameters
    ----------
    endog : ndarray
        Outcome variable.
    tind : ndarray
        Treatment indicator, 1 for treated and 0 for untreated.
    prob : ndarray
        Probability of treatment (propensity score).
    weighted : bool
        If True, the weights of each group are rescaled to have mean one.
    probt : ndarray, optional
        Additional observation weights that multiply both sets of IPW
        weights before normalization.

    Returns
    -------
    tuple
        (ATE, POM0, POM1) as weighted sample means of ``endog``.
    """
    weight_treated = tind / prob
    weight_control = (1. - tind) / (1. - prob)

    if probt is not None:
        weight_treated *= probt
        weight_control *= probt

    if weighted:
        weight_control /= weight_control.mean()
        weight_treated /= weight_treated.mean()

    ate = (endog * (weight_treated - weight_control)).mean()
    pom0 = (endog * weight_control).mean()
    pom1 = (endog * weight_treated).mean()
    return ate, pom0, pom1
|
||||
|
||||
|
||||
class _TEGMMGeneric1(GMM):
    """GMM class to get cov_params for treatment effects

    The moment conditions of the selection/treatment model and of the
    outcome model are stacked, so that the standard errors of the treatment
    effect take the first step estimation of the treatment model into
    account.

    this also matches standard errors of ATE and POM in Stata

    Parameters
    ----------
    endog : ndarray
        Outcome variable.
    res_select : results instance
        Results of the selection (treatment) model.
    mom_outcome : callable
        Moment function with signature
        ``mom_outcome(params, endog, tind, prob, weighted=True)``.
    exclude_tmoms : bool
        If True, the treatment model moments are left out and the predicted
        probabilities of ``res_select`` are treated as fixed.
    **kwargs
        Attached to the instance as attributes.
    """

    def __init__(self, endog, res_select, mom_outcome, exclude_tmoms=False,
                 **kwargs):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome
        self.exclude_tmoms = exclude_tmoms
        self.__dict__.update(kwargs)

        # there is no exog in init in this version, make sure that xnames
        # is a list instead of None
        if self.data.xnames is None:
            self.data.xnames = []

        # information about the decomposition of the parameter vector
        if exclude_tmoms:
            self.k_select = 0
            # fittedvalues is still linpred
            self.prob = self.results_select.predict()
        else:
            self.k_select = len(res_select.model.data.param_names)
            self.prob = None

    def momcond(self, params):
        """Stack outcome moments and, optionally, selection score moments."""
        n_outcome = len(params) - self.k_select
        params_outcome = params[:n_outcome]
        params_select = params[n_outcome:]

        model_select = self.results_select.model
        tind = model_select.endog

        if self.exclude_tmoms:
            prob = self.prob
        else:
            prob = model_select.predict(params_select)

        blocks = [
            self.mom_outcome(params_outcome, self.endog, tind, prob,
                             weighted=True)
        ]
        if not self.exclude_tmoms:
            blocks.append(model_select.score_obs(params_select))

        return np.column_stack(blocks)
|
||||
|
||||
|
||||
class _TEGMM(GMM):
    """GMM class to get cov_params for treatment effects

    The moment conditions of the selection/treatment model and of the
    outcome model are stacked, so that the standard errors of the treatment
    effect take the first step estimation of the treatment model into
    account.

    this also matches standard errors of ATE and POM in Stata

    Parameters
    ----------
    endog : ndarray
        Outcome variable.
    res_select : results instance
        Results of the selection (treatment) model.
    mom_outcome : callable
        Moment function called as ``mom_outcome(params, endog, tind, prob)``
        with the first two elements of the parameter vector.
    """

    def __init__(self, endog, res_select, mom_outcome):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome

        # there is no exog in init in this version, make sure that xnames
        # is a list instead of None
        if self.data.xnames is None:
            self.data.xnames = []

    def momcond(self, params):
        """Stack the two outcome moments and the selection score moments."""
        params_outcome, params_select = params[:2], params[2:]
        model_select = self.results_select.model

        tind = model_select.endog
        prob = model_select.predict(params_select)
        # mom_outcome is called with its default for ``weighted``
        mom_outcome = self.mom_outcome(params_outcome, self.endog, tind, prob)
        mom_select = model_select.score_obs(params_select)
        return np.column_stack((mom_outcome, mom_select))
|
||||
|
||||
|
||||
class _IPWGMM(_TEGMMGeneric1):
    """GMM for ipw treatment effect and potential outcome

    The outcome moments are those of an IPW-weighted regression of the
    outcome on ``(treatment, constant)``. They are stacked with the score of
    the selection model.

    The instance needs attributes ``teff`` (TreatmentEffect instance) and
    ``effect_group``, which are provided as keyword arguments in ``__init__``.
    """

    def momcond(self, params):
        """Moment conditions for (ATE, POM0) and the selection model.

        Parameters
        ----------
        params : ndarray
            ``params[:2]`` are ATE and POM0, ``params[2:]`` are the
            parameters of the selection model.

        Returns
        -------
        ndarray
            Moment conditions by observation, in original observation order.

        Raises
        ------
        ValueError
            If ``effect_group`` is not a valid option.
        """
        # Note: momcond in original order of observations
        ra = self.teff
        res_select = ra.results_select
        tind = ra.treatment
        endog = ra.model_pool.endog
        effect_group = self.effect_group

        tm = params[:2]
        ps = params[2:]

        prob_sel = np.asarray(res_select.model.predict(ps))
        # clip to avoid exploding weights for probabilities close to 0 or 1
        prob_sel = np.clip(prob_sel, 0.01, 0.99)
        prob = prob_sel

        # The array case has to be checked first, comparing an array with
        # a string or using ``in`` has no unambiguous truth value.
        if isinstance(effect_group, np.ndarray):
            # user provided weights for the target population
            probt = effect_group
        elif effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
        else:
            raise ValueError("incorrect option for effect_group")

        w = tind / prob + (1 - tind) / (1 - prob)
        # Are we supposed to use scaled weights? doesn't closely match Stata
        # w1 = tind / prob
        # w2 = (1 - tind) / (1 - prob)
        # w = w1 / w1.sum() * tind.sum() + w2 / w2.sum() * (1 - tind).sum()
        if probt is not None:
            w *= probt

        treat_ind = np.column_stack((tind, np.ones(len(tind))))
        mm = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind

        mom_select = res_select.model.score_obs(ps)
        moms = np.column_stack((mm, mom_select))
        return moms
|
||||
|
||||
|
||||
class _AIPWGMM(_TEGMMGeneric1):
    """GMM for aipw treatment effect and potential outcome

    uses unweighted outcome regression

    The instance needs the attribute ``teff`` (TreatmentEffect instance),
    which is provided as keyword argument in ``__init__``.
    """

    def momcond(self, params):
        """Moment conditions for ATE, POM0, outcome and selection models.

        The parameter vector is laid out as
        ``[ATE, POM0, outcome0 (k), outcome1 (k), selection]``.
        Rows are in grouped order, untreated observations first.
        """
        teff = self.teff
        is_treated = teff.treat_mask
        res_select = teff.results_select
        model0 = teff.results0.model
        model1 = teff.results1.model
        k = model0.exog.shape[1]

        par_ate = params[0]
        par_pom0 = params[1]
        beta0 = params[2:k + 2]
        beta1 = params[k + 2:2 * k + 2]
        par_select = params[2 * k + 2:]

        # reordered exog, endog so that rows match the sub models by group
        exog = teff.exog_grouped
        endog = teff.endog_grouped

        prob_sel = np.asarray(res_select.model.predict(par_select))
        prob_sel = np.clip(prob_sel, 0.01, 0.99)
        prob = np.concatenate((prob_sel[~is_treated], prob_sel[is_treated]))

        # outcome models by treatment, unweighted
        fitted0 = model0.predict(beta0, exog)
        fitted1 = model1.predict(beta1, exog)
        mom_outcome = block_diag(_mom_olsex(beta0, model=model0),
                                 _mom_olsex(beta1, model=model1))

        # moments for target statistics, ATE and POM
        tind = teff.treatment
        tind = np.concatenate((tind[~is_treated], tind[is_treated]))
        tmean0 = fitted0 + (endog - fitted0) / (1 - prob) * (1 - tind)
        tmean1 = fitted1 + (endog - fitted1) / prob * tind

        mom_target = np.column_stack((tmean1 - tmean0 - par_ate,
                                      tmean0 - par_pom0))

        # Note: res_select has original data order,
        # mom_outcome and mom_target use grouped observations
        score = res_select.model.score_obs(par_select)
        mom_select = np.concatenate((score[~is_treated], score[is_treated]),
                                    axis=0)

        return np.column_stack((mom_target, mom_outcome, mom_select))
|
||||
|
||||
|
||||
class _AIPWWLSGMM(_TEGMMGeneric1):
    """GMM for aipw-wls treatment effect and potential outcome

    uses weighted outcome regression

    The instance needs the attribute ``teff`` (TreatmentEffect instance),
    which is provided as keyword argument in ``__init__``.
    """

    def momcond(self, params):
        """Moment conditions for ATE, POM0, outcome and selection models.

        Parameters
        ----------
        params : ndarray
            Laid out as ``[ATE, POM0, outcome0 (k), outcome1 (k), selection]``
            where k is the number of columns of the outcome exog.

        Returns
        -------
        ndarray
            Moment conditions by observation in grouped order, untreated
            observations first.
        """
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        # take out POM0 so that the remaining slices start at the ATE
        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        # all remaining parameters belong to the selection model, the number
        # of selection parameters is not fixed
        ps = params[2*k+1:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped
        endog = ra.endog_grouped

        # todo: need weights in outcome models
        prob_sel = np.asarray(res_select.model.predict(ps))

        prob_sel = np.clip(prob_sel, 0.001, 0.999)

        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]
        prob = np.concatenate((prob0, prob1))

        # weights w * (w - 1) with the IPW weight w of each group
        ipw0 = 1 / (1 - prob0)
        ww0 = ipw0 * (ipw0 - 1)
        ipw1 = 1 / prob1
        ww1 = ipw1 * (ipw1 - 1)

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * ww0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * ww1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        tind = ra.treatment
        tind = np.concatenate((tind[~treat_mask], tind[treat_mask]))

        correct0 = (endog - fitted0) / (1 - prob) * (1 - tind)
        correct1 = (endog - fitted1) / prob * tind

        tmean0 = fitted0 + correct0
        tmean1 = fitted1 + correct1
        ate = tmean1 - tmean0

        mm = ate - pm
        mpom = tmean0 - ppom
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms
|
||||
|
||||
|
||||
class _RAGMM(_TEGMMGeneric1):
    """GMM for regression adjustment treatment effect and potential outcome

    uses unweighted outcome regression

    The instance needs attributes ``teff`` (TreatmentEffect instance) and
    ``probt`` (weights for the target population or None), which are
    provided as keyword arguments in ``__init__``.
    """

    def momcond(self, params):
        """Moment conditions for ATE, POM0 and the two outcome models.

        The parameter vector is laid out as
        ``[ATE, POM0, outcome0 (k), outcome1 (k)]``.
        Rows are in grouped order, untreated observations first.
        """
        teff = self.teff
        model0 = teff.results0.model
        model1 = teff.results1.model
        k = model0.exog.shape[1]

        par_pom0 = params[1]
        # parameter vector without POM0
        reduced = np.delete(params, 1)
        par_ate = reduced[0]
        beta0 = reduced[1:k + 1]
        beta1 = reduced[-k:]

        # reordered exog so that rows match the sub models by group
        exog = teff.exog_grouped

        fitted0 = model0.predict(beta0, exog)
        fitted1 = model1.predict(beta1, exog)
        mom_outcome = block_diag(_mom_olsex(beta0, model=model0),
                                 _mom_olsex(beta1, model=model1))

        mom_target = np.column_stack((fitted1 - fitted0 - par_ate,
                                      fitted0 - par_pom0))
        if self.probt is not None:
            # weights for the target population, normalized to mean one
            mom_target *= (self.probt / self.probt.mean())[:, None]

        return np.column_stack((mom_target, mom_outcome))
|
||||
|
||||
|
||||
class _IPWRAGMM(_TEGMMGeneric1):
    """GMM for ipwra treatment effect and potential outcome

    The instance needs attributes ``teff`` (TreatmentEffect instance) and
    ``effect_group``, which are provided as keyword arguments in ``__init__``.
    """

    def momcond(self, params):
        """Moment conditions for ATE, POM0, outcome and selection models.

        Parameters
        ----------
        params : ndarray
            Laid out as ``[ATE, POM0, outcome0 (k), outcome1 (k), selection]``
            where k is the number of columns of the outcome exog.

        Returns
        -------
        ndarray
            Moment conditions by observation in grouped order, untreated
            observations first.

        Raises
        ------
        ValueError
            If ``effect_group`` is not a valid option.
        """
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        # take out POM0 so that the remaining slices start at the ATE
        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        # all remaining parameters belong to the selection model, the number
        # of selection parameters is not fixed
        ps = params[2*k+1:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model

        # use reordered exog so it matches sub models by group
        exog = ra.exog_grouped
        # treatment indicator in grouped order, treated observations last.
        # A positive start index is used because ``tind[-0:]`` would select
        # all observations if there are no treated observations.
        nobs = len(treat_mask)
        n_treated = int(treat_mask.sum())
        tind = np.zeros(nobs)
        tind[nobs - n_treated:] = 1

        # selection probability by group, propensity score
        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.001, 0.999)
        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]

        effect_group = self.effect_group
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            sind = 1
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            # for averaging effect on treated
            sind = tind / tind.mean()
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1

            # for averaging effect on untreated
            sind = (1 - tind)
            sind /= sind.mean()
        else:
            raise ValueError("incorrect option for effect_group")

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * w0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * w1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        mm = (fitted1 - fitted0 - pm) * sind
        mpom = (fitted0 - ppom) * sind
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms
|
||||
|
||||
|
||||
class TreatmentEffectResults(ContrastResults):
    """
    Results class for treatment effect estimation

    Parameters
    ----------
    teff : instance of TreatmentEffect class
    results_gmm : instance of GMMResults class
    method : string
        Method and estimator of treatment effect.
    kwds: dict
        Other keywords with additional information.

    Notes
    -----
    This class is a subclass of ContrastResults and inherits methods like
    summary, summary_frame and conf_int. Attributes correspond to a z-test
    given by ``GMMResults.t_test``.
    """

    def __init__(self, teff, results_gmm, method, **kwds):
        super().__init__()

        # Contrast rows for ATE, POM0 and POM1 = ATE + POM0. The first two
        # GMM parameters are ATE and POM0, all other columns stay zero.
        n_params = len(results_gmm.params)
        contrasts = np.zeros((3, n_params))
        contrasts[[0, 1], [0, 1]] = 1
        contrasts[2, :2] = 1

        # take over all attributes of the z-test results
        self.__dict__.update(results_gmm.t_test(contrasts).__dict__)

        self.teff = teff
        self.results_gmm = results_gmm
        self.method = method
        # TODO: make those explicit?
        self.__dict__.update(kwds)

        self.c_names = ["ATE", "POM0", "POM1"]
|
||||
|
||||
|
||||
# Shared "Parameters" and "Returns" docstring sections that are inserted into
# the TreatmentEffect method docstrings with ``Substitution``.
# Version for methods that have an ``effect_group`` option.
doc_params_returns = """\
Parameters
----------
return_results : bool
    If True, then a results instance is returned.
    If False, just ATE, POM0 and POM1 are returned.
effect_group : {"all", 0, 1}
    ``effect_group`` determines for which population the effects are
    estimated.
    If effect_group is "all", then sample average treatment effect and
    potential outcomes are returned.
    If effect_group is 1 or "treated", then effects on treated are
    returned.
    If effect_group is 0, "untreated" or "control", then effects on
    untreated, i.e. control group, are returned.
disp : bool
    Indicates whether the scipy optimizer should display the
    optimization results

Returns
-------
TreatmentEffectResults instance or tuple (ATE, POM0, POM1)
"""

# Version for methods without ``effect_group`` option.
doc_params_returns2 = """\
Parameters
----------
return_results : bool
    If True, then a results instance is returned.
    If False, just ATE, POM0 and POM1 are returned.
disp : bool
    Indicates whether the scipy optimizer should display the
    optimization results

Returns
-------
TreatmentEffectResults instance or tuple (ATE, POM0, POM1)
"""
|
||||
|
||||
|
||||
class TreatmentEffect:
    """
    Estimate average treatment effect under conditional independence

    .. versionadded:: 0.14.0

    This class estimates treatment effect and potential outcome using 5
    different methods, ipw, ra, aipw, aipw-wls, ipw-ra.
    Standard errors and inference are based on the joint GMM representation of
    selection or treatment model, outcome model and effect functions.

    Parameters
    ----------
    model : instance of a model class
        The model class should contain endog and exog for the outcome model.
    treatment : ndarray
        indicator array for observations with treatment (1) or without (0)
    results_select : results instance
        The results instance for the treatment or selection model.
    _cov_type : "HC0"
        Internal keyword. The keyword does not affect GMMResults which always
        corresponds to HC0 standard errors.
    kwds : keyword arguments
        currently not used

    Notes
    -----
    The outcome model is currently limited to a linear model based on OLS.
    Other outcome models, like Logit and Poisson, will become available in
    future.

    See `Treatment Effect notebook
    <../examples/notebooks/generated/treatment_effect.html>`__
    for an overview.

    """

    def __init__(self, model, treatment, results_select=None, _cov_type="HC0",
                 **kwds):
        # Note _cov_type is only for preliminary estimators,
        # cov in GMM always corresponds to HC0
        self.__dict__.update(kwds)  # currently not used
        self.treatment = np.asarray(treatment)
        # The mask is based on the converted array so that list-like
        # treatment indicators also result in a boolean array.
        self.treat_mask = treat_mask = (self.treatment == 1)

        # results_select and prob_select are only set if a selection model
        # is provided
        if results_select is not None:
            self.results_select = results_select
            self.prob_select = results_select.predict()

        self.model_pool = model
        endog = model.endog
        exog = model.exog
        self.nobs = endog.shape[0]
        self._cov_type = _cov_type

        # no init keys are supported
        mod0 = model.__class__(endog[~treat_mask], exog[~treat_mask])
        self.results0 = mod0.fit(cov_type=_cov_type)
        mod1 = model.__class__(endog[treat_mask], exog[treat_mask])
        self.results1 = mod1.fit(cov_type=_cov_type)
        # self.predict_mean0 = self.model_pool.predict(self.results0.params
        #                                              ).mean()
        # self.predict_mean1 = self.model_pool.predict(self.results1.params
        #                                              ).mean()

        # data reordered by group, untreated observations first
        self.exog_grouped = np.concatenate((mod0.exog, mod1.exog), axis=0)
        self.endog_grouped = np.concatenate((mod0.endog, mod1.endog), axis=0)

    @classmethod
    def from_data(cls, endog, exog, treatment, model='ols', **kwds):
        """create models from data

        not yet implemented

        """
        raise NotImplementedError

    def ipw(self, return_results=True, effect_group="all", disp=False):
        """Inverse Probability Weighted treatment effect estimation.

        Parameters
        ----------
        return_results : bool
            If True, then a results instance is returned.
            If False, just ATE, POM0 and POM1 are returned.
        effect_group : {"all", 0, 1}
            ``effect_group`` determines for which population the effects are
            estimated.
            If effect_group is "all", then sample average treatment effect and
            potential outcomes are returned.
            If effect_group is 1 or "treated", then effects on treated are
            returned.
            If effect_group is 0, "untreated" or "control", then effects on
            untreated, i.e. control group, are returned.
        disp : bool
            Indicates whether the scipy optimizer should display the
            optimization results

        Returns
        -------
        TreatmentEffectResults instance or tuple (ATE, POM0, POM1)

        See Also
        --------
        TreatmentEffectResults
        """
        endog = self.model_pool.endog
        tind = self.treatment
        prob = self.prob_select
        # The array case has to be checked first, comparing an array with
        # a string or using ``in`` has no unambiguous truth value.
        user_weights = isinstance(effect_group, np.ndarray)
        if user_weights:
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        elif effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        res_ipw = ate_ipw(endog, tind, prob, weighted=True, probt=probt)

        if not return_results:
            return res_ipw

        # gmm = _TEGMMGeneric1(endog, self.results_select, _mom_ols_te,
        #                      probt=probt)
        # The moment conditions need the weights themselves, the name "user"
        # is only used as label in the results.
        gmm = _IPWGMM(endog, self.results_select, None, teff=self,
                      effect_group=probt if user_weights else effect_group)
        start_params = np.concatenate((res_ipw[:2],
                                       self.results_select.params))
        res_gmm = gmm.fit(start_params=start_params,
                          inv_weights=np.eye(len(start_params)),
                          optim_method='nm',
                          optim_args={"maxiter": 5000, "disp": disp},
                          maxiter=1,
                          )

        res = TreatmentEffectResults(self, res_gmm, "IPW",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ra(self, return_results=True, effect_group="all", disp=False):
        """
        Regression Adjustment treatment effect estimation.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults
        """
        # need indicator for reordered observations, treated are last.
        # A positive integer start index is used because ``tind[-0:]`` would
        # select all observations and a float count cannot be used in a slice.
        n_treated = int(self.treat_mask.sum())
        tind = np.zeros(len(self.treatment))
        tind[len(tind) - n_treated:] = 1
        # The array case has to be checked first, comparing an array with
        # a string or using ``in`` has no unambiguous truth value.
        if isinstance(effect_group, np.ndarray):
            # TODO: do we keep this?
            probt = effect_group
            effect_group = "user"  # standardize effect_group name
        elif effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = tind
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - tind
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        exog = self.exog_grouped

        # weight or indicator for effect_group
        if probt is not None:
            cw = (probt / probt.mean())
        else:
            cw = 1

        pom0 = (self.results0.predict(exog) * cw).mean()
        pom1 = (self.results1.predict(exog) * cw).mean()
        if not return_results:
            return pom1 - pom0, pom0, pom1

        endog = self.model_pool.endog
        mod_gmm = _RAGMM(endog, self.results_select, None, teff=self,
                         probt=probt)
        start_params = np.concatenate((
            # ate, tt0.effect,
            [pom1 - pom0, pom0],
            self.results0.params,
            self.results1.params))
        res_gmm = mod_gmm.fit(start_params=start_params,
                              inv_weights=np.eye(len(start_params)),
                              optim_method='nm',
                              optim_args={"maxiter": 5000, "disp": disp},
                              maxiter=1,
                              )
        res = TreatmentEffectResults(self, res_gmm, "RA",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults

        """

        nobs = self.nobs
        prob = self.prob_select
        tind = self.treatment
        exog = self.model_pool.exog  # in original order
        # IPW weighted residual corrections of the outcome predictions
        correct0 = (self.results0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (self.results1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = self.results0.predict(exog).mean() + correct0
        tmean1 = self.results1.predict(exog).mean() + correct1
        ate = tmean1 - tmean0
        if not return_results:
            return ate, tmean0, tmean1

        endog = self.model_pool.endog
        p2_aipw = np.asarray([ate, tmean0])

        mag_aipw1 = _AIPWGMM(endog, self.results_select, None, teff=self)
        start_params = np.concatenate((
            p2_aipw,
            self.results0.params, self.results1.params,
            self.results_select.params))
        res_gmm = mag_aipw1.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 5000, "disp": disp},
            maxiter=1)

        res = TreatmentEffectResults(self, res_gmm, "AIPW",
                                     start_params=start_params,
                                     effect_group="all",
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns2, " " * 8))
    def aipw_wls(self, return_results=True, disp=False):
        """
        ATE and POM from double robust augmented inverse probability weighting.

        This uses weighted outcome regression, while `aipw` uses unweighted
        outcome regression.
        Option for effect on treated or on untreated is not available.
        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults

        """
        nobs = self.nobs
        prob = self.prob_select

        endog = self.model_pool.endog
        exog = self.model_pool.exog
        tind = self.treatment
        treat_mask = self.treat_mask

        # weights w * (w - 1) with the IPW weight w of each group
        ww1 = tind / prob * (tind / prob - 1)
        mod1 = WLS(endog[treat_mask], exog[treat_mask],
                   weights=ww1[treat_mask])
        result1 = mod1.fit(cov_type='HC1')
        mean1_ipw2 = result1.predict(exog).mean()

        ww0 = (1 - tind) / (1 - prob) * ((1 - tind) / (1 - prob) - 1)
        mod0 = WLS(endog[~treat_mask], exog[~treat_mask],
                   weights=ww0[~treat_mask])
        result0 = mod0.fit(cov_type='HC1')
        mean0_ipw2 = result0.predict(exog).mean()

        self.results_ipwwls0 = result0
        self.results_ipwwls1 = result1

        correct0 = (result0.resid / (1 - prob[tind == 0])).sum() / nobs
        correct1 = (result1.resid / (prob[tind == 1])).sum() / nobs
        tmean0 = mean0_ipw2 + correct0
        tmean1 = mean1_ipw2 + correct1
        ate = tmean1 - tmean0

        if not return_results:
            return ate, tmean0, tmean1

        p2_aipw_wls = np.asarray([ate, tmean0]).squeeze()

        # GMM
        mod_gmm = _AIPWWLSGMM(endog, self.results_select, None,
                              teff=self)
        start_params = np.concatenate((
            p2_aipw_wls,
            result0.params,
            result1.params,
            self.results_select.params))
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 5000, "disp": disp},
            maxiter=1)
        res = TreatmentEffectResults(self, res_gmm, "AIPW-WLS",
                                     start_params=start_params,
                                     effect_group="all",
                                     )
        return res

    @Substitution(params_returns=indent(doc_params_returns, " " * 8))
    def ipw_ra(self, return_results=True, effect_group="all", disp=False):
        """
        ATE and POM from inverse probability weighted regression adjustment.

        \n%(params_returns)s
        See Also
        --------
        TreatmentEffectResults

        """
        treat_mask = self.treat_mask
        endog = self.model_pool.endog
        exog = self.model_pool.exog
        prob = self.prob_select

        prob0 = prob[~treat_mask]
        prob1 = prob[treat_mask]
        # w0, w1 are the weights for the outcome regressions by group,
        # exogt is the exog of the target population for averaging
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            exogt = exog
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            exogt = exog[treat_mask]
            effect_group = 1  # standardize effect_group name
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1
            exogt = exog[~treat_mask]
            effect_group = 0  # standardize effect_group name
        else:
            raise ValueError("incorrect option for effect_group")

        mod0 = WLS(endog[~treat_mask], exog[~treat_mask],
                   weights=w0)
        result0 = mod0.fit(cov_type='HC1')
        # mean0_ipwra = (result0.predict(exog) * (prob / prob.mean())).mean()
        mean0_ipwra = result0.predict(exogt).mean()

        mod1 = WLS(endog[treat_mask], exog[treat_mask],
                   weights=w1)
        result1 = mod1.fit(cov_type='HC1')
        # mean1_ipwra = (result1.predict(exog) * (prob / prob.mean())).mean()
        mean1_ipwra = result1.predict(exogt).mean()

        if not return_results:
            return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra

        # GMM
        mod_gmm = _IPWRAGMM(endog, self.results_select, None, teff=self,
                            effect_group=effect_group)
        start_params = np.concatenate((
            [mean1_ipwra - mean0_ipwra, mean0_ipwra],
            result0.params,
            result1.params,
            np.asarray(self.results_select.params)
        ))
        res_gmm = mod_gmm.fit(
            start_params=start_params,
            inv_weights=np.eye(len(start_params)),
            optim_method='nm',
            optim_args={"maxiter": 2000, "disp": disp},
            maxiter=1
        )

        res = TreatmentEffectResults(self, res_gmm, "IPW-RA",
                                     start_params=start_params,
                                     effect_group=effect_group,
                                     )
        return res
|
||||
Reference in New Issue
Block a user