reconnect moved files to git repo
@@ -0,0 +1,371 @@
'''conditional logit and nested conditional logit

nested conditional logit is supposed to be the random utility version
(RU2 and maybe RU1)

References
----------
currently based on:

Greene, Econometric Analysis, 5th edition and draft (?)
Heiss, Florian, 2002, Structural choice analysis with nested logit models,
    The Stata Journal 2(3), pp. 227-252

not yet used:

Silberhorn, Nadja, Yasemin Boztug, and Lutz Hildebrandt, 2008, Estimation
    with the nested logit model: specifications and software particularities,
    OR Spectrum
Koppelman, Frank S., and Chandra Bhat, with technical support from Vaneet
    Sethi, Sriram Subramanian, Vincent Bernardin and Jian Zhang, 2006,
    A Self Instructing Course in Mode Choice Modeling: Multinomial and
    Nested Logit Models

Author: josef-pktd
License: BSD (simplified)
'''
import numpy as np
import numpy.lib.recfunctions as recf
from scipy import optimize


class TryCLogit:
    '''
    Conditional Logit, data handling test

    Parameters
    ----------
    endog : array (nobs, nchoices)
        dummy encoding of realized choices
    exog_bychoices : list of arrays
        explanatory variables, one array of exog for each choice. Variables
        with common coefficients have to be first in each array.
    ncommon : int
        number of explanatory variables with common coefficients

    Notes
    -----
    Utility for choice j is given by

        $V_j = X_j * beta + Z * gamma_j$

    where X_j contains generic variables (terminology Heiss) that have the
    same coefficient across choices, and Z are variables, such as
    individual-specific variables, that have different coefficients across
    choices.

    If there are choice-specific constants, then they should be contained in
    Z. For identification, the constant of one choice should be dropped.
    '''
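
    # Illustrative layout (added note, mirroring the travel-mode example at
    # the bottom of this module): with ncommon=2 generic variables GC, Ttme,
    # choice-specific constants for all but one choice, and Hinc only in the
    # first equation, exog_bychoices would be
    #   [array with columns [GC, Ttme, Const, Hinc]   for Air,    (nobs, 4)
    #    array with columns [GC, Ttme, Const]         for Train,  (nobs, 3)
    #    array with columns [GC, Ttme, Const]         for Bus,    (nobs, 3)
    #    array with columns [GC, Ttme]                for Car]    (nobs, 2)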

    def __init__(self, endog, exog_bychoices, ncommon):
        self.endog = endog
        self.exog_bychoices = exog_bychoices
        self.ncommon = ncommon
        self.nobs, self.nchoices = endog.shape
        self.nchoices = len(exog_bychoices)

        #TODO: rename beta to params and include inclusive values for nested CL
        betaind = [exog_bychoices[ii].shape[1] - ncommon
                   for ii in range(self.nchoices)]
        zi = np.r_[[ncommon], ncommon + np.array(betaind).cumsum()]
        z = np.arange(zi[-1])  #running index over all parameters
        beta_indices = [np.r_[np.arange(ncommon), z[zi[ii]:zi[ii+1]]]
                        for ii in range(len(zi)-1)]
        self.beta_indices = beta_indices

        #for testing only
        beta = np.arange(zi[-1])
        betaidx_bychoices = [beta[idx] for idx in beta_indices]

    def xbetas(self, params):
        '''these are the V_j in the docstring formula
        '''
        res = np.empty((self.nobs, self.nchoices))
        for choiceind in range(self.nchoices):
            res[:, choiceind] = np.dot(self.exog_bychoices[choiceind],
                                       params[self.beta_indices[choiceind]])
        return res

    def loglike(self, params):
        #normalization ?
        xb = self.xbetas(params)
        expxb = np.exp(xb)
        probs = expxb / expxb.sum(1)[:, None]  #only the chosen probs are needed
        loglike = (self.endog * np.log(probs)).sum(1)
        #equivalent (verified): (self.endog * xb).sum(1) - np.log(expxb.sum(1))
        #if self.endog were an index array, then xb[self.endog]
        return -loglike.sum()  #return sum for now, not for each observation
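
    def loglike_stable(self, params):
        '''numerically stable variant of ``loglike``

        Added sketch, not part of the original script: uses the identity
        log P_ij = V_ij - log(sum_k exp(V_ik)) with scipy's logsumexp so
        that large values of V do not overflow in np.exp.
        '''
        from scipy.special import logsumexp  #scipy >= 0.19
        xb = self.xbetas(params)
        loglike = (self.endog * xb).sum(1) - logsumexp(xb, axis=1)
        return -loglike.sum()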

    def fit(self, start_params=None):
        if start_params is None:
            #TODO: infer the parameter count from beta_indices instead of
            #hardcoding it for the 6-parameter example below
            start_params = np.zeros(6)
        return optimize.fmin(self.loglike, start_params, maxfun=10000)
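

def _smoke_test_tryclogit():
    '''quick synthetic check of TryCLogit (added sketch, not in the original
    script); all names and sizes here are made up for illustration.

    Four choices, two generic variables with common coefficients, plus a
    choice-specific constant for all but the last choice, matching the
    identification note in the class docstring.
    '''
    rng = np.random.RandomState(0)
    nobs_, nchoices_, ncommon_ = 50, 4, 2
    exogs = []
    for j in range(nchoices_):
        xcommon = rng.randn(nobs_, ncommon_)
        if j < nchoices_ - 1:  #drop the constant of the last choice
            exogs.append(np.column_stack([xcommon, np.ones(nobs_)]))
        else:
            exogs.append(xcommon)
    #one-hot encoded choices, picked at random
    endog_ = np.eye(nchoices_)[rng.randint(nchoices_, size=nobs_)]
    mod = TryCLogit(endog_, exogs, ncommon_)
    #2 common slopes + 3 constants = 5 parameters; at zero all probabilities
    #are 1/4, so this returns -nobs_ * log(1/4)
    return mod.loglike(np.zeros(5))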


class TryNCLogit:
    '''
    Nested Conditional Logit (RUNMNL), data handling test

    unfinished, does not do anything yet
    '''

    def __init__(self, endog, exog_bychoices, ncommon):
        self.endog = endog
        self.exog_bychoices = exog_bychoices
        self.ncommon = ncommon
        self.nobs, self.nchoices = endog.shape
        self.nchoices = len(exog_bychoices)

        #TODO: rename beta to params and include inclusive values for nested CL
        betaind = [exog_bychoices[ii].shape[1] - ncommon
                   for ii in range(self.nchoices)]
        zi = np.r_[[ncommon], ncommon + np.array(betaind).cumsum()]
        z = np.arange(zi[-1])  #running index over all parameters
        beta_indices = [np.r_[np.arange(ncommon), z[zi[ii]:zi[ii+1]]]
                        for ii in range(len(zi)-1)]
        self.beta_indices = beta_indices

        #for testing only
        beta = np.arange(zi[-1])
        betaidx_bychoices = [beta[idx] for idx in beta_indices]

    def xbetas(self, params):
        '''these are the V_j
        '''
        res = np.empty((self.nobs, self.nchoices))
        for choiceind in range(self.nchoices):
            res[:, choiceind] = np.dot(self.exog_bychoices[choiceind],
                                       params[self.beta_indices[choiceind]])
        return res

    def loglike_leafbranch(self, params, tau):
        #normalization ?
        #check/change naming for tau
        #inclusive value of the branch: iv = log(sum_j exp(V_j / tau))
        xb = self.xbetas(params)
        expxb = np.exp(xb / tau)
        sumexpxb = expxb.sum(1)
        logsumexpxb = np.log(sumexpxb)
        probs = expxb / sumexpxb[:, None]
        return probs, logsumexpxb
        #if self.endog were an index array, then xb[self.endog]
        #loglike = (self.endog * xb).sum(1) - logsumexpxb
        #return -loglike.sum()  #return sum for now, not for each observation

    def loglike_branch(self, params, tau):
        #not yet sure how to keep track of branches during walking of tree
        #``branches`` is still a placeholder; this method is an unfinished stub
        ivs = []
        for b in branches:  # noqa:F821 See GH#5756
            probs, iv = self.loglike_leafbranch(params, tau)
            ivs.append(iv)

        ivs = np.column_stack(ivs)  #ivs is (nobs, nbranchchoices); this way ?
        exptiv = np.exp(tau * ivs)
        sumexptiv = exptiv.sum(1)
        logsumexptiv = np.log(sumexptiv)
        probs = exptiv / sumexptiv[:, None]
        return probs, logsumexptiv


####### obsolete version to try out attaching data,
####### new in treewalkerclass.py, copy new version to replace this
####### problem with bzr: copying will disconnect the file history
testxb = 0  #global to class
class RU2NMNL:
    '''Nested Multinomial Logit with Random Utility 2 parameterization

    '''
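
    # Tree format (see ``tree0`` near the bottom of this module):
    #   ('top', [('Fly', ['Air']), ('Ground', ['Train', 'Car', 'Bus'])])
    # branch nodes are (name, subtree) tuples, leaves are choice names;
    # ``exog`` is a dict mapping every node name to its data.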

    def __init__(self, endog, exog, tree, paramsind):
        self.endog = endog
        self.datadict = exog
        self.tree = tree
        self.paramsind = paramsind

        self.branchsum = ''
        self.probs = {}

    def calc_prob(self, tree, keys=None):
        '''walking a tree bottom-up based on dictionary
        '''
        endog = self.endog
        datadict = self.datadict
        paramsind = self.paramsind
        branchsum = self.branchsum

        if isinstance(tree, tuple):  #branch nodes are (name, subtree) tuples
            name, subtree = tree
            print(name, datadict[name])
            print('subtree', subtree)
            keys = []
            if testxb:
                branchsum = datadict[name]
            else:
                branchsum = name  #0
            for b in subtree:
                print(b)
                #branchsum += branch2(b)
                branchsum = branchsum + self.calc_prob(b, keys)
            print('branchsum', branchsum, keys)
            for k in keys:
                self.probs[k] = self.probs[k] + ['*' + name + '-prob']

        else:  #leaf of the tree, a single choice name
            keys.append(tree)
            self.probs[tree] = [tree + '-prob' +
                                '(%s)' % ', '.join(self.paramsind[tree])]
            if testxb:
                #only exercised when testxb is true; iterates over the
                #characters of the leaf name, so it expects them to be
                #datadict keys
                leavessum = sum(datadict[bi] for bi in tree)
                print('final branch with', tree, ''.join(tree), leavessum)
                return leavessum  #sum(xb[tree])
            else:
                return ''.join(tree)  #sum(tree)

        print('working on branch', tree, branchsum)
        return branchsum


#Trying out ways to handle data
#------------------------------

#travel data from Greene
dta = np.genfromtxt('TableF23-2.txt', skip_header=1,
                    names='Mode Ttme Invc Invt GC Hinc PSize'.split())

endog = dta['Mode'].reshape(-1, 4).copy()  #I do not want a view
nobs, nchoices = endog.shape
datafloat = dta.view(float).reshape(-1, 7)
exog = datafloat[:, 1:].reshape(-1, 6*nchoices).copy()  #I do not want a view

print(endog.sum(0))
varnames = dta.dtype.names
print(varnames[1:])
modes = ['Air', 'Train', 'Bus', 'Car']
print(exog.mean(0).reshape(nchoices, -1))  # Greene Table 23.23


#try dummy encoding for individual-specific variables
exog_choice_names = ['GC', 'Ttme']
exog_choice = np.column_stack([dta[name] for name in exog_choice_names])
exog_choice = exog_choice.reshape(-1, len(exog_choice_names)*nchoices)
exog_choice = np.c_[endog, exog_choice]  # add constant dummy

exog_individual = dta['Hinc'][:, None]

#exog2 = np.c_[exog_choice, exog_individual*endog]

# we can also overwrite and select in the original datafloat,
# e.g. Hinc*endog(choice)

choice_index = np.arange(dta.shape[0]) % nchoices
hinca = dta['Hinc'] * (choice_index == 0)
dta2 = recf.append_fields(dta, ['Hinca'], [hinca], usemask=False)
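
# The same interaction with plain arrays (added illustration; column 5 of
# datafloat is Hinc, following the names list in the genfromtxt call above):
hinca_plain = datafloat[:, 5] * (choice_index == 0)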

#another version
xi = []
for ii in range(4):
    xi.append(datafloat[choice_index == ii])

#one more
dta1 = recf.append_fields(dta, ['Const'], [np.ones(dta.shape[0])],
                          usemask=False)

xivar = [['GC', 'Ttme', 'Const', 'Hinc'],
         ['GC', 'Ttme', 'Const'],
         ['GC', 'Ttme', 'Const'],
         ['GC', 'Ttme']]  #need to drop one constant

xi = []
for ii in range(4):
    xi.append(dta1[xivar[ii]][choice_index == ii])
#this does not change the sequence of columns; bug report by Skipper, I think

ncommon = 2
betaind = [len(xi[ii].dtype.names) - ncommon for ii in range(4)]
zi = np.r_[[ncommon], ncommon + np.array(betaind).cumsum()]
z = np.arange(7)  #any length >= zi[-1] works; there are 6 parameters here
betaindices = [np.r_[np.array([0, 1]), z[zi[ii]:zi[ii+1]]]
               for ii in range(len(zi)-1)]

beta = np.arange(7)
betai = [beta[idx] for idx in betaindices]


#examples for TryCLogit
#----------------------

#get exogs as float
xifloat = [xx.view(float).reshape(nobs, -1) for xx in xi]
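
#note (added): the float view above assumes every selected field is float64,
#as produced by the genfromtxt call; mixed dtypes would misalign the columns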
clogit = TryCLogit(endog, xifloat, 2)

debug = 0
if debug:
    res = optimize.fmin(clogit.loglike, np.ones(6))
#estimated parameters from Greene:
tab2324 = [-0.15501, -0.09612, 0.01329, 5.2074, 3.8690, 3.1632]
if debug:
    res2 = optimize.fmin(clogit.loglike, tab2324)

res3 = optimize.fmin(clogit.loglike, np.zeros(6), maxfun=10000)
#this has the same numbers as Greene table 23.24, but in a different sequence
#the coefficient on GC comes out as exactly 10% of Greene's
#TODO: get better starting values
'''
Optimization terminated successfully.
         Current function value: 199.128369
         Iterations: 957
         Function evaluations: 1456
array([-0.0961246 , -0.0155019 ,  0.01328757,  5.20741244,  3.86905293,
        3.16319074])
'''
res3corr = res3[[1, 0, 2, 3, 4, 5]]  #reorder to match Greene's table
res3corr[0] *= 10  #rescale GC coefficient
print(res3corr - tab2324)  # diff 1e-5 to 1e-6
#199.128369 - 199.1284  #llf agrees up to the precision printed in Greene

print(clogit.fit())


tree0 = ('top',
         [('Fly', ['Air']),
          ('Ground', ['Train', 'Car', 'Bus'])
          ])

datadict = dict(zip(['Air', 'Train', 'Bus', 'Car'],
                    [xifloat[i] for i in range(4)]))

#for testing only (mock that returns its own name)
datadict = dict(zip(['Air', 'Train', 'Bus', 'Car'],
                    ['Airdata', 'Traindata', 'Busdata', 'Cardata']))

datadict.update({'top' : [],
                 'Fly' : [],
                 'Ground': []})

paramsind = {'top' : [],
             'Fly' : [],
             'Ground': [],
             'Air' : ['GC', 'Ttme', 'ConstA', 'Hinc'],
             'Train' : ['GC', 'Ttme', 'ConstT'],
             'Bus' : ['GC', 'Ttme', 'ConstB'],
             'Car' : ['GC', 'Ttme']
             }

modru = RU2NMNL(endog, datadict, tree0, paramsind)
print(modru.calc_prob(modru.tree))
print('\nmodru.probs')
print(modru.probs)