Biogeme: Python Library  2.5
statistics.py
Go to the documentation of this file.
1 ## \file
2 # Functions calculating statistics on the sample
3 
4 from biogeme import *
5 
## Computes the null log likelihood of the sample and asks Biogeme to include it in the output file.
# \ingroup stats
# \param availability A <a href="http://docs.python.org/py3k/tutorial/datastructures.html#dictionaries" target="_blank">dictionary</a> mapping each alternative ID to its availability condition.
# \param iterator An iterator on the data file.
# \return Log likelihood of a model where the choice probability for
# observation \f$n\f$ is \f$1/J_n\f$, where \f$J_n\f$ is
# the number of available alternatives, i.e. \f[ \mathcal{L} = -\sum_n \ln(J_n) \f]
# \note The result is also stored in BIOGEME_OBJECT.STATISTICS under the key 'Null loglikelihood'.
def nullLoglikelihood(availability,iterator):
    # One "is available" test per alternative; summed, they give J_n.
    availabilityTests = {alt: (cond != 0) for alt, cond in availability.items()}
    nbAvailable = bioMultSum(availabilityTests)

    # Accumulate -ln(J_n) over the observations visited by the iterator.
    nullLL = -Sum(log(nbAvailable), iterator)

    BIOGEME_OBJECT.STATISTICS['Null loglikelihood'] = nullLL
    return nullLL
22 
## Computes the number of times each alternative is chosen in the data set and asks Biogeme to include it in the output file.
# \ingroup stats
# \param choiceSet List containing the alternatives for which statistics must be computed.
# \param choice Expression producing the ID of the chosen alternative.
# \param iterator An iterator on the data file.
# \return A <a href="http://docs.python.org/py3k/tutorial/datastructures.html#dictionaries" target="_blank">dictionary</a> n with an entry n[i] for each alternative i containing the number of times it is chosen.
# \note Availability is ignored here.
# \note Each count is also stored in BIOGEME_OBJECT.STATISTICS under the key 'Alt. <i> chosen'.
def choiceStatistics(choiceSet,choice,iterator):
    # First build every count expression ...
    chosenCounts = {alt: Sum(choice == alt, iterator) for alt in choiceSet}

    # ... then register each of them for the report.
    for alt in choiceSet:
        label = 'Alt. %d chosen' % alt
        BIOGEME_OBJECT.STATISTICS[label] = chosenCounts[alt]

    return chosenCounts
38 
## Computes the number of times each alternative is declared available in the data set and asks Biogeme to include it in the output file.
# \ingroup stats
# \param availability <a href="http://docs.python.org/py3k/tutorial/datastructures.html#dictionaries" target="_blank">Dictionary</a> containing, for each alternative, the expression for its availability.
# \param iterator An iterator on the data file.
# \return A <a href="http://docs.python.org/py3k/tutorial/datastructures.html#dictionaries" target="_blank">dictionary</a> n with an entry n[i] for each alternative i containing the number of times it is available.
# \note Each count is also stored in BIOGEME_OBJECT.STATISTICS under the key 'Alt. <i> available'.
def availabilityStatistics(availability,iterator):
    # An alternative counts as available whenever its condition is non-zero.
    availableCounts = {
        alt: Sum((cond != 0), iterator) for alt, cond in availability.items()
    }

    # Register the counts in the same order as the availability dictionary.
    for alt, count in availableCounts.items():
        label = 'Alt. %d available' % alt
        BIOGEME_OBJECT.STATISTICS[label] = count

    return availableCounts
52 
## Computes the constant log likelihood of the sample and asks Biogeme to include it in the output file. It assumes that the full choice set is available for each observation.
# \ingroup stats
# \param choiceSet List containing the alternatives in the choice set.
# \param choice Expression producing the ID of the chosen alternative.
# \param iterator An iterator on the data file.
# \return Log likelihood of a logit model where the only parameters are the alternative specific constants. If \f$n_i\f$ is the number of times alternative \f$i\f$ is chosen, then it is given by \f[ \mathcal{L} = \sum_i n_i \ln(n_i) - n \ln(n) \f] where \f$ n = \sum_i n_i \f$ is the total number of observations.
# \note Availability is ignored here.
# \note The result is stored in BIOGEME_OBJECT.STATISTICS under the key 'Cte loglikelihood (only for full choice sets)'; the per-alternative counts are registered as well, through choiceStatistics().
def cteLoglikelihood(choiceSet,choice,iterator):
    chosenCounts = choiceStatistics(choiceSet,choice,iterator)

    # n_i * ln(n_i) for every alternative.
    # NOTE(review): an alternative that is never chosen yields n_i = 0 and
    # therefore log(0) -- confirm how Biogeme evaluates this case.
    weightedLogs = {alt: count * log(count) for alt, count in chosenCounts.items()}

    # n = sum_i n_i, computed on a copy so the returned counts stay untouched.
    nbObservations = bioMultSum(dict(chosenCounts))

    cteLL = bioMultSum(weightedLogs)
    cteLL -= nbObservations * log(nbObservations)

    BIOGEME_OBJECT.STATISTICS['Cte loglikelihood (only for full choice sets)'] = cteLL
    return cteLL
73 
def choiceStatistics(choiceSet, choice, iterator)
Computes the number of times each alternative is chosen in the data set and asks Biogeme to include it...
Definition: statistics.py:30
def cteLoglikelihood(choiceSet, choice, iterator)
Computes the constant loglikelihood from the sample and asks Biogeme to include it in the output file...
Definition: statistics.py:60
def nullLoglikelihood(availability, iterator)
Computes the null log likelihood from the sample and asks Biogeme to include it in the output file...
Definition: statistics.py:13
def availabilityStatistics(availability, iterator)
Computes the number of times each alternative is declared available in the data set and asks Biogeme t...
Definition: statistics.py:44
Copyright 2016 Michel Bierlaire