# Source code for biogeme.idmanager

"""Combine several arithmetic expressions and a database to obtain formulas

:author: Michel Bierlaire
:date: Sat Jul 30 12:36:40 2022
"""
import inspect
from collections import namedtuple
import biogeme.exceptions as excep
import biogeme.messaging as msg

# Record describing a family of elementary expressions: the expressions
# (a dictionary, or None), the index associated with each name, and
# the list of names.
ElementsTuple = namedtuple(
    'ElementsTuple', ['expressions', 'indices', 'names']
)

# Module-level message handler created from biogeme.messaging.
logger = msg.bioMessage()


class IdManager:
    """Class managing the ids of the elementary expressions (betas,
    random variables, draws and variables) that appear in a list of
    arithmetic expressions.
    """

    def __init__(
        self, expressions, database, number_of_draws, force_new_ids=False
    ):
        """Ctor

        :param expressions: list of expressions
        :type expressions: list(biogeme.expressions.Expression)

        :param database: database with the variables as column names
        :type database: biogeme.database.Database

        :param number_of_draws: number of draws for Monte-Carlo integration
        :type number_of_draws: int

        :param force_new_ids: if True, new ids are calculated for all
            expressions, even if they have already an ID. If False,
            and some expressions already have an ID, an error is
            raised. NOTE: this argument is currently not read by the
            code of this class.
        :type force_new_ids: bool

        :raises biogemeError: if an expression contains a variable and
            no database is provided.

        """
        self.expressions = expressions
        self.database = database
        self.number_of_draws = number_of_draws
        self.elementary_expressions = None
        self.free_betas = None
        self.free_betas_values = None
        self.number_of_free_betas = 0
        self.fixed_betas = None
        self.fixed_betas_values = None
        self.bounds = None
        self.random_variables = None
        self.draws = None
        self.variables = None
        self.requires_draws = False
        for f in self.expressions:
            the_variables = f.setOfVariables()
            if the_variables and database is None:
                raise excep.biogemeError(
                    f'No database is provided and an expression '
                    f'contains variables: {the_variables}'
                )
            if f.embedExpression('MonteCarlo') or f.embedExpression(
                'bioDraws'
            ):
                self.requires_draws = True

        self.prepare()

    def __str__(self):
        return str(self.elementary_expressions.indices)

    def __repr__(self):
        return str(self.elementary_expressions.indices)

    def __eq__(self, other):
        """Two managers are equal if they hold the same elementary
        expressions (names and indices).

        :param other: object to compare with
        :type other: IdManager

        :return: True if the elementary expressions are equal;
            NotImplemented if ``other`` is not an IdManager, so that
            Python falls back to its default comparison.
        :rtype: bool
        """
        if not isinstance(other, IdManager):
            return NotImplemented
        return self.elementary_expressions == other.elementary_expressions

    def audit(self):
        """Performs various checks on the expressions.

        If the database is organized as panel data, every expression
        is asked for the variables used outside a
        PanelLikelihoodTrajectory. If there are any, one error message
        listing all of them is reported. Without a database, no check
        is performed.

        :return: tuple listOfErrors, listOfWarnings
        :rtype: list(string), list(string)
        """
        listOfErrors = []
        listOfWarnings = []
        if self.database is not None and self.database.isPanel():
            # self.expressions is a list: each expression must be
            # queried individually, and the results are merged.
            dict_of_variables = {}
            for f in self.expressions:
                outside = f.dictOfVariablesOutsidePanelTrajectory()
                if outside:
                    dict_of_variables.update(outside)
            if dict_of_variables:
                err_msg = (
                    f'Error in the loglikelihood function. '
                    f'Some variables are not inside '
                    f'PanelLikelihoodTrajectory: '
                    f'{dict_of_variables.keys()} .'
                    f'If the database is organized as panel data, '
                    f'all variables must be used inside a '
                    f'PanelLikelihoodTrajectory. '
                    f'If it is not consistent with your model, '
                    f'generate a flat '
                    f'version of the data using the function '
                    f'`generateFlatPanelDataframe`.'
                )
                listOfErrors.append(err_msg)
        return listOfErrors, listOfWarnings

    def changeInitValues(self, betas):
        """Modifies the values of the parameters

        Free parameters that are absent from ``betas`` (or associated
        with None) keep the initial value of their expression.

        :param betas: dictionary where the keys are the names of the
                      parameters, and the values are the new value for
                      the parameters.
        :type betas: dict(string:float)
        """

        def get_value(name):
            v = betas.get(name)
            if v is None:
                return self.free_betas.expressions[name].initValue
            return v

        self.free_betas_values = [get_value(x) for x in self.free_betas.names]

    def expressions_names_indices(self, dict_of_elements):
        """Assigns consecutive indices to expressions

        :param dict_of_elements: dictionary of expressions. The keys
            are the names.
        :type dict_of_elements: dict(str: biogeme.expressions.Expression)

        :return: a tuple with the original dictionary, the indices,
            and the sorted names.
        :rtype: ElementsTuple
        """
        names = sorted(dict_of_elements)
        indices = {name: i for i, name in enumerate(names)}
        return ElementsTuple(
            expressions=dict_of_elements, indices=indices, names=names
        )

    def _collect_elements(self, extract):
        """Merge the dictionaries obtained from each expression and
        number their keys.

        :param extract: function taking an expression and returning a
            dictionary of elementary expressions, where the keys are
            the names.
        :type extract: callable

        :return: the merged dictionary, the indices and the sorted names.
        :rtype: ElementsTuple
        """
        merged = {}
        for f in self.expressions:
            # In case of identical names, the last expression prevails.
            merged.update(extract(f))
        return self.expressions_names_indices(merged)

    def prepare(self):
        """Extract from the formulas the literals (parameters,
        variables, random variables) and decide a numbering convention.

        The numbering is done in the following order:

        (i) free betas,
        (ii) fixed betas,
        (iii) random variables for numerical integration,
        (iv) random variables for Monte-Carlo integration,
        (v) variables

        The numbering convention will be performed for all expressions
        together, so that the same elementary expressions in several
        expressions will have the same index.

        :raises biogemeError: if the same name is used for more than
            one elementary expression, or if draws are required and no
            database is provided.
        """

        # Free parameters (to be estimated), sorted by alphabetical order
        self.free_betas = self._collect_elements(
            lambda f: f.dictOfBetas(free=True, fixed=False)
        )
        self.bounds = [
            (
                self.free_betas.expressions[b].lb,
                self.free_betas.expressions[b].ub,
            )
            for b in self.free_betas.names
        ]
        self.number_of_free_betas = len(self.free_betas.names)

        # Fixed parameters (not to be estimated), sorted by
        # alphabetical order.
        self.fixed_betas = self._collect_elements(
            lambda f: f.dictOfBetas(free=False, fixed=True)
        )

        # Random variables for numerical integration
        self.random_variables = self._collect_elements(
            lambda f: f.dictOfRandomVariables()
        )

        # Draws
        self.draws = self._collect_elements(lambda f: f.dictOfDraws())

        # Variables
        # Here, we do not extract the variables from the
        # formulas. Instead, we use all the variables in the database,
        # in the order of its columns.
        if self.database is not None:
            variables_names = list(self.database.data.columns.values)
            variables_indices = {
                v: i for i, v in enumerate(variables_names)
            }
            self.variables = ElementsTuple(
                expressions=None,
                indices=variables_indices,
                names=variables_names,
            )
        else:
            self.variables = ElementsTuple(
                expressions=None, indices=None, names=[]
            )

        # Merge all the names
        elementary_expressions_names = (
            self.free_betas.names
            + self.fixed_betas.names
            + self.random_variables.names
            + self.draws.names
            + self.variables.names
        )

        # A name must identify a single elementary expression.
        seen = set()
        duplicates = set()
        for name in elementary_expressions_names:
            if name in seen:
                duplicates.add(name)
            else:
                seen.add(name)
        if duplicates:
            error_msg = (
                f'The following elementary expressions are defined '
                f'more than once: {duplicates}.'
            )
            raise excep.biogemeError(error_msg)

        elementary_expressions_indices = {
            v: i for i, v in enumerate(elementary_expressions_names)
        }

        self.elementary_expressions = ElementsTuple(
            expressions=None,
            indices=elementary_expressions_indices,
            names=elementary_expressions_names,
        )

        self.free_betas_values = [
            self.free_betas.expressions[x].initValue
            for x in self.free_betas.names
        ]
        self.fixed_betas_values = [
            self.fixed_betas.expressions[x].initValue
            for x in self.fixed_betas.names
        ]

        if self.requires_draws:
            # The draws are generated by the database.
            if self.database is None:
                raise excep.biogemeError(
                    'No database is provided and an expression '
                    'requires draws.'
                )
            self.database.generateDraws(
                self.draws.expressions, self.draws.names, self.number_of_draws
            )

    def setDataMap(self, sample):
        """Specify the map of the panel data in the expressions

        :param sample: map of the panel data (see
            :func:`biogeme.database.Database.buildPanelMap`)
        :type sample: pandas.DataFrame
        """
        for f in self.expressions:
            f.cpp.setDataMap(sample)

    def setData(self, sample):
        """Specify the sample in the expressions

        :param sample: the sample, forwarded to each expression
        :type sample: pandas.DataFrame

        """
        for f in self.expressions:
            f.cpp.setData(sample)