In the context of discrete choice modeling, the extraction of potential behavioral insights from large datasets is often limited by the poor scalability of maximum likelihood estimation. This paper proposes a simple and fast dataset-reduction method that is specifically designed to preserve the richness of observations originally present in a dataset, while reducing the computational complexity of the estimation process. Our approach, called LSH-DR, leverages locality-sensitive hashing to create homogeneous clusters, from which representative observations are then sampled and weighted. We demonstrate the efficacy of our approach by applying it on a real-world mode choice dataset: the obtained results show that the samples generated by LSH-DR allow for substantial savings in estimation time while preserving estimation efficiency at little cost.
% Journal article entry. 100467 matches the DOI suffix, so it appears to be the
% article identifier rather than an issue number; it is stored in `pages`.
% `{de Lapparent}` stays braced so the particle remains part of the surname.
@Article{OrteLappBier2024,
  author  = {Ortelli, Nicola and {de Lapparent}, Matthieu and Bierlaire, Michel},
  title   = {Resampling Estimation of Discrete Choice Models},
  journal = {Journal of Choice Modelling},
  year    = {2024},
  volume  = {50},
  pages   = {100467},
  doi     = {10.1016/j.jocm.2023.100467},
  note    = {Accepted on Dec 20, 2023},
}