Skip to content

Commit

Permalink
New version
Browse files Browse the repository at this point in the history
  • Loading branch information
gdalle committed Feb 22, 2024
1 parent 6d8e254 commit 1e9495d
Show file tree
Hide file tree
Showing 11 changed files with 95 additions and 673 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/draft-pdf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
name: Paper Draft
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Build draft PDF
uses: openjournals/openjournals-draft-action@master
with:
Expand Down
89 changes: 33 additions & 56 deletions paper/HMM.bib
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
@software{antonelloHMMGradientsJlEnables2021,
title = {{{HMMGradients}}.Jl: {{Enables}} Computing the Gradient of the Parameters of {{Hidden Markov Models}} ({{HMMs}})},
shorttitle = {Idiap/{{HMMGradients}}.Jl},
author = {Antonello, Niccolò},
date = {2021-06-07},
doi = {10.5281/zenodo.4454565},
url = {https://doi.org/10.5281/zenodo.4454565},
urldate = {2023-09-12},
organization = {{Zenodo}},
keywords = {hmm},
file = {/home/gdalle/Zotero/storage/PEFYSLF7/4906644.html}
}

@inproceedings{bengioInputOutputHMM1994,
title = {An {{Input Output HMM Architecture}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
Expand All @@ -22,7 +9,7 @@ @inproceedings{bengioInputOutputHMM1994
urldate = {2023-03-12},
abstract = {We introduce a recurrent architecture having a modular structure and we formulate a training procedure based on the EM algorithm. The resulting model has similarities to hidden Markov models, but supports recurrent networks processing style and allows to exploit the supervised learning paradigm while using maximum likelihood estimation.},
keywords = {hmm,thesis},
file = {/home/gdalle/Zotero/storage/68UNNYP2/Bengio_Frasconi_1994_An Input Output HMM Architecture.pdf}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/68UNNYP2/Bengio_Frasconi_1994_An Input Output HMM Architecture.pdf}
}

@article{besanconDistributionsJlDefinition2021,
Expand All @@ -40,7 +27,7 @@ @article{besanconDistributionsJlDefinition2021
abstract = {Random variables and their distributions are a central part in many areas of statistical methods. The Distributions.jl package provides Julia users and developers tools for working with probability distributions, leveraging Julia features for their intuitive and flexible manipulation, while remaining highly efficient through zero-cost abstractions.},
langid = {english},
keywords = {hmm,thesis},
file = {/home/gdalle/Zotero/storage/FZ5V2QNZ/Besancon et al_2021_Distributions.pdf}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/FZ5V2QNZ/Besancon et al_2021_Distributions.pdf}
}

@article{bezansonJuliaFreshApproach2017,
Expand All @@ -58,8 +45,8 @@ @article{bezansonJuliaFreshApproach2017
url = {https://epubs.siam.org/doi/10.1137/141000671},
urldate = {2022-12-03},
langid = {english},
keywords = {hmm,inferopt,povar,thesis,viva},
file = {/home/gdalle/Zotero/storage/YWLISSFK/Bezanson et al_2017_Julia.pdf}
keywords = {bootstrap,hmm,inferopt,povar,thesis,viva},
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/YWLISSFK/Bezanson et al_2017_Julia.pdf}
}

@book{cappeInferenceHiddenMarkov2005,
Expand All @@ -77,7 +64,7 @@ @book{cappeInferenceHiddenMarkov2005
isbn = {978-0-387-40264-2 978-0-387-28982-3},
langid = {english},
keywords = {hmm,povar,thesis},
file = {/home/gdalle/Zotero/storage/2HYZE7ZD/Cappé et al_2005_Inference in Hidden Markov Models.pdf;/home/gdalle/Zotero/storage/QRNV9CL8/Cappé et al. - 2006 - Inference in Hidden Markov Models.pdf}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/2HYZE7ZD/Cappé et al_2005_Inference in Hidden Markov Models.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/QRNV9CL8/Cappé et al. - 2006 - Inference in Hidden Markov Models.pdf}
}

@thesis{dalleMachineLearningCombinatorial2022,
Expand All @@ -89,26 +76,12 @@ @thesis{dalleMachineLearningCombinatorial2022
date = {2022-12-16},
institution = {{École des Ponts ParisTech}},
url = {https://www.theses.fr/2022ENPC0047},
urldate = {2023-03-31},
abbr = {Dissertation},
abstract = {This thesis investigates the frontier between machine learning and combinatorial optimization, two active areas of applied mathematics research. We combine theoretical insights with efficient algorithms, and develop several open source Julia libraries. Inspired by a collaboration with the Société nationale des chemins de fer français (SNCF), we study high-impact use cases from the railway world: train failure prediction, delay propagation, and track allocation.In Part I, we provide mathematical background and describe software implementations for various tools that will be needed later on: implicit differentiation, temporal point processes, Hidden Markov Models and Multi-Agent Path Finding. Our publicly-available code fills a void in the Julia package ecosystem, aiming at ease of use without compromising on performance.In Part II, we highlight theoretical contributions related to both statistics and decision-making. We consider a Vector AutoRegressive process with partial observations, and prove matching upper and lower bounds on the estimation error. We unify and extend the state of the art for combinatorial optimization layers in deep learning, gathering various approaches in a Julia library called InferOpt.jl. We also seek to differentiate through multi-objective optimization layers, which leads to a novel theory of lexicographic convex analysis.In Part III, these mathematical and algorithmic foundations come together to tackle railway problems. We design a hierarchical model of train failures, propose a graph-based framework for delay propagation, and suggest new avenues for track allocation, with the Flatland challenge as a testing ground.},
bibtex_show = {true},
hal = {https://pastel.archives-ouvertes.fr/tel-04053322},
langid = {english},
selected = {true},
keywords = {hmm,paper,website},
file = {/home/gdalle/Zotero/storage/CEVJMUP4/Dalle - Machine learning and combinatorial optimization al.pdf}
}

@software{hmmlearnHmmlearnHiddenMarkov2023,
title = {Hmmlearn: {{Hidden Markov Models}} in {{Python}}, with Scikit-Learn like {{API}}},
author = {{hmmlearn}},
date = {2023},
url = {https://github.com/hmmlearn/hmmlearn},
urldate = {2023-09-12},
abstract = {Hidden Markov Models in Python, with scikit-learn like API},
organization = {{hmmlearn}},
keywords = {hmm}
pdf = {https://pastel.archives-ouvertes.fr/tel-04053322},
keywords = {cv,hmm,website},
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/CEVJMUP4/Dalle - Machine learning and combinatorial optimization al.pdf}
}

@software{mouchetHMMBaseJlHidden2023,
Expand All @@ -121,6 +94,16 @@ @software{mouchetHMMBaseJlHidden2023
keywords = {hmm}
}

@book{murphyProbabilisticMachineLearning2023,
title = {Probabilistic Machine Learning: Advanced Topics},
author = {Murphy, Kevin P.},
date = {2023},
publisher = {{The MIT Press}},
url = {https://probml.ai},
keywords = {hmm,todo},
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/DXSP888K/Murphy - 2023 - Probabilistic machine learning advanced topics.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/XMNWZH35/supp2.pdf}
}

@unpublished{ondelGPUAcceleratedForwardBackwardAlgorithm2021,
title = {{{GPU-Accelerated Forward-Backward Algorithm}} with {{Application}} to {{Lattice-Free MMI}}},
author = {Ondel, Lucas and Lam-Yee-Mui, Léa-Marie and Kocour, Martin and Corro, Caio Filippo and Burget, Lukáš},
Expand All @@ -129,7 +112,18 @@ @unpublished{ondelGPUAcceleratedForwardBackwardAlgorithm2021
urldate = {2023-09-12},
abstract = {We propose to express the forward-backward algorithm in terms of operations between sparse matrices in a specific semiring. This new perspective naturally leads to a GPU-friendly algorithm which is easy to implement in Julia or any programming languages with native support of semiring algebra. We use this new implementation to train a TDNN with the LF-MMI objective function and we compare the training time of our system with PyChain-a recently introduced C++/CUDA implementation of the LF-MMI loss. Our implementation is about two times faster while not having to use any approximation such as the "leaky-HMM".},
keywords = {hmm},
file = {/home/gdalle/Zotero/storage/XRKC5QBG/Ondel et al. - 2021 - GPU-Accelerated Forward-Backward Algorithm with Ap.pdf}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/XRKC5QBG/Ondel et al. - 2021 - GPU-Accelerated Forward-Backward Algorithm with Ap.pdf}
}

@software{ProbmlDynamax2024,
title = {Probml/Dynamax},
date = {2024-02-22T04:10:59Z},
origdate = {2022-04-11T23:42:29Z},
url = {https://github.com/probml/dynamax},
urldate = {2024-02-22},
abstract = {State Space Models library in JAX},
organization = {{Probabilistic machine learning}},
keywords = {hmm}
}

@article{qinDirectOptimizationApproach2000,
Expand All @@ -148,7 +142,7 @@ @article{qinDirectOptimizationApproach2000
abstract = {Hidden Markov modeling (HMM) provides an effective approach for modeling single channel kinetics. Standard HMM is based on Baum's reestimation. As applied to single channel currents, the algorithm has the inability to optimize the rate constants directly. We present here an alternative approach by considering the problem as a general optimization problem. The quasi-Newton method is used for searching the likelihood surface. The analytical derivatives of the likelihood function are derived, thereby maximizing the efficiency of the optimization. Because the rate constants are optimized directly, the approach has advantages such as the allowance for model constraints and the ability to simultaneously fit multiple data sets obtained at different experimental conditions. Numerical examples are presented to illustrate the performance of the algorithm. Comparisons with Baum's reestimation suggest that the approach has a superior convergence speed when the likelihood surface is poorly defined due to, for example, a low signal-to-noise ratio or the aggregation of multiple states having identical conductances.},
langid = {english},
keywords = {hmm,thesis,viva},
file = {/home/gdalle/Zotero/storage/EPDNRHUX/Qin et al. - 2000 - A Direct Optimization Approach to Hidden Markov Mo.pdf;/home/gdalle/Zotero/storage/6C5WNKEU/S0006349500764411.html}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/EPDNRHUX/Qin et al. - 2000 - A Direct Optimization Approach to Hidden Markov Mo.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/6C5WNKEU/S0006349500764411.html}
}

@article{rabinerTutorialHiddenMarkov1989,
Expand All @@ -164,7 +158,7 @@ @article{rabinerTutorialHiddenMarkov1989
abstract = {This tutorial provides an overview of the basic theory of hidden Markov models (HMMs) as originated by L.E. Baum and T. Petrie (1966) and gives practical details on methods of implementation of the theory along with a description of selected applications of the theory to distinct problems in speech recognition. Results from a number of original sources are combined to provide a single source of acquiring the background required to pursue further this area of research. The author first reviews the theory of discrete Markov chains and shows how the concept of hidden states, where the observation is a probabilistic function of the state, can be used effectively. The theory is illustrated with two simple examples, namely coin-tossing, and the classic balls-in-urns system. Three fundamental problems of HMMs are noted and several practical techniques for solving these problems are given. The various types of HMMs that have been studied, including ergodic as well as left-right models, are described.{$<>$}},
eventtitle = {Proceedings of the {{IEEE}}},
keywords = {done,hmm,thesis,viva},
file = {/home/gdalle/Zotero/storage/A68ILRMJ/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/Zotero/storage/BEJEKP4E/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/Zotero/storage/5BHQF7ME/18626.html}
file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/A68ILRMJ/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/BEJEKP4E/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/5BHQF7ME/18626.html}
}

@software{rowleyLogarithmicNumbersJlLogarithmic2023,
Expand All @@ -177,23 +171,6 @@ @software{rowleyLogarithmicNumbersJlLogarithmic2023
keywords = {hmm}
}

@article{schreiberPomegranateFastFlexible2018a,
title = {Pomegranate: {{Fast}} and {{Flexible Probabilistic Modeling}} in {{Python}}},
shorttitle = {Pomegranate},
author = {Schreiber, Jacob},
date = {2018},
journaltitle = {Journal of Machine Learning Research},
volume = {18},
number = {164},
pages = {1--6},
issn = {1533-7928},
url = {http://jmlr.org/papers/v18/17-636.html},
urldate = {2023-09-12},
langid = {english},
keywords = {⛔ No DOI found,hmm},
file = {/home/gdalle/Zotero/storage/6DQMARYF/Schreiber - 2018 - pomegranate Fast and Flexible Probabilistic Model.pdf}
}

@software{whiteJuliaDiffChainRulesJl2022,
title = {{{JuliaDiff}}/{{ChainRules}}{{.jl}}: V1.44.7},
shorttitle = {{{JuliaDiff}}/{{ChainRules}}.Jl},
Expand All @@ -205,5 +182,5 @@ @software{whiteJuliaDiffChainRulesJl2022
abstract = {ChainRules v1.44.7 Diff since v1.44.6 {$<$}strong{$>$}Closed issues:{$<$}/strong{$>$} cat with Val tuple dims fails (\#678) {$<$}strong{$>$}Merged pull requests:{$<$}/strong{$>$} Fix for ChainRulesCore \#586 (\#675) (@rofinn) fix cat rrule (\#679) (@cossio)},
organization = {{Zenodo}},
version = {v1.44.7},
keywords = {\#nosource,hmm,inferopt,thesis}
keywords = {#nosource,hmm,inferopt,thesis}
}
74 changes: 0 additions & 74 deletions paper/images/high_dim_baum_welch_(D=10,T=200,K=50,I=10).svg

This file was deleted.

70 changes: 0 additions & 70 deletions paper/images/high_dim_forward_backward_(D=10,T=200,K=50).svg

This file was deleted.

74 changes: 0 additions & 74 deletions paper/images/high_dim_logdensity_(D=10,T=200,K=50).svg

This file was deleted.

62 changes: 0 additions & 62 deletions paper/images/high_dim_viterbi_(D=10,T=200,K=50).svg

This file was deleted.

72 changes: 0 additions & 72 deletions paper/images/low_dim_baum_welch_(D=1,T=1000,K=1,I=10).svg

This file was deleted.

70 changes: 0 additions & 70 deletions paper/images/low_dim_forward_backward_(D=1,T=1000,K=1).svg

This file was deleted.

70 changes: 0 additions & 70 deletions paper/images/low_dim_logdensity_(D=1,T=1000,K=1).svg

This file was deleted.

70 changes: 0 additions & 70 deletions paper/images/low_dim_viterbi_(D=1,T=1000,K=1).svg

This file was deleted.

Loading

0 comments on commit 1e9495d

Please sign in to comment.