From cd105a7bef6d1c52640bb977188693ee09aa2005 Mon Sep 17 00:00:00 2001 From: MaksimEkin Date: Fri, 26 Apr 2024 18:37:16 -0600 Subject: [PATCH 01/11] update develop branch version --- CITATION.cff | 2 +- README.md | 2 +- TELF/version.py | 2 +- docs/source/conf.py | 2 +- docs/source/index.rst | 2 +- setup.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 85c3150..0a6d6af 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,7 +20,7 @@ authors: - family-names: Alexandrov given-names: Boian title: "Tensor Extraction of Latent Features (T-ELF)" -version: 0.0.17 +version: 0.0.18 url: https://github.com/lanl/T-ELF doi: 10.5281/zenodo.10257897 date-released: 2023-12-04 diff --git a/README.md b/README.md index 36788fe..967f243 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ If you use T-ELF please cite. **APA:** ```latex -Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.17) [Computer software]. https://doi.org/10.5281/zenodo.10257897 +Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.18) [Computer software]. https://doi.org/10.5281/zenodo.10257897 ``` **BibTeX:** diff --git a/TELF/version.py b/TELF/version.py index 39d352f..1ac739d 100644 --- a/TELF/version.py +++ b/TELF/version.py @@ -1 +1 @@ -__version__ = '0.0.17' \ No newline at end of file +__version__ = '0.0.18' \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 93bc28a..dcc2c78 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ project = 'TELF' copyright = '2022, LANL' author = 'Maksim E. Eren, Nicholas Solovyev, Ryan Barron, Manish Bhattarai, Ismael Boureima, Erik Skau, Kim Rasmussen, Boian S. Alexandrov' -release = '0.0.17' +release = '0.0.18' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/index.rst b/docs/source/index.rst index 4424a61..1740c3f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,7 +104,7 @@ How to Cite T-ELF? .. code-block:: console - Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.17) [Computer software]. https://doi.org/10.5281/zenodo.10257897 + Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.18) [Computer software]. https://doi.org/10.5281/zenodo.10257897 **BibTeX:** diff --git a/setup.py b/setup.py index eebe4e0..aaebd0e 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages from glob import glob -__version__ = "0.0.17" +__version__ = "0.0.18" # add readme with open('README.md', 'r') as f: From e14817830554e120327f935c11acf75e366adffc Mon Sep 17 00:00:00 2001 From: MaksimEkin Date: Mon, 29 Apr 2024 13:55:08 -0600 Subject: [PATCH 02/11] fix bug on H_sill_thresh --- TELF/factorization/NMFk.py | 6 +++--- examples/NMFk/NMFk_wtsi.ipynb | 25 ++++++++++--------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py index 5ff4c1b..43285ea 100644 --- a/TELF/factorization/NMFk.py +++ b/TELF/factorization/NMFk.py @@ -359,7 +359,7 @@ def _nmf_parallel_wrapper( with K_search_settings['lock']: if min(sils_min_W, sils_min_H) >= K_search_settings["sill_thresh"]: K_search_settings['k_min'] = k - if K_search_settings["H_sill_thresh"] >= 0 and (sils_min_H <= K_search_settings["H_sill_thresh"]): + if K_search_settings["H_sill_thresh"] is not None and (sils_min_H <= K_search_settings["H_sill_thresh"]): K_search_settings['k_max'] = k if n_nodes > 1: @@ -507,7 +507,7 @@ def __init__( get_plot_data=False, simple_plot=True, k_search_method="linear", - H_sill_thresh=-1 + H_sill_thresh=None ): """ NMFk is a Non-negative Matrix Factorization module with the capability to do automatic model determination. @@ -628,7 +628,7 @@ def __init__( Setting for removing higher ranks from the search space.\n When searching for the optimal rank with binary search using ``k_search='bst_post'`` or ``k_search='bst_pre'``, this hyper-parameter can be used to cut off higher ranks from search space.\n The cut-off of higher ranks from the search space is based on threshold for H silhouette. When a H silhouette below ``H_sill_thresh`` is found for a given rank or K, all higher ranks are removed from the search space.\n - If ``H_sill_thresh=-1``, it is not used. The default is -1. + If ``H_sill_thresh=None``, it is not used. The default is None. Returns ------- None. diff --git a/examples/NMFk/NMFk_wtsi.ipynb b/examples/NMFk/NMFk_wtsi.ipynb index 09548d3..3479979 100644 --- a/examples/NMFk/NMFk_wtsi.ipynb +++ b/examples/NMFk/NMFk_wtsi.ipynb @@ -159,9 +159,9 @@ "sill_thresh: 0.9\n", "predict_k: True\n", "predict_k_method: sill\n", - "n_jobs: 2\n", + "n_jobs: 12\n", "n_nodes: 1\n", - "nmf: \n", + "nmf: \n", "nmf_method: nmf_kl_mu\n", "nmf_obj_params: {}\n", "pruned: True\n", @@ -175,8 +175,8 @@ "perturb_multiprocessing: False\n", "k_search_method: bst_post\n", "H_sill_thresh: 0.1\n", - "lock: \n", - "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n", + "lock: \n", + "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n", "total_exec_seconds: 0\n", "experiment_name: \n", "nmf_params: {'niter': 1000, 'use_gpu': False, 'nmf_verbose': False, 'mask': None, 'use_consensus_stopping': False}\n" @@ -186,7 +186,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:697: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n", + "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:700: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n", " warnings.warn(\n" ] } @@ -231,20 +231,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:852: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n", + "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:855: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n", " warnings.warn(\n", - " 0%| | 0/20 [00:00 Date: Mon, 29 Apr 2024 14:24:47 -0600 Subject: [PATCH 03/11] add option to use either W sill for k prediction, H sill for k prediction, or both --- TELF/factorization/NMFk.py | 58 +++++++++++++++++++++++++---------- examples/NMFk/NMFk_wtsi.ipynb | 35 +++++++++------------ 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py index 43285ea..d259066 100644 --- a/TELF/factorization/NMFk.py +++ b/TELF/factorization/NMFk.py @@ -174,7 +174,7 @@ def _nmf_parallel_wrapper( mask=None, consensus_mat=False, predict_k=False, - predict_k_method="sill", + predict_k_method="WH_sill", pruned=True, perturb_rows=None, perturb_cols=None, @@ -357,7 +357,19 @@ def _nmf_parallel_wrapper( # if K_search_settings["k_search_method"] != "linear": with K_search_settings['lock']: - if min(sils_min_W, sils_min_H) >= K_search_settings["sill_thresh"]: + + if predict_k_method in ["WH_sill", "sill"]: + curr_score = min(sils_min_W, sils_min_H) + elif predict_k_method == "W_sill": + curr_score = sils_min_W + elif predict_k_method == "H_sill": + curr_score = sils_min_H + elif predict_k_method == "pvalue": + curr_score = sils_min_W + else: + raise Exception("Unknown predict_k_method!") + + if curr_score >= K_search_settings["sill_thresh"]: K_search_settings['k_min'] = k if K_search_settings["H_sill_thresh"] is not None and (sils_min_H <= K_search_settings["H_sill_thresh"]): K_search_settings['k_max'] = k @@ -488,7 +500,7 @@ def __init__( save_output=True, collect_output=False, predict_k=False, - predict_k_method="sill", + predict_k_method="WH_sill", verbose=True, nmf_verbose=False, perturb_verbose=False, @@ -549,13 +561,15 @@ def __init__( Even when ``predict_k=False``, number of latent factors can be estimated using the figures saved in ``save_path``. predict_k_method : str, optional - Method to use when performing automatic k prediction. Default is "sill".\n + Method to use when performing automatic k prediction. Default is "WH_sill".\n * ``predict_k_method='pvalue'`` will use L-Statistics with column-wise error for automatically estimating the number of latent factors.\n - * ``predict_k_method='sill'`` will use Silhouette score for estimating the number of latent factors. - + * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors. + * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors. + * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors. + * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``. .. warning:: - ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='sill'``, on the other hand, will be much faster. + ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster. verbose : bool, optional If True, shows progress in each k. The default is True. @@ -622,8 +636,8 @@ def __init__( k_search_method : str, optional Which approach to use when searching for the rank or k. The default is "linear".\n * ``k_search_method='linear'`` will linearly visit each K given in ``Ks`` hyper-parameter of the ``fit()`` function.\n - * ``k_search_method='bst_post'`` will perform post-order binary search. When an ideal rank is found with ``min(W silhouette, H silhouette) >= sill_thresh``, all lower ranks are pruned from the search space. - * ``k_search_method='bst_pre'`` will perform pre-order binary search. When an ideal rank is found with ``min(W silhouette, H silhouette) >= sill_thresh``, all lower ranks are pruned from the search space. + * ``k_search_method='bst_post'`` will perform post-order binary search. When an ideal rank is found, determined by the selected ``predict_k_method``, all lower ranks are pruned from the search space. + * ``k_search_method='bst_pre'`` will perform pre-order binary search. When an ideal rank is found, determined by the selected ``predict_k_method``, all lower ranks are pruned from the search space. H_sill_thresh : float, optional Setting for removing higher ranks from the search space.\n When searching for the optimal rank with binary search using ``k_search='bst_post'`` or ``k_search='bst_pre'``, this hyper-parameter can be used to cut off higher ranks from search space.\n @@ -680,7 +694,7 @@ def __init__( # warnings assert self.k_search_method in ["linear", "bst_pre", "bst_post"], "Invalid k_search_method method. Choose from linear, bst_pre, or bst_post." - assert self.predict_k_method in ["pvalue", "sill"], "Invalid predict_k_method method. Choose from pvalue, sill." + assert self.predict_k_method in ["pvalue", "WH_sill", "W_sill", "H_sill", "sill"], "Invalid predict_k_method method. Choose from pvalue, WH_sill, W_sill, H_sill, or sill. sill defaults to WH_sill." if self.calculate_pac and not self.consensus_mat: self.consensus_mat = True warnings.warn("consensus_mat was False when calculate_pac was True! consensus_mat changed to True.") @@ -1136,18 +1150,30 @@ def fit(self, X, Ks, name="NMFk", note=""): combined_result["col_err"], Ks, combined_result["sils_min_W"], SILL_thr=self.sill_thresh )[0] - elif self.predict_k_method == "sill": - + else: + if self.predict_k_method in ["WH_sill", "sill"]: + curr_sill_max_score = min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])]) + elif self.predict_k_method == "W_sill": + curr_sill_max_score = max(combined_result["sils_min_W"]) + elif self.predict_k_method == "H_sill": + curr_sill_max_score = max(combined_result["sils_min_H"]) + # check if that sill threshold exist - if self.sill_thresh > min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])]): - self.sill_thresh = min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])]) + if self.sill_thresh > curr_sill_max_score: + self.sill_thresh = curr_sill_max_score warnings.warn(f'W or H Silhouettes were all less than sill_thresh. Setting sill_thresh to minimum for K prediction. sill_thresh={round(self.sill_thresh, 3)}') - + k_predict_W = Ks[np.max(np.argwhere( np.array(combined_result["sils_min_W"]) >= self.sill_thresh).flatten())] k_predict_H = Ks[np.max(np.argwhere( np.array(combined_result["sils_min_H"]) >= self.sill_thresh).flatten())] - k_predict = min(k_predict_W, k_predict_H) + + if self.predict_k_method in ["WH_sill", "sill"]: + k_predict = min(k_predict_W, k_predict_H) + elif self.predict_k_method == "W_sill": + k_predict = k_predict_W + elif self.predict_k_method == "H_sill": + k_predict = k_predict_H else: k_predict = 0 diff --git a/examples/NMFk/NMFk_wtsi.ipynb b/examples/NMFk/NMFk_wtsi.ipynb index 3479979..136999d 100644 --- a/examples/NMFk/NMFk_wtsi.ipynb +++ b/examples/NMFk/NMFk_wtsi.ipynb @@ -74,7 +74,7 @@ "\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/spacy-3.7.2-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n", "\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/thinc-8.2.2-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n", "\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/MarkupSafe-2.1.5-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: mat73 in /Users/maksim/Desktop/Code/T-ELF_public/.eggs/mat73-0.62-py3.11.egg (0.62)\n", + "\u001b[0mRequirement already satisfied: mat73 in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages (0.63)\n", "Requirement already satisfied: h5py in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/h5py-3.10.0-py3.11-macosx-11.1-arm64.egg (from mat73) (3.10.0)\n", "Requirement already satisfied: numpy in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg (from mat73) (1.26.2)\n" ] @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "eb6cea29", "metadata": {}, "outputs": [ @@ -158,10 +158,10 @@ "collect_output: True\n", "sill_thresh: 0.9\n", "predict_k: True\n", - "predict_k_method: sill\n", - "n_jobs: 12\n", + "predict_k_method: pvalue\n", + "n_jobs: 1\n", "n_nodes: 1\n", - "nmf: \n", + "nmf: \n", "nmf_method: nmf_kl_mu\n", "nmf_obj_params: {}\n", "pruned: True\n", @@ -175,8 +175,8 @@ "perturb_multiprocessing: False\n", "k_search_method: bst_post\n", "H_sill_thresh: 0.1\n", - "lock: \n", - "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n", + "lock: \n", + "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n", "total_exec_seconds: 0\n", "experiment_name: \n", "nmf_params: {'niter': 1000, 'use_gpu': False, 'nmf_verbose': False, 'mask': None, 'use_consensus_stopping': False}\n" @@ -186,7 +186,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:700: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n", + "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:718: UserWarning: predict_k is True with pvalue method! Predicting k can make the runtime significantly longer. Consider using predict_k_method='sill'.\n", " warnings.warn(\n" ] } @@ -197,7 +197,7 @@ " \"n_perturbs\":36,\n", " \"n_iters\":1000,\n", " \"epsilon\":0.015,\n", - " \"n_jobs\":-1,\n", + " \"n_jobs\":1,\n", " \"init\":\"nnsvd\", \n", " \"use_gpu\":False,\n", " \"save_path\":\"../../results/\",\n", @@ -207,6 +207,7 @@ " \"verbose\":True,\n", " \"transpose\":False,\n", " \"sill_thresh\":0.9,\n", + " \"predict_k_method\":\"WH_sill\",\n", " \"nmf_verbose\":False,\n", " \"nmf_method\":'nmf_kl_mu',\n", " 'pruned':True,\n", @@ -218,12 +219,12 @@ "}\n", "model = NMFk(**params)\n", "\n", - "Ks = range(1,21,1)" + "Ks = range(1,11,1)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "104c2753", "metadata": {}, "outputs": [ @@ -231,15 +232,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:855: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n", - " warnings.warn(\n", - " 0%| | 0/20 [00:00 Date: Mon, 29 Apr 2024 14:27:08 -0600 Subject: [PATCH 04/11] add option to use either W sill for k prediction, H sill for k prediction, or both --- TELF/factorization/NMFk.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py index d259066..0436dda 100644 --- a/TELF/factorization/NMFk.py +++ b/TELF/factorization/NMFk.py @@ -563,10 +563,10 @@ def __init__( predict_k_method : str, optional Method to use when performing automatic k prediction. Default is "WH_sill".\n * ``predict_k_method='pvalue'`` will use L-Statistics with column-wise error for automatically estimating the number of latent factors.\n - * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors. - * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors. - * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors. - * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``. + * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n + * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n + * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n + * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.\n .. warning:: ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster. From 5590dee5949c118082b0ebb4c8d369fe3e890e6c Mon Sep 17 00:00:00 2001 From: MaksimEkin Date: Mon, 29 Apr 2024 14:29:37 -0600 Subject: [PATCH 05/11] add option to use either W sill for k prediction, H sill for k prediction, or both --- TELF/factorization/NMFk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py index 0436dda..aef556e 100644 --- a/TELF/factorization/NMFk.py +++ b/TELF/factorization/NMFk.py @@ -566,7 +566,7 @@ def __init__( * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n - * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.\n + * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``.\n .. warning:: ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster. From 07b1c3e2fc41c2c056ba961cfc1fe77180ca2440 Mon Sep 17 00:00:00 2001 From: MaksimEkin Date: Mon, 29 Apr 2024 14:29:56 -0600 Subject: [PATCH 06/11] add option to use either W sill for k prediction, H sill for k prediction, or both --- TELF/factorization/NMFk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py index aef556e..1c9ac41 100644 --- a/TELF/factorization/NMFk.py +++ b/TELF/factorization/NMFk.py @@ -566,7 +566,8 @@ def __init__( * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n - * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``.\n + * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``. + .. warning:: ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster. From 53923fe42a1ed8f92b97b8e2d907336ab8a02397 Mon Sep 17 00:00:00 2001 From: MaksimEkin Date: Mon, 29 Apr 2024 14:36:20 -0600 Subject: [PATCH 07/11] update documentation --- docs/Beaver.html | 6 +- docs/Cheetah.html | 6 +- docs/HNMFk.html | 6 +- docs/NMFk.html | 23 +++--- docs/RESCALk.html | 6 +- docs/SymNMFk.html | 6 +- docs/TELF.factorization.decompositions.html | 6 +- ...actorization.decompositions.utilities.html | 6 +- docs/TELF.factorization.html | 23 +++--- docs/TELF.factorization.utilities.html | 6 +- docs/TELF.html | 6 +- docs/TELF.pre_processing.Beaver.html | 6 +- docs/TELF.pre_processing.Vulture.html | 8 +- ...re_processing.Vulture.tokens_analysis.html | 6 +- docs/TELF.pre_processing.html | 6 +- docs/TriNMFk.html | 6 +- docs/Vulture.html | 8 +- .../TELF/applications/Cheetah/cheetah.html | 6 +- docs/_modules/TELF/factorization/HNMFk.html | 6 +- docs/_modules/TELF/factorization/NMFk.html | 73 ++++++++++++------ docs/_modules/TELF/factorization/RESCALk.html | 6 +- docs/_modules/TELF/factorization/SymNMFk.html | 6 +- docs/_modules/TELF/factorization/TriNMFk.html | 6 +- .../decompositions/nmf_fro_admm.html | 6 +- .../decompositions/nmf_fro_mu.html | 6 +- .../decompositions/nmf_kl_admm.html | 6 +- .../decompositions/nmf_kl_mu.html | 6 +- .../decompositions/nmf_mc_fro_mu.html | 6 +- .../decompositions/rescal_fro_mu.html | 6 +- .../decompositions/tri_nmf_fro_mu.html | 6 +- .../utilities/bool_clustering.html | 6 +- .../decompositions/utilities/bool_noise.html | 6 +- .../decompositions/utilities/clustering.html | 6 +- .../utilities/concensus_matrix.html | 6 +- .../utilities/data_reshaping.html | 6 +- .../utilities/generic_utils.html | 6 +- .../decompositions/utilities/math_utils.html | 6 +- .../decompositions/utilities/nnsvd.html | 6 +- .../decompositions/utilities/resample.html | 6 +- .../decompositions/utilities/silhouettes.html | 6 +- .../factorization/utilities/clustering.html | 6 +- .../utilities/co_occurance_matrix.html | 6 +- .../utilities/organize_n_jobs.html | 6 +- .../factorization/utilities/plot_NMFk.html | 6 +- .../utilities/pvalue_analysis.html | 6 +- .../factorization/utilities/sppmi_matrix.html | 6 +- .../factorization/utilities/take_note.html | 6 +- .../factorization/utilities/vectorize.html | 6 +- .../TELF/pre_processing/Beaver/beaver.html | 6 +- .../pre_processing/Beaver/cooccurrence.html | 6 +- .../TELF/pre_processing/Beaver/sppmi.html | 6 +- .../TELF/pre_processing/Beaver/tenmat.html | 6 +- .../TELF/pre_processing/Beaver/vectorize.html | 6 +- .../Vulture/tokens_analysis/top_words.html | 6 +- .../TELF/pre_processing/Vulture/vulture.html | 6 +- docs/_modules/index.html | 6 +- docs/_sources/index.rst | 2 +- docs/_static/documentation_options.js | 2 +- docs/doctrees/NMFk.doctree | Bin 97971 -> 100344 bytes docs/doctrees/TELF.factorization.doctree | Bin 194937 -> 197343 bytes .../TELF.pre_processing.Vulture.doctree | Bin 44272 -> 44272 bytes docs/doctrees/Vulture.doctree | Bin 52684 -> 52684 bytes docs/doctrees/environment.pickle | Bin 3118448 -> 3126480 bytes docs/doctrees/index.doctree | Bin 35064 -> 35064 bytes docs/genindex.html | 6 +- docs/index.html | 8 +- docs/modules.html | 6 +- docs/py-modindex.html | 6 +- docs/search.html | 6 +- docs/searchindex.js | 2 +- 70 files changed, 256 insertions(+), 223 deletions(-) diff --git a/docs/Beaver.html b/docs/Beaver.html index 3d69c51..03f8bd4 100644 --- a/docs/Beaver.html +++ b/docs/Beaver.html @@ -8,7 +8,7 @@ - TELF.pre_processing.Beaver: Fast matrix and tensor building tool — TELF 0.0.17 documentation + TELF.pre_processing.Beaver: Fast matrix and tensor building tool — TELF 0.0.18 documentation @@ -37,7 +37,7 @@ - + @@ -127,7 +127,7 @@ -

TELF 0.0.17 documentation

+

TELF 0.0.18 documentation

-
  • predict_k_method (str, optional) –

    Method to use when performing automatic k prediction. Default is “sill”.

    +
  • predict_k_method (str, optional) –

    Method to use when performing automatic k prediction. Default is “WH_sill”.

    • predict_k_method='pvalue' will use L-Statistics with column-wise error for automatically estimating the number of latent factors.

    • -
    • predict_k_method='sill' will use Silhouette score for estimating the number of latent factors.

    • +
    • predict_k_method='WH_sill' will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.

    • +
    • predict_k_method='W_sill' will use Silhouette scores from W latent factor for estimating the number of latent factors.

    • +
    • predict_k_method='H_sill' will use Silhouette scores from H latent factor for estimating the number of latent factors.

    • +
    • predict_k_method='sill' will default to predict_k_method='WH_sill'.

    Warning

    -

    predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='sill', on the other hand, will be much faster.

    +

    predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='WH_sill', on the other hand, will be much faster.

  • verbose (bool, optional) – If True, shows progress in each k. The default is True.

  • @@ -523,14 +526,14 @@

    Available Functionsstr, optional

    Which approach to use when searching for the rank or k. The default is “linear”.

    • k_search_method='linear' will linearly visit each K given in Ks hyper-parameter of the fit() function.

    • -
    • k_search_method='bst_post' will perform post-order binary search. When an ideal rank is found with min(W silhouette, H silhouette) >= sill_thresh, all lower ranks are pruned from the search space.

    • -
    • k_search_method='bst_pre' will perform pre-order binary search. When an ideal rank is found with min(W silhouette, H silhouette) >= sill_thresh, all lower ranks are pruned from the search space.

    • +
    • k_search_method='bst_post' will perform post-order binary search. When an ideal rank is found, determined by the selected predict_k_method, all lower ranks are pruned from the search space.

    • +
    • k_search_method='bst_pre' will perform pre-order binary search. When an ideal rank is found, determined by the selected predict_k_method, all lower ranks are pruned from the search space.

    H_sill_threshfloat, optional

    Setting for removing higher ranks from the search space.

    When searching for the optimal rank with binary search using k_search='bst_post' or k_search='bst_pre', this hyper-parameter can be used to cut off higher ranks from search space.

    The cut-off of higher ranks from the search space is based on threshold for H silhouette. When a H silhouette below H_sill_thresh is found for a given rank or K, all higher ranks are removed from the search space.

    -

    If H_sill_thresh=-1, it is not used. The default is -1.

    +

    If H_sill_thresh=None, it is not used. The default is None.

    diff --git a/docs/RESCALk.html b/docs/RESCALk.html index 2491a86..9baf562 100644 --- a/docs/RESCALk.html +++ b/docs/RESCALk.html @@ -8,7 +8,7 @@ - TELF.factorization.RESCALk: RESCAL with Automatic Model Determination — TELF 0.0.17 documentation + TELF.factorization.RESCALk: RESCAL with Automatic Model Determination — TELF 0.0.18 documentation @@ -37,7 +37,7 @@ - + @@ -127,7 +127,7 @@ -

    TELF 0.0.17 documentation

    +

    TELF 0.0.18 documentation

    -
  • predict_k_method (str, optional) –

    Method to use when performing automatic k prediction. Default is “sill”.

    +
  • predict_k_method (str, optional) –

    Method to use when performing automatic k prediction. Default is “WH_sill”.

    • predict_k_method='pvalue' will use L-Statistics with column-wise error for automatically estimating the number of latent factors.

    • -
    • predict_k_method='sill' will use Silhouette score for estimating the number of latent factors.

    • +
    • predict_k_method='WH_sill' will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.

    • +
    • predict_k_method='W_sill' will use Silhouette scores from W latent factor for estimating the number of latent factors.

    • +
    • predict_k_method='H_sill' will use Silhouette scores from H latent factor for estimating the number of latent factors.

    • +
    • predict_k_method='sill' will default to predict_k_method='WH_sill'.

    Warning

    -

    predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='sill', on the other hand, will be much faster.

    +

    predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='WH_sill', on the other hand, will be much faster.

  • verbose (bool, optional) – If True, shows progress in each k. The default is True.

  • @@ -608,14 +611,14 @@

    Submodulesstr, optional

    Which approach to use when searching for the rank or k. The default is “linear”.

    • k_search_method='linear' will linearly visit each K given in Ks hyper-parameter of the fit() function.

    • -
    • k_search_method='bst_post' will perform post-order binary search. When an ideal rank is found with min(W silhouette, H silhouette) >= sill_thresh, all lower ranks are pruned from the search space.

    • -
    • k_search_method='bst_pre' will perform pre-order binary search. When an ideal rank is found with min(W silhouette, H silhouette) >= sill_thresh, all lower ranks are pruned from the search space.

    • +
    • k_search_method='bst_post' will perform post-order binary search. When an ideal rank is found, determined by the selected predict_k_method, all lower ranks are pruned from the search space.

    • +
    • k_search_method='bst_pre' will perform pre-order binary search. When an ideal rank is found, determined by the selected predict_k_method, all lower ranks are pruned from the search space.

    H_sill_threshfloat, optional

    Setting for removing higher ranks from the search space.

    When searching for the optimal rank with binary search using k_search='bst_post' or k_search='bst_pre', this hyper-parameter can be used to cut off higher ranks from search space.

    The cut-off of higher ranks from the search space is based on threshold for H silhouette. When a H silhouette below H_sill_thresh is found for a given rank or K, all higher ranks are removed from the search space.

    -

    If H_sill_thresh=-1, it is not used. The default is -1.

    +

    If H_sill_thresh=None, it is not used. The default is None.

    diff --git a/docs/TELF.factorization.utilities.html b/docs/TELF.factorization.utilities.html index 6df1a50..9e55110 100644 --- a/docs/TELF.factorization.utilities.html +++ b/docs/TELF.factorization.utilities.html @@ -8,7 +8,7 @@ - TELF.factorization.utilities package — TELF 0.0.17 documentation + TELF.factorization.utilities package — TELF 0.0.18 documentation @@ -37,7 +37,7 @@ - + @@ -127,7 +127,7 @@ -

    TELF 0.0.17 documentation

    +

    TELF 0.0.18 documentation

    diff --git a/docs/TELF.pre_processing.Vulture.tokens_analysis.html b/docs/TELF.pre_processing.Vulture.tokens_analysis.html index d85ebba..3c2c15d 100644 --- a/docs/TELF.pre_processing.Vulture.tokens_analysis.html +++ b/docs/TELF.pre_processing.Vulture.tokens_analysis.html @@ -8,7 +8,7 @@ - TELF.pre_processing.Vulture.tokens_analysis package — TELF 0.0.17 documentation + TELF.pre_processing.Vulture.tokens_analysis package — TELF 0.0.18 documentation @@ -37,7 +37,7 @@ - + @@ -126,7 +126,7 @@ -

    TELF 0.0.17 documentation

    +

    TELF 0.0.18 documentation

    diff --git a/docs/_modules/TELF/applications/Cheetah/cheetah.html b/docs/_modules/TELF/applications/Cheetah/cheetah.html index b779955..5c054a4 100644 --- a/docs/_modules/TELF/applications/Cheetah/cheetah.html +++ b/docs/_modules/TELF/applications/Cheetah/cheetah.html @@ -7,7 +7,7 @@ - TELF.applications.Cheetah.cheetah — TELF 0.0.17 documentation + TELF.applications.Cheetah.cheetah — TELF 0.0.18 documentation @@ -36,7 +36,7 @@ - + @@ -124,7 +124,7 @@ -

    TELF 0.0.17 documentation

    +

    TELF 0.0.18 documentation