From cd105a7bef6d1c52640bb977188693ee09aa2005 Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Fri, 26 Apr 2024 18:37:16 -0600
Subject: [PATCH 01/11] update develop branch version
---
CITATION.cff | 2 +-
README.md | 2 +-
TELF/version.py | 2 +-
docs/source/conf.py | 2 +-
docs/source/index.rst | 2 +-
setup.py | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/CITATION.cff b/CITATION.cff
index 85c3150..0a6d6af 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -20,7 +20,7 @@ authors:
- family-names: Alexandrov
given-names: Boian
title: "Tensor Extraction of Latent Features (T-ELF)"
-version: 0.0.17
+version: 0.0.18
url: https://github.com/lanl/T-ELF
doi: 10.5281/zenodo.10257897
date-released: 2023-12-04
diff --git a/README.md b/README.md
index 36788fe..967f243 100644
--- a/README.md
+++ b/README.md
@@ -153,7 +153,7 @@ If you use T-ELF please cite.
**APA:**
```latex
-Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.17) [Computer software]. https://doi.org/10.5281/zenodo.10257897
+Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.18) [Computer software]. https://doi.org/10.5281/zenodo.10257897
```
**BibTeX:**
diff --git a/TELF/version.py b/TELF/version.py
index 39d352f..1ac739d 100644
--- a/TELF/version.py
+++ b/TELF/version.py
@@ -1 +1 @@
-__version__ = '0.0.17'
\ No newline at end of file
+__version__ = '0.0.18'
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 93bc28a..dcc2c78 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -9,7 +9,7 @@
project = 'TELF'
copyright = '2022, LANL'
author = 'Maksim E. Eren, Nicholas Solovyev, Ryan Barron, Manish Bhattarai, Ismael Boureima, Erik Skau, Kim Rasmussen, Boian S. Alexandrov'
-release = '0.0.17'
+release = '0.0.18'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4424a61..1740c3f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -104,7 +104,7 @@ How to Cite T-ELF?
.. code-block:: console
- Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.17) [Computer software]. https://doi.org/10.5281/zenodo.10257897
+ Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.18) [Computer software]. https://doi.org/10.5281/zenodo.10257897
**BibTeX:**
diff --git a/setup.py b/setup.py
index eebe4e0..aaebd0e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages
from glob import glob
-__version__ = "0.0.17"
+__version__ = "0.0.18"
# add readme
with open('README.md', 'r') as f:
From e14817830554e120327f935c11acf75e366adffc Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 13:55:08 -0600
Subject: [PATCH 02/11] fix H_sill_thresh: use None instead of -1 to disable it
---
TELF/factorization/NMFk.py | 6 +++---
examples/NMFk/NMFk_wtsi.ipynb | 25 ++++++++++---------------
2 files changed, 13 insertions(+), 18 deletions(-)
diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py
index 5ff4c1b..43285ea 100644
--- a/TELF/factorization/NMFk.py
+++ b/TELF/factorization/NMFk.py
@@ -359,7 +359,7 @@ def _nmf_parallel_wrapper(
with K_search_settings['lock']:
if min(sils_min_W, sils_min_H) >= K_search_settings["sill_thresh"]:
K_search_settings['k_min'] = k
- if K_search_settings["H_sill_thresh"] >= 0 and (sils_min_H <= K_search_settings["H_sill_thresh"]):
+ if K_search_settings["H_sill_thresh"] is not None and (sils_min_H <= K_search_settings["H_sill_thresh"]):
K_search_settings['k_max'] = k
if n_nodes > 1:
@@ -507,7 +507,7 @@ def __init__(
get_plot_data=False,
simple_plot=True,
k_search_method="linear",
- H_sill_thresh=-1
+ H_sill_thresh=None
):
"""
NMFk is a Non-negative Matrix Factorization module with the capability to do automatic model determination.
@@ -628,7 +628,7 @@ def __init__(
Setting for removing higher ranks from the search space.\n
When searching for the optimal rank with binary search using ``k_search='bst_post'`` or ``k_search='bst_pre'``, this hyper-parameter can be used to cut off higher ranks from search space.\n
The cut-off of higher ranks from the search space is based on threshold for H silhouette. When a H silhouette below ``H_sill_thresh`` is found for a given rank or K, all higher ranks are removed from the search space.\n
- If ``H_sill_thresh=-1``, it is not used. The default is -1.
+ If ``H_sill_thresh=None``, it is not used. The default is None.
Returns
-------
None.
diff --git a/examples/NMFk/NMFk_wtsi.ipynb b/examples/NMFk/NMFk_wtsi.ipynb
index 09548d3..3479979 100644
--- a/examples/NMFk/NMFk_wtsi.ipynb
+++ b/examples/NMFk/NMFk_wtsi.ipynb
@@ -159,9 +159,9 @@
"sill_thresh: 0.9\n",
"predict_k: True\n",
"predict_k_method: sill\n",
- "n_jobs: 2\n",
+ "n_jobs: 12\n",
"n_nodes: 1\n",
- "nmf: \n",
+ "nmf: \n",
"nmf_method: nmf_kl_mu\n",
"nmf_obj_params: {}\n",
"pruned: True\n",
@@ -175,8 +175,8 @@
"perturb_multiprocessing: False\n",
"k_search_method: bst_post\n",
"H_sill_thresh: 0.1\n",
- "lock: \n",
- "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n",
+ "lock: \n",
+ "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n",
"total_exec_seconds: 0\n",
"experiment_name: \n",
"nmf_params: {'niter': 1000, 'use_gpu': False, 'nmf_verbose': False, 'mask': None, 'use_consensus_stopping': False}\n"
@@ -186,7 +186,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:697: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n",
+ "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:700: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n",
" warnings.warn(\n"
]
}
@@ -231,20 +231,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:852: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n",
+ "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:855: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n",
" warnings.warn(\n",
- " 0%| | 0/20 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/core/fromnumeric.py:3504: RuntimeWarning: Mean of empty slice.\n",
+ " 0%| | 0/20 [00:00, ?it/s]/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/core/fromnumeric.py:3504: RuntimeWarning: Mean of empty slice.\n",
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
"/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/core/_methods.py:129: RuntimeWarning: invalid value encountered in scalar divide\n",
" ret = ret.dtype.type(ret / rcount)\n",
- "100%|██████████| 20/20 [00:10<00:00, 1.95it/s]\n"
+ " 5%|▌ | 1/20 [00:24<07:42, 24.32s/it]/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/lib/function_base.py:2897: RuntimeWarning: invalid value encountered in divide\n",
+ " c /= stddev[:, None]\n",
+ "100%|██████████| 20/20 [00:24<00:00, 1.24s/it]\n"
]
}
],
From 299c11ccdf6b2f99a2ced41675ff3193fec2f17d Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 14:24:47 -0600
Subject: [PATCH 03/11] add option to use either W sill for k prediction, H
sill for k prediction, or both
---
TELF/factorization/NMFk.py | 58 +++++++++++++++++++++++++----------
examples/NMFk/NMFk_wtsi.ipynb | 35 +++++++++------------
2 files changed, 56 insertions(+), 37 deletions(-)
diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py
index 43285ea..d259066 100644
--- a/TELF/factorization/NMFk.py
+++ b/TELF/factorization/NMFk.py
@@ -174,7 +174,7 @@ def _nmf_parallel_wrapper(
mask=None,
consensus_mat=False,
predict_k=False,
- predict_k_method="sill",
+ predict_k_method="WH_sill",
pruned=True,
perturb_rows=None,
perturb_cols=None,
@@ -357,7 +357,19 @@ def _nmf_parallel_wrapper(
#
if K_search_settings["k_search_method"] != "linear":
with K_search_settings['lock']:
- if min(sils_min_W, sils_min_H) >= K_search_settings["sill_thresh"]:
+
+ if predict_k_method in ["WH_sill", "sill"]:
+ curr_score = min(sils_min_W, sils_min_H)
+ elif predict_k_method == "W_sill":
+ curr_score = sils_min_W
+ elif predict_k_method == "H_sill":
+ curr_score = sils_min_H
+ elif predict_k_method == "pvalue":
+ curr_score = sils_min_W
+ else:
+ raise Exception("Unknown predict_k_method!")
+
+ if curr_score >= K_search_settings["sill_thresh"]:
K_search_settings['k_min'] = k
if K_search_settings["H_sill_thresh"] is not None and (sils_min_H <= K_search_settings["H_sill_thresh"]):
K_search_settings['k_max'] = k
@@ -488,7 +500,7 @@ def __init__(
save_output=True,
collect_output=False,
predict_k=False,
- predict_k_method="sill",
+ predict_k_method="WH_sill",
verbose=True,
nmf_verbose=False,
perturb_verbose=False,
@@ -549,13 +561,15 @@ def __init__(
Even when ``predict_k=False``, number of latent factors can be estimated using the figures saved in ``save_path``.
predict_k_method : str, optional
- Method to use when performing automatic k prediction. Default is "sill".\n
+ Method to use when performing automatic k prediction. Default is "WH_sill".\n
* ``predict_k_method='pvalue'`` will use L-Statistics with column-wise error for automatically estimating the number of latent factors.\n
- * ``predict_k_method='sill'`` will use Silhouette score for estimating the number of latent factors.
-
+ * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.
+ * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.
+ * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.
+ * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.
.. warning::
- ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='sill'``, on the other hand, will be much faster.
+ ``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster.
verbose : bool, optional
If True, shows progress in each k. The default is True.
@@ -622,8 +636,8 @@ def __init__(
k_search_method : str, optional
Which approach to use when searching for the rank or k. The default is "linear".\n
* ``k_search_method='linear'`` will linearly visit each K given in ``Ks`` hyper-parameter of the ``fit()`` function.\n
- * ``k_search_method='bst_post'`` will perform post-order binary search. When an ideal rank is found with ``min(W silhouette, H silhouette) >= sill_thresh``, all lower ranks are pruned from the search space.
- * ``k_search_method='bst_pre'`` will perform pre-order binary search. When an ideal rank is found with ``min(W silhouette, H silhouette) >= sill_thresh``, all lower ranks are pruned from the search space.
+ * ``k_search_method='bst_post'`` will perform post-order binary search. When an ideal rank is found, determined by the selected ``predict_k_method``, all lower ranks are pruned from the search space.
+ * ``k_search_method='bst_pre'`` will perform pre-order binary search. When an ideal rank is found, determined by the selected ``predict_k_method``, all lower ranks are pruned from the search space.
H_sill_thresh : float, optional
Setting for removing higher ranks from the search space.\n
When searching for the optimal rank with binary search using ``k_search='bst_post'`` or ``k_search='bst_pre'``, this hyper-parameter can be used to cut off higher ranks from search space.\n
@@ -680,7 +694,7 @@ def __init__(
# warnings
assert self.k_search_method in ["linear", "bst_pre", "bst_post"], "Invalid k_search_method method. Choose from linear, bst_pre, or bst_post."
- assert self.predict_k_method in ["pvalue", "sill"], "Invalid predict_k_method method. Choose from pvalue, sill."
+ assert self.predict_k_method in ["pvalue", "WH_sill", "W_sill", "H_sill", "sill"], "Invalid predict_k_method method. Choose from pvalue, WH_sill, W_sill, H_sill, or sill. sill defaults to WH_sill."
if self.calculate_pac and not self.consensus_mat:
self.consensus_mat = True
warnings.warn("consensus_mat was False when calculate_pac was True! consensus_mat changed to True.")
@@ -1136,18 +1150,30 @@ def fit(self, X, Ks, name="NMFk", note=""):
combined_result["col_err"], Ks, combined_result["sils_min_W"], SILL_thr=self.sill_thresh
)[0]
- elif self.predict_k_method == "sill":
-
+ else:
+ if self.predict_k_method in ["WH_sill", "sill"]:
+ curr_sill_max_score = min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])])
+ elif self.predict_k_method == "W_sill":
+ curr_sill_max_score = max(combined_result["sils_min_W"])
+ elif self.predict_k_method == "H_sill":
+ curr_sill_max_score = max(combined_result["sils_min_H"])
+
# check if that sill threshold exist
- if self.sill_thresh > min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])]):
- self.sill_thresh = min([max(combined_result["sils_min_W"]), max(combined_result["sils_min_H"])])
+ if self.sill_thresh > curr_sill_max_score:
+ self.sill_thresh = curr_sill_max_score
warnings.warn(f'W or H Silhouettes were all less than sill_thresh. Setting sill_thresh to minimum for K prediction. sill_thresh={round(self.sill_thresh, 3)}')
-
+
k_predict_W = Ks[np.max(np.argwhere(
np.array(combined_result["sils_min_W"]) >= self.sill_thresh).flatten())]
k_predict_H = Ks[np.max(np.argwhere(
np.array(combined_result["sils_min_H"]) >= self.sill_thresh).flatten())]
- k_predict = min(k_predict_W, k_predict_H)
+
+ if self.predict_k_method in ["WH_sill", "sill"]:
+ k_predict = min(k_predict_W, k_predict_H)
+ elif self.predict_k_method == "W_sill":
+ k_predict = k_predict_W
+ elif self.predict_k_method == "H_sill":
+ k_predict = k_predict_H
else:
k_predict = 0
diff --git a/examples/NMFk/NMFk_wtsi.ipynb b/examples/NMFk/NMFk_wtsi.ipynb
index 3479979..136999d 100644
--- a/examples/NMFk/NMFk_wtsi.ipynb
+++ b/examples/NMFk/NMFk_wtsi.ipynb
@@ -74,7 +74,7 @@
"\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/spacy-3.7.2-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/thinc-8.2.2-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33mDEPRECATION: Loading egg at /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/MarkupSafe-2.1.5-py3.11-macosx-11.1-arm64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330\u001b[0m\u001b[33m\n",
- "\u001b[0mRequirement already satisfied: mat73 in /Users/maksim/Desktop/Code/T-ELF_public/.eggs/mat73-0.62-py3.11.egg (0.62)\n",
+ "\u001b[0mRequirement already satisfied: mat73 in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages (0.63)\n",
"Requirement already satisfied: h5py in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/h5py-3.10.0-py3.11-macosx-11.1-arm64.egg (from mat73) (3.10.0)\n",
"Requirement already satisfied: numpy in /Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg (from mat73) (1.26.2)\n"
]
@@ -135,7 +135,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 9,
"id": "eb6cea29",
"metadata": {},
"outputs": [
@@ -158,10 +158,10 @@
"collect_output: True\n",
"sill_thresh: 0.9\n",
"predict_k: True\n",
- "predict_k_method: sill\n",
- "n_jobs: 12\n",
+ "predict_k_method: pvalue\n",
+ "n_jobs: 1\n",
"n_nodes: 1\n",
- "nmf: \n",
+ "nmf: \n",
"nmf_method: nmf_kl_mu\n",
"nmf_obj_params: {}\n",
"pruned: True\n",
@@ -175,8 +175,8 @@
"perturb_multiprocessing: False\n",
"k_search_method: bst_post\n",
"H_sill_thresh: 0.1\n",
- "lock: \n",
- "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n",
+ "lock: \n",
+ "K_search_settings: {'lock': , 'k_search_method': 'bst_post', 'sill_thresh': 0.9, 'H_sill_thresh': 0.1, 'k_min': -1, 'k_max': inf}\n",
"total_exec_seconds: 0\n",
"experiment_name: \n",
"nmf_params: {'niter': 1000, 'use_gpu': False, 'nmf_verbose': False, 'mask': None, 'use_consensus_stopping': False}\n"
@@ -186,7 +186,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:700: UserWarning: calculate_error is True! Error calculation can make the runtime longer and take up more memory space!\n",
+ "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:718: UserWarning: predict_k is True with pvalue method! Predicting k can make the runtime significantly longer. Consider using predict_k_method='sill'.\n",
" warnings.warn(\n"
]
}
@@ -197,7 +197,7 @@
" \"n_perturbs\":36,\n",
" \"n_iters\":1000,\n",
" \"epsilon\":0.015,\n",
- " \"n_jobs\":-1,\n",
+ " \"n_jobs\":1,\n",
" \"init\":\"nnsvd\", \n",
" \"use_gpu\":False,\n",
" \"save_path\":\"../../results/\",\n",
@@ -207,6 +207,7 @@
" \"verbose\":True,\n",
" \"transpose\":False,\n",
" \"sill_thresh\":0.9,\n",
+ " \"predict_k_method\":\"WH_sill\",\n",
" \"nmf_verbose\":False,\n",
" \"nmf_method\":'nmf_kl_mu',\n",
" 'pruned':True,\n",
@@ -218,12 +219,12 @@
"}\n",
"model = NMFk(**params)\n",
"\n",
- "Ks = range(1,21,1)"
+ "Ks = range(1,11,1)"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 10,
"id": "104c2753",
"metadata": {},
"outputs": [
@@ -231,15 +232,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/maksim/Desktop/Code/T-ELF_public/TELF/factorization/NMFk.py:855: UserWarning: X is data type float64. Whic is not float32. Higher precision will result in significantly longer runtime!\n",
- " warnings.warn(\n",
- " 0%| | 0/20 [00:00, ?it/s]/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/core/fromnumeric.py:3504: RuntimeWarning: Mean of empty slice.\n",
- " return _methods._mean(a, axis=axis, dtype=dtype,\n",
- "/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/core/_methods.py:129: RuntimeWarning: invalid value encountered in scalar divide\n",
- " ret = ret.dtype.type(ret / rcount)\n",
- " 5%|▌ | 1/20 [00:24<07:42, 24.32s/it]/Users/maksim/miniconda3/envs/TELF_public/lib/python3.11/site-packages/numpy-1.26.2-py3.11-macosx-11.1-arm64.egg/numpy/lib/function_base.py:2897: RuntimeWarning: invalid value encountered in divide\n",
- " c /= stddev[:, None]\n",
- "100%|██████████| 20/20 [00:24<00:00, 1.24s/it]\n"
+ "100%|██████████| 10/10 [00:04<00:00, 2.23it/s]\n"
]
}
],
@@ -251,7 +244,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 11,
"id": "d7ab1a39",
"metadata": {},
"outputs": [
From 487970f1b2eaf8cc49a570cd9770149344e67e58 Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 14:27:08 -0600
Subject: [PATCH 04/11] add line breaks to predict_k_method docstring bullets
---
TELF/factorization/NMFk.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py
index d259066..0436dda 100644
--- a/TELF/factorization/NMFk.py
+++ b/TELF/factorization/NMFk.py
@@ -563,10 +563,10 @@ def __init__(
predict_k_method : str, optional
Method to use when performing automatic k prediction. Default is "WH_sill".\n
* ``predict_k_method='pvalue'`` will use L-Statistics with column-wise error for automatically estimating the number of latent factors.\n
- * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.
- * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.
- * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.
- * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.
+ * ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n
+ * ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n
+ * ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n
+ * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.\n
.. warning::
``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster.
From 5590dee5949c118082b0ebb4c8d369fe3e890e6c Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 14:29:37 -0600
Subject: [PATCH 05/11] fix unbalanced backticks in predict_k_method docstring
---
TELF/factorization/NMFk.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py
index 0436dda..aef556e 100644
--- a/TELF/factorization/NMFk.py
+++ b/TELF/factorization/NMFk.py
@@ -566,7 +566,7 @@ def __init__(
* ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n
* ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n
* ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n
- * ``predict_k_method='sill'`` will default to `predict_k_method='WH_sill'``.\n
+ * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``.\n
.. warning::
``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster.
From 07b1c3e2fc41c2c056ba961cfc1fe77180ca2440 Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 14:29:56 -0600
Subject: [PATCH 06/11] add blank line before warning in predict_k_method docs
---
TELF/factorization/NMFk.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/TELF/factorization/NMFk.py b/TELF/factorization/NMFk.py
index aef556e..1c9ac41 100644
--- a/TELF/factorization/NMFk.py
+++ b/TELF/factorization/NMFk.py
@@ -566,7 +566,8 @@ def __init__(
* ``predict_k_method='WH_sill'`` will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.\n
* ``predict_k_method='W_sill'`` will use Silhouette scores from W latent factor for estimating the number of latent factors.\n
* ``predict_k_method='H_sill'`` will use Silhouette scores from H latent factor for estimating the number of latent factors.\n
- * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``.\n
+ * ``predict_k_method='sill'`` will default to ``predict_k_method='WH_sill'``.
+
.. warning::
``predict_k_method='pvalue'`` prediction will result in significantly longer processing time, altough it is more accurate! ``predict_k_method='WH_sill'``, on the other hand, will be much faster.
From 53923fe42a1ed8f92b97b8e2d907336ab8a02397 Mon Sep 17 00:00:00 2001
From: MaksimEkin
Date: Mon, 29 Apr 2024 14:36:20 -0600
Subject: [PATCH 07/11] update documentation
---
docs/Beaver.html | 6 +-
docs/Cheetah.html | 6 +-
docs/HNMFk.html | 6 +-
docs/NMFk.html | 23 +++---
docs/RESCALk.html | 6 +-
docs/SymNMFk.html | 6 +-
docs/TELF.factorization.decompositions.html | 6 +-
...actorization.decompositions.utilities.html | 6 +-
docs/TELF.factorization.html | 23 +++---
docs/TELF.factorization.utilities.html | 6 +-
docs/TELF.html | 6 +-
docs/TELF.pre_processing.Beaver.html | 6 +-
docs/TELF.pre_processing.Vulture.html | 8 +-
...re_processing.Vulture.tokens_analysis.html | 6 +-
docs/TELF.pre_processing.html | 6 +-
docs/TriNMFk.html | 6 +-
docs/Vulture.html | 8 +-
.../TELF/applications/Cheetah/cheetah.html | 6 +-
docs/_modules/TELF/factorization/HNMFk.html | 6 +-
docs/_modules/TELF/factorization/NMFk.html | 73 ++++++++++++------
docs/_modules/TELF/factorization/RESCALk.html | 6 +-
docs/_modules/TELF/factorization/SymNMFk.html | 6 +-
docs/_modules/TELF/factorization/TriNMFk.html | 6 +-
.../decompositions/nmf_fro_admm.html | 6 +-
.../decompositions/nmf_fro_mu.html | 6 +-
.../decompositions/nmf_kl_admm.html | 6 +-
.../decompositions/nmf_kl_mu.html | 6 +-
.../decompositions/nmf_mc_fro_mu.html | 6 +-
.../decompositions/rescal_fro_mu.html | 6 +-
.../decompositions/tri_nmf_fro_mu.html | 6 +-
.../utilities/bool_clustering.html | 6 +-
.../decompositions/utilities/bool_noise.html | 6 +-
.../decompositions/utilities/clustering.html | 6 +-
.../utilities/concensus_matrix.html | 6 +-
.../utilities/data_reshaping.html | 6 +-
.../utilities/generic_utils.html | 6 +-
.../decompositions/utilities/math_utils.html | 6 +-
.../decompositions/utilities/nnsvd.html | 6 +-
.../decompositions/utilities/resample.html | 6 +-
.../decompositions/utilities/silhouettes.html | 6 +-
.../factorization/utilities/clustering.html | 6 +-
.../utilities/co_occurance_matrix.html | 6 +-
.../utilities/organize_n_jobs.html | 6 +-
.../factorization/utilities/plot_NMFk.html | 6 +-
.../utilities/pvalue_analysis.html | 6 +-
.../factorization/utilities/sppmi_matrix.html | 6 +-
.../factorization/utilities/take_note.html | 6 +-
.../factorization/utilities/vectorize.html | 6 +-
.../TELF/pre_processing/Beaver/beaver.html | 6 +-
.../pre_processing/Beaver/cooccurrence.html | 6 +-
.../TELF/pre_processing/Beaver/sppmi.html | 6 +-
.../TELF/pre_processing/Beaver/tenmat.html | 6 +-
.../TELF/pre_processing/Beaver/vectorize.html | 6 +-
.../Vulture/tokens_analysis/top_words.html | 6 +-
.../TELF/pre_processing/Vulture/vulture.html | 6 +-
docs/_modules/index.html | 6 +-
docs/_sources/index.rst | 2 +-
docs/_static/documentation_options.js | 2 +-
docs/doctrees/NMFk.doctree | Bin 97971 -> 100344 bytes
docs/doctrees/TELF.factorization.doctree | Bin 194937 -> 197343 bytes
.../TELF.pre_processing.Vulture.doctree | Bin 44272 -> 44272 bytes
docs/doctrees/Vulture.doctree | Bin 52684 -> 52684 bytes
docs/doctrees/environment.pickle | Bin 3118448 -> 3126480 bytes
docs/doctrees/index.doctree | Bin 35064 -> 35064 bytes
docs/genindex.html | 6 +-
docs/index.html | 8 +-
docs/modules.html | 6 +-
docs/py-modindex.html | 6 +-
docs/search.html | 6 +-
docs/searchindex.js | 2 +-
70 files changed, 256 insertions(+), 223 deletions(-)
diff --git a/docs/Beaver.html b/docs/Beaver.html
index 3d69c51..03f8bd4 100644
--- a/docs/Beaver.html
+++ b/docs/Beaver.html
@@ -8,7 +8,7 @@
- TELF.pre_processing.Beaver: Fast matrix and tensor building tool — TELF 0.0.17 documentation
+ TELF.pre_processing.Beaver: Fast matrix and tensor building tool — TELF 0.0.18 documentation
@@ -37,7 +37,7 @@
-
+
@@ -127,7 +127,7 @@
-
TELF 0.0.17 documentation
+
TELF 0.0.18 documentation
-
predict_k_method (str, optional) –
Method to use when performing automatic k prediction. Default is “sill”.
+
predict_k_method (str, optional) –
Method to use when performing automatic k prediction. Default is “WH_sill”.
predict_k_method='pvalue' will use L-Statistics with column-wise error for automatically estimating the number of latent factors.
-
predict_k_method='sill' will use Silhouette score for estimating the number of latent factors.
+
predict_k_method='WH_sill' will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.
+
predict_k_method='W_sill' will use Silhouette scores from W latent factor for estimating the number of latent factors.
+
predict_k_method='H_sill' will use Silhouette scores from H latent factor for estimating the number of latent factors.
+
predict_k_method='sill' will default to predict_k_method='WH_sill'.
Warning
-
predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='sill', on the other hand, will be much faster.
+
predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='WH_sill', on the other hand, will be much faster.
verbose (bool, optional) – If True, shows progress in each k. The default is True.
Method to use when performing automatic k prediction. Default is “sill”.
+
predict_k_method (str, optional) –
Method to use when performing automatic k prediction. Default is “WH_sill”.
predict_k_method='pvalue' will use L-Statistics with column-wise error for automatically estimating the number of latent factors.
-
predict_k_method='sill' will use Silhouette score for estimating the number of latent factors.
+
predict_k_method='WH_sill' will use Silhouette scores from minimum of W and H latent factors for estimating the number of latent factors.
+
predict_k_method='W_sill' will use Silhouette scores from W latent factor for estimating the number of latent factors.
+
predict_k_method='H_sill' will use Silhouette scores from H latent factor for estimating the number of latent factors.
+
predict_k_method='sill' will default to predict_k_method='WH_sill'.
Warning
-
predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='sill', on the other hand, will be much faster.
+
predict_k_method='pvalue' prediction will result in significantly longer processing time, altough it is more accurate! predict_k_method='WH_sill', on the other hand, will be much faster.
verbose (bool, optional) – If True, shows progress in each k. The default is True.