From b00d502d90dbb98be5b7e7ce9f17d82ae5d4d49f Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:34:16 +0200 Subject: [PATCH 01/18] Update nucleicacids.py added backend and aggregators --- package/MDAnalysis/analysis/nucleicacids.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 0eccd039ba4..69e79b383e9 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -161,6 +161,12 @@ class NucPairDist(AnalysisBase): helper for selecting atom pairs for distance analysis. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int @@ -292,6 +298,11 @@ def _conclude(self) -> None: self.results['pair_distances'] = self.results['distances'] # TODO: remove pair_distances in 3.0.0 + def _get_aggregator(self): + return ResultsGroup(lookup={ + 'distances': ResultsGroup.ndarray_vstack, + 'pair_distances': ResultsGroup.ndarray_vstack,} + ) class WatsonCrickDist(NucPairDist): r""" From dc03a0248f2d27445fe22d207ebfe113570d4e2c Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:36:08 +0200 Subject: [PATCH 02/18] Update conftest.py added NucPairDist to the conftest.py --- testsuite/MDAnalysisTests/analysis/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index fc3c8a480c7..6cada0d6d6d 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -14,6 +14,7 @@ from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( HydrogenBondAnalysis, ) +from 
MDAnalysis.analysis.nucleicacids import NucPairDist from MDAnalysis.lib.util import is_installed @@ -141,3 +142,10 @@ def client_DSSP(request): @pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis)) def client_HydrogenBondAnalysis(request): return request.param + + +# MDAnalysis.analysis.nucleicacids + +@pytest.fixture(scope="module", params=params_for_cls(NucPairDist)) +def client_NucPairDist(request): + return request.param From 39612c842993f74b77f51a365d6459e3c97a3586 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:37:30 +0200 Subject: [PATCH 03/18] Update test_nucleicacids.py added client_NucPairDist to the tests --- .../MDAnalysisTests/analysis/test_nucleicacids.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py b/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py index fb7d39374cd..80e943f4379 100644 --- a/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py +++ b/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py @@ -55,12 +55,12 @@ def test_empty_ag_error(strand): @pytest.fixture(scope='module') -def wc_rna(strand): +def wc_rna(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[0], strand.residues[21]]) strand2 = ResidueGroup([strand.residues[1], strand.residues[22]]) WC = WatsonCrickDist(strand1, strand2) - WC.run() + WC.run(**client_NucPairDist) return WC @@ -114,23 +114,23 @@ def test_wc_dis_results_keyerrs(wc_rna, key): wc_rna.results[key] -def test_minor_dist(strand): +def test_minor_dist(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[2], strand.residues[19]]) strand2 = ResidueGroup([strand.residues[16], strand.residues[4]]) MI = MinorPairDist(strand1, strand2) - MI.run() + MI.run(**client_NucPairDist) assert MI.results.distances[0, 0] == approx(15.06506, rel=1e-3) assert MI.results.distances[0, 1] == approx(3.219116, 
rel=1e-3) -def test_major_dist(strand): +def test_major_dist(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[1], strand.residues[4]]) strand2 = ResidueGroup([strand.residues[11], strand.residues[8]]) MA = MajorPairDist(strand1, strand2) - MA.run() + MA.run(**client_NucPairDist) assert MA.results.distances[0, 0] == approx(26.884272, rel=1e-3) assert MA.results.distances[0, 1] == approx(13.578535, rel=1e-3) From 8feb071042676c1855b0b480d4d078eee3b0f85a Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Wed, 9 Oct 2024 17:58:49 -0700 Subject: [PATCH 04/18] fix nucl parallel --- package/MDAnalysis/analysis/nucleicacids.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 69e79b383e9..867fd16642a 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -70,7 +70,7 @@ import MDAnalysis as mda from .distances import calc_bonds -from .base import AnalysisBase, Results +from .base import AnalysisBase, ResultsGroup from MDAnalysis.core.groups import Residue, ResidueGroup @@ -282,7 +282,7 @@ def select_strand_atoms( return (sel1, sel2) def _prepare(self) -> None: - self._res_array: np.ndarray = np.zeros( + self.results.distances: np.ndarray = np.zeros( [self.n_frames, self._n_sel] ) @@ -291,17 +291,16 @@ def _single_frame(self) -> None: self._s1.positions, self._s2.positions ) - self._res_array[self._frame_index, :] = dist + self.results.distances[self._frame_index, :] = dist def _conclude(self) -> None: - self.results['distances'] = self._res_array self.results['pair_distances'] = self.results['distances'] # TODO: remove pair_distances in 3.0.0 def _get_aggregator(self): return ResultsGroup(lookup={ 'distances': ResultsGroup.ndarray_vstack, - 'pair_distances': ResultsGroup.ndarray_vstack,} + } ) class WatsonCrickDist(NucPairDist): From b2d81b006c19c207f6284b9e4a6864936017f588 Mon Sep 
17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:10:50 +0200 Subject: [PATCH 05/18] Update nucleicacids.py fixing PEP --- package/MDAnalysis/analysis/nucleicacids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 867fd16642a..a8806febd46 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -166,7 +166,7 @@ class NucPairDist(AnalysisBase): @classmethod def get_supported_backends(cls): return ('serial', 'multiprocessing', 'dask',) - + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int @@ -299,7 +299,7 @@ def _conclude(self) -> None: def _get_aggregator(self): return ResultsGroup(lookup={ - 'distances': ResultsGroup.ndarray_vstack, + 'distances': ResultsGroup.ndarray_vstack, } ) From e30716b819621cfa209891203197bafff84100d0 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:12:28 +0200 Subject: [PATCH 06/18] Update nucleicacids.py --- package/MDAnalysis/analysis/nucleicacids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index a8806febd46..da08604a575 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -166,7 +166,7 @@ class NucPairDist(AnalysisBase): @classmethod def get_supported_backends(cls): return ('serial', 'multiprocessing', 'dask',) - + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int From 883eb1ed2794fad51fa6e15da102c51723a0af31 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:14:22 +0200 Subject: [PATCH 07/18] Update nucleicacids.py added versionchanged for addition of parallelization --- 
package/MDAnalysis/analysis/nucleicacids.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index da08604a575..f0649be6d50 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -159,6 +159,11 @@ class NucPairDist(AnalysisBase): .. versionchanged:: 2.7.0 Added static method :attr:`select_strand_atoms` as a helper for selecting atom pairs for distance analysis. + + .. versionchanged:: 2.8.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ _analysis_algorithm_is_parallelizable = True From 701ed01621624d31a98b462937e7544f8b988431 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:16:10 +0200 Subject: [PATCH 08/18] Update nucleicacids.py --- package/MDAnalysis/analysis/nucleicacids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index f0649be6d50..5f5bdd9d46d 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -161,8 +161,8 @@ class NucPairDist(AnalysisBase): helper for selecting atom pairs for distance analysis. .. versionchanged:: 2.8.0 - Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` - backends; use the new method :meth:`get_supported_backends` to see all + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all supported backends. 
""" From 82a89f9593bf2fafd51294c8f61e0101f56c28f8 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:20:42 +0200 Subject: [PATCH 09/18] Update conftest.py --- testsuite/MDAnalysisTests/analysis/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 6cada0d6d6d..a60b565f1c6 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -145,7 +145,7 @@ def client_HydrogenBondAnalysis(request): # MDAnalysis.analysis.nucleicacids - + @pytest.fixture(scope="module", params=params_for_cls(NucPairDist)) def client_NucPairDist(request): return request.param From 886832ef43aac2bf4615c657463c95046ba3c14f Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:23:42 +0200 Subject: [PATCH 10/18] Update CHANGELOG Added Parallelization of nucleicacids.py and fixed lettering --- package/CHANGELOG | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index b284ffddeec..6c266e1cfc4 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -59,12 +59,13 @@ Enhancements * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) - * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) - * enables parallelization for analysis.bat.BAT (Issue #4663) - * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} + * Explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) + * Enables parallelization for analysis.bat.BAT (Issue #4663) + * Enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} (Issue #4673) - * enables parallelization for 
analysis.dssp.dssp.DSSP (Issue #4674) + * Enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674) * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) + * Enables parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` From c722ac4f7f67b0fd0c15d3d61d057e8492cb0541 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:27:57 +0200 Subject: [PATCH 11/18] Update nucleicacids.py Addition of mention of modification to self.results.distances --- package/MDAnalysis/analysis/nucleicacids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 5f5bdd9d46d..a7a9a6c3db5 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -164,6 +164,8 @@ class NucPairDist(AnalysisBase): Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method :meth:`get_supported_backends` to see all supported backends. + The 'self._res_array' was modified to 'self.results.distances' to + enable the implementation of the parallelization. 
""" _analysis_algorithm_is_parallelizable = True From d08347fb719d8a09e3d556b0d21dad224a3f0e6f Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:18:58 +0200 Subject: [PATCH 12/18] Update nucleicacids.py removed _res_dict --- package/MDAnalysis/analysis/nucleicacids.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index a7a9a6c3db5..54491da216c 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -177,7 +177,6 @@ def get_supported_backends(cls): _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int - _res_dict: Dict[int, List[float]] def __init__(self, selection1: List[mda.AtomGroup], selection2: List[mda.AtomGroup], From f471ad6460b6f6131b5a16133663ea12b05e8342 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:17:31 +0200 Subject: [PATCH 13/18] Update nucleicacids.py moved back _res_dict --- package/MDAnalysis/analysis/nucleicacids.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 54491da216c..5a6b885728e 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -177,7 +177,8 @@ def get_supported_backends(cls): _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int - + _res_dict: Dict[int, List[float]] + def __init__(self, selection1: List[mda.AtomGroup], selection2: List[mda.AtomGroup], **kwargs) -> None: From 140366cb1f6da02b628c1bc49850a66155aa392d Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:40:42 +0200 Subject: [PATCH 14/18] Update nucleicacids.py removed _res_sel --- package/MDAnalysis/analysis/nucleicacids.py | 1 - 1 file changed, 1 
deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 5a6b885728e..51779d556e1 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -177,7 +177,6 @@ def get_supported_backends(cls): _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int - _res_dict: Dict[int, List[float]] def __init__(self, selection1: List[mda.AtomGroup], selection2: List[mda.AtomGroup], From 5a119dba12a2e789c8f0c135b7d9e7636ee66d97 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sun, 20 Oct 2024 01:08:23 +0200 Subject: [PATCH 15/18] Update nucleicacids.py remove unnecessary , --- package/MDAnalysis/analysis/nucleicacids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 51779d556e1..45915f729c3 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -172,7 +172,7 @@ class NucPairDist(AnalysisBase): @classmethod def get_supported_backends(cls): - return ('serial', 'multiprocessing', 'dask',) + return ('serial', 'multiprocessing', 'dask') _s1: mda.AtomGroup _s2: mda.AtomGroup From c68cb84ee5c73de8742c284ffa1772c97bbd81c1 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 22 Oct 2024 17:47:05 -0700 Subject: [PATCH 16/18] Update package/MDAnalysis/analysis/nucleicacids.py --- package/MDAnalysis/analysis/nucleicacids.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 45915f729c3..503645a5511 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -164,8 +164,6 @@ class NucPairDist(AnalysisBase): Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method 
:meth:`get_supported_backends` to see all supported backends. - The 'self._res_array' was modified to 'self.results.distances' to - enable the implementation of the parallelization. """ _analysis_algorithm_is_parallelizable = True From 00cb33574b2cee5b2f8ee56d1b797fa2b9dd0348 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Wed, 27 Nov 2024 00:21:59 +0100 Subject: [PATCH 17/18] Update CHANGELOG moved to 2.9.0 --- package/CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index c5db46c3df6..5811e911eb8 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,13 +14,14 @@ The rules for this file: ------------------------------------------------------------------------------- -??/??/?? IAlibay +??/??/?? IAlibay, talagayev * 2.9.0 Fixes Enhancements + * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) Changes @@ -98,7 +99,6 @@ Enhancements (Issue #4673) * Enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674) * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) - * Enables parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` From 047bd8cd89811b36bb2b1d851f052d7f4054872a Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Fri, 13 Dec 2024 09:03:11 -0700 Subject: [PATCH 18/18] Apply suggestions from code review --- package/MDAnalysis/analysis/nucleicacids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index d6b45583c58..b0f5013e799 
100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -160,7 +160,7 @@ class NucPairDist(AnalysisBase): Added static method :attr:`select_strand_atoms` as a helper for selecting atom pairs for distance analysis. - .. versionchanged:: 2.8.0 + .. versionchanged:: 2.9.0 Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method :meth:`get_supported_backends` to see all supported backends.