Skip to content

Commit

Permalink
'MDAnalysis.analysis.nucleicacids' parallelization (#4727)
Browse files Browse the repository at this point in the history
- Fixes #4670 
- Adds parallel backend support to the `NucPairDist` class in nucleicacids.py
- Addition of parallelization tests in test_nucleicacids.py and fixtures in conftest.py
- Updated Changelog
  • Loading branch information
talagayev authored Dec 14, 2024
1 parent c6bfa09 commit 7f686ca
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
11 changes: 6 additions & 5 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ The rules for this file:


-------------------------------------------------------------------------------
??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777
??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777, talagayev

* 2.9.0

Expand All @@ -25,6 +25,7 @@ Fixes
the function to prevent shared state. (Issue #4655)

Enhancements
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
* Added `precision` for XYZWriter (Issue #4775, PR #4771)

Expand Down Expand Up @@ -98,11 +99,11 @@ Enhancements
* Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class
(Issue #4158, PR #4304)
* Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672)
* explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680)
* enables parallelization for analysis.bat.BAT (Issue #4663)
* enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
* Explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680)
* Enables parallelization for analysis.bat.BAT (Issue #4663)
* Enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
(Issue #4673)
* enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674)
* Enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674)
* Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664)
* Improve error message for `AtomGroup.unwrap()` when bonds are not present. (Issue #4436, PR #4642)
* Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP)
Expand Down
26 changes: 20 additions & 6 deletions package/MDAnalysis/analysis/nucleicacids.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@

import MDAnalysis as mda
from .distances import calc_bonds
from .base import AnalysisBase, Results
from .base import AnalysisBase, ResultsGroup
from MDAnalysis.core.groups import Residue, ResidueGroup


Expand Down Expand Up @@ -159,13 +159,23 @@ class NucPairDist(AnalysisBase):
.. versionchanged:: 2.7.0
Added static method :attr:`select_strand_atoms` as a
helper for selecting atom pairs for distance analysis.
.. versionchanged:: 2.9.0
Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
backends; use the new method :meth:`get_supported_backends` to see all
supported backends.
"""

_analysis_algorithm_is_parallelizable = True

# Report which execution backends this analysis class declares support for:
# 'serial' plus the two parallel backends, 'multiprocessing' and 'dask'.
@classmethod
def get_supported_backends(cls):
return ('serial', 'multiprocessing', 'dask')

_s1: mda.AtomGroup
_s2: mda.AtomGroup
_n_sel: int
_res_dict: Dict[int, List[float]]


def __init__(self, selection1: List[mda.AtomGroup],
selection2: List[mda.AtomGroup],
**kwargs) -> None:
Expand Down Expand Up @@ -276,7 +286,7 @@ def select_strand_atoms(
return (sel1, sel2)

def _prepare(self) -> None:
self._res_array: np.ndarray = np.zeros(
self.results.distances: np.ndarray = np.zeros(
[self.n_frames, self._n_sel]
)

Expand All @@ -285,13 +295,17 @@ def _single_frame(self) -> None:
self._s1.positions, self._s2.positions
)

self._res_array[self._frame_index, :] = dist
self.results.distances[self._frame_index, :] = dist

def _conclude(self) -> None:
self.results['distances'] = self._res_array
self.results['pair_distances'] = self.results['distances']
# TODO: remove pair_distances in 3.0.0

# Build the aggregator describing how results are combined: the 'distances'
# entry is handled by ResultsGroup.ndarray_vstack.
# NOTE(review): presumably this merges the per-worker arrays of a parallel run
# by stacking them along the frame axis -- confirm against AnalysisBase.
# 'pair_distances' is not listed here; _conclude assigns it from 'distances'.
def _get_aggregator(self):
return ResultsGroup(lookup={
'distances': ResultsGroup.ndarray_vstack,
}
)

class WatsonCrickDist(NucPairDist):
r"""
Expand Down
8 changes: 8 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import (
HydrogenBondAnalysis,
)
from MDAnalysis.analysis.nucleicacids import NucPairDist
from MDAnalysis.lib.util import is_installed


Expand Down Expand Up @@ -141,3 +142,10 @@ def client_DSSP(request):
@pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis))
def client_HydrogenBondAnalysis(request):
return request.param


# MDAnalysis.analysis.nucleicacids

# Module-scoped fixture parametrized over params_for_cls(NucPairDist); each
# parameter is handed back unchanged. The nucleic-acid tests unpack it as
# keyword arguments to run(), e.g. WC.run(**client_NucPairDist).
@pytest.fixture(scope="module", params=params_for_cls(NucPairDist))
def client_NucPairDist(request):
return request.param
12 changes: 6 additions & 6 deletions testsuite/MDAnalysisTests/analysis/test_nucleicacids.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ def test_empty_ag_error(strand):


@pytest.fixture(scope='module')
def wc_rna(strand):
def wc_rna(strand, client_NucPairDist):
strand1 = ResidueGroup([strand.residues[0], strand.residues[21]])
strand2 = ResidueGroup([strand.residues[1], strand.residues[22]])

WC = WatsonCrickDist(strand1, strand2)
WC.run()
WC.run(**client_NucPairDist)
return WC


Expand Down Expand Up @@ -114,23 +114,23 @@ def test_wc_dis_results_keyerrs(wc_rna, key):
wc_rna.results[key]


def test_minor_dist(strand):
def test_minor_dist(strand, client_NucPairDist):
strand1 = ResidueGroup([strand.residues[2], strand.residues[19]])
strand2 = ResidueGroup([strand.residues[16], strand.residues[4]])

MI = MinorPairDist(strand1, strand2)
MI.run()
MI.run(**client_NucPairDist)

assert MI.results.distances[0, 0] == approx(15.06506, rel=1e-3)
assert MI.results.distances[0, 1] == approx(3.219116, rel=1e-3)


def test_major_dist(strand):
def test_major_dist(strand, client_NucPairDist):
strand1 = ResidueGroup([strand.residues[1], strand.residues[4]])
strand2 = ResidueGroup([strand.residues[11], strand.residues[8]])

MA = MajorPairDist(strand1, strand2)
MA.run()
MA.run(**client_NucPairDist)

assert MA.results.distances[0, 0] == approx(26.884272, rel=1e-3)
assert MA.results.distances[0, 1] == approx(13.578535, rel=1e-3)

0 comments on commit 7f686ca

Please sign in to comment.