diff --git a/package/MDAnalysis/coordinates/TPR.py b/package/MDAnalysis/coordinates/TPR.py new file mode 100644 index 0000000000..ba9db02299 --- /dev/null +++ b/package/MDAnalysis/coordinates/TPR.py @@ -0,0 +1,77 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 + +from . import base +from ..lib import util +from .timestep import Timestep +import MDAnalysis.topology.tpr.utils as tpr_utils +import MDAnalysis.topology.tpr.setting as S + + +import numpy as np + + +class TPRReader(base.SingleFrameReaderBase): + # TODO: reduce duplication with `TPRparser`; + # we could share some state for the position + # in the binary file to avoid re-reading topology + # or perhaps combine the topology and coordinate reading + # with some inheritance shenanigans? + format = "TPR" + units = {"length": "nm", "velocity": "nm/ps"} + _Timestep = Timestep + + def _read_first_frame(self): + # Read header/move over topology + # TODO: reduce duplication with TPRparser perhaps... + with util.openany(self.filename, mode='rb') as infile: + tprf = infile.read() + data = tpr_utils.TPXUnpacker(tprf) + try: + th = tpr_utils.read_tpxheader(data) # tpxheader + except (EOFError, ValueError): + msg = f"{self.filename}: Invalid tpr file or cannot be recognized" + logger.critical(msg) + raise IOError(msg) + + self.ts = ts = self._Timestep(th.natoms, **self._ts_kwargs) + self.n_atoms = th.natoms + + # Starting with gromacs 2020 (tpx version 119), the body of the file + # is encoded differently. We change the unpacker accordingly. + if th.fver >= S.tpxv_AddSizeField and th.fgen >= 27: + actual_body_size = len(data.get_buffer()) - data.get_position() + if actual_body_size == 4 * th.sizeOfTprBody: + # See issue #2428. + msg = ( + "TPR files produced with beta versions of gromacs 2020 " + "are not supported." + ) + logger.critical(msg) + raise IOError(msg) + data = tpr_utils.TPXUnpacker2020.from_unpacker(data) + + state_ngtc = th.ngtc # done init_state() in src/gmxlib/tpxio.c + if th.bBox: + tpr_utils.extract_box_info(data, th.fver) + + if state_ngtc > 0: + if th.fver < 69: # redundancy due to different versions + tpr_utils.ndo_real(data, state_ngtc) + tpr_utils.ndo_real(data, state_ngtc) # relevant to Berendsen tcoupl_lambda + + if th.bTop: + tpr_top = tpr_utils.do_mtop(data, th.fver, + tpr_resid_from_one=True) + else: + msg = f"{self.filename}: No topology found in tpr file" + logger.critical(msg) + raise IOError(msg) + + if th.bX: + self.ts._pos = np.asarray(tpr_utils.ndo_rvec(data, th.natoms), + dtype=np.float32) + if th.bV: + self.ts._velocities = np.asarray(tpr_utils.ndo_rvec(data, th.natoms), + dtype=np.float32) + self.ts.has_velocities = True diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 602621e5ad..a8ec94526d 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -776,6 +776,7 @@ class can choose an appropriate reader automatically. from . import PDB from . import PDBQT from . import PQR +from . import TPR from . import TRC from . import TRJ from . import TRR diff --git a/package/MDAnalysis/topology/tpr/utils.py b/package/MDAnalysis/topology/tpr/utils.py index 98b4a4bb84..f8bb6a9fbb 100644 --- a/package/MDAnalysis/topology/tpr/utils.py +++ b/package/MDAnalysis/topology/tpr/utils.py @@ -423,6 +423,48 @@ def do_mtop(data, fver, tpr_resid_from_one=False): elements = Elements(np.array(elements, dtype=object)) top.add_TopologyAttr(elements) + # NOTE: the tpr striding code below serves the + # purpose of placing us in a suitable "seek" position + # in the binary file such that we are well placed + # for reading coords and velocities if they are present + # the order of operations is based on an analysis of + # the C++ code in do_mtop() function in the GROMACS + # source at: + # src/gromacs/fileio/tpxio.cpp + # TODO: expand tpx version support for striding to + # the coordinates + if fver >= 129: + # TODO: the following value is important, and not sure + # how to access programmatically yet... + # from GMX source code: + # api/legacy/include/gromacs/topology/topology_enums.h + # worst case scenario we hard code it based on + # tpx/GMX version? + SimulationAtomGroupType_size = 10 + n_atoms = data.unpack_int() + interm = data.unpack_uchar() + ngrid = data.unpack_int() + grid_spacing = data.unpack_int() + n_elements = grid_spacing ** 2 + for i in range(ngrid): + for j in range(n_elements): + ndo_real(data, 4) + for i in range(SimulationAtomGroupType_size): + group_size = data.unpack_int() + ndo_int(data, group_size) + n_group_names = data.unpack_int() + for i in range(n_group_names): + data.unpack_int() + for i in range(SimulationAtomGroupType_size): + n_grp_numbers = data.unpack_int() + if n_grp_numbers != 0: + for i in range(n_grp_numbers): + data.unpack_uchar() + im_excl_grp_size = data.unpack_int() + ndo_int(data, im_excl_grp_size) + # TODO: why is this needed? + data.unpack_int() + return top diff --git a/testsuite/MDAnalysisTests/coordinates/test_tpr.py b/testsuite/MDAnalysisTests/coordinates/test_tpr.py new file mode 100644 index 0000000000..231f888270 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_tpr.py @@ -0,0 +1,77 @@ +from MDAnalysisTests.datafiles import (TPR2024_4_bonded, + TPR_EXTRA_2024_4, + TPR2024_4, + TPR2024, + TPR2023, + TPR_xvf_2024_4) +import MDAnalysis as mda + + +import pytest +import numpy as np +from numpy.testing import assert_allclose, assert_equal + + +@pytest.mark.parametrize("tpr_file, exp_first_atom, exp_last_atom, exp_shape, exp_vel_first_atom, exp_vel_last_atom", [ + (TPR2024_4_bonded, # tpx 134 + [4.446, 4.659, 2.384], + [4.446, 4.659, 2.384], + (14, 3), + np.zeros(3), + np.zeros(3), + ), + # same coordinates, different shape + (TPR_EXTRA_2024_4, # tpx 134 + [4.446, 4.659, 2.384], + [4.446, 4.659, 2.384], + (18, 3), + np.zeros(3), + np.zeros(3), + ), + # different coordinates and different shape + (TPR2024_4, # tpx 134 + [3.25000e-01, 1.00400e+00, 1.03800e+00], + [-2.56000e-01, 1.37300e+00, 3.59800e+00], + (2263, 3), + np.zeros(3), + np.zeros(3), + ), + # nonzero velocities + (TPR_xvf_2024_4, # tpx 134 + [3.19900e+00, 1.62970e+00, 1.54480e+00], + [3.39350e+00, 3.49420e+00, 3.02400e+00], + (19385, 3), + [-2.06687e-01, 2.66782e-01, -1.05640e-01], + [-3.38010e-02, -3.22064e-01, -1.98638e-01], + ), + (TPR2024, # tpx 133 + [3.25000e-01, 1.00400e+00, 1.03800e+00], + [-2.56000e-01, 1.37300e+00, 3.59800e+00], + (2263, 3), + np.zeros(3), + np.zeros(3), + ), + (TPR2023, # tpx 129 + [3.25000e-01, 1.00400e+00, 1.03800e+00], + [-2.56000e-01, 1.37300e+00, 3.59800e+00], + (2263, 3), + np.zeros(3), + np.zeros(3), + ), +]) +def test_basic_read_tpr(tpr_file, + exp_first_atom, + exp_last_atom, + exp_shape, + exp_vel_first_atom, + exp_vel_last_atom): + # verify basic ability to read positions and + # velocities from GMX .tpr files + # expected values are from gmx dump + u = mda.Universe(tpr_file) + assert_allclose(u.atoms.positions[0, ...], exp_first_atom) + assert_allclose(u.atoms.positions[-1, ...], exp_last_atom) + assert_equal(u.atoms.positions.shape, exp_shape) + assert_allclose(u.atoms.velocities[0, ...], exp_vel_first_atom) + assert_allclose(u.atoms.velocities[-1, ...], exp_vel_last_atom) + assert_equal(u.atoms.velocities.shape, exp_shape) diff --git a/testsuite/MDAnalysisTests/data/cobrotoxin_2024_4.tpr b/testsuite/MDAnalysisTests/data/cobrotoxin_2024_4.tpr new file mode 100644 index 0000000000..63925f46e6 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/cobrotoxin_2024_4.tpr differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 3d3d7d9312..d8ec61023a 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -375,6 +375,7 @@ TNG_traj_vels_forces = (_data_ref / 'argon_npt_compressed_vels_forces.tng').as_posix() PDB_xvf = (_data_ref / 'cobrotoxin.pdb').as_posix() TPR_xvf = (_data_ref / 'cobrotoxin.tpr').as_posix() +TPR_xvf_2024_4 = (_data_ref / 'cobrotoxin_2024_4.tpr').as_posix() TRR_xvf = (_data_ref / 'cobrotoxin.trr').as_posix() H5MD_xvf = (_data_ref / 'cobrotoxin.h5md').as_posix() H5MD_energy = (_data_ref / 'cu.h5md').as_posix()