Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP, ENH: add TPR position and velocity read support #4873

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions package/MDAnalysis/coordinates/TPR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

header needs expanding to full version


from . import base
from ..lib import util
from .timestep import Timestep
import MDAnalysis.topology.tpr.utils as tpr_utils
import MDAnalysis.topology.tpr.setting as S


import numpy as np


class TPRReader(base.SingleFrameReaderBase):
# TODO: reduce duplication with `TPRparser`;
# we could share some state for the position
# in the binary file to avoid re-reading topology
# or perhaps combine the topology and coordinate reading
# with some inheritance shenanigans?
format = "TPR"
units = {"length": "nm", "velocity": "nm/ps"}
_Timestep = Timestep

def _read_first_frame(self):
# Read header/move over topology
# TODO: reduce duplication with TPRparser perhaps...
with util.openany(self.filename, mode='rb') as infile:
tprf = infile.read()
data = tpr_utils.TPXUnpacker(tprf)
try:
th = tpr_utils.read_tpxheader(data) # tpxheader
except (EOFError, ValueError):
msg = f"{self.filename}: Invalid tpr file or cannot be recognized"
logger.critical(msg)
raise IOError(msg)

self.ts = ts = self._Timestep(th.natoms, **self._ts_kwargs)
self.n_atoms = th.natoms

# Starting with gromacs 2020 (tpx version 119), the body of the file
# is encoded differently. We change the unpacker accordingly.
if th.fver >= S.tpxv_AddSizeField and th.fgen >= 27:
actual_body_size = len(data.get_buffer()) - data.get_position()
if actual_body_size == 4 * th.sizeOfTprBody:
# See issue #2428.
msg = (
"TPR files produced with beta versions of gromacs 2020 "
"are not supported."
)
logger.critical(msg)
raise IOError(msg)
data = tpr_utils.TPXUnpacker2020.from_unpacker(data)

state_ngtc = th.ngtc # done init_state() in src/gmxlib/tpxio.c
if th.bBox:
tpr_utils.extract_box_info(data, th.fver)

if state_ngtc > 0:
if th.fver < 69: # redundancy due to different versions
tpr_utils.ndo_real(data, state_ngtc)
tpr_utils.ndo_real(data, state_ngtc) # relevant to Berendsen tcoupl_lambda

if th.bTop:
tpr_top = tpr_utils.do_mtop(data, th.fver,
tpr_resid_from_one=True)
else:
msg = f"{self.filename}: No topology found in tpr file"
logger.critical(msg)
raise IOError(msg)

if th.bX:
self.ts._pos = np.asarray(tpr_utils.ndo_rvec(data, th.natoms),
dtype=np.float32)
tylerjereddy marked this conversation as resolved.
Show resolved Hide resolved
if th.bV:
self.ts._velocities = np.asarray(tpr_utils.ndo_rvec(data, th.natoms),
dtype=np.float32)
self.ts.has_velocities = True
1 change: 1 addition & 0 deletions package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ class can choose an appropriate reader automatically.
from . import PDB
from . import PDBQT
from . import PQR
from . import TPR
from . import TRC
from . import TRJ
from . import TRR
Expand Down
42 changes: 42 additions & 0 deletions package/MDAnalysis/topology/tpr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,48 @@ def do_mtop(data, fver, tpr_resid_from_one=False):
elements = Elements(np.array(elements, dtype=object))
top.add_TopologyAttr(elements)

# NOTE: the tpr striding code below serves the
# purpose of placing us in a suitable "seek" position
# in the binary file such that we are well placed
# for reading coords and velocities if they are present
# the order of operations is based on an analysis of
# the C++ code in do_mtop() function in the GROMACS
# source at:
# src/gromacs/fileio/tpxio.cpp
# TODO: expand tpx version support for striding to
# the coordinates
if fver >= 129:
# TODO: the following value is important, and not sure
# how to access programmatically yet...
# from GMX source code:
# api/legacy/include/gromacs/topology/topology_enums.h
# worst case scenario we hard code it based on
# tpx/GMX version?
SimulationAtomGroupType_size = 10
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On latest GMX main branch, this is the data structure in question:

enum class SimulationAtomGroupType : int
{
    TemperatureCoupling,
    EnergyOutput,
    Acceleration,
    Freeze,
    User1,
    User2,
    MassCenterVelocityRemoval,
    CompressedPositionOutput,
    OrientationRestraintsFit,
    QuantumMechanics,
    Count
};

n_atoms = data.unpack_int()
interm = data.unpack_uchar()
ngrid = data.unpack_int()
grid_spacing = data.unpack_int()
n_elements = grid_spacing ** 2
for i in range(ngrid):
for j in range(n_elements):
ndo_real(data, 4)
for i in range(SimulationAtomGroupType_size):
group_size = data.unpack_int()
ndo_int(data, group_size)
n_group_names = data.unpack_int()
for i in range(n_group_names):
data.unpack_int()
for i in range(SimulationAtomGroupType_size):
n_grp_numbers = data.unpack_int()
if n_grp_numbers != 0:
for i in range(n_grp_numbers):
data.unpack_uchar()
im_excl_grp_size = data.unpack_int()
ndo_int(data, im_excl_grp_size)
# TODO: why is this needed?
data.unpack_int()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above can probably be cleaned up a bit.. probably has some unused vars, etc.

It was produced by careful printf-ing the GMX source as you might expect. Expanding to support other tpx versions may not be too bad, although this was fairly time consuming to draft.


return top


Expand Down
77 changes: 77 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/test_tpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from MDAnalysisTests.datafiles import (TPR2024_4_bonded,
TPR_EXTRA_2024_4,
TPR2024_4,
TPR2024,
TPR2023,
TPR_xvf_2024_4)
import MDAnalysis as mda


import pytest
import numpy as np
from numpy.testing import assert_allclose, assert_equal


@pytest.mark.parametrize("tpr_file, exp_first_atom, exp_last_atom, exp_shape, exp_vel_first_atom, exp_vel_last_atom", [
(TPR2024_4_bonded, # tpx 134
[4.446, 4.659, 2.384],
[4.446, 4.659, 2.384],
(14, 3),
np.zeros(3),
np.zeros(3),
),
# same coordinates, different shape
(TPR_EXTRA_2024_4, # tpx 134
[4.446, 4.659, 2.384],
[4.446, 4.659, 2.384],
(18, 3),
np.zeros(3),
np.zeros(3),
),
# different coordinates and different shape
(TPR2024_4, # tpx 134
[3.25000e-01, 1.00400e+00, 1.03800e+00],
[-2.56000e-01, 1.37300e+00, 3.59800e+00],
(2263, 3),
np.zeros(3),
np.zeros(3),
),
# nonzero velocities
(TPR_xvf_2024_4, # tpx 134
[3.19900e+00, 1.62970e+00, 1.54480e+00],
[3.39350e+00, 3.49420e+00, 3.02400e+00],
(19385, 3),
[-2.06687e-01, 2.66782e-01, -1.05640e-01],
[-3.38010e-02, -3.22064e-01, -1.98638e-01],
),
(TPR2024, # tpx 133
[3.25000e-01, 1.00400e+00, 1.03800e+00],
[-2.56000e-01, 1.37300e+00, 3.59800e+00],
(2263, 3),
np.zeros(3),
np.zeros(3),
),
(TPR2023, # tpx 129
[3.25000e-01, 1.00400e+00, 1.03800e+00],
[-2.56000e-01, 1.37300e+00, 3.59800e+00],
(2263, 3),
np.zeros(3),
np.zeros(3),
),
])
def test_basic_read_tpr(tpr_file,
exp_first_atom,
exp_last_atom,
exp_shape,
exp_vel_first_atom,
exp_vel_last_atom):
# verify basic ability to read positions and
# velocities from GMX .tpr files
# expected values are from gmx dump
u = mda.Universe(tpr_file)
assert_allclose(u.atoms.positions[0, ...], exp_first_atom)
assert_allclose(u.atoms.positions[-1, ...], exp_last_atom)
assert_equal(u.atoms.positions.shape, exp_shape)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have specific conventions for coordinate reader testing, but for now this is where I've started. Should be easy to expand to include velocities as well.

Cases with only positions and no velocities (etc.) may also be sensible to add, on top of older tpx files...

assert_allclose(u.atoms.velocities[0, ...], exp_vel_first_atom)
assert_allclose(u.atoms.velocities[-1, ...], exp_vel_last_atom)
assert_equal(u.atoms.velocities.shape, exp_shape)
Binary file not shown.
1 change: 1 addition & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@
TNG_traj_vels_forces = (_data_ref / 'argon_npt_compressed_vels_forces.tng').as_posix()
PDB_xvf = (_data_ref / 'cobrotoxin.pdb').as_posix()
TPR_xvf = (_data_ref / 'cobrotoxin.tpr').as_posix()
TPR_xvf_2024_4 = (_data_ref / 'cobrotoxin_2024_4.tpr').as_posix()
TRR_xvf = (_data_ref / 'cobrotoxin.trr').as_posix()
H5MD_xvf = (_data_ref / 'cobrotoxin.h5md').as_posix()
H5MD_energy = (_data_ref / 'cu.h5md').as_posix()
Expand Down
Loading