diff --git a/package/AUTHORS b/package/AUTHORS index db72bf2e286..94a3de28fd7 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -241,6 +241,7 @@ Chronological list of authors - Leon Wehrhan - Valerij Talagayev - Kurt McKee + - Fabian Zills diff --git a/package/CHANGELOG b/package/CHANGELOG index 59afa87003b..ee9e88cf86a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -17,11 +17,13 @@ The rules for this file: ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder, tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, - yuxuanzhuang + yuxuanzhuang, PythonFZ * 2.8.0 Fixes + * Do not raise an Error reading H5MD files with datasets like + `observables//` (part of Issue #4598, PR #4615) * Fix failure in double-serialization of TextIOPicklable file reader. (Issue #3723, PR #3722) * Fix failure to preserve modification of coordinates after serialization, diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 1062240b48c..48283113f43 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -207,6 +207,7 @@ :members: """ +import warnings import numpy as np import MDAnalysis as mda @@ -673,10 +674,27 @@ def _read_frame(self, frame): def _copy_to_data(self): """assigns values to keys in data dictionary""" - if 'observables' in self._file: - for key in self._file['observables'].keys(): - self.ts.data[key] = self._file['observables'][key][ - 'value'][self._frame] + if "observables" in self._file: + for key in self._file["observables"].keys(): + try: + # if has value as subkey read directly into data + if "value" in self._file["observables"][key]: + self.ts.data[key] = self._file["observables"][key][ + "value" + ][self._frame] + # if value is not a subkey, read dict of subkeys + else: + for subkey in self._file["observables"][key].keys(): + self.ts.data[key + "/" + subkey] = self._file[ + "observables" + ][key][subkey]["value"][self._frame] + except (AttributeError, KeyError): + # KeyError: subkey is a dataset, but does not have 'value' + # AttributeError: key is a group, not a dataset + raise ValueError( + f"Could not read {key} from observables group. " + "Possibly not a legal H5MD observable specification." + ) # pulls 'time' and 'step' out of first available parent group for name, value in self._has.items(): diff --git a/testsuite/MDAnalysisTests/coordinates/test_h5md.py b/testsuite/MDAnalysisTests/coordinates/test_h5md.py index 7fcd9966842..d3fec51f818 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_h5md.py +++ b/testsuite/MDAnalysisTests/coordinates/test_h5md.py @@ -5,16 +5,25 @@ import os import MDAnalysis as mda from MDAnalysis.coordinates.H5MD import HAS_H5PY + if HAS_H5PY: import h5py + from MDAnalysis.coordinates.H5MD import H5MDReader from MDAnalysis.exceptions import NoDataError -from MDAnalysisTests import make_Universe -from MDAnalysisTests.datafiles import (H5MD_xvf, TPR_xvf, TRR_xvf, - COORDINATES_TOPOLOGY, - COORDINATES_H5MD) -from MDAnalysisTests.coordinates.base import (MultiframeReaderTest, - BaseReference, BaseWriterTest, - assert_timestep_almost_equal) +from MDAnalysisTests.datafiles import ( + H5MD_xvf, + TPR_xvf, + TRR_xvf, + COORDINATES_TOPOLOGY, + COORDINATES_H5MD, + H5MD_energy, + H5MD_malformed, +) +from MDAnalysisTests.coordinates.base import ( + MultiframeReaderTest, + BaseReference, + BaseWriterTest, +) @pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") @@ -894,3 +903,29 @@ def test_writer_no_h5py(self, Writer, outfile): u.atoms.n_atoms) as W: for ts in u.trajectory: W.write(universe) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +class TestH5MDReaderWithObservables(object): + """Read H5MD file with 'observables/atoms/energy'.""" + + prec = 3 + ext = 'h5md' + + def test_read_h5md_issue4598(self): + """Read a H5MD file with observables. + + The reader will ignore the 'observables/atoms/energy'. + """ + + u = mda.Universe.empty(n_atoms=108, trajectory=True) + reader = H5MDReader(H5MD_energy) + u.trajectory = reader + for ts in u.trajectory: + assert "atoms/energy" in ts.data + + def test_read_h5md_malformed(self): + """Try reading an H5MD file with malformed observables.""" + + with pytest.raises(ValueError): + H5MDReader(H5MD_malformed) diff --git a/testsuite/MDAnalysisTests/data/cu.h5md b/testsuite/MDAnalysisTests/data/cu.h5md new file mode 100644 index 00000000000..1669bf03eb4 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/cu.h5md differ diff --git a/testsuite/MDAnalysisTests/data/cu_malformed.h5md b/testsuite/MDAnalysisTests/data/cu_malformed.h5md new file mode 100644 index 00000000000..f5105f20938 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/cu_malformed.h5md differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 0647d9ea02b..ef0bea4036a 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -77,6 +77,8 @@ "GRO_sameresid_diffresname", # Case where two residues share the same resid "PDB_xvf", "TPR_xvf", "TRR_xvf", # Gromacs coords/veloc/forces (cobrotoxin, OPLS-AA, Gromacs 4.5.5 tpr) "H5MD_xvf", # TPR_xvf + TRR_xvf converted to h5md format + "H5MD_energy", # H5MD trajectory with observables/atoms/energy + "H5MD_malformed", # H5MD trajectory with malformed observable group "XVG_BZ2", # Compressed xvg file about cobrotoxin "PDB_xlserial", "TPR400", "TPR402", "TPR403", "TPR404", "TPR405", "TPR406", "TPR407", @@ -373,6 +375,8 @@ TPR_xvf = (_data_ref / 'cobrotoxin.tpr').as_posix() TRR_xvf = (_data_ref / 'cobrotoxin.trr').as_posix() H5MD_xvf = (_data_ref / 'cobrotoxin.h5md').as_posix() +H5MD_energy = (_data_ref / 'cu.h5md').as_posix() +H5MD_malformed = (_data_ref / 'cu_malformed.h5md').as_posix() XVG_BZ2 = (_data_ref / 'cobrotoxin_protein_forces.xvg.bz2').as_posix() XPDB_small = (_data_ref / '5digitResid.pdb').as_posix()