From c1865e6ce77a4e63220e41ae04eb1449ffe6cabb Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 20 Jun 2024 21:53:56 +1000 Subject: [PATCH 01/29] Start of the parser and associated tests --- mda_openbabel_converter/OpenBabelParser.py | 105 +++++++++++++++++- .../tests/test_openbabel_parser.py | 0 2 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 mda_openbabel_converter/tests/test_openbabel_parser.py diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index ab345c3..84a621d 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -4,6 +4,15 @@ import MDAnalysis as mda from MDAnalysis.topology.base import TopologyReaderBase +from MDAnalysis.core.topology import Topology +import warnings + +try: + import openbabel as OB + from openbabel import OBMol + from openbabel import OBElementTable +except ImportError: + print("Cannot find openbabel, install with 'pip install openbabel==2.4.0'") class OpenBabelParser(): """ @@ -11,19 +20,103 @@ class OpenBabelParser(): MDAnalysis Topology or adds it to a pre-existing Topology. This parser will does not work in the reverse direction. """ + + @staticmethod def _format_hint(thing): """ Base function to check if the parser can actually parse this “thing” - (i.e., is it a valid OpenBabel OBMol with no missing information, that - can be converted to a MDAnalysis Topology?) + (i.e., is it a valid OpenBabel OBMol that can be converted to a + MDAnalysis Topology?) """ - pass + try: + import openbabel as OB + except ImportError: + return False + else: + return isinstance(thing, OB.OBMol) - def parse(self, **kwargs): + def parse(self, filename: OBMol, **kwargs): """ Accepts an OpenBabel OBMol and returns a MDAnalysis Topology. Will need to extract the number of atoms, number of residues, number of segments, - atom_residue index, residue_segment index and other attributes from the - OBMol to initialise a new Topology. + atom_residue index, residue_segment index and all of the atom's + relevant attributes from the OBMol to initialise a new Topology. """ + format = 'OPENBABEL' + + self.atoms = [] + self.n_atoms = 0 + self.residues = [] + self.n_residues = 0 + self.segments = [] + self.n_segments = 0 + + obmol = filename + + # Atoms + names = [] + chiralities = [] + resnums = [] + resnames = [] + elements = [] + masses = [] + charges = [] + aromatics = [] + ids = [] + atomtypes = [] + segids = [] + altlocs = [] + chainids = [] + icodes = [] + occupancies = [] + tempfactors = [] # B factor; not supported by OB + + if obmol.GetFirstAtom().equals(None): + return Topology(n_atoms = 0, + n_res = 0, + n_seg = 0, + attrs=None, + atom_resindex=None, + residue_segindex=None) + + for atom in OB.OBMolAtomIter(obmol): + # need to add handling incase attributes are invalid or null in OBMol + # names.append(atom.GetType()) #char -> nothing for name in OBMol? Is name required to make MDA Atom? + atomtypes.append(atom.GetType()) #char + ids.append(atom.GetIdx()) #int + masses.append(atom.GetExactMass()) #double -> what about atom.GetAtomicMass()??; which is better? + if not atom.GetExactMass().equals(atom.GetAtomicMass()): + warnings.warn( + f"Exact mass and atomic mass of atom (ID: {atom.GetIdx}) + not equal. Be aware of isotopes, which are NOT supported + by MDAnalysis.") + charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) + + # convert atomic number to element + elements.append(OBElementTable.GetSymbol(atom.GetAtomicNumber())) #char + + resid = atom.GetResidue() # null if no residue + if resid.equals(None): + warnings.warn( + f"No residue is defined for atom (ID: {atom.GetIdx}). + Please set with 'SETTTING METHOD FOR OBMOL'" # TO DO + ) + # if residue null, will need to assign w MDAnalysis... + else: + resnums.append(resid.GetNum()) # TO DO: check if start at 0 or 1 + resnames.append(resid.GetName()) + + # don't need to check null case, as know assigned to OBMol we're currently parsing + # but, NEED TO HANDLE ADDING MULTIPLE SEGIDS/OBMOLS WHEN CONVERTING TO UNIVERSE AND ADDING TOGETHER... (should be ok, check w tests) + segids.append(atom.GetParent()) + + chiralities.append(atom.IsChiral()) #boolean + aromatics.append(atom.IsAromatic()) #boolean + + + + + + + pass \ No newline at end of file diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py new file mode 100644 index 0000000..e69de29 From fca21ac528ba2a3ff759bf5ae765d4fe3825c7e8 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Mon, 24 Jun 2024 10:08:09 +1000 Subject: [PATCH 02/29] for help with using pytest --- mda_openbabel_converter/OpenBabelParser.py | 39 ++-- mda_openbabel_converter/__init__.py | 4 +- mda_openbabel_converter/tests/conftest.py | 2 +- .../tests/test_openbabel_parser.py | 168 ++++++++++++++++++ pyproject.toml | 1 + 5 files changed, 189 insertions(+), 25 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 84a621d..bd71a68 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -5,6 +5,7 @@ import MDAnalysis as mda from MDAnalysis.topology.base import TopologyReaderBase from MDAnalysis.core.topology import Topology +from MDAnalysis.converters.base import ConverterBase import warnings try: @@ -14,7 +15,7 @@ except ImportError: print("Cannot find openbabel, install with 'pip install openbabel==2.4.0'") -class OpenBabelParser(): +class OpenBabelParser(TopologyReaderBase): """ Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a MDAnalysis Topology or adds it to a pre-existing Topology. This parser will @@ -24,8 +25,8 @@ class OpenBabelParser(): @staticmethod def _format_hint(thing): """ - Base function to check if the parser can actually parse this “thing” - (i.e., is it a valid OpenBabel OBMol that can be converted to a + Base function to check if the parser can actually parse this “thing” + (i.e., is it a valid OpenBabel OBMol that can be converted to a MDAnalysis Topology?) """ try: @@ -35,12 +36,12 @@ def _format_hint(thing): else: return isinstance(thing, OB.OBMol) - def parse(self, filename: OBMol, **kwargs): + def parse(self, **kwargs): """ Accepts an OpenBabel OBMol and returns a MDAnalysis Topology. Will need to extract the number of atoms, number of residues, number of segments, - atom_residue index, residue_segment index and all of the atom's - relevant attributes from the OBMol to initialise a new Topology. + atom_residue index, residue_segment index and all of the atom's + relevant attributes from the OBMol to initialise a new Topology. """ format = 'OPENBABEL' @@ -51,7 +52,7 @@ def parse(self, filename: OBMol, **kwargs): self.segments = [] self.n_segments = 0 - obmol = filename + obmol = self.filename # Atoms names = [] @@ -69,12 +70,12 @@ def parse(self, filename: OBMol, **kwargs): chainids = [] icodes = [] occupancies = [] - tempfactors = [] # B factor; not supported by OB + tempfactors = [] # B factor; not supported by OB if obmol.GetFirstAtom().equals(None): - return Topology(n_atoms = 0, - n_res = 0, - n_seg = 0, + return Topology(n_atoms=0, + n_res=0, + n_seg=0, attrs=None, atom_resindex=None, residue_segindex=None) @@ -82,14 +83,14 @@ def parse(self, filename: OBMol, **kwargs): for atom in OB.OBMolAtomIter(obmol): # need to add handling incase attributes are invalid or null in OBMol # names.append(atom.GetType()) #char -> nothing for name in OBMol? Is name required to make MDA Atom? - atomtypes.append(atom.GetType()) #char + atomtypes.append(atom.GetType()) # char ids.append(atom.GetIdx()) #int - masses.append(atom.GetExactMass()) #double -> what about atom.GetAtomicMass()??; which is better? + masses.append(atom.GetExactMass()) # double -> what about atom.GetAtomicMass()??; which is better? if not atom.GetExactMass().equals(atom.GetAtomicMass()): warnings.warn( f"Exact mass and atomic mass of atom (ID: {atom.GetIdx}) - not equal. Be aware of isotopes, which are NOT supported - by MDAnalysis.") + not equal. Be aware of isotopes, which are NOT supported + by MDAnalysis.") charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) # convert atomic number to element @@ -108,15 +109,9 @@ def parse(self, filename: OBMol, **kwargs): # don't need to check null case, as know assigned to OBMol we're currently parsing # but, NEED TO HANDLE ADDING MULTIPLE SEGIDS/OBMOLS WHEN CONVERTING TO UNIVERSE AND ADDING TOGETHER... (should be ok, check w tests) - segids.append(atom.GetParent()) + segids.append(atom.GetParent()) chiralities.append(atom.IsChiral()) #boolean aromatics.append(atom.IsAromatic()) #boolean - - - - - - pass \ No newline at end of file diff --git a/mda_openbabel_converter/__init__.py b/mda_openbabel_converter/__init__.py index 9ceb82b..957ff50 100644 --- a/mda_openbabel_converter/__init__.py +++ b/mda_openbabel_converter/__init__.py @@ -5,8 +5,8 @@ # Add imports here from importlib.metadata import version -from .OpenBabel import OpenBabelReader -from .OpenBabel import OpenBabelConverter +# from .OpenBabel import OpenBabelReader +# from .OpenBabel import OpenBabelConverter # from .OpenBabelParser import OpenBabelTopologyParser __version__ = version("mda_openbabel_converter") diff --git a/mda_openbabel_converter/tests/conftest.py b/mda_openbabel_converter/tests/conftest.py index c296ccd..d44cbc3 100644 --- a/mda_openbabel_converter/tests/conftest.py +++ b/mda_openbabel_converter/tests/conftest.py @@ -8,7 +8,7 @@ # https://docs.pytest.org/en/stable/how-to/fixtures.html#scope-sharing-fixtures-across-classes-modules-packages-or-session import pytest - +# import MDAnalysis from mda_openbabel_converter.data.files import MDANALYSIS_LOGO diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index e69de29..fa96542 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -0,0 +1,168 @@ +# Testing OpenBabel and Pybel + +import MDAnalysis as mda +import openbabel as OB +from openbabel import OBMol, OBConversion +import pybel +from pybel import readfile + +import mda_openbabel_converter +from mda_openbabel_converter import OpenBabelParser as OBParser +import pytest # version 8.2.2 +import sys +import numpy as np +from numpy.testing import assert_equal, assert_allclose +# from mda_openbabel_converter.tests.test_data import # something... +from MDAnalysisTests.topology.base import ParserBase # version 2.7.0 + +# *** can run with "python -m pytest" but not "pytest" (can't find +# MDAnalysis) - need to fix this! *** + + +class OpenBabelParserBase(ParserBase): + parser = OBParser + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', + 'resids', 'resnums', 'chiralities', + 'segids', 'bonds', + ] + + expected_n_atoms = 0 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 0 + + def test_creates_universe(self, filename): + u = mda.Universe(filename, format='OPENBABEL') + assert isinstance(u, mda.Universe) + + def test_bonds_total_counts(self, top): + assert len(top.bonds.values) == self.expected_n_bonds + +class TestOpenBabelParserEmpty(OpenBabelParserBase): + + @pytest.fixture(scope='class') + def filename(self): + return OBMol() + + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', + 'resids', 'resnums', 'chiralities', + 'segids', 'bonds', + ] + + expected_n_atoms = 0 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 0 + + @pytest.fixture(scope='class') + def topology(self, filename): + topology = OBParser(filename) + return topology + + def test_attributes(self, topology): + + assert len(np.unique(topology.results.hbonds[:, 0])) == 10 + assert len(topology.results.hbonds) == 32 + + reference = { + 'distance': {'mean': 2.7627309, 'std': 0.0905052}, + 'angle': {'mean': 158.9038039, 'std': 12.0362826}, + } + + assert_allclose(np.mean(h.results.hbonds[:, 4]), + reference['distance']['mean']) + assert_allclose(np.std(h.results.hbonds[:, 4]), + reference['distance']['std']) + assert_allclose(np.mean(h.results.hbonds[:, 5]), + reference['angle']['mean']) + assert_allclose(np.std(h.results.hbonds[:, 5]), + reference['angle']['std']) + + def test_atoms_total_counts(self, topology): + assert len(topology.select_atoms("all")) == self.expected_n_atoms + + def test_residues_total_counts(self, topology): + assert len(topology.select_atoms("all")) == self.expected_n_atoms + + def test_segments_total_counts(self, topology): + assert len(topology.select_atoms("all")) == self.expected_n_atoms + + +class TestOpenBabelParserAtomBuild(OpenBabelParserBase): + + @pytest.fixture(scope='class') + def filename(self): + return OBMol() + + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', + 'resids', 'resnums', 'chiralities', + 'segids', 'bonds', + ] + + # expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] + + expected_n_atoms = 2 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 1 + + @pytest.fixture(scope='class') + def parser(self, mol): + topology = OBParser(mol, **self.kwargs) + return topology + + def test_hbond_analysis(self, h): + + assert len(np.unique(h.results.hbonds[:, 0])) == 10 + assert len(h.results.hbonds) == 32 + + reference = { + 'distance': {'mean': 2.7627309, 'std': 0.0905052}, + 'angle': {'mean': 158.9038039, 'std': 12.0362826}, + } + + assert_allclose(np.mean(h.results.hbonds[:, 4]), + reference['distance']['mean']) + assert_allclose(np.std(h.results.hbonds[:, 4]), + reference['distance']['std']) + assert_allclose(np.mean(h.results.hbonds[:, 5]), + reference['angle']['mean']) + assert_allclose(np.std(h.results.hbonds[:, 5]), + reference['angle']['std']) + +mol = OBMol() +print(mol.NumAtoms()) #Should print 0 (atoms) + +a = mol.NewAtom() +a.SetAtomicNum(6) # carbon atom +a.SetVector(0.0, 1.0, 2.0) # coordinates +b = mol.NewAtom() +mol.AddBond(1, 2, 1) # atoms indexed from 1 +print(mol.NumAtoms()) # Should print 2 (atoms) +print(mol.NumBonds()) # Should print 1 (bond) + +for i in range(1, mol.NumAtoms()+1): + atom = mol.GetAtomById(i-1) + print(a) + +# -------- + +obConversion = OBConversion() +obConversion.SetInAndOutFormats("smi", "mdl") + +mol = OBMol() +obConversion.ReadString(mol, "C1=CC=CS1") + +#readfile(format="smi", filename="") + +print(mol.NumAtoms()) # Should print 5 (atoms) + +mol.AddHydrogens() +print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens + +outMDL = obConversion.WriteString(mol) +print(outMDL) + +# -------- + + diff --git a/pyproject.toml b/pyproject.toml index 5c28dec..6f16050 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ test = [ "pytest>=6.0", "pytest-xdist>=2.5", "pytest-cov>=3.0", + "MDAnalysisTests>=2.0.0", ] doc = [ "sphinx", From 1a00c271bd20e6db54a749afdc9a62ee8b4a9885 Mon Sep 17 00:00:00 2001 From: hmacdope Date: Mon, 24 Jun 2024 10:17:29 +1000 Subject: [PATCH 03/29] fix a few bits of cruft --- mda_openbabel_converter/OpenBabelParser.py | 10 +++++----- mda_openbabel_converter/tests/test_openbabel_parser.py | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index bd71a68..54fab5c 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -88,9 +88,9 @@ def parse(self, **kwargs): masses.append(atom.GetExactMass()) # double -> what about atom.GetAtomicMass()??; which is better? if not atom.GetExactMass().equals(atom.GetAtomicMass()): warnings.warn( - f"Exact mass and atomic mass of atom (ID: {atom.GetIdx}) - not equal. Be aware of isotopes, which are NOT supported - by MDAnalysis.") + f"Exact mass and atomic mass of atom (ID: {atom.GetIdx})" + "not equal. Be aware of isotopes, which are NOT supported" + "by MDAnalysis.") charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) # convert atomic number to element @@ -99,8 +99,8 @@ def parse(self, **kwargs): resid = atom.GetResidue() # null if no residue if resid.equals(None): warnings.warn( - f"No residue is defined for atom (ID: {atom.GetIdx}). - Please set with 'SETTTING METHOD FOR OBMOL'" # TO DO + f"No residue is defined for atom (ID: {atom.GetIdx})." + "Please set with 'SETTTING METHOD FOR OBMOL'" # TO DO ) # if residue null, will need to assign w MDAnalysis... else: diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index fa96542..9b21c03 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -2,9 +2,8 @@ import MDAnalysis as mda import openbabel as OB -from openbabel import OBMol, OBConversion -import pybel -from pybel import readfile +from openbabel import OBMol, OBConversion, pybel +#from pybel import readfile import mda_openbabel_converter from mda_openbabel_converter import OpenBabelParser as OBParser From 58ddeaea07597fcac948b30a4d4637f349c62025 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Sat, 29 Jun 2024 10:14:36 +1000 Subject: [PATCH 04/29] OpenBabelParser and tests work 29 June --- mda_openbabel_converter/OpenBabelParser.py | 188 ++++++++++++++++-- .../tests/test_openbabel_parser.py | 144 ++++++-------- 2 files changed, 228 insertions(+), 104 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 54fab5c..59b75cb 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -6,14 +6,38 @@ from MDAnalysis.topology.base import TopologyReaderBase from MDAnalysis.core.topology import Topology from MDAnalysis.converters.base import ConverterBase +from MDAnalysis.core.topologyattrs import ( + Atomids, + Atomnames, + Atomtypes, + Elements, + Masses, + Charges, + Aromaticities, + Bonds, + Resids, + Resnums, + Resnames, + RSChirality, + Segids, + AltLocs, + ChainIDs, + ICodes, + Occupancies, + Tempfactors, +) import warnings +import numpy as np +HAS_OBABEL=False try: - import openbabel as OB + import openbabel as ob from openbabel import OBMol from openbabel import OBElementTable + HAS_OBABEL=True except ImportError: - print("Cannot find openbabel, install with 'pip install openbabel==2.4.0'") + warnings.warn("Cannot find openbabel, install with `mamba install -c " + "conda-forge openbabel`") class OpenBabelParser(TopologyReaderBase): """ @@ -29,12 +53,10 @@ def _format_hint(thing): (i.e., is it a valid OpenBabel OBMol that can be converted to a MDAnalysis Topology?) """ - try: - import openbabel as OB - except ImportError: + if HAS_OBABEL == False: return False else: - return isinstance(thing, OB.OBMol) + return isinstance(thing, ob.OBMol) def parse(self, **kwargs): """ @@ -44,6 +66,7 @@ def parse(self, **kwargs): relevant attributes from the OBMol to initialise a new Topology. """ format = 'OPENBABEL' + mol = self.filename self.atoms = [] self.n_atoms = 0 @@ -72,6 +95,7 @@ def parse(self, **kwargs): occupancies = [] tempfactors = [] # B factor; not supported by OB + if obmol.GetFirstAtom().equals(None): return Topology(n_atoms=0, n_res=0, @@ -80,7 +104,7 @@ def parse(self, **kwargs): atom_resindex=None, residue_segindex=None) - for atom in OB.OBMolAtomIter(obmol): + for atom in ob.OBMolAtomIter(obmol): # need to add handling incase attributes are invalid or null in OBMol # names.append(atom.GetType()) #char -> nothing for name in OBMol? Is name required to make MDA Atom? atomtypes.append(atom.GetType()) # char @@ -96,22 +120,152 @@ def parse(self, **kwargs): # convert atomic number to element elements.append(OBElementTable.GetSymbol(atom.GetAtomicNumber())) #char - resid = atom.GetResidue() # null if no residue - if resid.equals(None): + if atom.HasResidue(): + resid = atom.GetResidue() # null if no residue + resnums.append(resid.GetNum()) # TO DO: check if start at 0 or 1 + resnames.append(resid.GetName()) + chainids.append(resid.GetChainNum()) # is this correct??? + icodes.append(resid.GetInsertionCode()) + else: warnings.warn( f"No residue is defined for atom (ID: {atom.GetIdx})." - "Please set with 'SETTTING METHOD FOR OBMOL'" # TO DO + "Please set with 'MDA SETTING METHOD' if required." # TO DO ) - # if residue null, will need to assign w MDAnalysis... - else: - resnums.append(resid.GetNum()) # TO DO: check if start at 0 or 1 - resnames.append(resid.GetName()) - + resnums.append(None) # TO DO: is this best?? + resnames.append(None) + chainids.append(None) + icodes.append(None) + + # don't need to check null case, as know assigned to OBMol we're currently parsing # but, NEED TO HANDLE ADDING MULTIPLE SEGIDS/OBMOLS WHEN CONVERTING TO UNIVERSE AND ADDING TOGETHER... (should be ok, check w tests) segids.append(atom.GetParent()) - chiralities.append(atom.IsChiral()) #boolean + # TO DO: may need to create seperate for if SMILES input + chirality = None + if atom.IsPositiveStereo(): + chirality = "+" + if atom.IsNegativeStereo(): + chirality = "-" + chiralities.append(chirality) + aromatics.append(atom.IsAromatic()) #boolean - pass \ No newline at end of file + # altlocs.append() + # occupancies.append() + # tempfactors.append() + + + # make Topology attributes + attrs = [] + n_atoms = len(ids) + + if resnums and resnums.__contains__(None): + raise ValueError( + "ResidueInfo is only partially available in the molecule. " + "If you have added hydrogens to the input RDKit molecule with " + "`Chem.AddHs(mol)`, consider using " + "`Chem.AddHs(mol, addResidueInfo=True)` instead" + ) + + # * Attributes always present * + + # Atom attributes + for vals, Attr, dtype in ( + (ids, Atomids, np.int32), + (elements, Elements, object), + (masses, Masses, np.float32), + (aromatics, Aromaticities, bool), + (chiralities, RSChirality, 'U1'), + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + + # # Bonds + # bonds = [] + # bond_types = [] + # bond_orders = [] + # for bond in mol.GetBonds(): + # bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) + # bond_orders.append(bond.GetBondTypeAsDouble()) + # bond_types.append(str(bond.GetBondType())) + # attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) + + # # * Optional attributes * + + # # Atom name + # if names: + # attrs.append(Atomnames(np.array(names, dtype=object))) + # else: + # for atom in mol.GetAtoms(): + # name = "%s%d" % (atom.GetSymbol(), atom.GetIdx()) + # names.append(name) + # attrs.append(Atomnames(np.array(names, dtype=object))) + + # # Atom type + # if atomtypes: + # attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + # else: + # atomtypes = guessers.guess_types(names) + # attrs.append(Atomtypes(atomtypes, guessed=True)) + + # # Partial charges + # if charges: + # attrs.append(Charges(np.array(charges, dtype=np.float32))) + # else: + # pass # no guesser yet + + # # PDB only + # for vals, Attr, dtype in ( + # (altlocs, AltLocs, object), + # (chainids, ChainIDs, object), + # (occupancies, Occupancies, np.float32), + # (tempfactors, Tempfactors, np.float32), + # ): + # if vals: + # attrs.append(Attr(np.array(vals, dtype=dtype))) + + # # Residue + # if any(resnums) and not any(val is None for val in resnums): + # resnums = np.array(resnums, dtype=np.int32) + # resnames = np.array(resnames, dtype=object) + # segids = np.array(segids, dtype=object) + # icodes = np.array(icodes, dtype=object) + # residx, (resnums, resnames, icodes, segids) = change_squash( + # (resnums, resnames, icodes, segids), + # (resnums, resnames, icodes, segids)) + # n_residues = len(resnums) + # for vals, Attr, dtype in ( + # (resnums, Resids, np.int32), + # (resnums.copy(), Resnums, np.int32), + # (resnames, Resnames, object), + # (icodes, ICodes, object), + # ): + # attrs.append(Attr(np.array(vals, dtype=dtype))) + # else: + # attrs.append(Resids(np.array([1]))) + # attrs.append(Resnums(np.array([1]))) + # residx = None + # n_residues = 1 + + # # Segment + # if any(segids) and not any(val is None for val in segids): + # segidx, (segids,) = change_squash((segids,), (segids,)) + # n_segments = len(segids) + # attrs.append(Segids(segids)) + # else: + # n_segments = 1 + # attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + # segidx = None + + # create topology + # top = Topology(n_atoms, n_residues, n_segments, + # attrs=attrs, + # atom_resindex=residx, + # residue_segindex=segidx) + + top = Topology(n_atoms, 0, 0, + attrs=attrs, + atom_resindex=1, + residue_segindex=1) + + return top diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 9b21c03..002a248 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -1,9 +1,9 @@ # Testing OpenBabel and Pybel import MDAnalysis as mda -import openbabel as OB -from openbabel import OBMol, OBConversion, pybel -#from pybel import readfile +import openbabel as ob +from openbabel import OBMol, OBConversion +from pybel import readfile import mda_openbabel_converter from mda_openbabel_converter import OpenBabelParser as OBParser @@ -12,14 +12,19 @@ import numpy as np from numpy.testing import assert_equal, assert_allclose # from mda_openbabel_converter.tests.test_data import # something... -from MDAnalysisTests.topology.base import ParserBase # version 2.7.0 +from MDAnalysisTests.topology.base import ParserBase + +import mda_openbabel_converter.OpenBabelParser # version 2.7.0 # *** can run with "python -m pytest" but not "pytest" (can't find # MDAnalysis) - need to fix this! *** class OpenBabelParserBase(ParserBase): - parser = OBParser + #parser = OBParser + parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser + ref_filename = "none" + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'chiralities', 'segids', 'bonds', @@ -38,10 +43,11 @@ def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds class TestOpenBabelParserEmpty(OpenBabelParserBase): + ref_filename = OBMol() - @pytest.fixture(scope='class') + @pytest.fixture(autouse=True) def filename(self): - return OBMol() + return ob.OBMol() expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'chiralities', @@ -49,47 +55,28 @@ def filename(self): ] expected_n_atoms = 0 - expected_n_residues = 1 - expected_n_segments = 1 + expected_n_residues = 0 + expected_n_segments = 0 expected_n_bonds = 0 - @pytest.fixture(scope='class') - def topology(self, filename): - topology = OBParser(filename) - return topology - - def test_attributes(self, topology): - - assert len(np.unique(topology.results.hbonds[:, 0])) == 10 - assert len(topology.results.hbonds) == 32 - - reference = { - 'distance': {'mean': 2.7627309, 'std': 0.0905052}, - 'angle': {'mean': 158.9038039, 'std': 12.0362826}, - } - - assert_allclose(np.mean(h.results.hbonds[:, 4]), - reference['distance']['mean']) - assert_allclose(np.std(h.results.hbonds[:, 4]), - reference['distance']['std']) - assert_allclose(np.mean(h.results.hbonds[:, 5]), - reference['angle']['mean']) - assert_allclose(np.std(h.results.hbonds[:, 5]), - reference['angle']['std']) - - def test_atoms_total_counts(self, topology): - assert len(topology.select_atoms("all")) == self.expected_n_atoms + @pytest.fixture(autouse=True) + def top(self, filename): + top = OBParser.OpenBabelParser(filename) + return top + + def test_atoms_total_counts(self, top): + assert len(top.select_atoms("all")) == self.expected_n_atoms - def test_residues_total_counts(self, topology): - assert len(topology.select_atoms("all")) == self.expected_n_atoms + def test_residues_total_counts(self, top): + assert len(top.select_atoms("all")) == self.expected_n_atoms - def test_segments_total_counts(self, topology): - assert len(topology.select_atoms("all")) == self.expected_n_atoms + def test_segments_total_counts(self, top): + assert len(top.select_atoms("all")) == self.expected_n_atoms class TestOpenBabelParserAtomBuild(OpenBabelParserBase): - @pytest.fixture(scope='class') + @pytest.fixture(autouse=True) def filename(self): return OBMol() @@ -105,63 +92,46 @@ def filename(self): expected_n_segments = 1 expected_n_bonds = 1 - @pytest.fixture(scope='class') - def parser(self, mol): - topology = OBParser(mol, **self.kwargs) - return topology - - def test_hbond_analysis(self, h): - - assert len(np.unique(h.results.hbonds[:, 0])) == 10 - assert len(h.results.hbonds) == 32 - - reference = { - 'distance': {'mean': 2.7627309, 'std': 0.0905052}, - 'angle': {'mean': 158.9038039, 'std': 12.0362826}, - } - - assert_allclose(np.mean(h.results.hbonds[:, 4]), - reference['distance']['mean']) - assert_allclose(np.std(h.results.hbonds[:, 4]), - reference['distance']['std']) - assert_allclose(np.mean(h.results.hbonds[:, 5]), - reference['angle']['mean']) - assert_allclose(np.std(h.results.hbonds[:, 5]), - reference['angle']['std']) + # @pytest.fixture(scope='class') + # def parser(self, mol): + # top = OBParser(mol, **self.kwargs) + # return top + + -mol = OBMol() -print(mol.NumAtoms()) #Should print 0 (atoms) +# mol = OBMol() +# print(mol.NumAtoms()) #Should print 0 (atoms) -a = mol.NewAtom() -a.SetAtomicNum(6) # carbon atom -a.SetVector(0.0, 1.0, 2.0) # coordinates -b = mol.NewAtom() -mol.AddBond(1, 2, 1) # atoms indexed from 1 -print(mol.NumAtoms()) # Should print 2 (atoms) -print(mol.NumBonds()) # Should print 1 (bond) +# a = mol.NewAtom() +# a.SetAtomicNum(6) # carbon atom +# a.SetVector(0.0, 1.0, 2.0) # coordinates +# b = mol.NewAtom() +# mol.AddBond(1, 2, 1) # atoms indexed from 1 +# print(mol.NumAtoms()) # Should print 2 (atoms) +# print(mol.NumBonds()) # Should print 1 (bond) -for i in range(1, mol.NumAtoms()+1): - atom = mol.GetAtomById(i-1) - print(a) +# for i in range(1, mol.NumAtoms()+1): +# atom = mol.GetAtomById(i-1) +# print(a) -# -------- +# # -------- -obConversion = OBConversion() -obConversion.SetInAndOutFormats("smi", "mdl") +# obConversion = OBConversion() +# obConversion.SetInAndOutFormats("smi", "mdl") -mol = OBMol() -obConversion.ReadString(mol, "C1=CC=CS1") +# mol = OBMol() +# obConversion.ReadString(mol, "C1=CC=CS1") -#readfile(format="smi", filename="") +# #readfile(format="smi", filename="") -print(mol.NumAtoms()) # Should print 5 (atoms) +# print(mol.NumAtoms()) # Should print 5 (atoms) -mol.AddHydrogens() -print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens +# mol.AddHydrogens() +# print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens -outMDL = obConversion.WriteString(mol) -print(outMDL) +# outMDL = obConversion.WriteString(mol) +# print(outMDL) -# -------- +# # -------- From fbc1989ec94d17ca5930e29811b42bcea73c32a3 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Tue, 2 Jul 2024 19:32:57 +1000 Subject: [PATCH 05/29] rough functionality sorted, can now parse all attributes and test the basic ones (atoms, bonds, segments and residues) --- mda_openbabel_converter/OpenBabelParser.py | 191 ++++++++++-------- .../tests/test_openbabel_parser.py | 185 ++++++++++------- 2 files changed, 211 insertions(+), 165 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 59b75cb..b8833b4 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -3,8 +3,9 @@ """ import MDAnalysis as mda -from MDAnalysis.topology.base import TopologyReaderBase +from MDAnalysis.topology.base import TopologyReaderBase, change_squash from MDAnalysis.core.topology import Topology +from MDAnalysis.topology import guessers from MDAnalysis.converters.base import ConverterBase from MDAnalysis.core.topologyattrs import ( Atomids, @@ -28,6 +29,7 @@ ) import warnings import numpy as np +import pdb # for debugging HAS_OBABEL=False try: @@ -75,8 +77,6 @@ def parse(self, **kwargs): self.segments = [] self.n_segments = 0 - obmol = self.filename - # Atoms names = [] chiralities = [] @@ -95,8 +95,7 @@ def parse(self, **kwargs): occupancies = [] tempfactors = [] # B factor; not supported by OB - - if obmol.GetFirstAtom().equals(None): + if mol.Empty(): return Topology(n_atoms=0, n_res=0, n_seg=0, @@ -104,13 +103,13 @@ def parse(self, **kwargs): atom_resindex=None, residue_segindex=None) - for atom in ob.OBMolAtomIter(obmol): + for atom in ob.OBMolAtomIter(mol): # need to add handling incase attributes are invalid or null in OBMol # names.append(atom.GetType()) #char -> nothing for name in OBMol? Is name required to make MDA Atom? atomtypes.append(atom.GetType()) # char ids.append(atom.GetIdx()) #int masses.append(atom.GetExactMass()) # double -> what about atom.GetAtomicMass()??; which is better? - if not atom.GetExactMass().equals(atom.GetAtomicMass()): + if not (atom.GetExactMass() == atom.GetAtomicMass()): warnings.warn( f"Exact mass and atomic mass of atom (ID: {atom.GetIdx})" "not equal. Be aware of isotopes, which are NOT supported" @@ -118,7 +117,7 @@ def parse(self, **kwargs): charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) # convert atomic number to element - elements.append(OBElementTable.GetSymbol(atom.GetAtomicNumber())) #char + elements.append(OBElementTable().GetSymbol(atom.GetAtomicNum())) #char if atom.HasResidue(): resid = atom.GetResidue() # null if no residue @@ -139,7 +138,8 @@ def parse(self, **kwargs): # don't need to check null case, as know assigned to OBMol we're currently parsing # but, NEED TO HANDLE ADDING MULTIPLE SEGIDS/OBMOLS WHEN CONVERTING TO UNIVERSE AND ADDING TOGETHER... (should be ok, check w tests) - segids.append(atom.GetParent()) + # segids.append(atom.GetParent()) + segids.append(0) # need better system! # TO DO: may need to create seperate for if SMILES input chirality = None @@ -160,12 +160,9 @@ def parse(self, **kwargs): attrs = [] n_atoms = len(ids) - if resnums and resnums.__contains__(None): + if resnums and (len(resnums) != n_atoms): #resnums.__contains__(None): raise ValueError( - "ResidueInfo is only partially available in the molecule. " - "If you have added hydrogens to the input RDKit molecule with " - "`Chem.AddHs(mol)`, consider using " - "`Chem.AddHs(mol, addResidueInfo=True)` instead" + "ResidueInfo is only partially available in the molecule." ) # * Attributes always present * @@ -180,39 +177,64 @@ def parse(self, **kwargs): ): attrs.append(Attr(np.array(vals, dtype=dtype))) - # # Bonds - # bonds = [] - # bond_types = [] - # bond_orders = [] - # for bond in mol.GetBonds(): - # bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) - # bond_orders.append(bond.GetBondTypeAsDouble()) - # bond_types.append(str(bond.GetBondType())) - # attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) - - # # * Optional attributes * - - # # Atom name - # if names: - # attrs.append(Atomnames(np.array(names, dtype=object))) - # else: - # for atom in mol.GetAtoms(): - # name = "%s%d" % (atom.GetSymbol(), atom.GetIdx()) - # names.append(name) - # attrs.append(Atomnames(np.array(names, dtype=object))) - - # # Atom type - # if atomtypes: - # attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) - # else: - # atomtypes = guessers.guess_types(names) - # attrs.append(Atomtypes(atomtypes, guessed=True)) - - # # Partial charges - # if charges: - # attrs.append(Charges(np.array(charges, dtype=np.float32))) - # else: - # pass # no guesser yet + # Bonds + bonds = [] + bond_types = [] + bond_orders = [] + for bond_idx in range(1, mol.NumBonds()): + bond = mol.GetBond(bond_idx) + print(bond) + bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) + bond_orders.append(bond.GetBondOrder()) # is int, not double. Does this matter? + + # make these a dict instead? + OB_BOND_TYPES = [ + bond.IsAromatic(), + bond.IsAmide(), + bond.IsPrimaryAmide(), + bond.IsSecondaryAmide(), + bond.IsEster(), + bond.IsCarbonyl(), + ] + + MDA_BOND_TYPES = [ + "aromatic", + "amide", + "primary amide", + "secondary amide", + "ester", + "carbonyl", + ] + + for index, b_type in enumerate(OB_BOND_TYPES): + if b_type==True: + bond_types.append(MDA_BOND_TYPES[index]) + + attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) + + # * Optional attributes * + + # Atom name + if names: + attrs.append(Atomnames(np.array(names, dtype=object))) + else: + for atom in ob.OBMolAtomIter(mol): + name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), atom.GetIdx()) + names.append(name) + attrs.append(Atomnames(np.array(names, dtype=object))) + + # Atom type + if atomtypes: + attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + else: + atomtypes = guessers.guess_types(names) + attrs.append(Atomtypes(atomtypes, guessed=True)) + + # Partial charges + if charges: + attrs.append(Charges(np.array(charges, dtype=np.float32))) + else: + pass # no guesser yet # # PDB only # for vals, Attr, dtype in ( @@ -224,48 +246,43 @@ def parse(self, **kwargs): # if vals: # attrs.append(Attr(np.array(vals, dtype=dtype))) - # # Residue - # if any(resnums) and not any(val is None for val in resnums): - # resnums = np.array(resnums, dtype=np.int32) - # resnames = np.array(resnames, dtype=object) - # segids = np.array(segids, dtype=object) - # icodes = np.array(icodes, dtype=object) - # residx, (resnums, resnames, icodes, segids) = change_squash( - # (resnums, resnames, icodes, segids), - # (resnums, resnames, icodes, segids)) - # n_residues = len(resnums) - # for vals, Attr, dtype in ( - # (resnums, Resids, np.int32), - # (resnums.copy(), Resnums, np.int32), - # (resnames, Resnames, object), - # (icodes, ICodes, object), - # ): - # attrs.append(Attr(np.array(vals, dtype=dtype))) - # else: - # attrs.append(Resids(np.array([1]))) - # attrs.append(Resnums(np.array([1]))) - # residx = None - # n_residues = 1 - - # # Segment - # if any(segids) and not any(val is None for val in segids): - # segidx, (segids,) = change_squash((segids,), (segids,)) - # n_segments = len(segids) - # attrs.append(Segids(segids)) - # else: - # n_segments = 1 - # attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) - # segidx = None + # Residue + if any(resnums) and not any(val is None for val in resnums): + resnums = np.array(resnums, dtype=np.int32) + resnames = np.array(resnames, dtype=object) + segids = np.array(segids, dtype=object) + icodes = np.array(icodes, dtype=object) + residx, (resnums, resnames, icodes, segids) = change_squash( + (resnums, resnames, icodes, segids), + (resnums, resnames, icodes, segids)) + n_residues = len(resnums) + for vals, Attr, dtype in ( + (resnums, Resids, np.int32), + (resnums.copy(), Resnums, np.int32), + (resnames, Resnames, object), + (icodes, ICodes, object), + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + else: + attrs.append(Resids(np.array([1]))) + attrs.append(Resnums(np.array([1]))) + residx = None + n_residues = 1 + + # Segment + if any(segids) and not any(val is None for val in segids): + segidx, (segids,) = change_squash((segids,), (segids,)) + n_segments = len(segids) + attrs.append(Segids(segids)) + else: + n_segments = 1 + attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + segidx = None # create topology - # top = Topology(n_atoms, n_residues, n_segments, - # attrs=attrs, - # atom_resindex=residx, - # residue_segindex=segidx) - - top = Topology(n_atoms, 0, 0, + top = Topology(n_atoms, n_residues, n_segments, attrs=attrs, - atom_resindex=1, - residue_segindex=1) + atom_resindex=residx, + residue_segindex=segidx) return top diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 002a248..5032214 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -13,6 +13,7 @@ from numpy.testing import assert_equal, assert_allclose # from mda_openbabel_converter.tests.test_data import # something... from MDAnalysisTests.topology.base import ParserBase +from MDAnalysis.core.topology import Topology import mda_openbabel_converter.OpenBabelParser # version 2.7.0 @@ -21,9 +22,7 @@ class OpenBabelParserBase(ParserBase): - #parser = OBParser parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser - ref_filename = "none" expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'chiralities', @@ -40,98 +39,128 @@ def test_creates_universe(self, filename): assert isinstance(u, mda.Universe) def test_bonds_total_counts(self, top): - assert len(top.bonds.values) == self.expected_n_bonds + if hasattr(top, 'bonds'): + assert len(top.bonds.values) == self.expected_n_bonds -class TestOpenBabelParserEmpty(OpenBabelParserBase): - ref_filename = OBMol() - @pytest.fixture(autouse=True) +class TestOpenBabelParserEmpty(OpenBabelParserBase): + @pytest.fixture() def filename(self): - return ob.OBMol() + return OBMol() - expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', - 'segids', 'bonds', - ] + expected_attrs = [] + mandatory_attrs = [] # as not instantiated during empty Topology creation expected_n_atoms = 0 expected_n_residues = 0 expected_n_segments = 0 expected_n_bonds = 0 - @pytest.fixture(autouse=True) - def top(self, filename): - top = OBParser.OpenBabelParser(filename) - return top + def test_mandatory_attributes(self, top): + for attr in self.mandatory_attrs: + assert hasattr(top, attr), 'Missing required attribute: {}'.format(attr) - def test_atoms_total_counts(self, top): - assert len(top.select_atoms("all")) == self.expected_n_atoms + def test_attrs_total_counts(self, top): + ag = mda.Universe(top).select_atoms("all") + res = ag.residues + seg = ag.segments + assert len(ag) == self.expected_n_atoms + assert len(res) == self.expected_n_residues + assert len(seg) == self.expected_n_segments - def test_residues_total_counts(self, top): - assert len(top.select_atoms("all")) == self.expected_n_atoms - - def test_segments_total_counts(self, top): - assert len(top.select_atoms("all")) == self.expected_n_atoms - -class TestOpenBabelParserAtomBuild(OpenBabelParserBase): +class TestOpenBabelParserAtomBuild(object): + parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser + expected_attrs = ['ids', 'elements', 'bonds'] - @pytest.fixture(autouse=True) + @pytest.fixture() def filename(self): - return OBMol() - - expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', - 'segids', 'bonds', - ] + obConversion = ob.OBConversion() + obConversion.SetInFormat("smi") + mol = OBMol() + obConversion.ReadString(mol, "C1=CC=CS1") + mol.AddHydrogens() + #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens + return mol - # expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] - - expected_n_atoms = 2 + @pytest.fixture() + def top(self, filename): + yield self.parser(filename).parse() + + expected_n_atoms = 9 expected_n_residues = 1 expected_n_segments = 1 - expected_n_bonds = 1 - - # @pytest.fixture(scope='class') - # def parser(self, mol): - # top = OBParser(mol, **self.kwargs) - # return top - - - -# mol = OBMol() -# print(mol.NumAtoms()) #Should print 0 (atoms) - -# a = mol.NewAtom() -# a.SetAtomicNum(6) # carbon atom -# a.SetVector(0.0, 1.0, 2.0) # coordinates -# b = mol.NewAtom() -# mol.AddBond(1, 2, 1) # atoms indexed from 1 -# print(mol.NumAtoms()) # Should print 2 (atoms) -# print(mol.NumBonds()) # Should print 1 (bond) - -# for i in range(1, mol.NumAtoms()+1): -# atom = mol.GetAtomById(i-1) -# print(a) - -# # -------- - -# obConversion = OBConversion() -# obConversion.SetInAndOutFormats("smi", "mdl") - -# mol = OBMol() -# obConversion.ReadString(mol, "C1=CC=CS1") - -# #readfile(format="smi", filename="") - -# print(mol.NumAtoms()) # Should print 5 (atoms) - -# mol.AddHydrogens() -# print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens - -# outMDL = obConversion.WriteString(mol) -# print(outMDL) - -# # -------- - + expected_n_bonds = 9 + + def test_attrs_total_counts(self, top): + # assert(isinstance(top, Topology)) #false? + u = mda.Universe(top) + ag = u.select_atoms("all") + res = ag.residues + seg = ag.segments + assert len(ag) == self.expected_n_atoms + assert len(res) == self.expected_n_residues + assert len(seg) == self.expected_n_segments + + + +# @pytest.fixture() +# def obabel(): +# obConversion = ob.OBConversion() +# obConversion.SetInFormat("smi") +# mol = OBMol() +# obConversion.ReadString(mol, "C1=CC=CS1") +# mol.AddHydrogens() +# #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens +# assert(mol.NumAtoms() == 9) +# return mol + +# def test_caller(obabel): +# assert(obabel.NumAtoms() == 9) + + + # expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', + # 'resids', 'resnums', 'chiralities', + # 'segids', 'bonds', + # ] + + # # expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] + +# parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser + +# @pytest.fixture() +# def filename(): +# obConversion = ob.OBConversion() +# obConversion.SetInFormat("smi") +# mol = OBMol() +# obConversion.ReadString(mol, "C1=CC=CS1") +# mol.AddHydrogens() +# #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens +# return mol + +# # @pytest.fixture() +# def test_top(filename): +# b = parser(filename).parse() +# print(type(b)) +# # try: +# # yield parser(filename).parse() +# # catch pytest.raises(ValueError, match="ResidueInfo is only partially available in the molecule."): +# # # top = parser(filename).parse() +# # # assert(isinstance(top, Topology)) #true? +# # yield parser(filename).parse() + +# # expected_n_atoms = 9 +# # expected_n_residues = 1 +# # expected_n_segments = 1 +# # expected_n_bonds = 9 + +# # def test_attrs_total_counts(top): +# # # assert(isinstance(top, Topology)) #false? +# # u = mda.Universe(top) +# # ag = u.select_atoms("all") +# # res = ag.residues +# # seg = ag.segments +# # assert len(ag) == expected_n_atoms +# # assert len(res) == expected_n_residues +# # assert len(seg) == expected_n_segments From 5883bc9fcc9470f2129c12d508af3e891c104fba Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Tue, 2 Jul 2024 20:19:58 +1000 Subject: [PATCH 06/29] cleaned up tests and parser a bit more, and added aromaticity test --- mda_openbabel_converter/OpenBabelParser.py | 3 +- .../tests/test_openbabel_parser.py | 75 +++---------------- 2 files changed, 10 insertions(+), 68 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index b8833b4..5b69bc2 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -181,7 +181,7 @@ def parse(self, **kwargs): bonds = [] bond_types = [] bond_orders = [] - for bond_idx in range(1, mol.NumBonds()): + for bond_idx in range(0, mol.NumBonds()): bond = mol.GetBond(bond_idx) print(bond) bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) @@ -205,7 +205,6 @@ def parse(self, **kwargs): "ester", "carbonyl", ] - for index, b_type in enumerate(OB_BOND_TYPES): if b_type==True: bond_types.append(MDA_BOND_TYPES[index]) diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 5032214..0d790e0 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -69,9 +69,9 @@ def test_attrs_total_counts(self, top): assert len(seg) == self.expected_n_segments -class TestOpenBabelParserAtomBuild(object): - parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser - expected_attrs = ['ids', 'elements', 'bonds'] +class TestOpenBabelParserBasicFromSMILE(OpenBabelParserBase): + #parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser + expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] @pytest.fixture() def filename(self): @@ -80,7 +80,6 @@ def filename(self): mol = OBMol() obConversion.ReadString(mol, "C1=CC=CS1") mol.AddHydrogens() - #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens return mol @pytest.fixture() @@ -93,7 +92,6 @@ def top(self, filename): expected_n_bonds = 9 def test_attrs_total_counts(self, top): - # assert(isinstance(top, Topology)) #false? u = mda.Universe(top) ag = u.select_atoms("all") res = ag.residues @@ -101,66 +99,11 @@ def test_attrs_total_counts(self, top): assert len(ag) == self.expected_n_atoms assert len(res) == self.expected_n_residues assert len(seg) == self.expected_n_segments - - - -# @pytest.fixture() -# def obabel(): -# obConversion = ob.OBConversion() -# obConversion.SetInFormat("smi") -# mol = OBMol() -# obConversion.ReadString(mol, "C1=CC=CS1") -# mol.AddHydrogens() -# #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens -# assert(mol.NumAtoms() == 9) -# return mol - -# def test_caller(obabel): -# assert(obabel.NumAtoms() == 9) + def test_aromaticities(self, top, filename): + expected = np.array([ + atom.IsAromatic() for atom in ob.OBMolAtomIter(filename)]) + assert_equal(expected, top.aromaticities.values) - # expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - # 'resids', 'resnums', 'chiralities', - # 'segids', 'bonds', - # ] - - # # expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] - -# parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser - -# @pytest.fixture() -# def filename(): -# obConversion = ob.OBConversion() -# obConversion.SetInFormat("smi") -# mol = OBMol() -# obConversion.ReadString(mol, "C1=CC=CS1") -# mol.AddHydrogens() -# #print(mol.NumAtoms()) # Should print 9 (atoms) after adding hydrogens -# return mol - -# # @pytest.fixture() -# def test_top(filename): -# b = parser(filename).parse() -# print(type(b)) -# # try: -# # yield parser(filename).parse() -# # catch pytest.raises(ValueError, match="ResidueInfo is only partially available in the molecule."): -# # # top = parser(filename).parse() -# # # assert(isinstance(top, Topology)) #true? -# # yield parser(filename).parse() - -# # expected_n_atoms = 9 -# # expected_n_residues = 1 -# # expected_n_segments = 1 -# # expected_n_bonds = 9 - -# # def test_attrs_total_counts(top): -# # # assert(isinstance(top, Topology)) #false? -# # u = mda.Universe(top) -# # ag = u.select_atoms("all") -# # res = ag.residues -# # seg = ag.segments -# # assert len(ag) == expected_n_atoms -# # assert len(res) == expected_n_residues -# # assert len(seg) == expected_n_segments - + # need to check other attrs including: + # 'ids', 'names', 'elements', 'masses', 'resids', 'resnums', 'chiralities', 'segids' \ No newline at end of file From 74d4f915efc6221413fabd94273e81a94d3758d9 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Sat, 13 Jul 2024 20:09:46 +1000 Subject: [PATCH 07/29] mostly functional parser and tests, ready for review please --- mda_openbabel_converter/OpenBabelParser.py | 15 ++++--- .../tests/test_openbabel_parser.py | 43 ++++++++++++++----- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 5b69bc2..034e350 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -31,6 +31,7 @@ import numpy as np import pdb # for debugging HAS_OBABEL=False +NEUTRON_MASS = 1.008 try: import openbabel as ob @@ -47,6 +48,7 @@ class OpenBabelParser(TopologyReaderBase): MDAnalysis Topology or adds it to a pre-existing Topology. This parser will does not work in the reverse direction. """ + format = 'OPENBABEL' @staticmethod def _format_hint(thing): @@ -67,7 +69,6 @@ def parse(self, **kwargs): atom_residue index, residue_segment index and all of the atom's relevant attributes from the OBMol to initialise a new Topology. """ - format = 'OPENBABEL' mol = self.filename self.atoms = [] @@ -109,11 +110,11 @@ def parse(self, **kwargs): atomtypes.append(atom.GetType()) # char ids.append(atom.GetIdx()) #int masses.append(atom.GetExactMass()) # double -> what about atom.GetAtomicMass()??; which is better? - if not (atom.GetExactMass() == atom.GetAtomicMass()): + if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS: warnings.warn( - f"Exact mass and atomic mass of atom (ID: {atom.GetIdx})" - "not equal. Be aware of isotopes, which are NOT supported" - "by MDAnalysis.") + f"Exact mass and atomic mass of atom ID: {atom.GetIdx()}" + " are more than 1.008 AMU different. Be aware of isotopes," + " which are NOT supported by MDAnalysis.") charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) # convert atomic number to element @@ -127,8 +128,8 @@ def parse(self, **kwargs): icodes.append(resid.GetInsertionCode()) else: warnings.warn( - f"No residue is defined for atom (ID: {atom.GetIdx})." - "Please set with 'MDA SETTING METHOD' if required." # TO DO + f"No residue is defined for atom (ID: {atom.GetIdx()})." + " Please set with 'MDA SETTING METHOD' if required." # TO DO ) resnums.append(None) # TO DO: is this best?? resnames.append(None) diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 0d790e0..6f61a1c 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -2,8 +2,9 @@ import MDAnalysis as mda import openbabel as ob -from openbabel import OBMol, OBConversion +from openbabel import OBMol, OBConversion, OBElementTable from pybel import readfile +#from openbabel.test.files import files import mda_openbabel_converter from mda_openbabel_converter import OpenBabelParser as OBParser @@ -30,8 +31,8 @@ class OpenBabelParserBase(ParserBase): ] expected_n_atoms = 0 - expected_n_residues = 1 - expected_n_segments = 1 + expected_n_residues = 0 + expected_n_segments = 0 expected_n_bonds = 0 def test_creates_universe(self, filename): @@ -50,11 +51,6 @@ def filename(self): expected_attrs = [] mandatory_attrs = [] # as not instantiated during empty Topology creation - - expected_n_atoms = 0 - expected_n_residues = 0 - expected_n_segments = 0 - expected_n_bonds = 0 def test_mandatory_attributes(self, top): for attr in self.mandatory_attrs: @@ -69,7 +65,7 @@ def test_attrs_total_counts(self, top): assert len(seg) == self.expected_n_segments -class TestOpenBabelParserBasicFromSMILE(OpenBabelParserBase): +class TestOpenBabelParserSMILES(OpenBabelParserBase): #parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] @@ -105,5 +101,32 @@ def test_aromaticities(self, top, filename): atom.IsAromatic() for atom in ob.OBMolAtomIter(filename)]) assert_equal(expected, top.aromaticities.values) + def test_elements(self, top, filename): + expected = np.array([ + OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) + assert_equal(expected, top.elements.values) + + # def test_chiralities(self, top, filename): + # chirality = None + # if atom.IsPositiveStereo(): + # chirality = "+" + # if atom.IsNegativeStereo(): + # chirality = "-" + # chiralities.append(chirality) + + # expected = np.array([ + # OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) + # assert_equal(expected, top.chiralities.values) + + def test_charges(self, top, filename): + expected = np.array([ + atom.GetPartialCharge() for atom in ob.OBMolAtomIter(filename)]) + assert_allclose(expected, top.charges.values) + + # def test_mass_check(self, top, filename): + # expected = np.array([ + # OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) + # assert_equal(expected, top.elements.values) + # need to check other attrs including: - # 'ids', 'names', 'elements', 'masses', 'resids', 'resnums', 'chiralities', 'segids' \ No newline at end of file + # 'ids', 'names', 'resids', 'resnums', 'chiralities', 'segids', 'charges' \ No newline at end of file From 1cd1c1a752cf9a42a2048b689f8b986bf5681547 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Tue, 16 Jul 2024 11:23:00 +1000 Subject: [PATCH 08/29] updates after pair-coding session with Hugo and fix CI --- mda_openbabel_converter/OpenBabelParser.py | 116 ++++-------------- mda_openbabel_converter/__init__.py | 4 - mda_openbabel_converter/tests/conftest.py | 1 - .../tests/test_openbabel_parser.py | 27 ++-- 4 files changed, 36 insertions(+), 112 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 034e350..9739b38 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -33,6 +33,11 @@ HAS_OBABEL=False NEUTRON_MASS = 1.008 +from enum import StrEnum +class StereoEnum(StrEnum): + positive = "+" + negative = "-" + try: import openbabel as ob from openbabel import OBMol @@ -45,7 +50,7 @@ class OpenBabelParser(TopologyReaderBase): """ Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a - MDAnalysis Topology or adds it to a pre-existing Topology. This parser will + MDAnalysis Topology or adds it to a pre-existing Topology. This parser does not work in the reverse direction. """ format = 'OPENBABEL' @@ -71,13 +76,6 @@ def parse(self, **kwargs): """ mol = self.filename - self.atoms = [] - self.n_atoms = 0 - self.residues = [] - self.n_residues = 0 - self.segments = [] - self.n_segments = 0 - # Atoms names = [] chiralities = [] @@ -90,11 +88,8 @@ def parse(self, **kwargs): ids = [] atomtypes = [] segids = [] - altlocs = [] chainids = [] icodes = [] - occupancies = [] - tempfactors = [] # B factor; not supported by OB if mol.Empty(): return Topology(n_atoms=0, @@ -106,62 +101,41 @@ def parse(self, **kwargs): for atom in ob.OBMolAtomIter(mol): # need to add handling incase attributes are invalid or null in OBMol - # names.append(atom.GetType()) #char -> nothing for name in OBMol? Is name required to make MDA Atom? - atomtypes.append(atom.GetType()) # char - ids.append(atom.GetIdx()) #int - masses.append(atom.GetExactMass()) # double -> what about atom.GetAtomicMass()??; which is better? + atomtypes.append(atom.GetType()) + ids.append(atom.GetIdx()) + masses.append(atom.GetExactMass()) if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS: warnings.warn( f"Exact mass and atomic mass of atom ID: {atom.GetIdx()}" " are more than 1.008 AMU different. Be aware of isotopes," - " which are NOT supported by MDAnalysis.") - charges.append(atom.GetPartialCharge()) #int (or use atom.GetFormalCharge()?) + " which are NOT flagged by MDAnalysis.") + charges.append(atom.GetPartialCharge()) # convert atomic number to element - elements.append(OBElementTable().GetSymbol(atom.GetAtomicNum())) #char + elements.append(OBElementTable().GetSymbol(atom.GetAtomicNum())) + # only for PBD and MOL2 if atom.HasResidue(): - resid = atom.GetResidue() # null if no residue + resid = atom.GetResidue() resnums.append(resid.GetNum()) # TO DO: check if start at 0 or 1 resnames.append(resid.GetName()) - chainids.append(resid.GetChainNum()) # is this correct??? + chainids.append(resid.GetChain()) icodes.append(resid.GetInsertionCode()) - else: - warnings.warn( - f"No residue is defined for atom (ID: {atom.GetIdx()})." - " Please set with 'MDA SETTING METHOD' if required." # TO DO - ) - resnums.append(None) # TO DO: is this best?? - resnames.append(None) - chainids.append(None) - icodes.append(None) - - - # don't need to check null case, as know assigned to OBMol we're currently parsing - # but, NEED TO HANDLE ADDING MULTIPLE SEGIDS/OBMOLS WHEN CONVERTING TO UNIVERSE AND ADDING TOGETHER... (should be ok, check w tests) - # segids.append(atom.GetParent()) - segids.append(0) # need better system! - # TO DO: may need to create seperate for if SMILES input chirality = None if atom.IsPositiveStereo(): - chirality = "+" + chirality = StereoEnum.positive if atom.IsNegativeStereo(): - chirality = "-" + chirality = StereoEnum.negative chiralities.append(chirality) - aromatics.append(atom.IsAromatic()) #boolean - - # altlocs.append() - # occupancies.append() - # tempfactors.append() - + aromatics.append(atom.IsAromatic()) # make Topology attributes attrs = [] n_atoms = len(ids) - if resnums and (len(resnums) != n_atoms): #resnums.__contains__(None): + if resnums and resnums.__contains__(None): #(len(resnums) != n_atoms): raise ValueError( "ResidueInfo is only partially available in the molecule." ) @@ -184,44 +158,18 @@ def parse(self, **kwargs): bond_orders = [] for bond_idx in range(0, mol.NumBonds()): bond = mol.GetBond(bond_idx) - print(bond) bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) - bond_orders.append(bond.GetBondOrder()) # is int, not double. Does this matter? - - # make these a dict instead? - OB_BOND_TYPES = [ - bond.IsAromatic(), - bond.IsAmide(), - bond.IsPrimaryAmide(), - bond.IsSecondaryAmide(), - bond.IsEster(), - bond.IsCarbonyl(), - ] - - MDA_BOND_TYPES = [ - "aromatic", - "amide", - "primary amide", - "secondary amide", - "ester", - "carbonyl", - ] - for index, b_type in enumerate(OB_BOND_TYPES): - if b_type==True: - bond_types.append(MDA_BOND_TYPES[index]) - + bond_orders.append(float(bond.GetBondOrder())) + bond_types.append(float(bond.GetBondOrder())) attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) # * Optional attributes * - # Atom name - if names: - attrs.append(Atomnames(np.array(names, dtype=object))) - else: - for atom in ob.OBMolAtomIter(mol): - name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), atom.GetIdx()) - names.append(name) - attrs.append(Atomnames(np.array(names, dtype=object))) + # Atom name set with element and id, as name not supported by OpenBabel + for atom in ob.OBMolAtomIter(mol): + name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), atom.GetIdx()) + names.append(name) + attrs.append(Atomnames(np.array(names, dtype=object))) # Atom type if atomtypes: @@ -234,17 +182,7 @@ def parse(self, **kwargs): if charges: attrs.append(Charges(np.array(charges, dtype=np.float32))) else: - pass # no guesser yet - - # # PDB only - # for vals, Attr, dtype in ( - # (altlocs, AltLocs, object), - # (chainids, ChainIDs, object), - # (occupancies, Occupancies, np.float32), - # (tempfactors, Tempfactors, np.float32), - # ): - # if vals: - # attrs.append(Attr(np.array(vals, dtype=dtype))) + pass # no guesser yet # Residue if any(resnums) and not any(val is None for val in resnums): diff --git a/mda_openbabel_converter/__init__.py b/mda_openbabel_converter/__init__.py index 957ff50..2f3b31f 100644 --- a/mda_openbabel_converter/__init__.py +++ b/mda_openbabel_converter/__init__.py @@ -3,10 +3,6 @@ A package to convert between MDAnalysis and OpenBabel Objects """ -# Add imports here from importlib.metadata import version -# from .OpenBabel import OpenBabelReader -# from .OpenBabel import OpenBabelConverter -# from .OpenBabelParser import OpenBabelTopologyParser __version__ = version("mda_openbabel_converter") diff --git a/mda_openbabel_converter/tests/conftest.py b/mda_openbabel_converter/tests/conftest.py index d44cbc3..ef71494 100644 --- a/mda_openbabel_converter/tests/conftest.py +++ b/mda_openbabel_converter/tests/conftest.py @@ -8,7 +8,6 @@ # https://docs.pytest.org/en/stable/how-to/fixtures.html#scope-sharing-fixtures-across-classes-modules-packages-or-session import pytest -# import MDAnalysis from mda_openbabel_converter.data.files import MDANALYSIS_LOGO diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 6f61a1c..aaf3b80 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -106,27 +106,18 @@ def test_elements(self, top, filename): OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) assert_equal(expected, top.elements.values) - # def test_chiralities(self, top, filename): - # chirality = None - # if atom.IsPositiveStereo(): - # chirality = "+" - # if atom.IsNegativeStereo(): - # chirality = "-" - # chiralities.append(chirality) - - # expected = np.array([ - # OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) - # assert_equal(expected, top.chiralities.values) + def test_chiralities(self, top, filename): + expected = np.array([ + "+" if atom.IsPositiveStereo() else "-" if atom.IsNegativeStereo() + else "N" for atom in ob.OBMolAtomIter(filename)]) + assert_equal(expected, top.chiralities.values) def test_charges(self, top, filename): expected = np.array([ atom.GetPartialCharge() for atom in ob.OBMolAtomIter(filename)]) assert_allclose(expected, top.charges.values) - # def test_mass_check(self, top, filename): - # expected = np.array([ - # OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) - # assert_equal(expected, top.elements.values) - - # need to check other attrs including: - # 'ids', 'names', 'resids', 'resnums', 'chiralities', 'segids', 'charges' \ No newline at end of file + def test_mass_check(self, top, filename): + expected = np.array([ + atom.GetExactMass() for atom in ob.OBMolAtomIter(filename)]) + assert_allclose(expected, top.masses.values) From 597602b96def63239e4a6297b83f0f18fb2f1fa4 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 17 Jul 2024 16:38:47 +1000 Subject: [PATCH 09/29] CI fix take 5 --- mda_openbabel_converter/data/files.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mda_openbabel_converter/data/files.py b/mda_openbabel_converter/data/files.py index a9518b2..bce40b7 100644 --- a/mda_openbabel_converter/data/files.py +++ b/mda_openbabel_converter/data/files.py @@ -12,5 +12,8 @@ "MDANALYSIS_LOGO", # example file of MDAnalysis logo ] -import importlib.resources -MDANALYSIS_LOGO = importlib.resources.files(__name__) / "mda.txt" +from importlib.resources import files +#from . import data +# MDANALYSIS_LOGO = importlib.resources.files(__name__) / "mda.txt" +MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt" + From 7c1c9c78c1525ee00b2d85d605fcc254e577debf Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 17 Jul 2024 16:57:33 +1000 Subject: [PATCH 10/29] trying again --- .github/workflows/gh-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 7079784..d4d98d5 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -76,7 +76,7 @@ jobs: - name: Install package run: | python --version - python -m pip install . --no-deps + mamba install -c conda-forge . --no-deps - name: Python information run: | From 7e121159bf7de683854ee4c2bdcd8b89b5b64e7b Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 17 Jul 2024 17:14:10 +1000 Subject: [PATCH 11/29] I'm getting closer... --- .github/workflows/gh-ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index d4d98d5..3bee952 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -76,7 +76,8 @@ jobs: - name: Install package run: | python --version - mamba install -c conda-forge . --no-deps + mamba install -c conda-forge openbabel + python -m pip install . --no-deps - name: Python information run: | From 048c8e1316c89cf7fcfad127a0ed6ae5771216ee Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 17 Jul 2024 17:16:55 +1000 Subject: [PATCH 12/29] Fixing for pep8 --- mda_openbabel_converter/OpenBabelParser.py | 34 ++++++++++--------- mda_openbabel_converter/data/files.py | 2 +- .../tests/test_openbabel_parser.py | 20 +++++------ 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 9739b38..8dd2fce 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -29,28 +29,30 @@ ) import warnings import numpy as np -import pdb # for debugging -HAS_OBABEL=False -NEUTRON_MASS = 1.008 - from enum import StrEnum -class StereoEnum(StrEnum): - positive = "+" - negative = "-" + +HAS_OBABEL = False +NEUTRON_MASS = 1.008 try: import openbabel as ob from openbabel import OBMol from openbabel import OBElementTable - HAS_OBABEL=True + HAS_OBABEL = True except ImportError: warnings.warn("Cannot find openbabel, install with `mamba install -c " "conda-forge openbabel`") + +class StereoEnum(StrEnum): + positive = "+" + negative = "-" + + class OpenBabelParser(TopologyReaderBase): """ - Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a - MDAnalysis Topology or adds it to a pre-existing Topology. This parser + Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a + MDAnalysis Topology or adds it to a pre-existing Topology. This parser does not work in the reverse direction. """ format = 'OPENBABEL' @@ -62,7 +64,7 @@ def _format_hint(thing): (i.e., is it a valid OpenBabel OBMol that can be converted to a MDAnalysis Topology?) """ - if HAS_OBABEL == False: + if HAS_OBABEL is False: return False else: return isinstance(thing, ob.OBMol) @@ -100,14 +102,13 @@ def parse(self, **kwargs): residue_segindex=None) for atom in ob.OBMolAtomIter(mol): - # need to add handling incase attributes are invalid or null in OBMol atomtypes.append(atom.GetType()) ids.append(atom.GetIdx()) masses.append(atom.GetExactMass()) if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS: warnings.warn( f"Exact mass and atomic mass of atom ID: {atom.GetIdx()}" - " are more than 1.008 AMU different. Be aware of isotopes," + " are more than 1.008 AMU different. Be aware of isotopes," " which are NOT flagged by MDAnalysis.") charges.append(atom.GetPartialCharge()) @@ -117,7 +118,7 @@ def parse(self, **kwargs): # only for PBD and MOL2 if atom.HasResidue(): resid = atom.GetResidue() - resnums.append(resid.GetNum()) # TO DO: check if start at 0 or 1 + resnums.append(resid.GetNum()) resnames.append(resid.GetName()) chainids.append(resid.GetChain()) icodes.append(resid.GetInsertionCode()) @@ -135,7 +136,7 @@ def parse(self, **kwargs): attrs = [] n_atoms = len(ids) - if resnums and resnums.__contains__(None): #(len(resnums) != n_atoms): + if resnums and resnums.__contains__(None): raise ValueError( "ResidueInfo is only partially available in the molecule." ) @@ -167,7 +168,8 @@ def parse(self, **kwargs): # Atom name set with element and id, as name not supported by OpenBabel for atom in ob.OBMolAtomIter(mol): - name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), atom.GetIdx()) + name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), + atom.GetIdx()) names.append(name) attrs.append(Atomnames(np.array(names, dtype=object))) diff --git a/mda_openbabel_converter/data/files.py b/mda_openbabel_converter/data/files.py index bce40b7..4e31d00 100644 --- a/mda_openbabel_converter/data/files.py +++ b/mda_openbabel_converter/data/files.py @@ -13,7 +13,7 @@ ] from importlib.resources import files -#from . import data +# from . import data # MDANALYSIS_LOGO = importlib.resources.files(__name__) / "mda.txt" MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt" diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index aaf3b80..7a4a7ac 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -4,7 +4,7 @@ import openbabel as ob from openbabel import OBMol, OBConversion, OBElementTable from pybel import readfile -#from openbabel.test.files import files +# from openbabel.test.files import files import mda_openbabel_converter from mda_openbabel_converter import OpenBabelParser as OBParser @@ -12,7 +12,6 @@ import sys import numpy as np from numpy.testing import assert_equal, assert_allclose -# from mda_openbabel_converter.tests.test_data import # something... from MDAnalysisTests.topology.base import ParserBase from MDAnalysis.core.topology import Topology @@ -28,8 +27,8 @@ class OpenBabelParserBase(ParserBase): expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'chiralities', 'segids', 'bonds', - ] - + ] + expected_n_atoms = 0 expected_n_residues = 0 expected_n_segments = 0 @@ -50,11 +49,12 @@ def filename(self): return OBMol() expected_attrs = [] - mandatory_attrs = [] # as not instantiated during empty Topology creation + mandatory_attrs = [] # as not instantiated during empty Topology creation def test_mandatory_attributes(self, top): for attr in self.mandatory_attrs: - assert hasattr(top, attr), 'Missing required attribute: {}'.format(attr) + assert (hasattr(top, attr), + 'Missing required attribute: {}'.format(attr)) def test_attrs_total_counts(self, top): ag = mda.Universe(top).select_atoms("all") @@ -66,7 +66,6 @@ def test_attrs_total_counts(self, top): class TestOpenBabelParserSMILES(OpenBabelParserBase): - #parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser expected_attrs = OpenBabelParserBase.expected_attrs + ['charges'] @pytest.fixture() @@ -77,11 +76,11 @@ def filename(self): obConversion.ReadString(mol, "C1=CC=CS1") mol.AddHydrogens() return mol - + @pytest.fixture() def top(self, filename): yield self.parser(filename).parse() - + expected_n_atoms = 9 expected_n_residues = 1 expected_n_segments = 1 @@ -103,7 +102,8 @@ def test_aromaticities(self, top, filename): def test_elements(self, top, filename): expected = np.array([ - OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) + OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in + ob.OBMolAtomIter(filename)]) assert_equal(expected, top.elements.values) def test_chiralities(self, top, filename): From b50c4642f2ee90414d2894c564a50c6886c5a2d9 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 17 Jul 2024 17:22:31 +1000 Subject: [PATCH 13/29] Install appropriate/specific OpenBabel version for imports --- .github/workflows/gh-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 3bee952..34ec2f4 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -76,7 +76,7 @@ jobs: - name: Install package run: | python --version - mamba install -c conda-forge openbabel + mamba install -c conda-forge openbabel==2.4.0 python -m pip install . --no-deps - name: Python information From 5f56f3fc378bd9096532124c29e4b02c1518bf12 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 18 Jul 2024 14:00:12 +1000 Subject: [PATCH 14/29] Specified less specific openbabel version --- .github/workflows/gh-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 34ec2f4..c4c36e1 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -76,7 +76,7 @@ jobs: - name: Install package run: | python --version - mamba install -c conda-forge openbabel==2.4.0 + mamba install -c conda-forge openbabel<3.0.0 python -m pip install . --no-deps - name: Python information From b0fe5416da876f05df216a210c7b733606933787 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 18 Jul 2024 14:05:03 +1000 Subject: [PATCH 15/29] Fixing new error --- .github/workflows/gh-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index c4c36e1..ef585a9 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -75,8 +75,8 @@ jobs: - name: Install package run: | - python --version mamba install -c conda-forge openbabel<3.0.0 + python --version python -m pip install . --no-deps - name: Python information From 16a70a0eedd6b070f52e2ab60acb60907789b1d9 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 18 Jul 2024 14:11:46 +1000 Subject: [PATCH 16/29] Fixing new error take 2 --- .github/workflows/gh-ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index ef585a9..1de0203 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -75,7 +75,6 @@ jobs: - name: Install package run: | - mamba install -c conda-forge openbabel<3.0.0 python --version python -m pip install . --no-deps @@ -142,7 +141,7 @@ jobs: - name: Install dependencies run: | - pip install pipx twine + pip install pipx twine openbabel<3.0.0 - name: Build package run: | From dfed4b3a800a5d9fcc0bae5543fd65921b214de1 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 18 Jul 2024 14:18:04 +1000 Subject: [PATCH 17/29] Fixing new error take 3 --- .github/workflows/gh-ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 1de0203..744f861 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -103,7 +103,8 @@ jobs: pylint_check: - if: "github.repository == 'MDAnalysis/mda_openbabel_converter'" + if: "github.repository == 'lunamorrow/mda_openbabel_converter'" + needs: environment-config runs-on: ubuntu-latest steps: From 6263bf67cad22e429cf716c4272796548a8522b8 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Thu, 18 Jul 2024 14:37:41 +1000 Subject: [PATCH 18/29] Fixing new error take 7 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6f16050..7a467bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ "MDAnalysis>=2.0.0", + "openbabel<=2.9.9" ] keywords = [ "molecular simulations", From 55a6656110e1e1ca4561ee80a7cb7068ea40a6d5 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 12:39:09 +1000 Subject: [PATCH 19/29] push to stash changes before switching over to CI fix branch --- mda_openbabel_converter/OpenBabelParser.py | 5 +++-- mda_openbabel_converter/data/files.py | 2 -- mda_openbabel_converter/tests/test_openbabel_parser.py | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 8dd2fce..f4f9d19 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -40,8 +40,9 @@ from openbabel import OBElementTable HAS_OBABEL = True except ImportError: + # import breaks with version 3.x warnings.warn("Cannot find openbabel, install with `mamba install -c " - "conda-forge openbabel`") + "conda-forge openbabel==2.4.0`") class StereoEnum(StrEnum): @@ -168,7 +169,7 @@ def parse(self, **kwargs): # Atom name set with element and id, as name not supported by OpenBabel for atom in ob.OBMolAtomIter(mol): - name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), + name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), atom.GetIdx()) names.append(name) attrs.append(Atomnames(np.array(names, dtype=object))) diff --git a/mda_openbabel_converter/data/files.py b/mda_openbabel_converter/data/files.py index 4e31d00..c0b340a 100644 --- a/mda_openbabel_converter/data/files.py +++ b/mda_openbabel_converter/data/files.py @@ -13,7 +13,5 @@ ] from importlib.resources import files -# from . import data -# MDANALYSIS_LOGO = importlib.resources.files(__name__) / "mda.txt" MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt" diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 7a4a7ac..580abe6 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -17,7 +17,7 @@ import mda_openbabel_converter.OpenBabelParser # version 2.7.0 -# *** can run with "python -m pytest" but not "pytest" (can't find +# *** can run with "python -m pytest" but not "pytest" (can't find # MDAnalysis) - need to fix this! *** @@ -27,7 +27,7 @@ class OpenBabelParserBase(ParserBase): expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'chiralities', 'segids', 'bonds', - ] + ] expected_n_atoms = 0 expected_n_residues = 0 @@ -53,7 +53,7 @@ def filename(self): def test_mandatory_attributes(self, top): for attr in self.mandatory_attrs: - assert (hasattr(top, attr), + assert (hasattr(top, attr), 'Missing required attribute: {}'.format(attr)) def test_attrs_total_counts(self, top): @@ -102,7 +102,7 @@ def test_aromaticities(self, top, filename): def test_elements(self, top, filename): expected = np.array([ - OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in + OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) assert_equal(expected, top.elements.values) From 3ab670ec372179edd724174703d21f01f42a2feb Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 15:51:49 +1000 Subject: [PATCH 20/29] Rebase done From 0d4962cb66f471561ab7e529ab65b957e0df370c Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 16:39:13 +1000 Subject: [PATCH 21/29] updates after rebase and applying Cedric's review --- mda_openbabel_converter/OpenBabelParser.py | 31 +++++++++---------- mda_openbabel_converter/data/files.py | 3 +- .../tests/test_openbabel_parser.py | 2 +- pyproject.toml | 2 +- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index f4f9d19..3a4d7d4 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -46,8 +46,9 @@ class StereoEnum(StrEnum): - positive = "+" - negative = "-" + POSITIVE = "+" + NEGATIVE = "-" + NONE = "" class OpenBabelParser(TopologyReaderBase): @@ -103,6 +104,10 @@ def parse(self, **kwargs): residue_segindex=None) for atom in ob.OBMolAtomIter(mol): + # Atom name set with element and id, as name not supported by OpenBabel + name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), + atom.GetIdx()) + names.append(name) atomtypes.append(atom.GetType()) ids.append(atom.GetIdx()) masses.append(atom.GetExactMass()) @@ -124,11 +129,11 @@ def parse(self, **kwargs): chainids.append(resid.GetChain()) icodes.append(resid.GetInsertionCode()) - chirality = None + chirality = StereoEnum.NONE if atom.IsPositiveStereo(): - chirality = StereoEnum.positive - if atom.IsNegativeStereo(): - chirality = StereoEnum.negative + chirality = StereoEnum.POSITIVE + elif atom.IsNegativeStereo(): + chirality = StereoEnum.NEGATIVE chiralities.append(chirality) aromatics.append(atom.IsAromatic()) @@ -137,7 +142,7 @@ def parse(self, **kwargs): attrs = [] n_atoms = len(ids) - if resnums and resnums.__contains__(None): + if resnums and (len(resnums) != len(ids)): raise ValueError( "ResidueInfo is only partially available in the molecule." ) @@ -156,22 +161,14 @@ def parse(self, **kwargs): # Bonds bonds = [] - bond_types = [] bond_orders = [] for bond_idx in range(0, mol.NumBonds()): bond = mol.GetBond(bond_idx) bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) bond_orders.append(float(bond.GetBondOrder())) - bond_types.append(float(bond.GetBondOrder())) - attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) + attrs.append(Bonds(bonds, order=bond_orders)) # * Optional attributes * - - # Atom name set with element and id, as name not supported by OpenBabel - for atom in ob.OBMolAtomIter(mol): - name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), - atom.GetIdx()) - names.append(name) attrs.append(Atomnames(np.array(names, dtype=object))) # Atom type @@ -188,7 +185,7 @@ def parse(self, **kwargs): pass # no guesser yet # Residue - if any(resnums) and not any(val is None for val in resnums): + if resnums: resnums = np.array(resnums, dtype=np.int32) resnames = np.array(resnames, dtype=object) segids = np.array(segids, dtype=object) diff --git a/mda_openbabel_converter/data/files.py b/mda_openbabel_converter/data/files.py index c0b340a..758d0b5 100644 --- a/mda_openbabel_converter/data/files.py +++ b/mda_openbabel_converter/data/files.py @@ -13,5 +13,4 @@ ] from importlib.resources import files -MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt" - +MDANALYSIS_LOGO = files("mda_openbabel_converter") / "data" / "mda.txt" \ No newline at end of file diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 580abe6..1e0f07b 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -109,7 +109,7 @@ def test_elements(self, top, filename): def test_chiralities(self, top, filename): expected = np.array([ "+" if atom.IsPositiveStereo() else "-" if atom.IsNegativeStereo() - else "N" for atom in ob.OBMolAtomIter(filename)]) + else "" for atom in ob.OBMolAtomIter(filename)]) assert_equal(expected, top.chiralities.values) def test_charges(self, top, filename): diff --git a/pyproject.toml b/pyproject.toml index 7a467bd..e457e2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ "MDAnalysis>=2.0.0", - "openbabel<=2.9.9" + "openbabel>=3.0.0" ] keywords = [ "molecular simulations", From 3146a58705fe264c0f7022335a5bd7f84e9e888c Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 17:02:46 +1000 Subject: [PATCH 22/29] Getting things straightened up From 80e2cb08ff26b52e18ddc5e29ab15c6cb0043fc7 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 17:05:10 +1000 Subject: [PATCH 23/29] remove dependency for pylint to work --- .github/workflows/gh-ci.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 744f861..c668041 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -104,7 +104,6 @@ jobs: pylint_check: if: "github.repository == 'lunamorrow/mda_openbabel_converter'" - needs: environment-config runs-on: ubuntu-latest steps: From dc8a137ba6740927f322730b361b7831b86a21f6 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 24 Jul 2024 17:13:28 +1000 Subject: [PATCH 24/29] update ob imports to work for openbabel>=3.0.0 --- mda_openbabel_converter/OpenBabel.py | 2 +- mda_openbabel_converter/OpenBabelParser.py | 2 +- mda_openbabel_converter/tests/test_openbabel_parser.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mda_openbabel_converter/OpenBabel.py b/mda_openbabel_converter/OpenBabel.py index a3b4993..00ca74f 100644 --- a/mda_openbabel_converter/OpenBabel.py +++ b/mda_openbabel_converter/OpenBabel.py @@ -8,7 +8,7 @@ from MDAnalysis.core.groups import AtomGroup try: - import openbabel as OB + from openbabel import openbabel as OB from openbabel import OBMol except ImportError: print("Cannot find openbabel, install with 'pip install openbabel==2.4.0'") diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 3a4d7d4..1399c59 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -35,7 +35,7 @@ NEUTRON_MASS = 1.008 try: - import openbabel as ob + from openbabel import openbabel as ob from openbabel import OBMol from openbabel import OBElementTable HAS_OBABEL = True diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index 1e0f07b..b7cca9a 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -1,7 +1,7 @@ # Testing OpenBabel and Pybel import MDAnalysis as mda -import openbabel as ob +from openbabel import openbabel as ob from openbabel import OBMol, OBConversion, OBElementTable from pybel import readfile # from openbabel.test.files import files From 2cecf9bc1e62a7604fbe0bcda7c33891a5b70baa Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 31 Jul 2024 15:23:52 +1000 Subject: [PATCH 25/29] Support updated to OpenBabel 3.x instead of 2.x --- devtools/conda-envs/test_env.yaml | 2 +- mda_openbabel_converter/OpenBabelParser.py | 45 +++++++++---------- .../tests/test_openbabel_parser.py | 16 ++----- 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 37b6ccc..35ea937 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -1,4 +1,4 @@ -name: mda_openbabel_converter-test +name: mda_openbabel_converter channels: - conda-forge - defaults diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 1399c59..643da3d 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -30,19 +30,24 @@ import warnings import numpy as np from enum import StrEnum +# from enum import auto +# from strenum import StrEnum HAS_OBABEL = False NEUTRON_MASS = 1.008 try: + import openbabel from openbabel import openbabel as ob - from openbabel import OBMol - from openbabel import OBElementTable + from openbabel.openbabel import OBMol, OBResidue, GetSymbol + from openbabel.openbabel import * + #from openbabel.openbabel.OBElements import GetSymbol + #from openbabel import OBElement HAS_OBABEL = True except ImportError: # import breaks with version 3.x warnings.warn("Cannot find openbabel, install with `mamba install -c " - "conda-forge openbabel==2.4.0`") + "conda-forge openbabel`") class StereoEnum(StrEnum): @@ -82,7 +87,6 @@ def parse(self, **kwargs): # Atoms names = [] - chiralities = [] resnums = [] resnames = [] elements = [] @@ -105,21 +109,21 @@ def parse(self, **kwargs): for atom in ob.OBMolAtomIter(mol): # Atom name set with element and id, as name not supported by OpenBabel - name = "%s%d" % (OBElementTable().GetSymbol(atom.GetAtomicNum()), - atom.GetIdx()) + id = atom.GetIdx() + name = "%s%d" % (GetSymbol(atom.GetAtomicNum()), id) names.append(name) atomtypes.append(atom.GetType()) - ids.append(atom.GetIdx()) + ids.append(id) masses.append(atom.GetExactMass()) if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS: warnings.warn( - f"Exact mass and atomic mass of atom ID: {atom.GetIdx()}" - " are more than 1.008 AMU different. Be aware of isotopes," + f"Exact mass and atomic mass of atom ID: {id} are more" + " than 1.008 AMU different. Be aware of isotopes," " which are NOT flagged by MDAnalysis.") charges.append(atom.GetPartialCharge()) # convert atomic number to element - elements.append(OBElementTable().GetSymbol(atom.GetAtomicNum())) + elements.append(GetSymbol(atom.GetAtomicNum())) # only for PBD and MOL2 if atom.HasResidue(): @@ -129,13 +133,6 @@ def parse(self, **kwargs): chainids.append(resid.GetChain()) icodes.append(resid.GetInsertionCode()) - chirality = StereoEnum.NONE - if atom.IsPositiveStereo(): - chirality = StereoEnum.POSITIVE - elif atom.IsNegativeStereo(): - chirality = StereoEnum.NEGATIVE - chiralities.append(chirality) - aromatics.append(atom.IsAromatic()) # make Topology attributes @@ -155,7 +152,6 @@ def parse(self, **kwargs): (elements, Elements, object), (masses, Masses, np.float32), (aromatics, Aromaticities, bool), - (chiralities, RSChirality, 'U1'), ): attrs.append(Attr(np.array(vals, dtype=dtype))) @@ -188,11 +184,14 @@ def parse(self, **kwargs): if resnums: resnums = np.array(resnums, dtype=np.int32) resnames = np.array(resnames, dtype=object) - segids = np.array(segids, dtype=object) + #segids = np.array(segids, dtype=object) icodes = np.array(icodes, dtype=object) - residx, (resnums, resnames, icodes, segids) = change_squash( - (resnums, resnames, icodes, segids), - (resnums, resnames, icodes, segids)) + # residx, (resnums, resnames, icodes, segids) = change_squash( + # (resnums, resnames, icodes, segids), + # (resnums, resnames, icodes, segids)) + residx, (resnums, resnames, icodes) = change_squash( + (resnums, resnames, icodes), + (resnums, resnames, icodes)) n_residues = len(resnums) for vals, Attr, dtype in ( (resnums, Resids, np.int32), @@ -208,7 +207,7 @@ def parse(self, **kwargs): n_residues = 1 # Segment - if any(segids) and not any(val is None for val in segids): + if len(segids) and not any(val is None for val in segids): segidx, (segids,) = change_squash((segids,), (segids,)) n_segments = len(segids) attrs.append(Segids(segids)) diff --git a/mda_openbabel_converter/tests/test_openbabel_parser.py b/mda_openbabel_converter/tests/test_openbabel_parser.py index b7cca9a..714657e 100644 --- a/mda_openbabel_converter/tests/test_openbabel_parser.py +++ b/mda_openbabel_converter/tests/test_openbabel_parser.py @@ -1,13 +1,12 @@ # Testing OpenBabel and Pybel import MDAnalysis as mda +import openbabel from openbabel import openbabel as ob -from openbabel import OBMol, OBConversion, OBElementTable -from pybel import readfile +from openbabel.openbabel import OBMol, OBConversion, GetSymbol # from openbabel.test.files import files import mda_openbabel_converter -from mda_openbabel_converter import OpenBabelParser as OBParser import pytest # version 8.2.2 import sys import numpy as np @@ -25,8 +24,7 @@ class OpenBabelParserBase(ParserBase): parser = mda_openbabel_converter.OpenBabelParser.OpenBabelParser expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', - 'segids', 'bonds', + 'resids', 'resnums', 'segids', 'bonds', ] expected_n_atoms = 0 @@ -102,16 +100,10 @@ def test_aromaticities(self, top, filename): def test_elements(self, top, filename): expected = np.array([ - OBElementTable().GetSymbol(atom.GetAtomicNum()) for atom in + GetSymbol(atom.GetAtomicNum()) for atom in ob.OBMolAtomIter(filename)]) assert_equal(expected, top.elements.values) - def test_chiralities(self, top, filename): - expected = np.array([ - "+" if atom.IsPositiveStereo() else "-" if atom.IsNegativeStereo() - else "" for atom in ob.OBMolAtomIter(filename)]) - assert_equal(expected, top.chiralities.values) - def test_charges(self, top, filename): expected = np.array([ atom.GetPartialCharge() for atom in ob.OBMolAtomIter(filename)]) From e1091d361aee1aa02923b5119e233a5833f16319 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 31 Jul 2024 15:28:33 +1000 Subject: [PATCH 26/29] Added MDAnalysisTests to environment for CI imports --- devtools/conda-envs/test_env.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 35ea937..e4303f0 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -9,9 +9,10 @@ dependencies: # MDAnalysis - MDAnalysis + - MDAnalysisTests # OpenBabel - - openbabel + - openbabelS # Testing - pytest From ee4f91aa8931fd61e5b13eeeefc5ede9303d8090 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 31 Jul 2024 15:29:41 +1000 Subject: [PATCH 27/29] Fixing silly typo --- devtools/conda-envs/test_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index e4303f0..792e4c2 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -12,7 +12,7 @@ dependencies: - MDAnalysisTests # OpenBabel - - openbabelS + - openbabel # Testing - pytest From a393e365adcf17f2821f189e40b8b0a8985b970e Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 31 Jul 2024 15:38:43 +1000 Subject: [PATCH 28/29] Cleaned up the last bits of chirality to remove CI env errors --- mda_openbabel_converter/OpenBabelParser.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 643da3d..8914c48 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -29,9 +29,6 @@ ) import warnings import numpy as np -from enum import StrEnum -# from enum import auto -# from strenum import StrEnum HAS_OBABEL = False NEUTRON_MASS = 1.008 @@ -41,21 +38,11 @@ from openbabel import openbabel as ob from openbabel.openbabel import OBMol, OBResidue, GetSymbol from openbabel.openbabel import * - #from openbabel.openbabel.OBElements import GetSymbol - #from openbabel import OBElement HAS_OBABEL = True except ImportError: - # import breaks with version 3.x warnings.warn("Cannot find openbabel, install with `mamba install -c " "conda-forge openbabel`") - -class StereoEnum(StrEnum): - POSITIVE = "+" - NEGATIVE = "-" - NONE = "" - - class OpenBabelParser(TopologyReaderBase): """ Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a From a24a6ab0389fc75fa62e8d4c4acca0dc93d958e2 Mon Sep 17 00:00:00 2001 From: lunamorrow Date: Wed, 31 Jul 2024 15:52:26 +1000 Subject: [PATCH 29/29] Final fixes for PEP8 --- mda_openbabel_converter/OpenBabelParser.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mda_openbabel_converter/OpenBabelParser.py b/mda_openbabel_converter/OpenBabelParser.py index 8914c48..850f752 100644 --- a/mda_openbabel_converter/OpenBabelParser.py +++ b/mda_openbabel_converter/OpenBabelParser.py @@ -43,6 +43,7 @@ warnings.warn("Cannot find openbabel, install with `mamba install -c " "conda-forge openbabel`") + class OpenBabelParser(TopologyReaderBase): """ Inherits from TopologyReaderBase and converts an OpenBabel OBMol to a @@ -95,16 +96,16 @@ def parse(self, **kwargs): residue_segindex=None) for atom in ob.OBMolAtomIter(mol): - # Atom name set with element and id, as name not supported by OpenBabel - id = atom.GetIdx() - name = "%s%d" % (GetSymbol(atom.GetAtomicNum()), id) + # Name set with element and id, as name not stored by OpenBabel + a_id = atom.GetIdx() + name = "%s%d" % (GetSymbol(atom.GetAtomicNum()), a_id) names.append(name) atomtypes.append(atom.GetType()) - ids.append(id) + ids.append(a_id) masses.append(atom.GetExactMass()) if abs(atom.GetExactMass()-atom.GetAtomicMass()) >= NEUTRON_MASS: warnings.warn( - f"Exact mass and atomic mass of atom ID: {id} are more" + f"Exact mass and atomic mass of atom ID: {a_id} are more" " than 1.008 AMU different. Be aware of isotopes," " which are NOT flagged by MDAnalysis.") charges.append(atom.GetPartialCharge()) @@ -171,11 +172,7 @@ def parse(self, **kwargs): if resnums: resnums = np.array(resnums, dtype=np.int32) resnames = np.array(resnames, dtype=object) - #segids = np.array(segids, dtype=object) icodes = np.array(icodes, dtype=object) - # residx, (resnums, resnames, icodes, segids) = change_squash( - # (resnums, resnames, icodes, segids), - # (resnums, resnames, icodes, segids)) residx, (resnums, resnames, icodes) = change_squash( (resnums, resnames, icodes), (resnums, resnames, icodes))