From 53f906fefd517d30934b959ee704a6e0dc48ce7d Mon Sep 17 00:00:00 2001 From: Annette Lien <70581832+liannette@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:54:17 +0100 Subject: [PATCH] Precursor m/z value fix (#283) * fix: loading the correct precursor ion m/z * feat: add precursor charge to Spectrum attributes * Add comment about mgf pepmass being precursor m/z --- .../metabolomics/gnps/gnps_spectrum_loader.py | 27 ++++--------------- src/nplinker/metabolomics/spectrum.py | 19 ++++++++++--- .../metabolomics/test_molecular_family.py | 4 +-- tests/unit/metabolomics/test_spectrum.py | 17 ++++++------ tests/unit/metabolomics/test_utils.py | 6 ++--- tests/unit/scoring/conftest.py | 6 ++--- 6 files changed, 37 insertions(+), 42 deletions(-) diff --git a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py index 9d7eff8bd..13d259686 100644 --- a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py @@ -85,10 +85,10 @@ def _load(self) -> None: # Load the spectrum spectrum_id: str = spec["params"]["scans"] - # calculate precursor m/z from precursor mass and charge - precursor_mass = spec["params"]["pepmass"][0] - precursor_charge = self._get_precursor_charge(spec["params"]["charge"]) - precursor_mz: float = precursor_mass / abs(precursor_charge) + # The pepmass in an mgf file is actually the m/z and not the peptide mass + # See: https://www.matrixscience.com/help/obsolete_data_file_formats.html + precursor_mz: float = spec["params"]["pepmass"][0] + precursor_charge: int = spec["params"]["charge"][0] rt = spec["params"].get("rtinseconds", 0) spectrum = Spectrum( @@ -96,25 +96,8 @@ def _load(self) -> None: mz=list(spec["m/z array"]), intensity=list(spec["intensity array"]), precursor_mz=precursor_mz, + precursor_charge=precursor_charge, rt=rt, metadata=spec["params"], ) self._spectra.append(spectrum) - - def _get_precursor_charge(self, charges: 
list[int]) -> int: - """Get the precursor charge from the charge list. - - Args: - charges: list of charge values. - - Returns: - the precursor charge. - """ - charge = charges[0] - if charge == 0: - logger.warning( - f"Invalid precursor charge value 0. " - f"Assuming charge is 1 for spectrum '{self._file}'." - ) - charge = 1 - return charge diff --git a/src/nplinker/metabolomics/spectrum.py b/src/nplinker/metabolomics/spectrum.py index 61d8d4214..fa65de2e6 100644 --- a/src/nplinker/metabolomics/spectrum.py +++ b/src/nplinker/metabolomics/spectrum.py @@ -17,10 +17,10 @@ class Spectrum: id: the spectrum ID. mz: the list of m/z values. intensity: the list of intensity values. - precursor_mz: the m/z value of the precursor. + precursor_mz: the m/z value of the precursor ion. + precursor_charge: the charge of the precursor ion. rt: the retention time in seconds. - metadata: the metadata of the spectrum, i.e. the header information in the MGF - file. + metadata: the metadata of the spectrum, i.e. the header information in the MGF file. gnps_annotations: the GNPS annotations of the spectrum. gnps_id: the GNPS ID of the spectrum. strains: the strains that this spectrum belongs to. @@ -34,6 +34,7 @@ def __init__( mz: list[float], intensity: list[float], precursor_mz: float, + precursor_charge: int, rt: float = 0, metadata: dict | None = None, ) -> None: @@ -44,6 +45,7 @@ def __init__( mz: the list of m/z values. intensity: the list of intensity values. precursor_mz: the precursor m/z. + precursor_charge: the charge of the precursor ion. rt: the retention time in seconds. Defaults to 0. metadata: the metadata of the spectrum, i.e. the header information in the MGF file. 
@@ -52,6 +54,7 @@ def __init__( self.mz = mz self.intensity = intensity self.precursor_mz = precursor_mz + self.precursor_charge = precursor_charge self.rt = rt self.metadata = metadata or {} @@ -78,7 +81,15 @@ def __reduce__(self) -> tuple: """Reduce function for pickling.""" return ( self.__class__, - (self.id, self.mz, self.intensity, self.precursor_mz, self.rt, self.metadata), + ( + self.id, + self.mz, + self.intensity, + self.precursor_mz, + self.precursor_charge, + self.rt, + self.metadata, + ), self.__dict__, ) diff --git a/tests/unit/metabolomics/test_molecular_family.py b/tests/unit/metabolomics/test_molecular_family.py index eb6fcd263..185871015 100644 --- a/tests/unit/metabolomics/test_molecular_family.py +++ b/tests/unit/metabolomics/test_molecular_family.py @@ -8,7 +8,7 @@ @pytest.fixture() def spectrum1(): """Return a Spectrum object.""" - spec = Spectrum(id="spec001", mz=[1.0], intensity=[1.0], precursor_mz=100.0) + spec = Spectrum(id="spec001", mz=[1.0], intensity=[1.0], precursor_mz=100.0, precursor_charge=1) spec.strains = StrainCollection() spec.strains.add(Strain("strain001")) yield spec @@ -17,7 +17,7 @@ def spectrum1(): @pytest.fixture() def spectrum2(): """Return a Spectrum object.""" - spec = Spectrum(id="spec002", mz=[1.0], intensity=[1.0], precursor_mz=100.0) + spec = Spectrum(id="spec002", mz=[1.0], intensity=[1.0], precursor_mz=100.0, precursor_charge=1) spec.strains = StrainCollection() spec.strains.add(Strain("strain002")) yield spec diff --git a/tests/unit/metabolomics/test_spectrum.py b/tests/unit/metabolomics/test_spectrum.py index e984eabac..e52621942 100644 --- a/tests/unit/metabolomics/test_spectrum.py +++ b/tests/unit/metabolomics/test_spectrum.py @@ -14,12 +14,13 @@ ) def test_init(rt, metadata, expected_metadata): """Test the initialization of the Spectrum class.""" - spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, rt, metadata) + spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, rt, metadata) assert spec.id == 
"spec1" assert spec.mz == [100, 200] assert spec.intensity == [0.1, 0.2] assert spec.precursor_mz == 150 + assert spec.precursor_charge == 1 assert spec.rt == rt assert spec.metadata == expected_metadata @@ -32,16 +33,16 @@ def test_init(rt, metadata, expected_metadata): def test_str_repr(): """Test the __str__ and __repr__ methods.""" - spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150) + spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1) assert str(spec) == "Spectrum(id=spec1, #strains=0)" assert repr(spec) == "Spectrum(id=spec1, #strains=0)" def test_eq(): """Test the __eq__ method.""" - spec1 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 0, {"info": "test"}) - spec2 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 0, {"info": "test"}) - spec3 = Spectrum("spec2", [100, 200], [0.1, 0.2], 150, 0, {"info": "test"}) + spec1 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"}) + spec2 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"}) + spec3 = Spectrum("spec2", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"}) assert spec1 == spec2 assert spec1 != spec3 @@ -49,19 +50,19 @@ def test_eq(): def test_hash(): """Test the __hash__ method.""" - spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150) + spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1) assert hash(spec) == hash(("spec1", 150)) def test_peaks(): """Test the peaks attribute.""" - spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150) + spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1) assert np.array_equal(spec.peaks, np.array([[100, 0.1], [200, 0.2]])) def test_has_strain(): """Test the has_strain method.""" - spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150) + spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1) strain1 = Strain("strain1") strain2 = Strain("strain2") diff --git a/tests/unit/metabolomics/test_utils.py b/tests/unit/metabolomics/test_utils.py index aa9bd2e4e..038531039 100644 --- a/tests/unit/metabolomics/test_utils.py 
+++ b/tests/unit/metabolomics/test_utils.py @@ -17,9 +17,9 @@ def spectra(): """Fixture for a list of Spectrum objects.""" # The order of the spectra is important for the tests. return [ - Spectrum("spec0", [100, 200], [0.1, 0.2], 150), - Spectrum("spec1", [100, 200], [0.1, 0.2], 150), - Spectrum("spec2", [100, 200], [0.1, 0.2], 150), + Spectrum("spec0", [100, 200], [0.1, 0.2], 150, 1), + Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1), + Spectrum("spec2", [100, 200], [0.1, 0.2], 150, 1), ] diff --git a/tests/unit/scoring/conftest.py b/tests/unit/scoring/conftest.py index 009a92523..c7b7ea518 100644 --- a/tests/unit/scoring/conftest.py +++ b/tests/unit/scoring/conftest.py @@ -37,11 +37,11 @@ def gcfs(strains_list) -> tuple[GCF, GCF, GCF]: @fixture(scope="session") def spectra(strains_list) -> tuple[Spectrum, Spectrum, Spectrum]: - spectrum1 = Spectrum("spectrum1", [1], [1], 10.0) + spectrum1 = Spectrum("spectrum1", [1], [1], 10.0, 1) spectrum1.strains.add(strains_list[0]) - spectrum2 = Spectrum("spectrum2", [1], [1], 10.0) + spectrum2 = Spectrum("spectrum2", [1], [1], 10.0, 1) spectrum2.strains.add(strains_list[1]) - spectrum3 = Spectrum("spectrum3", [1], [1], 10.0) + spectrum3 = Spectrum("spectrum3", [1], [1], 10.0, 1) spectrum3.strains.add(strains_list[0]) spectrum3.strains.add(strains_list[1]) return spectrum1, spectrum2, spectrum3