Skip to content

Commit

Permalink
add support of mibig v4.0 (#286)
Browse files Browse the repository at this point in the history
[MIBiG v4.0](https://mibig.secondarymetabolites.org/download) was recently released. This PR adds support for it.
  • Loading branch information
CunliangGeng authored Nov 27, 2024
1 parent 505496e commit cd15f1e
Show file tree
Hide file tree
Showing 5 changed files with 654 additions and 46 deletions.
7 changes: 5 additions & 2 deletions src/nplinker/genomics/mibig/mibig_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"2.0": "843ce4677db6d11422f0e6d94dd03e81",
"3.0": "7c38b90f939086c03392d99a913baef9",
"3.1": "643d1349722a9437d8dcf558dac5f815",
"4.0": "70d1e7d573652ba62548b1fcfbdbf844",
}


Expand All @@ -31,6 +32,8 @@ def download_and_extract_mibig_metadata(
):
"""Download and extract MIBiG metadata json files.
The MIBiG metadata json files are available at https://mibig.secondarymetabolites.org/download.
Note that it does not matter whether the metadata json files are in nested folders or not in the archive,
all json files will be extracted to the same location, i.e. `extract_path`. The nested
folders will be removed if they exist. So the `extract_path` will have only json files.
Expand All @@ -39,7 +42,7 @@ def download_and_extract_mibig_metadata(
download_root: Path to the directory in which to place the downloaded archive.
extract_path: Path to an empty directory where the json files will be extracted.
The directory must be empty if it exists. If it doesn't exist, the directory will be created.
version: _description_. Defaults to "3.1".
version: MIBiG version. Defaults to "3.1".
Examples:
>>> download_and_extract_mibig_metadata("/data/download", "/data/mibig_metadata")
Expand All @@ -58,7 +61,7 @@ def download_and_extract_mibig_metadata(
raise ValueError(f'Nonempty directory: "{extract_path}"')

# download and extract
md5 = _MD5_MIBIG_METADATA[version]
md5 = _MD5_MIBIG_METADATA.get(version, None)
download_and_extract_archive(
url=MIBIG_METADATA_URL.format(version=version),
download_root=download_root,
Expand Down
33 changes: 25 additions & 8 deletions src/nplinker/genomics/mibig/mibig_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class MibigMetadata:
MIBiG is a specification of BGC metadata and uses JSON schema to
represent BGC metadata. For more details, see:
https://mibig.secondarymetabolites.org/download.
This class supports MIBiG version 1.0 to 4.0.
"""

def __init__(self, file: str | PathLike) -> None:
Expand Down Expand Up @@ -37,22 +39,37 @@ def mibig_accession(self) -> str:
def biosyn_class(self) -> tuple[str]:
"""Get the value of metadata item 'biosyn_class'.
The 'biosyn_class' is biosynthetic class(es), namely the type of
natural product or secondary metabolite.
The 'biosyn_class' is biosynthetic class(es) defined by MIBiG.
MIBiG defines 6 major biosynthetic classes for natural products,
Before version 4.0, MIBiG defined 6 major biosynthetic classes,
including `NRP`, `Polyketide`, `RiPP`, `Terpene`, `Saccharide`
and `Alkaloid`. Note that natural products created by the other
biosynthetic mechanisms fall under the category `Other`. For more details
see [the paper](https://doi.org/10.1186/s40793-018-0318-y).
and `Alkaloid`.
Starting from version 4.0, MIBiG defines 5 major biosynthetic classes,
including `PKS`, `NRPS`, `Ribosomal`, `Terpene` and `Saccharide`.
The mapping between the old and new classes is as follows:
- `NRP` -> `NRPS`
- `Polyketide` -> `PKS`
- `RiPP` -> `Ribosomal`
- `Terpene` -> `Terpene`
- `Saccharide` -> `Saccharide`
- `Alkaloid` -> `Other`
Note that natural products that do not fit into any of the above
biosynthetic classes fall under the category `Other`.
"""
return self._biosyn_class

def _parse_metadata(self) -> None:
"""Parse metadata to get 'mibig_accession' and 'biosyn_class' values."""
if "general_params" in self.metadata:
if "general_params" in self.metadata: # version ≤1.4
self._mibig_accession = self.metadata["general_params"]["mibig_accession"]
self._biosyn_class = tuple(self.metadata["general_params"]["biosyn_class"])
else: # version≥2.0
elif "cluster" in self.metadata: # version ≥2.0 and <4.0
self._mibig_accession = self.metadata["cluster"]["mibig_accession"]
self._biosyn_class = tuple(self.metadata["cluster"]["biosyn_class"])
elif "version" in self.metadata: # version≥4.0
self._mibig_accession = self.metadata["accession"]
self._biosyn_class = tuple(i["class"] for i in self.metadata["biosynthesis"]["classes"])
Loading

0 comments on commit cd15f1e

Please sign in to comment.