From 6a6f1705c5a8c34e57f11c35273e6306fc8e6c6e Mon Sep 17 00:00:00 2001
From: Cunliang Geng <c.geng@esciencecenter.nl>
Date: Fri, 14 Jun 2024 13:38:02 +0200
Subject: [PATCH] fix mypy and ruff errors (#257)

* use overload for MetcalfScoring

* add networkx stub file

* fix mypy errors

fix the code where possible, and add `# type: ignore` for spurious mypy errors

* fix ruff check errors for refactored code

* run ruff format

* fix imports

* use TYPE_CHECKING consistently, and only to avoid circular imports

This ensures that the type hints are available both during type checking and at runtime, improving code clarity and reducing the chance of runtime errors related to type hints.

* fix non-existing attribute bug

* fix typos

* use broader type hints Sequence and Mapping

use the broader type hints Sequence and Mapping in place of list and dict, respectively, for function parameters

* change `datas` to `data`

* use specific return types for abstract methods when possible

fall back to a more general type only when necessary
---
 README.dev.md                                 |  2 +-
 pyproject.toml                                |  1 +
 src/nplinker/arranger.py                      |  4 +-
 src/nplinker/class_info/runcanopus.py         |  4 --
 src/nplinker/config.py                        |  2 +-
 src/nplinker/genomics/abc.py                  |  7 ++--
 .../genomics/antismash/antismash_loader.py    |  3 +-
 .../antismash/podp_antismash_downloader.py    | 10 +++--
 src/nplinker/genomics/bgc.py                  |  2 +-
 src/nplinker/genomics/gcf.py                  |  2 +-
 src/nplinker/genomics/mibig/mibig_loader.py   |  8 ++--
 src/nplinker/genomics/utils.py                | 16 ++++---
 src/nplinker/loader.py                        | 20 ++++++---
 src/nplinker/metabolomics/abc.py              | 42 +++++++++++++------
 .../metabolomics/gnps/gnps_downloader.py      |  8 +---
 .../metabolomics/gnps/gnps_extractor.py       |  2 +-
 src/nplinker/metabolomics/gnps/gnps_format.py |  2 +-
 src/nplinker/metabolomics/molecular_family.py |  4 +-
 src/nplinker/metabolomics/spectrum.py         |  4 +-
 src/nplinker/metabolomics/utils.py            | 18 ++++----
 src/nplinker/nplinker.py                      | 10 ++---
 src/nplinker/schemas/user_strains.json        |  4 +-
 src/nplinker/schemas/utils.py                 |  2 +-
 src/nplinker/scoring/abc.py                   |  2 +-
 src/nplinker/scoring/iokr/mk_fprints.py       |  4 --
 src/nplinker/scoring/iokr/spectrum_filters.py |  1 +
 src/nplinker/scoring/link_graph.py            |  6 +--
 src/nplinker/scoring/metcalf_scoring.py       | 24 +++++++----
 src/nplinker/scoring/utils.py                 | 31 ++++++--------
 src/nplinker/strain/utils.py                  |  2 +-
 src/nplinker/utils.py                         | 26 ++++++++++--
 tests/unit/genomics/test_mibig_downloader.py  |  2 +-
 tests/unit/genomics/test_mibig_loader.py      | 14 +++----
 .../test_genome_bgc_mappings_schema.py        |  4 +-
 .../unit/schemas/test_genome_status_schema.py |  4 +-
 .../schemas/test_strain_mappings_schema.py    |  4 +-
 .../unit/schemas/test_user_strains_schema.py  | 11 +++--
 tests/unit/scoring/conftest.py                |  7 +---
 tests/unit/test_config.py                     |  2 +-
 tests/unit/test_utils.py                      | 10 ++---
 40 files changed, 188 insertions(+), 143 deletions(-)

diff --git a/README.dev.md b/README.dev.md
index 27b9967a..3f03ceaf 100644
--- a/README.dev.md
+++ b/README.dev.md
@@ -33,7 +33,7 @@ python3 -m pip install --upgrade pip setuptools
 # install development dependencies
 pip install --no-cache-dir --editable ".[dev]"
 
-# install non-pypi dependecies
+# install non-pypi dependencies
 install-nplinker-deps
 ```
 
diff --git a/pyproject.toml b/pyproject.toml
index 7326a021..dd118f41 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ dev = [
     "types-Deprecated",
     "types-beautifulsoup4",
     "types-jsonschema",
+    "types-networkx",
     "pandas-stubs",
     # docs
     "mkdocs",
diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py
index e91cb726..06df9ac5 100644
--- a/src/nplinker/arranger.py
+++ b/src/nplinker/arranger.py
@@ -171,7 +171,7 @@ def _get_gnps_file_mappings_file(self) -> Path:
             file_mappings_tsv if file_mappings_tsv.exists() else file_mappings_csv
         )
 
-        return gnps_file_mappings_file
+        return gnps_file_mappings_file  # type: ignore
 
     def _download_and_extract_gnps(self) -> None:
         """Download and extract the GNPS data.
@@ -304,7 +304,7 @@ def arrange_strain_mappings(self) -> None:
         If `self.config.mode` is "local", validate the strain mappings file.
         If `self.config.mode` is "podp", always generate the strain mappings file and validate it.
 
-        The valiation checks if the strain mappings file exists and if it is a valid JSON file
+        The validation checks if the strain mappings file exists and if it is a valid JSON file
         according to the schema defined in `schemas/strain_mappings_schema.json`.
         """
         if self.config.mode == "podp":
diff --git a/src/nplinker/class_info/runcanopus.py b/src/nplinker/class_info/runcanopus.py
index 6108e7e5..17807278 100644
--- a/src/nplinker/class_info/runcanopus.py
+++ b/src/nplinker/class_info/runcanopus.py
@@ -81,7 +81,3 @@ def run_canopus(mgf_file, output_path, extra_params="--maxmz 600 formula zodiac
     open(os.path.join(output_path, "completed"), "w").close()
 
     return True
-
-
-if __name__ == "__main__":
-    run_canopus(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
diff --git a/src/nplinker/config.py b/src/nplinker/config.py
index 5e734ff5..24fba952 100644
--- a/src/nplinker/config.py
+++ b/src/nplinker/config.py
@@ -36,7 +36,7 @@ def load_config(config_file: str | PathLike) -> Dynaconf:
 
 
 # Note:
-# Validataor parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
+# Validator parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
 # than being optional. So don't set the parameter `required` if the key is optional.
 CONFIG_VALIDATORS = [
     # General settings
diff --git a/src/nplinker/genomics/abc.py b/src/nplinker/genomics/abc.py
index 57c49ae4..daeb3bef 100644
--- a/src/nplinker/genomics/abc.py
+++ b/src/nplinker/genomics/abc.py
@@ -1,6 +1,5 @@
 from abc import ABC
 from abc import abstractmethod
-from collections.abc import Sequence
 from .bgc import BGC
 from .gcf import GCF
 
@@ -8,7 +7,7 @@
 class BGCLoaderBase(ABC):
     """Abstract base class for BGC loader."""
 
-    def __init__(self, data_dir: str):
+    def __init__(self, data_dir: str) -> None:
         """Initialize the BGC loader.
 
         Args:
@@ -26,7 +25,7 @@ def get_files(self) -> dict[str, str]:
         """
 
     @abstractmethod
-    def get_bgcs(self) -> Sequence[BGC]:
+    def get_bgcs(self) -> list[BGC]:
         """Get BGC objects.
 
         Returns:
@@ -38,7 +37,7 @@ class GCFLoaderBase(ABC):
     """Abstract base class for GCF loader."""
 
     @abstractmethod
-    def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> Sequence[GCF]:
+    def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> list[GCF]:
         """Get GCF objects.
 
         Args:
diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py
index 08384817..4c6bd991 100644
--- a/src/nplinker/genomics/antismash/antismash_loader.py
+++ b/src/nplinker/genomics/antismash/antismash_loader.py
@@ -2,6 +2,7 @@
 import fnmatch
 import logging
 import os
+from typing import Mapping
 from Bio import SeqIO
 from Bio import SeqRecord
 from nplinker.genomics import BGC
@@ -97,7 +98,7 @@ def get_bgcs(self) -> list[BGC]:
         return self._bgcs
 
     @staticmethod
-    def _parse_bgcs(bgc_files: dict[str, str]) -> list[BGC]:
+    def _parse_bgcs(bgc_files: Mapping[str, str]) -> list[BGC]:
         """Load given BGC files as BGC objects.
 
         Args:
diff --git a/src/nplinker/genomics/antismash/podp_antismash_downloader.py b/src/nplinker/genomics/antismash/podp_antismash_downloader.py
index 515fdffe..2a77cc20 100644
--- a/src/nplinker/genomics/antismash/podp_antismash_downloader.py
+++ b/src/nplinker/genomics/antismash/podp_antismash_downloader.py
@@ -5,6 +5,8 @@
 import time
 from os import PathLike
 from pathlib import Path
+from typing import Mapping
+from typing import Sequence
 import httpx
 from bs4 import BeautifulSoup
 from bs4 import NavigableString
@@ -82,7 +84,7 @@ def read_json(file: str | PathLike) -> dict[str, "GenomeStatus"]:
 
     @staticmethod
     def to_json(
-        genome_status_dict: dict[str, "GenomeStatus"], file: str | PathLike | None = None
+        genome_status_dict: Mapping[str, "GenomeStatus"], file: str | PathLike | None = None
     ) -> str | None:
         """Convert the genome status dictionary to a JSON string.
 
@@ -122,7 +124,7 @@ def _to_dict(self) -> dict:
 
 
 def podp_download_and_extract_antismash_data(
-    genome_records: list[dict[str, dict[str, str]]],
+    genome_records: Sequence[Mapping[str, Mapping[str, str]]],
     project_download_root: str | PathLike,
     project_extract_root: str | PathLike,
 ):
@@ -220,7 +222,7 @@ def podp_download_and_extract_antismash_data(
         raise ValueError("No antiSMASH data found for any genome")
 
 
-def get_best_available_genome_id(genome_id_data: dict[str, str]) -> str | None:
+def get_best_available_genome_id(genome_id_data: Mapping[str, str]) -> str | None:
     """Get the best available ID from genome_id_data dict.
 
     Args:
@@ -359,7 +361,7 @@ def _resolve_jgi_accession(jgi_id: str) -> str:
     return _resolve_genbank_accession(link.text)
 
 
-def _resolve_refseq_id(genome_id_data: dict[str, str]) -> str:
+def _resolve_refseq_id(genome_id_data: Mapping[str, str]) -> str:
     """Get the RefSeq ID to which the genome accession is linked.
 
     Check https://pairedomicsdata.bioinformatics.nl/schema.json.
diff --git a/src/nplinker/genomics/bgc.py b/src/nplinker/genomics/bgc.py
index e291cac0..12e6660e 100644
--- a/src/nplinker/genomics/bgc.py
+++ b/src/nplinker/genomics/bgc.py
@@ -2,11 +2,11 @@
 import logging
 from typing import TYPE_CHECKING
 from deprecated import deprecated
+from nplinker.strain import Strain
 from .aa_pred import predict_aa
 
 
 if TYPE_CHECKING:
-    from ..strain import Strain
     from .gcf import GCF
 
 logger = logging.getLogger(__name__)
diff --git a/src/nplinker/genomics/gcf.py b/src/nplinker/genomics/gcf.py
index ecddd7a7..6a1e4f2f 100644
--- a/src/nplinker/genomics/gcf.py
+++ b/src/nplinker/genomics/gcf.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 import logging
 from typing import TYPE_CHECKING
+from nplinker.strain import Strain
 from nplinker.strain import StrainCollection
 
 
 if TYPE_CHECKING:
-    from nplinker.strain import Strain
     from .bgc import BGC
 
 logger = logging.getLogger(__name__)
diff --git a/src/nplinker/genomics/mibig/mibig_loader.py b/src/nplinker/genomics/mibig/mibig_loader.py
index 38fed6b3..3d8eab6b 100644
--- a/src/nplinker/genomics/mibig/mibig_loader.py
+++ b/src/nplinker/genomics/mibig/mibig_loader.py
@@ -21,14 +21,14 @@ class MibigLoader:
     """
 
     def __init__(self, data_dir: str):
-        """Initialize the MIBiG metatdata loader.
+        """Initialize the MIBiG metadata loader.
 
         Args:
             data_dir: Path to the directory of MIBiG metadata json files
         """
         self.data_dir = data_dir
         self._file_dict = self.parse_data_dir(self.data_dir)
-        self._metadata_dict = self._parse_metadatas()
+        self._metadata_dict = self._parse_metadata()
         self._bgcs = self._parse_bgcs()
 
     def get_files(self) -> dict[str, str]:
@@ -58,7 +58,7 @@ def parse_data_dir(data_dir: str) -> dict[str, str]:
             file_dict[fname] = file
         return file_dict
 
-    def get_metadatas(self) -> dict[str, MibigMetadata]:
+    def get_metadata(self) -> dict[str, MibigMetadata]:
         """Get MibigMetadata objects.
 
         Returns:
@@ -66,7 +66,7 @@ def get_metadatas(self) -> dict[str, MibigMetadata]:
         """
         return self._metadata_dict
 
-    def _parse_metadatas(self) -> dict[str, MibigMetadata]:
+    def _parse_metadata(self) -> dict[str, MibigMetadata]:
         """Parse all metadata files and return MibigMetadata objects.
 
         Returns:
diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py
index ba4c227b..41b65316 100644
--- a/src/nplinker/genomics/utils.py
+++ b/src/nplinker/genomics/utils.py
@@ -3,6 +3,8 @@
 import logging
 from os import PathLike
 from pathlib import Path
+from typing import Mapping
+from typing import Sequence
 from jsonschema import validate
 from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME
 from nplinker.schemas import GENOME_BGC_MAPPINGS_SCHEMA
@@ -65,7 +67,9 @@ def generate_mappings_genome_id_bgc_id(
     logger.info("Generated genome-BGC mappings file: %s", output_file)
 
 
-def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[BGC], list[BGC]]:
+def add_strain_to_bgc(
+    strains: StrainCollection, bgcs: Sequence[BGC]
+) -> tuple[list[BGC], list[BGC]]:
     """Assign a Strain object to `BGC.strain` for input BGCs.
 
     BGC id is used to find the corresponding Strain object. It's possible that
@@ -111,7 +115,7 @@ def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[
 
 
 def add_bgc_to_gcf(
-    bgcs: list[BGC], gcfs: list[GCF]
+    bgcs: Sequence[BGC], gcfs: Sequence[GCF]
 ) -> tuple[list[GCF], list[GCF], dict[GCF, set[str]]]:
     """Add BGC objects to GCF object based on GCF's BGC ids.
 
@@ -165,7 +169,7 @@ def add_bgc_to_gcf(
     return gcf_with_bgc, gcf_without_bgc, gcf_missing_bgc
 
 
-def get_mibig_from_gcf(gcfs: list[GCF]) -> tuple[list[BGC], StrainCollection]:
+def get_mibig_from_gcf(gcfs: Sequence[GCF]) -> tuple[list[BGC], StrainCollection]:
     """Get MIBiG BGCs and strains from GCF objects.
 
     Args:
@@ -277,9 +281,9 @@ def extract_mappings_resolved_genome_id_bgc_id(
 
 
 def get_mappings_strain_id_bgc_id(
-    mappings_strain_id_original_genome_id: dict[str, set[str]],
-    mappings_original_genome_id_resolved_genome_id: dict[str, str],
-    mappings_resolved_genome_id_bgc_id: dict[str, set[str]],
+    mappings_strain_id_original_genome_id: Mapping[str, set[str]],
+    mappings_original_genome_id_resolved_genome_id: Mapping[str, str],
+    mappings_resolved_genome_id_bgc_id: Mapping[str, set[str]],
 ) -> dict[str, set[str]]:
     """Get mappings "strain_id <-> bgc_id".
 
diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py
index ce534f52..3f248bec 100644
--- a/src/nplinker/loader.py
+++ b/src/nplinker/loader.py
@@ -1,9 +1,12 @@
+from __future__ import annotations
 import logging
 import os
 from deprecated import deprecated
 from dynaconf import Dynaconf
 from nplinker import NPLINKER_APP_DATA_DIR
 from nplinker import defaults
+from nplinker.genomics import BGC
+from nplinker.genomics import GCF
 from nplinker.genomics.antismash import AntismashBGCLoader
 from nplinker.genomics.bigscape import BigscapeGCFLoader
 from nplinker.genomics.bigscape import BigscapeV2GCFLoader
@@ -11,6 +14,8 @@
 from nplinker.genomics.utils import add_bgc_to_gcf
 from nplinker.genomics.utils import add_strain_to_bgc
 from nplinker.genomics.utils import get_mibig_from_gcf
+from nplinker.metabolomics import MolecularFamily
+from nplinker.metabolomics import Spectrum
 from nplinker.metabolomics.gnps import GNPSAnnotationLoader
 from nplinker.metabolomics.gnps import GNPSMolecularFamilyLoader
 from nplinker.metabolomics.gnps import GNPSSpectrumLoader
@@ -58,11 +63,14 @@ def __init__(self, config: Dynaconf):
         """
         self.config = config
 
-        self.bgcs, self.gcfs, self.spectra, self.mfs = [], [], [], []
-        self.mibig_bgcs = []
-        self.mibig_strains_in_use = StrainCollection()
-        self.product_types = []
-        self.strains = StrainCollection()
+        self.bgcs: list[BGC] = []
+        self.gcfs: list[GCF] = []
+        self.spectra: list[Spectrum] = []
+        self.mfs: list[MolecularFamily] = []
+        self.mibig_bgcs: list[BGC] = []
+        self.mibig_strains_in_use: StrainCollection = StrainCollection()
+        self.product_types: list = []
+        self.strains: StrainCollection = StrainCollection()
 
         self.class_matches = None
         self.chem_classes = None
@@ -93,7 +101,7 @@ def _load_strain_mappings(self):
             self.strains.add(strain)
         logger.info("Loaded {} non-MiBIG Strain objects".format(len(self.strains)))
 
-        # 2. filter user specificied strains (remove all that are not specified by user).
+        # 2. filter user specified strains (remove all that are not specified by user).
         # It's not allowed to specify empty list of strains, otherwise validation will fail.
         user_strains_file = self.config.root_dir / defaults.STRAINS_SELECTED_FILENAME
         if user_strains_file.exists():
diff --git a/src/nplinker/metabolomics/abc.py b/src/nplinker/metabolomics/abc.py
index 5cadb0e4..6af3052e 100644
--- a/src/nplinker/metabolomics/abc.py
+++ b/src/nplinker/metabolomics/abc.py
@@ -1,23 +1,27 @@
 from abc import ABC
 from abc import abstractmethod
-from collections.abc import Sequence
-from typing import TYPE_CHECKING
-
-
-if TYPE_CHECKING:
-    from .molecular_family import MolecularFamily
-    from .spectrum import Spectrum
+from .molecular_family import MolecularFamily
+from .spectrum import Spectrum
 
 
 class SpectrumLoaderBase(ABC):
+    """Abstract base class for SpectrumLoader."""
+
     @property
     @abstractmethod
-    def spectra(self) -> Sequence["Spectrum"]: ...
+    def spectra(self) -> list["Spectrum"]:
+        """Get Spectrum objects.
+
+        Returns:
+            A sequence of Spectrum objects.
+        """
 
 
 class MolecularFamilyLoaderBase(ABC):
+    """Abstract base class for MolecularFamilyLoader."""
+
     @abstractmethod
-    def get_mfs(self, keep_singleton: bool) -> Sequence["MolecularFamily"]:
+    def get_mfs(self, keep_singleton: bool) -> list["MolecularFamily"]:
         """Get MolecularFamily objects.
 
         Args:
@@ -26,17 +30,31 @@ def get_mfs(self, keep_singleton: bool) -> Sequence["MolecularFamily"]:
                 only one spectrum.
 
         Returns:
-            A list of MolecularFamily objects.
+            A sequence of MolecularFamily objects.
         """
 
 
 class FileMappingLoaderBase(ABC):
+    """Abstract base class for FileMappingLoader."""
+
     @property
     @abstractmethod
-    def mappings(self) -> dict[str, list[str]]: ...
+    def mappings(self) -> dict[str, list[str]]:
+        """Get file mappings.
+
+        Returns:
+            A mapping from spectrum ID to the names of files where the spectrum occurs.
+        """
 
 
 class AnnotationLoaderBase(ABC):
+    """Abstract base class for AnnotationLoader."""
+
     @property
     @abstractmethod
-    def annotations(self) -> dict[str, dict]: ...
+    def annotations(self) -> dict[str, dict]:
+        """Get annotations.
+
+        Returns:
+            A mapping from spectrum ID to its annotations.
+        """
diff --git a/src/nplinker/metabolomics/gnps/gnps_downloader.py b/src/nplinker/metabolomics/gnps/gnps_downloader.py
index b0febe07..365d60fb 100644
--- a/src/nplinker/metabolomics/gnps/gnps_downloader.py
+++ b/src/nplinker/metabolomics/gnps/gnps_downloader.py
@@ -1,16 +1,12 @@
 from __future__ import annotations
 from os import PathLike
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing_extensions import Self
 from nplinker.utils import download_url
 from .gnps_format import GNPSFormat
 from .gnps_format import gnps_format_from_task_id
 
 
-if TYPE_CHECKING:
-    from typing_extensions import Self
-
-
 class GNPSDownloader:
     """Download GNPS zip archive for the given task id.
 
@@ -92,7 +88,7 @@ def get_task_id(self) -> str:
         return self._task_id
 
     def get_url(self) -> str:
-        """Get the full URL linking to GNPS data to be dowloaded.
+        """Get the full URL linking to GNPS data to be downloaded.
 
         Returns:
             URL pointing to the GNPS data to be downloaded.
diff --git a/src/nplinker/metabolomics/gnps/gnps_extractor.py b/src/nplinker/metabolomics/gnps/gnps_extractor.py
index 7d71f089..f393e830 100644
--- a/src/nplinker/metabolomics/gnps/gnps_extractor.py
+++ b/src/nplinker/metabolomics/gnps/gnps_extractor.py
@@ -19,7 +19,7 @@ class GNPSExtractor:
     - annotations.tsv
 
     The files to be extracted are selected based on the GNPS workflow type,
-    as desribed below (in the order of the files above):
+    as described below (in the order of the files above):
 
     1. METABOLOMICS-SNETS
         - clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.tsv
diff --git a/src/nplinker/metabolomics/gnps/gnps_format.py b/src/nplinker/metabolomics/gnps/gnps_format.py
index 5014682d..96fa083e 100644
--- a/src/nplinker/metabolomics/gnps/gnps_format.py
+++ b/src/nplinker/metabolomics/gnps/gnps_format.py
@@ -105,7 +105,7 @@ def gnps_format_from_archive(zip_file: str | PathLike) -> GNPSFormat:
 def gnps_format_from_file_mapping(file: str | PathLike) -> GNPSFormat:
     """Detect GNPS format from the given file mapping file.
 
-    The GNSP file mapping file is located in different folders depending on the
+    The GNPS file mapping file is located in different folders depending on the
     GNPS workflow. Here are the locations in corresponding GNPS zip archives:
 
     - METABOLOMICS-SNETS workflow: the .tsv file under folder "clusterinfosummarygroup_attributes_withIDs_withcomponentID"
diff --git a/src/nplinker/metabolomics/molecular_family.py b/src/nplinker/metabolomics/molecular_family.py
index 16d9bd6a..7988aa0c 100644
--- a/src/nplinker/metabolomics/molecular_family.py
+++ b/src/nplinker/metabolomics/molecular_family.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 from typing import TYPE_CHECKING
-from ..strain.strain import Strain
-from ..strain.strain_collection import StrainCollection
+from nplinker.strain import Strain
+from nplinker.strain import StrainCollection
 
 
 if TYPE_CHECKING:
diff --git a/src/nplinker/metabolomics/spectrum.py b/src/nplinker/metabolomics/spectrum.py
index 5ec7ccb2..841fe2a8 100644
--- a/src/nplinker/metabolomics/spectrum.py
+++ b/src/nplinker/metabolomics/spectrum.py
@@ -19,7 +19,7 @@ class Spectrum:
         intensity: the list of intensity values.
         precursor_mz: the m/z value of the precursor.
         rt: the retention time in seconds.
-        metadata: the metadata of the spectrum, i.e. the header infomation in the MGF
+        metadata: the metadata of the spectrum, i.e. the header information in the MGF
             file.
         gnps_annotations: the GNPS annotations of the spectrum.
         gnps_id: the GNPS ID of the spectrum.
@@ -45,7 +45,7 @@ def __init__(
             intensity: the list of intensity values.
             precursor_mz: the precursor m/z.
             rt: the retention time in seconds. Defaults to 0.
-            metadata: the metadata of the spectrum, i.e. the header infomation
+            metadata: the metadata of the spectrum, i.e. the header information
                 in the MGF file.
         """
         self.id = id
diff --git a/src/nplinker/metabolomics/utils.py b/src/nplinker/metabolomics/utils.py
index a8a53aef..1110fb7d 100644
--- a/src/nplinker/metabolomics/utils.py
+++ b/src/nplinker/metabolomics/utils.py
@@ -3,6 +3,8 @@
 import logging
 from os import PathLike
 from pathlib import Path
+from typing import Mapping
+from typing import Sequence
 from nplinker.schemas import validate_podp_json
 from nplinker.strain import StrainCollection
 from .gnps.gnps_file_mapping_loader import GNPSFileMappingLoader
@@ -13,8 +15,10 @@
 logger = logging.getLogger(__name__)
 
 
-def add_annotation_to_spectrum(annotations: dict[str, dict], spectra: list[Spectrum]) -> None:
-    """Add GNPS annotations to the `Spectrum.gnps_annotaions` attribute for input spectra.
+def add_annotation_to_spectrum(
+    annotations: Mapping[str, dict], spectra: Sequence[Spectrum]
+) -> None:
+    """Add GNPS annotations to the `Spectrum.gnps_annotations` attribute for input spectra.
 
     It is possible that some spectra don't have annotations.
     Note that the input `spectra` list is changed in place.
@@ -30,7 +34,7 @@ def add_annotation_to_spectrum(annotations: dict[str, dict], spectra: list[Spect
 
 
 def add_strains_to_spectrum(
-    strains: StrainCollection, spectra: list[Spectrum]
+    strains: StrainCollection, spectra: Sequence[Spectrum]
 ) -> tuple[list[Spectrum], list[Spectrum]]:
     """Add `Strain` objects to the `Spectrum.strains` attribute for input spectra.
 
@@ -45,7 +49,7 @@ def add_strains_to_spectrum(
 
             - the first list contains Spectrum objects that are updated with Strain objects;
             - the second list contains Spectrum objects that are not updated with Strain objects
-            becuase no Strain objects are found.
+            because no Strain objects are found.
     """
     spectra_with_strains = []
     spectra_without_strains = []
@@ -69,7 +73,7 @@ def add_strains_to_spectrum(
 
 
 def add_spectrum_to_mf(
-    spectra: list[Spectrum], mfs: list[MolecularFamily]
+    spectra: Sequence[Spectrum], mfs: Sequence[MolecularFamily]
 ) -> tuple[list[MolecularFamily], list[MolecularFamily], dict[MolecularFamily, set[str]]]:
     """Add Spectrum objects to MolecularFamily objects.
 
@@ -186,8 +190,8 @@ def extract_mappings_ms_filename_spectrum_id(
 
 
 def get_mappings_strain_id_spectrum_id(
-    mappings_strain_id_ms_filename: dict[str, set[str]],
-    mappings_ms_filename_spectrum_id: dict[str, set[str]],
+    mappings_strain_id_ms_filename: Mapping[str, set[str]],
+    mappings_ms_filename_spectrum_id: Mapping[str, set[str]],
 ) -> dict[str, set[str]]:
     """Get mappings "strain_id <-> spectrum_id".
 
diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py
index 60e20be7..33e07bb7 100644
--- a/src/nplinker/nplinker.py
+++ b/src/nplinker/nplinker.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 import logging
+from os import PathLike
 from pprint import pformat
-from typing import TYPE_CHECKING
+from typing import Sequence
 from typing import TypeVar
 from typing import overload
 from . import setup_logging
@@ -13,16 +14,11 @@
 from .loader import DatasetLoader
 from .metabolomics import MolecularFamily
 from .metabolomics import Spectrum
+from .scoring.link_graph import LinkGraph
 from .scoring.metcalf_scoring import MetcalfScoring
 from .strain import StrainCollection
 
 
-if TYPE_CHECKING:
-    from os import PathLike
-    from typing import Sequence
-    from nplinker.scoring.link_graph import LinkGraph
-
-
 logger = logging.getLogger(__name__)
 
 ObjectType = TypeVar("ObjectType", BGC, GCF, Spectrum, MolecularFamily)
diff --git a/src/nplinker/schemas/user_strains.json b/src/nplinker/schemas/user_strains.json
index 64949566..35b0e7af 100644
--- a/src/nplinker/schemas/user_strains.json
+++ b/src/nplinker/schemas/user_strains.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://raw.githubusercontent.com/NPLinker/nplinker/main/src/nplinker/schemas/user_strains.json",
-  "title": "User specificed strains",
+  "title": "User specified strains",
   "description": "A list of strain IDs specified by user",
   "type": "object",
   "required": [
@@ -11,7 +11,7 @@
     "strain_ids": {
       "type": "array",
       "title": "Strain IDs",
-      "description": "A list of strain IDs specificed by user. The strain IDs must be the same as the ones in the strain mappings file.",
+      "description": "A list of strain IDs specified by user. The strain IDs must be the same as the ones in the strain mappings file.",
       "items": {
         "type": "string",
         "minLength": 1
diff --git a/src/nplinker/schemas/utils.py b/src/nplinker/schemas/utils.py
index ce47d946..c0a9bd23 100644
--- a/src/nplinker/schemas/utils.py
+++ b/src/nplinker/schemas/utils.py
@@ -13,7 +13,7 @@ def validate_podp_json(json_data: dict) -> None:
     All validation error messages are collected and raised as a single
     ValueError.
 
-    Parameters:
+    Args:
         json_data: The JSON data to validate.
 
     Raises:
diff --git a/src/nplinker/scoring/abc.py b/src/nplinker/scoring/abc.py
index fa287190..cad3a88e 100644
--- a/src/nplinker/scoring/abc.py
+++ b/src/nplinker/scoring/abc.py
@@ -3,11 +3,11 @@
 from abc import ABC
 from abc import abstractmethod
 from typing import TYPE_CHECKING
+from .link_graph import LinkGraph
 
 
 if TYPE_CHECKING:
     from nplinker.nplinker import NPLinker
-    from .link_graph import LinkGraph
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/nplinker/scoring/iokr/mk_fprints.py b/src/nplinker/scoring/iokr/mk_fprints.py
index 13874e19..a0abf7a4 100644
--- a/src/nplinker/scoring/iokr/mk_fprints.py
+++ b/src/nplinker/scoring/iokr/mk_fprints.py
@@ -101,7 +101,3 @@ def fingerprint_from_inchi(inchi, fingerprint_type=None):
     for fp_bit in range(fp_size):
         fp_array[fp_bit] = fp.get(fp_bit)
     return fp_array
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/nplinker/scoring/iokr/spectrum_filters.py b/src/nplinker/scoring/iokr/spectrum_filters.py
index 195feac0..eeb4da50 100644
--- a/src/nplinker/scoring/iokr/spectrum_filters.py
+++ b/src/nplinker/scoring/iokr/spectrum_filters.py
@@ -16,6 +16,7 @@
 import os
 import pickle
 import numpy
+
 # import sys
 # sys.path.append('/home/grimur/git/lda')
 # from lda.code.formula import Formula
diff --git a/src/nplinker/scoring/link_graph.py b/src/nplinker/scoring/link_graph.py
index 96596dc4..ce1a06b6 100644
--- a/src/nplinker/scoring/link_graph.py
+++ b/src/nplinker/scoring/link_graph.py
@@ -83,7 +83,7 @@ class LinkGraph:
     """
 
     def __init__(self) -> None:
-        self._g = Graph()
+        self._g: Graph = Graph()
 
     def __str__(self) -> str:
         """Get a short summary of the LinkGraph."""
@@ -113,7 +113,7 @@ def __getitem__(
         except KeyError:
             raise KeyError(f"{u} not found in the link graph.")
 
-        return {**links}
+        return {**links}  # type: ignore
 
     @property
     def links(
@@ -191,4 +191,4 @@ def get_link_data(
             A dictionary of scoring methods and their data for the link between the two objects, or
             None if there is no link between the two objects.
         """
-        return self._g.get_edge_data(u, v)
+        return self._g.get_edge_data(u, v)  # type: ignore
diff --git a/src/nplinker/scoring/metcalf_scoring.py b/src/nplinker/scoring/metcalf_scoring.py
index 9c1df113..9985b710 100644
--- a/src/nplinker/scoring/metcalf_scoring.py
+++ b/src/nplinker/scoring/metcalf_scoring.py
@@ -3,6 +3,7 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 from typing import TypeVar
+from typing import overload
 import numpy as np
 import pandas as pd
 from scipy.stats import hypergeom
@@ -19,7 +20,7 @@
 
 
 if TYPE_CHECKING:
-    from ..nplinker import NPLinker
+    from nplinker.nplinker import NPLinker
 
 
 logger = logging.getLogger(__name__)
@@ -117,14 +118,14 @@ def setup(cls, npl: NPLinker):
             cls.presence_spec_strain, cls.presence_gcf_strain, cls.metcalf_weights
         )
         cls.raw_score_spec_gcf = raw_score_spec_gcf.reset_index().melt(id_vars="index")
-        cls.raw_score_spec_gcf.columns = ["spec", "gcf", "score"]
+        cls.raw_score_spec_gcf.columns = ["spec", "gcf", "score"]  # type: ignore
 
         # calculate raw Metcalf scores for spec-gcf links
         raw_score_mf_gcf = cls._calc_raw_score(
             cls.presence_mf_strain, cls.presence_gcf_strain, cls.metcalf_weights
         )
         cls.raw_score_mf_gcf = raw_score_mf_gcf.reset_index().melt(id_vars="index")
-        cls.raw_score_mf_gcf.columns = ["mf", "gcf", "score"]
+        cls.raw_score_mf_gcf.columns = ["mf", "gcf", "score"]  # type: ignore
 
         # calculate mean and std for standardising Metcalf scores
         cls.metcalf_mean, cls.metcalf_std = cls._calc_mean_std(
@@ -133,7 +134,14 @@ def setup(cls, npl: NPLinker):
 
         logger.info("MetcalfScoring.setup completed")
 
-    def get_links(self, *objects: ObjectType, **parameters) -> LinkGraph:
+    @overload
+    def get_links(self, *objects: GCF, **parameters) -> LinkGraph: ...
+    @overload
+    def get_links(self, *objects: Spectrum, **parameters) -> LinkGraph: ...
+    @overload
+    def get_links(self, *objects: MolecularFamily, **parameters) -> LinkGraph: ...
+
+    def get_links(self, *objects, **parameters):
         """Get links for the given objects.
 
         Args:
@@ -348,11 +356,11 @@ def _calc_standardised_score(self, raw_scores: list[pd.DataFrame]) -> list[pd.Da
 
             for row in raw_score_df.itertuples(index=False):
                 met = row.spec if raw_score_df.name == LinkType.SPEC_GCF else row.mf
-                n_gcf_strains = len(row.gcf.strains)
-                n_met_strains = len(met.strains)
+                n_gcf_strains = len(row.gcf.strains)  # type: ignore
+                n_met_strains = len(met.strains)  # type: ignore
 
-                mean = self.metcalf_mean[n_met_strains][n_gcf_strains]
-                sqrt = self.metcalf_std[n_met_strains][n_gcf_strains]
+                mean = self.metcalf_mean[n_met_strains][n_gcf_strains]  # type: ignore
+                sqrt = self.metcalf_std[n_met_strains][n_gcf_strains]  # type: ignore
 
                 z_score = (row.score - mean) / sqrt
 
diff --git a/src/nplinker/scoring/utils.py b/src/nplinker/scoring/utils.py
index 418fc1ec..35b2fbff 100644
--- a/src/nplinker/scoring/utils.py
+++ b/src/nplinker/scoring/utils.py
@@ -1,15 +1,10 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING
 from typing import Sequence
-import numpy as np
 import pandas as pd
-
-
-if TYPE_CHECKING:
-    from nplinker.genomics import GCF
-    from nplinker.metabolomics import MolecularFamily
-    from nplinker.metabolomics import Spectrum
-    from nplinker.strain import StrainCollection
+from nplinker.genomics import GCF
+from nplinker.metabolomics import MolecularFamily
+from nplinker.metabolomics import Spectrum
+from nplinker.strain import StrainCollection
 
 
 def get_presence_gcf_strain(gcfs: Sequence[GCF], strains: StrainCollection) -> pd.DataFrame:
@@ -19,16 +14,16 @@ def get_presence_gcf_strain(gcfs: Sequence[GCF], strains: StrainCollection) -> p
-    values are 1 if the gcf occurs in the strain,  0 otherwise.
+    values are 1 if the gcf occurs in the strain, 0 otherwise.
     """
     df_gcf_strain = pd.DataFrame(
-        np.zeros((len(gcfs), len(strains))),
+        0,
         index=gcfs,
         columns=list(strains),
         dtype=int,
-    )
+    )  # type: ignore
     for gcf in gcfs:
         for strain in strains:
             if gcf.has_strain(strain):
                 df_gcf_strain.loc[gcf, strain] = 1
-    return df_gcf_strain
+    return df_gcf_strain  # type: ignore
 
 
 def get_presence_spec_strain(
@@ -40,16 +35,16 @@ def get_presence_spec_strain(
     the values are 1 if the spectrum occurs in the strain, 0 otherwise.
     """
     df_spec_strain = pd.DataFrame(
-        np.zeros((len(spectra), len(strains))),
+        0,
         index=spectra,
         columns=list(strains),
         dtype=int,
-    )
+    )  # type: ignore
     for spectrum in spectra:
         for strain in strains:
             if spectrum.has_strain(strain):
                 df_spec_strain.loc[spectrum, strain] = 1
-    return df_spec_strain
+    return df_spec_strain  # type: ignore
 
 
 def get_presence_mf_strain(
@@ -61,13 +56,13 @@ def get_presence_mf_strain(
     columns, and the values are 1 if the molecular family occurs in the strain, 0 otherwise.
     """
     df_mf_strain = pd.DataFrame(
-        np.zeros((len(mfs), len(strains))),
+        0,
         index=mfs,
         columns=list(strains),
         dtype=int,
-    )
+    )  # type: ignore
     for mf in mfs:
         for strain in strains:
             if mf.has_strain(strain):
                 df_mf_strain.loc[mf, strain] = 1
-    return df_mf_strain
+    return df_mf_strain  # type: ignore
diff --git a/src/nplinker/strain/utils.py b/src/nplinker/strain/utils.py
index 0443067e..25af274a 100644
--- a/src/nplinker/strain/utils.py
+++ b/src/nplinker/strain/utils.py
@@ -97,7 +97,7 @@ def podp_generate_strain_mappings(
             "MS_filename <-> spectrum_id".
         - `get_mappings_strain_id_spectrum_id`: Get mappings "strain_id <-> spectrum_id".
     """
-    # Get mappings strain_id <-> original_geonme_id <-> resolved_genome_id <-> bgc_id
+    # Get mappings strain_id <-> original_genome_id <-> resolved_genome_id <-> bgc_id
     mappings_strain_id_bgc_id = get_mappings_strain_id_bgc_id(
         extract_mappings_strain_id_original_genome_id(podp_project_json_file),
         extract_mappings_original_genome_id_resolved_genome_id(genome_status_json_file),
diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py
index aedccfa4..c28d14fd 100644
--- a/src/nplinker/utils.py
+++ b/src/nplinker/utils.py
@@ -27,7 +27,7 @@
 from os import PathLike
 from pathlib import Path
 from typing import IO
-from typing import Callable
+from typing import Callable, Sequence
 import httpx
 from rich.progress import BarColumn
 from rich.progress import DownloadColumn
@@ -109,6 +109,15 @@ def is_file_format(file: str | PathLike, format: str = "tsv") -> bool:
 
 
 def calculate_md5(fpath: str | PathLike, chunk_size: int = 1024 * 1024) -> str:
+    """Calculate the MD5 checksum of a file.
+
+    Args:
+        fpath: Path to the file.
+        chunk_size: Chunk size for reading the file. Defaults to 1024*1024.
+
+    Returns:
+        MD5 checksum of the file.
+    """
     if sys.version_info >= (3, 9):
         md5 = hashlib.md5(usedforsecurity=False)
     else:
@@ -120,6 +129,15 @@ def calculate_md5(fpath: str | PathLike, chunk_size: int = 1024 * 1024) -> str:
 
 
 def check_md5(fpath: str | PathLike, md5: str) -> bool:
+    """Verify the MD5 checksum of a file.
+
+    Args:
+        fpath: Path to the file.
+        md5: MD5 checksum to verify.
+
+    Returns:
+        True if the MD5 checksum matches, False otherwise.
+    """
     return md5 == calculate_md5(fpath)
 
 
@@ -238,7 +256,7 @@ def list_files(
 def _extract_tar(
     from_path: str | PathLike,
     to_path: str | PathLike,
-    members: list[tarfile.TarInfo] | None,
+    members: Sequence[tarfile.TarInfo] | None,
     compression: str | None,
 ) -> None:
     with tarfile.open(from_path, f"r:{compression[1:]}" if compression else "r") as tar:
@@ -254,7 +272,7 @@ def _extract_tar(
 def _extract_zip(
     from_path: str | PathLike,
     to_path: str | PathLike,
-    members: list[str | zipfile.ZipInfo] | None,
+    members: Sequence[str | zipfile.ZipInfo] | None,
     compression: str | None,
 ) -> None:
     with zipfile.ZipFile(
@@ -380,7 +398,7 @@ def extract_archive(
             If omitted, the directory of the archive file is used.
         members: Optional selection of members to extract. If not specified,
             all members are extracted.
-            Memers must be a subset of the list returned by
+            Members must be a subset of the list returned by
             - `zipfile.ZipFile.namelist()` or a list of strings for zip file
             - `tarfile.TarFile.getmembers()` for tar file
         remove_finished: If `True`, remove the file after the extraction.
diff --git a/tests/unit/genomics/test_mibig_downloader.py b/tests/unit/genomics/test_mibig_downloader.py
index 2e678ba2..2c00c2b4 100644
--- a/tests/unit/genomics/test_mibig_downloader.py
+++ b/tests/unit/genomics/test_mibig_downloader.py
@@ -2,7 +2,7 @@
 from nplinker.genomics import mibig
 
 
-class TestDownloadAndExtractMibigMetadatas:
+class TestDownloadAndExtractMibigMetadata:
     def test_default(self, tmp_path):
         download_path = tmp_path / "download"
         extract_path = tmp_path / "metadata"
diff --git a/tests/unit/genomics/test_mibig_loader.py b/tests/unit/genomics/test_mibig_loader.py
index 1188971b..8722a6e0 100644
--- a/tests/unit/genomics/test_mibig_loader.py
+++ b/tests/unit/genomics/test_mibig_loader.py
@@ -48,13 +48,13 @@ def test_parse_data_dir(self, data_dir):
         assert isinstance(files["BGC0000001"], str)
         assert os.path.exists(files["BGC0000001"])
 
-    def test_get_metadatas(self, loader):
-        metadatas = loader.get_metadatas()
-        assert isinstance(metadatas, dict)
-        assert len(metadatas) == 2502  # MIBiG v3.1 has 2502 BGCs
-        assert "BGC0000001" in metadatas
-        assert "BGC0000246" not in metadatas
-        assert isinstance(metadatas["BGC0000001"], MibigMetadata)
+    def test_get_metadata(self, loader):
+        metadata = loader.get_metadata()
+        assert isinstance(metadata, dict)
+        assert len(metadata) == 2502  # MIBiG v3.1 has 2502 BGCs
+        assert "BGC0000001" in metadata
+        assert "BGC0000246" not in metadata
+        assert isinstance(metadata["BGC0000001"], MibigMetadata)
 
     def test_get_bgcs(self, loader):
         bgcs = loader.get_bgcs()
diff --git a/tests/unit/schemas/test_genome_bgc_mappings_schema.py b/tests/unit/schemas/test_genome_bgc_mappings_schema.py
index 4617558c..ebc1e6d8 100644
--- a/tests/unit/schemas/test_genome_bgc_mappings_schema.py
+++ b/tests/unit/schemas/test_genome_bgc_mappings_schema.py
@@ -47,7 +47,7 @@
 }
 
 
-# Test schema aginast invalid data
+# Test schema against invalid data
 @pytest.mark.parametrize(
     "data, expected",
     [
@@ -72,7 +72,7 @@ def test_invalid_data(data, expected):
     assert e.value.message == expected
 
 
-# Test schema aginast valid data
+# Test schema against valid data
 def test_valid_data():
     data = {
         "mappings": [
diff --git a/tests/unit/schemas/test_genome_status_schema.py b/tests/unit/schemas/test_genome_status_schema.py
index cdea276f..de0679a5 100644
--- a/tests/unit/schemas/test_genome_status_schema.py
+++ b/tests/unit/schemas/test_genome_status_schema.py
@@ -103,7 +103,7 @@
 }
 
 
-# Test schema aginast invalid data
+# Test schema against invalid data
 @pytest.mark.parametrize(
     "data, expected",
     [
@@ -129,7 +129,7 @@ def test_invalid_data(data, expected):
     assert e.value.message == expected
 
 
-# Test schema aginast valid data
+# Test schema against valid data
 def test_valid_data():
     data = {
         "genome_status": [
diff --git a/tests/unit/schemas/test_strain_mappings_schema.py b/tests/unit/schemas/test_strain_mappings_schema.py
index 5a45f979..46d00d0c 100644
--- a/tests/unit/schemas/test_strain_mappings_schema.py
+++ b/tests/unit/schemas/test_strain_mappings_schema.py
@@ -58,7 +58,7 @@
 }
 
 
-# Test schema aginast invalid data
+# Test schema against invalid data
 @pytest.mark.parametrize(
     "data, expected",
     [
@@ -83,7 +83,7 @@ def test_invalid_data(data, expected):
     assert e.value.message == expected
 
 
-# Test schema aginast valid data
+# Test schema against valid data
 def test_valid_data():
     data = {
         "strain_mappings": [
diff --git a/tests/unit/schemas/test_user_strains_schema.py b/tests/unit/schemas/test_user_strains_schema.py
index 423b84e0..5a4e37ac 100644
--- a/tests/unit/schemas/test_user_strains_schema.py
+++ b/tests/unit/schemas/test_user_strains_schema.py
@@ -4,10 +4,15 @@
 from nplinker.schemas import USER_STRAINS_SCHEMA
 
 
-# Test schema aginast invalid data
+# Test schema against invalid data
 data_no_strain_ids = {"version": "1.0"}
 data_empty_strain_ids = {"strain_ids": [], "version": "1.0"}
-data_invalid_strain_ids = {"strain_ids": [1, ], "version": "1.0"}
+data_invalid_strain_ids = {
+    "strain_ids": [
+        1,
+    ],
+    "version": "1.0",
+}
 data_empty_version = {"strain_ids": ["strain1", "strain2"], "version": ""}
 data_invalid_version = {"strain_ids": ["strain1", "strain2"], "version": "1.0.0"}
 
@@ -29,7 +34,7 @@ def test_invalid_data(data, expected):
     assert e.value.message == expected
 
 
-# Test schema aginast valid data
+# Test schema against valid data
 data = {"strain_ids": ["strain1", "strain2"], "version": "1.0"}
 data_no_version = {"strain_ids": ["strain1", "strain2"]}
 
diff --git a/tests/unit/scoring/conftest.py b/tests/unit/scoring/conftest.py
index ec2f4ad6..009a9252 100644
--- a/tests/unit/scoring/conftest.py
+++ b/tests/unit/scoring/conftest.py
@@ -69,13 +69,10 @@ def npl(gcfs, spectra, mfs, strains, tmp_path) -> NPLinker:
     manually set its attributes to the values we want to test.
 
     The config file `nplinker_demo1.toml` does not affect the tests, just
-    making sure the NPLinker object can be created succesfully.
+    making sure the NPLinker object can be created successfully.
     """
-    os.environ["NPLINKER_ROOT_DIR"] = str(tmp_path)  # Create a tmporary root dir for NPLinker
+    os.environ["NPLINKER_ROOT_DIR"] = str(tmp_path)  # Create a temporary root dir for NPLinker
     npl = NPLinker(CONFIG_FILE_LOCAL_MODE)
-    npl._gcfs = gcfs
-    npl._spectra = spectra
-    npl._mfs = mfs
     npl._strains = strains
     npl._gcf_dict = {gcf.id: gcf for gcf in gcfs}
     npl._mf_dict = {mf.id: mf for mf in mfs}
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index f681cc92..8ced67c3 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -5,7 +5,7 @@
 
 def test_config(tmp_path):
     """Test loading the default config file."""
-    os.environ["NPLINKER_ROOT_DIR"] = str(tmp_path)  # Create a tmporary root dir for NPLinker
+    os.environ["NPLINKER_ROOT_DIR"] = str(tmp_path)  # Create a temporary root dir for NPLinker
     config = load_config(CONFIG_FILE_LOCAL_MODE)
 
     assert config.mode == "local"
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 0242ebe8..29eaf056 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -17,7 +17,7 @@ def test_find_delimiter(filename, expected):
 
 
 BGC_GBK_URL = "https://mibig.secondarymetabolites.org/repository/BGC0000001/BGC0000001.gbk"
-MIBIG_METADATAS_URL = "https://dl.secondarymetabolites.org/mibig/mibig_json_3.1.tar.gz"
+MIBIG_METADATA_URL = "https://dl.secondarymetabolites.org/mibig/mibig_json_3.1.tar.gz"
 ROOT = Path(__file__).parent
 
 
@@ -47,7 +47,7 @@ class TestExtractArchive:
     @pytest.fixture
     def archive(self):
         temppath = mkdtemp()
-        utils.download_url(MIBIG_METADATAS_URL, temppath)
+        utils.download_url(MIBIG_METADATA_URL, temppath)
         archive = Path(temppath) / "mibig_json_3.1.tar.gz"
         yield archive
 
@@ -82,7 +82,7 @@ def temppath2(self):
         rmtree(temppath)
 
     def test_defaults(self, temppath1):
-        utils.download_and_extract_archive(url=MIBIG_METADATAS_URL, download_root=temppath1)
+        utils.download_and_extract_archive(url=MIBIG_METADATA_URL, download_root=temppath1)
 
         fdownload = Path(temppath1) / "mibig_json_3.1.tar.gz"
         fextract = Path(temppath1) / "mibig_json_3.1"
@@ -92,7 +92,7 @@ def test_defaults(self, temppath1):
 
     def test_optional_args(self, temppath1, temppath2):
         utils.download_and_extract_archive(
-            url=MIBIG_METADATAS_URL,
+            url=MIBIG_METADATA_URL,
             download_root=temppath1,
             extract_root=temppath2,
             filename="example.tar.gz",
@@ -108,7 +108,7 @@ def test_optional_args(self, temppath1, temppath2):
 
     def test_arg_remove_finished(self, temppath1):
         utils.download_and_extract_archive(
-            url=MIBIG_METADATAS_URL, download_root=temppath1, remove_finished=True
+            url=MIBIG_METADATA_URL, download_root=temppath1, remove_finished=True
         )
 
         fdownload = Path(temppath1) / "mibig_json_3.1.tar.gz"