diff --git a/CITATION.cff b/CITATION.cff index 754eefddd6..ce93dd58e0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,6 +9,9 @@ authors: given-names: Nick orcid: "https://orcid.org/0000-0003-3038-1855" alias: zerothi + - family-names: Febrer + given-names: Pol + alias: pfebrer title: sisl identifiers: - description: "Collection DOI for any sisl version." diff --git a/CMakeLists.txt b/CMakeLists.txt index d5b15c8fed..bfab99a53b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,19 @@ find_package(Python COMPONENTS REQUIRED) # Retrive the cython executable +message(STATUS "Locating Cython executable") find_program(CYTHON_EXECUTABLE "cython" REQUIRED) +execute_process(COMMAND + "${CYTHON_EXECUTABLE}" --version + OUTPUT_QUIET + OUTPUT_VARIABLE CYTHON_VERSION + RESULT_VARIABLE CYTHON_VERSION_ERR + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Print it out +message(STATUS "Cython version information [${CYTHON_EXECUTABLE} --version]: ${CYTHON_VERSION}") +if(CYTHON_VERSION_ERR) + message(SEND_ERROR "ERROR: Cython version detection failed, this might be problematic") +endif() # Define global definitions @@ -287,7 +299,7 @@ function(add_cython_library) if(NOT DEFINED _c_SOURCE) list(GET _c_UNPARSED_ARGUMENTS 0 _c_SOURCE) endif() - + # Parse simple flags if(NOT DEFINED _c_CYTHON_EXECUTABLE) set(_c_CYTHON_EXECUTABLE "${CYTHON_EXECUTABLE}") @@ -617,7 +629,7 @@ function(add_f2py_library) VERBATIM COMMENT "Generating module file from signature ${_f_SIGNATURE}" ) - + python_add_library(${_f_LIBRARY} WITH_SOABI MODULE "${_module_file}" ${_f_SOURCES} ${_wrappers} ) diff --git a/benchmarks/comparisons/neighbors.py b/benchmarks/comparisons/neighbors.py new file mode 100644 index 0000000000..70294cd686 --- /dev/null +++ b/benchmarks/comparisons/neighbors.py @@ -0,0 +1,90 @@ +import timeit +from collections import defaultdict + +from ase.neighborlist import neighbor_list + +from sisl.geom import NeighborFinder, fcc, graphene_nanoribbon + + +def quadratic(geom): + neighs = [] + for at in geom: + neighs.append(geom.close(at, R=1.5)) + + +what = "fcc" +times = defaultdict(list) +na = [] + +# Iterate over multiple tiles +for tiles in range(1, 11): + print(tiles) + + if what == "fcc": + geom = fcc(1.5, "C").tile(3 * tiles, 1).tile(3 * tiles, 2).tile(3 * tiles, 0) + elif what == "ribbon": + geom = graphene_nanoribbon(9).tile(tiles, 0) + + na.append(geom.na) + + # Compute with ASE + ase_time = timeit.timeit( + lambda: neighbor_list("ij", geom.to.ase(), 1.5, self_interaction=False), + number=1, + ) + times["ASE"].append(ase_time) + + for bs in [4, 8, 12]: + print(" bs = ", bs) + + # Compute with this implementation (different variants) + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=True + ), + number=1, + ) + times[f"(PAIRS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=False + ), + number=1, + ) + times[f"(NEIGHS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=False + ), + number=1, + ) + times[f"(NEIGHS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_all_unique_pairs(), + number=1, + ) + times[f"(UNIQUE PAIRS) [{bs}]"].append(my_time) + + # Compute with quadratic search + # quadratic_time = timeit.timeit(lambda: quadratic(geom), number=1) + # times["QUADRATIC"].append(quadratic_time) + +# Plotting +import 
plotly.graph_objs as go + +fig = go.Figure() +for key, time in times.items(): + fig.add_scatter(x=na, y=time, name=key) + +fig.update_layout( + xaxis_title="Number of atoms", + yaxis_title="Time (s)", + title=f"Finding neighbours on {what}", + yaxis_showgrid=True, + xaxis_showgrid=True, +) +fig.write_image(f"neighbors_{what}.png") +fig.show() diff --git a/ci/doc.yml b/ci/doc.yml index 3cab8b1e59..33e2c30aa8 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -12,7 +12,7 @@ dependencies: - pyproject-metadata - pyparsing>=1.5.7 - numpy>=1.13 - - cython>=0.29.28 + - cython>=3.0.0 - scipy>=1.5.0 - netcdf4 - xarray>=0.10.0 diff --git a/docs/api/basic.rst b/docs/api/basic.rst index 94eae50f45..f38d2a9754 100644 --- a/docs/api/basic.rst +++ b/docs/api/basic.rst @@ -66,6 +66,6 @@ In particular `oplist` is useful when calculating averages in Brillouin zones (s .. autosummary:: :toctree: generated/ + NeighborFinder oplist ~sisl.utils.PropertyDict - diff --git a/pyproject.toml b/pyproject.toml index 44599e56b3..374fcb80e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ requires = [ "setuptools_scm[toml]>=6.2", "scikit-build-core[pyproject]", - "Cython>=0.29.28", + "Cython>=3", # see https://github.com/scipy/oldest-supported-numpy/ # should fix #310 "oldest-supported-numpy; sys_platform != 'win32'", @@ -49,7 +49,8 @@ dependencies = [ ] authors = [ - {name = "Nick Papior", email = "nickpapior@gmail.com"} + {name = "Nick Papior", email = "nickpapior@gmail.com"}, + {name = "Pol Febrer"} ] maintainers = [{name="sisl developers"}] @@ -136,9 +137,9 @@ analysis = [ ] viz = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "plotly", @@ -147,34 +148,34 @@ viz = [ ] viz-plotly = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "plotly", ] viz-matplotlib = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "matplotlib", ] viz-blender = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", ] viz-ase = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "ase", @@ -186,10 +187,10 @@ test = [ "pytest-env", "pytest-faulthandler", "coveralls", + "netCDF4", "tqdm", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "matplotlib", "plotly", diff --git a/src/sisl/CMakeLists.txt b/src/sisl/CMakeLists.txt index 76089d1579..9a77620e57 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -12,3 +12,4 @@ endforeach() add_subdirectory("_core") add_subdirectory("io") add_subdirectory("physics") +add_subdirectory("geom") diff --git a/src/sisl/conftest.py b/src/sisl/conftest.py index 7cfd21df99..79a59563b9 100644 --- a/src/sisl/conftest.py +++ b/src/sisl/conftest.py @@ -227,6 +227,7 @@ def pytest_configure(config): "hamiltonian", "geometry", "geom", + "neighbor", "shape", "state", "electron", diff --git a/src/sisl/geom/CMakeLists.txt b/src/sisl/geom/CMakeLists.txt new file mode 100644 index 0000000000..25c1ecc0a4 --- /dev/null +++ b/src/sisl/geom/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory("_neighbors") diff --git a/src/sisl/geom/__init__.py b/src/sisl/geom/__init__.py index 2dd2becd94..33e178f5f0 100644 --- a/src/sisl/geom/__init__.py +++ b/src/sisl/geom/__init__.py @@ -43,6 +43,7 @@ """ from ._composite import * +from ._neighbors import * from .basic import * from .bilayer import * from .category import * diff --git a/src/sisl/geom/_neighbors/CMakeLists.txt b/src/sisl/geom/_neighbors/CMakeLists.txt new file mode 100644 index 0000000000..e48ed38047 --- /dev/null +++ 
b/src/sisl/geom/_neighbors/CMakeLists.txt @@ -0,0 +1,11 @@ +# In this directory we have a set of libraries +# We will need to link to the Numpy includes +foreach(source _operations) + add_cython_library( + SOURCE ${source}.py + LIBRARY ${source} + OUTPUT ${source}_C + ) + install(TARGETS ${source} LIBRARY + DESTINATION ${SKBUILD_PROJECT_NAME}/geom/_neighbors) +endforeach() diff --git a/src/sisl/geom/_neighbors/__init__.py b/src/sisl/geom/_neighbors/__init__.py new file mode 100644 index 0000000000..e75df2478b --- /dev/null +++ b/src/sisl/geom/_neighbors/__init__.py @@ -0,0 +1 @@ +from ._finder import NeighborFinder diff --git a/src/sisl/geom/_neighbors/_finder.py b/src/sisl/geom/_neighbors/_finder.py new file mode 100644 index 0000000000..15791687b1 --- /dev/null +++ b/src/sisl/geom/_neighbors/_finder.py @@ -0,0 +1,684 @@ +from typing import Optional, Sequence, Tuple, Union + +import numpy as np + +from sisl import Geometry +from sisl.typing import AtomsArgument +from sisl.utils import size_to_elements + +from . import _operations + + +class NeighborFinder: + """Efficient linear scaling finding of neighbors. + + Once this class is instantiated, a table is build. Then, + the neighbor finder can be queried as many times as wished + as long as the geometry doesn't change. + + Note that the radius (`R`) is used to build the table. + Therefore, if one wants to look for neighbors using a different + R, one needs to create another finder or call `setup`. + + Parameters + ---------- + geometry: sisl.Geometry + the geometry to find neighbors in + R: float or array-like of shape (geometry.na), optional + The radius to consider two atoms neighbors. + If it is a single float, the same radius is used for all atoms. + If it is an array, it should contain the radius for each atom. + + If not provided, an array is constructed, where the radius for + each atom is its maxR. + overlap: bool, optional + If `True`, two atoms are considered neighbors if their spheres + overlap. + If `False`, two atoms are considered neighbors if the second atom + is within the sphere of the first atom. Note that this implies that + atom ``i`` might be atom ``j``'s neighbor while the opposite is not true. + + If not provided, it will be `True` if `R` is an array and `False` if + it is a single float. + bin_size : float or tuple of float, optional + the factor for the radius to determine how large the bins are, + optionally along each lattice vector. + It can minimally be 2, meaning that the maximum radius to consider + is twice the radius considered. For larger values, more atoms will be in + each bin (and thus fewer bins). + Hence, this value can be used to fine-tune the memory requirement by + decreasing number of bins, at the cost of a bit more run-time searching + bins. + """ + + #: Memory control of the finder + memory: Tuple[str, float] = ("200MB", 1.5) + + geometry: Geometry + # Geometry actually used for binning. Can be the provided geometry + # or a tiled geometry if the search radius is too big (compared to the lattice size). 
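+    # Tiling ensures that a bin can always be at least as large as the search
+    # diameter, so a query never needs to look beyond the 2**3 = 8 bins
+    # around a position.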
+ _bins_geometry: Geometry + + nbins: Tuple[int, int, int] + total_nbins: int + + R: np.ndarray + _aux_R: np.ndarray + _overlap: bool + + # Data structure + _list: np.ndarray # (natoms, ) + _heads: np.ndarray # (total_nbins, ) + _counts: np.ndarray # (total_nbins, ) + + def __init__( + self, + geometry: Geometry, + R: Optional[Union[float, np.ndarray]] = None, + overlap: bool = False, + bin_size: Union[float, Tuple[float, float, float]] = 2, + ): + self.setup(geometry, R=R, overlap=overlap, bin_size=bin_size) + + def setup( + self, + geometry: Optional[Geometry] = None, + R: Optional[Union[float, np.ndarray]] = None, + overlap: bool = None, + bin_size: Union[float, Tuple[float, float, float]] = 2, + ): + """Prepares everything for neighbor finding. + + Parameters + ---------- + geometry: sisl.Geometry, optional + the geometry to find neighbors in. + + If not provided, the current geometry is kept. + R: float or array-like of shape (geometry.na), optional + The radius to consider two atoms neighbors. + If it is a single float, the same radius is used for all atoms. + If it is an array, it should contain the radius for each atom. + + If not provided, an array is constructed, where the radius for + each atom is its maxR. + + Note that negative R values are allowed + overlap: bool + If `True`, two atoms are considered neighbors if their spheres + overlap. + If `False`, two atoms are considered neighbors if the second atom + is within the sphere of the first atom. Note that this implies that + atom ``i`` might be atom ``j``'s neighbor while the opposite is not true. + bin_size : + the factor for the radius to determine how large the bins are, + optionally along each lattice vector. + It can minimally be 2, meaning that the maximum radius to consider + is twice the radius considered. For larger values, more atoms will be in + each bin (and thus fewer bins). + Hence, this value can be used to fine-tune the memory requirement by + decreasing number of bins, at the cost of a bit more run-time searching + bins. + """ + # Set the geometry. Copy it because we may need to modify the supercell size. + if geometry is not None: + self.geometry = geometry.copy() + + # If R was not provided, build an array of Rs + if R is None: + R = self.geometry.atoms.maxR(all=True) + else: + R = np.asarray(R) + + # Set the radius + self.R = R + self._aux_R = R + + # If sphere overlap was not provided, set it to False if R is a single float + # and True otherwise. + self._overlap = overlap + + # Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS + # only need to look one bin away for neighbors. + max_R = np.max(self.R) + if overlap: + # In the case when we want to check for sphere overlap, the size should + # be twice as big. + max_R *= 2 + + if max_R <= 0: + raise ValueError( + "All R values are 0 or less. Please provide some positive values" + ) + + bin_size = np.asarray(bin_size) + if np.any(bin_size < 2): + raise ValueError( + "The bin_size must be larger than 2 to only search in the " + "neighboring bins. Please increase to a value >=2" + ) + + bin_size = max_R * bin_size + + # We add a small amount to bin_size to avoid ambiguities when + # a position is exactly at the center of a bin. + bin_size += 0.001 + + lattice_sizes = self.geometry.length + + self._R_too_big = np.any(bin_size > lattice_sizes) + if self._R_too_big: + # This means that nsc must be at least 5. + + # We round the amount of cells needed in each direction + # to the closest next odd number. 
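+            # E.g. a bin_size / lattice length ratio of 3.4 gives a ceiling of 4,
+            # which the expression below rounds up to 5 supercells along that direction.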
+ nsc = np.ceil(bin_size / lattice_sizes) // 2 * 2 + 1 + # And then set it as the number of supercells. + self.geometry.set_nsc(nsc.astype(int)) + if self._aux_R.ndim == 1: + self._aux_R = np.tile(self._aux_R, self.geometry.n_s) + + all_xyz = [] + for isc in self.geometry.sc_off: + ats_xyz = self.geometry.axyz(isc=isc) + all_xyz.append(ats_xyz) + + self._bins_geometry = Geometry( + np.concatenate(all_xyz), atoms=self.geometry.atoms + ) + + # Recompute lattice sizes + lattice_sizes = self._bins_geometry.length + + else: + self._bins_geometry = self.geometry + + # Get the number of bins along each cell direction. + nbins_float = lattice_sizes / bin_size + self.nbins = tuple(np.floor(nbins_float).astype(int)) + self.total_nbins = np.prod(self.nbins) + + # Get the scalar bin indices of all atoms + scalar_bin_indices = self._get_bin_indices(self._bins_geometry.fxyz) + + # Build the tables that will allow us to look for neighbors in an efficient + # and linear scaling manner. + self._build_table(scalar_bin_indices) + + def _build_table(self, bin_indices): + """Builds the tables that will allow efficient linear scaling neighbor search. + + Parameters + ---------- + bin_indices: array-like of shape (self.total_nbins, ) + Array containing the scalar bin index for each atom. + """ + # Call the fortran routine that builds the table + self._list, self._heads, self._counts = _operations.build_table( + self.total_nbins, bin_indices + ) + + def assert_consistency(self): + """Asserts that the data structure (self._list, self._heads, self._counts) is consistent. + + It also stores that the shape is consistent with the stored geometry and the store total_nbins. + """ + # Check shapes + assert self._list.shape == (self._bins_geometry.na,) + assert self._counts.shape == self._heads.shape == (self.total_nbins,) + + # Check values + for i_bin, bin_count in enumerate(self._counts): + count = 0 + head = self._heads[i_bin] + while head != -1: + count += 1 + head = self._list[head] + + assert ( + count == bin_count + ), f"Bin {i_bin} has {bin_count} atoms but we found {count} atoms" + + def _get_search_atom_counts(self, scalar_bin_indices): + """Computes the number of atoms that will be explored for each search + + Parameters + ----------- + scalar_bin_indices: np.ndarray of shape ([n_searches], 8) + Array containing the bin indices for each search. + Bin indices should be within the unit cell! + + Returns + ----------- + np.ndarray of shape (n_searches, ): + For each search, the number of atoms that will be involved. + """ + return self._counts[scalar_bin_indices.ravel()].reshape(-1, 8).sum(axis=1) + + def _get_bin_indices(self, fxyz, cartesian=False, floor=True): + """Gets the bin indices for a given fractional coordinate. + + Parameters + ----------- + fxyz: np.ndarray of shape (N, 3) + the fractional coordinates for which we want to get the bin indices. + cartesian: bool, optional + whether the indices should be returned as cartesian. + If `False`, scalar indices are returned. + floor: bool, optional + whether to floor the indices or not. + + If asking for scalar indices (i.e. `cartesian=False`), the indices will + ALWAYS be floored regardless of this argument. + + Returns + -------- + np.ndarray: + The bin indices. If `cartesian=True`, the shape of the array is (N, 3), + otherwise it is (N,). + """ + bin_indices = ((fxyz) % 1) * self.nbins + # Atoms that are exactly at the limit of the cell might have fxyz = 1 + # which would result in a bin index outside of range. + # We just bring it back to the unit cell. 
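+        # The modulo below maps an index that lands exactly at nbins back into
+        # the valid range [0, nbins).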
+ bin_indices = bin_indices % self.nbins + + if floor or not cartesian: + bin_indices = np.floor(bin_indices).astype(int) + + if not cartesian: + bin_indices = self._cartesian_to_scalar_index(bin_indices) + + return bin_indices + + def _get_search_indices(self, fxyz, cartesian=False): + """Gets the bin indices to explore for a given fractional coordinate. + + Given a fractional coordinate, we will need to look for neighbors + in its own bin, and one bin away in each direction. That is, $2^3=8$ bins. + + Parameters + ----------- + fxyz: np.ndarray of shape (N, 3) + the fractional coordinates for which we want to get the search indices. + cartesian: bool, optional + whether the indices should be returned as cartesian. + If `False`, scalar indices are returned. + + Returns + -------- + np.ndarray: + The bin indices where we need to perform the search for each + fractional coordinate. These indices are all inside the unit cell. + If `cartesian=True`, cartesian indices are returned and the array + has shape (N, 8, 3). + If `cartesian=False`, scalar indices are returned and the array + has shape (N, 8). + np.ndarray of shape (N, 8, 3): + The supercell indices of each bin index in the search. + """ + # Get the bin indices for the positions that are requested. + # Note that we don't floor the indices so that we can know to which + # border of the bin are we closer in each direction. + bin_indices = self._get_bin_indices(fxyz, floor=False, cartesian=True) + bin_indices = np.atleast_2d(bin_indices) + + # Determine which is the neighboring cell that we need to look for + # along each direction. + signs = np.ones(bin_indices.shape, dtype=int) + signs[(bin_indices % 1) < 0.5] = -1 + + # Build and arrays with all the indices that we need to look for. Since + # we have to move one bin away in each direction, we have to look for + # neighbors along a total of 8 bins (2**3) + search_indices = np.tile(bin_indices.astype(int), 8).reshape(-1, 8, 3) + + search_indices[:, 1::2, 0] += signs[:, 0].reshape(-1, 1) + search_indices[:, [2, 3, 6, 7], 1] += signs[:, 1].reshape(-1, 1) + search_indices[:, 4:, 2] += signs[:, 2].reshape(-1, 1) + + # Convert search indices to unit cell indices, but keep the supercell indices. + isc, search_indices = np.divmod(search_indices, self.nbins) + + if not cartesian: + search_indices = self._cartesian_to_scalar_index(search_indices) + + return search_indices, isc + + def _cartesian_to_scalar_index(self, index): + """Converts cartesian indices to scalar indices""" + if not np.issubdtype(index.dtype, int): + raise ValueError( + "Decimal scalar indices do not make sense, please floor your cartesian indices." + ) + return index.dot([1, self.nbins[0], self.nbins[0] * self.nbins[1]]) + + def _scalar_to_cartesian_index(self, index): + """Converts cartesian indices to scalar indices""" + if np.min(index) < 0 or np.max(index) > self.total_nbins: + raise ValueError( + "Some scalar indices are not within the unit cell. 
We cannot uniquely convert to cartesian" + ) + + third, index = np.divmod(index, self.nbins[0] * self.nbins[1]) + second, first = np.divmod(index, self.nbins[0]) + return np.array([first, second, third]).T + + def _correct_pairs_R_too_big( + self, + neighbor_pairs: np.ndarray, # (n_pairs, 5) + split_ind: Union[int, np.ndarray], # (n_queried_atoms, ) + pbc: np.ndarray, # (3, ) + ): + """Correction to atom and supercell indices when the binning has been done on a tiled geometry""" + is_sc_neigh = neighbor_pairs[:, 1] >= self.geometry.na + + invalid = None + if not np.any(pbc): + invalid = is_sc_neigh + else: + pbc_neighs = neighbor_pairs.copy() + + sc_neigh, uc_neigh = np.divmod( + neighbor_pairs[:, 1][is_sc_neigh], self.geometry.na + ) + isc_neigh = self.geometry.sc_off[sc_neigh] + + pbc_neighs[is_sc_neigh, 1] = uc_neigh + pbc_neighs[is_sc_neigh, 2:] = isc_neigh + + if not np.all(pbc): + invalid = pbc_neighs[:, 2:][:, ~pbc].any(axis=1) + + neighbor_pairs = pbc_neighs + + if invalid is not None: + neighbor_pairs = neighbor_pairs[~invalid] + if isinstance(split_ind, int): + split_ind = split_ind - invalid.sum() + else: + split_ind = split_ind - np.cumsum(invalid)[split_ind - 1] + + return neighbor_pairs, split_ind + + def find_neighbors( + self, + atoms: AtomsArgument = None, + as_pairs: bool = False, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), + ): + """Find neighbors as specified in the finder. + + This routine only executes the action of finding neighbors, + the parameters of the search (`geometry`, `R`, `overlap`...) + are defined when the finder is initialized or by calling `setup`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbors are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbors for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbors for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbor of itself. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 5) is returned. + Each pair `ij` means that `j` is a neighbor of `i`. + The three extra columns are the supercell indices of atom `j`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs, 4) for each atom. + """ + # Sanitize atoms + atoms = self.geometry._sanitize_atoms(atoms) + + # Cast R and pbc into arrays of appropiate shape and type. 
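+        # thresholds ends up holding one radius per atom of the binning geometry,
+        # and pbc one boolean per lattice vector.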
+ thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=bool) + + # Get search indices + search_indices, isc = self._get_search_indices( + self.geometry.fxyz[atoms], cartesian=False + ) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + if not self_interaction: + max_pairs -= search_indices.shape[0] + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) + + # Find the neighbor pairs + neighbor_pairs, split_ind = _operations.get_pairs( + atoms, + search_indices, + isc, + self._heads, + self._list, + self_interaction, + self._bins_geometry.xyz, + self._bins_geometry.cell, + pbc, + thresholds, + self._overlap, + init_pairs, + self.memory[1], + ) + + # Correct neighbor indices for the case where R was too big and + # we needed to create an auxiliary supercell. + if self._R_too_big: + neighbor_pairs, split_ind = self._correct_pairs_R_too_big( + neighbor_pairs, split_ind, pbc + ) + + if as_pairs: + # Just return the neighbor pairs + return neighbor_pairs[: split_ind[-1]] + + # Split to get the neighbors of each atom + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] + + def find_all_unique_pairs( + self, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), + ): + """Find all unique neighbor pairs within the geometry. + + A pair of atoms is considered unique if atoms have the same index + and correspond to the same unit cell. As an example, the connection + atom 0 (unit cell) to atom 5 (1, 0, 0) is not the same as the + connection atom 5 (unit cell) to atom 0 (-1, 0, 0). + + Note that this routine can not be called if `overlap` is + set to `False` and the radius is not a single float. In that case, + there is no way of defining "uniqueness" since pair `ij` can have + a different threshold radius than pair `ji`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbors are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbors for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbors for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbor of itself. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray of shape (n_pairs, 5): + Each pair `ij` means that `j` is a neighbor of `i`. + The three extra columns are the supercell indices of atom `j`. + """ + if not self._overlap and self._aux_R.ndim == 1: + raise ValueError( + "Unique atom pairs do not make sense if we are not looking for sphere overlaps." + " Please setup the finder again setting `overlap` to `True` if you wish so." + ) + + # In the case where we tiled the geometry to do the binning, it is much better to + # just find all neighbors and then drop duplicate connections. Otherwise it is a bit of a mess. 
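+        # Only unit-cell pairs can show up twice (as both ij and ji); supercell
+        # connections are already unique and are kept as they are.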
+ if self._R_too_big: + # Find all neighbors + all_neighbors = self.find_neighbors( + as_pairs=True, self_interaction=self_interaction, pbc=pbc + ) + + # Find out which of the pairs are uc connections + is_uc_neigh = ~np.any(all_neighbors[:, 2:], axis=1) + + # Create an array with unit cell connections where duplicates are removed + unique_uc = np.unique(np.sort(all_neighbors[is_uc_neigh][:, :2]), axis=0) + uc_neighbors = np.zeros((len(unique_uc), 5), dtype=int) + uc_neighbors[:, :2] = unique_uc + + # Concatenate the uc connections with the rest of the connections. + return np.concatenate((uc_neighbors, all_neighbors[~is_uc_neigh])) + + # Cast R and pbc into arrays of appropiate shape and type. + thresholds = np.full(self.geometry.na, self.R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=bool) + + # Get search indices + search_indices, isc = self._get_search_indices( + self.geometry.fxyz, cartesian=False + ) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + if not self_interaction: + max_pairs -= search_indices.shape[0] + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) + + # Find all unique neighbor pairs + neighbor_pairs = _operations.get_all_unique_pairs( + search_indices, + isc, + self._heads, + self._list, + self_interaction, + self.geometry.xyz, + self.geometry.cell, + pbc, + thresholds, + self._overlap, + init_pairs, + self.memory[1], + ) + + return neighbor_pairs + + def find_close( + self, + xyz: Sequence, + as_pairs: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), + ): + """Find all atoms that are close to some coordinates in space. + + This routine only executes the action of finding neighbors, + the parameters of the search (`geometry`, `R`, `overlap`...) + are defined when the finder is initialized or by calling `setup`. + + Parameters + ---------- + xyz: array-like of shape ([npoints], 3) + the coordinates for which neighbors are desired. + as_pairs: bool, optional + If `True` pairs are returned, where the first item is the index + of the point and the second one is the atom index. + Otherwise a list containing the neighbors for each point is returned. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 5) is returned. + Each pair `ij` means that `j` is a neighbor of `i`. + The three extra columns are the supercell indices of atom `j`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs, 4) for each atom. + """ + # Cast R and pbc into arrays of appropiate shape and type. 
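+        # For find_close it is the radius of the candidate atom that decides
+        # whether a point is close to it, so thresholds again holds one radius
+        # per atom of the binning geometry.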
+ thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=bool) + + xyz = np.atleast_2d(xyz) + # Get search indices + search_indices, isc = self._get_search_indices( + xyz.dot(self._bins_geometry.icell.T) % 1, cartesian=False + ) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) + + # Find the neighbor pairs + neighbor_pairs, split_ind = _operations.get_close( + xyz, + search_indices, + isc, + self._heads, + self._list, + self._bins_geometry.xyz, + self._bins_geometry.cell, + pbc, + thresholds, + init_pairs, + self.memory[1], + ) + + # Correct neighbor indices for the case where R was too big and + # we needed to create an auxiliary supercell. + if self._R_too_big: + neighbor_pairs, split_ind = self._correct_pairs_R_too_big( + neighbor_pairs, split_ind, pbc + ) + + if as_pairs: + # Just return the neighbor pairs + return neighbor_pairs[: split_ind[-1]] + + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] diff --git a/src/sisl/geom/_neighbors/_operations.py b/src/sisl/geom/_neighbors/_operations.py new file mode 100644 index 0000000000..a166537014 --- /dev/null +++ b/src/sisl/geom/_neighbors/_operations.py @@ -0,0 +1,628 @@ +"""File meant to be compiled with Cython so that operations are much faster.""" + +from __future__ import annotations + +import cython +import cython.cimports.numpy as cnp +import numpy as np +from cython.cimports.libc.math import sqrt + + +@cython.boundscheck(False) +@cython.wraparound(False) +def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): + """Builds the table where the location of all atoms is encoded + + Additionally, it also builds an array with the number of atoms at + each bin. + + Parameters + ---------- + nbins: int + Total number of bins + bin_indices: array of int + An array containing the bin index of each atom. + + Returns + ---------- + list: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more atoms + in the same bin. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + counts: + For each bin, the number of atoms that it contains. + """ + n_atoms = bin_indices.shape[0] + + list_array_obj = np.zeros(n_atoms, dtype=np.int64) + list_array: cnp.int64_t[:] = list_array_obj + + counts_obj = np.zeros(nbins, dtype=np.int64) + counts: cnp.int64_t[:] = counts_obj + + heads_obj = np.full(nbins, -1, dtype=np.int64) + heads: cnp.int64_t[:] = heads_obj + + # Loop through all atoms + for at in range(n_atoms): + # Get the index of the bin where this atom is located. + bin_index = bin_indices[at] + + # Replace the head of this bin by the current atom index. + # Before replacing, store the previous head in this atoms' + # position in the list. + list_array[at] = heads[bin_index] + heads[bin_index] = at + + # Update the count of this bin (increment it by 1). 
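+        # As a small sketch of the encoding: bin_indices = [0, 1, 0] produces
+        # heads = [2, 1, ...] and list = [-1, -1, 0], so walking bin 0 visits
+        # atom 2 and then atom 0.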
+ counts[bin_index] += 1 + + # Return the memory views as numpy arrays + return list_array_obj, heads_obj, counts_obj + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_pairs( + at_indices: cnp.int64_t[:], + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + self_interaction: cython.bint, + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], + overlap: cython.bint, + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, +): + """Gets (possibly duplicated) pairs of neighbour atoms. + + Parameters + --------- + at_indices: + The indices of the atoms that we want to get potential + neighbours for. + indices: + For each atom index (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. + list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + overlap: + If true, two atoms are considered neighbours if their spheres + overlap. + If false, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. + + Returns + -------- + neighs: + contains the atom indices of all pairs of neighbor atoms. + split_indices: + array containing the breakpoints in `neighs`. These + breakpoints delimit the position where one can find pairs + for each 8 bin search. It can be used later to quickly + split `neighs` on the multiple individual searches. 
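+        For example, ``np.split(neighs, split_indices)[:-1]`` recovers the
+        pairs found for each individual search.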
+ """ + N_ind = at_indices.shape[0] + + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) + + def grow(): + nonlocal neighs_obj, neighs, grow_factor + + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj + + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj + + split_indices_obj: cnp.ndarray = np.zeros(N_ind, dtype=np.int64) + split_indices: cnp.int64_t[:] = split_indices_obj + + # Counter for filling neighs + i_pair: cython.size_t = 0 + + for search_index in range(N_ind): + at = at_indices[search_index] + + for j in range(8): + # Find the bin index. + bin_index: cnp.int64_t = indices[search_index, j] + + # Get the first atom index in this bin + neigh_at: cnp.int64_t = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[search_index, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) + + ref_xyz[:] = xyz[at, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # If this is a self interaction and the user didn't want them, + # go to next atom. + if not self_interaction and at == neigh_at: + neigh_at = list_array[neigh_at] + continue + + # Calculate the distance between the atom and the potential + # neighbor. + dist: float = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 + dist = sqrt(dist) + + # Get the threshold for this pair of atoms + threshold: float = thresholds[at] + if overlap: + # If the user wants to check for sphere overlaps, we have + # to sum the radius of the neighbor to the threshold + threshold = threshold + thresholds[neigh_at] + + if dist < threshold: + + if i_pair >= neighs.shape[0]: + grow() + + # Store the pair of neighbours. + neighs[i_pair, 0] = at + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair = i_pair + 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + # We have finished this search, store the breakpoint. 
+ split_indices[search_index] = i_pair + + # We copy to allow GC to remove the full array + return neighs_obj[:i_pair, :].copy(), split_indices_obj + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_all_unique_pairs( + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + self_interaction: cython.bint, + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], + overlap: cython.bint, + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, +): + """Gets all unique pairs of atoms that are neighbours. + + Parameters + --------- + indices: + For each atom index (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. + list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + overlap: + If true, two atoms are considered neighbours if their spheres + overlap. + If false, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. + + Returns + -------- + neighs: + contains the atom indices of all unique neighbor pairs + First column of atoms is sorted in increasing order. + Columns 3 to 5 contain the supercell indices of the neighbour + atom (the one in column 2, column 1 atoms are always in the + unit cell). + """ + + N_ats = list_array.shape[0] + + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) + + def grow(): + nonlocal neighs_obj, neighs, grow_factor + + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj + + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj + + # Counter for filling neighs + i_pair: cython.size_t = 0 + + for at in range(N_ats): + if self_interaction: + if i_pair >= neighs.shape[0]: + grow() + + # Add the self interaction + neighs[i_pair, 0] = at + neighs[i_pair, 1] = at + neighs[i_pair, 2:] = 0 + + # Increment the pair index + i_pair += 1 + + for j in range(8): + # Find the bin index. 
+ bin_index: cnp.int64_t = indices[at, j] + + # Get the first atom index in this bin + neigh_at: cnp.int64_t = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[at, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) + + ref_xyz[:] = xyz[at, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # If neigh_at is smaller than at, we already stored + # this pair when performing the search for neigh_at. + # The following atoms will have even lower indices + # So we can just move to the next bin. However, if + # we are checking a neighboring cell, this connection + # will always be unique. + if not not_unit_cell and neigh_at <= at: + break + + # Calculate the distance between the atom and the potential + # neighbor. + dist: float = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 + dist = sqrt(dist) + + # Get the threshold for this pair of atoms + threshold: float = thresholds[at] + if overlap: + # If the user wants to check for sphere overlaps, we have + # to sum the radius of the neighbor to the threshold + threshold = threshold + thresholds[neigh_at] + + if dist < threshold: + if i_pair >= neighs.shape[0]: + grow() + + # Store the pair of neighbours. + neighs[i_pair, 0] = at + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair += 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + # Return the array of neighbours, but only the filled part + # We copy to remove the unneeded data-sizes + return neighs_obj[:i_pair].copy() + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_close( + search_xyz: cnp.float64_t[:, :], + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, +): + """Gets the atoms that are close to given positions + + Parameters + --------- + search_xyz: + The coordinates for which we want to look for atoms that + are close. + indices: + For each point (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. 
+ list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. + + Returns + -------- + neighs: + contains the atom indices of all pairs of neighbor atoms. + split_indices: + array containing the breakpoints in `neighs`. These + breakpoints delimit the position where one can find pairs + for each 8 bin search. It can be used later to quickly + split `neighs` on the multiple individual searches. + """ + + N_ind = search_xyz.shape[0] + + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) + + def grow(): + nonlocal neighs_obj, neighs, grow_factor + + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj + + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj + + split_indices_obj: cnp.ndarray = np.zeros(N_ind, dtype=np.int64) + split_indices: cnp.int64_t[:] = split_indices_obj + + i_pair: cython.size_t = 0 + + for search_index in range(N_ind): + for j in range(8): + # Find the bin index. + bin_index: cnp.int64_t = indices[search_index, j] + + # Get the first atom index in this bin + neigh_at: cnp.int64_t = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[search_index, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) + + ref_xyz[:] = search_xyz[search_index, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # Calculate the distance between the atom and the potential + # neighbor. 
+ dist: float = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 + dist = sqrt(dist) + + # Get the threshold for the potential neighbour + threshold: float = thresholds[neigh_at] + + if dist < threshold: + + if i_pair >= neighs.shape[0]: + grow() + + # Store the pair of neighbours. + neighs[i_pair, 0] = search_index + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair = i_pair + 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + # We have finished this search, store the breakpoint. + split_indices[search_index] = i_pair + + return neighs_obj[:i_pair, :].copy(), split_indices_obj diff --git a/src/sisl/geom/_neighbors/tests/test_finder.py b/src/sisl/geom/_neighbors/tests/test_finder.py new file mode 100644 index 0000000000..67d6892445 --- /dev/null +++ b/src/sisl/geom/_neighbors/tests/test_finder.py @@ -0,0 +1,285 @@ +from functools import partial + +import numpy as np +import pytest + +from sisl import Geometry +from sisl.geom import NeighborFinder + +pytestmark = [pytest.mark.neighbor] + + +tr_fixture = partial(pytest.fixture, scope="module", params=[True, False]) + + +def request_param(request): + return request.param + + +sphere_overlap = tr_fixture()(request_param) +multiR = tr_fixture()(request_param) +self_interaction = tr_fixture()(request_param) +post_setup = tr_fixture()(request_param) +pbc = tr_fixture()(request_param) + + +@pytest.fixture(scope="module") +def neighfinder(sphere_overlap, multiR): + geom = Geometry([[0, 0, 0], [1.2, 0, 0], [9, 0, 0]], lattice=[10, 10, 7]) + + R = np.array([1.1, 1.5, 1.2]) if multiR else 1.5 + + neighfinder = NeighborFinder(geom, R=R, overlap=sphere_overlap) + + neighfinder.assert_consistency() + + return neighfinder + + +@pytest.fixture(scope="module") +def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): + first_at_neighs = [] + if not (multiR and not sphere_overlap): + first_at_neighs.append([0, 1, 0, 0, 0]) + if self_interaction: + first_at_neighs.append([0, 0, 0, 0, 0]) + if pbc: + first_at_neighs.append([0, 2, -1, 0, 0]) + + second_at_neighs = [[1, 0, 0, 0, 0]] + if self_interaction: + second_at_neighs.insert(0, [1, 1, 0, 0, 0]) + if pbc and sphere_overlap: + second_at_neighs.append([1, 2, -1, 0, 0]) + + third_at_neighs = [] + if self_interaction: + third_at_neighs.append([2, 2, 0, 0, 0]) + if pbc: + if sphere_overlap: + third_at_neighs.append([2, 1, 1, 0, 0]) + third_at_neighs.append([2, 0, 1, 0, 0]) + + return ( + np.array(first_at_neighs), + np.array(second_at_neighs), + np.array(third_at_neighs), + ) + + +def test_neighfinder_setup(sphere_overlap, multiR, post_setup): + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[10, 10, 7]) + + R = np.array([0.9, 1.5]) if multiR else 1.5 + + if post_setup: + # We are going to create a data structure with the wrong parameters, + # and then update it. + finder = NeighborFinder(geom, R=R - 0.7, overlap=not sphere_overlap) + finder.setup(R=R, overlap=sphere_overlap) + else: + # Initialize finder with the right parameters. + finder = NeighborFinder(geom, R=R, overlap=sphere_overlap) + + # Check that R is properly set when its a scalar and an array. 
+ if multiR: + assert isinstance(finder.R, np.ndarray) + assert np.all(finder.R == R) + else: + assert finder.R.ndim == 0 + assert finder.R == R + + # Assert that we have stored a copy of the geometry + assert isinstance(finder.geometry, Geometry) + assert finder.geometry == geom + assert finder.geometry is not geom + + # Check that the total number of bins is correct. If sphere_overlap is + # True, bins are much bigger. + nbins = (1, 1, 1) if sphere_overlap else (3, 3, 2) + assert finder.nbins == nbins + + total_bins = 1 if sphere_overlap else 18 + assert finder.total_nbins == total_bins + + # Assert that the data structure is generated. + for k in ("_list", "_counts", "_heads"): + assert hasattr(finder, k), k + assert isinstance(finder._list, np.ndarray), k + + finder.assert_consistency() + + # Check that all bins are empty except one, which contains the two atoms. + assert (finder._counts == 0).sum() == finder.total_nbins - 1 + assert finder._counts.sum() == 2 + + +def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): + neighs = neighfinder.find_neighbors( + as_pairs=True, self_interaction=self_interaction, pbc=pbc + ) + + assert isinstance(neighs, np.ndarray) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + n_neighs = len(first_at_neighs) + len(second_at_neighs) + len(third_at_neighs) + + assert neighs.shape == (n_neighs, 5) + + assert np.all(neighs == [*first_at_neighs, *second_at_neighs, *third_at_neighs]) + + +def test_neighbors_lists(neighfinder, self_interaction, pbc, expected_neighs): + neighs = neighfinder.find_neighbors( + as_pairs=False, self_interaction=self_interaction, pbc=pbc + ) + + assert isinstance(neighs, list) + assert len(neighs) == 3 + + assert all(isinstance(n, np.ndarray) for n in neighs) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + # Check shapes + for i, i_at_neighs in enumerate( + [first_at_neighs, second_at_neighs, third_at_neighs] + ): + assert neighs[i].shape == ( + len(i_at_neighs), + 4, + ), f"Wrong shape for neighbors of atom {i}" + + # Check values + for i, i_at_neighs in enumerate( + [first_at_neighs, second_at_neighs, third_at_neighs] + ): + if len(neighs[i]) == 0: + continue + + assert np.all( + neighs[i] == i_at_neighs[:, 1:] + ), f"Wrong values for neighbors of atom {i}" + + +def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): + if neighfinder.R.ndim == 1 and not neighfinder._overlap: + with pytest.raises(ValueError): + neighfinder.find_all_unique_pairs( + self_interaction=self_interaction, pbc=pbc + ) + return + + neighs = neighfinder.find_all_unique_pairs( + self_interaction=self_interaction, pbc=pbc + ) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + all_expected_neighs = np.array( + [*first_at_neighs, *second_at_neighs, *third_at_neighs] + ) + + unique_neighs = [] + for neigh_pair in all_expected_neighs: + if not np.all(neigh_pair[2:] == 0): + unique_neighs.append(neigh_pair) + else: + for others in unique_neighs: + if np.all(others == [neigh_pair[1], neigh_pair[0], *neigh_pair[2:]]): + break + else: + unique_neighs.append(neigh_pair) + + assert neighs.shape == (len(unique_neighs), 5) + + +def test_close(neighfinder, pbc): + neighs = neighfinder.find_close([0.3, 0, 0], as_pairs=True, pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] + if pbc and neighfinder.R.ndim == 0: + expected_neighs.append([0, 2, -1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == 
expected_neighs) + + +def test_no_neighbors(pbc): + """Test the case where there are no neighbors, to see that it doesn't crash.""" + + geom = Geometry([[0, 0, 0]]) + + finder = NeighborFinder(geom, R=1.5) + + neighs = finder.find_neighbors(as_pairs=True, pbc=pbc) + + assert isinstance(neighs, np.ndarray) + assert neighs.shape == (0, 5) + + neighs = finder.find_neighbors(as_pairs=False, pbc=pbc) + + assert isinstance(neighs, list) + assert len(neighs) == 1 + + assert isinstance(neighs[0], np.ndarray) + assert neighs[0].shape == (0, 4) + + neighs = finder.find_all_unique_pairs(pbc=pbc) + + assert isinstance(neighs, np.ndarray) + assert neighs.shape == (0, 5) + + +def test_R_too_big(pbc): + """Test the case when R is so big that it needs a bigger bin + than the unit cell.""" + + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[2, 10, 10]) + + neighfinder = NeighborFinder(geom, R=1.5) + + neighs = neighfinder.find_all_unique_pairs(pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0]] + if pbc: + expected_neighs.append([0, 1, -1, 0, 0]) + expected_neighs.append([1, 0, 1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == expected_neighs) + + neighfinder = NeighborFinder(geom, R=[0.6, 2.2], overlap=True) + + neighs = neighfinder.find_close([[0.5, 0, 0]], as_pairs=True, pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] + if pbc: + expected_neighs.insert(0, [0, 1, -1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == expected_neighs) + + +def test_bin_sizes(): + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[2, 10, 10]) + + # We should have fewer bins along the first lattice vector + n1 = NeighborFinder(geom, R=1.5, bin_size=2) + n2 = NeighborFinder(geom, R=1.5, bin_size=4) + + assert n1.total_nbins > n2.total_nbins + # When the bin is bigger than the unit cell, this situation + # occurs + assert n1.nbins[0] == n2.nbins[0] + assert n1.nbins[1] > n2.nbins[1] + assert n1.nbins[2] > n2.nbins[2] + + # We should have the same number of bins the 2nd and 3rd lattice vectors + n3 = NeighborFinder(geom, R=1.5, bin_size=2) + n4 = NeighborFinder(geom, R=1.5, bin_size=(2, 4, 4)) + + assert n3.nbins[0] == n4.nbins[0] + assert n3.nbins[1] > n4.nbins[1] + assert n3.nbins[2] > n4.nbins[2] diff --git a/src/sisl/utils/mathematics.py b/src/sisl/utils/mathematics.py index 715fb47a61..6f820ce8ce 100644 --- a/src/sisl/utils/mathematics.py +++ b/src/sisl/utils/mathematics.py @@ -130,8 +130,6 @@ def spher2cart(r, theta, phi): theta = asarray(theta) phi = asarray(phi) shape = _a.broadcast_shapes(r.shape, theta.shape, phi.shape) - print(r.shape, theta.shape, phi.shape) - print(shape) R = _a.empty(shape + (3,), dtype=r.dtype) sphi = sin(phi) R[..., 0] = r * cos(theta) * sphi diff --git a/src/sisl/utils/misc.py b/src/sisl/utils/misc.py index cebb03409d..78dec16935 100644 --- a/src/sisl/utils/misc.py +++ b/src/sisl/utils/misc.py @@ -6,14 +6,17 @@ import importlib import inspect import operator as op +import re import sys from math import pi from numbers import Integral +from typing import Union __all__ = ["merge_instances", "str_spec", "direction", "angle"] __all__ += ["iter_shape", "math_eval", "allow_kwargs"] __all__ += ["import_attr", "lazy_import"] __all__ += ["PropertyDict", "NotNonePropertyDict"] +__all__ += ["size_to_num", "size_to_elements"] # supported operators @@ -81,6 +84,68 @@ def merge_instances(*args, **kwargs): return m +def size_to_num(size: Union[int, float, str], unit: str = "MB") -> float: + """Convert a size-specification 
to a size in a specific `unit` + + Converts the input value (`size`) into a number corresponding to + the `size` converted to the specified `unit`. + + If `size` is passed as an integer (or float), it will be interpreted + as a size in MB. Otherwise the string may contain the size specification. + """ + if isinstance(size, (float, int)): + return size + + # Now parse the size from a string + match = re.match(r"(\d+[.\d]*)(\D*)", size) + size = float(match[1].strip()) + unit_in = match[2].strip() + + # Now parse things + # We expect data to be in MB (default unit) + # Then we can always convert + conv = { + "b": 1 / (1024 * 1024), + "B": 1 / (1024 * 1024), + "k": 1 / 1024, + "kb": 1 / 1024, + "kB": 1 / 1024, + "mb": 1, + "M": 1, + "MB": 1, + "G": 1024, + "GB": 1024, + "T": 1024 * 1024, + "TB": 1024 * 1024, + } + + if unit_in: + unit_in = conv[unit_in] + else: + unit_in = 1 + + # Convert the requested unit + unit = conv[unit] + + return size * unit_in / unit + + +def size_to_elements(size: Union[int, str], byte_per_elem: int = 8) -> int: + """Calculate the number of elements such that they occupy `size` memory + + Parameters + ---------- + size : + a size specification, either by an integer, or a str. If an integer it is + assumed to be in MB, otherwise the str can hold a unit specification. + byte_per_elem + number of bytes per element when doing the conversion + """ + size = size_to_num(size, unit="B") + + return int(size // byte_per_elem) + + def iter_shape(shape): """Generator for iterating a shape by returning consecutive slices