From 8595bb5dbc0f3ae75c3e4fe37f892743cc5b6424 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Sun, 3 Nov 2024 20:56:13 +0100 Subject: [PATCH 01/14] complete rewrite of cython files This was not really intended. But the amount of different codes that were not using fused types was annoying and prohibited further development. I initially tried to implement pure-python mode codes. However, it seems to be impossible to do pure-python mode and cimport it into another pure-python mode code. Before this will be merged, I need to benchmark the changes made. I *hope* it is faster, but I am not sure anything happened. The main change is that the fused datatypes are omnipresent and that the ndarray data type has been removed almost completely. So now, the memoryviews are prevalent. Signed-off-by: Nick Papior --- benchmarks/run.sh | 5 +- benchmarks/run3.sh | 20 - src/sisl/CMakeLists.txt | 7 + src/sisl/__init__.py | 4 +- src/sisl/_core/CMakeLists.txt | 2 +- src/sisl/_core/_dtypes.pxd | 101 ++ src/sisl/_core/_dtypes.pyx | 80 ++ src/sisl/_core/_sparse.pxd | 6 +- src/sisl/_core/_sparse.pyx | 179 ++-- src/sisl/_indices.pxd | 18 +- src/sisl/_indices.pyx | 734 +++++++-------- src/sisl/_math_small.pyx | 86 +- src/sisl/physics/CMakeLists.txt | 8 +- src/sisl/physics/_matrix_ddk.pyx | 264 ++---- src/sisl/physics/_matrix_dk.pyx | 141 +-- src/sisl/physics/_matrix_k.pyx | 265 ++---- src/sisl/physics/_matrix_phase.pyx | 776 +++++++++++----- src/sisl/physics/_matrix_phase3.pyx | 872 ++++++++++-------- src/sisl/physics/_matrix_phase3_nc.pyx | 366 -------- src/sisl/physics/_matrix_phase3_so.pyx | 438 --------- src/sisl/physics/_matrix_phase_nc.pyx | 224 ----- src/sisl/physics/_matrix_phase_nc_diag.pyx | 198 ---- src/sisl/physics/_matrix_phase_sc.pyx | 617 +++++++++++++ src/sisl/physics/_matrix_phase_so.pyx | 248 ----- src/sisl/physics/_matrix_sc_phase.pyx | 185 ---- src/sisl/physics/_matrix_sc_phase_nc.pyx | 272 ------ src/sisl/physics/_matrix_sc_phase_nc_diag.pyx | 234 ----- 
src/sisl/physics/_matrix_sc_phase_so.pyx | 293 ------ src/sisl/physics/_matrix_utils.pxd | 3 - src/sisl/physics/_matrix_utils.pyx | 37 - src/sisl/physics/_phase.pxd | 7 + src/sisl/physics/_phase.pyx | 39 +- 32 files changed, 2545 insertions(+), 4184 deletions(-) delete mode 100755 benchmarks/run3.sh create mode 100644 src/sisl/_core/_dtypes.pxd create mode 100644 src/sisl/_core/_dtypes.pyx delete mode 100644 src/sisl/physics/_matrix_phase3_nc.pyx delete mode 100644 src/sisl/physics/_matrix_phase3_so.pyx delete mode 100644 src/sisl/physics/_matrix_phase_nc.pyx delete mode 100644 src/sisl/physics/_matrix_phase_nc_diag.pyx create mode 100644 src/sisl/physics/_matrix_phase_sc.pyx delete mode 100644 src/sisl/physics/_matrix_phase_so.pyx delete mode 100644 src/sisl/physics/_matrix_sc_phase.pyx delete mode 100644 src/sisl/physics/_matrix_sc_phase_nc.pyx delete mode 100644 src/sisl/physics/_matrix_sc_phase_nc_diag.pyx delete mode 100644 src/sisl/physics/_matrix_sc_phase_so.pyx delete mode 100644 src/sisl/physics/_matrix_utils.pxd delete mode 100644 src/sisl/physics/_matrix_utils.pyx create mode 100644 src/sisl/physics/_phase.pxd diff --git a/benchmarks/run.sh b/benchmarks/run.sh index 5390c0c125..8762b8ab42 100755 --- a/benchmarks/run.sh +++ b/benchmarks/run.sh @@ -15,6 +15,5 @@ profile=$base.profile # Stats stats=$base.stats -python -m cProfile -o $profile $script $@ -python stats.py $profile > $stats - +python3 -m cProfile -o $profile $script $@ +python3 stats.py $profile > $stats diff --git a/benchmarks/run3.sh b/benchmarks/run3.sh deleted file mode 100755 index d3586bb313..0000000000 --- a/benchmarks/run3.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -script=graphene.py -if [ $# -gt 0 ]; then - script=$1 - shift -fi - -# Base name -base=${script%.py} - -# Determine output profile -profile=$base.profile - -# Stats -stats=$base.stats - -python3 -m cProfile -o $profile $script $@ -python3 stats.py $profile > $stats - diff --git a/src/sisl/CMakeLists.txt 
b/src/sisl/CMakeLists.txt index b94e5a741e..87a6346477 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -1,3 +1,9 @@ +set_property(DIRECTORY + APPEND + PROPERTY INCLUDE_DIRECTORIES + ${CMAKE_CURRENT_SOURCE_DIR}/_core + ) + foreach(source _indices _math_small) add_cython_library( SOURCE ${source}.pyx @@ -29,6 +35,7 @@ endforeach() get_directory_property( SISL_DEFINITIONS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMPILE_DEFINITIONS ) + # Join to stringify list list(JOIN SISL_DEFINITIONS " " SISL_DEFINITIONS) diff --git a/src/sisl/__init__.py b/src/sisl/__init__.py index 8b1f7b065b..3841fc2dac 100644 --- a/src/sisl/__init__.py +++ b/src/sisl/__init__.py @@ -88,6 +88,8 @@ # import the common options used from ._common import * +from ._core import * + # Import warning classes # We currently do not import warn and info # as they are too generic names in case one does from sisl import * @@ -106,8 +108,6 @@ # Below are sisl-specific imports from .shape import * -from ._core import * - # Physical quantities and required classes from .physics import * diff --git a/src/sisl/_core/CMakeLists.txt b/src/sisl/_core/CMakeLists.txt index 5453a1c9f6..f2cea9c7c6 100644 --- a/src/sisl/_core/CMakeLists.txt +++ b/src/sisl/_core/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(source _lattice _sparse) +foreach(source _lattice _dtypes _sparse) add_cython_library( SOURCE ${source}.pyx LIBRARY ${source} diff --git a/src/sisl/_core/_dtypes.pxd b/src/sisl/_core/_dtypes.pxd new file mode 100644 index 0000000000..45ce9d4683 --- /dev/null +++ b/src/sisl/_core/_dtypes.pxd @@ -0,0 +1,101 @@ +""" +Shared header for fused dtypes +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# Generic typedefs for sisl internal naming convention +ctypedef size_t size_st +ctypedef Py_ssize_t ssize_st + + +ctypedef 
fused ints_st: + int + long + +ctypedef fused floats_st: + float + double + +ctypedef fused complexs_st: + float complex + double complex + +ctypedef fused floatcomplexs_st: + float + double + float complex + double complex + + +# Another one to have two separate ctypes (cross-product of type-defs) +ctypedef fused floatcomplexs2_st: + float + double + float complex + double complex + + +ctypedef fused numerics_st: + int + long + float + double + float complex + double complex + + +ctypedef fused _type2dtype_types_st: + short + int + long + float + double + float complex + double complex + float32_t + float64_t + #complex64_t # not usable... + #complex128_t + int8_t + int16_t + int32_t + int64_t + uint8_t + uint16_t + uint32_t + uint64_t + + +cdef object type2dtype(const _type2dtype_types_st v) + + +ctypedef fused _inline_sum_st: + short + int + long + int16_t + int32_t + int64_t + uint16_t + uint32_t + uint64_t + +cdef ssize_st inline_sum(const _inline_sum_st[::1] array) noexcept nogil diff --git a/src/sisl/_core/_dtypes.pyx b/src/sisl/_core/_dtypes.pyx new file mode 100644 index 0000000000..a57d775e35 --- /dev/null +++ b/src/sisl/_core/_dtypes.pyx @@ -0,0 +1,80 @@ +""" +Inline-sum (all useful shared codes could be placed here +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + + +@cython.initializedcheck(False) +cdef inline object type2dtype(const _type2dtype_types_st v): + if _type2dtype_types_st is int8_t: + return np.int8 + elif _type2dtype_types_st is int16_t: + return np.int16 + elif _type2dtype_types_st is cython.short: + return np.int16 + elif _type2dtype_types_st is int32_t: + return np.int32 + elif _type2dtype_types_st is cython.int: + return np.int32 + elif _type2dtype_types_st is int64_t: + return np.int64 + elif _type2dtype_types_st is cython.long: + return np.int64 
+ elif _type2dtype_types_st is float32_t: + return np.float32 + elif _type2dtype_types_st is cython.float: + return np.float32 + elif _type2dtype_types_st is float64_t: + return np.float64 + elif _type2dtype_types_st is cython.double: + return np.float64 + elif _type2dtype_types_st is complex64_t: + return np.complex64 + elif _type2dtype_types_st is cython.floatcomplex: + return np.complex64 + elif _type2dtype_types_st is complex128_t: + return np.complex128 + elif _type2dtype_types_st is cython.doublecomplex: + return np.complex128 + + # More special cases + elif _type2dtype_types_st is uint8_t: + return np.uint8 + elif _type2dtype_types_st is uint16_t: + return np.uint16 + elif _type2dtype_types_st is uint32_t: + return np.uint32 + elif _type2dtype_types_st is uint64_t: + return np.uint64 + + + +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.boundscheck(False) +cdef inline ssize_st inline_sum(const _inline_sum_st[::1] array) noexcept nogil: + cdef ssize_st total, i + + total = 0 + for i in range(array.shape[0]): + total += array[i] + + return total diff --git a/src/sisl/_core/_sparse.pxd b/src/sisl/_core/_sparse.pxd index d36d0f24d2..a588c5d149 100644 --- a/src/sisl/_core/_sparse.pxd +++ b/src/sisl/_core/_sparse.pxd @@ -1,2 +1,6 @@ # Define the interfaces for the functions exposed through cimport -cdef Py_ssize_t inline_sum(const int[::1] array) nogil +from sisl._core._dtypes cimport ints_st + + +cdef void ncol2ptr_nc(const ints_st nr, const ints_st[::1] ncol, ints_st[::1] ptr, const +ints_st per_elem) noexcept nogil diff --git a/src/sisl/_core/_sparse.pyx b/src/sisl/_core/_sparse.pyx index 484aefbffb..bad7f3508c 100644 --- a/src/sisl/_core/_sparse.pyx +++ b/src/sisl/_core/_sparse.pyx @@ -2,57 +2,61 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
cimport cython -from libc.math cimport fabs import numpy as np -# This enables Cython enhanced compatibilities - -cimport numpy as np +cimport numpy as cnp +from numpy cimport dtype, ndarray +from sisl._core._dtypes cimport inline_sum, ints_st, numerics_st, ssize_st, type2dtype from sisl._indices cimport in_1d -__all__ = ["fold_csr_matrix", "fold_csr_matrix_nc", - "fold_csr_diagonal_nc", "sparse_dense", "inline_sum"] - @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline Py_ssize_t inline_sum(const int[::1] array) noexcept nogil: - cdef Py_ssize_t total, i +cdef void ncol2ptr_nc(const ints_st nr, const ints_st[::1] ncol, ints_st[::1] ptr, const ints_st per_elem) noexcept nogil: + cdef ssize_st r, rr + + # this is NC/SOC + ptr[0] = 0 + ptr[1] = ncol[0] * per_elem + for r in range(1, nr): + rr = r * 2 + # do both + ptr[rr] = ptr[rr - 1] + ncol[r-1] * per_elem + ptr[rr+1] = ptr[rr] + ncol[r] * per_elem - total = 0 - for i in range(array.shape[0]): - total += array[i] - return total + ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] * per_elem @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_matrix(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL + # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol)], dtype=np.int32) - 
cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol)], dtype=dtype) + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, ind, nz, c - cdef int[::1] tmp + cdef ints_st r, c, nz, ind + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): @@ -88,29 +92,31 @@ def fold_csr_matrix(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_matrix_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - # We have to multiply by 4, 2 times the number of rows, and each row couples to 2 more elements - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 4], dtype=np.int32) - cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr * 
2], dtype=dtype) + # We have to multiply by 4, 2 times for the extra rows, and another + # 2 for the possible double couplings + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 4], dtype=dtype) + + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, rr, ind, nz, c - cdef int[::1] tmp + cdef ints_st r, rr, ind, nz, c + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): rr = r * 2 @@ -158,29 +164,30 @@ def fold_csr_matrix_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_diagonal_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - # We have to multiply by 2, 2 times the number of rows - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 2], dtype=np.int32) - cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=dtype) + # We have to multiply by 2 times for the extra rows + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 
2], dtype=dtype) + + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, rr, ind, nz, c - cdef int[::1] tmp + cdef ints_st r, rr, ind, nz, c + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): rr = r * 2 @@ -222,45 +229,25 @@ def fold_csr_diagonal_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, return FOLD_ptr, FOLD_ncol, FOLD_col[:nz].copy() -# Here we have the int + long -# For some analysis it may be useful -ctypedef fused numeric_complex: - int - long - float - double - float complex - double complex - - def sparse_dense(M): - return _sparse_dense(M.shape, M.ptr, M.ncol, M.col, M._D, M.dtype) + cdef cnp.ndarray dense = np.zeros(M.shape, dtype=M.dtype) + _sparse_dense(M.ptr, M.ncol, M.col, M._D, dense) + return dense @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -@cython.cdivision(True) -def _sparse_dense(shape, - np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, dtype): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - cdef Py_ssize_t nr = ncol.shape[0] - cdef V = np.zeros(shape, dtype=dtype) - cdef VV = V[:, ::1] - cdef Py_ssize_t r, ind, ix, s2 - - s2 = shape[2] - for r in range(nr): +def _sparse_dense(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] data, + numerics_st[:, :, ::1] dense): + + cdef ints_st r, ind, ix, s2 + + s2 = dense.shape[2] + for r in range(ncol.shape[0]): for ind in range(ptr[r], ptr[r] + ncol[r]): for ix in range(s2): - VV[r, col[ind], ix] += D[ind, ix] - - return V + dense[r, col[ind], ix] += data[ind, ix] diff --git a/src/sisl/_indices.pxd b/src/sisl/_indices.pxd index 261207e919..5922b5bd71 100644 --- a/src/sisl/_indices.pxd +++ 
b/src/sisl/_indices.pxd @@ -1,3 +1,17 @@ # Define the interfaces for the functions exposed through cimport -cdef int in_1d(const int[::1] array, const int v) nogil -cdef Py_ssize_t _index_sorted(const int[::1] array, const int v) nogil +from numpy cimport int16_t, int32_t, int64_t + +from sisl._core._dtypes cimport ints_st, ssize_st + + +cdef bint in_1d(const ints_st[::1] array, const ints_st v) noexcept nogil + +ctypedef fused _ints_index_sorted_st: + short + int + long + int16_t + int32_t + int64_t + +cdef ssize_st _index_sorted(const ints_st[::1] array, const _ints_index_sorted_st v) noexcept nogil diff --git a/src/sisl/_indices.pyx b/src/sisl/_indices.pyx index 2c270ee220..a4a1d0efbe 100644 --- a/src/sisl/_indices.pyx +++ b/src/sisl/_indices.pyx @@ -2,394 +2,343 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs, sqrt +from libc.math cimport fabs, fabsf, sqrt, sqrtf import numpy as np -# This enables Cython enhanced compatibilities +cimport numpy as cnp +from numpy cimport dtype, ndarray -cimport numpy as np +from sisl._core._dtypes cimport floats_st, ints_st, ssize_st, type2dtype @cython.boundscheck(False) @cython.wraparound(False) -def indices_only(np.ndarray[np.int32_t, ndim=1, mode='c'] element, np.ndarray[np.int32_t, ndim=1, mode='c'] test_element): +def indices_only(ints_st[::1] element, ints_st[::1] test_element): """ Return indices of all `test_element` in the element array. 
Parameters ---------- - element : np.ndarray(np.int32) + element : array to search in - test_element : np.ndarray(np.int32) + test_element : values to find the indices of in `element` """ # Ensure contiguous arrays - cdef int[::1] ELEMENT = element - cdef int[::1] TEST_ELEMENT = test_element - cdef Py_ssize_t n_element = ELEMENT.shape[0] - cdef Py_ssize_t n_test_element = TEST_ELEMENT.shape[0] + cdef ssize_st n_element = element.shape[0] + cdef ssize_st n_test_element = test_element.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([max(n_test_element, n_element)], dtype=np.int32) - cdef int[::1] IDX = idx + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] IDX = np.empty([max(n_test_element, n_element)], dtype=dtype) + cdef ints_st[::1] idx = IDX - cdef Py_ssize_t n = _indices_only(n_element, ELEMENT, n_test_element, TEST_ELEMENT, IDX) + cdef ssize_st i, j, n - return idx[:n] + n = 0 + with nogil: + # Fast return + if n_test_element == 0: + pass -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_only(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j, n + elif n_element == 0: + pass - # Fast return - if n_test_element == 0: - return 0 - elif n_element == 0: - return 0 + elif n_test_element > n_element: + for j in range(n_test_element): + for i in range(n_element): + if test_element[j] == element[i]: + idx[n] = i + n += 1 + break - elif n_test_element > n_element: - n = 0 - for j in range(n_test_element): + else: for i in range(n_element): - if test_element[j] == element[i]: - idx[n] = i - n += 1 - break + for j in range(n_test_element): + if test_element[j] == element[i]: + idx[n] = i + n += 1 + break + + return IDX[:n].copy() - else: - n = 0 - for i in range(n_element): - for j in range(n_test_element): - if test_element[j] == 
element[i]: - idx[n] = i - n += 1 - break - return n @cython.boundscheck(False) @cython.wraparound(False) -def indices(np.ndarray[np.int32_t, ndim=1, mode='c'] element, np.ndarray[np.int32_t, ndim=1, mode='c'] test_element, int offset=0, both_sorted=False): +def indices(ints_st[::1] element, ints_st[::1] test_element, ints_st offset=0, + both_sorted: bool = False): """ Return indices of all `test_element` in the search array. If not found the index will be ``-1`` Parameters ---------- - element : np.ndarray(np.int32) + element : array to search in - test_element : np.ndarray(np.int32) + test_element : values to find the indices of in `element` - offset : int + offset : index offset """ # Ensure contiguous arrays - cdef int[::1] ELEMENT = element - cdef int[::1] TEST_ELEMENT = test_element - cdef Py_ssize_t n_element = ELEMENT.shape[0] - cdef Py_ssize_t n_test_element = TEST_ELEMENT.shape[0] + cdef ssize_st n_element = element.shape[0] + cdef ssize_st n_test_element = test_element.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([n_test_element], dtype=np.int32) - cdef int[::1] IDX = idx + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] IDX = np.empty([n_test_element], dtype=dtype) + cdef ints_st[::1] idx = IDX + cdef ssize_st i, j + cdef ints_st ctest_element, celement if offset < 0: raise ValueError(f"indices requires offset argument >=0, got {offset}") - if both_sorted: - _indices_sorted_arrays(n_element, ELEMENT, n_test_element, TEST_ELEMENT, offset, IDX) - else: - _indices(n_element, ELEMENT, n_test_element, TEST_ELEMENT, offset, IDX) - - return idx - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef void _indices(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - const int offset, int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j - - # Fast return if n_test_element == 0: + # fast return pass + 
elif n_element == 0: - for j in range(n_test_element): - idx[j] = -1 - pass - elif n_test_element > n_element: for j in range(n_test_element): - idx[j] = -1 - for i in range(n_element): - if test_element[j] == element[i]: - idx[j] = offset + i - break + idx[j] = -1 + + elif both_sorted: + + i = j = 0 + while (i < n_element) and (j < n_test_element): + celement = element[i] + ctest_element = test_element[j] + if celement == ctest_element: + idx[j] = (i + offset) + j += 1 + elif celement < ctest_element: + i += 1 + elif celement > ctest_element: + idx[j] = -1 + j += 1 + for i in range(j, n_test_element): + idx[i] = -1 else: - # We need to initialize - for j in range(n_test_element): - idx[j] = -1 - for i in range(n_element): + if n_test_element > n_element: for j in range(n_test_element): - if test_element[j] == element[i]: - idx[j] = offset + i - break + idx[j] = -1 + for i in range(n_element): + if test_element[j] == element[i]: + idx[j] = (offset + i) + break + else: + # We need to initialize + for j in range(n_test_element): + idx[j] = -1 + for i in range(n_element): + for j in range(n_test_element): + if test_element[j] == element[i]: + idx[j] = (offset + i) + break -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef void _indices_sorted_arrays(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - const int offset, int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j - cdef int ctest_element, celement + return IDX - # Fast return - if n_test_element == 0: - pass - elif n_element == 0: - for j in range(n_test_element): - idx[j] = -1 - return - - i = 0 - j = 0 - while (i < n_element) and (j < n_test_element): - celement = element[i] - ctest_element = test_element[j] - if celement == ctest_element: - idx[j] = i + offset - j += 1 - elif celement < ctest_element: - i += 1 - elif celement > ctest_element: - idx[j] = -1 - j += 1 - for j in range(j, n_test_element): 
- idx[j] = -1 @cython.boundscheck(False) @cython.wraparound(False) -def indices_in_cylinder(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R, const double h): +def indices_in_cylinder(floats_st[:, ::1] dxyz, const floats_st R, const floats_st h): """ Indices for all coordinates that are within a cylinde radius `R` and height `h` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the cylinder - R : float + R : radius of cylinder to check - h : float + h : height of cylinder to check Returns ------- - index : np.ndarray(np.int32) + index : indices of all dxyz coordinates that are within the cylinder """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1] idx = np.empty([n], dtype=np.int32) - cdef int[::1] IDX = idx - - n = _indices_in_cylinder(dXYZ, R, h, IDX) - - if n == 0: - return np.empty([0], dtype=np.int32) - return idx[:n].copy() + cdef ssize_st n = dxyz.shape[0] + cdef ssize_st nxyz = dxyz.shape[1] - 1 + cdef ndarray[int32_t] IDX = np.empty([n], dtype=np.int32) + cdef int[::1] idx = IDX -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_cylinder(const double[:, ::1] dxyz, const double R, const double h, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef Py_ssize_t xyz = dxyz.shape[1] - cdef double R2 = R * R - cdef double L2 - cdef Py_ssize_t i, j, n - cdef int skip + cdef floats_st R2 = R * R + cdef floats_st L2 + cdef ssize_st i, j, m + cdef bint skip # Reset number of elements - n = 0 - - for i in range(N): - skip = 0 - for j in range(xyz-1): - skip |= dxyz[i, j] > R - if skip or dxyz[i, xyz-1] > h: continue - - L2 = 0. 
- for j in range(xyz-1): - L2 += dxyz[i, j] * dxyz[i, j] - if L2 > R2: continue - idx[n] = i - n += 1 - - return n + m = 0 + + with nogil: + for i in range(n): + skip = 0 + for j in range(nxyz): + skip |= dxyz[i, j] > R + if skip or dxyz[i, nxyz] > h: continue + + L2 = 0. + for j in range(nxyz): + L2 += dxyz[i, j] * dxyz[i, j] + if L2 > R2: continue + idx[m] = i + m += 1 + + if m == 0: + return np.empty([0], dtype=np.int32) + return IDX[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) -def indices_in_sphere(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R): +def indices_in_sphere(floats_st[:, ::1] dxyz, const floats_st R): """ Indices for all coordinates that are within a sphere of radius `R` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the sphere - R : float + R : radius of sphere to check Returns ------- - index : np.ndarray(np.int32) + index: indices of all dxyz coordinates that are within the sphere of radius `R` """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1] idx = np.empty([n], dtype=np.int32) - cdef int[::1] IDX = idx + cdef ssize_st n = dxyz.shape[0] + cdef ndarray[int32_t, mode='c'] IDX = np.empty([n], dtype=np.int32) + cdef int[::1] idx = IDX - n = _indices_in_sphere(dXYZ, R, IDX) - - if n == 0: - return np.empty([0], dtype=np.int32) - return idx[:n].copy() - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_sphere(const double[:, ::1] dxyz, const double R, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef Py_ssize_t xyz = dxyz.shape[1] - cdef double R2 = R * R - cdef Py_ssize_t i, n + cdef floats_st R2 = R * R + cdef ssize_st i, m # Reset number of elements - n = 0 - - for i in range(N): - if all_fabs_le(dxyz, i, R): - if fabs2(dxyz, i) <= R2: - idx[n] = i - n += 1 - return n + m = 0 + + with nogil: + for i in range(n): + if 
all_fabs_le(dxyz, i, R): + if fabs2(dxyz, i) <= R2: + idx[m] = i + m += 1 + if m == 0: + return np.empty([0], dtype=np.int32) + return IDX[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) -def indices_in_sphere_with_dist(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R): +def indices_in_sphere_with_dist(floats_st[:, ::1] dxyz, const floats_st R): """ Indices and the distances for all coordinates that are within a sphere of radius `R` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the sphere R : float radius of sphere to check Returns ------- - index : np.ndarray(np.int32) + index : indices of all dxyz coordinates that are within the sphere of radius `R` - dist : np.ndarray(np.float64) + dist : distances for the coordinates within the sphere of radius `R` (corresponds to `index`) """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([n], dtype=np.int32) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] dist = np.empty([n], dtype=np.float64) - cdef int[::1] IDX = idx - cdef double[::1] DIST = dist - - n = _indices_in_sphere_with_dist(dXYZ, R, DIST, IDX) - - if n == 0: - return np.empty([0], dtype=np.int32), np.empty([0], dtype=np.float64) - return idx[:n].copy(), dist[:n].copy() - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_sphere_with_dist(const double[:, ::1] dxyz, const double R, - double[::1] dist, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef double R2 = R * R - cdef double d - cdef Py_ssize_t i, n + cdef ssize_st n = dxyz.shape[0] + cdef ndarray[int32_t, mode='c'] IDX = np.empty([n], dtype=np.int32) + cdef object dtype = type2dtype[floats_st](1) + cdef ndarray[floats_st, mode='c'] DIST = np.empty([n], dtype=dtype) + cdef int[::1] idx = IDX + cdef floats_st[::1] dist = DIST + + cdef floats_st R2 = R * R + cdef 
floats_st d + cdef ssize_st i, m + + with nogil: + + # Reset number of elements + m = 0 + + if floats_st is cython.float: + for i in range(n): + if all_fabs_le(dxyz, i, R): + d = fabs2(dxyz, i) + if d <= R2: + dist[m] = sqrtf(d) + idx[m] = i + m += 1 - # Reset number of elements - n = 0 + else: + for i in range(n): + if all_fabs_le(dxyz, i, R): + d = fabs2(dxyz, i) + if d <= R2: + dist[m] = sqrt(d) + idx[m] = i + m += 1 - for i in range(N): - if all_fabs_le(dxyz, i, R): - d = fabs2(dxyz, i) - if d <= R2: - dist[n] = sqrt(d) - idx[n] = i - n += 1 - return n + if m == 0: + return np.empty([0], dtype=np.int32), np.empty([0], dtype=dtype) + return IDX[:m].copy(), DIST[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) -def indices_le(np.ndarray a, const double V): +def indices_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``<= V`` Parameters ---------- - a : np.ndarray(np.float64) + a : array to check if 2D, all last dimension values must be ``<= V`` V : float value that is checked against Returns ------- - index : np.ndarray(np.int32) + index : indices for the values in `a` which are less than or equal to `V` """ - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n - - if a.dtype != np.float64: - raise ValueError('indices_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_le1(A1, V, IDX) + n = _indices_le1(A1, V, idx) elif ndim == 2: A2 = a - n = _indices_le2(A2, V, IDX) + n = _indices_le2(A2, V, idx) + + else: + raise NotImplementedError("indices_le not implemented for ndim>2") if n == 0: return np.empty([0], dtype=np.int32) - 
return idx[:n].copy() + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_le1(const double[::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_le1(const floats_st[::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if a[i] <= V: - idx[n] = i + idx[n] = i n += 1 return n @@ -397,8 +346,8 @@ cdef Py_ssize_t _indices_le1(const double[::1] a, const double V, int[::1] idx) @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int all_le(const double[:, ::1] a, const Py_ssize_t i, const double V) noexcept nogil: - cdef Py_ssize_t j +cdef inline bint all_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V) noexcept nogil: + cdef ssize_st j for j in range(a.shape[1]): if a[i, j] > V: return 0 @@ -408,65 +357,65 @@ cdef inline int all_le(const double[:, ::1] a, const Py_ssize_t i, const double @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_le2(const double[:, ::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_le2(const floats_st[:, ::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_le(a, i, V): - idx[n] = i + idx[n] = i n += 1 return n @cython.boundscheck(False) @cython.wraparound(False) -def indices_fabs_le(np.ndarray a, const double V): +def indices_fabs_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``| | <= V`` Parameters ---------- - a : np.ndarray(np.float64) + a : array to check if 2D, all last dimension values must be ``| | <= V`` - V : float + V : value that is checked against Returns ------- - index : 
np.ndarray(np.int32) + index : indices for the values in ``|a|`` which are less than or equal to `V` """ - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx - - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - if a.dtype != np.float64: - raise ValueError('indices_fabs_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_fabs_le1(A1, V, IDX) + n = _indices_fabs_le1(A1, V, idx) elif ndim == 2: A2 = a - n = _indices_fabs_le2(A2, V, IDX) + n = _indices_fabs_le2(A2, V, idx) + + else: + raise NotImplementedError("indices_fabs_le not implemented for ndim>2") if n == 0: return np.empty([0], dtype=np.int32) - return idx[:n].copy() + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline double fabs2(const double[:, ::1] a, const Py_ssize_t i) noexcept nogil: - cdef Py_ssize_t j - cdef double abs2 - abs2 = 0. +cdef inline floats_st fabs2(const floats_st[:, ::1] a, const ssize_st i) noexcept nogil: + cdef ssize_st j + cdef floats_st abs2 = 0. 
+ for j in range(a.shape[1]): abs2 += a[i, j]*a[i, j] return abs2 @@ -475,117 +424,139 @@ cdef inline double fabs2(const double[:, ::1] a, const Py_ssize_t i) noexcept no @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_fabs_le1(const double[::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_fabs_le1(const floats_st[::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 - for i in range(N): - if fabs(a[i]) <= V: - idx[n] = i - n += 1 + if floats_st is cython.float: + for i in range(N): + if fabsf(a[i]) <= V: + idx[n] = i + n += 1 + else: + for i in range(N): + if fabs(a[i]) <= V: + idx[n] = i + n += 1 return n @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int all_fabs_le(const double[:, ::1] a, const Py_ssize_t i, const double V) noexcept nogil: - cdef Py_ssize_t j - for j in range(a.shape[1]): - if fabs(a[i, j]) > V: - return 0 +cdef inline bint all_fabs_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V) noexcept nogil: + cdef ssize_st j + + if floats_st is cython.float: + for j in range(a.shape[1]): + if fabsf(a[i, j]) > V: + return 0 + + else: + for j in range(a.shape[1]): + if fabs(a[i, j]) > V: + return 0 + return 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef int _indices_fabs_le2(const double[:, ::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_fabs_le2(const floats_st[:, ::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_fabs_le(a, i, V): - idx[n] = i + idx[n] = i n += 1 + return n + @cython.boundscheck(False) @cython.wraparound(False) -def indices_gt_le(np.ndarray a, const 
double V1, const double V2): - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx +def indices_gt_le(ndarray a, const floats_st V1, const floats_st V2): + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n - - if a.dtype != np.float64: - raise ValueError('indices_gt_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_gt_le1(A1, V1, V2, IDX) + n = _indices_gt_le1(A1, V1, V2, idx) elif ndim == 2: A2 = a - n = _indices_gt_le2(A2, V1, V2, IDX) + n = _indices_gt_le2(A2, V1, V2, idx) + + else: + raise NotImplementedError("indices_gt_le not implemented for ndim>2") if n == 0: return np.empty([0], dtype=np.int32) - return idx[:n].copy() + + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_gt_le1(const double[::1] a, const double V1, const double V2, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_gt_le1(const floats_st[::1] a, const floats_st V1, const floats_st + V2, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if V1 < a[i]: if a[i] <= V2: - idx[n] = i + idx[n] = i n += 1 + return n -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef inline int all_gt_le(const double[:, ::1] a, const Py_ssize_t i, const double V1, const double V2) noexcept nogil: - cdef Py_ssize_t j - for j in range(a.shape[1]): - if a[i, j] <= V1: - return 0 - elif V2 < a[i, j]: - return 0 - return 1 - @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t 
_indices_gt_le2(const double[:, ::1] a, const double V1, const double V2, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_gt_le2(const floats_st[:, ::1] a, const floats_st V1, const floats_st + V2, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_gt_le(a, i, V1, V2): - idx[n] = i + idx[n] = i n += 1 + return n @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int in_1d(const int[::1] array, const int v) noexcept nogil: - cdef Py_ssize_t N = array.shape[0] - cdef Py_ssize_t i +cdef inline bint all_gt_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V1, + const floats_st V2) noexcept nogil: + cdef ssize_st j + for j in range(a.shape[1]): + if a[i, j] <= V1: + return 0 + elif V2 < a[i, j]: + return 0 + return 1 + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +cdef inline bint in_1d(const ints_st[::1] array, const ints_st v) noexcept nogil: + cdef ssize_st N = array.shape[0] + cdef ssize_st i for i in range(N): if array[i] == v: return 1 @@ -594,14 +565,14 @@ cdef inline int in_1d(const int[::1] array, const int v) noexcept nogil: @cython.boundscheck(False) @cython.wraparound(False) -def index_sorted(np.ndarray[np.int32_t, ndim=1, mode='c'] a, const int v): +def index_sorted(ints_st[::1] a, const ints_st v): """ Return index for the value v in a sorted array, otherwise return -1 Parameters ---------- - a : int[::1] + a : sorted array to check - v : int + v : value to find Returns @@ -609,60 +580,62 @@ def index_sorted(np.ndarray[np.int32_t, ndim=1, mode='c'] a, const int v): int : -1 if not found, otherwise the first index in `a` that is equal to `v` """ # Ensure contiguous arrays - cdef int[::1] A = a - return _index_sorted(A, v) + return _index_sorted(a, v) +# This small code needs all variants +# The variants are declared in the 
_indices.pxd file + @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _index_sorted(const int[::1] a, const int v) noexcept nogil: +@cython.cdivision(True) +cdef ssize_st _index_sorted(const ints_st[::1] a, const _ints_index_sorted_st v) noexcept nogil: """ Return index for the value v in a sorted array, otherwise return -1 This implements a binary search method Parameters ---------- - a : int[::1] + a : sorted array to check - v : int + v : value to find Returns ------- int : 0 if not unique, otherwise 1. """ - cdef Py_ssize_t i, L, R + cdef ssize_st MIN1 = -1 + cdef ssize_st i, L, R # Simple binary search + R = a.shape[0] - 1 + if R == -1: + return MIN1 + elif a[R] < v: + return MIN1 + L = 0 - R = a.shape[0] - if R == 0: - return -1 - elif v < a[L]: - return -1 - - while L < R: - i = (L + R) // 2 + while L <= R: + i = (L + R) / 2 if a[i] < v: L = i + 1 - elif a[i] == v: - return i + elif v < a[i]: + R = i - 1 else: - R = i - if a[R] == v: - return R - return -1 + return i + return MIN1 @cython.boundscheck(False) @cython.wraparound(False) -def sorted_unique(np.ndarray[np.int32_t, ndim=1, mode='c'] a): +def is_sorted_unique(ints_st[::1] a): """ Return True/False if all elements of the sorted array `a` are unique Parameters ---------- - a : np.ndarray(np.int32) + a : sorted array to check Returns @@ -670,31 +643,23 @@ def sorted_unique(np.ndarray[np.int32_t, ndim=1, mode='c'] a): int : 0 if not unique, otherwise 1. 
""" # Ensure contiguous arrays - cdef int[::1] A = a - cdef Py_ssize_t n = A.shape[0] - - return _sorted_unique(n, A) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef int _sorted_unique(const Py_ssize_t n_a, const int[::1] a) noexcept nogil: - cdef Py_ssize_t i - - # Fast return - if n_a <= 1: - return 1 + cdef ssize_st n = a.shape[0] + cdef ssize_st i, ret = 1 + + if n > 1: + # only check for larger than 1 arrays + with nogil: + for i in range(n - 1): + if a[i] == a[i+1]: + ret = 0 + break + return ret - for i in range(n_a - 1): - if a[i] == a[i+1]: - return 0 - return 1 @cython.boundscheck(False) @cython.wraparound(False) -def list_index_le(np.ndarray[np.int32_t, ndim=1, mode='c'] a, np.ndarray[np.int32_t, ndim=1, mode='c'] b): +def list_index_le(ints_st[::1] a, ints_st[::1] b): """ Find indices for each ``a`` such that the returned ``a[i] <= b[ret[i]]`` where `b` is assumed sorted This corresponds to: @@ -704,34 +669,25 @@ def list_index_le(np.ndarray[np.int32_t, ndim=1, mode='c'] a, np.ndarray[np.int3 Parameters ---------- - a : np.ndarray(np.int32) + a : values to check indicies of - b : np.ndarray(np.int32) + b : sorted array to check against Returns ------- - np.ndarray(np.int32): same length as `a` with indicies + indices with same length as `a` """ # Ensure contiguous arrays - cdef int[::1] A = a - cdef int[::1] B = b - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] c = np.empty([A.shape[0]], dtype=np.int32) - cdef int[::1] C = c + cdef ssize_st na = a.shape[0] + cdef ssize_st nb = b.shape[0] + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st] C = np.empty([na], dtype=dtype) + cdef ints_st[::1] c = C - _list_index_le(A, B, C) - return c - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef inline void _list_index_le(const int[::1] a, const int[::1] b, int[::1] c) noexcept nogil: - cdef Py_ssize_t na = a.shape[0] - cdef Py_ssize_t nb = b.shape[0] - 
cdef Py_ssize_t ia, ib - cdef int ai, alast - cdef Py_ssize_t start = 0 + cdef ssize_st ia, ib + cdef ints_st ai, alast + cdef ssize_st start = 0 if na > 0: alast = a[0] @@ -743,6 +699,8 @@ cdef inline void _list_index_le(const int[::1] a, const int[::1] b, int[::1] c) alast = ai for ib in range(start, nb): if ai <= b[ib]: - c[ia] = ib + c[ia] = ib start = ib break + + return C diff --git a/src/sisl/_math_small.pyx b/src/sisl/_math_small.pyx index 5b8cd9b15a..c5e7b39e01 100644 --- a/src/sisl/_math_small.pyx +++ b/src/sisl/_math_small.pyx @@ -1,46 +1,51 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# file, you can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport atan2, sqrt +from libc.math cimport atan2, atan2f, sqrt, sqrtf import numpy as np -# This enables Cython enhanced compatibilities +from numpy cimport dtype, ndarray -cimport numpy as np +from sisl._core._dtypes cimport floats_st, ssize_st, type2dtype @cython.boundscheck(False) @cython.wraparound(False) -def cross3(np.ndarray[np.float64_t, ndim=1, mode='c'] u, np.ndarray[np.float64_t, ndim=1, mode='c'] v): - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y = np.empty([3], dtype=np.float64) +def cross3(const floats_st[::1] u, const floats_st[::1] v): + cdef object dtyp = type2dtype[floats_st](1) + cdef ndarray[floats_st, mode='c'] Y = np.empty([3], dtype=dtyp) + cdef floats_st[::1] y = Y y[0] = u[1] * v[2] - u[2] * v[1] y[1] = u[2] * v[0] - u[0] * v[2] y[2] = u[0] * v[1] - u[1] * v[0] - return y + return Y @cython.boundscheck(False) @cython.wraparound(False) -def dot3(np.ndarray[np.float64_t, ndim=1, mode='c'] u, np.ndarray[np.float64_t, ndim=1, mode='c'] v): - return u[0] * v[0] + u[1] * v[1] + u[2] * v[2] +def dot3(const floats_st[::1] u, const floats_st[::1] v): + cdef floats_st r + r = u[0] * v[0] + u[1] * v[1] + 
u[2] * v[2] + return r @cython.boundscheck(False) @cython.wraparound(False) -def product3(np.ndarray[np.float64_t, ndim=1, mode='c'] v): - return v[0] * v[1] * v[2] +def product3(const floats_st[::1] v): + cdef floats_st r + r = v[0] * v[1] * v[2] + return r @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -def is_ascending(np.ndarray[np.float64_t, ndim=1, mode='c'] v): - cdef double[::1] V = v - cdef Py_ssize_t i - for i in range(1, V.shape[0]): - if V[i-1] > V[i]: +def is_ascending(const floats_st[::1] v): + cdef ssize_st i + for i in range(1, v.shape[0]): + if v[i-1] > v[i]: return 0 return 1 @@ -49,26 +54,37 @@ def is_ascending(np.ndarray[np.float64_t, ndim=1, mode='c'] v): @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def xyz_to_spherical_cos_phi(np.ndarray[np.float64_t, ndim=1, mode='c'] x, - np.ndarray[np.float64_t, ndim=1, mode='c'] y, - np.ndarray[np.float64_t, ndim=1, mode='c'] z): +def xyz_to_spherical_cos_phi(floats_st[::1] x, + floats_st[::1] y, + floats_st[::1] z): """ In x, y, z coordinates shifted to origo Returns x = R, y = theta, z = cos_phi """ - cdef double[::1] X = x - cdef double[::1] Y = y - cdef double[::1] Z = z - cdef Py_ssize_t i - cdef double R - for i in range(X.shape[0]): - # theta (radians) - R = sqrt(X[i] * X[i] + Y[i] * Y[i] + Z[i] * Z[i]) - Y[i] = atan2(Y[i], X[i]) - # Radius - X[i] = R - # cos(phi) - if R > 0.: - Z[i] = Z[i] / R - else: - Z[i] = 0. + cdef ssize_st i + cdef floats_st R + + if floats_st is cython.float: + for i in range(x.shape[0]): + # theta (radians) + R = sqrtf(x[i] * x[i] + y[i] * y[i] + z[i] * z[i]) + y[i] = atan2f(y[i], x[i]) + # Radius + x[i] = R + # cos(phi) + if R > 0.: + z[i] = z[i] / R + else: + z[i] = 0. + else: + for i in range(x.shape[0]): + # theta (radians) + R = sqrt(x[i] * x[i] + y[i] * y[i] + z[i] * z[i]) + y[i] = atan2(y[i], x[i]) + # Radius + x[i] = R + # cos(phi) + if R > 0.: + z[i] = z[i] / R + else: + z[i] = 0. 
diff --git a/src/sisl/physics/CMakeLists.txt b/src/sisl/physics/CMakeLists.txt index a144553ee0..e5b5f706fd 100644 --- a/src/sisl/physics/CMakeLists.txt +++ b/src/sisl/physics/CMakeLists.txt @@ -4,15 +4,13 @@ set_property(DIRECTORY APPEND PROPERTY INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/../_core ) -foreach(source +foreach(source _bloch _phase - _matrix_utils _matrix_k _matrix_dk _matrix_ddk - _matrix_phase _matrix_phase_nc_diag _matrix_phase_nc _matrix_phase_so - _matrix_phase3 _matrix_phase3_nc _matrix_phase3_so - _matrix_sc_phase _matrix_sc_phase_nc_diag _matrix_sc_phase_nc _matrix_sc_phase_so + _matrix_phase _matrix_phase_sc _matrix_phase3 ) add_cython_library( SOURCE ${source}.pyx diff --git a/src/sisl/physics/_matrix_ddk.pyx b/src/sisl/physics/_matrix_ddk.pyx index d1fa01cc41..fc83ac76df 100644 --- a/src/sisl/physics/_matrix_ddk.pyx +++ b/src/sisl/physics/_matrix_ddk.pyx @@ -2,26 +2,19 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
cimport cython -from libc.math cimport fabs import numpy as np - -cimport numpy as np +cimport numpy as cnp from ._common import comply_gauge +from sisl._core._dtypes cimport floats_st from ._matrix_phase3 import * -from ._matrix_phase3_nc import * -from ._matrix_phase3_so import * -from ._matrix_phase_nc_diag import * from ._phase import * -_dot = np.dot -_roll = np.roll - __all__ = ["matrix_ddk", "matrix_ddk_nc", "matrix_ddk_nc_diag", "matrix_ddk_so"] -def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype): +def _phase_ddk(gauge, M, sc, cnp.ndarray[floats_st] k, dtype): # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) @@ -34,10 +27,10 @@ def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype # Rd = dx^2, dy^2, dz^2, dzy, dxz, dyx if gauge == 'cell': phases = phase_rsc(sc, k, dtype).reshape(-1, 1) - Rs = _dot(sc.sc_off, sc.cell) + Rs = np.dot(sc.sc_off, sc.cell) Rd = - (Rs * Rs * phases).astype(dtype, copy=False) - Ro = - (_roll(Rs, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y - Ro *= _roll(Rs, -1, axis=1) # y, z, x + Ro = - (np.roll(Rs, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y + Ro *= np.roll(Rs, -1, axis=1) # y, z, x del phases, Rs p_opt = 1 @@ -46,151 +39,73 @@ def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype rij = M.Rij()._csr._D phases = phase_rij(rij, sc, k, dtype).reshape(-1, 1) Rd = - (rij * rij * phases).astype(dtype, copy=False) - Ro = - (_roll(rij, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y - Ro *= _roll(rij, -1, axis=1) # y, z, x + Ro = - (np.roll(rij, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y + Ro *= np.roll(rij, -1, axis=1) # y, z, x del rij, phases - p_opt = 1 + p_opt = 0 return p_opt, Rd, Ro -def matrix_ddk(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): 
dtype = phase_dtype(k, M.dtype, dtype) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk(M._csr, idx, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk(csr, const int idx, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - - else: - # Default must be something else. - dd[:3] = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.float64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, 
csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.float32: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) + dd[3:] = _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) else: - raise ValueError("matrix_ddk: currently only supports dtype in [float32, float64, complex64, complex128].") + # Default must be something else. 
+ dd[:3] = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) + dd[3:] = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd -def matrix_ddk_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk_nc(M._csr, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk_nc(csr, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - - else: - # Default must be something else. 
- dd[:3] = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - else: - dd[:3] = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) else: - raise ValueError("matrix_ddk_nc: currently only supports dtype in [complex64, complex128].") + # Default must be something else. 
+ dd[:3] = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd -def matrix_ddk_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) + # We need the phases to be consecutive in memory Rxx = Rd[:, 0].copy() Ryy = Rd[:, 1].copy() Rzz = Rd[:, 2].copy() @@ -200,84 +115,49 @@ def matrix_ddk_nc_diag(gauge, M, const int idx, sc, Ryx = Ro[:, 2].copy() del Ro - # Get each of them - dxx = _matrix_ddk_nc_diag(M._csr, idx, Rxx, dtype, format, p_opt) - dyy = _matrix_ddk_nc_diag(M._csr, idx, Ryy, dtype, format, p_opt) - dzz = _matrix_ddk_nc_diag(M._csr, idx, Rzz, dtype, format, p_opt) - dzy = _matrix_ddk_nc_diag(M._csr, idx, Rzy, dtype, format, p_opt) - dxz = _matrix_ddk_nc_diag(M._csr, idx, Rxz, dtype, format, p_opt) - dyx = _matrix_ddk_nc_diag(M._csr, idx, Ryx, dtype, format, p_opt) - return dxx, dyy, dzz, dzy, dxz, dyx + csr = M._csr + if format in ("array", "matrix", "dense"): + dxx = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxx, p_opt) + dyy = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryy, p_opt) + dzz = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzz, p_opt) + dzy = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzy, p_opt) + dxz = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxz, p_opt) + dyx = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryx, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def 
_matrix_ddk_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - - # Default must be something else. - return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + else: + dxx = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxx, p_opt).asformat(format) + dyy = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryy, p_opt).asformat(format) + dzz = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzz, p_opt).asformat(format) + dzy = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzy, p_opt).asformat(format) + dxz = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxz, p_opt).asformat(format) + dyx = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryx, p_opt).asformat(format) - raise ValueError("matrix_ddk_nc_diag: only supports dtype in [complex64, complex128].") + return dxx, dyy, dzz, dzy, dxz, dyx -def matrix_ddk_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk_so(M._csr, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk_so(csr, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = 
_phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - - else: - # Default must be something else. - dd[:3] = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - else: - dd[:3] = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) else: - raise ValueError("matrix_ddk_so: currently only supports dtype in [complex64, complex128].") + # Default must be something else. 
+ dd[:3] = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd diff --git a/src/sisl/physics/_matrix_dk.pyx b/src/sisl/physics/_matrix_dk.pyx index 0523e7a8ba..1810c70093 100644 --- a/src/sisl/physics/_matrix_dk.pyx +++ b/src/sisl/physics/_matrix_dk.pyx @@ -2,25 +2,22 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs import numpy as np -cimport numpy as np +cimport numpy as cnp + +from sisl._core._dtypes cimport floats_st from ._common import comply_gauge +from ._matrix_phase import * from ._matrix_phase3 import * -from ._matrix_phase3_nc import * -from ._matrix_phase3_so import * -from ._matrix_phase_nc_diag import * from ._phase import * -_dot = np.dot - __all__ = ["matrix_dk", "matrik_dk_nc", "matrik_dk_nc_diag", "matrik_dk_so"] -def _phase_dk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype): +def _phase_dk(gauge, M, sc, cnp.ndarray[floats_st] k, dtype): # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) @@ -29,7 +26,7 @@ def _phase_dk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype) # i R if gauge == 'cell': iRs = phase_rsc(sc, k, dtype).reshape(-1, 1) - iRs = (1j * _dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) + iRs = (1j * np.dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) p_opt = 1 elif gauge == 'atom': @@ -42,68 +39,35 @@ def _phase_dk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype) return p_opt, iRs -def matrix_dk(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk(gauge, M, const int idx, sc, 
cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk(M._csr, idx, iRs, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk(csr, const int idx, iRs, dtype, format, p_opt): - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) + csr = M._csr - # Default must be something else. - d1, d2, d3 = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + if format in ("array", "matrix", "dense"): + return _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - d1, d2, d3 = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + # Default must be something else. 
+ d1, d2, d3 = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) - raise ValueError("matrix_dk: currently only supports dtype in [complex64, complex128].") - -def matrix_dk_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk_nc(M._csr, iRs, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_nc(csr, iRs, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - # Default must be something else. - d1, d2, d3 = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + csr = M._csr - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - d1, d2, d3 = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + if format in ("array", "matrix", "dense"): + return _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - raise ValueError("matrix_dk_nc: currently only supports dtype in [complex64, complex128].") + # Default must be something else. 
+ d1, d2, d3 = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) -def matrix_dk_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) @@ -112,59 +76,30 @@ def matrix_dk_nc_diag(gauge, M, const int idx, sc, phz = iRs[:, 2].copy() del iRs - # Get each of them - x = _matrix_dk_nc_diag(M._csr, idx, phx, dtype, format, p_opt) - y = _matrix_dk_nc_diag(M._csr, idx, phy, dtype, format, p_opt) - z = _matrix_dk_nc_diag(M._csr, idx, phz, dtype, format, p_opt) - return x, y, z - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) + csr = M._csr - # Default must be something else. 
- return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + x = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phx, p_opt) + y = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phy, p_opt) + z = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phz, p_opt) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + else: + x = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phx, p_opt).asformat(format) + y = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phy, p_opt).asformat(format) + z = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phz, p_opt).asformat(format) - raise ValueError("matrix_dk_nc_diag: only supports dtype in [complex64, complex128].") + return x, y, z -def matrix_dk_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk_so(M._csr, iRs, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_so(csr, iRs, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - # Default must be something else. 
- d1, d2, d3 = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + csr = M._csr - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - d1, d2, d3 = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + if format in ("array", "matrix", "dense"): + return _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - raise ValueError("matrix_dk_so: currently only supports dtype in [complex64, complex128].") + # Default must be something else. + d1, d2, d3 = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) diff --git a/src/sisl/physics/_matrix_k.pyx b/src/sisl/physics/_matrix_k.pyx index be8b80a451..b0ebd7cc5c 100644 --- a/src/sisl/physics/_matrix_k.pyx +++ b/src/sisl/physics/_matrix_k.pyx @@ -4,38 +4,46 @@ cimport cython import numpy as np +cimport numpy as cnp -cimport numpy as np - +from sisl._core._dtypes cimport floats_st from ._common import comply_gauge from ._matrix_phase import * -from ._matrix_phase_nc import * -from ._matrix_phase_nc_diag import * -from ._matrix_phase_so import * -from ._matrix_sc_phase import * -from ._matrix_sc_phase_nc import * -from ._matrix_sc_phase_nc_diag import * -from ._matrix_sc_phase_so import * +from ._matrix_phase_sc import * from ._phase import * +from ._phase cimport is_gamma __all__ = ["matrix_k", "matrix_k_nc", "matrix_k_so", "matrix_k_nc_diag"] -def matrix_k(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype) +def _phase_k(gauge, M, sc, cnp.ndarray[floats_st] K, dtype): + cdef floats_st[::1] k = K + + # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) - if gauge 
== 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 + if is_gamma(k): + # no - phases required + p_opt = -1 + phases = np.empty([0], dtype=dtype) - elif gauge == 'atom': + elif gauge == "atom": M.finalize() phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) p_opt = 0 + + elif gauge == "cell": + phases = phase_rsc(sc, k, dtype) + p_opt = 1 + else: - raise ValueError("matrix_k: gauge must be in [cell, atom]") + raise ValueError("phase_k: gauge must be in [cell, atom]") + + return p_opt, phases + +def matrix_k(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) # Check that the dimension *works* if idx < 0: @@ -44,83 +52,32 @@ def matrix_k(gauge, M, const int idx, sc, d = M.shape[-1] raise ValueError(f"matrix_k: unknown index specification {idx} must be in 0:{d}") + csr = M._csr + if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k(M._csr, nc, idx, phases, dtype, format, p_opt) - - return _matrix_k(M._csr, idx, phases, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: if format in ("array", "matrix", "dense"): - return _phase_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) + return _phase_sc_array(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt) - # Default must be something else. 
- return _phase_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + return _phase_sc_csr(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt).asformat(format) - elif dtype == np.float64: - if format in ("array", "matrix", "dense"): - return _array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx) - return _csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - elif dtype == np.float32: - if format in ("array", "matrix", "dense"): - return _array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx) - return _csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx).asformat(format) - - raise ValueError("matrix_k: currently only supports dtype in [float32, float64, complex64, complex128].") + return _phase_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k(csr, const int nc, const int idx, phases, dtype, format, p_opt): - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype in (np.float32, np.float64): - # direct conversion, should be simple (generally 
only at Gamma-point) - m = csr.tocsr(idx) - if format in ("array", "matrix", "dense"): - return m.toarray() - return m - - raise ValueError("matrix_k: (supercell format) currently only supports dtype in [float32, float64, complex64, complex128].") - - -def matrix_k_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_k_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_nc: gauge must be in [cell, atom]") + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + + csr = M._csr if format.startswith("sc:") or format == "sc": if format == "sc": @@ -128,165 +85,61 @@ def matrix_k_nc(gauge, M, sc, else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k_nc(M._csr, nc, phases, dtype, format, p_opt) - return _matrix_k_nc(M._csr, phases, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_nc(csr, phases, dtype, format, p_opt): - if csr.shape[2] < 4: - raise ValueError("matrix_k_nc requires input matrix to have 4 components") - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _phase_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - elif dtype == np.complex64: if format in ("array", "matrix", "dense"): - return _phase_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_nc: only supports dtype in [complex64, complex128].") + return _phase_sc_array_nc(csr.ptr, csr.ncol, csr.col, nc, csr._D, 
phases, p_opt) + return _phase_sc_csr_nc(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt).asformat(format) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_nc(csr, nc, phases, dtype, format, p_opt): + if format in ("array", "matrix", "dense"): + return _phase_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - if csr.shape[2] < 4: - raise ValueError("matrix_k_nc: (supercell format) requires input matrix to have 4 components") + return _phase_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - raise ValueError("matrix_k_nc: (supercell format) only supports dtype in [complex64, complex128].") +def matrix_k_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype, True) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + csr = M._csr -def matrix_k_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_so: gauge must be in [r, R]") if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return 
_matrix_sc_k_so(M._csr, nc, phases, dtype, format, p_opt) - return _matrix_k_so(M._csr, phases, dtype, format, p_opt) - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_so(csr, phases, dtype, format, p_opt): - - if csr.shape[2] < 8: - raise ValueError("matrix_k_so requires input matrix to have 8 components") - - if dtype == np.complex128: if format in ("array", "matrix", "dense"): - return _phase_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) + return _phase_sc_array_nc_diag(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt) - raise ValueError("matrix_k_so: only supports dtype in [complex64, complex128].") + return _phase_sc_csr_nc_diag(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_so(csr, nc, phases, dtype, format, p_opt): + return _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - if csr.shape[2] < 8: - raise ValueError("matrix_k_so: (supercell format) requires input matrix to have 8 components") - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return 
_sc_phase_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_so: (supercell format) only supports dtype in [complex64, complex128].") +def matrix_k_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype, True) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + csr = M._csr -def matrix_k_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_nc_diag: gauge must be in [r, R]") if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k_nc_diag(M._csr, nc, idx, phases, dtype, format, p_opt) - return _matrix_k_nc_diag(M._csr, idx, phases, dtype, format, p_opt) - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + return _phase_sc_array_so(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt) - raise 
ValueError("matrix_k_nc_diag: only supports dtype in [complex64, complex128].") + return _phase_sc_csr_so(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array_so(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_nc_diag(csr, const int nc, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_nc_diag: (supercell format) only supports dtype in [complex64, complex128].") + return _phase_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) diff --git a/src/sisl/physics/_matrix_phase.pyx b/src/sisl/physics/_matrix_phase.pyx index ac5bf3832a..a554e726ee 100644 --- a/src/sisl/physics/_matrix_phase.pyx +++ b/src/sisl/physics/_matrix_phase.pyx @@ -1,276 +1,582 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True cimport cython import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix - -__all__ = ['_csr_f32', '_csr_f64', '_phase_csr_c64', '_phase_csr_c128', - '_array_f32', '_array_f64', '_phase_array_c64', '_phase_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_real: - float - double - -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _csr_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef float[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) +cimport numpy as cnp +from scipy.sparse import csr_matrix -def _csr_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): +from sisl._indices cimport _index_sorted - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL +from 
sisl._core._sparse import ( + fold_csr_matrix, + fold_csr_matrix_nc, + fold_csr_matrix_nc_diag, +) + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + ints_st, + numerics_st, + ssize_st, + type2dtype, +) + +__all__ = [ + "_phase_csr", + "_phase_array", + "_phase_csr_nc", + "_phase_array_nc", + "_phase_csr_nc_diag", + "_phase_array_nc_diag", + "_phase_csr_so", + "_phase_array_so", +] + +""" +In this Cython code we use `p_opt` to signal whether the resulting +matrices will use the phases variable. + +There are 3 cases: + +p_opt == -1: + no phases are added, the `phases` array will not be accessed +p_opt == 0: + the phases are *per* sparse index, i.e. the array is as big + as the sparse data. +p_opt == 1: + the phases are in reduced format where each column block + uses the same phase. A column block is defined as `col[ind] / nr` which + results in a unique index. +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef double[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] + V_PTR, V_NCOL, V_COL = fold_csr_matrix(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + # This may fail,
when numerics_st is complex, but floatcomplexs_st is float + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef floatcomplexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, ind, s, s_idx, c + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = _index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += D[ind, idx] + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = _index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += (D[ind, idx] * phases[ind]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = _index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += (D[ind, idx] * phases[s]) return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _phase_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - 
for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[ind] - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[col[ind] / nr] - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=dtype) + cdef floatcomplexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, ind, s, c + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + v[r, c] += D[ind, idx] + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + v[r, c] += (D[ind, idx] * phases[ind]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + v[r, c] += (D[ind, idx] * phases[s]) + return V -def _phase_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES +@cython.boundscheck(False) +@cython.wraparound(False) 
+@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[ind] - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[col[ind] / nr] - + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc_diag(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = D[ind, idx] + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + 
tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = (phases[ind] * D[ind, idx]) + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = (phases[s] * D[ind, idx]) + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + nr = nr * 2 return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _array_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.float32) - cdef float[:, ::1] v = V - cdef Py_ssize_t r, ind - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind] % nr] += D[ind, idx] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, rr, ind, s, c + + cdef complexs_st d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + d = D[ind, idx] + v[rr, c] += d + v[rr + 1, c + 1] += d + + elif p_opt == 0: + for r in range(nr): + rr 
= r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + d = (phases[ind] * D[ind, idx]) + v[rr, c] += d + v[rr + 1, c + 1] += d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + d = (phases[s] * D[ind, idx]) + v[rr, c] += d + v[rr + 1, c + 1] += d return V -def _array_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL + # Now create the folded sparse elements + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st ph, d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += D[ind, 0] + d = (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr] + s_idx+1] += d + v[v_ptr[rr+1] + s_idx] += d.conjugate() + v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + 
ph = phases[ind] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += (D[ind, 0] * ph) + v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * + ph) + v[v_ptr[rr+1] + s_idx] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += (D[ind, 0] * ph) + v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * + ph) + v[v_ptr[rr+1] + s_idx] += ((D[ind, 2] + 1j * D[ind, + 3]).conjugate() * ph) + v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] * ph) + + nr = nr * 2 + return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.float64) - cdef double[:, ::1] v = V - cdef Py_ssize_t r, ind - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind] % nr] += D[ind, idx] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, rr, ind, s, c + + cdef complexs_st ph, d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + v[rr, c] += D[ind, 0] + d = (D[ind, 2] + 1j * D[ind, 3]) + v[rr, c + 1] += d + v[rr + 1, c] += d.conjugate() + 
v[rr + 1, c + 1] += D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + v[rr, c] += (D[ind, 0] * ph) + v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr + 1, c] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * + ph) + v[rr + 1, c + 1] += (D[ind, 1] * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + v[rr, c] += (D[ind, 0] * ph) + v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr + 1, c] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * + ph) + v[rr + 1, c + 1] += (D[ind, 1] * ph) return V -def _phase_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef Py_ssize_t r, ind, c - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[ind] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + # complexs_st requires only 4 indices... 
+ floats_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[col[ind] / nr] - - return V + # Now create the folded sparse elements + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st ph + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += (D[ind, 0] + 1j * D[ind, 4]) + v[v_ptr[rr] + s_idx+1] += (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr+1] + s_idx] += (D[ind, 6] + 1j * D[ind, 7]) + v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr+1] + s_idx] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[v_ptr[rr+1] + s_idx+1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[v_ptr[rr] + 
s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr+1] + s_idx] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[v_ptr[rr+1] + s_idx+1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + + nr = nr * 2 + return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _phase_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef Py_ssize_t r, ind, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[ind] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[col[ind] / nr] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + # complexs_st requires only 4 indices... 
+ floats_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, rr, s, c, ind + + cdef complexs_st ph + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + v[rr, c] += (D[ind, 0] + 1j * D[ind, 4]) + v[rr, c + 1] += (D[ind, 2] + 1j * D[ind, 3]) + v[rr + 1, c] += (D[ind, 6] + 1j * D[ind, 7]) + v[rr + 1, c + 1] += (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + v[rr, c] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr + 1, c] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[rr + 1, c + 1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + v[rr, c] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr + 1, c] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[rr + 1, c + 1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) return V diff --git a/src/sisl/physics/_matrix_phase3.pyx b/src/sisl/physics/_matrix_phase3.pyx index b40155b738..62000bc755 100644 --- a/src/sisl/physics/_matrix_phase3.pyx +++ b/src/sisl/physics/_matrix_phase3.pyx @@ -1,417 +1,531 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True cimport cython import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix - -__all__ = ['_phase3_csr_f32', '_phase3_csr_f64', - '_phase3_csr_c64', '_phase3_csr_c128', - '_phase3_array_f32', '_phase3_array_f64', - '_phase3_array_c64', '_phase3_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_real: - float - double - -ctypedef fused numeric_complex: - float - double - float complex - double complex +cimport numpy as cnp -def _phase3_csr_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float32_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float[:, ::1] phases = PHASES - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef float[::1] vx = Vx - cdef float[::1] vy = Vy - cdef float[::1] vz = Vz - cdef float d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * 
phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] - - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - +from scipy.sparse import csr_matrix -def _phase3_csr_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float64_t, ndim=2, mode='c'] PHASES, const int p_opt): +from sisl._indices cimport _index_sorted - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double[:, ::1] phases = PHASES +from sisl._core._sparse import fold_csr_matrix, fold_csr_matrix_nc + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + ints_st, + numerics_st, + ssize_st, + type2dtype, +) + +__all__ = [ + "_phase3_csr", + "_phase3_array", + "_phase3_csr_nc", + "_phase3_array_nc", + "_phase3_csr_so", + "_phase3_array_so", +] + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[:, ::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, 
mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef double[::1] vx = Vx - cdef double[::1] vy = Vy - cdef double[::1] vz = Vz - cdef double d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] + V_PTR, V_NCOL, V_COL = fold_csr_matrix(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + # This may fail, when numerics_st is complex, but floatcomplexs_st is float + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, ind, s, s_idx, c + + cdef numerics_st d + + with nogil: + if p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) + d = D[ind, idx] + Vx[v_ptr[r] + s_idx] += (d * phases[ind, 0]) + 
Vy[v_ptr[r] + s_idx] += (d * phases[ind, 1]) + Vz[v_ptr[r] + s_idx] += (d * phases[ind, 2]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) + d = D[ind, idx] + Vx[v_ptr[r] + s_idx] += (d * phases[s, 0]) + Vy[v_ptr[r] + s_idx] += (d * phases[s, 1]) + Vz[v_ptr[r] + s_idx] += (d * phases[s, 2]) return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) -def _phase3_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * 
phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=dtype) + + # Local columns + cdef ints_st r, ind, s, c + cdef numerics_st d + + with nogil: + if p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + d = D[ind, idx] + Vx[r, c] += (d * phases[ind, 0]) + Vy[r, c] += (d * phases[ind, 1]) + Vz[r, c] += (d * phases[ind, 2]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + d = D[ind, idx] + Vx[r, c] += (d * phases[s, 0]) + Vy[r, c] += (d * phases[s, 1]) + Vz[r, c] += (d * phases[s, 2]) - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) + return Vx, Vy, Vz -def _phase3_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - 
np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): +### +# Non-collinear code +### - # Convert to memory viewsz - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] - + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = 
V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st ph, v12 + + # Local columns (not in NC form) + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + ph = phases[ind, 0] + Vx[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vx[v_ptr[rr] + s_idx+1] += ph * v12 + Vx[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vx[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + ph = phases[ind, 1] + Vy[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vy[v_ptr[rr] + s_idx+1] += ph * v12 + Vy[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vy[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + ph = phases[ind, 2] + Vz[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vz[v_ptr[rr] + s_idx+1] += ph * v12 + Vz[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vz[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + + ph = phases[s, 0] + Vx[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vx[v_ptr[rr] + s_idx+1] += ph * v12 + Vx[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vx[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + ph = phases[s, 1] + Vy[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 
3]) + Vy[v_ptr[rr] + s_idx+1] += ph * v12 + Vy[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vy[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + ph = phases[s, 2] + Vz[v_ptr[rr] + s_idx] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vz[v_ptr[rr] + s_idx+1] += ph * v12 + Vz[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() + Vz[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + + nr = nr * 2 return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) -def _phase3_array_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float32_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.float32) - cdef float[:, ::1] vx = Vx - cdef float[:, ::1] vy = Vy - cdef float[:, ::1] vz = Vz - cdef float d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def 
_phase3_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=dtype) + + cdef complexs_st ph, vv + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + ph = phases[ind, 0] + Vx[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vx[rr, c+1] += ph * v12 + Vx[rr+1, c] += ph * v12.conjugate() + Vx[rr+1, c+1] += ph * D[ind, 1] + + ph = phases[ind, 1] + Vy[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vy[rr, c+1] += ph * v12 + Vy[rr+1, c] += ph * v12.conjugate() + Vy[rr+1, c+1] += ph * D[ind, 1] + + ph = phases[ind, 2] + Vz[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vz[rr, c+1] += ph * v12 + Vz[rr+1, c] += ph * v12.conjugate() + Vz[rr+1, c+1] += ph * D[ind, 1] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + ph = phases[s, 0] + Vx[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vx[rr, c+1] += ph * v12 + Vx[rr+1, c] += ph * v12.conjugate() + Vx[rr+1, c+1] += ph * D[ind, 1] + + ph = phases[s, 1] + Vy[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vy[rr, c+1] += ph * v12 + Vy[rr+1, c] += ph * v12.conjugate() + Vy[rr+1, c+1] += ph * D[ind, 1] + + ph = phases[s, 2] + Vz[rr, c] += ph * D[ind, 0] + v12 = (D[ind, 2] + 1j * D[ind, 3]) + Vz[rr, c+1] += ph * v12 + Vz[rr+1, c] += ph * v12.conjugate() + Vz[rr+1, c+1] += ph * 
D[ind, 1] return Vx, Vy, Vz -def _phase3_array_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.float64) - cdef double[:, ::1] vx = Vx - cdef double[:, ::1] vy = Vy - cdef double[:, ::1] vz = Vz - cdef double d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] - - return Vx, Vy, Vz +### +# Spin-orbit coupling matrices +### -def _phase3_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory viezws - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.complex64) - 
cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.complex64) - cdef float complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex d - - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + # complexs_st requires only 4 indices... 
+ floats_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): - return Vx, Vy, Vz + # Now create the folded sparse elements + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st ph, vv + + # Local columns (not in NC form) + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + ph = phases[ind, 0] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vx[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vx[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vx[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vx[v_ptr[rr+1] + s_idx+1] += ph * vv + + ph = phases[ind, 1] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vy[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vy[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vy[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vy[v_ptr[rr+1] + s_idx+1] += ph * vv + + ph = phases[ind, 2] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vz[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vz[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vz[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vz[v_ptr[rr+1] + s_idx+1] += ph * vv + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + 
ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + ph = phases[s, 0] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vx[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vx[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vx[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vx[v_ptr[rr+1] + s_idx+1] += ph * vv + + ph = phases[s, 1] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vy[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vy[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vy[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vy[v_ptr[rr+1] + s_idx+1] += ph * vv + + ph = phases[s, 2] + vv = (D[ind, 0] + 1j * D[ind, 4]) + Vz[v_ptr[rr] + s_idx] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + Vz[v_ptr[rr] + s_idx+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + Vz[v_ptr[rr+1] + s_idx] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + Vz[v_ptr[rr+1] + s_idx+1] += ph * vv + + nr = nr * 2 + return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) -def _phase3_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], 
dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + # complexs_st requires only 4 indices... + floats_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] vx = Vx + cdef complexs_st[:, ::1] vy = Vy + cdef complexs_st[:, ::1] vz = Vz + + cdef complexs_st ph, vv + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + ph = phases[ind, 0] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vx[rr, c] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vx[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vx[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vx[rr+1, c+1] += ph * vv + + ph = phases[ind, 1] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vy[rr, c] += ph * 
vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vy[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vy[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vy[rr+1, c+1] += ph * vv + + ph = phases[ind, 2] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vz[rr, c] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vz[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vz[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vz[rr+1, c+1] += ph * vv + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + ph = phases[s, 0] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vx[rr, c] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vx[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vx[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vx[rr+1, c+1] += ph * vv + + ph = phases[s, 1] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vy[rr, c] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vy[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vy[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vy[rr+1, c+1] += ph * vv + + ph = phases[s, 2] + vv = (D[ind, 0] + 1j * D[ind, 4]) + vz[rr, c] += ph * vv + vv = (D[ind, 2] + 1j * D[ind, 3]) + vz[rr, c+1] += ph * vv + vv = (D[ind, 6] + 1j * D[ind, 7]) + vz[rr+1, c] += ph * vv + vv = (D[ind, 1] + 1j * D[ind, 5]) + vz[rr+1, c+1] += ph * vv return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase3_nc.pyx b/src/sisl/physics/_matrix_phase3_nc.pyx deleted file mode 100644 index 9aee5b8cf0..0000000000 --- a/src/sisl/physics/_matrix_phase3_nc.pyx +++ /dev/null @@ -1,366 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ["_phase3_nc_csr_c64", "_phase3_nc_csr_c128", - "_phase3_nc_array_c64", "_phase3_nc_array_c128"] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase3_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + 
s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = 
vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + 
ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 
1]) - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, c, s - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - else: 
- for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - return Vx, Vy, Vz - - -def _phase3_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c, s - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - 
vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase3_so.pyx b/src/sisl/physics/_matrix_phase3_so.pyx deleted file mode 100644 index 2fba15af46..0000000000 --- a/src/sisl/physics/_matrix_phase3_so.pyx +++ /dev/null @@ -1,438 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ["_phase3_so_csr_c64", "_phase3_so_csr_c128", - "_phase3_so_array_c64", "_phase3_so_array_c128"] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase3_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = 
_index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] 
+ s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double 
complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv 
- vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float 
complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, s, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = 
vy[rr+1, c+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - return Vx, Vy, Vz - - -def _phase3_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = 
vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase_nc.pyx b/src/sisl/physics/_matrix_phase_nc.pyx deleted file mode 100644 index 5b10ae6cf2..0000000000 --- a/src/sisl/physics/_matrix_phase_nc.pyx +++ /dev/null @@ -1,224 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ['_phase_nc_csr_c64', '_phase_nc_csr_c128', - '_phase_nc_array_c64', '_phase_nc_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = 
_index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += 
v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - return V - - -def _phase_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - 
cdef double complex[:, ::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - return V diff --git a/src/sisl/physics/_matrix_phase_nc_diag.pyx b/src/sisl/physics/_matrix_phase_nc_diag.pyx deleted file mode 100644 index 8a590f79c6..0000000000 --- a/src/sisl/physics/_matrix_phase_nc_diag.pyx +++ /dev/null @@ -1,198 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_diagonal_nc - -__all__ = ['_phase_nc_diag_csr_c64', '_phase_nc_diag_csr_c128', - '_phase_nc_diag_array_c64', '_phase_nc_diag_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_nc_diag_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_diagonal_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - nr = nr * 2 - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase_nc_diag_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int 
p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_diagonal_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - nr = nr * 2 - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase_nc_diag_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex vv 
- cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - return V - - -def _phase_nc_diag_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - return V diff --git a/src/sisl/physics/_matrix_phase_sc.pyx b/src/sisl/physics/_matrix_phase_sc.pyx new file mode 100644 index 0000000000..02c683183e --- /dev/null +++ b/src/sisl/physics/_matrix_phase_sc.pyx @@ -0,0 +1,617 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +cimport cython + +import numpy as np + +cimport numpy as cnp + +from scipy.sparse import csr_matrix + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + inline_sum, + ints_st, + numerics_st, + ssize_st, + type2dtype, +) +from sisl._core._sparse cimport ncol2ptr_nc +from sisl._indices cimport _index_sorted + +__all__ = [ + "_phase_sc_csr", + "_phase_sc_array", + "_phase_sc_csr_nc", + "_phase_sc_array_nc", + "_phase_sc_csr_nc_diag", + "_phase_sc_array_nc_diag", + "_phase_sc_csr_so", + "_phase_sc_array_so", +] + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef floatcomplexs_st[::1] v = V + + cdef ints_st r, c, nz, ind, cind + cdef floatcomplexs_st ph + + # Copy ncol + v_ncol[:] = ncol[:] + + # This abstraction allows to handle non-finalized CSR matrices + cind = 0 + + with nogil: + if p_opt == -1: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + v[cind] = D[ind, idx] + v_col[cind] = col[ind] + cind = cind + 1 + + elif p_opt 
== 0: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[ind] + v[cind] = (D[ind, idx] * ph) + v_col[cind] = col[ind] + cind = cind + 1 + + else: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[col[ind] / nr] + v[cind] = (D[ind, idx] * ph) + v_col[cind] = col[ind] + cind = cind + 1 + + v_ptr[nr] = cind + + return csr_matrix((V, V_COL, V_PTR), shape=(nr, nc)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=dtype) + cdef floatcomplexs_st[:, ::1] v = V + + cdef ints_st r, c, nz, ind + cdef floatcomplexs_st ph + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + v[r, col[ind]] = D[ind, idx] + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[ind] + v[r, col[ind]] = (D[ind, idx] * ph) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[col[ind] / nr] + v[r, col[ind]] = (D[ind, idx] * ph) + + return V + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef 
cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 2) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = D[ind, 0] + v_col[v_ptr[rr] + cind] = c + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr] + cind+1] = ph + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ph.conjugate() + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = D[ind, 1] + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[v_ptr[rr] + cind] = (D[ind, 0] * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = (D[ind, 1] * ph) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + v[v_ptr[rr] + cind] = (D[ind, 0] * ph) + 
v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = (D[ind, 1] * ph) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st ph + cdef ints_st r, rr, c, nz, ind + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + v[rr, c] = D[ind, 0] + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[rr, c+1] = ph + v[rr+1, c] = ph.conjugate() + v[rr+1, c+1] = D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[rr, c] = (D[ind, 0] * ph) + v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr+1, c] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v[rr+1, c+1] = (D[ind, 1] * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + v[rr, c] = (D[ind, 0] * ph) + v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr+1, c] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v[rr+1, c+1] = (D[ind, 1] * ph) + + return V + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def 
_phase_sc_csr_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 1) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] + v_ncol[rr+1] = ncol[r] + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = D[ind, idx] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = D[ind, idx] + v_col[v_ptr[rr+1] + cind] = c + 1 + + cind = cind + 1 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[v_ptr[rr] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr+1] + cind] = c + 1 + + cind = cind + 1 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + + v[v_ptr[rr] + cind] = (D[ind, idx] * 
ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr+1] + cind] = c + 1 + + cind = cind + 1 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st d + cdef ints_st r, rr, c, nz, ind + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = D[ind, idx] + v[rr, c] = d + v[rr+1, c+1] = d + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = (D[ind, idx] * phases[ind]) + + v[rr, c] = d + v[rr+1, c+1] = d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = (D[ind, idx] * phases[col[ind] / nr]) + + v[rr, c] = d + v[rr+1, c+1] = d + + return V + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + # complexs_st requires only 4 indices... 
+ floats_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 2) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = (D[ind, 0] + 1j * D[ind, 4]) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = (D[ind, 2] + 1j * D[ind, 3]) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = (D[ind, 6] + 1j * D[ind, 7]) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = (D[ind, 1] + 1j * D[ind, 5]) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[v_ptr[rr] + cind] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = ((D[ind, 1] 
+ 1j * D[ind, 5]) * ph) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + v[v_ptr[rr] + cind] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + # complexs_st requires only 4 indices... 
+ floats_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st ph + cdef ints_st r, rr, c, nz, ind + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[rr, c] = (D[ind, 0] + 1j * D[ind, 4]) + v[rr, c+1] = (D[ind, 2] + 1j * D[ind, 3]) + v[rr+1, c] = (D[ind, 6] + 1j * D[ind, 7]) + v[rr+1, c+1] = (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[rr, c] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr+1, c] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[rr+1, c+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + v[rr, c] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[rr+1, c] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[rr+1, c+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + + return V diff --git a/src/sisl/physics/_matrix_phase_so.pyx b/src/sisl/physics/_matrix_phase_so.pyx deleted file mode 100644 index ea2b6b1572..0000000000 --- a/src/sisl/physics/_matrix_phase_so.pyx +++ /dev/null @@ -1,248 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ['_phase_so_csr_c64', '_phase_so_csr_c128', - '_phase_so_array_c64', '_phase_so_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + 
s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 
7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], 
ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - return V - - -def _phase_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - return V diff --git a/src/sisl/physics/_matrix_sc_phase.pyx 
b/src/sisl/physics/_matrix_sc_phase.pyx deleted file mode 100644 index 12a7f17aff..0000000000 --- a/src/sisl/physics/_matrix_sc_phase.pyx +++ /dev/null @@ -1,185 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum - -__all__ = ['_sc_phase_csr_c64', '_sc_phase_csr_c128', - '_sc_phase_array_c64', '_sc_phase_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_real: - float - double - -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] ph = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef Py_ssize_t r, ind, cind - - # Copy ncol - 
v_ncol[:] = ncol[:] - - cind = 0 - if p_opt == 0: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[ind] - v_col[cind] = col[ind] - cind = cind + 1 - else: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[col[ind] / nr] - v_col[cind] = col[ind] - cind = cind + 1 - v_ptr[nr] = cind - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nc)) - - -def _sc_phase_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] ph = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef Py_ssize_t r, ind, cind - - # Copy ncol - v_ncol[:] = ncol[:] - - cind = 0 - if p_opt == 0: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[ind] - v_col[cind] = col[ind] - cind = cind + 1 - else: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[col[ind] / nr] - v_col[cind] = col[ind] - cind = cind + 1 - v_ptr[nr] = cind - - return csr_matrix((V, 
V_COL, V_PTR), shape=(nr, nc)) - - -def _sc_phase_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] ph = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef Py_ssize_t r, ind - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[ind] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[col[ind] / nr] - - return V - - -def _sc_phase_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] ph = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef Py_ssize_t r, ind - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[ind] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[col[ind] / nr] - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_nc.pyx b/src/sisl/physics/_matrix_sc_phase_nc.pyx deleted file mode 100644 index 
2c98d45d62..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_nc.pyx +++ /dev/null @@ -1,272 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_double - -__all__ = ['_sc_phase_nc_csr_c64', '_sc_phase_nc_csr_c128', - '_sc_phase_nc_array_c64', '_sc_phase_nc_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, cind, c - - # We have to do it 
manually due to the double elements per matrix element - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef 
np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol 
= NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - return V - - -def _sc_phase_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = 
phases[col[ind] / nr] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx b/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx deleted file mode 100644 index d2ac71b721..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx +++ /dev/null @@ -1,234 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_single - -__all__ = ['_sc_phase_nc_diag_csr_c64', '_sc_phase_nc_diag_csr_c128', - '_sc_phase_nc_diag_array_c64', '_sc_phase_nc_diag_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_nc_diag_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_single(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - - cind = cind + 1 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr 
* 2, nc * 2)) - - -def _sc_phase_nc_diag_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, cind, c - - # We have to do it manually due to the double elements per matrix element - ncol2ptr_single(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - return csr_matrix((V, V_COL, 
V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_diag_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - return V - - -def _sc_phase_nc_diag_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = vv 
- v[rr+1, c+1] = vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_so.pyx b/src/sisl/physics/_matrix_sc_phase_so.pyx deleted file mode 100644 index 60d3327cfa..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_so.pyx +++ /dev/null @@ -1,293 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_double - -__all__ = ['_sc_phase_so_csr_c64', '_sc_phase_so_csr_c128', - '_sc_phase_so_array_c64', '_sc_phase_so_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in 
range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, cind - - # We have to do it manually due to the double elements per matrix element - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] 
* 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * 
D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - return V - - -def _sc_phase_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j 
* D[ind, 5]) - v[rr+1, c+1] = ph * vv - - return V diff --git a/src/sisl/physics/_matrix_utils.pxd b/src/sisl/physics/_matrix_utils.pxd deleted file mode 100644 index c83feed2b8..0000000000 --- a/src/sisl/physics/_matrix_utils.pxd +++ /dev/null @@ -1,3 +0,0 @@ -# Define the interfaces for the functions exposed through cimport -cdef void ncol2ptr_double(const int nr, const int[::1] ncol, int[::1] ptr) nogil -cdef void ncol2ptr_single(const int nr, const int[::1] ncol, int[::1] ptr) nogil diff --git a/src/sisl/physics/_matrix_utils.pyx b/src/sisl/physics/_matrix_utils.pyx deleted file mode 100644 index 0f9014a094..0000000000 --- a/src/sisl/physics/_matrix_utils.pyx +++ /dev/null @@ -1,37 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -__all__ = ["ncol2ptr_double", "ncol2ptr_single"] - - -cdef void ncol2ptr_double(const int nr, const int[::1] ncol, int[::1] ptr) noexcept nogil: - cdef Py_ssize_t r, rr - - # this is NC/SOC - ptr[0] = 0 - ptr[1] = ncol[0] * 2 - for r in range(1, nr): - rr = r * 2 - # do both - ptr[rr] = ptr[rr - 1] + ncol[r-1] * 2 - ptr[rr+1] = ptr[rr] + ncol[r] * 2 - - ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] * 2 - - -cdef void ncol2ptr_single(const int nr, const int[::1] ncol, int[::1] ptr) noexcept nogil: - cdef Py_ssize_t r, rr - - # this is NC/SOC - ptr[0] = 0 - ptr[1] = ncol[0] - for r in range(1, nr): - rr = r * 2 - # do both - ptr[rr] = ptr[rr - 1] + ncol[r-1] - ptr[rr+1] = ptr[rr] + ncol[r] - - ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] diff --git a/src/sisl/physics/_phase.pxd b/src/sisl/physics/_phase.pxd new file mode 100644 index 0000000000..7449aaa779 --- /dev/null +++ b/src/sisl/physics/_phase.pxd @@ -0,0 +1,7 @@ +# This Source Code Form is subject to the terms of the 
Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +from sisl._core._dtypes cimport floats_st + + +cdef bint is_gamma(const floats_st[::1] k) noexcept nogil diff --git a/src/sisl/physics/_phase.pyx b/src/sisl/physics/_phase.pyx index 183804867b..ddd784cce9 100644 --- a/src/sisl/physics/_phase.pyx +++ b/src/sisl/physics/_phase.pyx @@ -2,33 +2,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs - -import numpy as np - -cimport numpy as np +from libc.math cimport fabs, fabsf from numpy import complex64, complex128, dot, exp, float32, float64, ndarray, ones, pi -from numpy cimport complex64_t, complex128_t, float32_t, float64_t, ndarray - -__all__ = ['phase_dtype', 'phase_rsc', 'phase_rij'] +from sisl._core._dtypes cimport floats_st @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int is_gamma(const double[::1] k) noexcept nogil: - if fabs(k[0]) > 0.0000001: - return 0 - if fabs(k[1]) > 0.0000001: - return 0 - if fabs(k[2]) > 0.0000001: - return 0 +cdef inline bint is_gamma(const floats_st[::1] k) noexcept nogil: + if floats_st is cython.float: + if fabsf(k[0]) > 0.0000001: + return 0 + if fabsf(k[1]) > 0.0000001: + return 0 + if fabsf(k[2]) > 0.0000001: + return 0 + + else: + if fabs(k[0]) > 0.0000001: + return 0 + if fabs(k[1]) > 0.0000001: + return 0 + if fabs(k[2]) > 0.0000001: + return 0 return 1 -def phase_dtype(ndarray[float64_t, ndim=1, mode='c'] k, M_dtype, R_dtype, force_complex=False): +def phase_dtype(const floats_st[::1] k, M_dtype, R_dtype, force_complex: bool=False): if is_gamma(k) and not force_complex: if R_dtype is None: return M_dtype @@ -52,7 +55,7 @@ def phase_dtype(ndarray[float64_t, ndim=1, mode='c'] k, M_dtype, R_dtype, force_ return R_dtype -def phase_rsc(sc, 
ndarray[float64_t, ndim=1, mode='c'] k, dtype): +def phase_rsc(sc, const floats_st[::1] k, dtype): """ Calculate the phases for the supercell interactions using k """ # Figure out if this is a Gamma point or not @@ -66,7 +69,7 @@ def phase_rsc(sc, ndarray[float64_t, ndim=1, mode='c'] k, dtype): return phases -def phase_rij(rij, sc, ndarray[float64_t, ndim=1, mode='c'] k, dtype): +def phase_rij(rij, sc, const floats_st[::1] k, dtype): """ Calculate the phases for the distance matrix using k """ # Figure out if this is a Gamma point or not From bd8f23fa91867d72f000073564a32e8f34f53c5e Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Mon, 4 Nov 2024 12:20:07 +0100 Subject: [PATCH 02/14] offloaded NC/SO calculations to simple routines This means that it is much simpler to track problems. There is no code duplication in the spin-box calculations. I am not sure whether the python-yellow annotations are only for the int/long variables, and when down-casting. In any case, I think this is more easy to manage. 
Signed-off-by: Nick Papior --- src/sisl/_core/_dtypes.pxd | 9 + src/sisl/physics/CMakeLists.txt | 1 + src/sisl/physics/_matrix_phase.pyx | 134 +++++++---- src/sisl/physics/_matrix_phase3.pyx | 330 +++++++++++++------------- src/sisl/physics/_matrix_phase_sc.pyx | 116 ++++++--- src/sisl/physics/_matrix_utils.pxd | 30 +++ src/sisl/physics/_matrix_utils.pyx | 63 +++++ 7 files changed, 435 insertions(+), 248 deletions(-) create mode 100644 src/sisl/physics/_matrix_utils.pxd create mode 100644 src/sisl/physics/_matrix_utils.pyx diff --git a/src/sisl/_core/_dtypes.pxd b/src/sisl/_core/_dtypes.pxd index 45ce9d4683..1d16a8fe40 100644 --- a/src/sisl/_core/_dtypes.pxd +++ b/src/sisl/_core/_dtypes.pxd @@ -34,10 +34,12 @@ ctypedef fused floats_st: float double + ctypedef fused complexs_st: float complex double complex + ctypedef fused floatcomplexs_st: float double @@ -53,6 +55,13 @@ ctypedef fused floatcomplexs2_st: double complex +# We need this fused data-type to omit complex data-types +ctypedef fused reals_st: + int + long + float + double + ctypedef fused numerics_st: int long diff --git a/src/sisl/physics/CMakeLists.txt b/src/sisl/physics/CMakeLists.txt index e5b5f706fd..f5eb34a534 100644 --- a/src/sisl/physics/CMakeLists.txt +++ b/src/sisl/physics/CMakeLists.txt @@ -9,6 +9,7 @@ set_property(DIRECTORY foreach(source _bloch _phase + _matrix_utils _matrix_k _matrix_dk _matrix_ddk _matrix_phase _matrix_phase_sc _matrix_phase3 ) diff --git a/src/sisl/physics/_matrix_phase.pyx b/src/sisl/physics/_matrix_phase.pyx index a554e726ee..4e5aedae93 100644 --- a/src/sisl/physics/_matrix_phase.pyx +++ b/src/sisl/physics/_matrix_phase.pyx @@ -27,6 +27,13 @@ from sisl._core._dtypes cimport ( type2dtype, ) +from ._matrix_utils cimport ( + _f_matrix_box_so, + _matrix_box_nc, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + __all__ = [ "_phase_csr", "_phase_array", @@ -317,7 +324,9 @@ def _phase_csr_nc(ints_st[::1] ptr, cdef ints_st nr = ncol.shape[0] cdef ints_st r, rr, ind, s, s_idx, 
c - cdef complexs_st ph, d + cdef complexs_st ph + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] with nogil: if p_opt == -1: @@ -330,9 +339,9 @@ def _phase_csr_nc(ints_st[::1] ptr, s_idx = _index_sorted(tmp, c) v[v_ptr[rr] + s_idx] += D[ind, 0] - d = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += d - v[v_ptr[rr+1] + s_idx] += d.conjugate() + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr] + s_idx+1] += ph + v[v_ptr[rr+1] + s_idx] += ph.conjugate() v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] elif p_opt == 0: @@ -345,11 +354,12 @@ def _phase_csr_nc(ints_st[::1] ptr, tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] s_idx = _index_sorted(tmp, c) - v[v_ptr[rr] + s_idx] += (D[ind, 0] * ph) - v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * - ph) - v[v_ptr[rr+1] + s_idx] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) - v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] else: for r in range(nr): @@ -362,12 +372,12 @@ def _phase_csr_nc(ints_st[::1] ptr, tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] s_idx = _index_sorted(tmp, c) - v[v_ptr[rr] + s_idx] += (D[ind, 0] * ph) - v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * - ph) - v[v_ptr[rr+1] + s_idx] += ((D[ind, 2] + 1j * D[ind, - 3]).conjugate() * ph) - v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] nr = nr * 2 return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) @@ -394,7 +404,9 @@ def _phase_array_nc(ints_st[::1] ptr, # Local columns cdef ints_st r, rr, ind, s, c - cdef complexs_st ph, d + cdef complexs_st ph + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] with nogil: if p_opt == -1: @@ -404,9 +416,9 @@ def _phase_array_nc(ints_st[::1] 
ptr, c = (col[ind] % nr) * 2 v[rr, c] += D[ind, 0] - d = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c + 1] += d - v[rr + 1, c] += d.conjugate() + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[rr, c + 1] += ph + v[rr + 1, c] += ph.conjugate() v[rr + 1, c + 1] += D[ind, 1] elif p_opt == 0: @@ -416,11 +428,12 @@ def _phase_array_nc(ints_st[::1] ptr, c = (col[ind] % nr) * 2 ph = phases[ind] - v[rr, c] += (D[ind, 0] * ph) - v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr + 1, c] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * - ph) - v[rr + 1, c + 1] += (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] else: for r in range(nr): @@ -430,11 +443,12 @@ def _phase_array_nc(ints_st[::1] ptr, s = col[ind] / nr ph = phases[s] - v[rr, c] += (D[ind, 0] * ph) - v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr + 1, c] += ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * - ph) - v[rr + 1, c + 1] += (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] return V @@ -447,8 +461,7 @@ def _phase_array_nc(ints_st[::1] ptr, def _phase_csr_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, - # complexs_st requires only 4 indices... 
- floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[::1] phases, const int p_opt): @@ -468,6 +481,14 @@ def _phase_csr_so(ints_st[::1] ptr, cdef ints_st r, rr, ind, s, s_idx, c cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real with nogil: if p_opt == -1: @@ -494,10 +515,12 @@ def _phase_csr_so(ints_st[::1] ptr, tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] s_idx = _index_sorted(tmp, c) - v[v_ptr[rr] + s_idx] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[v_ptr[rr+1] + s_idx] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[v_ptr[rr+1] + s_idx+1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] else: for r in range(nr): @@ -510,10 +533,12 @@ def _phase_csr_so(ints_st[::1] ptr, tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] s_idx = _index_sorted(tmp, c) - v[v_ptr[rr] + s_idx] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[v_ptr[rr] + s_idx+1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[v_ptr[rr+1] + s_idx] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[v_ptr[rr+1] + s_idx+1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] nr = nr * 2 return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) @@ -526,8 +551,7 @@ def _phase_csr_so(ints_st[::1] ptr, def _phase_array_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, - # complexs_st requires only 4 indices... 
- floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[::1] phases, const int p_opt): @@ -541,6 +565,14 @@ def _phase_array_so(ints_st[::1] ptr, cdef ints_st r, rr, s, c, ind cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real with nogil: if p_opt == -1: @@ -561,10 +593,12 @@ def _phase_array_so(ints_st[::1] ptr, c = (col[ind] % nr) * 2 ph = phases[ind] - v[rr, c] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr + 1, c] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[rr + 1, c + 1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] else: for r in range(nr): @@ -574,9 +608,11 @@ def _phase_array_so(ints_st[::1] ptr, s = col[ind] / nr ph = phases[s] - v[rr, c] += ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[rr, c + 1] += ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr + 1, c] += ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[rr + 1, c + 1] += ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] return V diff --git a/src/sisl/physics/_matrix_phase3.pyx b/src/sisl/physics/_matrix_phase3.pyx index 62000bc755..683a51e4f0 100644 --- a/src/sisl/physics/_matrix_phase3.pyx +++ b/src/sisl/physics/_matrix_phase3.pyx @@ -23,6 +23,13 @@ from sisl._core._dtypes cimport ( type2dtype, ) +from ._matrix_utils cimport ( + _f_matrix_box_so, + _matrix_box_nc, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + __all__ = [ "_phase3_csr", "_phase3_array", @@ -167,6 +174,8 @@ def _phase3_csr_nc(ints_st[::1] ptr, cdef ints_st nr = ncol.shape[0] cdef ints_st r, rr, ind, s, c cdef ints_st s_idx + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] with nogil: if p_opt == 0: @@ 
-176,26 +185,28 @@ def _phase3_csr_nc(ints_st[::1] ptr, c = (col[ind] % nr) * 2 s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + d = &D[ind, 0] + ph = phases[ind, 0] - Vx[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vx[v_ptr[rr] + s_idx+1] += ph * v12 - Vx[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vx[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 1] - Vy[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vy[v_ptr[rr] + s_idx+1] += ph * v12 - Vy[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vy[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 2] - Vz[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vz[v_ptr[rr] + s_idx+1] += ph * v12 - Vz[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vz[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] else: for r in range(nr): @@ -206,27 +217,28 @@ def _phase3_csr_nc(ints_st[::1] ptr, s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + d = &D[ind, 0] ph = phases[s, 0] - Vx[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vx[v_ptr[rr] + s_idx+1] += ph * v12 - Vx[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vx[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 1] - Vy[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j 
* D[ind, 3]) - Vy[v_ptr[rr] + s_idx+1] += ph * v12 - Vy[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vy[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 2] - Vz[v_ptr[rr] + s_idx] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vz[v_ptr[rr] + s_idx+1] += ph * v12 - Vz[v_ptr[rr+1] + s_idx] += ph * v12.conjugate() - Vz[v_ptr[rr+1] + s_idx+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] nr = nr * 2 return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) @@ -250,9 +262,11 @@ def _phase3_array_nc(ints_st[::1] ptr, cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=dtype) cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=dtype) - cdef complexs_st ph, vv + cdef complexs_st ph cdef ints_st r, rr, ind, s, c cdef ints_st s_idx + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] with nogil: if p_opt == 0: @@ -261,26 +275,28 @@ def _phase3_array_nc(ints_st[::1] ptr, for ind in range(ptr[r], ptr[r] + ncol[r]): c = (col[ind] % nr) * 2 + d = &D[ind, 0] + ph = phases[ind, 0] - Vx[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vx[rr, c+1] += ph * v12 - Vx[rr+1, c] += ph * v12.conjugate() - Vx[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vx[rr, c] += M[0] + Vx[rr, c+1] += M[1] + Vx[rr+1, c] += M[2] + Vx[rr+1, c+1] += M[3] ph = phases[ind, 1] - Vy[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vy[rr, c+1] += ph * v12 - Vy[rr+1, c] += ph * v12.conjugate() - Vy[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vy[rr, c] += M[0] + Vy[rr, c+1] += M[1] + 
Vy[rr+1, c] += M[2] + Vy[rr+1, c+1] += M[3] ph = phases[ind, 2] - Vz[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vz[rr, c+1] += ph * v12 - Vz[rr+1, c] += ph * v12.conjugate() - Vz[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vz[rr, c] += M[0] + Vz[rr, c+1] += M[1] + Vz[rr+1, c] += M[2] + Vz[rr+1, c+1] += M[3] else: for r in range(nr): @@ -289,26 +305,28 @@ def _phase3_array_nc(ints_st[::1] ptr, c = (col[ind] % nr) * 2 s = col[ind] / nr + d = &D[ind, 0] + ph = phases[s, 0] - Vx[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vx[rr, c+1] += ph * v12 - Vx[rr+1, c] += ph * v12.conjugate() - Vx[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vx[rr, c] += M[0] + Vx[rr, c+1] += M[1] + Vx[rr+1, c] += M[2] + Vx[rr+1, c+1] += M[3] ph = phases[s, 1] - Vy[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vy[rr, c+1] += ph * v12 - Vy[rr+1, c] += ph * v12.conjugate() - Vy[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vy[rr, c] += M[0] + Vy[rr, c+1] += M[1] + Vy[rr+1, c] += M[2] + Vy[rr+1, c+1] += M[3] ph = phases[s, 2] - Vz[rr, c] += ph * D[ind, 0] - v12 = (D[ind, 2] + 1j * D[ind, 3]) - Vz[rr, c+1] += ph * v12 - Vz[rr+1, c] += ph * v12.conjugate() - Vz[rr+1, c+1] += ph * D[ind, 1] + _matrix_box_nc(d, ph, M) + Vz[rr, c] += M[0] + Vz[rr, c+1] += M[1] + Vz[rr+1, c] += M[2] + Vz[rr+1, c+1] += M[3] return Vx, Vy, Vz @@ -325,8 +343,7 @@ def _phase3_array_nc(ints_st[::1] ptr, def _phase3_csr_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, - # complexs_st requires only 4 indices... 
- floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[:, ::1] phases, const int p_opt): @@ -340,12 +357,20 @@ def _phase3_csr_so(ints_st[::1] ptr, cdef cnp.ndarray[complexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) cdef cnp.ndarray[complexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) cdef cnp.ndarray[complexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) - cdef complexs_st ph, vv + cdef complexs_st ph # Local columns (not in NC form) cdef ints_st nr = ncol.shape[0] cdef ints_st r, rr, ind, s, c cdef ints_st s_idx + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real with nogil: if p_opt == 0: @@ -355,35 +380,28 @@ def _phase3_csr_so(ints_st[::1] ptr, c = (col[ind] % nr) * 2 s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + d = &D[ind, 0] + ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vx[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - Vx[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vx[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vx[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vy[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - Vy[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vy[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vy[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vz[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j 
* D[ind, 3]) - Vz[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vz[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vz[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] else: for r in range(nr): @@ -394,35 +412,28 @@ def _phase3_csr_so(ints_st[::1] ptr, s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + d = &D[ind, 0] + ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vx[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - Vx[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vx[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vx[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vy[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - Vy[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vy[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vy[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - Vz[v_ptr[rr] + s_idx] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - Vz[v_ptr[rr] + s_idx+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - Vz[v_ptr[rr+1] + s_idx] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - Vz[v_ptr[rr+1] + s_idx+1] += ph * vv + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] nr = nr * 2 return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, 
V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) @@ -435,8 +446,7 @@ def _phase3_csr_so(ints_st[::1] ptr, def _phase3_array_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, - # complexs_st requires only 4 indices... - floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[:, ::1] phases, const int p_opt): @@ -450,9 +460,17 @@ def _phase3_array_so(ints_st[::1] ptr, cdef complexs_st[:, ::1] vy = Vy cdef complexs_st[:, ::1] vz = Vz - cdef complexs_st ph, vv + cdef complexs_st ph cdef ints_st r, rr, ind, s, c cdef ints_st s_idx + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real with nogil: if p_opt == 0: @@ -461,35 +479,28 @@ def _phase3_array_so(ints_st[::1] ptr, for ind in range(ptr[r], ptr[r] + ncol[r]): c = (col[ind] % nr) * 2 + d = &D[ind, 0] + ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] += ph * vv + func(d, ph, M) + vx[rr, c] += M[0] + vx[rr, c+1] += M[1] + vx[rr+1, c] += M[2] + vx[rr+1, c+1] += M[3] ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] += ph * vv + func(d, ph, M) + vy[rr, c] += M[0] + vy[rr, c+1] += M[1] + vy[rr+1, c] += M[2] + vy[rr+1, c+1] += M[3] ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] += ph * vv + func(d, ph, M) + vz[rr, c] += M[0] + vz[rr, c+1] += M[1] + 
vz[rr+1, c] += M[2] + vz[rr+1, c+1] += M[3] else: for r in range(nr): @@ -498,34 +509,27 @@ def _phase3_array_so(ints_st[::1] ptr, c = (col[ind] % nr) * 2 s = col[ind] / nr + d = &D[ind, 0] + ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] += ph * vv + func(d, ph, M) + vx[rr, c] += M[0] + vx[rr, c+1] += M[1] + vx[rr+1, c] += M[2] + vx[rr+1, c+1] += M[3] ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] += ph * vv + func(d, ph, M) + vy[rr, c] += M[0] + vy[rr, c+1] += M[1] + vy[rr+1, c] += M[2] + vy[rr+1, c+1] += M[3] ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] += ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] += ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] += ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] += ph * vv + func(d, ph, M) + vz[rr, c] += M[0] + vz[rr, c+1] += M[1] + vz[rr+1, c] += M[2] + vz[rr+1, c+1] += M[3] return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase_sc.pyx b/src/sisl/physics/_matrix_phase_sc.pyx index 02c683183e..aae52887cd 100644 --- a/src/sisl/physics/_matrix_phase_sc.pyx +++ b/src/sisl/physics/_matrix_phase_sc.pyx @@ -22,6 +22,13 @@ from sisl._core._dtypes cimport ( from sisl._core._sparse cimport ncol2ptr_nc from sisl._indices cimport _index_sorted +from ._matrix_utils cimport ( + _f_matrix_box_so, + _matrix_box_nc, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + __all__ = [ "_phase_sc_csr", "_phase_sc_array", @@ -174,6 +181,8 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, cdef ints_st r, rr, cind, c, nz, ind cdef complexs_st ph + cdef numerics_st *d + cdef complexs_st *M = [0, 
0, 0, 0] # We have to do it manually due to the double elements per matrix element ncol2ptr_nc(nr, ncol, v_ptr, 2) @@ -212,13 +221,15 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[ind] - v[v_ptr[rr] + cind] = (D[ind, 0] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr] + cind+1] = M[1] v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v[v_ptr[rr+1] + cind] = M[2] v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = (D[ind, 1] * ph) + v[v_ptr[rr+1] + cind+1] = M[3] v_col[v_ptr[rr+1] + cind+1] = c + 1 cind = cind + 2 @@ -234,13 +245,16 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[col[ind] / nr] - v[v_ptr[rr] + cind] = (D[ind, 0] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr] + cind+1] = M[1] v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) + v[v_ptr[rr+1] + cind] = M[2] v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = (D[ind, 1] * ph) + v[v_ptr[rr+1] + cind+1] = M[3] v_col[v_ptr[rr+1] + cind+1] = c + 1 cind = cind + 2 @@ -268,6 +282,8 @@ def _phase_sc_array_nc(ints_st[::1] ptr, cdef complexs_st ph cdef ints_st r, rr, c, nz, ind + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] with nogil: if p_opt == -1: @@ -288,10 +304,12 @@ def _phase_sc_array_nc(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[ind] - v[rr, c] = (D[ind, 0] * ph) - v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr+1, c] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) - v[rr+1, c+1] = (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] else: for r in 
range(nr): @@ -300,10 +318,12 @@ def _phase_sc_array_nc(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[col[ind] / nr] - v[rr, c] = (D[ind, 0] * ph) - v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr+1, c] = ((D[ind, 2] + 1j * D[ind, 3]).conjugate() * ph) - v[rr+1, c+1] = (D[ind, 1] * ph) + d = &D[ind, 0] + _matrix_box_nc(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] return V @@ -461,8 +481,7 @@ def _phase_sc_csr_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, const ints_st nc, - # complexs_st requires only 4 indices... - floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[::1] phases, const int p_opt): @@ -483,6 +502,14 @@ def _phase_sc_csr_so(ints_st[::1] ptr, cdef ints_st r, rr, cind, c, nz, ind cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real # We have to do it manually due to the double elements per matrix element ncol2ptr_nc(nr, ncol, v_ptr, 2) @@ -520,13 +547,16 @@ def _phase_sc_csr_so(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[ind] - v[v_ptr[rr] + cind] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + d = &D[ind, 0] + func(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr] + cind+1] = M[1] v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[v_ptr[rr+1] + cind] = M[2] v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + v[v_ptr[rr+1] + cind+1] = M[3] v_col[v_ptr[rr+1] + cind+1] = c + 1 cind = cind + 2 @@ -542,13 +572,16 @@ def _phase_sc_csr_so(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[col[ind] / nr] - v[v_ptr[rr] + cind] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) + d = &D[ind, 0] + func(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c - 
v[v_ptr[rr] + cind+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) + v[v_ptr[rr] + cind+1] = M[1] v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) + v[v_ptr[rr+1] + cind] = M[2] v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + v[v_ptr[rr+1] + cind+1] = M[3] v_col[v_ptr[rr+1] + cind+1] = c + 1 cind = cind + 2 @@ -564,8 +597,7 @@ def _phase_sc_array_so(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, const ints_st nc, - # complexs_st requires only 4 indices... - floats_st[:, ::1] D, + numerics_st[:, ::1] D, complexs_st[::1] phases, const int p_opt): @@ -577,6 +609,14 @@ def _phase_sc_array_so(ints_st[::1] ptr, cdef complexs_st ph cdef ints_st r, rr, c, nz, ind + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real with nogil: if p_opt == -1: @@ -597,10 +637,12 @@ def _phase_sc_array_so(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[ind] - v[rr, c] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr+1, c] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[rr+1, c+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] else: for r in range(nr): @@ -609,9 +651,11 @@ def _phase_sc_array_so(ints_st[::1] ptr, c = col[ind] * 2 ph = phases[col[ind] / nr] - v[rr, c] = ((D[ind, 0] + 1j * D[ind, 4]) * ph) - v[rr, c+1] = ((D[ind, 2] + 1j * D[ind, 3]) * ph) - v[rr+1, c] = ((D[ind, 6] + 1j * D[ind, 7]) * ph) - v[rr+1, c+1] = ((D[ind, 1] + 1j * D[ind, 5]) * ph) + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] return V diff --git a/src/sisl/physics/_matrix_utils.pxd b/src/sisl/physics/_matrix_utils.pxd new file mode 100644 index 0000000000..80cfa7352d --- 
/dev/null +++ b/src/sisl/physics/_matrix_utils.pxd @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +cimport cython + +import numpy as np + +cimport numpy as cnp + +from sisl._core._dtypes cimport complexs_st, numerics_st, reals_st + +ctypedef fused _internal_complexs_st: + float complex + double complex + +ctypedef void(*_f_matrix_box_so)(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_nc(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_so_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_so_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil diff --git a/src/sisl/physics/_matrix_utils.pyx b/src/sisl/physics/_matrix_utils.pyx new file mode 100644 index 0000000000..0e3ee597f9 --- /dev/null +++ b/src/sisl/physics/_matrix_utils.pyx @@ -0,0 +1,63 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +cimport cython + +import numpy as np + +cimport numpy as cnp + +from sisl._core._dtypes cimport complexs_st, numerics_st, reals_st + +""" +These routines converts an array of n-values into a spin-box matrix. + +In all cases, the resulting linear returned matrix `M` +has 4 entries. 
+ +M[0] == spin[0, 0] +M[1] == spin[0, 1] +M[2] == spin[1, 0] +M[3] == spin[1, 1] +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_nc(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = (data[0] * phase) + M[1] = ((data[2] + 1j * data[3]) * phase) + M[2] = ((data[2] + 1j * data[3]).conjugate() * phase) + M[3] = (data[1] * phase) + + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_so_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = ((data[0] + 1j * data[4]) * phase) + M[1] = ((data[2] + 1j * data[3]) * phase) + M[2] = ((data[6] + 1j * data[7]) * phase) + M[3] = ((data[1] + 1j * data[5]) * phase) + + +# necessary to double the interfaces +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_so_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = (data[0] * phase) + M[1] = (data[2] * phase) + M[2] = (data[3] * phase) + M[3] = (data[1] * phase) From 1cba3e261b66bd49d89ba958fdf88704b786904a Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Mon, 4 Nov 2024 22:06:03 +0100 Subject: [PATCH 03/14] redid fold_csr_matrix for much better perf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simple benchmarks showed that siesta Hamiltonians to Hk can be much faster by changing how the folding of the matrices is done. Instead of incrementally adding elements, and searching for duplicates before each addition of elements, we now build the entire array, and use numpy.unique to reduce the actual array. This leverages the numpy unique function which already returns a sorted array.
It marginally slows down CSR creation of matrices with few edges per orbital (TB models), but it will be much faster for larger models stemming from DFT or the like. Tests for this commit: %timeit H.Hk() %timeit H.Hk([0.1] * 3) %timeit H.Hk(format="array") %timeit H.Hk([0.1] * 3, format="array") For a *many* edge system, we get: 67.2 ms ± 1.51 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 85.4 ms ± 8.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 5.59 ms ± 426 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 11.3 ms ± 39.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) While for a *few* edge system, we get: 9.1 ms ± 52.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 9.25 ms ± 65.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 5.75 ms ± 397 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 6.17 ms ± 394 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) For commit v0.15.1-57-g6bbbde39 we get: 196 ms ± 3.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 214 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 6.58 ms ± 139 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 12.8 ms ± 58.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) and 7.41 ms ± 77.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 7.37 ms ± 73.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 6.04 ms ± 383 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 5.81 ms ± 37 µs per loop (mean ± std. dev.
of 7 runs, 100 loops each) Signed-off-by: Nick Papior --- benchmarks/optimizations/hamiltonian.ipynb | 77 ++++++++++++++++++++++ src/sisl/_core/_dtypes.pxd | 10 +-- src/sisl/_core/_sparse.pyx | 75 ++++++++++----------- src/sisl/_indices.pyx | 11 ++++ src/sisl/physics/_matrix_dk.pyx | 17 ++--- src/sisl/physics/_matrix_k.pyx | 6 +- src/sisl/physics/_matrix_phase.pyx | 7 +- 7 files changed, 141 insertions(+), 62 deletions(-) create mode 100644 benchmarks/optimizations/hamiltonian.ipynb diff --git a/benchmarks/optimizations/hamiltonian.ipynb b/benchmarks/optimizations/hamiltonian.ipynb new file mode 100644 index 0000000000..e6edd7ff5c --- /dev/null +++ b/benchmarks/optimizations/hamiltonian.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we test and check the performance of the `Hk` implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "import numpy as np\n", + "import sisl as si\n", + "\n", + "files = Path(os.environ[\"SISL_FILES_TESTS\"])\n", + "siesta = files / \"siesta\"\n", + "\n", + "N = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "H = si.Hamiltonian.read(siesta / \"Si_pdos_k\" / \"Si_pdos.TSHS\").tile(N, 0).tile(N, 1)\n", + "\n", + "%timeit H.Hk()\n", + "%timeit H.Hk([0.1] * 3)\n", + "%timeit H.Hk(format=\"array\")\n", + "%timeit H.Hk([0.1] * 3, format=\"array\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "H = si.Hamiltonian.read(siesta / \"Pt2_soc\" / \"Pt2_xx.TSHS\").tile(N, 0).tile(N // 2, 1)\n", + "\n", + "%timeit H.Hk()\n", + "%timeit H.Hk([0.1] * 3)\n", + "%timeit H.Hk(format=\"array\")\n", + "%timeit H.Hk([0.1] * 3, format=\"array\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/sisl/_core/_dtypes.pxd b/src/sisl/_core/_dtypes.pxd index 1d16a8fe40..9d2247c9e5 100644 --- a/src/sisl/_core/_dtypes.pxd +++ b/src/sisl/_core/_dtypes.pxd @@ -30,6 +30,7 @@ ctypedef fused ints_st: int long + ctypedef fused floats_st: float double @@ -47,14 +48,6 @@ ctypedef fused floatcomplexs_st: double complex -# Another one to have two separate ctypes (cross-product of type-defs) -ctypedef fused floatcomplexs2_st: - float - double - float complex - double complex - - # We need this fused data-type to omit complex data-types ctypedef fused reals_st: int @@ -70,7 +63,6 @@ ctypedef fused numerics_st: float complex double complex - ctypedef fused _type2dtype_types_st: short int diff --git a/src/sisl/_core/_sparse.pyx b/src/sisl/_core/_sparse.pyx index bad7f3508c..c0ff04e706 100644 --- a/src/sisl/_core/_sparse.pyx +++ b/src/sisl/_core/_sparse.pyx @@ -46,6 +46,7 @@ def fold_csr_matrix(ints_st[::1] ptr, cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr + 1], dtype=dtype) cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr], dtype=dtype) cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol)], dtype=dtype) + cdef ints_st[::1] fold_ptr = FOLD_ptr cdef ints_st[::1] fold_ncol = FOLD_ncol cdef ints_st[::1] fold_col = FOLD_col @@ -61,21 +62,29 @@ def fold_csr_matrix(ints_st[::1] ptr, for r in range(nr): # Initialize the pointer arrays - if ncol[r] > 0: - fold_ncol[r] = 1 - fold_col[fold_ptr[r]] = col[ptr[r]] % nr - else: - fold_ncol[r] = 0 - - for ind in range(ptr[r] + 1, ptr[r] + ncol[r]): - c = col[ind] % nr - if not in_1d(fold_col[fold_ptr[r]:fold_ptr[r] + fold_ncol[r]], c): - 
fold_col[fold_ptr[r] + fold_ncol[r]] = c - fold_ncol[r] += 1 - - # Sort indices (we should implement our own sorting algorithm) - tmp = np.sort(fold_col[fold_ptr[r]:fold_ptr[r] + fold_ncol[r]]) - for ind in range(fold_ncol[r]): + # Even though large supercells has *many* double entries (after folding) + # this turns out to be faster than incrementally searching + # the array. + # This kind-of-makes sense. + # We can do: + # 1. + # a) build a full list of folded items + # b) find unique (and sorted) elements + # or + # 2. + # a) incrementally add a value, only + # if it does not exist. + # 1. creates a bigger temporary array, but only + # adds unique values 1 time through numpy fast algorithm + # 2. searchs an array (of seemingly small arrays) ncol times + # which can be quite heavy. + tmp = col[ptr[r]:ptr[r] + ncol[r]].copy() + for ind in range(ncol[r]): + tmp[ind] %= nr + + tmp = np.unique(tmp) + fold_ncol[r] = tmp.shape[0] + for ind in range(tmp.shape[0]): fold_col[fold_ptr[r] + ind] = tmp[ind] fold_ptr[r + 1] = fold_ptr[r] + fold_ncol[r] @@ -121,36 +130,24 @@ def fold_csr_matrix_nc(ints_st[::1] ptr, for r in range(nr): rr = r * 2 - # Initialize the pointer arrays - if ncol[r] > 0: - c = (col[ptr[r]] % nr) * 2 - fold_ncol[rr] = 2 - fold_col[fold_ptr[rr]] = c - fold_col[fold_ptr[rr] + 1] = c + 1 - else: - fold_ncol[rr] = 0 + tmp = col[ptr[r]:ptr[r] + ncol[r]].copy() + for ind in range(ncol[r]): + tmp[ind] = (tmp[ind] % nr) * 2 - for ind in range(ptr[r] + 1, ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - if not in_1d(fold_col[fold_ptr[rr]:fold_ptr[rr] + fold_ncol[rr]], c): - fold_col[fold_ptr[rr] + fold_ncol[rr]] = c - fold_col[fold_ptr[rr] + fold_ncol[rr] + 1] = c + 1 - fold_ncol[rr] += 2 + tmp = np.unique(tmp) # Duplicate pointers and counters for next row (off-diagonal) - fold_ptr[rr + 1] = fold_ptr[rr] + fold_ncol[rr] + fold_ncol[rr] = tmp.shape[0] * 2 fold_ncol[rr + 1] = fold_ncol[rr] + fold_ptr[rr + 1] = fold_ptr[rr] + fold_ncol[rr] + fold_ptr[rr + 2] = 
fold_ptr[rr + 1] + fold_ncol[rr] - # Sort indices (we should implement our own sorting algorithm) - tmp = np.sort(fold_col[fold_ptr[rr]:fold_ptr[rr] + fold_ncol[rr]]) - for ind in range(fold_ncol[rr]): - c = tmp[ind] - fold_col[fold_ptr[rr] + ind] = c - # Copy to next row as well - fold_col[fold_ptr[rr+1] + ind] = c + for ind in range(tmp.shape[0]): + fold_col[fold_ptr[rr] + ind * 2] = tmp[ind] + fold_col[fold_ptr[rr] + ind * 2 + 1] = tmp[ind] + 1 + fold_col[fold_ptr[rr+1] + ind * 2] = tmp[ind] + fold_col[fold_ptr[rr+1] + ind * 2 + 1] = tmp[ind] + 1 - # Increment the next row - fold_ptr[rr + 2] = fold_ptr[rr + 1] + fold_ncol[rr + 1] nz += fold_ncol[rr] * 2 if nz > fold_col.shape[0]: diff --git a/src/sisl/_indices.pyx b/src/sisl/_indices.pyx index a4a1d0efbe..3795342a79 100644 --- a/src/sisl/_indices.pyx +++ b/src/sisl/_indices.pyx @@ -14,6 +14,7 @@ from sisl._core._dtypes cimport floats_st, ints_st, ssize_st, type2dtype @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_only(ints_st[::1] element, ints_st[::1] test_element): """ Return indices of all `test_element` in the element array. @@ -66,6 +67,7 @@ def indices_only(ints_st[::1] element, ints_st[::1] test_element): @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices(ints_st[::1] element, ints_st[::1] test_element, ints_st offset=0, both_sorted: bool = False): """ Return indices of all `test_element` in the search array. 
If not found the index will be ``-1`` @@ -142,6 +144,7 @@ def indices(ints_st[::1] element, ints_st[::1] test_element, ints_st offset=0, @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_in_cylinder(floats_st[:, ::1] dxyz, const floats_st R, const floats_st h): """ Indices for all coordinates that are within a cylinde radius `R` and height `h` @@ -194,6 +197,7 @@ def indices_in_cylinder(floats_st[:, ::1] dxyz, const floats_st R, const floats_ @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_in_sphere(floats_st[:, ::1] dxyz, const floats_st R): """ Indices for all coordinates that are within a sphere of radius `R` @@ -232,6 +236,7 @@ def indices_in_sphere(floats_st[:, ::1] dxyz, const floats_st R): @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_in_sphere_with_dist(floats_st[:, ::1] dxyz, const floats_st R): """ Indices and the distances for all coordinates that are within a sphere of radius `R` @@ -290,6 +295,7 @@ def indices_in_sphere_with_dist(floats_st[:, ::1] dxyz, const floats_st R): @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``<= V`` @@ -370,6 +376,7 @@ cdef ssize_st _indices_le2(const floats_st[:, ::1] a, const floats_st V, int[::1 @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_fabs_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``| | <= V`` @@ -478,6 +485,7 @@ cdef ssize_st _indices_fabs_le2(const floats_st[:, ::1] a, const floats_st V, in @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def indices_gt_le(ndarray a, const floats_st V1, const floats_st V2): cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) cdef int[::1] idx = IDX @@ -565,6 
+573,7 @@ cdef inline bint in_1d(const ints_st[::1] array, const ints_st v) noexcept nogil @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def index_sorted(ints_st[::1] a, const ints_st v): """ Return index for the value v in a sorted array, otherwise return -1 @@ -630,6 +639,7 @@ cdef ssize_st _index_sorted(const ints_st[::1] a, const _ints_index_sorted_st v) @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def is_sorted_unique(ints_st[::1] a): """ Return True/False if all elements of the sorted array `a` are unique @@ -659,6 +669,7 @@ def is_sorted_unique(ints_st[::1] a): @cython.boundscheck(False) @cython.wraparound(False) +@cython.initializedcheck(False) def list_index_le(ints_st[::1] a, ints_st[::1] b): """ Find indices for each ``a`` such that the returned ``a[i] <= b[ret[i]]`` where `b` is assumed sorted diff --git a/src/sisl/physics/_matrix_dk.pyx b/src/sisl/physics/_matrix_dk.pyx index 1810c70093..3a937f3495 100644 --- a/src/sisl/physics/_matrix_dk.pyx +++ b/src/sisl/physics/_matrix_dk.pyx @@ -7,7 +7,7 @@ import numpy as np cimport numpy as cnp -from sisl._core._dtypes cimport floats_st +from sisl._core._dtypes cimport floats_st, ints_st from ._common import comply_gauge from ._matrix_phase import * @@ -24,22 +24,23 @@ def _phase_dk(gauge, M, sc, cnp.ndarray[floats_st] k, dtype): # This is the differentiated matrix with respect to k # See _phase.pyx, we are using exp(i k.R/r) # i R - if gauge == 'cell': - iRs = phase_rsc(sc, k, dtype).reshape(-1, 1) - iRs = (1j * np.dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) - p_opt = 1 - elif gauge == 'atom': + if gauge == 'atom': M.finalize() rij = M.Rij()._csr._D iRs = (1j * rij * phase_rij(rij, sc, k, dtype).reshape(-1, 1)).astype(dtype, copy=False) del rij p_opt = 0 + elif gauge == 'cell': + iRs = phase_rsc(sc, k, dtype).reshape(-1, 1) + iRs = (1j * np.dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) + p_opt = 1 + return 
p_opt, iRs -def matrix_dk(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): +def matrix_dk(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) @@ -67,7 +68,7 @@ def matrix_dk_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): return d1.asformat(format), d2.asformat(format), d3.asformat(format) -def matrix_dk_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): +def matrix_dk_nc_diag(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) diff --git a/src/sisl/physics/_matrix_k.pyx b/src/sisl/physics/_matrix_k.pyx index b0ebd7cc5c..eb8a78e14f 100644 --- a/src/sisl/physics/_matrix_k.pyx +++ b/src/sisl/physics/_matrix_k.pyx @@ -6,7 +6,7 @@ cimport cython import numpy as np cimport numpy as cnp -from sisl._core._dtypes cimport floats_st +from sisl._core._dtypes cimport floats_st, ints_st from ._common import comply_gauge from ._matrix_phase import * from ._matrix_phase_sc import * @@ -41,7 +41,7 @@ def _phase_k(gauge, M, sc, cnp.ndarray[floats_st] K, dtype): return p_opt, phases -def matrix_k(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): +def matrix_k(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype) p_opt, phases = _phase_k(gauge, M, sc, k, dtype) @@ -97,7 +97,7 @@ def matrix_k_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): return _phase_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) -def matrix_k_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): +def matrix_k_nc_diag(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, phases = _phase_k(gauge, M, sc, k, dtype) 
diff --git a/src/sisl/physics/_matrix_phase.pyx b/src/sisl/physics/_matrix_phase.pyx index 4e5aedae93..50fbc46be2 100644 --- a/src/sisl/physics/_matrix_phase.pyx +++ b/src/sisl/physics/_matrix_phase.pyx @@ -23,6 +23,7 @@ from sisl._core._dtypes cimport ( floats_st, ints_st, numerics_st, + reals_st, ssize_st, type2dtype, ) @@ -71,7 +72,7 @@ def _phase_csr(ints_st[::1] ptr, ints_st[::1] ncol, ints_st[::1] col, numerics_st[:, ::1] D, - const int idx, + const ints_st idx, floatcomplexs_st[::1] phases, const int p_opt): @@ -99,7 +100,7 @@ def _phase_csr(ints_st[::1] ptr, tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] s_idx = _index_sorted(tmp, c) - v[v_ptr[r] + s_idx] += D[ind, idx] + v[v_ptr[r] + s_idx] += (D[ind, idx]) elif p_opt == 0: for r in range(nr): @@ -150,7 +151,7 @@ def _phase_array(ints_st[::1] ptr, for r in range(nr): for ind in range(ptr[r], ptr[r] + ncol[r]): c = col[ind] % nr - v[r, c] += D[ind, idx] + v[r, c] += (D[ind, idx]) elif p_opt == 0: for r in range(nr): From 487bb54295f32bbe21d080a1d88e3d6af712bf40 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Wed, 6 Nov 2024 20:03:02 +0100 Subject: [PATCH 04/14] fixed print-out of cmake-variables and ensured Fortran is enabled Enabling fortran is necessary for it to populate details of the fortran world. 
Signed-off-by: Nick Papior --- CMakeLists.txt | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e44950fb77..a50c371a69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,6 @@ add_compile_definitions(CYTHON_NO_PYINIT_EXPORT=1) #: lib, perhaps we should change this set(CMAKE_SHARED_MODULE_PREFIX "") - # Determine whether we are in CIBUILDWHEEL # and whether we are building for the universal target set(_def_fortran TRUE) @@ -81,6 +80,8 @@ option(WITH_FORTRAN # Define all options for the user if( WITH_FORTRAN ) + enable_language(Fortran) + set(F2PY_REPORT_ON_ARRAY_COPY 10 CACHE STRING "The minimum (element) size of arrays before warning about copies") @@ -209,6 +210,18 @@ if(WITH_FORTRAN) endif(WITH_FORTRAN) +message(STATUS "Python variables:") +list(APPEND CMAKE_MESSAGE_INDENT " ") + +cmake_print_variables(Python_INCLUDE_DIRS) +cmake_print_variables(Python_NumPy_INCLUDE_DIRS) +if(WITH_FORTRAN) + cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR) +endif() + +list(POP_BACK CMAKE_MESSAGE_INDENT) + + message(STATUS "sisl options") list(APPEND CMAKE_MESSAGE_INDENT " ") @@ -230,18 +243,6 @@ endif() list(POP_BACK CMAKE_MESSAGE_INDENT) -message(STATUS "Python variables:") -list(APPEND CMAKE_MESSAGE_INDENT " ") - -cmake_print_variables(Python_INCLUDE_DIRS) -cmake_print_variables(Python_NumPy_INCLUDE_DIRS) -if(WITH_FORTRAN) - cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR) -endif() - -list(POP_BACK CMAKE_MESSAGE_INDENT) - - # Return in _result whether the _file should be built, or not # It checks whether the file is present in the NO_COMPILATION From 72937266eeef49defab9ca66dd6bce449fb15306 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Wed, 6 Nov 2024 20:53:33 +0100 Subject: [PATCH 05/14] added the changelog Signed-off-by: Nick Papior --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
1d7b72543c..788bbd81b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,14 @@ we hit release version 1.0.0. sisl.geom.graphene ### Fixed - - `projection` arguments of several functions has been streamlined +### Changed +- internal Cython code for performance improvements. + This yield significant perf. improvements for DFT sparse matrices + with *many* edges in the sparse matrix, but a perf. hit for very + small TB matrices. + ## [0.15.2] - 2024-11-06 From 53742cfe1f9dd6a72357df0ff39df8665d53c458 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 7 Nov 2024 10:08:59 +0100 Subject: [PATCH 06/14] cleaned spin-box extraction of matrix data This should enable simpler access to data Signed-off-by: Nick Papior --- src/sisl/_core/sparse_geometry.py | 2 +- src/sisl/physics/densitymatrix.py | 36 +------- src/sisl/physics/sparse.py | 142 +++++++++++++++++++++++++----- 3 files changed, 123 insertions(+), 57 deletions(-) diff --git a/src/sisl/_core/sparse_geometry.py b/src/sisl/_core/sparse_geometry.py index 862eb750f3..0adc7035cb 100644 --- a/src/sisl/_core/sparse_geometry.py +++ b/src/sisl/_core/sparse_geometry.py @@ -652,7 +652,7 @@ def create_construct(self, R, params): """ if len(R) != len(params): raise ValueError( - f"{self.__class__.__name__}.create_construct got different lengths of `R` and `param`" + f"{self.__class__.__name__}.create_construct got different lengths of 'R' and 'params'" ) def func(self, ia, atoms, atoms_xyz=None): diff --git a/src/sisl/physics/densitymatrix.py b/src/sisl/physics/densitymatrix.py index eddab35867..be0f33faee 100644 --- a/src/sisl/physics/densitymatrix.py +++ b/src/sisl/physics/densitymatrix.py @@ -24,40 +24,12 @@ from sisl.messages import deprecate_argument, progressbar, warn from sisl.typing import AtomsIndex, GaugeType, SeqFloat -from .sparse import SparseOrbitalBZSpin +from .sparse import SparseOrbitalBZSpin, _get_spin from .spin import Spin __all__ = ["DensityMatrix"] -def _get_density(DM, orthogonal, what="sum"): - DM 
= DM.T - if orthogonal: - off = 0 - else: - off = 1 - if what == "sum": - if DM.shape[0] in (2 + off, 4 + off, 8 + off): - return DM[0] + DM[1] - return DM[0] - if what == "spin": - m = np.empty([3, DM.shape[1]], dtype=DM.dtype) - if DM.shape[0] == 8 + off: - m[0] = DM[2] + DM[6] - m[1] = -DM[3] + DM[7] - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 4 + off: - m[0] = 2 * DM[2] - m[1] = -2 * DM[3] - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 2 + off: - m[:2, :] = 0.0 - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 1 + off: - m[...] = 0.0 - return m - - class _densitymatrix(SparseOrbitalBZSpin): def spin_rotate(self, angles: SeqFloat, rad: bool = False): r"""Rotates spin-boxes by fixed angles around the :math:`x`, :math:`y` and :math:`z` axis, respectively. @@ -539,10 +511,10 @@ def bond_order( m, *opts = method.split(":") # only extract the summed density - what = "sum" + what = "trace" if "spin" in opts: # do this for each spin x, y, z - what = "spin" + what = "vector" del opts[opts.index("spin")] # Check that there are no un-used options @@ -556,7 +528,7 @@ def bond_order( rows, cols, DM = _to_coo(self._csr) # Convert to requested matrix form - D = _get_density(DM, self.orthogonal, what) + D = _get_spin(DM, self.spin, what) # Define a matrix-matrix multiplication def mm(A, B): diff --git a/src/sisl/physics/sparse.py b/src/sisl/physics/sparse.py index f2c531aa6d..387ca83663 100644 --- a/src/sisl/physics/sparse.py +++ b/src/sisl/physics/sparse.py @@ -4,6 +4,7 @@ from __future__ import annotations import warnings +from typing import Literal import numpy as np from scipy.sparse import SparseEfficiencyWarning, csr_matrix @@ -13,6 +14,7 @@ from sisl import Geometry from sisl._core.sparse import issparse from sisl._core.sparse_geometry import SparseOrbital +from sisl._help import dtype_complex_to_real, dtype_real_to_complex from sisl._internal import set_module from sisl.messages import warn from sisl.typing import AtomsIndex, GaugeType, KPoint @@ -29,6 +31,85 @@ 
warnings.filterwarnings("ignore", category=SparseEfficiencyWarning) +def _get_spin(M, spin, what: Literal["trace", "box", "vector"] = "box"): + M = M.T + if what == "trace": + if spin.spinor == 2: + # we have both up+down + # TODO fix spin-orbit with complex values + return M[0] + M[1] + return M[0] + + if what == "vector": + m = np.empty([3, M.shape[1]], dtype=dtype_complex_to_real(M.dtype)) + if spin.is_unpolarized: + # no spin-density + m[...] = 0.0 + else: + # Same for all spin-configurations + m[2] = (M[0] - M[1]).real + + # These indices should be reflected in sisl/physics/sparse.py + # for the Mxy[ri] indices in the reset method + if spin.is_polarized: + m[:2, :] = 0.0 + elif spin.is_noncolinear: + if spin.dkind == "f": + m[0] = 2 * M[2] + m[1] = -2 * M[3] + else: + m[0] = 2 * M[2].real + m[1] = -2 * M[2].imag + else: + # spin-orbit + if spin.dkind == "f": + m[0] = M[2] + M[6] + m[1] = -M[3] + M[7] + else: + tmp = M[2].conj() + M[3] + m[0] = tmp.real + m[1] = tmp.imag + return m + + if what == "box": + m = np.empty([2, 2, M.shape[1]], dtype=dtype_real_to_complex(M.dtype)) + if spin.is_unpolarized: + # no spin-density + m[...] = 0.0 + m[0, 0] = M[0] + m[1, 1] = M[0] + elif spin.is_polarized: + m[...] 
= 0.0 + m[0, 0] = M[0] + m[1, 1] = M[1] + elif spin.is_noncolinear: + if spin.dkind == "f": + m[0, 0] = M[0] + m[1, 1] = M[1] + m[0, 1] = M[2] + 1j * M[3] + m[1, 0] = m[0, 1].conj() + else: + m[0, 0] = M[0] + m[1, 1] = M[1] + m[0, 1] = M[2] + m[1, 0] = M[2].conj() + else: + if spin.dkind == "f": + m[0, 0] = M[0] + 1j * M[4] + m[1, 1] = M[1] + 1j * M[5] + m[0, 1] = M[2] + 1j * M[3] + m[1, 0] = M[6] + 1j * M[7] + else: + m[0, 0] = M[0] + m[1, 1] = M[1] + m[0, 1] = M[2] + m[1, 0] = M[3] + + return m + + raise ValueError(f"Wrong 'what' argument got {what}.") + + @set_module("sisl.physics") class SparseOrbitalBZ(SparseOrbital): r"""Sparse object containing the orbital connections in a Brillouin zone @@ -815,7 +896,7 @@ def _reset(self): self.M22 = 1 self.M12 = 2 self.M21 = 3 - raise NotImplementedError("Currently not implemented") + # The overlap is the same as non-collinear self.Pk = self._Pk_spin_orbit self.Sk = self._Sk_non_colinear @@ -836,7 +917,7 @@ def spin(self): r"""Associated spin class""" return self._spin - def create_construct(self, R, param): + def create_construct(self, R, params): r"""Create a simple function for passing to the `construct` function. This is to relieve the creation of simplistic @@ -846,7 +927,7 @@ def create_construct(self, R, param): >>> def func(self, ia, atoms, atoms_xyz=None): ... idx = self.geometry.close(ia, R=R, atoms=atoms, atoms_xyz=atoms_xyz) - ... for ix, p in zip(idx, param): + ... for ix, p in zip(idx, params): ... self[ia, ix] = p In the non-colinear case the matrix element :math:`\mathbf M_{ij}` will be set @@ -865,79 +946,88 @@ def create_construct(self, R, param): Parameters ---------- - R : array_like + R : radii parameters for different shells. - Must have same length as `param` or one less. + Must have same length as `params` or one less. If one less it will be extended with ``R[0]/100`` - param : array_like + params : coupling constants corresponding to the `R` - ranges. ``param[0,:]`` are the elements + ranges. 
``params[0,:]`` are the elements for the all atoms within ``R[0]`` of each atom. See Also -------- construct : routine to create the sparse matrix from a generic function (as returned from `create_construct`) """ - if len(R) != len(param): + if len(R) != len(params): raise ValueError( - f"{self.__class__.__name__}.create_construct got different lengths of `R` and `param`" + f"{self.__class__.__name__}.create_construct got different lengths of 'R' and 'params'" ) if not self.spin.is_diagonal: + # This portion of code splits the construct into doing Hermitian + # assignments. This probably needs rigorous testing. + + dtype_cplx = dtype_real_to_complex(self.dtype) + is_complex = self.dkind == "c" if self.spin.is_spinorbit: if is_complex: nv = 4 # Hermitian parameters - paramH = [ + # The input order is [uu, dd, ud, du] + paramsH = [ [p[0].conj(), p[1].conj(), p[3].conj(), p[2].conj(), *p[4:]] - for p in param + for p in params ] else: nv = 8 # Hermitian parameters - paramH = [ + # The input order is [Ruu, Rdd, Rud, Iud, Iuu, Idd, Rdu, idu] + paramsH = [ [p[0], p[1], p[6], -p[7], -p[4], -p[5], p[2], -p[3], *p[8:]] - for p in param + for p in params ] if not self.orthogonal: nv += 1 # ensure we have correct number of values - assert all(len(p) == nv for p in param) + assert all(len(p) == nv for p in params) if R[0] <= 0.1001: # no atom closer than 0.1001 Ang! # We check that the the parameters here is Hermitian - p = param[0] + p = params[0] if is_complex: - onsite = np.array([[p[0], p[2]], [p[3], p[1]]], self.dtype) + onsite = np.array([[p[0], p[2]], [p[3], p[1]]], dtype_cplx) else: onsite = np.array( [ [p[0] + 1j * p[4], p[2] + 1j * p[3]], [p[6] + 1j * p[7], p[1] + 1j * p[5]], ], - np.complex128, + dtype_cplx, ) if not np.allclose(onsite, onsite.T.conj()): warn( - f"{self.__class__.__name__}.create_construct is NOT Hermitian for on-site terms. This is your responsibility!" + f"{self.__class__.__name__}.create_construct is NOT " + "Hermitian for on-site terms. 
This is your responsibility! " + "The code will continue silently, be AWARE!" ) elif self.spin.is_noncolinear: if is_complex: nv = 3 # Hermitian parameters - paramH = [[p[0].conj(), p[1].conj(), p[2], *p[3:]] for p in param] + paramsH = [[p[0].conj(), p[1].conj(), p[2], *p[3:]] for p in params] else: nv = 4 # Hermitian parameters - # Note that we don"t need to do anything here. + # Note that we don't need to do anything here. # H_ij = [[0, 2 + 1j 3], # [2 - 1j 3, 1]] # H_ji = [[0, 2 + 1j 3], # [2 - 1j 3, 1]] # H_ij^H == H_ji^H - paramH = param + paramsH = params if not self.orthogonal: nv += 1 @@ -945,21 +1035,25 @@ def create_construct(self, R, param): # Since the values are ensured Hermitian in the on-site case anyways. # ensure we have correct number of values - assert all(len(p) == nv for p in param) + assert all(len(p) == nv for p in params) na = self.geometry.na # Now create the function that returns the assignment function def func(self, ia, atoms, atoms_xyz=None): idx = self.geometry.close(ia, R=R, atoms=atoms, atoms_xyz=atoms_xyz) - for ix, p, pc in zip(idx, param, paramH): + for ix, p, pc in zip(idx, params, paramsH): ix_ge = (ix % na) >= ia self[ia, ix[ix_ge]] = p self[ia, ix[~ix_ge]] = pc + func.R = R + func.params = params + func.paramsH = paramsH + return func - return super().create_construct(R, param) + return super().create_construct(R, params) def __len__(self): r"""Returns number of rows in the basis (if non-collinear or spin-orbit, twice the number of orbitals)""" From ee3a826c64937fcd098cc6ec6b670935e77b521d Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 8 Nov 2024 13:23:21 +0100 Subject: [PATCH 07/14] fixed handling complex matrices in sisl Much of the code was built for floats. This now fixes the issue of reading/writing sparse matrices in specific data-formats.
It allows a more natural way of handling SOC matrices, with complex interplay, say: H[0, 0, 2] = Hud as a complex variable, when dealing with floats one needs to do this: H[0, 0, 2] = Hud.real H[0, 0, 3] = Hud.imag which is not super-intuitive. Currently there are still *many* hardcodings of the indices. And we should strive to move these into a common framework to limit the problems it creates. Tests have been added that check Hamiltonian eigenvalues and density-matrix Mulliken charges. So it seems it works as intended, but not everything has been fully tested. Signed-off-by: Nick Papior --- src/sisl/io/siesta/_help.py | 187 ++++++++++++++++++++---- src/sisl/io/siesta/binaries.py | 70 +++++---- src/sisl/io/siesta/siesta_nc.py | 22 +-- src/sisl/io/siesta/tests/test_tsde.py | 28 ++++ src/sisl/io/siesta/tests/test_tshs.py | 56 +++++++ src/sisl/io/tbtrans/delta.py | 14 +- src/sisl/physics/densitymatrix.py | 2 +- src/sisl/physics/energydensitymatrix.py | 2 +- src/sisl/physics/hamiltonian.py | 2 +- src/sisl/physics/sparse.py | 102 ++++++------- src/sisl/physics/spin.py | 48 +++--- 11 files changed, 383 insertions(+), 150 deletions(-) diff --git a/src/sisl/io/siesta/_help.py b/src/sisl/io/siesta/_help.py index 7d3c47244d..7ec03fb148 100644 --- a/src/sisl/io/siesta/_help.py +++ b/src/sisl/io/siesta/_help.py @@ -5,13 +5,14 @@ import numpy as np +import sisl as si import sisl._array as _a from sisl.messages import warn __all__ = ["_siesta_sc_off"] __all__ += ["_csr_from_siesta", "_csr_from_sc_off"] __all__ += ["_csr_to_siesta", "_csr_to_sc_off"] -__all__ += ["_mat_spin_convert", "_fc_correct"] +__all__ += ["_mat_sisl2siesta", "_mat_siesta2sisl", "_fc_correct"] def _siesta_sc_off(nsc): @@ -98,45 +99,177 @@ def _csr_from(col_from, csr): csr.translate_columns(col_from, col_to) -def _mat_spin_convert(M, spin=None): +def _mat2dtype(M, dtype: np.dtype) -> None: + """Change the internal CSR matrix in `M` to a follow `dtype`""" + + if M.dtype == dtype: + return M + + spin =
M.spin + csr = M._csr + shape = csr._D.shape + + # Change details + old_dtype = np.dtype(M.dtype) + new_dtype = np.dtype(dtype) + + def toc(D, re, im): + return (D[..., re] + 1j * D[..., im]).astype(dtype, copy=False) + + if old_dtype.kind in ("f", "i"): + if new_dtype.kind in ("f", "i"): + # this is just simple casting + csr._D = csr._D.astype(dtype) + elif new_dtype.kind == "c": + # we need to *collect it + if spin.is_diagonal: + # this is just simple casting, + # each diagonal component has its own index + csr._D = csr._D.astype(dtype) + elif spin.is_noncolinear: + D = np.empty(shape[:-1] + (shape[-1] - 1,), dtype=dtype) + D[..., [0, 1]] = csr._D[..., [0, 1]].astype(dtype) + D[..., 2] = toc(csr._D, 2, 3) + if D.shape[-1] > 4: + D[..., 3:] = csr._D[..., 4:].astype(dtype) + csr._D = D + elif spin.is_spinorbit: + D = np.empty(shape[:-1] + (shape[-1] - 4,), dtype=dtype) + D[..., 0] = toc(csr._D, 0, 4) + D[..., 1] = toc(csr._D, 1, 5) + D[..., 2] = toc(csr._D, 2, 3) + D[..., 3] = toc(csr._D, 6, 7) + if D.shape[-1] > 4: + D[..., 4:] = csr._D[..., 8:].astype(dtype) + csr._D = D + else: + raise NotImplementedError + else: + raise NotImplementedError + + elif old_dtype.kind == "c": + if new_dtype.kind == "c": + # this is just simple casting + csr._D = csr._D.astype(dtype) + elif new_dtype.kind in ("f", "i"): + # we need to *collect it + if spin.is_diagonal: + # this is just simple casting, + # each diagonal component has its own index + csr._D = csr._D.astype(dtype) + elif spin.is_noncolinear: + D = np.empty(shape[:-1] + (shape[-1] + 1,), dtype=dtype) + D[..., [0, 1]] = csr._D[..., [0, 1]].astype(dtype) + D[..., 2] = csr._D[..., 2].real.astype(dtype) + D[..., 3] = csr._D[..., 2].imag.astype(dtype) + if D.shape[-1] > 4: + D[..., 4:] = csr._D[..., 3:].astype(dtype) + csr._D = D + elif spin.is_spinorbit: + D = np.empty(shape[:-1] + (shape[-1] + 4,), dtype=dtype) + D[..., 0] = csr._D[..., 0].real.astype(dtype) + D[..., 1] = csr._D[..., 1].real.astype(dtype) + D[..., 2] = 
csr._D[..., 2].real.astype(dtype) + D[..., 3] = csr._D[..., 2].imag.astype(dtype) + D[..., 4] = csr._D[..., 0].imag.astype(dtype) + D[..., 5] = csr._D[..., 1].imag.astype(dtype) + D[..., 6] = csr._D[..., 3].real.astype(dtype) + D[..., 7] = csr._D[..., 3].imag.astype(dtype) + if D.shape[-1] > 8: + D[..., 8:] = csr._D[..., 4:].astype(dtype) + csr._D = D + else: + raise NotImplementedError + else: + raise NotImplementedError + M._reset() + + +def _mat_siesta2sisl(M, dtype: Optional[np.dtype] = None) -> None: """Conversion of Siesta spin matrices to sisl spin matrices The matrices from Siesta are given in a format adheering to the following - concept: + concept. + + There are two cases: + + 1. A non-colinear calculation: + + Siesta uses this convention: - A non-colinear calculation has the following entries (in C-index) for - the sparse matrix: + H[:, [0, 1, 2, 3]] + H11 == H[:, 0] + H22 == H[:, 1] + H12 == H[:, 2] - 1j H[:, 3] # spin-box Hermitian + H21 == H[:, 2] + 1j H[:, 3] - H[:, [0, 1, 2, 3]] - H11 == H[:, 0] - H22 == H[:, 1] - H12 == H[:, 2] - 1j H[:, 3] # spin-box Hermitian - H21 == H[:, 2] + 1j H[:, 3] + In sisl we use this convention, see `Hamiltonian`: - Although it really does not make sense to change anything, we - do change it to adhere to the spin-orbit case (see below). - I.e. what Siesta *saves* is the -Im[H12], which we now store - as Im[H12]. + H11 == H[:, 0] + H22 == H[:, 1] + H12 == H[:, 2] + 1j H[:, 3] # spin-box Hermitian + H21 == H[:, 2] - 1j H[:, 3] + 2. 
A spin-orbit calculation: - A spin-orbit calculation has the following entries (in C-index) for - the sparse matrix: + Siesta uses this convention: - H[:, [0, 1, 2, 3, 4, 5, 6, 7]] - H11 == H[:, 0] + 1j H[:, 4] - H22 == H[:, 1] + 1j H[:, 5] - H12 == H[:, 2] + 1j H[:, 3] # spin-box Hermitian - H21 == H[:, 6] + 1j H[:, 7] + H[:, [0, 1, 2, 3, 4, 5, 6, 7]] + H11 == H[:, 0] + 1j H[:, 4] + H22 == H[:, 1] + 1j H[:, 5] + H12 == H[:, 2] - 1j H[:, 3] + H21 == H[:, 6] + 1j H[:, 7] + + In sisl we use this convention, see `Hamiltonian`: + + H[:, [0, 1, 2, 3, 4, 5, 6, 7]] + H11 == H[:, 0] + 1j H[:, 4] + H22 == H[:, 1] + 1j H[:, 5] + H12 == H[:, 2] + 1j H[:, 3] + H21 == H[:, 6] + 1j H[:, 7] + + On top of this it depends on whether the data-type is complex + or not. """ - if spin is None: - if M.spin.is_noncolinear: + if dtype is None: + dtype = M.dtype + + spin = M.spin + + if spin.is_noncolinear: + if np.dtype(M.dtype).kind in ("f", "i"): M._csr._D[:, 3] = -M._csr._D[:, 3] - elif M.spin.is_spinorbit: + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() + elif spin.is_spinorbit: + if np.dtype(M.dtype).kind in ("f", "i"): + M._csr._D[:, 3] = -M._csr._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() + + _mat2dtype(M, dtype) + + +def _mat_sisl2siesta(M, dtype: Optional[np.dtype] = None) -> None: + """Conversion of sisl to Siesta spin matrices""" + if dtype is None: + dtype = M.dtype + + # convert to float + _mat2dtype(M, dtype) + + spin = M.spin + + if spin.is_noncolinear: + if np.dtype(M.dtype).kind in ("f", "i"): M._csr._D[:, 3] = -M._csr._D[:, 3] - elif spin.is_noncolinear: - M._D[:, 3] = -M._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() elif spin.is_spinorbit: - M._D[:, 3] = -M._D[:, 3] + if np.dtype(M.dtype).kind in ("f", "i"): + M._csr._D[:, 3] = -M._csr._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() def _geom2hsx(geometry): diff --git a/src/sisl/io/siesta/binaries.py b/src/sisl/io/siesta/binaries.py index b33164b638..ef8336c0db 100644 --- 
a/src/sisl/io/siesta/binaries.py +++ b/src/sisl/io/siesta/binaries.py @@ -396,6 +396,8 @@ def read_hamiltonian(self, geometry=None, **kwargs) -> Hamiltonian: ) # Check whether it is an orthogonal basis set + # TODO, this is not an exhaustive test, but is *fine* for most + # cases orthogonal = np.abs(dS).sum() == geom.no # Create the Hamiltonian container @@ -418,7 +420,7 @@ def read_hamiltonian(self, geometry=None, **kwargs) -> Hamiltonian: H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert to sisl supercell # equivalent as _csr_from_siesta with explicit isc from file @@ -442,7 +444,8 @@ def write_hamiltonian(self, H, **kwargs): """Writes the Hamiltonian to a siesta.TSHS file""" # we sort below, so no need to do it here # see onlysSileSiesta.read_overlap for .transpose() - csr = H.transpose(spin=False, sort=False)._csr + H = H.transpose(spin=False, sort=False) + csr = H._csr if csr.nnz == 0: raise SileError( f"{self!r}.write_hamiltonian cannot write " @@ -454,7 +457,7 @@ def write_hamiltonian(self, H, **kwargs): # Convert to siesta CSR _csr_to_siesta(H.geometry, csr, diag=True) csr.finalize(sort=sort) - _mat_spin_convert(csr, H.spin) + _mat_sisl2siesta(H, dtype=np.float64) # Extract the data to pass to the fortran routine cell = H.geometry.cell @@ -566,7 +569,7 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: # DM file does not contain overlap matrix... so neglect it for now. 
DM._csr._D[:, spin] = 0.0 - _mat_spin_convert(DM) + _mat_siesta2sisl(DM, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if nsc[0] != 0 or geom.no_s >= col.max(): @@ -584,7 +587,8 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: def write_density_matrix(self, DM, **kwargs): """Writes the density matrix to a siesta.DM file""" - csr = DM.transpose(spin=False, sort=False)._csr + DM = DM.transpose(spin=False, sort=False) + csr = DM._csr # This ensures that we don"t have any *empty* elements if csr.nnz == 0: raise SileError( @@ -596,7 +600,8 @@ def write_density_matrix(self, DM, **kwargs): # We do not really need to sort this one, but we do for consistency # of the interface. csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, DM.spin) + + _mat_sisl2siesta(DM, dtype=np.float64) # Get DM if DM.orthogonal: @@ -674,7 +679,7 @@ def read_energy_density_matrix(self, **kwargs) -> EnergyDensityMatrix: # EDM file does not contain overlap matrix... so neglect it for now. 
EDM._csr._D[:, spin] = 0.0 - _mat_spin_convert(EDM) + _mat_siesta2sisl(EDM, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if nsc[0] != 0 or geom.no_s >= col.max(): @@ -704,7 +709,7 @@ def read_fermi_level(self) -> float: self._fortran_check("read_fermi_level", "could not read fermi-level.") return Ef - def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): + def write_density_matrices(self, DM, EDM, Ef: float = 0.0, **kwargs): r"""Writes the density matrix to a siesta.DM file Parameters @@ -713,31 +718,32 @@ def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): density matrix to write to the file EDM : EnergyDensityMatrix energy density matrix to write to the file - Ef : float, optional + Ef : fermi-level to be contained """ - DMcsr = DM.transpose(spin=False, sort=False)._csr - EDMcsr = EDM.transpose(spin=False, sort=False)._csr - DMcsr.align(EDMcsr) - EDMcsr.align(DMcsr) + DM = DM.transpose(spin=False, sort=False) + EDM = EDM.transpose(spin=False, sort=False) + DM._csr.align(EDM._csr) + EDM._csr.align(DM._csr) - if DMcsr.nnz == 0: + if DM._csr.nnz == 0: raise SileError( f"{self!r}.write_density_matrices cannot write " "a zero element sparse matrix!" 
) - _csr_to_siesta(DM.geometry, DMcsr) - _csr_to_siesta(DM.geometry, EDMcsr) + _csr_to_siesta(DM.geometry, DM._csr) + _csr_to_siesta(DM.geometry, EDM._csr) sort = kwargs.get("sort", True) - DMcsr.finalize(sort=sort) - EDMcsr.finalize(sort=sort) - _mat_spin_convert(DMcsr, DM.spin) - _mat_spin_convert(EDMcsr, EDM.spin) + DM._csr.finalize(sort=sort) + EDM._csr.finalize(sort=sort) + _mat_sisl2siesta(DM, dtype=np.float64) + _mat_sisl2siesta(EDM, dtype=np.float64) # Ensure everything is correct if not ( - np.allclose(DMcsr.ncol, EDMcsr.ncol) and np.allclose(DMcsr.col, EDMcsr.col) + np.allclose(DM._csr.ncol, EDM._csr.ncol) + and np.allclose(DM._csr.col, EDM._csr.col) ): raise ValueError( f"{self!r}.write_density_matrices got non compatible " @@ -745,21 +751,21 @@ def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): ) if DM.orthogonal: - dm = DMcsr._D + dm = DM._csr._D else: - dm = DMcsr._D[:, : DM.S_idx] + dm = DM._csr._D[:, : DM.S_idx] if EDM.orthogonal: - edm = EDMcsr._D + edm = EDM._csr._D else: - edm = EDMcsr._D[:, : EDM.S_idx] + edm = EDM._csr._D[:, : EDM.S_idx] nsc = DM.geometry.lattice.nsc.astype(np.int32) _siesta.write_tsde_dm_edm( self.file, nsc, - DMcsr.ncol, - DMcsr.col + 1, + DM._csr.ncol, + DM._csr.col + 1, _toF(dm, np.float64), _toF(edm, np.float64, _eV2Ry), Ef * _eV2Ry, @@ -1348,7 +1354,7 @@ def _r_hamiltonian_v0(self, **kwargs): ) # Create the Hamiltonian container - H = Hamiltonian(geom, spin, nnzpr=1, dtype=np.float32, orthogonal=False) + H = Hamiltonian(geom, spin, nnzpr=1, orthogonal=False) # Create the new sparse matrix H._csr.ncol = ncol.astype(np.int32, copy=False) @@ -1361,7 +1367,7 @@ def _r_hamiltonian_v0(self, **kwargs): H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if no_s // no == np.prod(geom.nsc): @@ -1392,7 +1398,7 @@ def _r_hamiltonian_v1(self, **kwargs): ) # Create the Hamiltonian container - H = 
Hamiltonian(geom, spin, nnzpr=1, dtype=np.float32, orthogonal=False) + H = Hamiltonian(geom, spin, nnzpr=1, orthogonal=False) # Create the new sparse matrix H._csr.ncol = ncol.astype(np.int32, copy=False) @@ -1406,7 +1412,7 @@ def _r_hamiltonian_v1(self, **kwargs): H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells _csr_from_sc_off(H.geometry, isc.T, H._csr) @@ -1440,7 +1446,7 @@ def _r_overlap_v0(self, **kwargs): ) # Create the Hamiltonian container - S = Overlap(geom, nnzpr=1, dtype=np.float32) + S = Overlap(geom, nnzpr=1) # Create the new sparse matrix S._csr.ncol = ncol.astype(np.int32, copy=False) diff --git a/src/sisl/io/siesta/siesta_nc.py b/src/sisl/io/siesta/siesta_nc.py index 91eb35eefa..e52ea1e907 100644 --- a/src/sisl/io/siesta/siesta_nc.py +++ b/src/sisl/io/siesta/siesta_nc.py @@ -250,7 +250,7 @@ def read_hamiltonian(self, **kwargs) -> Hamiltonian: H._csr._D[:, i] = sp.variables["H"][i, :] * Ry2eV # fix siesta specific notation - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Shift to the Fermi-level Ef = -self._value("Ef")[:] * Ry2eV @@ -285,7 +285,7 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: DM._csr._D[:, i] = sp.variables["DM"][i, :] # fix siesta specific notation - _mat_spin_convert(DM) + _mat_siesta2sisl(DM, dtype=kwargs.get("dtype")) return DM.transpose(spin=False, sort=kwargs.get("sort", True)) @@ -305,7 +305,7 @@ def read_energy_density_matrix(self, **kwargs) -> EnergyDensityMatrix: EDM._csr._D[:, i] -= sp.variables["DM"][i, :] * Ef[i] # fix siesta specific notation - _mat_spin_convert(EDM) + _mat_siesta2sisl(EDM, dtype=kwargs.get("dtype")) return EDM.transpose(spin=False, sort=kwargs.get("sort", True)) @@ -613,7 +613,8 @@ def write_hamiltonian(self, H, **kwargs): Ef : float, optional the Fermi level of the electronic structure (in eV), default to 0. 
""" - csr = H.transpose(spin=False, sort=False)._csr + H = H.transpose(spin=False, sort=False) + csr = H._csr if csr.nnz == 0: raise SileError( f"{self}.write_hamiltonian cannot write a zero element sparse matrix!" @@ -622,7 +623,8 @@ def write_hamiltonian(self, H, **kwargs): # Convert to siesta CSR _csr_to_siesta(H.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, H.spin) + + _mat_siesta2sisl(H, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(H.geometry) @@ -671,7 +673,8 @@ def write_density_matrix(self, DM, **kwargs): DM : DensityMatrix the model to be saved in the NC file """ - csr = DM.transpose(spin=False, sort=False)._csr + DM = DM.transpose(spin=False, sort=False) + csr = DM._csr if csr.nnz == 0: raise SileError( f"{self}.write_density_matrix cannot write a zero element sparse matrix!" @@ -680,7 +683,7 @@ def write_density_matrix(self, DM, **kwargs): # Convert to siesta CSR (we don't need to sort this matrix) _csr_to_siesta(DM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, DM.spin) + _mat_siesta2sisl(DM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(DM.geometry) @@ -728,7 +731,8 @@ def write_energy_density_matrix(self, EDM, **kwargs): EDM : EnergyDensityMatrix the model to be saved in the NC file """ - csr = EDM.transpose(spin=False, sort=False)._csr + EDM = EDM.transpose(spin=False, sort=False) + csr = EDM._csr if csr.nnz == 0: raise SileError( f"{self}.write_energy_density_matrix cannot write a zero element sparse matrix!" 
@@ -737,7 +741,7 @@ def write_energy_density_matrix(self, EDM, **kwargs): # no need to sort this matrix _csr_to_siesta(EDM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, EDM.spin) + _mat_siesta2sisl(EDM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(EDM.geometry) diff --git a/src/sisl/io/siesta/tests/test_tsde.py b/src/sisl/io/siesta/tests/test_tsde.py index 58822a25f2..4a110049aa 100644 --- a/src/sisl/io/siesta/tests/test_tsde.py +++ b/src/sisl/io/siesta/tests/test_tsde.py @@ -48,6 +48,34 @@ def test_si_pdos_kgrid_tsde_edm(sisl_files): assert np.allclose(EDM1._csr._D[:, :-1], EDM2._csr._D[:, :-1]) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +@pytest.mark.parametrize(("matrix"), ["density", "energy_density"]) +def test_si_pdos_kgrid_tsde_edm_dtypes(sisl_files, sisl_tmp, matrix): + fdf = sisl.get_sile( + sisl_files("siesta", "Si_pdos_k", "Si_pdos.fdf"), + base=sisl_files("siesta", "Si_pdos_k"), + ) + data = [] + mull = None + + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + M = getattr(fdf, f"read_{matrix}_matrix")(dtype=dtype) + data.append(M) + assert M.dtype == dtype + + if mull is None: + mull = M.mulliken() + else: + assert np.allclose(mull, M.mulliken(), atol=1e-5) + + fnc = sisl_tmp("tmp.nc") + for M in data: + M.write(fnc) + # The overlap should be here... 
+ M1 = M.read(fnc) + assert np.allclose(mull, M.mulliken(), atol=1e-5) + + @pytest.mark.filterwarnings("ignore", message="*wrong sparse pattern") def test_si_pdos_kgrid_tsde_dm_edm_rw(sisl_files, sisl_tmp): fdf = sisl.get_sile( diff --git a/src/sisl/io/siesta/tests/test_tshs.py b/src/sisl/io/siesta/tests/test_tshs.py index 6ece2f9e16..bd440e1954 100644 --- a/src/sisl/io/siesta/tests/test_tshs.py +++ b/src/sisl/io/siesta/tests/test_tshs.py @@ -28,6 +28,34 @@ def test_tshs_si_pdos_kgrid(sisl_files, sisl_tmp): assert np.allclose(HS1._csr._D, HS2._csr._D) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +def test_tshs_si_pdos_dtypes(sisl_files, sisl_tmp): + si = sisl.get_sile(sisl_files("siesta", "Si_pdos_k", "Si_pdos.TSHS")) + data = [] + eigs = None + k = [0.1] * 3 + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + HS = si.read_hamiltonian(dtype=dtype) + data.append(HS) + assert HS.dtype == dtype + + if eigs is None: + eigs = HS.eigh(k) + else: + assert np.allclose(eigs, HS.eigh(k), atol=1e-5) + + f = sisl_tmp("tmp.TSHS") + fnc = sisl_tmp("tmp.nc") + for HS in data: + HS.write(f) + HS1 = HS.read(f) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + HS.write(fnc) + HS1 = HS.read(fnc) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + def test_tshs_si_pdos_kgrid_tofromnc(sisl_files, sisl_tmp): pytest.importorskip("netCDF4") si = sisl.get_sile(sisl_files("siesta", "Si_pdos_k", "Si_pdos.TSHS")) @@ -80,6 +108,34 @@ def test_tshs_soc_pt2_xx(sisl_files, sisl_tmp): assert np.allclose(HS1._csr._D, HS2._csr._D) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +def test_tshs_soc_pt2_xx_dtypes(sisl_files, sisl_tmp): + fdf = sisl.get_sile(sisl_files("siesta", "Pt2_soc", "Pt2.fdf")) + data = [] + eigs = None + k = [0.1] * 3 + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + HS = fdf.read_hamiltonian(dtype=dtype) + data.append(HS) + assert HS.dtype == dtype + + if eigs is None: + 
eigs = HS.eigh(k) + else: + assert np.allclose(eigs, HS.eigh(k), atol=1e-5) + + f = sisl_tmp("tmp.TSHS") + fnc = sisl_tmp("tmp.nc") + for HS in data: + HS.write(f) + HS1 = sisl.physics.Hamiltonian.read(f) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + HS.write(fnc) + HS1 = sisl.physics.Hamiltonian.read(fnc) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + def test_tshs_soc_pt2_xx_pdos(sisl_files): fdf = sisl.get_sile(sisl_files("siesta", "Pt2_soc", "Pt2.fdf")) sc = fdf.read_lattice(order="TSHS") diff --git a/src/sisl/io/tbtrans/delta.py b/src/sisl/io/tbtrans/delta.py index d2c0594eb9..8a0955dc28 100644 --- a/src/sisl/io/tbtrans/delta.py +++ b/src/sisl/io/tbtrans/delta.py @@ -21,7 +21,8 @@ from ..siesta._help import ( _csr_from_sc_off, _csr_to_siesta, - _mat_spin_convert, + _mat_siesta2sisl, + _mat_sisl2siesta, _siesta_sc_off, ) from ..sile import SileError, add_sile, sile_raise_write @@ -436,7 +437,8 @@ def write_delta(self, delta, **kwargs): The input options for `TBtrans`_ determine whether this is a self-energy term or a Hamiltonian term. """ - csr = delta._csr.copy() + out_delta = delta.copy() + csr = out_delta._csr if csr.nnz == 0: raise SileError( f"{self!s}.write_overlap cannot write a zero element sparse matrix!" 
@@ -446,7 +448,7 @@ def write_delta(self, delta, **kwargs): _csr_to_siesta(delta.geometry, csr, diag=False) # delta should always write sorted matrices csr.finalize(sort=True) - _mat_spin_convert(csr, delta.spin) + _mat_sisl2siesta(out_delta) # Ensure that the geometry is written self.write_geometry(delta.geometry) @@ -557,9 +559,9 @@ def write_delta(self, delta, **kwargs): csize[-1] = csr.nnz if delta.spin.kind > delta.spin.POLARIZED: - print(delta.spin) raise ValueError( - f"{self.__class__.__name__}.write_delta only allows spin-polarized delta values" + f"{self.__class__.__name__}.write_delta only allows spin-polarized " + f"delta values, got {delta.spin!s}" ) if delta.dtype.kind == "c": @@ -667,7 +669,7 @@ def _r_class(self, cls, **kwargs): # Convert from isc to sisl isc _csr_from_sc_off(C.geometry, lvl.variables["isc_off"][:, :], C._csr) - _mat_spin_convert(C) + _mat_siesta2sisl(C, dtype=kwargs.get("dtype")) return C diff --git a/src/sisl/physics/densitymatrix.py b/src/sisl/physics/densitymatrix.py index be0f33faee..4de3f7a7cb 100644 --- a/src/sisl/physics/densitymatrix.py +++ b/src/sisl/physics/densitymatrix.py @@ -528,7 +528,7 @@ def bond_order( rows, cols, DM = _to_coo(self._csr) # Convert to requested matrix form - D = _get_spin(DM, self.spin, what) + D = _get_spin(DM, self.spin, what).T # Define a matrix-matrix multiplication def mm(A, B): diff --git a/src/sisl/physics/energydensitymatrix.py b/src/sisl/physics/energydensitymatrix.py index 895189e6bf..b11aa221d5 100644 --- a/src/sisl/physics/energydensitymatrix.py +++ b/src/sisl/physics/energydensitymatrix.py @@ -326,7 +326,7 @@ def shift(self, E, DM): return for i in range(self.spin.spinor): - self._csr._D[:, i] += DM._csr._D[:, i] * E[i] + self._csr._D[:, i].real += DM._csr._D[:, i].real * E[i] @staticmethod def read(sile, *args, **kwargs): diff --git a/src/sisl/physics/hamiltonian.py b/src/sisl/physics/hamiltonian.py index 6a243dde60..60b02e0f86 100644 --- a/src/sisl/physics/hamiltonian.py +++ 
b/src/sisl/physics/hamiltonian.py @@ -328,7 +328,7 @@ def shift(self, E): # For non-collinear and SO only the diagonal (real) components # should be shifted. for i in range(self.spin.spinor): - self._csr._D[:, i] += self._csr._D[:, self.S_idx] * E[i] + self._csr._D[:, i].real += self._csr._D[:, self.S_idx].real * E[i] def eigenvalue(self, k=(0, 0, 0), gauge: GaugeType = "cell", **kwargs): """Calculate the eigenvalues at `k` and return an `EigenvalueElectron` object containing all eigenvalues for a given `k` diff --git a/src/sisl/physics/sparse.py b/src/sisl/physics/sparse.py index 387ca83663..78ea95f07f 100644 --- a/src/sisl/physics/sparse.py +++ b/src/sisl/physics/sparse.py @@ -32,78 +32,77 @@ def _get_spin(M, spin, what: Literal["trace", "box", "vector"] = "box"): - M = M.T if what == "trace": if spin.spinor == 2: # we have both up+down # TODO fix spin-orbit with complex values - return M[0] + M[1] - return M[0] + return M[..., 0] + M[..., 1] + return M[..., 0] if what == "vector": - m = np.empty([3, M.shape[1]], dtype=dtype_complex_to_real(M.dtype)) + m = np.empty(M.shape[:-1] + (3,), dtype=dtype_complex_to_real(M.dtype)) if spin.is_unpolarized: # no spin-density m[...] 
= 0.0 else: # Same for all spin-configurations - m[2] = (M[0] - M[1]).real + m[..., 2] = (M[..., 0] - M[..., 1]).real # These indices should be reflected in sisl/physics/sparse.py # for the Mxy[ri] indices in the reset method if spin.is_polarized: - m[:2, :] = 0.0 + m[..., :2] = 0.0 elif spin.is_noncolinear: - if spin.dkind == "f": - m[0] = 2 * M[2] - m[1] = -2 * M[3] + if spin.dkind in ("f", "i"): + m[..., 0] = 2 * M[..., 2] + m[..., 1] = -2 * M[..., 3] else: - m[0] = 2 * M[2].real - m[1] = -2 * M[2].imag + m[..., 0] = 2 * M[..., 2].real + m[..., 1] = -2 * M[..., 2].imag else: # spin-orbit - if spin.dkind == "f": - m[0] = M[2] + M[6] - m[1] = -M[3] + M[7] + if spin.dkind in ("f", "i"): + m[..., 0] = M[..., 2] + M[..., 6] + m[..., 1] = -M[..., 3] + M[..., 7] else: - tmp = M[2].conj() + M[3] - m[0] = tmp.real - m[1] = tmp.imag + tmp = M[..., 2].conj() + M[..., 3] + m[..., 0] = tmp.real + m[..., 1] = tmp.imag return m if what == "box": - m = np.empty([2, 2, M.shape[1]], dtype=dtype_real_to_complex(M.dtype)) + m = np.empty(M.shape[:-1] + (2, 2), dtype=dtype_real_to_complex(M.dtype)) if spin.is_unpolarized: # no spin-density m[...] = 0.0 - m[0, 0] = M[0] - m[1, 1] = M[0] + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 0] elif spin.is_polarized: m[...] 
= 0.0 - m[0, 0] = M[0] - m[1, 1] = M[1] + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] elif spin.is_noncolinear: - if spin.dkind == "f": - m[0, 0] = M[0] - m[1, 1] = M[1] - m[0, 1] = M[2] + 1j * M[3] - m[1, 0] = m[0, 1].conj() + if spin.dkind in ("f", "i"): + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + 1j * M[..., 3] + m[..., 1, 0] = m[..., 0, 1].conj() else: - m[0, 0] = M[0] - m[1, 1] = M[1] - m[0, 1] = M[2] - m[1, 0] = M[2].conj() + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + m[..., 1, 0] = M[..., 2].conj() else: - if spin.dkind == "f": - m[0, 0] = M[0] + 1j * M[4] - m[1, 1] = M[1] + 1j * M[5] - m[0, 1] = M[2] + 1j * M[3] - m[1, 0] = M[6] + 1j * M[7] + if spin.dkind in ("f", "i"): + m[..., 0, 0] = M[..., 0] + 1j * M[..., 4] + m[..., 1, 1] = M[..., 1] + 1j * M[..., 5] + m[..., 0, 1] = M[..., 2] + 1j * M[..., 3] + m[..., 1, 0] = M[..., 6] + 1j * M[..., 7] else: - m[0, 0] = M[0] - m[1, 1] = M[1] - m[0, 1] = M[2] - m[1, 0] = M[3] + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + m[..., 1, 0] = M[..., 3] return m @@ -165,6 +164,8 @@ def __init__( def _reset(self): r"""Reset object according to the options, please refer to `SparseOrbital.reset` for details""" + # Update the shape + self._csr._shape = self.shape[:-1] + self._csr._D.shape[-1:] if self.orthogonal: self.Sk = self._Sk_diagonal self.S_idx = -100 @@ -844,6 +845,9 @@ def _reset(self): r"""Reset object according to the options, please refer to `SparseOrbital.reset` for details""" super()._reset() + # Update the dtype of the spin + self._spin = Spin(self.spin, dtype=self.dtype) + if self.spin.is_unpolarized: self.UP = 0 self.DOWN = 0 @@ -861,7 +865,7 @@ def _reset(self): self.dSk = self._dSk elif self.spin.is_noncolinear: - if self.spin.dkind == "f": + if self.spin.dkind in ("f", "i"): self.M11 = 0 self.M22 = 1 self.M12r = 2 @@ -879,7 +883,7 @@ def _reset(self): self.ddSk = self._ddSk_non_colinear 
elif self.spin.is_spinorbit: - if self.spin.dkind == "f": + if self.spin.dkind in ("f", "i"): self.SX = np.array([0, 0, 1, 0, 0, 0, 1, 0], self.dtype) self.SY = np.array([0, 0, 0, -1, 0, 0, 0, 1], self.dtype) self.SZ = np.array([1, -1, 0, 0, 0, 0, 0, 0], self.dtype) @@ -1497,7 +1501,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru if sp.is_spinorbit: if hermitian and spin: # conjugate the imaginary value and transpose spin-box - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # imaginary components (including transposing) # 12,11,22,21 D[:, [3, 4, 5, 7]] = -D[:, [7, 4, 5, 3]] @@ -1507,7 +1511,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru D[:, [0, 1, 2, 3]] = np.conj(D[:, [0, 1, 3, 2]]) elif hermitian: # conjugate the imaginary value - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # imaginary components # 12,11,22,21 D[:, [3, 4, 5, 7]] *= -1.0 @@ -1515,7 +1519,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru D[:, :] = np.conj(D[:, :]) elif spin: # transpose spin-box, 12 <-> 21 - if sp.dkind == "f": + if sp.dkind in ("f", "i"): D[:, [2, 3, 6, 7]] = D[:, [6, 7, 2, 3]] else: D[:, [2, 3]] = D[:, [3, 2]] @@ -1532,7 +1536,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru # So for transposing we should negate the sign # to ensure we put the opposite value in the # correct place. 
- if sp.dkind == "f": + if sp.dkind in ("f", "i"): D[:, 3] = -D[:, 3] else: D[:, 2] = np.conj(D[:, 2]) @@ -1556,7 +1560,7 @@ def trs(self): # Apply Pauli-Y on the left and right of each spin-box if sp.is_spinorbit: - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # [R11, R22, R12, I12, I11, I22, R21, I21] # [R11, R22] = [R22, R11] # [I12, I21] = [I21, I12] (conj + Y @ Y[sign-changes conj]) @@ -1567,7 +1571,7 @@ def trs(self): else: raise NotImplementedError elif sp.is_noncolinear: - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # [R11, R22, R12, I12] D[:, 2] = -D[:, 2] else: @@ -1687,7 +1691,7 @@ def transform(self, matrix=None, dtype=None, spin=None, orthogonal=None): ) new._csr = self._csr.transform(matrix, dtype=dtype) - if not orthogonal and self.orthogonal: + if self.orthogonal and not orthogonal: # set identity overlap matrix, loop over rows for i in range(new._csr.shape[0]): new._csr[i, i, -1] = 1.0 diff --git a/src/sisl/physics/spin.py b/src/sisl/physics/spin.py index c86510ee69..191c510916 100644 --- a/src/sisl/physics/spin.py +++ b/src/sisl/physics/spin.py @@ -56,7 +56,7 @@ class Spin: #: The :math:`\boldsymbol\sigma_z` Pauli matrix Z = np.array([[1, 0], [0, -1]], np.complex128) - __slots__ = ("_size", "_kind", "_dtype") + __slots__ = ("_kind", "_dtype") def __init__(self, kind="", dtype=None): if isinstance(kind, Spin): @@ -64,7 +64,6 @@ def __init__(self, kind="", dtype=None): dtype = kind._dtype self._kind = kind._kind self._dtype = dtype - self._size = kind._size return if dtype is None: @@ -107,24 +106,6 @@ def __init__(self, kind="", dtype=None): # Now assert the checks self._kind = kind - if np.dtype(dtype).kind == "c": - size = { - self.UNPOLARIZED: 1, - self.POLARIZED: 2, - self.NONCOLINEAR: 4, - self.SPINORBIT: 4, - }.get(kind) - - else: - size = { - self.UNPOLARIZED: 1, - self.POLARIZED: 2, - self.NONCOLINEAR: 4, - self.SPINORBIT: 8, - }.get(kind) - - self._size = size - def __str__(self): if self.is_unpolarized: return 
f"{self.__class__.__name__}{{unpolarized, kind={self.dkind}}}" @@ -151,12 +132,32 @@ def dkind(self): @property def size(self): """Number of elements to describe the spin-components""" - return self._size + size = { + "c": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 3, + self.SPINORBIT: 4, + }, + "i": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 4, + self.SPINORBIT: 8, + }, + "f": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 4, + self.SPINORBIT: 8, + }, + }[self.dkind][self.kind] + return size @property def spinor(self): """Number of spinor components (1 or 2)""" - return min(2, self._size) + return min(2, self.size) @property def kind(self): @@ -196,7 +197,7 @@ def is_spinorbit(self): return self.kind == Spin.SPINORBIT def __len__(self): - return self._size + return self.size # Comparisons def __lt__(self, other): @@ -221,6 +222,5 @@ def __getstate__(self): return {"size": self.size, "kind": self.kind, "dtype": self.dtype} def __setstate__(self, state): - self._size = state["size"] self._kind = state["kind"] self._dtype = state["dtype"] From 71c2c47e24a8fff84c43aea35384b22c88acc040 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 8 Nov 2024 13:45:16 +0100 Subject: [PATCH 08/14] removed unused variable Signed-off-by: Nick Papior --- src/sisl/io/siesta/_help.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sisl/io/siesta/_help.py b/src/sisl/io/siesta/_help.py index 7ec03fb148..b356f85ec3 100644 --- a/src/sisl/io/siesta/_help.py +++ b/src/sisl/io/siesta/_help.py @@ -5,7 +5,6 @@ import numpy as np -import sisl as si import sisl._array as _a from sisl.messages import warn From b02db45bd11def630f33ef5dbb8cac7177197cbc Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 8 Nov 2024 14:00:00 +0100 Subject: [PATCH 09/14] fixed wrong object usage in mulliken extraction Signed-off-by: Nick Papior --- src/sisl/io/siesta/tests/test_tsde.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/sisl/io/siesta/tests/test_tsde.py b/src/sisl/io/siesta/tests/test_tsde.py index 4a110049aa..e251b3468c 100644 --- a/src/sisl/io/siesta/tests/test_tsde.py +++ b/src/sisl/io/siesta/tests/test_tsde.py @@ -73,7 +73,7 @@ def test_si_pdos_kgrid_tsde_edm_dtypes(sisl_files, sisl_tmp, matrix): M.write(fnc) # The overlap should be here... M1 = M.read(fnc) - assert np.allclose(mull, M.mulliken(), atol=1e-5) + assert np.allclose(mull, M1.mulliken(), atol=1e-5) @pytest.mark.filterwarnings("ignore", message="*wrong sparse pattern") From 10ada5fba899876fd3f6ee4226ee9660efd98d7a Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 8 Nov 2024 14:38:31 +0100 Subject: [PATCH 10/14] added explanation of how transform works Signed-off-by: Nick Papior --- src/sisl/io/siesta/tests/test_tshs.py | 4 ++-- src/sisl/physics/sparse.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/sisl/io/siesta/tests/test_tshs.py b/src/sisl/io/siesta/tests/test_tshs.py index bd440e1954..9d18d50476 100644 --- a/src/sisl/io/siesta/tests/test_tshs.py +++ b/src/sisl/io/siesta/tests/test_tshs.py @@ -128,11 +128,11 @@ def test_tshs_soc_pt2_xx_dtypes(sisl_files, sisl_tmp): fnc = sisl_tmp("tmp.nc") for HS in data: HS.write(f) - HS1 = sisl.physics.Hamiltonian.read(f) + HS1 = HS.read(f) assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) HS.write(fnc) - HS1 = sisl.physics.Hamiltonian.read(fnc) + HS1 = HS.read(fnc) assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) diff --git a/src/sisl/physics/sparse.py b/src/sisl/physics/sparse.py index 78ea95f07f..c4be7cf32b 100644 --- a/src/sisl/physics/sparse.py +++ b/src/sisl/physics/sparse.py @@ -874,7 +874,6 @@ def _reset(self): self.M11 = 0 self.M22 = 1 self.M12 = 2 - raise NotImplementedError("Currently not implemented") self.Pk = self._Pk_non_colinear self.Sk = self._Sk_non_colinear self.dPk = self._dPk_non_colinear @@ -1617,6 +1616,12 @@ def transform(self, matrix=None, dtype=None, spin=None, orthogonal=None): The transformation matrix 
does *not* act on the rows and columns, only on the final dimension of the matrix. + The matrix transformation is done like this: + + >>> out = in @ matrix.T + + Meaning that ``matrix[0, :]`` will be the factors of the input matrix elements. + Parameters ---------- matrix : array_like, optional From 95370a92330de3b94340d92f6ba63de8af25e07f Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Mon, 11 Nov 2024 21:39:53 +0100 Subject: [PATCH 11/14] fixed nc compilation with complex numbers Signed-off-by: Nick Papior --- src/sisl/physics/_matrix_phase.pyx | 24 ++++++++++++---- src/sisl/physics/_matrix_phase3.pyx | 40 ++++++++++++++++++--------- src/sisl/physics/_matrix_phase_sc.pyx | 24 ++++++++++++---- src/sisl/physics/_matrix_utils.pxd | 16 ++++++++--- src/sisl/physics/_matrix_utils.pyx | 18 ++++++++++-- 5 files changed, 92 insertions(+), 30 deletions(-) diff --git a/src/sisl/physics/_matrix_phase.pyx b/src/sisl/physics/_matrix_phase.pyx index 50fbc46be2..fba3e6d0cb 100644 --- a/src/sisl/physics/_matrix_phase.pyx +++ b/src/sisl/physics/_matrix_phase.pyx @@ -29,8 +29,10 @@ from sisl._core._dtypes cimport ( ) from ._matrix_utils cimport ( + _f_matrix_box_nc, _f_matrix_box_so, - _matrix_box_nc, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, _matrix_box_so_cmplx, _matrix_box_so_real, ) @@ -326,9 +328,15 @@ def _phase_csr_nc(ints_st[::1] ptr, cdef ints_st r, rr, ind, s, s_idx, c cdef complexs_st ph + cdef _f_matrix_box_nc func cdef numerics_st *d cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + with nogil: if p_opt == -1: for r in range(nr): @@ -356,7 +364,7 @@ def _phase_csr_nc(ints_st[::1] ptr, s_idx = _index_sorted(tmp, c) d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[v_ptr[rr] + s_idx] += M[0] v[v_ptr[rr] + s_idx+1] += M[1] v[v_ptr[rr+1] + s_idx] += M[2] @@ -374,7 +382,7 @@ def _phase_csr_nc(ints_st[::1] ptr, s_idx = _index_sorted(tmp, c) d = &D[ind, 0] - 
_matrix_box_nc(d, ph, M) + func(d, ph, M) v[v_ptr[rr] + s_idx] += M[0] v[v_ptr[rr] + s_idx+1] += M[1] v[v_ptr[rr+1] + s_idx] += M[2] @@ -406,9 +414,15 @@ def _phase_array_nc(ints_st[::1] ptr, cdef ints_st r, rr, ind, s, c cdef complexs_st ph + cdef _f_matrix_box_nc func cdef numerics_st *d cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + with nogil: if p_opt == -1: for r in range(nr): @@ -430,7 +444,7 @@ def _phase_array_nc(ints_st[::1] ptr, ph = phases[ind] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[rr, c] += M[0] v[rr, c + 1] += M[1] v[rr + 1, c] += M[2] @@ -445,7 +459,7 @@ def _phase_array_nc(ints_st[::1] ptr, ph = phases[s] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[rr, c] += M[0] v[rr, c + 1] += M[1] v[rr + 1, c] += M[2] diff --git a/src/sisl/physics/_matrix_phase3.pyx b/src/sisl/physics/_matrix_phase3.pyx index 683a51e4f0..220a8570b9 100644 --- a/src/sisl/physics/_matrix_phase3.pyx +++ b/src/sisl/physics/_matrix_phase3.pyx @@ -24,8 +24,10 @@ from sisl._core._dtypes cimport ( ) from ._matrix_utils cimport ( + _f_matrix_box_nc, _f_matrix_box_so, - _matrix_box_nc, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, _matrix_box_so_cmplx, _matrix_box_so_real, ) @@ -175,8 +177,14 @@ def _phase3_csr_nc(ints_st[::1] ptr, cdef ints_st r, rr, ind, s, c cdef ints_st s_idx cdef numerics_st *d + cdef _f_matrix_box_nc func cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + with nogil: if p_opt == 0: for r in range(nr): @@ -188,21 +196,21 @@ def _phase3_csr_nc(ints_st[::1] ptr, d = &D[ind, 0] ph = phases[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vx[v_ptr[rr] + s_idx] += M[0] Vx[v_ptr[rr] + s_idx+1] += M[1] Vx[v_ptr[rr+1] + s_idx] += M[2] Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 1] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vy[v_ptr[rr] + s_idx] += M[0] 
Vy[v_ptr[rr] + s_idx+1] += M[1] Vy[v_ptr[rr+1] + s_idx] += M[2] Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[ind, 2] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vz[v_ptr[rr] + s_idx] += M[0] Vz[v_ptr[rr] + s_idx+1] += M[1] Vz[v_ptr[rr+1] + s_idx] += M[2] @@ -220,21 +228,21 @@ def _phase3_csr_nc(ints_st[::1] ptr, d = &D[ind, 0] ph = phases[s, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vx[v_ptr[rr] + s_idx] += M[0] Vx[v_ptr[rr] + s_idx+1] += M[1] Vx[v_ptr[rr+1] + s_idx] += M[2] Vx[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 1] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vy[v_ptr[rr] + s_idx] += M[0] Vy[v_ptr[rr] + s_idx+1] += M[1] Vy[v_ptr[rr+1] + s_idx] += M[2] Vy[v_ptr[rr+1] + s_idx+1] += M[3] ph = phases[s, 2] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vz[v_ptr[rr] + s_idx] += M[0] Vz[v_ptr[rr] + s_idx+1] += M[1] Vz[v_ptr[rr+1] + s_idx] += M[2] @@ -266,8 +274,14 @@ def _phase3_array_nc(ints_st[::1] ptr, cdef ints_st r, rr, ind, s, c cdef ints_st s_idx cdef numerics_st *d + cdef _f_matrix_box_nc func cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + with nogil: if p_opt == 0: for r in range(nr): @@ -278,21 +292,21 @@ def _phase3_array_nc(ints_st[::1] ptr, d = &D[ind, 0] ph = phases[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vx[rr, c] += M[0] Vx[rr, c+1] += M[1] Vx[rr+1, c] += M[2] Vx[rr+1, c+1] += M[3] ph = phases[ind, 1] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vy[rr, c] += M[0] Vy[rr, c+1] += M[1] Vy[rr+1, c] += M[2] Vy[rr+1, c+1] += M[3] ph = phases[ind, 2] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vz[rr, c] += M[0] Vz[rr, c+1] += M[1] Vz[rr+1, c] += M[2] @@ -308,21 +322,21 @@ def _phase3_array_nc(ints_st[::1] ptr, d = &D[ind, 0] ph = phases[s, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vx[rr, c] += M[0] Vx[rr, c+1] += M[1] Vx[rr+1, c] += M[2] Vx[rr+1, c+1] += M[3] ph = phases[s, 1] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vy[rr, c] += M[0] Vy[rr, 
c+1] += M[1] Vy[rr+1, c] += M[2] Vy[rr+1, c+1] += M[3] ph = phases[s, 2] - _matrix_box_nc(d, ph, M) + func(d, ph, M) Vz[rr, c] += M[0] Vz[rr, c+1] += M[1] Vz[rr+1, c] += M[2] diff --git a/src/sisl/physics/_matrix_phase_sc.pyx b/src/sisl/physics/_matrix_phase_sc.pyx index aae52887cd..ad9ede2faf 100644 --- a/src/sisl/physics/_matrix_phase_sc.pyx +++ b/src/sisl/physics/_matrix_phase_sc.pyx @@ -23,8 +23,10 @@ from sisl._core._sparse cimport ncol2ptr_nc from sisl._indices cimport _index_sorted from ._matrix_utils cimport ( + _f_matrix_box_nc, _f_matrix_box_so, - _matrix_box_nc, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, _matrix_box_so_cmplx, _matrix_box_so_real, ) @@ -181,9 +183,15 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, cdef ints_st r, rr, cind, c, nz, ind cdef complexs_st ph + cdef _f_matrix_box_nc func cdef numerics_st *d cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + # We have to do it manually due to the double elements per matrix element ncol2ptr_nc(nr, ncol, v_ptr, 2) @@ -222,7 +230,7 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, ph = phases[ind] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c v[v_ptr[rr] + cind+1] = M[1] @@ -246,7 +254,7 @@ def _phase_sc_csr_nc(ints_st[::1] ptr, ph = phases[col[ind] / nr] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[v_ptr[rr] + cind] = M[0] v_col[v_ptr[rr] + cind] = c @@ -283,8 +291,14 @@ def _phase_sc_array_nc(ints_st[::1] ptr, cdef complexs_st ph cdef ints_st r, rr, c, nz, ind cdef numerics_st *d + cdef _f_matrix_box_nc func cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + with nogil: if p_opt == -1: for r in range(nr): @@ -305,7 +319,7 @@ def _phase_sc_array_nc(ints_st[::1] ptr, ph = phases[ind] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[rr, c] = M[0] 
v[rr, c+1] = M[1] v[rr+1, c] = M[2] @@ -319,7 +333,7 @@ def _phase_sc_array_nc(ints_st[::1] ptr, ph = phases[col[ind] / nr] d = &D[ind, 0] - _matrix_box_nc(d, ph, M) + func(d, ph, M) v[rr, c] = M[0] v[rr, c+1] = M[1] v[rr+1, c] = M[2] diff --git a/src/sisl/physics/_matrix_utils.pxd b/src/sisl/physics/_matrix_utils.pxd index 80cfa7352d..b235ca106b 100644 --- a/src/sisl/physics/_matrix_utils.pxd +++ b/src/sisl/physics/_matrix_utils.pxd @@ -13,13 +13,21 @@ ctypedef fused _internal_complexs_st: float complex double complex -ctypedef void(*_f_matrix_box_so)(const numerics_st *data, +ctypedef void(*_f_matrix_box_nc)(const numerics_st *data, const complexs_st phase, complexs_st *M) noexcept nogil -cdef void _matrix_box_nc(const numerics_st *data, - const complexs_st phase, - complexs_st *M) noexcept nogil +cdef void _matrix_box_nc_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_nc_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +ctypedef void(*_f_matrix_box_so)(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil cdef void _matrix_box_so_real(const reals_st *data, const complexs_st phase, diff --git a/src/sisl/physics/_matrix_utils.pyx b/src/sisl/physics/_matrix_utils.pyx index 0e3ee597f9..7b0e2fb904 100644 --- a/src/sisl/physics/_matrix_utils.pyx +++ b/src/sisl/physics/_matrix_utils.pyx @@ -26,15 +26,27 @@ M[3] == spin[1, 1] @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -cdef inline void _matrix_box_nc(const numerics_st *data, - const complexs_st phase, - complexs_st *M) noexcept nogil: +cdef inline void _matrix_box_nc_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: M[0] = (data[0] * phase) M[1] = ((data[2] + 1j * data[3]) * phase) M[2] = ((data[2] + 1j * data[3]).conjugate() * phase) M[3] = (data[1] * phase) +@cython.boundscheck(False) 
+@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_nc_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = (data[0] * phase) + M[1] = (data[2] * phase) + M[2] = (data[2].conjugate() * phase) + M[3] = (data[1] * phase) + @cython.boundscheck(False) @cython.wraparound(False) From f6df3fbcf6cd79f4a27a6159b477d8a65f66259e Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Mon, 25 Nov 2024 15:27:43 +0100 Subject: [PATCH 12/14] wrong usage of siesta2sisl Signed-off-by: Nick Papior --- src/sisl/io/siesta/siesta_nc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sisl/io/siesta/siesta_nc.py b/src/sisl/io/siesta/siesta_nc.py index e52ea1e907..49a3f4b38b 100644 --- a/src/sisl/io/siesta/siesta_nc.py +++ b/src/sisl/io/siesta/siesta_nc.py @@ -624,7 +624,7 @@ def write_hamiltonian(self, H, **kwargs): _csr_to_siesta(H.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_siesta2sisl(H, dtype=np.float64) + _mat_sisl2siesta(H, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(H.geometry) @@ -683,7 +683,7 @@ def write_density_matrix(self, DM, **kwargs): # Convert to siesta CSR (we don't need to sort this matrix) _csr_to_siesta(DM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_siesta2sisl(DM, dtype=np.float64) + _mat_sisl2siesta(DM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(DM.geometry) @@ -741,7 +741,7 @@ def write_energy_density_matrix(self, EDM, **kwargs): # no need to sort this matrix _csr_to_siesta(EDM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_siesta2sisl(EDM, dtype=np.float64) + _mat_sisl2siesta(EDM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(EDM.geometry) From cc750c63b9b816130206030fa827cf869795842a Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Tue, 26 Nov 2024 11:19:45 +0100 
Subject: [PATCH 13/14] added more tests for dtype conversion fixed errors in col creation of dtypes when calling csr.diags() Ensured that sparsegeometry.finalize accepts any arguments the csr.finalize accepts. Removed casting errors in mat2dtype. This may *hide* potential problems when there are non-zero imaginary parts. We should perhaps later revisit the problem considering that TB + Peierls has complex overlap matrices. Fixed issue when a construct was called with Python intrinsic complex values. Signed-off-by: Nick Papior --- src/sisl/_core/sparse.py | 4 +- src/sisl/_core/sparse_geometry.py | 8 +- src/sisl/io/siesta/_help.py | 10 +- src/sisl/io/siesta/binaries.py | 6 +- src/sisl/io/siesta/siesta_nc.py | 4 +- src/sisl/io/siesta/tests/test_matrices.py | 110 ++++++++++++++++++++ src/sisl/io/siesta/tests/test_siesta.py | 121 ---------------------- src/sisl/io/siesta/tests/test_tshs.py | 28 +---- src/sisl/physics/sparse.py | 15 ++- 9 files changed, 141 insertions(+), 165 deletions(-) create mode 100644 src/sisl/io/siesta/tests/test_matrices.py diff --git a/src/sisl/_core/sparse.py b/src/sisl/_core/sparse.py index aa75fdc39b..e8cabbb0a4 100644 --- a/src/sisl/_core/sparse.py +++ b/src/sisl/_core/sparse.py @@ -404,7 +404,7 @@ def diags(self, diagonals, offsets=0, dim=None, dtype=None): shape[2] = dim shape = tuple(shape) - offsets = array_fill_repeat(offsets, shape[0], cls=dtype) + offsets = array_fill_repeat(offsets, shape[0], cls=np.int32) # Create the index-pointer, data and values data = array_fill_repeat(diagonals, shape[0], axis=0, cls=dtype) @@ -488,7 +488,7 @@ def finalized(self): """Whether the contained data is finalized and non-used elements have been removed""" return self._finalized - def finalize(self, sort=True): + def finalize(self, sort: bool = True): """Finalizes the sparse matrix by removing all non-set elements One may still interact with the sparse matrix as one would previously. 
diff --git a/src/sisl/_core/sparse_geometry.py b/src/sisl/_core/sparse_geometry.py index 0adc7035cb..45c96f081f 100644 --- a/src/sisl/_core/sparse_geometry.py +++ b/src/sisl/_core/sparse_geometry.py @@ -1003,15 +1003,17 @@ def unrepeat( atoms = np.arange(self.geometry.na).reshape(-1, reps).T.ravel() return self.sub(atoms).untile(reps, axis, segment, *args, sym=sym, **kwargs) - def finalize(self): + def finalize(self, *args, **kwargs): """Finalizes the model Finalizes the model so that all non-used elements are removed. I.e. this simply reduces the memory requirement for the sparse matrix. - Note that adding more elements to the sparse matrix is more time-consuming than for a non-finalized sparse matrix due to the + Notes + ----- + Adding more elements to the sparse matrix is more time-consuming than for a non-finalized sparse matrix due to the internal data-representation. """ - self._csr.finalize() + self._csr.finalize(*args, **kwargs) def tocsr(self, dim: int = 0, isc=None, **kwargs): """Return a :class:`~scipy.sparse.csr_matrix` for the specified dimension diff --git a/src/sisl/io/siesta/_help.py b/src/sisl/io/siesta/_help.py index b356f85ec3..d7ac67585d 100644 --- a/src/sisl/io/siesta/_help.py +++ b/src/sisl/io/siesta/_help.py @@ -127,7 +127,8 @@ def toc(D, re, im): csr._D = csr._D.astype(dtype) elif spin.is_noncolinear: D = np.empty(shape[:-1] + (shape[-1] - 1,), dtype=dtype) - D[..., [0, 1]] = csr._D[..., [0, 1]].astype(dtype) + # These should be real only anyways! + D[..., [0, 1]] = csr._D[..., [0, 1]].real.astype(dtype) D[..., 2] = toc(csr._D, 2, 3) if D.shape[-1] > 4: D[..., 3:] = csr._D[..., 4:].astype(dtype) @@ -158,11 +159,12 @@ def toc(D, re, im): csr._D = csr._D.astype(dtype) elif spin.is_noncolinear: D = np.empty(shape[:-1] + (shape[-1] + 1,), dtype=dtype) - D[..., [0, 1]] = csr._D[..., [0, 1]].astype(dtype) + # These should be real only anyways! 
+ D[..., [0, 1]] = csr._D[..., [0, 1]].real.astype(dtype) D[..., 2] = csr._D[..., 2].real.astype(dtype) D[..., 3] = csr._D[..., 2].imag.astype(dtype) if D.shape[-1] > 4: - D[..., 4:] = csr._D[..., 3:].astype(dtype) + D[..., 4:] = csr._D[..., 3:].real.astype(dtype) csr._D = D elif spin.is_spinorbit: D = np.empty(shape[:-1] + (shape[-1] + 4,), dtype=dtype) @@ -175,7 +177,7 @@ def toc(D, re, im): D[..., 6] = csr._D[..., 3].real.astype(dtype) D[..., 7] = csr._D[..., 3].imag.astype(dtype) if D.shape[-1] > 8: - D[..., 8:] = csr._D[..., 4:].astype(dtype) + D[..., 8:] = csr._D[..., 4:].real.astype(dtype) csr._D = D else: raise NotImplementedError diff --git a/src/sisl/io/siesta/binaries.py b/src/sisl/io/siesta/binaries.py index ef8336c0db..fd7224b038 100644 --- a/src/sisl/io/siesta/binaries.py +++ b/src/sisl/io/siesta/binaries.py @@ -721,8 +721,9 @@ def write_density_matrices(self, DM, EDM, Ef: float = 0.0, **kwargs): Ef : fermi-level to be contained """ - DM = DM.transpose(spin=False, sort=False) - EDM = EDM.transpose(spin=False, sort=False) + sort = kwargs.get("sort", True) + DM = DM.transpose(spin=False, sort=sort) + EDM = EDM.transpose(spin=False, sort=sort) DM._csr.align(EDM._csr) EDM._csr.align(DM._csr) @@ -734,7 +735,6 @@ def write_density_matrices(self, DM, EDM, Ef: float = 0.0, **kwargs): _csr_to_siesta(DM.geometry, DM._csr) _csr_to_siesta(DM.geometry, EDM._csr) - sort = kwargs.get("sort", True) DM._csr.finalize(sort=sort) EDM._csr.finalize(sort=sort) _mat_sisl2siesta(DM, dtype=np.float64) diff --git a/src/sisl/io/siesta/siesta_nc.py b/src/sisl/io/siesta/siesta_nc.py index 49a3f4b38b..16d795e53d 100644 --- a/src/sisl/io/siesta/siesta_nc.py +++ b/src/sisl/io/siesta/siesta_nc.py @@ -253,8 +253,8 @@ def read_hamiltonian(self, **kwargs) -> Hamiltonian: _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Shift to the Fermi-level - Ef = -self._value("Ef")[:] * Ry2eV - H.shift(Ef) + Ef = self._value("Ef")[:] * Ry2eV + H.shift(-Ef) return H.transpose(spin=False, 
sort=kwargs.get("sort", True)) diff --git a/src/sisl/io/siesta/tests/test_matrices.py b/src/sisl/io/siesta/tests/test_matrices.py new file mode 100644 index 0000000000..447351055b --- /dev/null +++ b/src/sisl/io/siesta/tests/test_matrices.py @@ -0,0 +1,110 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +from __future__ import annotations + +import numpy as np +import pytest + +import sisl +from sisl.io.siesta._help import _mat2dtype + +pytestmark = [pytest.mark.io, pytest.mark.siesta] + +listify = sisl.utils.listify + + +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize( + "matrix,ext", + (map(lambda x: ("Hamiltonian", x), ["nc", "TSHS"]) | listify) + + (map(lambda x: ("DensityMatrix", x), ["nc", "DM"]) | listify) + + (map(lambda x: ("EnergyDensityMatrix", x), ["nc"]) | listify), +) +@pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) +def test_non_colinear(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("NC"), dtype=dtype) + if np.issubdtype(dtype, np.complexfloating): + onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j] + nn = [0.2, 0.3, 0.4 + 0.5j] + else: + onsite = [0.1, 0.2, 0.3, 0.4] + nn = [0.2, 0.3, 0.4, 0.5] + M.construct(([0.1, 1.44], [onsite, nn])) + + f1 = sisl_tmp(f"M1.{ext}") + f2 = sisl_tmp(f"M2.{ext}") + M.write(f1, sort=sort) + M.finalize() + with sisl.get_sile(f1) as sile: + M2 = M.read(sile, dtype=read_dtype) + M2.write(f2, sort=sort) + with sisl.get_sile(f2) as sile: + M3 = M2.read(sile, dtype=read_dtype) + + if sort: + M.finalize(sort=sort) + assert M._csr.spsame(M2._csr) + assert M._csr.spsame(M3._csr) + + from sisl.io.siesta._help import _mat2dtype + + # Convert to the same dtype + _mat2dtype(M2, dtype) + 
_mat2dtype(M3, dtype) + if M.orthogonal and not M2.orthogonal: + assert np.allclose(M._csr._D, M2._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M2._csr._D) + if M.orthogonal and not M3.orthogonal: + assert np.allclose(M._csr._D, M3._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M3._csr._D) + + +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize( + "matrix,ext", + (map(lambda x: ("Hamiltonian", x), ["nc", "TSHS"]) | listify) + + (map(lambda x: ("DensityMatrix", x), ["nc", "DM"]) | listify) + + (map(lambda x: ("EnergyDensityMatrix", x), ["nc"]) | listify), +) +@pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) +def test_spin_orbit(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("SO"), dtype=dtype) + if np.issubdtype(dtype, np.complexfloating): + onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j, 0.3 - 0.4j] + nn = [0.2 + 0.1j, 0.3 + 0.3j, 0.4 + 0.5j, 0.4 - 0.5j] + else: + onsite = [0.1, 0.2, 0.3, 0.4, 0, 0, 0.3, -0.4] + nn = [0.2, 0.3, 0.4, 0.5, 0.1, 0.3, 0.4, -0.5] + M.construct(([0.1, 1.44], [onsite, nn])) + + f1 = sisl_tmp(f"M1.{ext}") + f2 = sisl_tmp(f"M2.{ext}") + M.write(f1, sort=sort) + M.finalize() + with sisl.get_sile(f1) as sile: + M2 = M.read(sile, dtype=read_dtype) + M2.write(f2, sort=sort) + with sisl.get_sile(f2) as sile: + M3 = M2.read(sile, dtype=read_dtype) + + if sort: + M.finalize(sort=sort) + assert M._csr.spsame(M2._csr) + assert M._csr.spsame(M3._csr) + + # Convert to the same dtype + _mat2dtype(M2, dtype) + _mat2dtype(M3, dtype) + if M.orthogonal and not M2.orthogonal: + assert np.allclose(M._csr._D, M2._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M2._csr._D) + if M.orthogonal and not M3.orthogonal: + assert np.allclose(M._csr._D, M3._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M3._csr._D) diff --git 
a/src/sisl/io/siesta/tests/test_siesta.py b/src/sisl/io/siesta/tests/test_siesta.py index 6f62ec52ee..e959fe6ed3 100644 --- a/src/sisl/io/siesta/tests/test_siesta.py +++ b/src/sisl/io/siesta/tests/test_siesta.py @@ -161,99 +161,6 @@ def test_nc_density_matrix(sisl_tmp, sisl_system): assert sisl_system.g.atoms.equal(ndm.atoms, R=False) -def test_nc_H_non_colinear(sisl_tmp): - H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("NC")) - H1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("H1.nc") - f2 = sisl_tmp("H2.nc") - H1.write(f1) - H1.finalize() - with sisl.get_sile(f1) as sile: - H2 = sile.read_hamiltonian() - H2.write(f2) - with sisl.get_sile(f2) as sile: - H3 = sile.read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - -def test_nc_DM_non_colinear(sisl_tmp): - DM1 = DensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("NC")) - DM1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("DM1.nc") - f2 = sisl_tmp("DM2.nc") - DM1.write(f1) - DM1.finalize() - with sisl.get_sile(f1) as sile: - DM2 = sile.read_density_matrix() - DM2.write(f2) - with sisl.get_sile(f2) as sile: - DM3 = sile.read_density_matrix() - assert DM1._csr.spsame(DM2._csr) - assert DM1._csr.spsame(DM3._csr) - # DM1 is finalized, but DM2 is not finalized - assert np.allclose(DM1._csr._D, DM2._csr._D) - # DM2 and DM3 are the same - assert np.allclose(DM2._csr._D, DM3._csr._D) - DM2.finalize() - assert np.allclose(DM1._csr._D, DM2._csr._D) - - -def test_nc_EDM_non_colinear(sisl_tmp): - EDM1 = EnergyDensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("NC")) - EDM1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("EDM1.nc") - f2 = sisl_tmp("EDM2.nc") - EDM1.write(f1, sort=False) - EDM1.finalize() - with sisl.get_sile(f1) as sile: - EDM2 = 
sile.read_energy_density_matrix(sort=False) - EDM2.write(f2, sort=False) - with sisl.get_sile(f2) as sile: - EDM3 = sile.read_energy_density_matrix(sort=False) - assert EDM1._csr.spsame(EDM2._csr) - assert EDM1._csr.spsame(EDM3._csr) - # EDM1 is finalized, but EDM2 is not finalized - assert not np.allclose(EDM1._csr._D, EDM2._csr._D) - # EDM2 and EDM3 are the same - assert np.allclose(EDM2._csr._D, EDM3._csr._D) - EDM2.finalize() - assert np.allclose(EDM1._csr._D, EDM2._csr._D) - - -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_nc_H_spin_orbit(sisl_tmp): - H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) - H1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("H1.nc") - f2 = sisl_tmp("H2.nc") - H1.write(f1) - H1.finalize() - with sisl.get_sile(f1) as sile: - H2 = sile.read_hamiltonian() - H2.write(f2) - with sisl.get_sile(f2) as sile: - H3 = sile.read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_nc_H_spin_orbit_nc2tshs2nc(sisl_tmp): H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) @@ -282,34 +189,6 @@ def test_nc_H_spin_orbit_nc2tshs2nc(sisl_tmp): assert np.allclose(H1._csr._D, H3._csr._D) -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_nc_DM_spin_orbit(sisl_tmp): - DM1 = DensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("SO")) - DM1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("DM1.nc") - f2 = sisl_tmp("DM2.nc") - DM1.write(f1) - DM1.finalize() - with sisl.get_sile(f1) as sile: - DM2 = sile.read_density_matrix() - DM2.write(f2) - with 
sisl.get_sile(f2) as sile: - DM3 = sile.read_density_matrix() - assert DM1._csr.spsame(DM2._csr) - assert np.allclose(DM1._csr._D, DM2._csr._D) - assert DM1._csr.spsame(DM3._csr) - assert np.allclose(DM1._csr._D, DM3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_nc_DM_spin_orbit_nc2dm2nc(sisl_tmp): DM1 = DensityMatrix(sisl.geom.graphene(), orthogonal=False, spin=sisl.Spin("SO")) diff --git a/src/sisl/io/siesta/tests/test_tshs.py b/src/sisl/io/siesta/tests/test_tshs.py index 9d18d50476..20e6fc4759 100644 --- a/src/sisl/io/siesta/tests/test_tshs.py +++ b/src/sisl/io/siesta/tests/test_tshs.py @@ -29,7 +29,7 @@ def test_tshs_si_pdos_kgrid(sisl_files, sisl_tmp): @pytest.mark.filterwarnings("ignore", message="*Casting complex values") -def test_tshs_si_pdos_dtypes(sisl_files, sisl_tmp): +def test_tshs_si_pdos_dtypes_eigs(sisl_files, sisl_tmp): si = sisl.get_sile(sisl_files("siesta", "Si_pdos_k", "Si_pdos.TSHS")) data = [] eigs = None @@ -193,32 +193,6 @@ def test_tshs_si_pdos_kgrid_overlap(sisl_files): assert np.allclose(HS._csr._D[:, HS.S_idx], S._csr._D[:, 0]) -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_tshs_spin_orbit(sisl_tmp): - H1 = sisl.Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) - H1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("tmp1.TSHS") - f2 = sisl_tmp("tmp2.TSHS") - H1.write(f1) - H1.finalize() - H2 = sisl.get_sile(f1).read_hamiltonian() - H2.write(f2) - H3 = sisl.get_sile(f2).read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_tshs_spin_orbit_tshs2nc2tshs(sisl_tmp): pytest.importorskip("netCDF4") diff --git 
a/src/sisl/physics/sparse.py b/src/sisl/physics/sparse.py index c4be7cf32b..c5e8a79fd4 100644 --- a/src/sisl/physics/sparse.py +++ b/src/sisl/physics/sparse.py @@ -979,7 +979,13 @@ def create_construct(self, R, params): # Hermitian parameters # The input order is [uu, dd, ud, du] paramsH = [ - [p[0].conj(), p[1].conj(), p[3].conj(), p[2].conj(), *p[4:]] + [ + p[0].conjugate(), + p[1].conjugate(), + p[3].conjugate(), + p[2].conjugate(), + *p[4:], + ] for p in params ] else: @@ -1009,7 +1015,7 @@ def create_construct(self, R, params): ], dtype_cplx, ) - if not np.allclose(onsite, onsite.T.conj()): + if not np.allclose(onsite, onsite.T.conjugate()): warn( f"{self.__class__.__name__}.create_construct is NOT " "Hermitian for on-site terms. This is your responsibility! " @@ -1020,7 +1026,10 @@ def create_construct(self, R, params): if is_complex: nv = 3 # Hermitian parameters - paramsH = [[p[0].conj(), p[1].conj(), p[2], *p[3:]] for p in params] + paramsH = [ + [p[0].conjugate(), p[1].conjugate(), p[2], *p[3:]] + for p in params + ] else: nv = 4 # Hermitian parameters From 6fedebbcac0ea2c722841a51bfdb3e3d30a19b55 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Tue, 26 Nov 2024 11:38:40 +0100 Subject: [PATCH 14/14] fixed bug in mock netCDF4 Signed-off-by: Nick Papior --- src/sisl/io/siesta/tests/test_matrices.py | 6 ++++++ src/sisl/io/sile.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/sisl/io/siesta/tests/test_matrices.py b/src/sisl/io/siesta/tests/test_matrices.py index 447351055b..e43c0b710d 100644 --- a/src/sisl/io/siesta/tests/test_matrices.py +++ b/src/sisl/io/siesta/tests/test_matrices.py @@ -24,6 +24,9 @@ @pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) def test_non_colinear(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + if ext == "nc": + pytest.importorskip("netCDF4") + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("NC"), 
dtype=dtype) if np.issubdtype(dtype, np.complexfloating): onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j] @@ -73,6 +76,9 @@ def test_non_colinear(sisl_tmp, sort, matrix, ext, dtype, read_dtype): @pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) def test_spin_orbit(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + if ext == "nc": + pytest.importorskip("netCDF4") + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("SO"), dtype=dtype) if np.issubdtype(dtype, np.complexfloating): onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j, 0.3 - 0.4j] diff --git a/src/sisl/io/sile.py b/src/sisl/io/sile.py index 7469c2e72f..de9c41012f 100644 --- a/src/sisl/io/sile.py +++ b/src/sisl/io/sile.py @@ -1303,7 +1303,7 @@ def __getattr__(self, attr): exe = Path(sys.executable).name msg = f"Could not import netCDF4. Please install it using '{exe} -m pip install netCDF4'" - raise SileError(msg) from e + raise SileError(msg) netCDF4 = _mock_netCDF4()