diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d7b72543c..788bbd81b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,14 @@ we hit release version 1.0.0. sisl.geom.graphene ### Fixed - - `projection` arguments of several functions has been streamlined +### Changed +- internal Cython code for performance improvements. + This yields significant perf. improvements for DFT sparse matrices + with *many* edges in the sparse matrix, but a perf. hit for very + small TB matrices. + ## [0.15.2] - 2024-11-06 diff --git a/CMakeLists.txt b/CMakeLists.txt index e44950fb77..a50c371a69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,6 @@ add_compile_definitions(CYTHON_NO_PYINIT_EXPORT=1) #: lib, perhaps we should change this set(CMAKE_SHARED_MODULE_PREFIX "") - # Determine whether we are in CIBUILDWHEEL # and whether we are building for the universal target set(_def_fortran TRUE) @@ -81,6 +80,8 @@ option(WITH_FORTRAN # Define all options for the user if( WITH_FORTRAN ) + enable_language(Fortran) + set(F2PY_REPORT_ON_ARRAY_COPY 10 CACHE STRING "The minimum (element) size of arrays before warning about copies") @@ -209,6 +210,18 @@ if(WITH_FORTRAN) endif(WITH_FORTRAN) +message(STATUS "Python variables:") +list(APPEND CMAKE_MESSAGE_INDENT " ") + +cmake_print_variables(Python_INCLUDE_DIRS) +cmake_print_variables(Python_NumPy_INCLUDE_DIRS) +if(WITH_FORTRAN) + cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR) +endif() + +list(POP_BACK CMAKE_MESSAGE_INDENT) + + message(STATUS "sisl options") list(APPEND CMAKE_MESSAGE_INDENT " ") @@ -230,18 +243,6 @@ endif() list(POP_BACK CMAKE_MESSAGE_INDENT) -message(STATUS "Python variables:") -list(APPEND CMAKE_MESSAGE_INDENT " ") - -cmake_print_variables(Python_INCLUDE_DIRS) -cmake_print_variables(Python_NumPy_INCLUDE_DIRS) -if(WITH_FORTRAN) - cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR) -endif() - -list(POP_BACK CMAKE_MESSAGE_INDENT) - - # Return in _result whether the _file should be built, or not # It checks 
whether the file is present in the NO_COMPILATION diff --git a/benchmarks/optimizations/hamiltonian.ipynb b/benchmarks/optimizations/hamiltonian.ipynb new file mode 100644 index 0000000000..e6edd7ff5c --- /dev/null +++ b/benchmarks/optimizations/hamiltonian.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we test and check the performance of the `Hk` implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "import numpy as np\n", + "import sisl as si\n", + "\n", + "files = Path(os.environ[\"SISL_FILES_TESTS\"])\n", + "siesta = files / \"siesta\"\n", + "\n", + "N = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "H = si.Hamiltonian.read(siesta / \"Si_pdos_k\" / \"Si_pdos.TSHS\").tile(N, 0).tile(N, 1)\n", + "\n", + "%timeit H.Hk()\n", + "%timeit H.Hk([0.1] * 3)\n", + "%timeit H.Hk(format=\"array\")\n", + "%timeit H.Hk([0.1] * 3, format=\"array\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "H = si.Hamiltonian.read(siesta / \"Pt2_soc\" / \"Pt2_xx.TSHS\").tile(N, 0).tile(N // 2, 1)\n", + "\n", + "%timeit H.Hk()\n", + "%timeit H.Hk([0.1] * 3)\n", + "%timeit H.Hk(format=\"array\")\n", + "%timeit H.Hk([0.1] * 3, format=\"array\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/benchmarks/run.sh b/benchmarks/run.sh index 5390c0c125..8762b8ab42 100755 --- 
a/benchmarks/run.sh +++ b/benchmarks/run.sh @@ -15,6 +15,5 @@ profile=$base.profile # Stats stats=$base.stats -python -m cProfile -o $profile $script $@ -python stats.py $profile > $stats - +python3 -m cProfile -o $profile $script $@ +python3 stats.py $profile > $stats diff --git a/benchmarks/run3.sh b/benchmarks/run3.sh deleted file mode 100755 index d3586bb313..0000000000 --- a/benchmarks/run3.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -script=graphene.py -if [ $# -gt 0 ]; then - script=$1 - shift -fi - -# Base name -base=${script%.py} - -# Determine output profile -profile=$base.profile - -# Stats -stats=$base.stats - -python3 -m cProfile -o $profile $script $@ -python3 stats.py $profile > $stats - diff --git a/src/sisl/CMakeLists.txt b/src/sisl/CMakeLists.txt index b94e5a741e..87a6346477 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -1,3 +1,9 @@ +set_property(DIRECTORY + APPEND + PROPERTY INCLUDE_DIRECTORIES + ${CMAKE_CURRENT_SOURCE_DIR}/_core + ) + foreach(source _indices _math_small) add_cython_library( SOURCE ${source}.pyx @@ -29,6 +35,7 @@ endforeach() get_directory_property( SISL_DEFINITIONS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMPILE_DEFINITIONS ) + # Join to stringify list list(JOIN SISL_DEFINITIONS " " SISL_DEFINITIONS) diff --git a/src/sisl/__init__.py b/src/sisl/__init__.py index 8b1f7b065b..3841fc2dac 100644 --- a/src/sisl/__init__.py +++ b/src/sisl/__init__.py @@ -88,6 +88,8 @@ # import the common options used from ._common import * +from ._core import * + # Import warning classes # We currently do not import warn and info # as they are too generic names in case one does from sisl import * @@ -106,8 +108,6 @@ # Below are sisl-specific imports from .shape import * -from ._core import * - # Physical quantities and required classes from .physics import * diff --git a/src/sisl/_core/CMakeLists.txt b/src/sisl/_core/CMakeLists.txt index 5453a1c9f6..f2cea9c7c6 100644 --- a/src/sisl/_core/CMakeLists.txt +++ 
b/src/sisl/_core/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(source _lattice _sparse) +foreach(source _lattice _dtypes _sparse) add_cython_library( SOURCE ${source}.pyx LIBRARY ${source} diff --git a/src/sisl/_core/_dtypes.pxd b/src/sisl/_core/_dtypes.pxd new file mode 100644 index 0000000000..9d2247c9e5 --- /dev/null +++ b/src/sisl/_core/_dtypes.pxd @@ -0,0 +1,102 @@ +""" +Shared header for fused dtypes +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# Generic typedefs for sisl internal naming convention +ctypedef size_t size_st +ctypedef Py_ssize_t ssize_st + + +ctypedef fused ints_st: + int + long + + +ctypedef fused floats_st: + float + double + + +ctypedef fused complexs_st: + float complex + double complex + + +ctypedef fused floatcomplexs_st: + float + double + float complex + double complex + + +# We need this fused data-type to omit complex data-types +ctypedef fused reals_st: + int + long + float + double + +ctypedef fused numerics_st: + int + long + float + double + float complex + double complex + +ctypedef fused _type2dtype_types_st: + short + int + long + float + double + float complex + double complex + float32_t + float64_t + #complex64_t # not usable... 
+ #complex128_t + int8_t + int16_t + int32_t + int64_t + uint8_t + uint16_t + uint32_t + uint64_t + + +cdef object type2dtype(const _type2dtype_types_st v) + + +ctypedef fused _inline_sum_st: + short + int + long + int16_t + int32_t + int64_t + uint16_t + uint32_t + uint64_t + +cdef ssize_st inline_sum(const _inline_sum_st[::1] array) noexcept nogil diff --git a/src/sisl/_core/_dtypes.pyx b/src/sisl/_core/_dtypes.pyx new file mode 100644 index 0000000000..a57d775e35 --- /dev/null +++ b/src/sisl/_core/_dtypes.pyx @@ -0,0 +1,80 @@ +""" +Inline-sum (all useful shared code could be placed here) +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + + +@cython.initializedcheck(False) +cdef inline object type2dtype(const _type2dtype_types_st v): + if _type2dtype_types_st is int8_t: + return np.int8 + elif _type2dtype_types_st is int16_t: + return np.int16 + elif _type2dtype_types_st is cython.short: + return np.int16 + elif _type2dtype_types_st is int32_t: + return np.int32 + elif _type2dtype_types_st is cython.int: + return np.int32 + elif _type2dtype_types_st is int64_t: + return np.int64 + elif _type2dtype_types_st is cython.long: + return np.int64 + elif _type2dtype_types_st is float32_t: + return np.float32 + elif _type2dtype_types_st is cython.float: + return np.float32 + elif _type2dtype_types_st is float64_t: + return np.float64 + elif _type2dtype_types_st is cython.double: + return np.float64 + elif _type2dtype_types_st is complex64_t: + return np.complex64 + elif _type2dtype_types_st is cython.floatcomplex: + return np.complex64 + elif _type2dtype_types_st is complex128_t: + return np.complex128 + elif _type2dtype_types_st is cython.doublecomplex: + return np.complex128 + + # More special cases + elif _type2dtype_types_st is uint8_t: + return np.uint8 + elif _type2dtype_types_st 
is uint16_t: + return np.uint16 + elif _type2dtype_types_st is uint32_t: + return np.uint32 + elif _type2dtype_types_st is uint64_t: + return np.uint64 + + + +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.boundscheck(False) +cdef inline ssize_st inline_sum(const _inline_sum_st[::1] array) noexcept nogil: + cdef ssize_st total, i + + total = 0 + for i in range(array.shape[0]): + total += array[i] + + return total diff --git a/src/sisl/_core/_sparse.pxd b/src/sisl/_core/_sparse.pxd index d36d0f24d2..a588c5d149 100644 --- a/src/sisl/_core/_sparse.pxd +++ b/src/sisl/_core/_sparse.pxd @@ -1,2 +1,6 @@ # Define the interfaces for the functions exposed through cimport -cdef Py_ssize_t inline_sum(const int[::1] array) nogil +from sisl._core._dtypes cimport ints_st + + +cdef void ncol2ptr_nc(const ints_st nr, const ints_st[::1] ncol, ints_st[::1] ptr, const +ints_st per_elem) noexcept nogil diff --git a/src/sisl/_core/_sparse.pyx b/src/sisl/_core/_sparse.pyx index 484aefbffb..c0ff04e706 100644 --- a/src/sisl/_core/_sparse.pyx +++ b/src/sisl/_core/_sparse.pyx @@ -2,76 +2,89 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
cimport cython -from libc.math cimport fabs import numpy as np -# This enables Cython enhanced compatibilities - -cimport numpy as np +cimport numpy as cnp +from numpy cimport dtype, ndarray +from sisl._core._dtypes cimport inline_sum, ints_st, numerics_st, ssize_st, type2dtype from sisl._indices cimport in_1d -__all__ = ["fold_csr_matrix", "fold_csr_matrix_nc", - "fold_csr_diagonal_nc", "sparse_dense", "inline_sum"] - @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline Py_ssize_t inline_sum(const int[::1] array) noexcept nogil: - cdef Py_ssize_t total, i +cdef void ncol2ptr_nc(const ints_st nr, const ints_st[::1] ncol, ints_st[::1] ptr, const ints_st per_elem) noexcept nogil: + cdef ssize_st r, rr + + # this is NC/SOC + ptr[0] = 0 + ptr[1] = ncol[0] * per_elem + for r in range(1, nr): + rr = r * 2 + # do both + ptr[rr] = ptr[rr - 1] + ncol[r-1] * per_elem + ptr[rr+1] = ptr[rr] + ncol[r] * per_elem - total = 0 - for i in range(array.shape[0]): - total += array[i] - return total + ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] * per_elem @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_matrix(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL + # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol)], dtype=np.int32) - 
cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol)], dtype=dtype) + + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, ind, nz, c - cdef int[::1] tmp + cdef ints_st r, c, nz, ind + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): # Initialize the pointer arrays - if ncol[r] > 0: - fold_ncol[r] = 1 - fold_col[fold_ptr[r]] = col[ptr[r]] % nr - else: - fold_ncol[r] = 0 - - for ind in range(ptr[r] + 1, ptr[r] + ncol[r]): - c = col[ind] % nr - if not in_1d(fold_col[fold_ptr[r]:fold_ptr[r] + fold_ncol[r]], c): - fold_col[fold_ptr[r] + fold_ncol[r]] = c - fold_ncol[r] += 1 - - # Sort indices (we should implement our own sorting algorithm) - tmp = np.sort(fold_col[fold_ptr[r]:fold_ptr[r] + fold_ncol[r]]) - for ind in range(fold_ncol[r]): + # Even though large supercells have *many* double entries (after folding) + # this turns out to be faster than incrementally searching + # the array. + # This kind of makes sense. + # We can do: + # 1. + # a) build a full list of folded items + # b) find unique (and sorted) elements + # or + # 2. + # a) incrementally add a value, only + # if it does not exist. + # 1. creates a bigger temporary array, but only + # adds unique values 1 time through numpy fast algorithm + # 2. searches an array (of seemingly small arrays) ncol times + # which can be quite heavy. 
+ tmp = col[ptr[r]:ptr[r] + ncol[r]].copy() + for ind in range(ncol[r]): + tmp[ind] %= nr + + tmp = np.unique(tmp) + fold_ncol[r] = tmp.shape[0] + for ind in range(tmp.shape[0]): fold_col[fold_ptr[r] + ind] = tmp[ind] fold_ptr[r + 1] = fold_ptr[r] + fold_ncol[r] @@ -88,63 +101,53 @@ def fold_csr_matrix(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_matrix_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - # We have to multiply by 4, 2 times the number of rows, and each row couples to 2 more elements - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 4], dtype=np.int32) - cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=dtype) + # We have to multiply by 4, 2 times for the extra rows, and another + # 2 for the possible double couplings + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 4], dtype=dtype) + + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, rr, ind, nz, c - 
cdef int[::1] tmp + cdef ints_st r, rr, ind, nz, c + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): rr = r * 2 - # Initialize the pointer arrays - if ncol[r] > 0: - c = (col[ptr[r]] % nr) * 2 - fold_ncol[rr] = 2 - fold_col[fold_ptr[rr]] = c - fold_col[fold_ptr[rr] + 1] = c + 1 - else: - fold_ncol[rr] = 0 + tmp = col[ptr[r]:ptr[r] + ncol[r]].copy() + for ind in range(ncol[r]): + tmp[ind] = (tmp[ind] % nr) * 2 - for ind in range(ptr[r] + 1, ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - if not in_1d(fold_col[fold_ptr[rr]:fold_ptr[rr] + fold_ncol[rr]], c): - fold_col[fold_ptr[rr] + fold_ncol[rr]] = c - fold_col[fold_ptr[rr] + fold_ncol[rr] + 1] = c + 1 - fold_ncol[rr] += 2 + tmp = np.unique(tmp) # Duplicate pointers and counters for next row (off-diagonal) - fold_ptr[rr + 1] = fold_ptr[rr] + fold_ncol[rr] + fold_ncol[rr] = tmp.shape[0] * 2 fold_ncol[rr + 1] = fold_ncol[rr] + fold_ptr[rr + 1] = fold_ptr[rr] + fold_ncol[rr] + fold_ptr[rr + 2] = fold_ptr[rr + 1] + fold_ncol[rr] - # Sort indices (we should implement our own sorting algorithm) - tmp = np.sort(fold_col[fold_ptr[rr]:fold_ptr[rr] + fold_ncol[rr]]) - for ind in range(fold_ncol[rr]): - c = tmp[ind] - fold_col[fold_ptr[rr] + ind] = c - # Copy to next row as well - fold_col[fold_ptr[rr+1] + ind] = c + for ind in range(tmp.shape[0]): + fold_col[fold_ptr[rr] + ind * 2] = tmp[ind] + fold_col[fold_ptr[rr] + ind * 2 + 1] = tmp[ind] + 1 + fold_col[fold_ptr[rr+1] + ind * 2] = tmp[ind] + fold_col[fold_ptr[rr+1] + ind * 2 + 1] = tmp[ind] + 1 - # Increment the next row - fold_ptr[rr + 2] = fold_ptr[rr + 1] + fold_ncol[rr + 1] nz += fold_ncol[rr] * 2 if nz > fold_col.shape[0]: @@ -158,29 +161,30 @@ def fold_csr_matrix_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def fold_csr_diagonal_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - 
np.ndarray[np.int32_t, ndim=1, mode='c'] COL): +def fold_csr_matrix_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col): """ Fold all columns into a square matrix """ - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL # Number of rows - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=np.int32) - cdef int[::1] fold_ptr = FOLD_ptr - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=np.int32) - cdef int[::1] fold_ncol = FOLD_ncol - # We have to multiply by 2, 2 times the number of rows - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 2], dtype=np.int32) - cdef int[::1] fold_col = FOLD_col + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] FOLD_ptr = np.empty([nr * 2 + 1], dtype=dtype) + cdef ndarray[ints_st, mode='c'] FOLD_ncol = np.empty([nr * 2], dtype=dtype) + # We have to multiply by 2 times for the extra rows + cdef ndarray[ints_st, mode='c'] FOLD_col = np.empty([inline_sum(ncol) * 2], dtype=dtype) + + cdef ints_st[::1] fold_ptr = FOLD_ptr + cdef ints_st[::1] fold_ncol = FOLD_ncol + cdef ints_st[::1] fold_col = FOLD_col + # local variables - cdef Py_ssize_t r, rr, ind, nz, c - cdef int[::1] tmp + cdef ints_st r, rr, ind, nz, c + cdef ints_st[::1] tmp nz = 0 fold_ptr[0] = 0 + # Loop on all rows for r in range(nr): rr = r * 2 @@ -222,45 +226,25 @@ def fold_csr_diagonal_nc(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, return FOLD_ptr, FOLD_ncol, FOLD_col[:nz].copy() -# Here we have the int + long -# For some analysis it may be useful -ctypedef fused numeric_complex: - int - long - float - double - float complex - double complex - - def sparse_dense(M): - return _sparse_dense(M.shape, M.ptr, M.ncol, M.col, M._D, M.dtype) + cdef cnp.ndarray dense = np.zeros(M.shape, dtype=M.dtype) + _sparse_dense(M.ptr, M.ncol, 
M.col, M._D, dense) + return dense @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -@cython.cdivision(True) -def _sparse_dense(shape, - np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, dtype): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - cdef Py_ssize_t nr = ncol.shape[0] - cdef V = np.zeros(shape, dtype=dtype) - cdef VV = V[:, ::1] - cdef Py_ssize_t r, ind, ix, s2 - - s2 = shape[2] - for r in range(nr): +def _sparse_dense(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] data, + numerics_st[:, :, ::1] dense): + + cdef ints_st r, ind, ix, s2 + + s2 = dense.shape[2] + for r in range(ncol.shape[0]): for ind in range(ptr[r], ptr[r] + ncol[r]): for ix in range(s2): - VV[r, col[ind], ix] += D[ind, ix] - - return V + dense[r, col[ind], ix] += data[ind, ix] diff --git a/src/sisl/_core/sparse.py b/src/sisl/_core/sparse.py index aa75fdc39b..e8cabbb0a4 100644 --- a/src/sisl/_core/sparse.py +++ b/src/sisl/_core/sparse.py @@ -404,7 +404,7 @@ def diags(self, diagonals, offsets=0, dim=None, dtype=None): shape[2] = dim shape = tuple(shape) - offsets = array_fill_repeat(offsets, shape[0], cls=dtype) + offsets = array_fill_repeat(offsets, shape[0], cls=np.int32) # Create the index-pointer, data and values data = array_fill_repeat(diagonals, shape[0], axis=0, cls=dtype) @@ -488,7 +488,7 @@ def finalized(self): """Whether the contained data is finalized and non-used elements have been removed""" return self._finalized - def finalize(self, sort=True): + def finalize(self, sort: bool = True): """Finalizes the sparse matrix by removing all non-set elements One may still interact with the sparse matrix as one would previously. 
diff --git a/src/sisl/_core/sparse_geometry.py b/src/sisl/_core/sparse_geometry.py index 862eb750f3..45c96f081f 100644 --- a/src/sisl/_core/sparse_geometry.py +++ b/src/sisl/_core/sparse_geometry.py @@ -652,7 +652,7 @@ def create_construct(self, R, params): """ if len(R) != len(params): raise ValueError( - f"{self.__class__.__name__}.create_construct got different lengths of `R` and `param`" + f"{self.__class__.__name__}.create_construct got different lengths of 'R' and 'params'" ) def func(self, ia, atoms, atoms_xyz=None): @@ -1003,15 +1003,17 @@ def unrepeat( atoms = np.arange(self.geometry.na).reshape(-1, reps).T.ravel() return self.sub(atoms).untile(reps, axis, segment, *args, sym=sym, **kwargs) - def finalize(self): + def finalize(self, *args, **kwargs): """Finalizes the model Finalizes the model so that all non-used elements are removed. I.e. this simply reduces the memory requirement for the sparse matrix. - Note that adding more elements to the sparse matrix is more time-consuming than for a non-finalized sparse matrix due to the + Notes + ----- + Adding more elements to the sparse matrix is more time-consuming than for a non-finalized sparse matrix due to the internal data-representation. 
""" - self._csr.finalize() + self._csr.finalize(*args, **kwargs) def tocsr(self, dim: int = 0, isc=None, **kwargs): """Return a :class:`~scipy.sparse.csr_matrix` for the specified dimension diff --git a/src/sisl/_indices.pxd b/src/sisl/_indices.pxd index 261207e919..5922b5bd71 100644 --- a/src/sisl/_indices.pxd +++ b/src/sisl/_indices.pxd @@ -1,3 +1,17 @@ # Define the interfaces for the functions exposed through cimport -cdef int in_1d(const int[::1] array, const int v) nogil -cdef Py_ssize_t _index_sorted(const int[::1] array, const int v) nogil +from numpy cimport int16_t, int32_t, int64_t + +from sisl._core._dtypes cimport ints_st, ssize_st + + +cdef bint in_1d(const ints_st[::1] array, const ints_st v) noexcept nogil + +ctypedef fused _ints_index_sorted_st: + short + int + long + int16_t + int32_t + int64_t + +cdef ssize_st _index_sorted(const ints_st[::1] array, const _ints_index_sorted_st v) noexcept nogil diff --git a/src/sisl/_indices.pyx b/src/sisl/_indices.pyx index 2c270ee220..3795342a79 100644 --- a/src/sisl/_indices.pyx +++ b/src/sisl/_indices.pyx @@ -2,394 +2,349 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs, sqrt +from libc.math cimport fabs, fabsf, sqrt, sqrtf import numpy as np -# This enables Cython enhanced compatibilities +cimport numpy as cnp +from numpy cimport dtype, ndarray -cimport numpy as np +from sisl._core._dtypes cimport floats_st, ints_st, ssize_st, type2dtype @cython.boundscheck(False) @cython.wraparound(False) -def indices_only(np.ndarray[np.int32_t, ndim=1, mode='c'] element, np.ndarray[np.int32_t, ndim=1, mode='c'] test_element): +@cython.initializedcheck(False) +def indices_only(ints_st[::1] element, ints_st[::1] test_element): """ Return indices of all `test_element` in the element array. 
Parameters ---------- - element : np.ndarray(np.int32) + element : array to search in - test_element : np.ndarray(np.int32) + test_element : values to find the indices of in `element` """ # Ensure contiguous arrays - cdef int[::1] ELEMENT = element - cdef int[::1] TEST_ELEMENT = test_element - cdef Py_ssize_t n_element = ELEMENT.shape[0] - cdef Py_ssize_t n_test_element = TEST_ELEMENT.shape[0] + cdef ssize_st n_element = element.shape[0] + cdef ssize_st n_test_element = test_element.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([max(n_test_element, n_element)], dtype=np.int32) - cdef int[::1] IDX = idx + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] IDX = np.empty([max(n_test_element, n_element)], dtype=dtype) + cdef ints_st[::1] idx = IDX - cdef Py_ssize_t n = _indices_only(n_element, ELEMENT, n_test_element, TEST_ELEMENT, IDX) + cdef ssize_st i, j, n - return idx[:n] + n = 0 + with nogil: + # Fast return + if n_test_element == 0: + pass -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_only(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j, n + elif n_element == 0: + pass - # Fast return - if n_test_element == 0: - return 0 - elif n_element == 0: - return 0 + elif n_test_element > n_element: + for j in range(n_test_element): + for i in range(n_element): + if test_element[j] == element[i]: + idx[n] = i + n += 1 + break - elif n_test_element > n_element: - n = 0 - for j in range(n_test_element): + else: for i in range(n_element): - if test_element[j] == element[i]: - idx[n] = i - n += 1 - break + for j in range(n_test_element): + if test_element[j] == element[i]: + idx[n] = i + n += 1 + break + + return IDX[:n].copy() - else: - n = 0 - for i in range(n_element): - for j in range(n_test_element): - if test_element[j] == 
element[i]: - idx[n] = i - n += 1 - break - return n @cython.boundscheck(False) @cython.wraparound(False) -def indices(np.ndarray[np.int32_t, ndim=1, mode='c'] element, np.ndarray[np.int32_t, ndim=1, mode='c'] test_element, int offset=0, both_sorted=False): +@cython.initializedcheck(False) +def indices(ints_st[::1] element, ints_st[::1] test_element, ints_st offset=0, + both_sorted: bool = False): """ Return indices of all `test_element` in the search array. If not found the index will be ``-1`` Parameters ---------- - element : np.ndarray(np.int32) + element : array to search in - test_element : np.ndarray(np.int32) + test_element : values to find the indices of in `element` - offset : int + offset : index offset """ # Ensure contiguous arrays - cdef int[::1] ELEMENT = element - cdef int[::1] TEST_ELEMENT = test_element - cdef Py_ssize_t n_element = ELEMENT.shape[0] - cdef Py_ssize_t n_test_element = TEST_ELEMENT.shape[0] + cdef ssize_st n_element = element.shape[0] + cdef ssize_st n_test_element = test_element.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([n_test_element], dtype=np.int32) - cdef int[::1] IDX = idx + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st, mode='c'] IDX = np.empty([n_test_element], dtype=dtype) + cdef ints_st[::1] idx = IDX + cdef ssize_st i, j + cdef ints_st ctest_element, celement if offset < 0: raise ValueError(f"indices requires offset argument >=0, got {offset}") - if both_sorted: - _indices_sorted_arrays(n_element, ELEMENT, n_test_element, TEST_ELEMENT, offset, IDX) - else: - _indices(n_element, ELEMENT, n_test_element, TEST_ELEMENT, offset, IDX) - - return idx - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef void _indices(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - const int offset, int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j - - # Fast return if 
n_test_element == 0: + # fast return pass + elif n_element == 0: - for j in range(n_test_element): - idx[j] = -1 - pass - elif n_test_element > n_element: for j in range(n_test_element): - idx[j] = -1 - for i in range(n_element): - if test_element[j] == element[i]: - idx[j] = offset + i - break + idx[j] = -1 + + elif both_sorted: + + i = j = 0 + while (i < n_element) and (j < n_test_element): + celement = element[i] + ctest_element = test_element[j] + if celement == ctest_element: + idx[j] = (i + offset) + j += 1 + elif celement < ctest_element: + i += 1 + elif celement > ctest_element: + idx[j] = -1 + j += 1 + for i in range(j, n_test_element): + idx[i] = -1 else: - # We need to initialize - for j in range(n_test_element): - idx[j] = -1 - for i in range(n_element): + if n_test_element > n_element: for j in range(n_test_element): - if test_element[j] == element[i]: - idx[j] = offset + i - break + idx[j] = -1 + for i in range(n_element): + if test_element[j] == element[i]: + idx[j] = (offset + i) + break + else: + # We need to initialize + for j in range(n_test_element): + idx[j] = -1 + for i in range(n_element): + for j in range(n_test_element): + if test_element[j] == element[i]: + idx[j] = (offset + i) + break -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef void _indices_sorted_arrays(const Py_ssize_t n_element, const int[::1] element, - const Py_ssize_t n_test_element, const int[::1] test_element, - const int offset, int[::1] idx) noexcept nogil: - cdef Py_ssize_t i, j - cdef int ctest_element, celement + return IDX - # Fast return - if n_test_element == 0: - pass - elif n_element == 0: - for j in range(n_test_element): - idx[j] = -1 - return - - i = 0 - j = 0 - while (i < n_element) and (j < n_test_element): - celement = element[i] - ctest_element = test_element[j] - if celement == ctest_element: - idx[j] = i + offset - j += 1 - elif celement < ctest_element: - i += 1 - elif celement > ctest_element: - idx[j] = -1 - 
j += 1 - for j in range(j, n_test_element): - idx[j] = -1 @cython.boundscheck(False) @cython.wraparound(False) -def indices_in_cylinder(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R, const double h): +@cython.initializedcheck(False) +def indices_in_cylinder(floats_st[:, ::1] dxyz, const floats_st R, const floats_st h): """ Indices for all coordinates that are within a cylinde radius `R` and height `h` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the cylinder - R : float + R : radius of cylinder to check - h : float + h : height of cylinder to check Returns ------- - index : np.ndarray(np.int32) + index : indices of all dxyz coordinates that are within the cylinder """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1] idx = np.empty([n], dtype=np.int32) - cdef int[::1] IDX = idx + cdef ssize_st n = dxyz.shape[0] + cdef ssize_st nxyz = dxyz.shape[1] - 1 - n = _indices_in_cylinder(dXYZ, R, h, IDX) + cdef ndarray[int32_t] IDX = np.empty([n], dtype=np.int32) + cdef int[::1] idx = IDX - if n == 0: - return np.empty([0], dtype=np.int32) - return idx[:n].copy() - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_cylinder(const double[:, ::1] dxyz, const double R, const double h, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef Py_ssize_t xyz = dxyz.shape[1] - cdef double R2 = R * R - cdef double L2 - cdef Py_ssize_t i, j, n - cdef int skip + cdef floats_st R2 = R * R + cdef floats_st L2 + cdef ssize_st i, j, m + cdef bint skip # Reset number of elements - n = 0 - - for i in range(N): - skip = 0 - for j in range(xyz-1): - skip |= dxyz[i, j] > R - if skip or dxyz[i, xyz-1] > h: continue - - L2 = 0. 
- for j in range(xyz-1): - L2 += dxyz[i, j] * dxyz[i, j] - if L2 > R2: continue - idx[n] = i - n += 1 - - return n + m = 0 + + with nogil: + for i in range(n): + skip = 0 + for j in range(nxyz): + skip |= dxyz[i, j] > R + if skip or dxyz[i, nxyz] > h: continue + + L2 = 0. + for j in range(nxyz): + L2 += dxyz[i, j] * dxyz[i, j] + if L2 > R2: continue + idx[m] = i + m += 1 + + if m == 0: + return np.empty([0], dtype=np.int32) + return IDX[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) -def indices_in_sphere(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R): +@cython.initializedcheck(False) +def indices_in_sphere(floats_st[:, ::1] dxyz, const floats_st R): """ Indices for all coordinates that are within a sphere of radius `R` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the sphere - R : float + R : radius of sphere to check Returns ------- - index : np.ndarray(np.int32) + index: indices of all dxyz coordinates that are within the sphere of radius `R` """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1] idx = np.empty([n], dtype=np.int32) - cdef int[::1] IDX = idx + cdef ssize_st n = dxyz.shape[0] + cdef ndarray[int32_t, mode='c'] IDX = np.empty([n], dtype=np.int32) + cdef int[::1] idx = IDX - n = _indices_in_sphere(dXYZ, R, IDX) + cdef floats_st R2 = R * R + cdef ssize_st i, m - if n == 0: + # Reset number of elements + m = 0 + + with nogil: + for i in range(n): + if all_fabs_le(dxyz, i, R): + if fabs2(dxyz, i) <= R2: + idx[m] = i + m += 1 + if m == 0: return np.empty([0], dtype=np.int32) - return idx[:n].copy() + return IDX[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_sphere(const double[:, ::1] dxyz, const double R, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef Py_ssize_t xyz = dxyz.shape[1] - cdef double R2 = R * R - cdef 
Py_ssize_t i, n - - # Reset number of elements - n = 0 - - for i in range(N): - if all_fabs_le(dxyz, i, R): - if fabs2(dxyz, i) <= R2: - idx[n] = i - n += 1 - return n - - -@cython.boundscheck(False) -@cython.wraparound(False) -def indices_in_sphere_with_dist(np.ndarray[np.float64_t, ndim=2, mode='c'] dxyz, const double R): +def indices_in_sphere_with_dist(floats_st[:, ::1] dxyz, const floats_st R): """ Indices and the distances for all coordinates that are within a sphere of radius `R` Parameters ---------- - dxyz : ndarray(np.float64) + dxyz : coordinates centered around the sphere R : float radius of sphere to check Returns ------- - index : np.ndarray(np.int32) + index : indices of all dxyz coordinates that are within the sphere of radius `R` - dist : np.ndarray(np.float64) + dist : distances for the coordinates within the sphere of radius `R` (corresponds to `index`) """ - cdef double[:, ::1] dXYZ = dxyz - cdef Py_ssize_t n = dXYZ.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([n], dtype=np.int32) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] dist = np.empty([n], dtype=np.float64) - cdef int[::1] IDX = idx - cdef double[::1] DIST = dist + cdef ssize_st n = dxyz.shape[0] + cdef ndarray[int32_t, mode='c'] IDX = np.empty([n], dtype=np.int32) + cdef object dtype = type2dtype[floats_st](1) + cdef ndarray[floats_st, mode='c'] DIST = np.empty([n], dtype=dtype) + cdef int[::1] idx = IDX + cdef floats_st[::1] dist = DIST + + cdef floats_st R2 = R * R + cdef floats_st d + cdef ssize_st i, m + + with nogil: + + # Reset number of elements + m = 0 + + if floats_st is cython.float: + for i in range(n): + if all_fabs_le(dxyz, i, R): + d = fabs2(dxyz, i) + if d <= R2: + dist[m] = sqrtf(d) + idx[m] = i + m += 1 - n = _indices_in_sphere_with_dist(dXYZ, R, DIST, IDX) + else: + for i in range(n): + if all_fabs_le(dxyz, i, R): + d = fabs2(dxyz, i) + if d <= R2: + dist[m] = sqrt(d) + idx[m] = i + m += 1 - if n == 0: - return np.empty([0], 
dtype=np.int32), np.empty([0], dtype=np.float64) - return idx[:n].copy(), dist[:n].copy() + if m == 0: + return np.empty([0], dtype=np.int32), np.empty([0], dtype=dtype) + return IDX[:m].copy(), DIST[:m].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_in_sphere_with_dist(const double[:, ::1] dxyz, const double R, - double[::1] dist, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = dxyz.shape[0] - cdef double R2 = R * R - cdef double d - cdef Py_ssize_t i, n - - # Reset number of elements - n = 0 - - for i in range(N): - if all_fabs_le(dxyz, i, R): - d = fabs2(dxyz, i) - if d <= R2: - dist[n] = sqrt(d) - idx[n] = i - n += 1 - return n - - -@cython.boundscheck(False) -@cython.wraparound(False) -def indices_le(np.ndarray a, const double V): +def indices_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``<= V`` Parameters ---------- - a : np.ndarray(np.float64) + a : array to check if 2D, all last dimension values must be ``<= V`` V : float value that is checked against Returns ------- - index : np.ndarray(np.int32) + index : indices for the values in `a` which are less than or equal to `V` """ - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx - - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - if a.dtype != np.float64: - raise ValueError('indices_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_le1(A1, V, IDX) + n = _indices_le1(A1, V, idx) elif ndim == 2: A2 = a - n = _indices_le2(A2, V, IDX) + n = _indices_le2(A2, V, idx) + + else: + raise NotImplementedError("indices_le not implemented for ndim>2") if n == 0: 
return np.empty([0], dtype=np.int32) - return idx[:n].copy() + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_le1(const double[::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_le1(const floats_st[::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if a[i] <= V: - idx[n] = i + idx[n] = i n += 1 return n @@ -397,8 +352,8 @@ cdef Py_ssize_t _indices_le1(const double[::1] a, const double V, int[::1] idx) @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int all_le(const double[:, ::1] a, const Py_ssize_t i, const double V) noexcept nogil: - cdef Py_ssize_t j +cdef inline bint all_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V) noexcept nogil: + cdef ssize_st j for j in range(a.shape[1]): if a[i, j] > V: return 0 @@ -408,65 +363,66 @@ cdef inline int all_le(const double[:, ::1] a, const Py_ssize_t i, const double @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_le2(const double[:, ::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_le2(const floats_st[:, ::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_le(a, i, V): - idx[n] = i + idx[n] = i n += 1 return n @cython.boundscheck(False) @cython.wraparound(False) -def indices_fabs_le(np.ndarray a, const double V): +@cython.initializedcheck(False) +def indices_fabs_le(ndarray a, const floats_st V): """ Indices for all values in `a` that are ``| | <= V`` Parameters ---------- - a : np.ndarray(np.float64) + a : array to check if 2D, all last dimension values must be ``| | <= V`` - V 
: float + V : value that is checked against Returns ------- - index : np.ndarray(np.int32) + index : indices for the values in ``|a|`` which are less than or equal to `V` """ - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx - - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - if a.dtype != np.float64: - raise ValueError('indices_fabs_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_fabs_le1(A1, V, IDX) + n = _indices_fabs_le1(A1, V, idx) elif ndim == 2: A2 = a - n = _indices_fabs_le2(A2, V, IDX) + n = _indices_fabs_le2(A2, V, idx) + + else: + raise NotImplementedError("indices_fabs_le not implemented for ndim>2") if n == 0: return np.empty([0], dtype=np.int32) - return idx[:n].copy() + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline double fabs2(const double[:, ::1] a, const Py_ssize_t i) noexcept nogil: - cdef Py_ssize_t j - cdef double abs2 - abs2 = 0. +cdef inline floats_st fabs2(const floats_st[:, ::1] a, const ssize_st i) noexcept nogil: + cdef ssize_st j + cdef floats_st abs2 = 0. 
+ for j in range(a.shape[1]): abs2 += a[i, j]*a[i, j] return abs2 @@ -475,117 +431,140 @@ cdef inline double fabs2(const double[:, ::1] a, const Py_ssize_t i) noexcept no @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_fabs_le1(const double[::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_fabs_le1(const floats_st[::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 - for i in range(N): - if fabs(a[i]) <= V: - idx[n] = i - n += 1 + if floats_st is cython.float: + for i in range(N): + if fabsf(a[i]) <= V: + idx[n] = i + n += 1 + else: + for i in range(N): + if fabs(a[i]) <= V: + idx[n] = i + n += 1 return n @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int all_fabs_le(const double[:, ::1] a, const Py_ssize_t i, const double V) noexcept nogil: - cdef Py_ssize_t j - for j in range(a.shape[1]): - if fabs(a[i, j]) > V: - return 0 +cdef inline bint all_fabs_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V) noexcept nogil: + cdef ssize_st j + + if floats_st is cython.float: + for j in range(a.shape[1]): + if fabsf(a[i, j]) > V: + return 0 + + else: + for j in range(a.shape[1]): + if fabs(a[i, j]) > V: + return 0 + return 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef int _indices_fabs_le2(const double[:, ::1] a, const double V, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_fabs_le2(const floats_st[:, ::1] a, const floats_st V, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_fabs_le(a, i, V): - idx[n] = i + idx[n] = i n += 1 + return n + @cython.boundscheck(False) @cython.wraparound(False) -def indices_gt_le(np.ndarray a, const 
double V1, const double V2): - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] idx = np.empty([a.shape[0]], dtype=np.int32) - cdef int[::1] IDX = idx - - cdef Py_ssize_t ndim = a.ndim - cdef double[::1] A1 - cdef double[:, ::1] A2 - cdef Py_ssize_t n +@cython.initializedcheck(False) +def indices_gt_le(ndarray a, const floats_st V1, const floats_st V2): + cdef ndarray[int32_t, mode='c'] IDX = np.empty([a.shape[0]], dtype=np.int32) + cdef int[::1] idx = IDX - if a.dtype != np.float64: - raise ValueError('indices_gt_le requires input array to be of float64 type') + cdef ssize_st ndim = a.ndim + cdef floats_st[::1] A1 + cdef floats_st[:, ::1] A2 + cdef ssize_st n if ndim == 1: A1 = a - n = _indices_gt_le1(A1, V1, V2, IDX) + n = _indices_gt_le1(A1, V1, V2, idx) elif ndim == 2: A2 = a - n = _indices_gt_le2(A2, V1, V2, IDX) + n = _indices_gt_le2(A2, V1, V2, idx) + + else: + raise NotImplementedError("indices_gt_le not implemented for ndim>2") if n == 0: return np.empty([0], dtype=np.int32) - return idx[:n].copy() + + return IDX[:n].copy() @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _indices_gt_le1(const double[::1] a, const double V1, const double V2, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_gt_le1(const floats_st[::1] a, const floats_st V1, const floats_st + V2, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if V1 < a[i]: if a[i] <= V2: - idx[n] = i + idx[n] = i n += 1 + return n -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef inline int all_gt_le(const double[:, ::1] a, const Py_ssize_t i, const double V1, const double V2) noexcept nogil: - cdef Py_ssize_t j - for j in range(a.shape[1]): - if a[i, j] <= V1: - return 0 - elif V2 < a[i, j]: - return 0 - return 1 - @cython.boundscheck(False) @cython.wraparound(False) 
@cython.initializedcheck(False) -cdef Py_ssize_t _indices_gt_le2(const double[:, ::1] a, const double V1, const double V2, int[::1] idx) noexcept nogil: - cdef Py_ssize_t N = a.shape[0] - cdef Py_ssize_t i, n +cdef ssize_st _indices_gt_le2(const floats_st[:, ::1] a, const floats_st V1, const floats_st + V2, int[::1] idx) noexcept nogil: + cdef ssize_st N = a.shape[0] + cdef ssize_st i, n n = 0 for i in range(N): if all_gt_le(a, i, V1, V2): - idx[n] = i + idx[n] = i n += 1 + return n @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int in_1d(const int[::1] array, const int v) noexcept nogil: - cdef Py_ssize_t N = array.shape[0] - cdef Py_ssize_t i +cdef inline bint all_gt_le(const floats_st[:, ::1] a, const ssize_st i, const floats_st V1, + const floats_st V2) noexcept nogil: + cdef ssize_st j + for j in range(a.shape[1]): + if a[i, j] <= V1: + return 0 + elif V2 < a[i, j]: + return 0 + return 1 + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +cdef inline bint in_1d(const ints_st[::1] array, const ints_st v) noexcept nogil: + cdef ssize_st N = array.shape[0] + cdef ssize_st i for i in range(N): if array[i] == v: return 1 @@ -594,14 +573,15 @@ cdef inline int in_1d(const int[::1] array, const int v) noexcept nogil: @cython.boundscheck(False) @cython.wraparound(False) -def index_sorted(np.ndarray[np.int32_t, ndim=1, mode='c'] a, const int v): +@cython.initializedcheck(False) +def index_sorted(ints_st[::1] a, const ints_st v): """ Return index for the value v in a sorted array, otherwise return -1 Parameters ---------- - a : int[::1] + a : sorted array to check - v : int + v : value to find Returns @@ -609,60 +589,63 @@ def index_sorted(np.ndarray[np.int32_t, ndim=1, mode='c'] a, const int v): int : -1 if not found, otherwise the first index in `a` that is equal to `v` """ # Ensure contiguous arrays - cdef int[::1] A = a - return _index_sorted(A, v) + return _index_sorted(a, v) + 
+# This small code needs all variants +# The variants are declared in the _indices.pxd file @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef Py_ssize_t _index_sorted(const int[::1] a, const int v) noexcept nogil: +@cython.cdivision(True) +cdef ssize_st _index_sorted(const ints_st[::1] a, const _ints_index_sorted_st v) noexcept nogil: """ Return index for the value v in a sorted array, otherwise return -1 This implements a binary search method Parameters ---------- - a : int[::1] + a : sorted array to check - v : int + v : value to find Returns ------- int : 0 if not unique, otherwise 1. """ - cdef Py_ssize_t i, L, R + cdef ssize_st MIN1 = -1 + cdef ssize_st i, L, R # Simple binary search + R = a.shape[0] - 1 + if R == -1: + return MIN1 + elif a[R] < v: + return MIN1 + L = 0 - R = a.shape[0] - if R == 0: - return -1 - elif v < a[L]: - return -1 - - while L < R: - i = (L + R) // 2 + while L <= R: + i = (L + R) / 2 if a[i] < v: L = i + 1 - elif a[i] == v: - return i + elif v < a[i]: + R = i - 1 else: - R = i - if a[R] == v: - return R - return -1 + return i + return MIN1 @cython.boundscheck(False) @cython.wraparound(False) -def sorted_unique(np.ndarray[np.int32_t, ndim=1, mode='c'] a): +@cython.initializedcheck(False) +def is_sorted_unique(ints_st[::1] a): """ Return True/False if all elements of the sorted array `a` are unique Parameters ---------- - a : np.ndarray(np.int32) + a : sorted array to check Returns @@ -670,31 +653,24 @@ def sorted_unique(np.ndarray[np.int32_t, ndim=1, mode='c'] a): int : 0 if not unique, otherwise 1. 
""" # Ensure contiguous arrays - cdef int[::1] A = a - cdef Py_ssize_t n = A.shape[0] + cdef ssize_st n = a.shape[0] + cdef ssize_st i, ret = 1 + + if n > 1: + # only check for larger than 1 arrays + with nogil: + for i in range(n - 1): + if a[i] == a[i+1]: + ret = 0 + break + return ret - return _sorted_unique(n, A) @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef int _sorted_unique(const Py_ssize_t n_a, const int[::1] a) noexcept nogil: - cdef Py_ssize_t i - - # Fast return - if n_a <= 1: - return 1 - - for i in range(n_a - 1): - if a[i] == a[i+1]: - return 0 - return 1 - - -@cython.boundscheck(False) -@cython.wraparound(False) -def list_index_le(np.ndarray[np.int32_t, ndim=1, mode='c'] a, np.ndarray[np.int32_t, ndim=1, mode='c'] b): +def list_index_le(ints_st[::1] a, ints_st[::1] b): """ Find indices for each ``a`` such that the returned ``a[i] <= b[ret[i]]`` where `b` is assumed sorted This corresponds to: @@ -704,34 +680,25 @@ def list_index_le(np.ndarray[np.int32_t, ndim=1, mode='c'] a, np.ndarray[np.int3 Parameters ---------- - a : np.ndarray(np.int32) + a : values to check indicies of - b : np.ndarray(np.int32) + b : sorted array to check against Returns ------- - np.ndarray(np.int32): same length as `a` with indicies + indices with same length as `a` """ # Ensure contiguous arrays - cdef int[::1] A = a - cdef int[::1] B = b - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] c = np.empty([A.shape[0]], dtype=np.int32) - cdef int[::1] C = c + cdef ssize_st na = a.shape[0] + cdef ssize_st nb = b.shape[0] + cdef object dtype = type2dtype[ints_st](1) + cdef ndarray[ints_st] C = np.empty([na], dtype=dtype) + cdef ints_st[::1] c = C - _list_index_le(A, B, C) - return c - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -cdef inline void _list_index_le(const int[::1] a, const int[::1] b, int[::1] c) noexcept nogil: - cdef Py_ssize_t na = a.shape[0] - cdef Py_ssize_t nb = b.shape[0] - 
cdef Py_ssize_t ia, ib - cdef int ai, alast - cdef Py_ssize_t start = 0 + cdef ssize_st ia, ib + cdef ints_st ai, alast + cdef ssize_st start = 0 if na > 0: alast = a[0] @@ -743,6 +710,8 @@ cdef inline void _list_index_le(const int[::1] a, const int[::1] b, int[::1] c) alast = ai for ib in range(start, nb): if ai <= b[ib]: - c[ia] = ib + c[ia] = ib start = ib break + + return C diff --git a/src/sisl/_math_small.pyx b/src/sisl/_math_small.pyx index 5b8cd9b15a..c5e7b39e01 100644 --- a/src/sisl/_math_small.pyx +++ b/src/sisl/_math_small.pyx @@ -1,46 +1,51 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# file, you can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport atan2, sqrt +from libc.math cimport atan2, atan2f, sqrt, sqrtf import numpy as np -# This enables Cython enhanced compatibilities +from numpy cimport dtype, ndarray -cimport numpy as np +from sisl._core._dtypes cimport floats_st, ssize_st, type2dtype @cython.boundscheck(False) @cython.wraparound(False) -def cross3(np.ndarray[np.float64_t, ndim=1, mode='c'] u, np.ndarray[np.float64_t, ndim=1, mode='c'] v): - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y = np.empty([3], dtype=np.float64) +def cross3(const floats_st[::1] u, const floats_st[::1] v): + cdef object dtyp = type2dtype[floats_st](1) + cdef ndarray[floats_st, mode='c'] Y = np.empty([3], dtype=dtyp) + cdef floats_st[::1] y = Y y[0] = u[1] * v[2] - u[2] * v[1] y[1] = u[2] * v[0] - u[0] * v[2] y[2] = u[0] * v[1] - u[1] * v[0] - return y + return Y @cython.boundscheck(False) @cython.wraparound(False) -def dot3(np.ndarray[np.float64_t, ndim=1, mode='c'] u, np.ndarray[np.float64_t, ndim=1, mode='c'] v): - return u[0] * v[0] + u[1] * v[1] + u[2] * v[2] +def dot3(const floats_st[::1] u, const floats_st[::1] v): + cdef floats_st r + r = u[0] * v[0] + u[1] * v[1] + 
u[2] * v[2] + return r @cython.boundscheck(False) @cython.wraparound(False) -def product3(np.ndarray[np.float64_t, ndim=1, mode='c'] v): - return v[0] * v[1] * v[2] +def product3(const floats_st[::1] v): + cdef floats_st r + r = v[0] * v[1] * v[2] + return r @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -def is_ascending(np.ndarray[np.float64_t, ndim=1, mode='c'] v): - cdef double[::1] V = v - cdef Py_ssize_t i - for i in range(1, V.shape[0]): - if V[i-1] > V[i]: +def is_ascending(const floats_st[::1] v): + cdef ssize_st i + for i in range(1, v.shape[0]): + if v[i-1] > v[i]: return 0 return 1 @@ -49,26 +54,37 @@ def is_ascending(np.ndarray[np.float64_t, ndim=1, mode='c'] v): @cython.wraparound(False) @cython.initializedcheck(False) @cython.cdivision(True) -def xyz_to_spherical_cos_phi(np.ndarray[np.float64_t, ndim=1, mode='c'] x, - np.ndarray[np.float64_t, ndim=1, mode='c'] y, - np.ndarray[np.float64_t, ndim=1, mode='c'] z): +def xyz_to_spherical_cos_phi(floats_st[::1] x, + floats_st[::1] y, + floats_st[::1] z): """ In x, y, z coordinates shifted to origo Returns x = R, y = theta, z = cos_phi """ - cdef double[::1] X = x - cdef double[::1] Y = y - cdef double[::1] Z = z - cdef Py_ssize_t i - cdef double R - for i in range(X.shape[0]): - # theta (radians) - R = sqrt(X[i] * X[i] + Y[i] * Y[i] + Z[i] * Z[i]) - Y[i] = atan2(Y[i], X[i]) - # Radius - X[i] = R - # cos(phi) - if R > 0.: - Z[i] = Z[i] / R - else: - Z[i] = 0. + cdef ssize_st i + cdef floats_st R + + if floats_st is cython.float: + for i in range(x.shape[0]): + # theta (radians) + R = sqrtf(x[i] * x[i] + y[i] * y[i] + z[i] * z[i]) + y[i] = atan2f(y[i], x[i]) + # Radius + x[i] = R + # cos(phi) + if R > 0.: + z[i] = z[i] / R + else: + z[i] = 0. + else: + for i in range(x.shape[0]): + # theta (radians) + R = sqrt(x[i] * x[i] + y[i] * y[i] + z[i] * z[i]) + y[i] = atan2(y[i], x[i]) + # Radius + x[i] = R + # cos(phi) + if R > 0.: + z[i] = z[i] / R + else: + z[i] = 0. 
diff --git a/src/sisl/io/siesta/_help.py b/src/sisl/io/siesta/_help.py index 7d3c47244d..d7ac67585d 100644 --- a/src/sisl/io/siesta/_help.py +++ b/src/sisl/io/siesta/_help.py @@ -11,7 +11,7 @@ __all__ = ["_siesta_sc_off"] __all__ += ["_csr_from_siesta", "_csr_from_sc_off"] __all__ += ["_csr_to_siesta", "_csr_to_sc_off"] -__all__ += ["_mat_spin_convert", "_fc_correct"] +__all__ += ["_mat_sisl2siesta", "_mat_siesta2sisl", "_fc_correct"] def _siesta_sc_off(nsc): @@ -98,45 +98,179 @@ def _csr_from(col_from, csr): csr.translate_columns(col_from, col_to) -def _mat_spin_convert(M, spin=None): +def _mat2dtype(M, dtype: np.dtype) -> None: + """Change the internal CSR matrix in `M` to a follow `dtype`""" + + if M.dtype == dtype: + return M + + spin = M.spin + csr = M._csr + shape = csr._D.shape + + # Change details + old_dtype = np.dtype(M.dtype) + new_dtype = np.dtype(dtype) + + def toc(D, re, im): + return (D[..., re] + 1j * D[..., im]).astype(dtype, copy=False) + + if old_dtype.kind in ("f", "i"): + if new_dtype.kind in ("f", "i"): + # this is just simple casting + csr._D = csr._D.astype(dtype) + elif new_dtype.kind == "c": + # we need to *collect it + if spin.is_diagonal: + # this is just simple casting, + # each diagonal component has its own index + csr._D = csr._D.astype(dtype) + elif spin.is_noncolinear: + D = np.empty(shape[:-1] + (shape[-1] - 1,), dtype=dtype) + # These should be real only anyways! 
+ D[..., [0, 1]] = csr._D[..., [0, 1]].real.astype(dtype) + D[..., 2] = toc(csr._D, 2, 3) + if D.shape[-1] > 4: + D[..., 3:] = csr._D[..., 4:].astype(dtype) + csr._D = D + elif spin.is_spinorbit: + D = np.empty(shape[:-1] + (shape[-1] - 4,), dtype=dtype) + D[..., 0] = toc(csr._D, 0, 4) + D[..., 1] = toc(csr._D, 1, 5) + D[..., 2] = toc(csr._D, 2, 3) + D[..., 3] = toc(csr._D, 6, 7) + if D.shape[-1] > 4: + D[..., 4:] = csr._D[..., 8:].astype(dtype) + csr._D = D + else: + raise NotImplementedError + else: + raise NotImplementedError + + elif old_dtype.kind == "c": + if new_dtype.kind == "c": + # this is just simple casting + csr._D = csr._D.astype(dtype) + elif new_dtype.kind in ("f", "i"): + # we need to *collect it + if spin.is_diagonal: + # this is just simple casting, + # each diagonal component has its own index + csr._D = csr._D.astype(dtype) + elif spin.is_noncolinear: + D = np.empty(shape[:-1] + (shape[-1] + 1,), dtype=dtype) + # These should be real only anyways! + D[..., [0, 1]] = csr._D[..., [0, 1]].real.astype(dtype) + D[..., 2] = csr._D[..., 2].real.astype(dtype) + D[..., 3] = csr._D[..., 2].imag.astype(dtype) + if D.shape[-1] > 4: + D[..., 4:] = csr._D[..., 3:].real.astype(dtype) + csr._D = D + elif spin.is_spinorbit: + D = np.empty(shape[:-1] + (shape[-1] + 4,), dtype=dtype) + D[..., 0] = csr._D[..., 0].real.astype(dtype) + D[..., 1] = csr._D[..., 1].real.astype(dtype) + D[..., 2] = csr._D[..., 2].real.astype(dtype) + D[..., 3] = csr._D[..., 2].imag.astype(dtype) + D[..., 4] = csr._D[..., 0].imag.astype(dtype) + D[..., 5] = csr._D[..., 1].imag.astype(dtype) + D[..., 6] = csr._D[..., 3].real.astype(dtype) + D[..., 7] = csr._D[..., 3].imag.astype(dtype) + if D.shape[-1] > 8: + D[..., 8:] = csr._D[..., 4:].real.astype(dtype) + csr._D = D + else: + raise NotImplementedError + else: + raise NotImplementedError + M._reset() + + +def _mat_siesta2sisl(M, dtype: Optional[np.dtype] = None) -> None: """Conversion of Siesta spin matrices to sisl spin matrices The 
matrices from Siesta are given in a format adheering to the following - concept: + concept. + + There are two cases: + + 1. A non-colinear calculation: + + Siesta uses this convention: - A non-colinear calculation has the following entries (in C-index) for - the sparse matrix: + H[:, [0, 1, 2, 3]] + H11 == H[:, 0] + H22 == H[:, 1] + H12 == H[:, 2] - 1j H[:, 3] # spin-box Hermitian + H21 == H[:, 2] + 1j H[:, 3] - H[:, [0, 1, 2, 3]] - H11 == H[:, 0] - H22 == H[:, 1] - H12 == H[:, 2] - 1j H[:, 3] # spin-box Hermitian - H21 == H[:, 2] + 1j H[:, 3] + In sisl we use this convention, see `Hamiltonian`: - Although it really does not make sense to change anything, we - do change it to adhere to the spin-orbit case (see below). - I.e. what Siesta *saves* is the -Im[H12], which we now store - as Im[H12]. + H11 == H[:, 0] + H22 == H[:, 1] + H12 == H[:, 2] + 1j H[:, 3] # spin-box Hermitian + H21 == H[:, 2] - 1j H[:, 3] + 2. A spin-orbit calculation: - A spin-orbit calculation has the following entries (in C-index) for - the sparse matrix: + Siesta uses this convention: - H[:, [0, 1, 2, 3, 4, 5, 6, 7]] - H11 == H[:, 0] + 1j H[:, 4] - H22 == H[:, 1] + 1j H[:, 5] - H12 == H[:, 2] + 1j H[:, 3] # spin-box Hermitian - H21 == H[:, 6] + 1j H[:, 7] + H[:, [0, 1, 2, 3, 4, 5, 6, 7]] + H11 == H[:, 0] + 1j H[:, 4] + H22 == H[:, 1] + 1j H[:, 5] + H12 == H[:, 2] - 1j H[:, 3] + H21 == H[:, 6] + 1j H[:, 7] + + In sisl we use this convention, see `Hamiltonian`: + + H[:, [0, 1, 2, 3, 4, 5, 6, 7]] + H11 == H[:, 0] + 1j H[:, 4] + H22 == H[:, 1] + 1j H[:, 5] + H12 == H[:, 2] + 1j H[:, 3] + H21 == H[:, 6] + 1j H[:, 7] + + On top of this it depends on whether the data-type is complex + or not. 
""" - if spin is None: - if M.spin.is_noncolinear: + if dtype is None: + dtype = M.dtype + + spin = M.spin + + if spin.is_noncolinear: + if np.dtype(M.dtype).kind in ("f", "i"): M._csr._D[:, 3] = -M._csr._D[:, 3] - elif M.spin.is_spinorbit: + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() + elif spin.is_spinorbit: + if np.dtype(M.dtype).kind in ("f", "i"): + M._csr._D[:, 3] = -M._csr._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() + + _mat2dtype(M, dtype) + + +def _mat_sisl2siesta(M, dtype: Optional[np.dtype] = None) -> None: + """Conversion of sisl to Siesta spin matrices""" + if dtype is None: + dtype = M.dtype + + # convert to float + _mat2dtype(M, dtype) + + spin = M.spin + + if spin.is_noncolinear: + if np.dtype(M.dtype).kind in ("f", "i"): M._csr._D[:, 3] = -M._csr._D[:, 3] - elif spin.is_noncolinear: - M._D[:, 3] = -M._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() elif spin.is_spinorbit: - M._D[:, 3] = -M._D[:, 3] + if np.dtype(M.dtype).kind in ("f", "i"): + M._csr._D[:, 3] = -M._csr._D[:, 3] + else: + M._csr._D[:, 2] = M._csr._D[:, 2].conj() def _geom2hsx(geometry): diff --git a/src/sisl/io/siesta/binaries.py b/src/sisl/io/siesta/binaries.py index b33164b638..fd7224b038 100644 --- a/src/sisl/io/siesta/binaries.py +++ b/src/sisl/io/siesta/binaries.py @@ -396,6 +396,8 @@ def read_hamiltonian(self, geometry=None, **kwargs) -> Hamiltonian: ) # Check whether it is an orthogonal basis set + # TODO, this is not an exhaustive test, but is *fine* for most + # cases orthogonal = np.abs(dS).sum() == geom.no # Create the Hamiltonian container @@ -418,7 +420,7 @@ def read_hamiltonian(self, geometry=None, **kwargs) -> Hamiltonian: H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert to sisl supercell # equivalent as _csr_from_siesta with explicit isc from file @@ -442,7 +444,8 @@ def write_hamiltonian(self, H, **kwargs): """Writes the Hamiltonian to 
a siesta.TSHS file""" # we sort below, so no need to do it here # see onlysSileSiesta.read_overlap for .transpose() - csr = H.transpose(spin=False, sort=False)._csr + H = H.transpose(spin=False, sort=False) + csr = H._csr if csr.nnz == 0: raise SileError( f"{self!r}.write_hamiltonian cannot write " @@ -454,7 +457,7 @@ def write_hamiltonian(self, H, **kwargs): # Convert to siesta CSR _csr_to_siesta(H.geometry, csr, diag=True) csr.finalize(sort=sort) - _mat_spin_convert(csr, H.spin) + _mat_sisl2siesta(H, dtype=np.float64) # Extract the data to pass to the fortran routine cell = H.geometry.cell @@ -566,7 +569,7 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: # DM file does not contain overlap matrix... so neglect it for now. DM._csr._D[:, spin] = 0.0 - _mat_spin_convert(DM) + _mat_siesta2sisl(DM, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if nsc[0] != 0 or geom.no_s >= col.max(): @@ -584,7 +587,8 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: def write_density_matrix(self, DM, **kwargs): """Writes the density matrix to a siesta.DM file""" - csr = DM.transpose(spin=False, sort=False)._csr + DM = DM.transpose(spin=False, sort=False) + csr = DM._csr # This ensures that we don"t have any *empty* elements if csr.nnz == 0: raise SileError( @@ -596,7 +600,8 @@ def write_density_matrix(self, DM, **kwargs): # We do not really need to sort this one, but we do for consistency # of the interface. csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, DM.spin) + + _mat_sisl2siesta(DM, dtype=np.float64) # Get DM if DM.orthogonal: @@ -674,7 +679,7 @@ def read_energy_density_matrix(self, **kwargs) -> EnergyDensityMatrix: # EDM file does not contain overlap matrix... so neglect it for now. 
EDM._csr._D[:, spin] = 0.0 - _mat_spin_convert(EDM) + _mat_siesta2sisl(EDM, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if nsc[0] != 0 or geom.no_s >= col.max(): @@ -704,7 +709,7 @@ def read_fermi_level(self) -> float: self._fortran_check("read_fermi_level", "could not read fermi-level.") return Ef - def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): + def write_density_matrices(self, DM, EDM, Ef: float = 0.0, **kwargs): r"""Writes the density matrix to a siesta.DM file Parameters @@ -713,31 +718,32 @@ def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): density matrix to write to the file EDM : EnergyDensityMatrix energy density matrix to write to the file - Ef : float, optional + Ef : fermi-level to be contained """ - DMcsr = DM.transpose(spin=False, sort=False)._csr - EDMcsr = EDM.transpose(spin=False, sort=False)._csr - DMcsr.align(EDMcsr) - EDMcsr.align(DMcsr) + sort = kwargs.get("sort", True) + DM = DM.transpose(spin=False, sort=sort) + EDM = EDM.transpose(spin=False, sort=sort) + DM._csr.align(EDM._csr) + EDM._csr.align(DM._csr) - if DMcsr.nnz == 0: + if DM._csr.nnz == 0: raise SileError( f"{self!r}.write_density_matrices cannot write " "a zero element sparse matrix!" 
) - _csr_to_siesta(DM.geometry, DMcsr) - _csr_to_siesta(DM.geometry, EDMcsr) - sort = kwargs.get("sort", True) - DMcsr.finalize(sort=sort) - EDMcsr.finalize(sort=sort) - _mat_spin_convert(DMcsr, DM.spin) - _mat_spin_convert(EDMcsr, EDM.spin) + _csr_to_siesta(DM.geometry, DM._csr) + _csr_to_siesta(DM.geometry, EDM._csr) + DM._csr.finalize(sort=sort) + EDM._csr.finalize(sort=sort) + _mat_sisl2siesta(DM, dtype=np.float64) + _mat_sisl2siesta(EDM, dtype=np.float64) # Ensure everything is correct if not ( - np.allclose(DMcsr.ncol, EDMcsr.ncol) and np.allclose(DMcsr.col, EDMcsr.col) + np.allclose(DM._csr.ncol, EDM._csr.ncol) + and np.allclose(DM._csr.col, EDM._csr.col) ): raise ValueError( f"{self!r}.write_density_matrices got non compatible " @@ -745,21 +751,21 @@ def write_density_matrices(self, DM, EDM, Ef=0.0, **kwargs): ) if DM.orthogonal: - dm = DMcsr._D + dm = DM._csr._D else: - dm = DMcsr._D[:, : DM.S_idx] + dm = DM._csr._D[:, : DM.S_idx] if EDM.orthogonal: - edm = EDMcsr._D + edm = EDM._csr._D else: - edm = EDMcsr._D[:, : EDM.S_idx] + edm = EDM._csr._D[:, : EDM.S_idx] nsc = DM.geometry.lattice.nsc.astype(np.int32) _siesta.write_tsde_dm_edm( self.file, nsc, - DMcsr.ncol, - DMcsr.col + 1, + DM._csr.ncol, + DM._csr.col + 1, _toF(dm, np.float64), _toF(edm, np.float64, _eV2Ry), Ef * _eV2Ry, @@ -1348,7 +1354,7 @@ def _r_hamiltonian_v0(self, **kwargs): ) # Create the Hamiltonian container - H = Hamiltonian(geom, spin, nnzpr=1, dtype=np.float32, orthogonal=False) + H = Hamiltonian(geom, spin, nnzpr=1, orthogonal=False) # Create the new sparse matrix H._csr.ncol = ncol.astype(np.int32, copy=False) @@ -1361,7 +1367,7 @@ def _r_hamiltonian_v0(self, **kwargs): H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells if no_s // no == np.prod(geom.nsc): @@ -1392,7 +1398,7 @@ def _r_hamiltonian_v1(self, **kwargs): ) # Create the Hamiltonian container - H 
= Hamiltonian(geom, spin, nnzpr=1, dtype=np.float32, orthogonal=False) + H = Hamiltonian(geom, spin, nnzpr=1, orthogonal=False) # Create the new sparse matrix H._csr.ncol = ncol.astype(np.int32, copy=False) @@ -1406,7 +1412,7 @@ def _r_hamiltonian_v1(self, **kwargs): H._csr._D[:, :spin] = dH[:, :] * _Ry2eV H._csr._D[:, spin] = dS[:] - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Convert the supercells to sisl supercells _csr_from_sc_off(H.geometry, isc.T, H._csr) @@ -1440,7 +1446,7 @@ def _r_overlap_v0(self, **kwargs): ) # Create the Hamiltonian container - S = Overlap(geom, nnzpr=1, dtype=np.float32) + S = Overlap(geom, nnzpr=1) # Create the new sparse matrix S._csr.ncol = ncol.astype(np.int32, copy=False) diff --git a/src/sisl/io/siesta/siesta_nc.py b/src/sisl/io/siesta/siesta_nc.py index 91eb35eefa..16d795e53d 100644 --- a/src/sisl/io/siesta/siesta_nc.py +++ b/src/sisl/io/siesta/siesta_nc.py @@ -250,11 +250,11 @@ def read_hamiltonian(self, **kwargs) -> Hamiltonian: H._csr._D[:, i] = sp.variables["H"][i, :] * Ry2eV # fix siesta specific notation - _mat_spin_convert(H) + _mat_siesta2sisl(H, dtype=kwargs.get("dtype")) # Shift to the Fermi-level - Ef = -self._value("Ef")[:] * Ry2eV - H.shift(Ef) + Ef = self._value("Ef")[:] * Ry2eV + H.shift(-Ef) return H.transpose(spin=False, sort=kwargs.get("sort", True)) @@ -285,7 +285,7 @@ def read_density_matrix(self, **kwargs) -> DensityMatrix: DM._csr._D[:, i] = sp.variables["DM"][i, :] # fix siesta specific notation - _mat_spin_convert(DM) + _mat_siesta2sisl(DM, dtype=kwargs.get("dtype")) return DM.transpose(spin=False, sort=kwargs.get("sort", True)) @@ -305,7 +305,7 @@ def read_energy_density_matrix(self, **kwargs) -> EnergyDensityMatrix: EDM._csr._D[:, i] -= sp.variables["DM"][i, :] * Ef[i] # fix siesta specific notation - _mat_spin_convert(EDM) + _mat_siesta2sisl(EDM, dtype=kwargs.get("dtype")) return EDM.transpose(spin=False, sort=kwargs.get("sort", True)) @@ -613,7 +613,8 @@ def 
write_hamiltonian(self, H, **kwargs): Ef : float, optional the Fermi level of the electronic structure (in eV), default to 0. """ - csr = H.transpose(spin=False, sort=False)._csr + H = H.transpose(spin=False, sort=False) + csr = H._csr if csr.nnz == 0: raise SileError( f"{self}.write_hamiltonian cannot write a zero element sparse matrix!" @@ -622,7 +623,8 @@ def write_hamiltonian(self, H, **kwargs): # Convert to siesta CSR _csr_to_siesta(H.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, H.spin) + + _mat_sisl2siesta(H, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(H.geometry) @@ -671,7 +673,8 @@ def write_density_matrix(self, DM, **kwargs): DM : DensityMatrix the model to be saved in the NC file """ - csr = DM.transpose(spin=False, sort=False)._csr + DM = DM.transpose(spin=False, sort=False) + csr = DM._csr if csr.nnz == 0: raise SileError( f"{self}.write_density_matrix cannot write a zero element sparse matrix!" @@ -680,7 +683,7 @@ def write_density_matrix(self, DM, **kwargs): # Convert to siesta CSR (we don't need to sort this matrix) _csr_to_siesta(DM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, DM.spin) + _mat_sisl2siesta(DM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(DM.geometry) @@ -728,7 +731,8 @@ def write_energy_density_matrix(self, EDM, **kwargs): EDM : EnergyDensityMatrix the model to be saved in the NC file """ - csr = EDM.transpose(spin=False, sort=False)._csr + EDM = EDM.transpose(spin=False, sort=False) + csr = EDM._csr if csr.nnz == 0: raise SileError( f"{self}.write_energy_density_matrix cannot write a zero element sparse matrix!" 
@@ -737,7 +741,7 @@ def write_energy_density_matrix(self, EDM, **kwargs): # no need to sort this matrix _csr_to_siesta(EDM.geometry, csr) csr.finalize(sort=kwargs.get("sort", True)) - _mat_spin_convert(csr, EDM.spin) + _mat_sisl2siesta(EDM, dtype=np.float64) # Ensure that the geometry is written self.write_geometry(EDM.geometry) diff --git a/src/sisl/io/siesta/tests/test_matrices.py b/src/sisl/io/siesta/tests/test_matrices.py new file mode 100644 index 0000000000..e43c0b710d --- /dev/null +++ b/src/sisl/io/siesta/tests/test_matrices.py @@ -0,0 +1,116 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +from __future__ import annotations + +import numpy as np +import pytest + +import sisl +from sisl.io.siesta._help import _mat2dtype + +pytestmark = [pytest.mark.io, pytest.mark.siesta] + +listify = sisl.utils.listify + + +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize( + "matrix,ext", + (map(lambda x: ("Hamiltonian", x), ["nc", "TSHS"]) | listify) + + (map(lambda x: ("DensityMatrix", x), ["nc", "DM"]) | listify) + + (map(lambda x: ("EnergyDensityMatrix", x), ["nc"]) | listify), +) +@pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) +def test_non_colinear(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + if ext == "nc": + pytest.importorskip("netCDF4") + + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("NC"), dtype=dtype) + if np.issubdtype(dtype, np.complexfloating): + onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j] + nn = [0.2, 0.3, 0.4 + 0.5j] + else: + onsite = [0.1, 0.2, 0.3, 0.4] + nn = [0.2, 0.3, 0.4, 0.5] + M.construct(([0.1, 1.44], [onsite, nn])) + + f1 = sisl_tmp(f"M1.{ext}") + f2 = sisl_tmp(f"M2.{ext}") + M.write(f1, sort=sort) + M.finalize() + with sisl.get_sile(f1) as sile: + 
M2 = M.read(sile, dtype=read_dtype) + M2.write(f2, sort=sort) + with sisl.get_sile(f2) as sile: + M3 = M2.read(sile, dtype=read_dtype) + + if sort: + M.finalize(sort=sort) + assert M._csr.spsame(M2._csr) + assert M._csr.spsame(M3._csr) + + from sisl.io.siesta._help import _mat2dtype + + # Convert to the same dtype + _mat2dtype(M2, dtype) + _mat2dtype(M3, dtype) + if M.orthogonal and not M2.orthogonal: + assert np.allclose(M._csr._D, M2._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M2._csr._D) + if M.orthogonal and not M3.orthogonal: + assert np.allclose(M._csr._D, M3._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M3._csr._D) + + +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize( + "matrix,ext", + (map(lambda x: ("Hamiltonian", x), ["nc", "TSHS"]) | listify) + + (map(lambda x: ("DensityMatrix", x), ["nc", "DM"]) | listify) + + (map(lambda x: ("EnergyDensityMatrix", x), ["nc"]) | listify), +) +@pytest.mark.parametrize("read_dtype", [np.float64, np.complex128]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.complex128]) +def test_spin_orbit(sisl_tmp, sort, matrix, ext, dtype, read_dtype): + if ext == "nc": + pytest.importorskip("netCDF4") + + M = getattr(sisl, matrix)(sisl.geom.graphene(), spin=sisl.Spin("SO"), dtype=dtype) + if np.issubdtype(dtype, np.complexfloating): + onsite = [0.1 + 0j, 0.2 + 0j, 0.3 + 0.4j, 0.3 - 0.4j] + nn = [0.2 + 0.1j, 0.3 + 0.3j, 0.4 + 0.5j, 0.4 - 0.5j] + else: + onsite = [0.1, 0.2, 0.3, 0.4, 0, 0, 0.3, -0.4] + nn = [0.2, 0.3, 0.4, 0.5, 0.1, 0.3, 0.4, -0.5] + M.construct(([0.1, 1.44], [onsite, nn])) + + f1 = sisl_tmp(f"M1.{ext}") + f2 = sisl_tmp(f"M2.{ext}") + M.write(f1, sort=sort) + M.finalize() + with sisl.get_sile(f1) as sile: + M2 = M.read(sile, dtype=read_dtype) + M2.write(f2, sort=sort) + with sisl.get_sile(f2) as sile: + M3 = M2.read(sile, dtype=read_dtype) + + if sort: + M.finalize(sort=sort) + assert M._csr.spsame(M2._csr) + assert M._csr.spsame(M3._csr) + + # 
Convert to the same dtype + _mat2dtype(M2, dtype) + _mat2dtype(M3, dtype) + if M.orthogonal and not M2.orthogonal: + assert np.allclose(M._csr._D, M2._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M2._csr._D) + if M.orthogonal and not M3.orthogonal: + assert np.allclose(M._csr._D, M3._csr._D[..., :-1]) + else: + assert np.allclose(M._csr._D, M3._csr._D) diff --git a/src/sisl/io/siesta/tests/test_siesta.py b/src/sisl/io/siesta/tests/test_siesta.py index 6f62ec52ee..e959fe6ed3 100644 --- a/src/sisl/io/siesta/tests/test_siesta.py +++ b/src/sisl/io/siesta/tests/test_siesta.py @@ -161,99 +161,6 @@ def test_nc_density_matrix(sisl_tmp, sisl_system): assert sisl_system.g.atoms.equal(ndm.atoms, R=False) -def test_nc_H_non_colinear(sisl_tmp): - H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("NC")) - H1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("H1.nc") - f2 = sisl_tmp("H2.nc") - H1.write(f1) - H1.finalize() - with sisl.get_sile(f1) as sile: - H2 = sile.read_hamiltonian() - H2.write(f2) - with sisl.get_sile(f2) as sile: - H3 = sile.read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - -def test_nc_DM_non_colinear(sisl_tmp): - DM1 = DensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("NC")) - DM1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("DM1.nc") - f2 = sisl_tmp("DM2.nc") - DM1.write(f1) - DM1.finalize() - with sisl.get_sile(f1) as sile: - DM2 = sile.read_density_matrix() - DM2.write(f2) - with sisl.get_sile(f2) as sile: - DM3 = sile.read_density_matrix() - assert DM1._csr.spsame(DM2._csr) - assert DM1._csr.spsame(DM3._csr) - # DM1 is finalized, but DM2 is not finalized - assert np.allclose(DM1._csr._D, DM2._csr._D) - # DM2 and DM3 are the same - assert np.allclose(DM2._csr._D, DM3._csr._D) - DM2.finalize() - assert np.allclose(DM1._csr._D, 
DM2._csr._D) - - -def test_nc_EDM_non_colinear(sisl_tmp): - EDM1 = EnergyDensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("NC")) - EDM1.construct(([0.1, 1.44], [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]])) - - f1 = sisl_tmp("EDM1.nc") - f2 = sisl_tmp("EDM2.nc") - EDM1.write(f1, sort=False) - EDM1.finalize() - with sisl.get_sile(f1) as sile: - EDM2 = sile.read_energy_density_matrix(sort=False) - EDM2.write(f2, sort=False) - with sisl.get_sile(f2) as sile: - EDM3 = sile.read_energy_density_matrix(sort=False) - assert EDM1._csr.spsame(EDM2._csr) - assert EDM1._csr.spsame(EDM3._csr) - # EDM1 is finalized, but EDM2 is not finalized - assert not np.allclose(EDM1._csr._D, EDM2._csr._D) - # EDM2 and EDM3 are the same - assert np.allclose(EDM2._csr._D, EDM3._csr._D) - EDM2.finalize() - assert np.allclose(EDM1._csr._D, EDM2._csr._D) - - -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_nc_H_spin_orbit(sisl_tmp): - H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) - H1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("H1.nc") - f2 = sisl_tmp("H2.nc") - H1.write(f1) - H1.finalize() - with sisl.get_sile(f1) as sile: - H2 = sile.read_hamiltonian() - H2.write(f2) - with sisl.get_sile(f2) as sile: - H3 = sile.read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_nc_H_spin_orbit_nc2tshs2nc(sisl_tmp): H1 = Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) @@ -282,34 +189,6 @@ def test_nc_H_spin_orbit_nc2tshs2nc(sisl_tmp): assert np.allclose(H1._csr._D, H3._csr._D) -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_nc_DM_spin_orbit(sisl_tmp): - DM1 = 
DensityMatrix(sisl.geom.graphene(), spin=sisl.Spin("SO")) - DM1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("DM1.nc") - f2 = sisl_tmp("DM2.nc") - DM1.write(f1) - DM1.finalize() - with sisl.get_sile(f1) as sile: - DM2 = sile.read_density_matrix() - DM2.write(f2) - with sisl.get_sile(f2) as sile: - DM3 = sile.read_density_matrix() - assert DM1._csr.spsame(DM2._csr) - assert np.allclose(DM1._csr._D, DM2._csr._D) - assert DM1._csr.spsame(DM3._csr) - assert np.allclose(DM1._csr._D, DM3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_nc_DM_spin_orbit_nc2dm2nc(sisl_tmp): DM1 = DensityMatrix(sisl.geom.graphene(), orthogonal=False, spin=sisl.Spin("SO")) diff --git a/src/sisl/io/siesta/tests/test_tsde.py b/src/sisl/io/siesta/tests/test_tsde.py index 58822a25f2..e251b3468c 100644 --- a/src/sisl/io/siesta/tests/test_tsde.py +++ b/src/sisl/io/siesta/tests/test_tsde.py @@ -48,6 +48,34 @@ def test_si_pdos_kgrid_tsde_edm(sisl_files): assert np.allclose(EDM1._csr._D[:, :-1], EDM2._csr._D[:, :-1]) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +@pytest.mark.parametrize(("matrix"), ["density", "energy_density"]) +def test_si_pdos_kgrid_tsde_edm_dtypes(sisl_files, sisl_tmp, matrix): + fdf = sisl.get_sile( + sisl_files("siesta", "Si_pdos_k", "Si_pdos.fdf"), + base=sisl_files("siesta", "Si_pdos_k"), + ) + data = [] + mull = None + + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + M = getattr(fdf, f"read_{matrix}_matrix")(dtype=dtype) + data.append(M) + assert M.dtype == dtype + + if mull is None: + mull = M.mulliken() + else: + assert np.allclose(mull, M.mulliken(), atol=1e-5) + + fnc = sisl_tmp("tmp.nc") + for M in data: + M.write(fnc) + # The overlap should be here... 
+ M1 = M.read(fnc) + assert np.allclose(mull, M1.mulliken(), atol=1e-5) + + @pytest.mark.filterwarnings("ignore", message="*wrong sparse pattern") def test_si_pdos_kgrid_tsde_dm_edm_rw(sisl_files, sisl_tmp): fdf = sisl.get_sile( diff --git a/src/sisl/io/siesta/tests/test_tshs.py b/src/sisl/io/siesta/tests/test_tshs.py index 6ece2f9e16..20e6fc4759 100644 --- a/src/sisl/io/siesta/tests/test_tshs.py +++ b/src/sisl/io/siesta/tests/test_tshs.py @@ -28,6 +28,34 @@ def test_tshs_si_pdos_kgrid(sisl_files, sisl_tmp): assert np.allclose(HS1._csr._D, HS2._csr._D) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +def test_tshs_si_pdos_dtypes_eigs(sisl_files, sisl_tmp): + si = sisl.get_sile(sisl_files("siesta", "Si_pdos_k", "Si_pdos.TSHS")) + data = [] + eigs = None + k = [0.1] * 3 + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + HS = si.read_hamiltonian(dtype=dtype) + data.append(HS) + assert HS.dtype == dtype + + if eigs is None: + eigs = HS.eigh(k) + else: + assert np.allclose(eigs, HS.eigh(k), atol=1e-5) + + f = sisl_tmp("tmp.TSHS") + fnc = sisl_tmp("tmp.nc") + for HS in data: + HS.write(f) + HS1 = HS.read(f) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + HS.write(fnc) + HS1 = HS.read(fnc) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + def test_tshs_si_pdos_kgrid_tofromnc(sisl_files, sisl_tmp): pytest.importorskip("netCDF4") si = sisl.get_sile(sisl_files("siesta", "Si_pdos_k", "Si_pdos.TSHS")) @@ -80,6 +108,34 @@ def test_tshs_soc_pt2_xx(sisl_files, sisl_tmp): assert np.allclose(HS1._csr._D, HS2._csr._D) +@pytest.mark.filterwarnings("ignore", message="*Casting complex values") +def test_tshs_soc_pt2_xx_dtypes(sisl_files, sisl_tmp): + fdf = sisl.get_sile(sisl_files("siesta", "Pt2_soc", "Pt2.fdf")) + data = [] + eigs = None + k = [0.1] * 3 + for dtype in (np.float32, np.float64, np.complex64, np.complex128): + HS = fdf.read_hamiltonian(dtype=dtype) + data.append(HS) + assert HS.dtype == dtype + + if eigs is None: 
+ eigs = HS.eigh(k) + else: + assert np.allclose(eigs, HS.eigh(k), atol=1e-5) + + f = sisl_tmp("tmp.TSHS") + fnc = sisl_tmp("tmp.nc") + for HS in data: + HS.write(f) + HS1 = HS.read(f) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + HS.write(fnc) + HS1 = HS.read(fnc) + assert np.allclose(eigs, HS1.eigh(k), atol=1e-5) + + def test_tshs_soc_pt2_xx_pdos(sisl_files): fdf = sisl.get_sile(sisl_files("siesta", "Pt2_soc", "Pt2.fdf")) sc = fdf.read_lattice(order="TSHS") @@ -137,32 +193,6 @@ def test_tshs_si_pdos_kgrid_overlap(sisl_files): assert np.allclose(HS._csr._D[:, HS.S_idx], S._csr._D[:, 0]) -@pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") -def test_tshs_spin_orbit(sisl_tmp): - H1 = sisl.Hamiltonian(sisl.geom.graphene(), spin=sisl.Spin("SO")) - H1.construct( - ( - [0.1, 1.44], - [ - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], - [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ], - ) - ) - - f1 = sisl_tmp("tmp1.TSHS") - f2 = sisl_tmp("tmp2.TSHS") - H1.write(f1) - H1.finalize() - H2 = sisl.get_sile(f1).read_hamiltonian() - H2.write(f2) - H3 = sisl.get_sile(f2).read_hamiltonian() - assert H1._csr.spsame(H2._csr) - assert np.allclose(H1._csr._D, H2._csr._D) - assert H1._csr.spsame(H3._csr) - assert np.allclose(H1._csr._D, H3._csr._D) - - @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_tshs_spin_orbit_tshs2nc2tshs(sisl_tmp): pytest.importorskip("netCDF4") diff --git a/src/sisl/io/sile.py b/src/sisl/io/sile.py index 7469c2e72f..de9c41012f 100644 --- a/src/sisl/io/sile.py +++ b/src/sisl/io/sile.py @@ -1303,7 +1303,7 @@ def __getattr__(self, attr): exe = Path(sys.executable).name msg = f"Could not import netCDF4. 
Please install it using '{exe} -m pip install netCDF4'" - raise SileError(msg) from e + raise SileError(msg) netCDF4 = _mock_netCDF4() diff --git a/src/sisl/io/tbtrans/delta.py b/src/sisl/io/tbtrans/delta.py index d2c0594eb9..8a0955dc28 100644 --- a/src/sisl/io/tbtrans/delta.py +++ b/src/sisl/io/tbtrans/delta.py @@ -21,7 +21,8 @@ from ..siesta._help import ( _csr_from_sc_off, _csr_to_siesta, - _mat_spin_convert, + _mat_siesta2sisl, + _mat_sisl2siesta, _siesta_sc_off, ) from ..sile import SileError, add_sile, sile_raise_write @@ -436,7 +437,8 @@ def write_delta(self, delta, **kwargs): The input options for `TBtrans`_ determine whether this is a self-energy term or a Hamiltonian term. """ - csr = delta._csr.copy() + out_delta = delta.copy() + csr = out_delta._csr if csr.nnz == 0: raise SileError( f"{self!s}.write_overlap cannot write a zero element sparse matrix!" @@ -446,7 +448,7 @@ def write_delta(self, delta, **kwargs): _csr_to_siesta(delta.geometry, csr, diag=False) # delta should always write sorted matrices csr.finalize(sort=True) - _mat_spin_convert(csr, delta.spin) + _mat_sisl2siesta(out_delta) # Ensure that the geometry is written self.write_geometry(delta.geometry) @@ -557,9 +559,9 @@ def write_delta(self, delta, **kwargs): csize[-1] = csr.nnz if delta.spin.kind > delta.spin.POLARIZED: - print(delta.spin) raise ValueError( - f"{self.__class__.__name__}.write_delta only allows spin-polarized delta values" + f"{self.__class__.__name__}.write_delta only allows spin-polarized " + f"delta values, got {delta.spin!s}" ) if delta.dtype.kind == "c": @@ -667,7 +669,7 @@ def _r_class(self, cls, **kwargs): # Convert from isc to sisl isc _csr_from_sc_off(C.geometry, lvl.variables["isc_off"][:, :], C._csr) - _mat_spin_convert(C) + _mat_siesta2sisl(C, dtype=kwargs.get("dtype")) return C diff --git a/src/sisl/physics/CMakeLists.txt b/src/sisl/physics/CMakeLists.txt index a144553ee0..f5eb34a534 100644 --- a/src/sisl/physics/CMakeLists.txt +++ 
b/src/sisl/physics/CMakeLists.txt @@ -4,15 +4,14 @@ set_property(DIRECTORY APPEND PROPERTY INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/../_core ) -foreach(source +foreach(source _bloch _phase _matrix_utils _matrix_k _matrix_dk _matrix_ddk - _matrix_phase _matrix_phase_nc_diag _matrix_phase_nc _matrix_phase_so - _matrix_phase3 _matrix_phase3_nc _matrix_phase3_so - _matrix_sc_phase _matrix_sc_phase_nc_diag _matrix_sc_phase_nc _matrix_sc_phase_so + _matrix_phase _matrix_phase_sc _matrix_phase3 ) add_cython_library( SOURCE ${source}.pyx diff --git a/src/sisl/physics/_matrix_ddk.pyx b/src/sisl/physics/_matrix_ddk.pyx index d1fa01cc41..fc83ac76df 100644 --- a/src/sisl/physics/_matrix_ddk.pyx +++ b/src/sisl/physics/_matrix_ddk.pyx @@ -2,26 +2,19 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs import numpy as np - -cimport numpy as np +cimport numpy as cnp from ._common import comply_gauge +from sisl._core._dtypes cimport floats_st from ._matrix_phase3 import * -from ._matrix_phase3_nc import * -from ._matrix_phase3_so import * -from ._matrix_phase_nc_diag import * from ._phase import * -_dot = np.dot -_roll = np.roll - __all__ = ["matrix_ddk", "matrix_ddk_nc", "matrix_ddk_nc_diag", "matrix_ddk_so"] -def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype): +def _phase_ddk(gauge, M, sc, cnp.ndarray[floats_st] k, dtype): # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) @@ -34,10 +27,10 @@ def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype # Rd = dx^2, dy^2, dz^2, dzy, dxz, dyx if gauge == 'cell': phases = phase_rsc(sc, k, dtype).reshape(-1, 1) - Rs = _dot(sc.sc_off, sc.cell) + Rs = np.dot(sc.sc_off, sc.cell) Rd = - (Rs * Rs * phases).astype(dtype, copy=False) - Ro = - (_roll(Rs, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, 
y - Ro *= _roll(Rs, -1, axis=1) # y, z, x + Ro = - (np.roll(Rs, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y + Ro *= np.roll(Rs, -1, axis=1) # y, z, x del phases, Rs p_opt = 1 @@ -46,151 +39,73 @@ def _phase_ddk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype rij = M.Rij()._csr._D phases = phase_rij(rij, sc, k, dtype).reshape(-1, 1) Rd = - (rij * rij * phases).astype(dtype, copy=False) - Ro = - (_roll(rij, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y - Ro *= _roll(rij, -1, axis=1) # y, z, x + Ro = - (np.roll(rij, 1, axis=1) * phases).astype(dtype, copy=False) # z, x, y + Ro *= np.roll(rij, -1, axis=1) # y, z, x del rij, phases - p_opt = 1 + p_opt = 0 return p_opt, Rd, Ro -def matrix_ddk(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk(M._csr, idx, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk(csr, const int idx, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - - else: - # Default must be something else. 
- dd[:3] = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.float64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.float32: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - else: - dd[:3] = _phase3_csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) - dd[3:] = _phase3_csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) - dd[0] = dd[0].asformat(format) - 
dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) + dd[3:] = _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) else: - raise ValueError("matrix_ddk: currently only supports dtype in [float32, float64, complex64, complex128].") + # Default must be something else. + dd[:3] = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rd, p_opt) + dd[3:] = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd -def matrix_ddk_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk_nc(M._csr, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk_nc(csr, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - - else: - # Default must be something else. 
- dd[:3] = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - else: - dd[:3] = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) else: - raise ValueError("matrix_ddk_nc: currently only supports dtype in [complex64, complex128].") + # Default must be something else. 
+ dd[:3] = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd -def matrix_ddk_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_nc_diag(gauge, M, const int idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) + # We need the phases to be consecutive in memory Rxx = Rd[:, 0].copy() Ryy = Rd[:, 1].copy() Rzz = Rd[:, 2].copy() @@ -200,84 +115,49 @@ def matrix_ddk_nc_diag(gauge, M, const int idx, sc, Ryx = Ro[:, 2].copy() del Ro - # Get each of them - dxx = _matrix_ddk_nc_diag(M._csr, idx, Rxx, dtype, format, p_opt) - dyy = _matrix_ddk_nc_diag(M._csr, idx, Ryy, dtype, format, p_opt) - dzz = _matrix_ddk_nc_diag(M._csr, idx, Rzz, dtype, format, p_opt) - dzy = _matrix_ddk_nc_diag(M._csr, idx, Rzy, dtype, format, p_opt) - dxz = _matrix_ddk_nc_diag(M._csr, idx, Rxz, dtype, format, p_opt) - dyx = _matrix_ddk_nc_diag(M._csr, idx, Ryx, dtype, format, p_opt) - return dxx, dyy, dzz, dzy, dxz, dyx + csr = M._csr + if format in ("array", "matrix", "dense"): + dxx = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxx, p_opt) + dyy = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryy, p_opt) + dzz = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzz, p_opt) + dzy = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzy, p_opt) + dxz = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxz, p_opt) + dyx = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryx, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def 
_matrix_ddk_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - - # Default must be something else. - return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + else: + dxx = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxx, p_opt).asformat(format) + dyy = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryy, p_opt).asformat(format) + dzz = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzz, p_opt).asformat(format) + dzy = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rzy, p_opt).asformat(format) + dxz = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Rxz, p_opt).asformat(format) + dyx = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, Ryx, p_opt).asformat(format) - raise ValueError("matrix_ddk_nc_diag: only supports dtype in [complex64, complex128].") + return dxx, dyy, dzz, dzy, dxz, dyx -def matrix_ddk_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_ddk_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, Rd, Ro = _phase_ddk(gauge, M, sc, k, dtype) - return _matrix_ddk_so(M._csr, Rd, Ro, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_ddk_so(csr, Rd, Ro, dtype, format, p_opt): # Return list dd = [None, None, None, None, None, None] - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - dd[:3] = 
_phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - - else: - # Default must be something else. - dd[:3] = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - dd[:3] = _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - else: - dd[:3] = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) - dd[3:] = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) - dd[0] = dd[0].asformat(format) - dd[1] = dd[1].asformat(format) - dd[2] = dd[2].asformat(format) - dd[3] = dd[3].asformat(format) - dd[4] = dd[4].asformat(format) - dd[5] = dd[5].asformat(format) + csr = M._csr + + if format in ("array", "matrix", "dense"): + dd[:3] = _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) else: - raise ValueError("matrix_ddk_so: currently only supports dtype in [complex64, complex128].") + # Default must be something else. 
+ dd[:3] = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, Rd, p_opt) + dd[3:] = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, Ro, p_opt) + dd[0] = dd[0].asformat(format) + dd[1] = dd[1].asformat(format) + dd[2] = dd[2].asformat(format) + dd[3] = dd[3].asformat(format) + dd[4] = dd[4].asformat(format) + dd[5] = dd[5].asformat(format) return dd diff --git a/src/sisl/physics/_matrix_dk.pyx b/src/sisl/physics/_matrix_dk.pyx index 0523e7a8ba..3a937f3495 100644 --- a/src/sisl/physics/_matrix_dk.pyx +++ b/src/sisl/physics/_matrix_dk.pyx @@ -2,108 +2,73 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs import numpy as np -cimport numpy as np +cimport numpy as cnp + +from sisl._core._dtypes cimport floats_st, ints_st from ._common import comply_gauge +from ._matrix_phase import * from ._matrix_phase3 import * -from ._matrix_phase3_nc import * -from ._matrix_phase3_so import * -from ._matrix_phase_nc_diag import * from ._phase import * -_dot = np.dot - __all__ = ["matrix_dk", "matrik_dk_nc", "matrik_dk_nc_diag", "matrik_dk_so"] -def _phase_dk(gauge, M, sc, np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype): +def _phase_dk(gauge, M, sc, cnp.ndarray[floats_st] k, dtype): # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) # This is the differentiated matrix with respect to k # See _phase.pyx, we are using exp(i k.R/r) # i R - if gauge == 'cell': - iRs = phase_rsc(sc, k, dtype).reshape(-1, 1) - iRs = (1j * _dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) - p_opt = 1 - elif gauge == 'atom': + if gauge == 'atom': M.finalize() rij = M.Rij()._csr._D iRs = (1j * rij * phase_rij(rij, sc, k, dtype).reshape(-1, 1)).astype(dtype, copy=False) del rij p_opt = 0 + elif gauge == 'cell': + iRs = phase_rsc(sc, k, dtype).reshape(-1, 1) + iRs = (1j * np.dot(sc.sc_off, sc.cell) * iRs).astype(dtype, copy=False) + p_opt = 1 + return 
p_opt, iRs -def matrix_dk(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk(M._csr, idx, iRs, dtype, format, p_opt) + csr = M._csr -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk(csr, const int idx, iRs, dtype, format, p_opt): + if format in ("array", "matrix", "dense"): + return _phase3_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - if dtype == np.complex128: + # Default must be something else. + d1, d2, d3 = _phase3_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) - if format in ("array", "matrix", "dense"): - return _phase3_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - # Default must be something else. 
- d1, d2, d3 = _phase3_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) - - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - d1, d2, d3 = _phase3_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) - - raise ValueError("matrix_dk: currently only supports dtype in [complex64, complex128].") - - -def matrix_dk_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk_nc(M._csr, iRs, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_nc(csr, iRs, dtype, format, p_opt): - if dtype == np.complex128: + csr = M._csr - if format in ("array", "matrix", "dense"): - return _phase3_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) + if format in ("array", "matrix", "dense"): + return _phase3_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - # Default must be something else. - d1, d2, d3 = _phase3_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + # Default must be something else. 
+ d1, d2, d3 = _phase3_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - d1, d2, d3 = _phase3_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) - raise ValueError("matrix_dk_nc: currently only supports dtype in [complex64, complex128].") - - -def matrix_dk_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_nc_diag(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) @@ -112,59 +77,30 @@ def matrix_dk_nc_diag(gauge, M, const int idx, sc, phz = iRs[:, 2].copy() del iRs - # Get each of them - x = _matrix_dk_nc_diag(M._csr, idx, phx, dtype, format, p_opt) - y = _matrix_dk_nc_diag(M._csr, idx, phy, dtype, format, p_opt) - z = _matrix_dk_nc_diag(M._csr, idx, phz, dtype, format, p_opt) - return x, y, z - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: + csr = M._csr - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) + if format in ("array", "matrix", "dense"): + x = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phx, p_opt) + y = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phy, p_opt) + z = _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phz, p_opt) - # Default must be something else. 
- return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + else: + x = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phx, p_opt).asformat(format) + y = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phy, p_opt).asformat(format) + z = _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phz, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - - raise ValueError("matrix_dk_nc_diag: only supports dtype in [complex64, complex128].") + return x, y, z -def matrix_dk_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_dk_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) p_opt, iRs = _phase_dk(gauge, M, sc, k, dtype) - return _matrix_dk_so(M._csr, iRs, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_dk_so(csr, iRs, dtype, format, p_opt): - - if dtype == np.complex128: - - if format in ("array", "matrix", "dense"): - return _phase3_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - # Default must be something else. 
- d1, d2, d3 = _phase3_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + csr = M._csr - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase3_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - d1, d2, d3 = _phase3_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - return d1.asformat(format), d2.asformat(format), d3.asformat(format) + if format in ("array", "matrix", "dense"): + return _phase3_array_so(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) - raise ValueError("matrix_dk_so: currently only supports dtype in [complex64, complex128].") + # Default must be something else. + d1, d2, d3 = _phase3_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, iRs, p_opt) + return d1.asformat(format), d2.asformat(format), d3.asformat(format) diff --git a/src/sisl/physics/_matrix_k.pyx b/src/sisl/physics/_matrix_k.pyx index be8b80a451..eb8a78e14f 100644 --- a/src/sisl/physics/_matrix_k.pyx +++ b/src/sisl/physics/_matrix_k.pyx @@ -4,38 +4,46 @@ cimport cython import numpy as np +cimport numpy as cnp -cimport numpy as np - +from sisl._core._dtypes cimport floats_st, ints_st from ._common import comply_gauge from ._matrix_phase import * -from ._matrix_phase_nc import * -from ._matrix_phase_nc_diag import * -from ._matrix_phase_so import * -from ._matrix_sc_phase import * -from ._matrix_sc_phase_nc import * -from ._matrix_sc_phase_nc_diag import * -from ._matrix_sc_phase_so import * +from ._matrix_phase_sc import * from ._phase import * +from ._phase cimport is_gamma __all__ = ["matrix_k", "matrix_k_nc", "matrix_k_so", "matrix_k_nc_diag"] -def matrix_k(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype) +def _phase_k(gauge, M, sc, cnp.ndarray[floats_st] K, dtype): + cdef floats_st[::1] k = K + + # dtype *must* be passed through phase_dtype gauge = comply_gauge(gauge) - 
if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 + if is_gamma(k): + # no - phases required + p_opt = -1 + phases = np.empty([0], dtype=dtype) - elif gauge == 'atom': + elif gauge == "atom": M.finalize() phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) p_opt = 0 + + elif gauge == "cell": + phases = phase_rsc(sc, k, dtype) + p_opt = 1 + else: - raise ValueError("matrix_k: gauge must be in [cell, atom]") + raise ValueError("phase_k: gauge must be in [cell, atom]") + + return p_opt, phases + +def matrix_k(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) # Check that the dimension *works* if idx < 0: @@ -44,83 +52,32 @@ def matrix_k(gauge, M, const int idx, sc, d = M.shape[-1] raise ValueError(f"matrix_k: unknown index specification {idx} must be in 0:{d}") + csr = M._csr + if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k(M._csr, nc, idx, phases, dtype, format, p_opt) - - return _matrix_k(M._csr, idx, phases, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: if format in ("array", "matrix", "dense"): - return _phase_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) + return _phase_sc_array(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt) - # Default must be something else. 
- return _phase_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + return _phase_sc_csr(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt).asformat(format) - elif dtype == np.float64: - if format in ("array", "matrix", "dense"): - return _array_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx) - return _csr_f64(csr.ptr, csr.ncol, csr.col, csr._D, idx).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - elif dtype == np.float32: - if format in ("array", "matrix", "dense"): - return _array_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx) - return _csr_f32(csr.ptr, csr.ncol, csr.col, csr._D, idx).asformat(format) - - raise ValueError("matrix_k: currently only supports dtype in [float32, float64, complex64, complex128].") + return _phase_csr(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k(csr, const int nc, const int idx, phases, dtype, format, p_opt): - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype in (np.float32, np.float64): - # direct conversion, should be simple (generally 
only at Gamma-point) - m = csr.tocsr(idx) - if format in ("array", "matrix", "dense"): - return m.toarray() - return m - - raise ValueError("matrix_k: (supercell format) currently only supports dtype in [float32, float64, complex64, complex128].") - - -def matrix_k_nc(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): +def matrix_k_nc(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_nc: gauge must be in [cell, atom]") + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + + csr = M._csr if format.startswith("sc:") or format == "sc": if format == "sc": @@ -128,165 +85,61 @@ def matrix_k_nc(gauge, M, sc, else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k_nc(M._csr, nc, phases, dtype, format, p_opt) - return _matrix_k_nc(M._csr, phases, dtype, format, p_opt) - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_nc(csr, phases, dtype, format, p_opt): - if csr.shape[2] < 4: - raise ValueError("matrix_k_nc requires input matrix to have 4 components") - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _phase_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - elif dtype == np.complex64: if format in ("array", "matrix", "dense"): - return _phase_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_nc: only supports dtype in [complex64, complex128].") + return _phase_sc_array_nc(csr.ptr, csr.ncol, csr.col, nc, csr._D, 
phases, p_opt) + return _phase_sc_csr_nc(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt).asformat(format) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_nc(csr, nc, phases, dtype, format, p_opt): + if format in ("array", "matrix", "dense"): + return _phase_array_nc(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - if csr.shape[2] < 4: - raise ValueError("matrix_k_nc: (supercell format) requires input matrix to have 4 components") + return _phase_csr_nc(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_nc_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_nc_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - raise ValueError("matrix_k_nc: (supercell format) only supports dtype in [complex64, complex128].") +def matrix_k_nc_diag(gauge, M, const ints_st idx, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype, True) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + csr = M._csr -def matrix_k_so(gauge, M, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_so: gauge must be in [r, R]") if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return 
_matrix_sc_k_so(M._csr, nc, phases, dtype, format, p_opt) - return _matrix_k_so(M._csr, phases, dtype, format, p_opt) - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_so(csr, phases, dtype, format, p_opt): - - if csr.shape[2] < 8: - raise ValueError("matrix_k_so requires input matrix to have 8 components") - - if dtype == np.complex128: if format in ("array", "matrix", "dense"): - return _phase_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _phase_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) - return _phase_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) + return _phase_sc_array_nc_diag(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt) - raise ValueError("matrix_k_so: only supports dtype in [complex64, complex128].") + return _phase_sc_csr_nc_diag(csr.ptr, csr.ncol, csr.col, nc, csr._D, idx, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_so(csr, nc, phases, dtype, format, p_opt): + return _phase_csr_nc_diag(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - if csr.shape[2] < 8: - raise ValueError("matrix_k_so: (supercell format) requires input matrix to have 8 components") - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_so_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_so_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return 
_sc_phase_so_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt) - return _sc_phase_so_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_so: (supercell format) only supports dtype in [complex64, complex128].") +def matrix_k_so(gauge, M, sc, cnp.ndarray[floats_st] k, dtype, format): + dtype = phase_dtype(k, M.dtype, dtype, True) + p_opt, phases = _phase_k(gauge, M, sc, k, dtype) + csr = M._csr -def matrix_k_nc_diag(gauge, M, const int idx, sc, - np.ndarray[np.float64_t, ndim=1, mode='c'] k, dtype, format): - dtype = phase_dtype(k, M.dtype, dtype, True) - gauge = comply_gauge(gauge) - if gauge == 'cell': - phases = phase_rsc(sc, k, dtype) - p_opt = 1 - elif gauge == 'atom': - M.finalize() - phases = phase_rij(M.Rij()._csr._D, sc, k, dtype) - p_opt = 0 - else: - raise ValueError("matrix_k_nc_diag: gauge must be in [r, R]") if format.startswith("sc:") or format == "sc": if format == "sc": format = "csr" else: format = format[3:] nc = M.geometry.no_s - return _matrix_sc_k_nc_diag(M._csr, nc, idx, phases, dtype, format, p_opt) - return _matrix_k_nc_diag(M._csr, idx, phases, dtype, format, p_opt) - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_k_nc_diag(csr, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: if format in ("array", "matrix", "dense"): - return _phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt) - return _phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, idx, phases, p_opt).asformat(format) + return _phase_sc_array_so(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt) - raise 
ValueError("matrix_k_nc_diag: only supports dtype in [complex64, complex128].") + return _phase_sc_csr_so(csr.ptr, csr.ncol, csr.col, nc, csr._D, phases, p_opt).asformat(format) + if format in ("array", "matrix", "dense"): + return _phase_array_so(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.initializedcheck(False) -def _matrix_sc_k_nc_diag(csr, const int nc, const int idx, phases, dtype, format, p_opt): - - if dtype == np.complex128: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_diag_array_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_nc_diag_csr_c128(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - elif dtype == np.complex64: - if format in ("array", "matrix", "dense"): - return _sc_phase_nc_diag_array_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt) - return _sc_phase_nc_diag_csr_c64(csr.ptr, csr.ncol, csr.col, csr._D, nc, idx, phases, p_opt).asformat(format) - - raise ValueError("matrix_k_nc_diag: (supercell format) only supports dtype in [complex64, complex128].") + return _phase_csr_so(csr.ptr, csr.ncol, csr.col, csr._D, phases, p_opt).asformat(format) diff --git a/src/sisl/physics/_matrix_phase.pyx b/src/sisl/physics/_matrix_phase.pyx index ac5bf3832a..fba3e6d0cb 100644 --- a/src/sisl/physics/_matrix_phase.pyx +++ b/src/sisl/physics/_matrix_phase.pyx @@ -1,276 +1,633 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True cimport cython import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix - -__all__ = ['_csr_f32', '_csr_f64', '_phase_csr_c64', '_phase_csr_c128', - '_array_f32', '_array_f64', '_phase_array_c64', '_phase_array_c128'] -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_real: - float - double - -ctypedef fused numeric_complex: - float - double - float complex - double complex +cimport numpy as cnp +from scipy.sparse import csr_matrix -def _csr_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): +from sisl._indices cimport _index_sorted - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL +from sisl._core._sparse import ( + fold_csr_matrix, + fold_csr_matrix_nc, + fold_csr_matrix_nc_diag, +) + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + ints_st, + numerics_st, + reals_st, + ssize_st, + type2dtype, +) + +from ._matrix_utils cimport ( + _f_matrix_box_nc, + _f_matrix_box_so, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + +__all__ = [ + "_phase_csr", + "_phase_array", + "_phase_csr_nc", + "_phase_array_nc", + "_phase_csr_nc_diag", + "_phase_array_nc_diag", + "_phase_csr_so", + "_phase_array_so", +] + +""" +In this Cython code we use `p_opt` to signal whether the resulting +matrices will use the phases variable. + +There are 3 cases: + +p_opt == -1: + no phases are added, the `phases` array will not be accessed +p_opt == 0: + the phases are *per* sparse index, i.e. the array is as big + as the sparse data.
+p_opt == 1: + the phases are in reduced format where each column block + uses the same phase. A column block is defined as `col[ind] / nr` which + results in a unique index. +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const ints_st idx, + floatcomplexs_st[::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef float[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] + V_PTR, V_NCOL, V_COL = fold_csr_matrix(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + # This may fail, when numerics_st is complex, but floatcomplexs_st is float + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef floatcomplexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, ind, s, s_idx, c + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = _index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += (D[ind, idx]) + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = 
_index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += (D[ind, idx] * phases[ind]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + + tmp = v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]] + s_idx = _index_sorted(tmp, c) + v[v_ptr[r] + s_idx] += (D[ind, idx] * phases[s]) return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _csr_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef double[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=dtype) + cdef floatcomplexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, ind, s, c + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], 
ptr[r] + ncol[r]): + c = col[ind] % nr + v[r, c] += (D[ind, idx]) + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + v[r, c] += (D[ind, idx] * phases[ind]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + v[r, c] += (D[ind, idx] * phases[s]) + return V -def _phase_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[ind] - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[col[ind] / nr] - + V_PTR, V_NCOL, V_COL = 
fold_csr_matrix_nc_diag(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = D[ind, idx] + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = (phases[ind] * D[ind, idx]) + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = (phases[s] * D[ind, idx]) + v[v_ptr[rr] + s_idx] += d + v[v_ptr[rr+1] + s_idx] += d + + nr = nr * 2 return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _phase_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int 
p_opt): + + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + # Local columns + cdef ints_st r, rr, ind, s, c + + cdef complexs_st d + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + d = D[ind, idx] + v[rr, c] += d + v[rr + 1, c + 1] += d + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + d = (phases[ind] * D[ind, idx]) + v[rr, c] += d + v[rr + 1, c + 1] += d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + d = (phases[s] * D[ind, idx]) + v[rr, c] += d + v[rr + 1, c + 1] += d + + return V - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef Py_ssize_t r, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[ind] + 
V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st ph + cdef _f_matrix_box_nc func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - v[v_ptr[r] + s_idx] += D[ind, idx] * phases[col[ind] / nr] - + func = _matrix_box_nc_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += D[ind, 0] + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr] + s_idx+1] += ph + v[v_ptr[rr+1] + s_idx] += ph.conjugate() + v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] 
+ s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] + + nr = nr * 2 return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) -def _array_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.float32) - cdef float[:, ::1] v = V - cdef Py_ssize_t r, ind - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind] % nr] += D[ind, idx] - - return V + cdef ints_st[::1] tmp + cdef ints_st nr = ncol.shape[0] + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V -def _array_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx): + # Local columns + cdef ints_st r, rr, ind, s, c - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL + cdef complexs_st ph + cdef _f_matrix_box_nc func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.float64) - cdef double[:, ::1] v = V - cdef Py_ssize_t r, ind - - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, 
col[ind] % nr] += D[ind, idx] + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + v[rr, c] += D[ind, 0] + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[rr, c + 1] += ph + v[rr + 1, c] += ph.conjugate() + v[rr + 1, c + 1] += D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] return V -def _phase_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef Py_ssize_t r, ind, c - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[ind] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] 
phases, + const int p_opt): + # Now create the folded sparse elements + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + cdef ints_st[::1] tmp + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, s_idx, c + + cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[col[ind] / nr] + func = _matrix_box_so_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + v[v_ptr[rr] + s_idx] += (D[ind, 0] + 1j * D[ind, 4]) + v[v_ptr[rr] + s_idx+1] += (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr+1] + s_idx] += (D[ind, 6] + 1j * D[ind, 7]) + v[v_ptr[rr+1] + s_idx+1] += (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + tmp = v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]] + s_idx = _index_sorted(tmp, c) + + d = &D[ind, 0] + func(d, ph, M) + 
v[v_ptr[rr] + s_idx] += M[0] + v[v_ptr[rr] + s_idx+1] += M[1] + v[v_ptr[rr+1] + s_idx] += M[2] + v[v_ptr[rr+1] + s_idx+1] += M[3] + + nr = nr * 2 + return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - return V +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): -def _phase_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): + cdef ints_st nr = ncol.shape[0] - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] v = V - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr, nr], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef Py_ssize_t r, ind, c + # Local columns + cdef ints_st r, rr, s, c, ind - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[ind] + cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - v[r, c] += D[ind, idx] * phases[col[ind] / nr] + func = _matrix_box_so_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 
+ + v[rr, c] += (D[ind, 0] + 1j * D[ind, 4]) + v[rr, c + 1] += (D[ind, 2] + 1j * D[ind, 3]) + v[rr + 1, c] += (D[ind, 6] + 1j * D[ind, 7]) + v[rr + 1, c + 1] += (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + ph = phases[s] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] += M[0] + v[rr, c + 1] += M[1] + v[rr + 1, c] += M[2] + v[rr + 1, c + 1] += M[3] return V diff --git a/src/sisl/physics/_matrix_phase3.pyx b/src/sisl/physics/_matrix_phase3.pyx index b40155b738..220a8570b9 100644 --- a/src/sisl/physics/_matrix_phase3.pyx +++ b/src/sisl/physics/_matrix_phase3.pyx @@ -1,417 +1,549 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True cimport cython import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix - -__all__ = ['_phase3_csr_f32', '_phase3_csr_f64', - '_phase3_csr_c64', '_phase3_csr_c128', - '_phase3_array_f32', '_phase3_array_f64', - '_phase3_array_c64', '_phase3_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_real: - float - double -ctypedef fused numeric_complex: - float - double - float complex - double complex +cimport numpy as cnp +from scipy.sparse import csr_matrix -def _phase3_csr_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float32_t, ndim=2, mode='c'] PHASES, const int p_opt): +from sisl._indices cimport _index_sorted - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float[:, ::1] phases = PHASES +from sisl._core._sparse import fold_csr_matrix, fold_csr_matrix_nc + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + ints_st, + numerics_st, + ssize_st, + type2dtype, +) + +from ._matrix_utils cimport ( + _f_matrix_box_nc, + _f_matrix_box_so, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + +__all__ = [ + "_phase3_csr", + "_phase3_array", + "_phase3_csr_nc", + "_phase3_array_nc", + "_phase3_csr_so", + "_phase3_array_so", +] + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[:, ::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.float32) - cdef 
float[::1] vx = Vx - cdef float[::1] vy = Vy - cdef float[::1] vz = Vz - cdef float d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] + V_PTR, V_NCOL, V_COL = fold_csr_matrix(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + # This may fail, when numerics_st is complex, but floatcomplexs_st is float + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + + # Local columns + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, ind, s, s_idx, c + + cdef numerics_st d + + with nogil: + if p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) + d = D[ind, idx] + Vx[v_ptr[r] + s_idx] += (d * phases[ind, 0]) + Vy[v_ptr[r] + s_idx] += (d * phases[ind, 1]) + Vz[v_ptr[r] + s_idx] += (d * phases[ind, 2]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) 
+ d = D[ind, idx] + Vx[v_ptr[r] + s_idx] += (d * phases[s, 0]) + Vy[v_ptr[r] + s_idx] += (d * phases[s, 1]) + Vz[v_ptr[r] + s_idx] += (d * phases[s, 2]) return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) -def _phase3_csr_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float64_t, ndim=2, mode='c'] PHASES, const int p_opt): - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double[:, ::1] phases = PHASES - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.float64) - cdef double[::1] vx = Vx - cdef double[::1] vy = Vy - cdef double[::1] vz = Vz - cdef double d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d 
* phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] - - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=dtype) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=dtype) + + # Local columns + cdef ints_st r, ind, s, c + cdef numerics_st d + + with nogil: + if p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + d = D[ind, idx] + Vx[r, c] += (d * phases[ind, 0]) + Vy[r, c] += (d * phases[ind, 1]) + Vz[r, c] += (d * phases[ind, 2]) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] % nr + s = col[ind] / nr + d = D[ind, idx] + Vx[r, c] += (d * phases[s, 0]) + Vy[r, c] += (d * phases[s, 1]) + Vz[r, c] += (d * phases[s, 2]) - -def _phase3_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef 
int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] - - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) + return Vx, Vy, Vz -def _phase3_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): +### +# Non-collinear code +### - # Convert to memory viewsz - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def 
_phase3_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef Py_ssize_t nr = v_ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex d - cdef Py_ssize_t r, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[ind, 0] - vy[v_ptr[r] + s_idx] += d * phases[ind, 1] - vz[v_ptr[r] + s_idx] += d * phases[ind, 2] - + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st ph, v12 + + # Local columns (not in NC form) + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + cdef numerics_st *d + cdef _f_matrix_box_nc func + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx else: - for r in 
range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - s_idx = _index_sorted(v_col[v_ptr[r]:v_ptr[r] + v_ncol[r]], c) - d = D[ind, idx] - vx[v_ptr[r] + s_idx] += d * phases[s, 0] - vy[v_ptr[r] + s_idx] += d * phases[s, 1] - vz[v_ptr[r] + s_idx] += d * phases[s, 2] - + func = _matrix_box_nc_real + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + d = &D[ind, 0] + + ph = phases[ind, 0] + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[ind, 1] + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[ind, 2] + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + d = &D[ind, 0] + + ph = phases[s, 0] + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[s, 1] + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[s, 2] + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] + + nr = nr * 2 return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), 
shape=(nr, nr)) -def _phase3_array_f32(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float32_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.float32) - cdef np.ndarray[np.float32_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.float32) - cdef float[:, ::1] vx = Vx - cdef float[:, ::1] vy = Vy - cdef float[:, ::1] vz = Vz - cdef float d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=dtype) + + cdef complexs_st ph + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + cdef numerics_st *d + cdef _f_matrix_box_nc func + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx else: - for r 
in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] + func = _matrix_box_nc_real + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + d = &D[ind, 0] + + ph = phases[ind, 0] + func(d, ph, M) + Vx[rr, c] += M[0] + Vx[rr, c+1] += M[1] + Vx[rr+1, c] += M[2] + Vx[rr+1, c+1] += M[3] + + ph = phases[ind, 1] + func(d, ph, M) + Vy[rr, c] += M[0] + Vy[rr, c+1] += M[1] + Vy[rr+1, c] += M[2] + Vy[rr+1, c+1] += M[3] + + ph = phases[ind, 2] + func(d, ph, M) + Vz[rr, c] += M[0] + Vz[rr, c+1] += M[1] + Vz[rr+1, c] += M[2] + Vz[rr+1, c+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + d = &D[ind, 0] + + ph = phases[s, 0] + func(d, ph, M) + Vx[rr, c] += M[0] + Vx[rr, c+1] += M[1] + Vx[rr+1, c] += M[2] + Vx[rr+1, c+1] += M[3] + + ph = phases[s, 1] + func(d, ph, M) + Vy[rr, c] += M[0] + Vy[rr, c+1] += M[1] + Vy[rr+1, c] += M[2] + Vy[rr+1, c+1] += M[3] + + ph = phases[s, 2] + func(d, ph, M) + Vz[rr, c] += M[0] + Vz[rr, c+1] += M[1] + Vz[rr+1, c] += M[2] + Vz[rr+1, c+1] += M[3] return Vx, Vy, Vz -def _phase3_array_f64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_real[:, ::1] D, const int idx, - np.ndarray[np.float64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.float64) - cdef 
np.ndarray[np.float64_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.float64) - cdef double[:, ::1] vx = Vx - cdef double[:, ::1] vy = Vy - cdef double[:, ::1] vz = Vz - cdef double d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] - - return Vx, Vy, Vz +### +# Spin-orbit coupling matrices +### +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): -def _phase3_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory viezws - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.complex64) - cdef float complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex d - - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in 
range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] - + # Now create the folded sparse elements + V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(ptr, ncol, col) + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=dtype) + cdef cnp.ndarray[complexs_st, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st ph + + # Local columns (not in NC form) + cdef ints_st nr = ncol.shape[0] + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] - - return Vx, Vy, Vz - + func = _matrix_box_so_real + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + d = &D[ind, 0] + + ph = phases[ind, 0] + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[ind, 1] + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[ind, 2] + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + 
Vz[v_ptr[rr+1] + s_idx+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + s = col[ind] / nr + + s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) + + d = &D[ind, 0] + + ph = phases[s, 0] + func(d, ph, M) + Vx[v_ptr[rr] + s_idx] += M[0] + Vx[v_ptr[rr] + s_idx+1] += M[1] + Vx[v_ptr[rr+1] + s_idx] += M[2] + Vx[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[s, 1] + func(d, ph, M) + Vy[v_ptr[rr] + s_idx] += M[0] + Vy[v_ptr[rr] + s_idx+1] += M[1] + Vy[v_ptr[rr+1] + s_idx] += M[2] + Vy[v_ptr[rr+1] + s_idx+1] += M[3] + + ph = phases[s, 2] + func(d, ph, M) + Vz[v_ptr[rr] + s_idx] += M[0] + Vz[v_ptr[rr] + s_idx+1] += M[1] + Vz[v_ptr[rr+1] + s_idx] += M[2] + Vz[v_ptr[rr+1] + s_idx+1] += M[3] + + nr = nr * 2 + return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) -def _phase3_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr, nr], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr, nr], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr, nr], dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex d - cdef Py_ssize_t r, ind, s, c - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % 
nr - d = D[ind, idx] - vx[r, c] += d * phases[ind, 0] - vy[r, c] += d * phases[ind, 1] - vz[r, c] += d * phases[ind, 2] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase3_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + numerics_st[:, ::1] D, + complexs_st[:, ::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=dtype) + cdef complexs_st[:, ::1] vx = Vx + cdef complexs_st[:, ::1] vy = Vy + cdef complexs_st[:, ::1] vz = Vz + + cdef complexs_st ph + cdef ints_st r, rr, ind, s, c + cdef ints_st s_idx + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] % nr - s = col[ind] / nr - d = D[ind, idx] - vx[r, c] += d * phases[s, 0] - vy[r, c] += d * phases[s, 1] - vz[r, c] += d * phases[s, 2] + func = _matrix_box_so_real + + with nogil: + if p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = (col[ind] % nr) * 2 + + d = &D[ind, 0] + + ph = phases[ind, 0] + func(d, ph, M) + vx[rr, c] += M[0] + vx[rr, c+1] += M[1] + vx[rr+1, c] += M[2] + vx[rr+1, c+1] += M[3] + + ph = phases[ind, 1] + func(d, ph, M) + vy[rr, c] += M[0] + vy[rr, c+1] += M[1] + vy[rr+1, c] += M[2] + vy[rr+1, c+1] += M[3] + + ph = phases[ind, 2] + func(d, ph, M) + vz[rr, c] += M[0] + vz[rr, c+1] += M[1] + vz[rr+1, c] += M[2] + vz[rr+1, c+1] += M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = 
(col[ind] % nr) * 2 + s = col[ind] / nr + + d = &D[ind, 0] + + ph = phases[s, 0] + func(d, ph, M) + vx[rr, c] += M[0] + vx[rr, c+1] += M[1] + vx[rr+1, c] += M[2] + vx[rr+1, c+1] += M[3] + + ph = phases[s, 1] + func(d, ph, M) + vy[rr, c] += M[0] + vy[rr, c+1] += M[1] + vy[rr+1, c] += M[2] + vy[rr+1, c+1] += M[3] + + ph = phases[s, 2] + func(d, ph, M) + vz[rr, c] += M[0] + vz[rr, c+1] += M[1] + vz[rr+1, c] += M[2] + vz[rr+1, c+1] += M[3] return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase3_nc.pyx b/src/sisl/physics/_matrix_phase3_nc.pyx deleted file mode 100644 index 9aee5b8cf0..0000000000 --- a/src/sisl/physics/_matrix_phase3_nc.pyx +++ /dev/null @@ -1,366 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ["_phase3_nc_csr_c64", "_phase3_nc_csr_c128", - "_phase3_nc_array_c64", "_phase3_nc_array_c128"] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase3_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * 
v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, 
mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - 
vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * v12 - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * v12 - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * v12 - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * v12.conjugate() - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + (ph * D[ind, 1]) - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to 
memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, c, s - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * 
D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - return Vx, Vy, Vz - - -def _phase3_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c, s - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[ind, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 
1]) - - ph = phases[ind, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vx[rr, c] = vx[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * v12 - vx[rr+1, c] = vx[rr+1, c] + ph * v12.conjugate() - vx[rr+1, c+1] = vx[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 1] - vy[rr, c] = vy[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * v12 - vy[rr+1, c] = vy[rr+1, c] + ph * v12.conjugate() - vy[rr+1, c+1] = vy[rr+1, c+1] + (ph * D[ind, 1]) - - ph = phases[s, 2] - vz[rr, c] = vz[rr, c] + (ph * D[ind, 0]) - v12 = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * v12 - vz[rr+1, c] = vz[rr+1, c] + ph * v12.conjugate() - vz[rr+1, c+1] = vz[rr+1, c+1] + (ph * D[ind, 1]) - - return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase3_so.pyx b/src/sisl/physics/_matrix_phase3_so.pyx deleted file mode 100644 index 2fba15af46..0000000000 --- a/src/sisl/physics/_matrix_phase3_so.pyx +++ /dev/null @@ -1,438 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ["_phase3_so_csr_c64", "_phase3_so_csr_c128", - "_phase3_so_array_c64", "_phase3_so_array_c128"] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase3_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] vx = Vx - cdef float complex[::1] vy = Vy - cdef float complex[::1] vz = Vz - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, s, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - 
vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] 
+ s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - # Local columns (not in NC form) - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vx = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vy = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] Vz = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] vx = Vx - cdef double complex[::1] vy = Vy - cdef double complex[::1] vz = Vz - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s, s_idx - 
cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[v_ptr[rr] + s_idx] = vx[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[v_ptr[rr] + s_idx+1] = vx[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[v_ptr[rr+1] + s_idx] = vx[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * 
D[ind, 5]) - vx[v_ptr[rr+1] + s_idx+1] = vx[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[v_ptr[rr] + s_idx] = vy[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[v_ptr[rr] + s_idx+1] = vy[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[v_ptr[rr+1] + s_idx] = vy[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[v_ptr[rr+1] + s_idx+1] = vy[v_ptr[rr+1] + s_idx+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[v_ptr[rr] + s_idx] = vz[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[v_ptr[rr] + s_idx+1] = vz[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[v_ptr[rr+1] + s_idx] = vz[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[v_ptr[rr+1] + s_idx+1] = vz[v_ptr[rr+1] + s_idx+1] + ph * vv - - nr = nr * 2 - return csr_matrix((Vx, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vy, V_COL, V_PTR), shape=(nr, nr)), csr_matrix((Vz, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase3_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] vx = Vx - cdef float complex[:, ::1] vy = Vy - cdef float complex[:, ::1] vz = Vz - cdef float complex ph, v12 - 
cdef Py_ssize_t r, rr, ind, s, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[ind, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 
2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - return Vx, Vy, Vz - - -def _phase3_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=2, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[:, ::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vx = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vy = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] Vz = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] vx = Vx - cdef double complex[:, ::1] vy = Vy - cdef double complex[:, ::1] vz = Vz - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - - ph = phases[ind, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[ind, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[ind, 2] - vv = 
(D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s = col[ind] / nr - - ph = phases[s, 0] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vx[rr, c] = vx[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vx[rr, c+1] = vx[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vx[rr+1, c] = vx[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vx[rr+1, c+1] = vx[rr+1, c+1] + ph * vv - - ph = phases[s, 1] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vy[rr, c] = vy[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vy[rr, c+1] = vy[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vy[rr+1, c] = vy[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vy[rr+1, c+1] = vy[rr+1, c+1] + ph * vv - - ph = phases[s, 2] - vv = (D[ind, 0] + 1j * D[ind, 4]) - vz[rr, c] = vz[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - vz[rr, c+1] = vz[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - vz[rr+1, c] = vz[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - vz[rr+1, c+1] = vz[rr+1, c+1] + ph * vv - - return Vx, Vy, Vz diff --git a/src/sisl/physics/_matrix_phase_nc.pyx b/src/sisl/physics/_matrix_phase_nc.pyx deleted file mode 100644 index 5b10ae6cf2..0000000000 --- a/src/sisl/physics/_matrix_phase_nc.pyx +++ /dev/null @@ -1,224 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ['_phase_nc_csr_c64', '_phase_nc_csr_c128', - '_phase_nc_array_c64', '_phase_nc_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = 
_index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - v[v_ptr[rr] + s_idx] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] += v12 * ph - v[v_ptr[rr+1] + s_idx] += 
v12.conjugate() * ph - v[v_ptr[rr+1] + s_idx+1] += D[ind, 1] * ph - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - return V - - -def _phase_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - 
cdef double complex[:, ::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - v[rr, c] += D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] += v12 * ph - v[rr+1, c] += v12.conjugate() * ph - v[rr+1, c+1] += D[ind, 1] * ph - - return V diff --git a/src/sisl/physics/_matrix_phase_nc_diag.pyx b/src/sisl/physics/_matrix_phase_nc_diag.pyx deleted file mode 100644 index 8a590f79c6..0000000000 --- a/src/sisl/physics/_matrix_phase_nc_diag.pyx +++ /dev/null @@ -1,198 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_diagonal_nc - -__all__ = ['_phase_nc_diag_csr_c64', '_phase_nc_diag_csr_c128', - '_phase_nc_diag_array_c64', '_phase_nc_diag_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_nc_diag_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_diagonal_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - nr = nr * 2 - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase_nc_diag_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int 
p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_diagonal_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + vv - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + vv - - nr = nr * 2 - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nr)) - - -def _phase_nc_diag_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex vv 
- cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - return V - - -def _phase_nc_diag_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = v[rr, c] + vv - v[rr+1, c+1] = v[rr+1, c+1] + vv - - return V diff --git a/src/sisl/physics/_matrix_phase_sc.pyx b/src/sisl/physics/_matrix_phase_sc.pyx new file mode 100644 index 0000000000..ad9ede2faf --- /dev/null +++ b/src/sisl/physics/_matrix_phase_sc.pyx @@ -0,0 +1,675 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +cimport cython + +import numpy as np + +cimport numpy as cnp + +from scipy.sparse import csr_matrix + +from sisl._core._dtypes cimport ( + complexs_st, + floatcomplexs_st, + floats_st, + inline_sum, + ints_st, + numerics_st, + ssize_st, + type2dtype, +) +from sisl._core._sparse cimport ncol2ptr_nc +from sisl._indices cimport _index_sorted + +from ._matrix_utils cimport ( + _f_matrix_box_nc, + _f_matrix_box_so, + _matrix_box_nc_cmplx, + _matrix_box_nc_real, + _matrix_box_so_cmplx, + _matrix_box_so_real, +) + +__all__ = [ + "_phase_sc_csr", + "_phase_sc_array", + "_phase_sc_csr_nc", + "_phase_sc_array_nc", + "_phase_sc_csr_nc_diag", + "_phase_sc_array_nc_diag", + "_phase_sc_csr_so", + "_phase_sc_array_so", +] + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef floatcomplexs_st[::1] v = V + + cdef ints_st r, c, nz, ind, cind + cdef floatcomplexs_st ph + + # Copy ncol + v_ncol[:] = ncol[:] + + # This abstraction allows to handle non-finalized CSR matrices + cind = 0 + + with nogil: + if p_opt == 
-1: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + v[cind] = D[ind, idx] + v_col[cind] = col[ind] + cind = cind + 1 + + elif p_opt == 0: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[ind] + v[cind] = (D[ind, idx] * ph) + v_col[cind] = col[ind] + cind = cind + 1 + + else: + for r in range(nr): + v_ptr[r] = cind + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[col[ind] / nr] + v[cind] = (D[ind, idx] * ph) + v_col[cind] = col[ind] + cind = cind + 1 + + v_ptr[nr] = cind + + return csr_matrix((V, V_COL, V_PTR), shape=(nr, nc)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + floatcomplexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[floatcomplexs_st](1) + cdef cnp.ndarray[floatcomplexs_st, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=dtype) + cdef floatcomplexs_st[:, ::1] v = V + + cdef ints_st r, c, nz, ind + cdef floatcomplexs_st ph + + with nogil: + if p_opt == -1: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + v[r, col[ind]] = D[ind, idx] + + elif p_opt == 0: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[ind] + v[r, col[ind]] = (D[ind, idx] * ph) + + else: + for r in range(nr): + for ind in range(ptr[r], ptr[r] + ncol[r]): + ph = phases[col[ind] / nr] + v[r, col[ind]] = (D[ind, idx] * ph) + + return V + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef 
ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + cdef _f_matrix_box_nc func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 2) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = D[ind, 0] + v_col[v_ptr[rr] + cind] = c + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[v_ptr[rr] + cind+1] = ph + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = ph.conjugate() + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = D[ind, 1] + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + v[v_ptr[rr] + cind] = M[0] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = M[1] + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = M[2] + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = M[3] + v_col[v_ptr[rr+1] + cind+1] = 
c + 1 + + cind = cind + 2 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + d = &D[ind, 0] + func(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = M[1] + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = M[2] + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = M[3] + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_nc(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st ph + cdef ints_st r, rr, c, nz, ind + cdef numerics_st *d + cdef _f_matrix_box_nc func + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_nc_cmplx + else: + func = _matrix_box_nc_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + v[rr, c] = D[ind, 0] + ph = (D[ind, 2] + 1j * D[ind, 3]) + v[rr, c+1] = ph + v[rr+1, c] = ph.conjugate() + v[rr+1, c+1] = D[ind, 1] + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 
+ ph = phases[col[ind] / nr] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] + + return V + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 1) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] + v_ncol[rr+1] = ncol[r] + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = D[ind, idx] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = D[ind, idx] + v_col[v_ptr[rr+1] + cind] = c + 1 + + cind = cind + 1 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + v[v_ptr[rr] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr+1] + cind] = 
c + 1 + + cind = cind + 1 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + + v[v_ptr[rr] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr+1] + cind] = (D[ind, idx] * ph) + v_col[v_ptr[rr+1] + cind] = c + 1 + + cind = cind + 1 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_nc_diag(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + const int idx, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st d + cdef ints_st r, rr, c, nz, ind + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = D[ind, idx] + v[rr, c] = d + v[rr+1, c+1] = d + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = (D[ind, idx] * phases[ind]) + + v[rr, c] = d + v[rr+1, c+1] = d + + else: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + d = (D[ind, idx] * phases[col[ind] / nr]) + + v[rr, c] = d + v[rr+1, c+1] = d + + return V + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_csr_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + # Now copy the sparse matrix form + cdef ints_st nr = ncol.shape[0] + 
cdef object idtype = type2dtype[ints_st](1) + cdef cnp.ndarray[ints_st, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_NCOL = np.empty([nr*2], dtype=idtype) + cdef cnp.ndarray[ints_st, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=idtype) + + cdef ints_st[::1] v_ptr = V_PTR + cdef ints_st[::1] v_ncol = V_NCOL + cdef ints_st[::1] v_col = V_COL + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, mode='c'] V = np.zeros([v_col.shape[0]], dtype=dtype) + cdef complexs_st[::1] v = V + + cdef ints_st r, rr, cind, c, nz, ind + cdef complexs_st ph + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real + + # We have to do it manually due to the double elements per matrix element + ncol2ptr_nc(nr, ncol, v_ptr, 2) + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[v_ptr[rr] + cind] = (D[ind, 0] + 1j * D[ind, 4]) + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = (D[ind, 2] + 1j * D[ind, 3]) + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = (D[ind, 6] + 1j * D[ind, 7]) + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = (D[ind, 1] + 1j * D[ind, 5]) + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = M[1] + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = M[2] + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = M[3] + 
v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + else: + for r in range(nr): + rr = r * 2 + v_ncol[rr] = ncol[r] * 2 + v_ncol[rr+1] = ncol[r] * 2 + + cind = 0 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + d = &D[ind, 0] + func(d, ph, M) + + v[v_ptr[rr] + cind] = M[0] + v_col[v_ptr[rr] + cind] = c + v[v_ptr[rr] + cind+1] = M[1] + v_col[v_ptr[rr] + cind+1] = c + 1 + v[v_ptr[rr+1] + cind] = M[2] + v_col[v_ptr[rr+1] + cind] = c + v[v_ptr[rr+1] + cind+1] = M[3] + v_col[v_ptr[rr+1] + cind+1] = c + 1 + + cind = cind + 2 + + return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def _phase_sc_array_so(ints_st[::1] ptr, + ints_st[::1] ncol, + ints_st[::1] col, + const ints_st nc, + numerics_st[:, ::1] D, + complexs_st[::1] phases, + const int p_opt): + + cdef ints_st nr = ncol.shape[0] + + cdef object dtype = type2dtype[complexs_st](1) + cdef cnp.ndarray[complexs_st, ndim=2, mode='c'] V = np.zeros([nr*2, nc*2], dtype=dtype) + cdef complexs_st[:, ::1] v = V + + cdef complexs_st ph + cdef ints_st r, rr, c, nz, ind + cdef _f_matrix_box_so func + cdef numerics_st *d + cdef complexs_st *M = [0, 0, 0, 0] + + if numerics_st in complexs_st: + func = _matrix_box_so_cmplx + else: + func = _matrix_box_so_real + + with nogil: + if p_opt == -1: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + + v[rr, c] = (D[ind, 0] + 1j * D[ind, 4]) + v[rr, c+1] = (D[ind, 2] + 1j * D[ind, 3]) + v[rr+1, c] = (D[ind, 6] + 1j * D[ind, 7]) + v[rr+1, c+1] = (D[ind, 1] + 1j * D[ind, 5]) + + elif p_opt == 0: + for r in range(nr): + rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[ind] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] + + else: + for r in range(nr): + 
rr = r * 2 + for ind in range(ptr[r], ptr[r] + ncol[r]): + c = col[ind] * 2 + ph = phases[col[ind] / nr] + + d = &D[ind, 0] + func(d, ph, M) + v[rr, c] = M[0] + v[rr, c+1] = M[1] + v[rr+1, c] = M[2] + v[rr+1, c+1] = M[3] + + return V diff --git a/src/sisl/physics/_matrix_phase_so.pyx b/src/sisl/physics/_matrix_phase_so.pyx deleted file mode 100644 index ea2b6b1572..0000000000 --- a/src/sisl/physics/_matrix_phase_so.pyx +++ /dev/null @@ -1,248 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._indices cimport _index_sorted -from sisl._core._sparse import fold_csr_matrix_nc - -__all__ = ['_phase_so_csr_c64', '_phase_so_csr_c128', - '_phase_so_array_c64', '_phase_so_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. 
-ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _phase_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - 
v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - # Now create the folded sparse elements - V_PTR, V_NCOL, V_COL = fold_csr_matrix_nc(PTR, NCOL, COL) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, s_idx - cdef int c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - s_idx = _index_sorted(v_col[v_ptr[rr]:v_ptr[rr] + v_ncol[rr]], c) - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + s_idx] = v[v_ptr[rr] + s_idx] + 
ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + s_idx+1] = v[v_ptr[rr] + s_idx+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + s_idx] = v[v_ptr[rr+1] + s_idx] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + s_idx+1] = v[v_ptr[rr+1] + s_idx+1] + ph * vv - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nr * 2)) - - -def _phase_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - return V - - -def _phase_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - 
np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nr * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[ind] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = (col[ind] % nr) * 2 - ph = phases[col[ind] / nr] - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = v[rr, c] + ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v[rr, c+1] + ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = v[rr+1, c] + ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = v[rr+1, c+1] + ph * vv - - return V diff --git a/src/sisl/physics/_matrix_sc_phase.pyx b/src/sisl/physics/_matrix_sc_phase.pyx deleted file mode 100644 index 12a7f17aff..0000000000 --- a/src/sisl/physics/_matrix_sc_phase.pyx +++ /dev/null @@ -1,185 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np -cimport numpy as np -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum - -__all__ = ['_sc_phase_csr_c64', '_sc_phase_csr_c128', - '_sc_phase_array_c64', '_sc_phase_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_real: - float - double - -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] ph = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef Py_ssize_t r, ind, cind - - # Copy ncol - v_ncol[:] = ncol[:] - - cind = 0 - if p_opt == 0: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[ind] - v_col[cind] = col[ind] - cind = cind + 1 - else: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[col[ind] / nr] - v_col[cind] = 
col[ind] - cind = cind + 1 - v_ptr[nr] = cind - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nc)) - - -def _sc_phase_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] ph = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef Py_ssize_t r, ind, cind - - # Copy ncol - v_ncol[:] = ncol[:] - - cind = 0 - if p_opt == 0: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[ind] - v_col[cind] = col[ind] - cind = cind + 1 - else: - for r in range(nr): - v_ptr[r] = cind - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[cind] = D[ind, idx] * ph[col[ind] / nr] - v_col[cind] = col[ind] - cind = cind + 1 - v_ptr[nr] = cind - - return csr_matrix((V, V_COL, V_PTR), shape=(nr, nc)) - - -def _sc_phase_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - 
cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] ph = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef Py_ssize_t r, ind - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[ind] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[col[ind] / nr] - - return V - - -def _sc_phase_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] ph = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr, nc], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef Py_ssize_t r, ind - - if p_opt == 0: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[ind] - - else: - for r in range(nr): - for ind in range(ptr[r], ptr[r] + ncol[r]): - v[r, col[ind]] = D[ind, idx] * ph[col[ind] / nr] - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_nc.pyx b/src/sisl/physics/_matrix_sc_phase_nc.pyx deleted file mode 100644 index 2c98d45d62..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_nc.pyx +++ /dev/null @@ -1,272 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_double - -__all__ = ['_sc_phase_nc_csr_c64', '_sc_phase_nc_csr_c128', - '_sc_phase_nc_array_c64', '_sc_phase_nc_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_nc_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, rr, ind, cind, c - - # We have to do it manually due to the double elements per matrix element - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - 
v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef 
double complex[::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - v[v_ptr[rr] + cind] = D[ind, 0] * ph - v_col[v_ptr[rr] + cind] = c - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = v12 * ph - v_col[v_ptr[rr] + cind+1] = c + 1 - v[v_ptr[rr+1] + cind] = v12.conjugate() * ph - v_col[v_ptr[rr+1] + cind] = c - v[v_ptr[rr+1] + cind+1] = D[ind, 1] * ph - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, v12 - cdef Py_ssize_t r, 
rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - return V - - -def _sc_phase_nc_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, v12 - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - v[rr, c] = D[ind, 0] * ph - v12 = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = v12 * ph - v[rr+1, c] = v12.conjugate() * ph - v[rr+1, c+1] = D[ind, 1] * ph - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx 
b/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx deleted file mode 100644 index d2ac71b721..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_nc_diag.pyx +++ /dev/null @@ -1,234 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_single - -__all__ = ['_sc_phase_nc_diag_csr_c64', '_sc_phase_nc_diag_csr_c128', - '_sc_phase_nc_diag_array_c64', '_sc_phase_nc_diag_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_nc_diag_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - 
cdef float complex[::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_single(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - - cind = cind + 1 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_diag_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*2], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex vv - cdef 
Py_ssize_t r, rr, ind, cind, c - - # We have to do it manually due to the double elements per matrix element - ncol2ptr_single(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] - v_ncol[rr+1] = ncol[r] - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[v_ptr[rr] + cind] = vv - v_col[v_ptr[rr] + cind] = c - v[v_ptr[rr+1] + cind] = vv - v_col[v_ptr[rr+1] + cind] = c + 1 - cind = cind + 1 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_nc_diag_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv 
- - return V - - -def _sc_phase_nc_diag_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, const int idx, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[ind] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - c = col[ind] * 2 - vv = (phases[col[ind] / nr] * D[ind, idx]) - v[rr, c] = vv - v[rr+1, c+1] = vv - - return V diff --git a/src/sisl/physics/_matrix_sc_phase_so.pyx b/src/sisl/physics/_matrix_sc_phase_so.pyx deleted file mode 100644 index 60d3327cfa..0000000000 --- a/src/sisl/physics/_matrix_sc_phase_so.pyx +++ /dev/null @@ -1,293 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True -cimport cython - -import numpy as np - -cimport numpy as np - -from scipy.sparse import csr_matrix - -from sisl._core._sparse cimport inline_sum -from sisl.physics._matrix_utils cimport ncol2ptr_double - -__all__ = ['_sc_phase_so_csr_c64', '_sc_phase_so_csr_c128', - '_sc_phase_so_array_c64', '_sc_phase_so_array_c128'] - -# The fused data-types forces the data input to be of "correct" values. -ctypedef fused numeric_complex: - float - double - float complex - double complex - - -def _sc_phase_so_csr_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex64_t, ndim=1, mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex64) - cdef float complex[::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, cind, c - - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + 
cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_so_csr_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - - # Now copy the sparse matrix form - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_PTR = np.empty([nr*2 + 1], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_NCOL = np.empty([nr*2], dtype=np.int32) - cdef np.ndarray[np.int32_t, ndim=1, mode='c'] V_COL = np.empty([inline_sum(ncol)*4], dtype=np.int32) - cdef int[::1] v_ptr = V_PTR - cdef int[::1] v_ncol = V_NCOL - cdef int[::1] v_col = V_COL - - cdef np.ndarray[np.complex128_t, ndim=1, 
mode='c'] V = np.zeros([v_col.shape[0]], dtype=np.complex128) - cdef double complex[::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, cind - - # We have to do it manually due to the double elements per matrix element - ncol2ptr_double(nr, ncol, v_ptr) - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - else: - for r in range(nr): - rr = r * 2 - v_ncol[rr] = ncol[r] * 2 - v_ncol[rr+1] = ncol[r] * 2 - - cind = 0 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[v_ptr[rr] + cind] = ph * vv - v_col[v_ptr[rr] + cind] = c - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[v_ptr[rr] + cind+1] = ph * vv - v_col[v_ptr[rr] + cind+1] = c + 1 - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[v_ptr[rr+1] + cind] = ph * vv - v_col[v_ptr[rr+1] + cind] = c - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[v_ptr[rr+1] + cind+1] = ph * vv - v_col[v_ptr[rr+1] + cind+1] = c + 1 - cind = cind + 2 - - return csr_matrix((V, V_COL, V_PTR), shape=(nr * 2, nc * 2)) - - -def _sc_phase_so_array_c64(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex64_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = 
NCOL - cdef int[::1] col = COL - cdef float complex[::1] phases = PHASES - - cdef Py_ssize_t nr = ncol.shape[0] - cdef np.ndarray[np.complex64_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex64) - cdef float complex[:, ::1] v = V - cdef float complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - return V - - -def _sc_phase_so_array_c128(np.ndarray[np.int32_t, ndim=1, mode='c'] PTR, - np.ndarray[np.int32_t, ndim=1, mode='c'] NCOL, - np.ndarray[np.int32_t, ndim=1, mode='c'] COL, - numeric_complex[:, ::1] D, - const int nc, - np.ndarray[np.complex128_t, ndim=1, mode='c'] PHASES, const int p_opt): - - # Convert to memory views - cdef int[::1] ptr = PTR - cdef int[::1] ncol = NCOL - cdef int[::1] col = COL - cdef double complex[::1] phases = PHASES - cdef Py_ssize_t nr = ncol.shape[0] - - cdef np.ndarray[np.complex128_t, ndim=2, mode='c'] V = np.zeros([nr * 2, nc * 2], dtype=np.complex128) - cdef double complex[:, ::1] v = V - cdef double complex ph, vv - cdef Py_ssize_t r, rr, ind, c - - if p_opt == 0: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[ind] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, 
c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - else: - for r in range(nr): - rr = r * 2 - for ind in range(ptr[r], ptr[r] + ncol[r]): - ph = phases[col[ind] / nr] - c = col[ind] * 2 - vv = (D[ind, 0] + 1j * D[ind, 4]) - v[rr, c] = ph * vv - vv = (D[ind, 2] + 1j * D[ind, 3]) - v[rr, c+1] = ph * vv - vv = (D[ind, 6] + 1j * D[ind, 7]) - v[rr+1, c] = ph * vv - vv = (D[ind, 1] + 1j * D[ind, 5]) - v[rr+1, c+1] = ph * vv - - return V diff --git a/src/sisl/physics/_matrix_utils.pxd b/src/sisl/physics/_matrix_utils.pxd index c83feed2b8..b235ca106b 100644 --- a/src/sisl/physics/_matrix_utils.pxd +++ b/src/sisl/physics/_matrix_utils.pxd @@ -1,3 +1,38 @@ -# Define the interfaces for the functions exposed through cimport -cdef void ncol2ptr_double(const int nr, const int[::1] ncol, int[::1] ptr) nogil -cdef void ncol2ptr_single(const int nr, const int[::1] ncol, int[::1] ptr) nogil +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+cimport cython + +import numpy as np + +cimport numpy as cnp + +from sisl._core._dtypes cimport complexs_st, numerics_st, reals_st + +ctypedef fused _internal_complexs_st: + float complex + double complex + +ctypedef void(*_f_matrix_box_nc)(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_nc_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_nc_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +ctypedef void(*_f_matrix_box_so)(const numerics_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_so_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil + +cdef void _matrix_box_so_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil diff --git a/src/sisl/physics/_matrix_utils.pyx b/src/sisl/physics/_matrix_utils.pyx index 0f9014a094..7b0e2fb904 100644 --- a/src/sisl/physics/_matrix_utils.pyx +++ b/src/sisl/physics/_matrix_utils.pyx @@ -1,37 +1,75 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. -# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True cimport cython -__all__ = ["ncol2ptr_double", "ncol2ptr_single"] +import numpy as np +cimport numpy as cnp -cdef void ncol2ptr_double(const int nr, const int[::1] ncol, int[::1] ptr) noexcept nogil: - cdef Py_ssize_t r, rr +from sisl._core._dtypes cimport complexs_st, numerics_st, reals_st - # this is NC/SOC - ptr[0] = 0 - ptr[1] = ncol[0] * 2 - for r in range(1, nr): - rr = r * 2 - # do both - ptr[rr] = ptr[rr - 1] + ncol[r-1] * 2 - ptr[rr+1] = ptr[rr] + ncol[r] * 2 +""" +These routines converts an array of n-values into a spin-box matrix. 
- ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] * 2 +In all cases, the resulting linear returned matrix `M` +has 4 entries. +M[0] == spin[0, 0] +M[1] == spin[0, 1] +M[2] == spin[1, 0] +M[3] == spin[1, 1] +""" -cdef void ncol2ptr_single(const int nr, const int[::1] ncol, int[::1] ptr) noexcept nogil: - cdef Py_ssize_t r, rr - # this is NC/SOC - ptr[0] = 0 - ptr[1] = ncol[0] - for r in range(1, nr): - rr = r * 2 - # do both - ptr[rr] = ptr[rr - 1] + ncol[r-1] - ptr[rr+1] = ptr[rr] + ncol[r] +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_nc_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = (data[0] * phase) + M[1] = ((data[2] + 1j * data[3]) * phase) + M[2] = ((data[2] + 1j * data[3]).conjugate() * phase) + M[3] = (data[1] * phase) - ptr[nr * 2] = ptr[nr * 2 - 1] + ncol[nr - 1] + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_nc_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = (data[0] * phase) + M[1] = (data[2] * phase) + M[2] = (data[2].conjugate() * phase) + M[3] = (data[1] * phase) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_so_real(const reals_st *data, + const complexs_st phase, + complexs_st *M) noexcept nogil: + M[0] = ((data[0] + 1j * data[4]) * phase) + M[1] = ((data[2] + 1j * data[3]) * phase) + M[2] = ((data[6] + 1j * data[7]) * phase) + M[3] = ((data[1] + 1j * data[5]) * phase) + + +# necessary to double the interfaces +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +cdef inline void _matrix_box_so_cmplx(const _internal_complexs_st *data, + const complexs_st phase, + complexs_st *M) noexcept 
nogil: + M[0] = (data[0] * phase) + M[1] = (data[2] * phase) + M[2] = (data[3] * phase) + M[3] = (data[1] * phase) diff --git a/src/sisl/physics/_phase.pxd b/src/sisl/physics/_phase.pxd new file mode 100644 index 0000000000..7449aaa779 --- /dev/null +++ b/src/sisl/physics/_phase.pxd @@ -0,0 +1,7 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +from sisl._core._dtypes cimport floats_st + + +cdef bint is_gamma(const floats_st[::1] k) noexcept nogil diff --git a/src/sisl/physics/_phase.pyx b/src/sisl/physics/_phase.pyx index 183804867b..ddd784cce9 100644 --- a/src/sisl/physics/_phase.pyx +++ b/src/sisl/physics/_phase.pyx @@ -2,33 +2,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. cimport cython -from libc.math cimport fabs - -import numpy as np - -cimport numpy as np +from libc.math cimport fabs, fabsf from numpy import complex64, complex128, dot, exp, float32, float64, ndarray, ones, pi -from numpy cimport complex64_t, complex128_t, float32_t, float64_t, ndarray - -__all__ = ['phase_dtype', 'phase_rsc', 'phase_rij'] +from sisl._core._dtypes cimport floats_st @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) -cdef inline int is_gamma(const double[::1] k) noexcept nogil: - if fabs(k[0]) > 0.0000001: - return 0 - if fabs(k[1]) > 0.0000001: - return 0 - if fabs(k[2]) > 0.0000001: - return 0 +cdef inline bint is_gamma(const floats_st[::1] k) noexcept nogil: + if floats_st is cython.float: + if fabsf(k[0]) > 0.0000001: + return 0 + if fabsf(k[1]) > 0.0000001: + return 0 + if fabsf(k[2]) > 0.0000001: + return 0 + + else: + if fabs(k[0]) > 0.0000001: + return 0 + if fabs(k[1]) > 0.0000001: + return 0 + if fabs(k[2]) > 0.0000001: + return 0 return 1 -def phase_dtype(ndarray[float64_t, ndim=1, 
mode='c'] k, M_dtype, R_dtype, force_complex=False): +def phase_dtype(const floats_st[::1] k, M_dtype, R_dtype, force_complex: bool=False): if is_gamma(k) and not force_complex: if R_dtype is None: return M_dtype @@ -52,7 +55,7 @@ def phase_dtype(ndarray[float64_t, ndim=1, mode='c'] k, M_dtype, R_dtype, force_ return R_dtype -def phase_rsc(sc, ndarray[float64_t, ndim=1, mode='c'] k, dtype): +def phase_rsc(sc, const floats_st[::1] k, dtype): """ Calculate the phases for the supercell interactions using k """ # Figure out if this is a Gamma point or not @@ -66,7 +69,7 @@ def phase_rsc(sc, ndarray[float64_t, ndim=1, mode='c'] k, dtype): return phases -def phase_rij(rij, sc, ndarray[float64_t, ndim=1, mode='c'] k, dtype): +def phase_rij(rij, sc, const floats_st[::1] k, dtype): """ Calculate the phases for the distance matrix using k """ # Figure out if this is a Gamma point or not diff --git a/src/sisl/physics/densitymatrix.py b/src/sisl/physics/densitymatrix.py index eddab35867..4de3f7a7cb 100644 --- a/src/sisl/physics/densitymatrix.py +++ b/src/sisl/physics/densitymatrix.py @@ -24,40 +24,12 @@ from sisl.messages import deprecate_argument, progressbar, warn from sisl.typing import AtomsIndex, GaugeType, SeqFloat -from .sparse import SparseOrbitalBZSpin +from .sparse import SparseOrbitalBZSpin, _get_spin from .spin import Spin __all__ = ["DensityMatrix"] -def _get_density(DM, orthogonal, what="sum"): - DM = DM.T - if orthogonal: - off = 0 - else: - off = 1 - if what == "sum": - if DM.shape[0] in (2 + off, 4 + off, 8 + off): - return DM[0] + DM[1] - return DM[0] - if what == "spin": - m = np.empty([3, DM.shape[1]], dtype=DM.dtype) - if DM.shape[0] == 8 + off: - m[0] = DM[2] + DM[6] - m[1] = -DM[3] + DM[7] - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 4 + off: - m[0] = 2 * DM[2] - m[1] = -2 * DM[3] - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 2 + off: - m[:2, :] = 0.0 - m[2] = DM[0] - DM[1] - elif DM.shape[0] == 1 + off: - m[...] 
= 0.0 - return m - - class _densitymatrix(SparseOrbitalBZSpin): def spin_rotate(self, angles: SeqFloat, rad: bool = False): r"""Rotates spin-boxes by fixed angles around the :math:`x`, :math:`y` and :math:`z` axis, respectively. @@ -539,10 +511,10 @@ def bond_order( m, *opts = method.split(":") # only extract the summed density - what = "sum" + what = "trace" if "spin" in opts: # do this for each spin x, y, z - what = "spin" + what = "vector" del opts[opts.index("spin")] # Check that there are no un-used options @@ -556,7 +528,7 @@ def bond_order( rows, cols, DM = _to_coo(self._csr) # Convert to requested matrix form - D = _get_density(DM, self.orthogonal, what) + D = _get_spin(DM, self.spin, what).T # Define a matrix-matrix multiplication def mm(A, B): diff --git a/src/sisl/physics/energydensitymatrix.py b/src/sisl/physics/energydensitymatrix.py index 895189e6bf..b11aa221d5 100644 --- a/src/sisl/physics/energydensitymatrix.py +++ b/src/sisl/physics/energydensitymatrix.py @@ -326,7 +326,7 @@ def shift(self, E, DM): return for i in range(self.spin.spinor): - self._csr._D[:, i] += DM._csr._D[:, i] * E[i] + self._csr._D[:, i].real += DM._csr._D[:, i].real * E[i] @staticmethod def read(sile, *args, **kwargs): diff --git a/src/sisl/physics/hamiltonian.py b/src/sisl/physics/hamiltonian.py index 6a243dde60..60b02e0f86 100644 --- a/src/sisl/physics/hamiltonian.py +++ b/src/sisl/physics/hamiltonian.py @@ -328,7 +328,7 @@ def shift(self, E): # For non-collinear and SO only the diagonal (real) components # should be shifted. 
for i in range(self.spin.spinor): - self._csr._D[:, i] += self._csr._D[:, self.S_idx] * E[i] + self._csr._D[:, i].real += self._csr._D[:, self.S_idx].real * E[i] def eigenvalue(self, k=(0, 0, 0), gauge: GaugeType = "cell", **kwargs): """Calculate the eigenvalues at `k` and return an `EigenvalueElectron` object containing all eigenvalues for a given `k` diff --git a/src/sisl/physics/sparse.py b/src/sisl/physics/sparse.py index f2c531aa6d..c5e8a79fd4 100644 --- a/src/sisl/physics/sparse.py +++ b/src/sisl/physics/sparse.py @@ -4,6 +4,7 @@ from __future__ import annotations import warnings +from typing import Literal import numpy as np from scipy.sparse import SparseEfficiencyWarning, csr_matrix @@ -13,6 +14,7 @@ from sisl import Geometry from sisl._core.sparse import issparse from sisl._core.sparse_geometry import SparseOrbital +from sisl._help import dtype_complex_to_real, dtype_real_to_complex from sisl._internal import set_module from sisl.messages import warn from sisl.typing import AtomsIndex, GaugeType, KPoint @@ -29,6 +31,84 @@ warnings.filterwarnings("ignore", category=SparseEfficiencyWarning) +def _get_spin(M, spin, what: Literal["trace", "box", "vector"] = "box"): + if what == "trace": + if spin.spinor == 2: + # we have both up+down + # TODO fix spin-orbit with complex values + return M[..., 0] + M[..., 1] + return M[..., 0] + + if what == "vector": + m = np.empty(M.shape[:-1] + (3,), dtype=dtype_complex_to_real(M.dtype)) + if spin.is_unpolarized: + # no spin-density + m[...] 
= 0.0 + else: + # Same for all spin-configurations + m[..., 2] = (M[..., 0] - M[..., 1]).real + + # These indices should be reflected in sisl/physics/sparse.py + # for the Mxy[ri] indices in the reset method + if spin.is_polarized: + m[..., :2] = 0.0 + elif spin.is_noncolinear: + if spin.dkind in ("f", "i"): + m[..., 0] = 2 * M[..., 2] + m[..., 1] = -2 * M[..., 3] + else: + m[..., 0] = 2 * M[..., 2].real + m[..., 1] = -2 * M[..., 2].imag + else: + # spin-orbit + if spin.dkind in ("f", "i"): + m[..., 0] = M[..., 2] + M[..., 6] + m[..., 1] = -M[..., 3] + M[..., 7] + else: + tmp = M[..., 2].conj() + M[..., 3] + m[..., 0] = tmp.real + m[..., 1] = tmp.imag + return m + + if what == "box": + m = np.empty(M.shape[:-1] + (2, 2), dtype=dtype_real_to_complex(M.dtype)) + if spin.is_unpolarized: + # no spin-density + m[...] = 0.0 + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 0] + elif spin.is_polarized: + m[...] = 0.0 + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + elif spin.is_noncolinear: + if spin.dkind in ("f", "i"): + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + 1j * M[..., 3] + m[..., 1, 0] = m[..., 0, 1].conj() + else: + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + m[..., 1, 0] = M[..., 2].conj() + else: + if spin.dkind in ("f", "i"): + m[..., 0, 0] = M[..., 0] + 1j * M[..., 4] + m[..., 1, 1] = M[..., 1] + 1j * M[..., 5] + m[..., 0, 1] = M[..., 2] + 1j * M[..., 3] + m[..., 1, 0] = M[..., 6] + 1j * M[..., 7] + else: + m[..., 0, 0] = M[..., 0] + m[..., 1, 1] = M[..., 1] + m[..., 0, 1] = M[..., 2] + m[..., 1, 0] = M[..., 3] + + return m + + raise ValueError(f"Wrong 'what' argument got {what}.") + + @set_module("sisl.physics") class SparseOrbitalBZ(SparseOrbital): r"""Sparse object containing the orbital connections in a Brillouin zone @@ -84,6 +164,8 @@ def __init__( def _reset(self): r"""Reset object according to the options, please refer to `SparseOrbital.reset` for details""" + # Update 
the shape + self._csr._shape = self.shape[:-1] + self._csr._D.shape[-1:] if self.orthogonal: self.Sk = self._Sk_diagonal self.S_idx = -100 @@ -763,6 +845,9 @@ def _reset(self): r"""Reset object according to the options, please refer to `SparseOrbital.reset` for details""" super()._reset() + # Update the dtype of the spin + self._spin = Spin(self.spin, dtype=self.dtype) + if self.spin.is_unpolarized: self.UP = 0 self.DOWN = 0 @@ -780,7 +865,7 @@ def _reset(self): self.dSk = self._dSk elif self.spin.is_noncolinear: - if self.spin.dkind == "f": + if self.spin.dkind in ("f", "i"): self.M11 = 0 self.M22 = 1 self.M12r = 2 @@ -789,7 +874,6 @@ def _reset(self): self.M11 = 0 self.M22 = 1 self.M12 = 2 - raise NotImplementedError("Currently not implemented") self.Pk = self._Pk_non_colinear self.Sk = self._Sk_non_colinear self.dPk = self._dPk_non_colinear @@ -798,7 +882,7 @@ def _reset(self): self.ddSk = self._ddSk_non_colinear elif self.spin.is_spinorbit: - if self.spin.dkind == "f": + if self.spin.dkind in ("f", "i"): self.SX = np.array([0, 0, 1, 0, 0, 0, 1, 0], self.dtype) self.SY = np.array([0, 0, 0, -1, 0, 0, 0, 1], self.dtype) self.SZ = np.array([1, -1, 0, 0, 0, 0, 0, 0], self.dtype) @@ -815,7 +899,7 @@ def _reset(self): self.M22 = 1 self.M12 = 2 self.M21 = 3 - raise NotImplementedError("Currently not implemented") + # The overlap is the same as non-collinear self.Pk = self._Pk_spin_orbit self.Sk = self._Sk_non_colinear @@ -836,7 +920,7 @@ def spin(self): r"""Associated spin class""" return self._spin - def create_construct(self, R, param): + def create_construct(self, R, params): r"""Create a simple function for passing to the `construct` function. This is to relieve the creation of simplistic @@ -846,7 +930,7 @@ def create_construct(self, R, param): >>> def func(self, ia, atoms, atoms_xyz=None): ... idx = self.geometry.close(ia, R=R, atoms=atoms, atoms_xyz=atoms_xyz) - ... for ix, p in zip(idx, param): + ... for ix, p in zip(idx, params): ... 
self[ia, ix] = p In the non-colinear case the matrix element :math:`\mathbf M_{ij}` will be set @@ -865,79 +949,97 @@ def create_construct(self, R, param): Parameters ---------- - R : array_like + R : radii parameters for different shells. - Must have same length as `param` or one less. + Must have same length as `params` or one less. If one less it will be extended with ``R[0]/100`` - param : array_like + params : coupling constants corresponding to the `R` - ranges. ``param[0,:]`` are the elements + ranges. ``params[0,:]`` are the elements for the all atoms within ``R[0]`` of each atom. See Also -------- construct : routine to create the sparse matrix from a generic function (as returned from `create_construct`) """ - if len(R) != len(param): + if len(R) != len(params): raise ValueError( - f"{self.__class__.__name__}.create_construct got different lengths of `R` and `param`" + f"{self.__class__.__name__}.create_construct got different lengths of 'R' and 'params'" ) if not self.spin.is_diagonal: + # This portion of code splits the construct into doing Hermitian + # assignments. This probably needs rigorous testing. + + dtype_cplx = dtype_real_to_complex(self.dtype) + is_complex = self.dkind == "c" if self.spin.is_spinorbit: if is_complex: nv = 4 # Hermitian parameters - paramH = [ - [p[0].conj(), p[1].conj(), p[3].conj(), p[2].conj(), *p[4:]] - for p in param + # The input order is [uu, dd, ud, du] + paramsH = [ + [ + p[0].conjugate(), + p[1].conjugate(), + p[3].conjugate(), + p[2].conjugate(), + *p[4:], + ] + for p in params ] else: nv = 8 # Hermitian parameters - paramH = [ + # The input order is [Ruu, Rdd, Rud, Iud, Iuu, Idd, Rdu, idu] + paramsH = [ [p[0], p[1], p[6], -p[7], -p[4], -p[5], p[2], -p[3], *p[8:]] - for p in param + for p in params ] if not self.orthogonal: nv += 1 # ensure we have correct number of values - assert all(len(p) == nv for p in param) + assert all(len(p) == nv for p in params) if R[0] <= 0.1001: # no atom closer than 0.1001 Ang! 
# We check that the the parameters here is Hermitian - p = param[0] + p = params[0] if is_complex: - onsite = np.array([[p[0], p[2]], [p[3], p[1]]], self.dtype) + onsite = np.array([[p[0], p[2]], [p[3], p[1]]], dtype_cplx) else: onsite = np.array( [ [p[0] + 1j * p[4], p[2] + 1j * p[3]], [p[6] + 1j * p[7], p[1] + 1j * p[5]], ], - np.complex128, + dtype_cplx, ) - if not np.allclose(onsite, onsite.T.conj()): + if not np.allclose(onsite, onsite.T.conjugate()): warn( - f"{self.__class__.__name__}.create_construct is NOT Hermitian for on-site terms. This is your responsibility!" + f"{self.__class__.__name__}.create_construct is NOT " + "Hermitian for on-site terms. This is your responsibility! " + "The code will continue silently, be AWARE!" ) elif self.spin.is_noncolinear: if is_complex: nv = 3 # Hermitian parameters - paramH = [[p[0].conj(), p[1].conj(), p[2], *p[3:]] for p in param] + paramsH = [ + [p[0].conjugate(), p[1].conjugate(), p[2], *p[3:]] + for p in params + ] else: nv = 4 # Hermitian parameters - # Note that we don"t need to do anything here. + # Note that we don't need to do anything here. # H_ij = [[0, 2 + 1j 3], # [2 - 1j 3, 1]] # H_ji = [[0, 2 + 1j 3], # [2 - 1j 3, 1]] # H_ij^H == H_ji^H - paramH = param + paramsH = params if not self.orthogonal: nv += 1 @@ -945,21 +1047,25 @@ def create_construct(self, R, param): # Since the values are ensured Hermitian in the on-site case anyways. 
# ensure we have correct number of values - assert all(len(p) == nv for p in param) + assert all(len(p) == nv for p in params) na = self.geometry.na # Now create the function that returns the assignment function def func(self, ia, atoms, atoms_xyz=None): idx = self.geometry.close(ia, R=R, atoms=atoms, atoms_xyz=atoms_xyz) - for ix, p, pc in zip(idx, param, paramH): + for ix, p, pc in zip(idx, params, paramsH): ix_ge = (ix % na) >= ia self[ia, ix[ix_ge]] = p self[ia, ix[~ix_ge]] = pc + func.R = R + func.params = params + func.paramsH = paramsH + return func - return super().create_construct(R, param) + return super().create_construct(R, params) def __len__(self): r"""Returns number of rows in the basis (if non-collinear or spin-orbit, twice the number of orbitals)""" @@ -1403,7 +1509,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru if sp.is_spinorbit: if hermitian and spin: # conjugate the imaginary value and transpose spin-box - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # imaginary components (including transposing) # 12,11,22,21 D[:, [3, 4, 5, 7]] = -D[:, [7, 4, 5, 3]] @@ -1413,7 +1519,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru D[:, [0, 1, 2, 3]] = np.conj(D[:, [0, 1, 3, 2]]) elif hermitian: # conjugate the imaginary value - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # imaginary components # 12,11,22,21 D[:, [3, 4, 5, 7]] *= -1.0 @@ -1421,7 +1527,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru D[:, :] = np.conj(D[:, :]) elif spin: # transpose spin-box, 12 <-> 21 - if sp.dkind == "f": + if sp.dkind in ("f", "i"): D[:, [2, 3, 6, 7]] = D[:, [6, 7, 2, 3]] else: D[:, [2, 3]] = D[:, [3, 2]] @@ -1438,7 +1544,7 @@ def transpose(self, hermitian: bool = False, spin: bool = True, sort: bool = Tru # So for transposing we should negate the sign # to ensure we put the opposite value in the # correct place. 
- if sp.dkind == "f": + if sp.dkind in ("f", "i"): D[:, 3] = -D[:, 3] else: D[:, 2] = np.conj(D[:, 2]) @@ -1462,7 +1568,7 @@ def trs(self): # Apply Pauli-Y on the left and right of each spin-box if sp.is_spinorbit: - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # [R11, R22, R12, I12, I11, I22, R21, I21] # [R11, R22] = [R22, R11] # [I12, I21] = [I21, I12] (conj + Y @ Y[sign-changes conj]) @@ -1473,7 +1579,7 @@ def trs(self): else: raise NotImplementedError elif sp.is_noncolinear: - if sp.dkind == "f": + if sp.dkind in ("f", "i"): # [R11, R22, R12, I12] D[:, 2] = -D[:, 2] else: @@ -1519,6 +1625,12 @@ def transform(self, matrix=None, dtype=None, spin=None, orthogonal=None): The transformation matrix does *not* act on the rows and columns, only on the final dimension of the matrix. + The matrix transformation is done like this: + + >>> out = in @ matrix.T + + Meaning that ``matrix[0, :]`` will be the factors of the input matrix elements. + Parameters ---------- matrix : array_like, optional @@ -1593,7 +1705,7 @@ def transform(self, matrix=None, dtype=None, spin=None, orthogonal=None): ) new._csr = self._csr.transform(matrix, dtype=dtype) - if not orthogonal and self.orthogonal: + if self.orthogonal and not orthogonal: # set identity overlap matrix, loop over rows for i in range(new._csr.shape[0]): new._csr[i, i, -1] = 1.0 diff --git a/src/sisl/physics/spin.py b/src/sisl/physics/spin.py index c86510ee69..191c510916 100644 --- a/src/sisl/physics/spin.py +++ b/src/sisl/physics/spin.py @@ -56,7 +56,7 @@ class Spin: #: The :math:`\boldsymbol\sigma_z` Pauli matrix Z = np.array([[1, 0], [0, -1]], np.complex128) - __slots__ = ("_size", "_kind", "_dtype") + __slots__ = ("_kind", "_dtype") def __init__(self, kind="", dtype=None): if isinstance(kind, Spin): @@ -64,7 +64,6 @@ def __init__(self, kind="", dtype=None): dtype = kind._dtype self._kind = kind._kind self._dtype = dtype - self._size = kind._size return if dtype is None: @@ -107,24 +106,6 @@ def __init__(self, kind="", 
dtype=None): # Now assert the checks self._kind = kind - if np.dtype(dtype).kind == "c": - size = { - self.UNPOLARIZED: 1, - self.POLARIZED: 2, - self.NONCOLINEAR: 4, - self.SPINORBIT: 4, - }.get(kind) - - else: - size = { - self.UNPOLARIZED: 1, - self.POLARIZED: 2, - self.NONCOLINEAR: 4, - self.SPINORBIT: 8, - }.get(kind) - - self._size = size - def __str__(self): if self.is_unpolarized: return f"{self.__class__.__name__}{{unpolarized, kind={self.dkind}}}" @@ -151,12 +132,32 @@ def dkind(self): @property def size(self): """Number of elements to describe the spin-components""" - return self._size + size = { + "c": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 3, + self.SPINORBIT: 4, + }, + "i": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 4, + self.SPINORBIT: 8, + }, + "f": { + self.UNPOLARIZED: 1, + self.POLARIZED: 2, + self.NONCOLINEAR: 4, + self.SPINORBIT: 8, + }, + }[self.dkind][self.kind] + return size @property def spinor(self): """Number of spinor components (1 or 2)""" - return min(2, self._size) + return min(2, self.size) @property def kind(self): @@ -196,7 +197,7 @@ def is_spinorbit(self): return self.kind == Spin.SPINORBIT def __len__(self): - return self._size + return self.size # Comparisons def __lt__(self, other): @@ -221,6 +222,5 @@ def __getstate__(self): return {"size": self.size, "kind": self.kind, "dtype": self.dtype} def __setstate__(self, state): - self._size = state["size"] self._kind = state["kind"] self._dtype = state["dtype"]