From 5709ae828f19ce6e2bcc0b777f01f6ebe04d1d5b Mon Sep 17 00:00:00 2001
From: Michael Staneker <michael.staneker@ecmwf.int>
Date: Mon, 18 Dec 2023 18:08:34 +0000
Subject: [PATCH] introduce 'normalize_array_access' for prior execution to
 'flatten_arrays' to allow for arrays start counting not with '1'

---
 loki/transform/fortran_c_transform.py      |   9 +-
 loki/transform/transform_array_indexing.py |  64 ++++--
 tests/test_transform_array_indexing.py     | 220 ++++++++++++++++-----
 tests/test_transpile.py                    |   3 +-
 4 files changed, 231 insertions(+), 65 deletions(-)

diff --git a/loki/transform/fortran_c_transform.py b/loki/transform/fortran_c_transform.py
index d261183cc..ad563c243 100644
--- a/loki/transform/fortran_c_transform.py
+++ b/loki/transform/fortran_c_transform.py
@@ -12,7 +12,7 @@
 from loki.transform.transform_array_indexing import (
     shift_to_zero_indexing, invert_array_indices,
     resolve_vector_notation, normalize_range_indexing,
-    flatten_arrays
+    normalize_array_access, flatten_arrays
 )
 from loki.transform.transform_associates import resolve_associates
 from loki.transform.transform_utilities import (
@@ -37,7 +37,6 @@
 from loki.tools import as_tuple, flatten
 from loki.types import BasicType, DerivedType, SymbolAttributes
 
-
 __all__ = ['FortranCTransformation']
 
 
@@ -51,8 +50,9 @@ class FortranCTransformation(Transformation):
     # Set of standard module names that have no C equivalent
     __fortran_intrinsic_modules = ['ISO_FORTRAN_ENV', 'ISO_C_BINDING']
 
-    def __init__(self, header_modules=None, inline_elementals=True):
+    def __init__(self, header_modules=None, inline_elementals=True, order='F'):
         self.inline_elementals = inline_elementals
+        self.order = order
 
         # Maps from original type name to ISO-C and C-struct types
         self.c_structs = OrderedDict()
@@ -369,13 +369,14 @@ def generate_c_kernel(self, routine, **kwargs):
 
         # Clean up Fortran vector notation
         resolve_vector_notation(kernel)
+        normalize_array_access(kernel)
         normalize_range_indexing(kernel)
 
         # Convert array indexing to C conventions
         # TODO: Resolve reductions (eg. SUM(myvar(:)))
         invert_array_indices(kernel)
         shift_to_zero_indexing(kernel)
-        flatten_arrays(kernel, order="C", start_index=0)
+        flatten_arrays(kernel, order=self.order, start_index=0)
 
         # Inline all known parameters, since they can be used in declarations,
         # and thus need to be known before we can fetch them via getters.
diff --git a/loki/transform/transform_array_indexing.py b/loki/transform/transform_array_indexing.py
index 34d571c15..33587f3b3 100644
--- a/loki/transform/transform_array_indexing.py
+++ b/loki/transform/transform_array_indexing.py
@@ -29,7 +29,7 @@
     'resolve_vector_notation', 'normalize_range_indexing',
     'promote_variables', 'promote_nonmatching_variables',
     'promotion_dimensions_from_loop_nest', 'demote_variables',
-    'flatten_arrays'
+    'flatten_arrays', 'normalize_array_access'
 ]
 
 
@@ -498,6 +498,43 @@ def demote_variables(routine, variable_names, dimensions):
 
     info(f'[Loki::Transform] Demoted variables in {routine.name}: {", ".join(variable_names)}')
 
+def normalize_array_access(routine):
+    """
+    Shift all arrays to start counting at "1"
+    """
+    def is_range_index(dim):
+        return isinstance(dim, sym.RangeIndex) and not dim.lower == 1
+
+    vmap = {}
+    for v in FindVariables(unique=False).visit(routine.body):
+        if isinstance(v, sym.Array):
+            new_dims = []
+            for i, d in enumerate(v.shape):
+                if isinstance(d, sym.RangeIndex):
+                    if isinstance(v.dimensions[i], sym.RangeIndex):
+                        start = simplify(v.dimensions[i].start - d.start + 1) if d.start is not None else None
+                        stop = simplify(v.dimensions[i].stop - d.start + 1) if d.stop is not None else None
+                        new_dims += [sym.RangeIndex((start, stop, d.step))]
+                    else:
+                        start = simplify(v.dimensions[i] - d.start + 1) if d.start is not None else None
+                        new_dims += [start]
+                else:
+                    new_dims += [v.dimensions[i]]
+            vmap[v] = v.clone(dimensions=as_tuple(new_dims))
+    routine.body = SubstituteExpressions(vmap).visit(routine.body)
+
+    vmap = {}
+    for v in routine.variables:
+        if isinstance(v, sym.Array):
+            new_dims = [sym.RangeIndex((1, simplify(d.upper - d.lower + 1)))
+                if is_range_index(d) else d for d in v.dimensions]
+            new_shape = [sym.RangeIndex((1, simplify(d.upper - d.lower + 1)))
+                if is_range_index(d) else d for d in v.shape]
+            new_type = v.type.clone(shape=as_tuple(new_shape))
+            vmap[v] = v.clone(dimensions=as_tuple(new_dims), type=new_type)
+    routine.variables = [vmap.get(v, v) for v in routine.variables]
+    normalize_range_indexing(routine)
+
 
 def flatten_arrays(routine, order='F', start_index=1):
     """
@@ -515,26 +552,29 @@ def flatten_arrays(routine, order='F', start_index=1):
     """
     def new_dims(dim, shape):
         if len(dim) > 1:
-            assert not isinstance(shape[-1], sym.RangeIndex)
-            _dim = [sym.Sum((dim[-2], sym.Product((shape[-2], dim[-1] - start_index))))]
+            if isinstance(shape[-2], sym.RangeIndex):
+                raise TypeError(f'Resolve shapes being of type RangeIndex, e.g., "{shape[-2]}" before flattening!')
+            _dim = (sym.Sum((dim[-2], sym.Product((shape[-2], dim[-1] - start_index)))),)
             new_dim = dim[:-2]
-            new_dim.extend(_dim)
+            new_dim += _dim
             return new_dims(new_dim, shape[:-1])
-        return as_tuple(dim)
+        return dim
 
-    assert order in ['F', 'C']
-    if order == 'C':
+    if order == 'F':
         array_map = {
-            var: var.clone(dimensions=new_dims(list(var.dimensions)[::-1], list(var.shape)[::-1]))
+            var: var.clone(dimensions=new_dims(var.dimensions[::-1], var.shape[::-1]))
             for var in FindVariables().visit(routine.body)
-                if isinstance(var, sym.Array) and var.shape and len(var.shape)
+            if isinstance(var, sym.Array) and var.shape and len(var.shape)
         }
-    elif order == 'F':
+    elif order == 'C':
         array_map = {
-            var: var.clone(dimensions=new_dims(list(var.dimensions), list(var.shape)))
+            var: var.clone(dimensions=new_dims(var.dimensions, var.shape))
             for var in FindVariables().visit(routine.body)
-                if isinstance(var, sym.Array) and var.shape and len(var.shape)
+            if isinstance(var, sym.Array) and var.shape and len(var.shape)
         }
+    else:
+        raise ValueError(f'Unsupported array order "{order}"')
+
     routine.body = SubstituteExpressions(array_map).visit(routine.body)
 
     routine.variables = [v.clone(dimensions=as_tuple(sym.Product(v.shape)),
diff --git a/tests/test_transform_array_indexing.py b/tests/test_transform_array_indexing.py
index 0986470f4..f023d7d70 100644
--- a/tests/test_transform_array_indexing.py
+++ b/tests/test_transform_array_indexing.py
@@ -9,19 +9,26 @@
 import pytest
 import numpy as np
 
-from conftest import jit_compile, clean_test, available_frontends
-from loki import Subroutine, FindNodes, Assignment
+from conftest import jit_compile, jit_compile_lib, clean_test, available_frontends
+from loki import Subroutine, FindVariables, Array
 from loki.expression import symbols as sym
 from loki.transform import (
         promote_variables, demote_variables, normalize_range_indexing,
-        invert_array_indices, flatten_arrays
+        invert_array_indices, flatten_arrays,
+        normalize_array_access
         )
-
+from loki.transform import (
+    FortranCTransformation
+    )
+from loki.build import Builder
 
 @pytest.fixture(scope='module', name='here')
 def fixture_here():
     return Path(__file__).parent
 
+@pytest.fixture(scope='module', name='builder')
+def fixture_builder(here):
+    return Builder(source_dirs=here, build_dir=here/'build')
 
 @pytest.mark.parametrize('frontend', available_frontends())
 def test_transform_promote_variable_scalar(here, frontend):
@@ -319,38 +326,133 @@ def test_transform_demote_dimension_arguments(here, frontend):
     assert np.all(vec2 == 2) and np.sum(vec2) == 6
     assert np.all(matrix == 16) and np.sum(matrix) == 32
 
+@pytest.mark.parametrize('frontend', available_frontends())
+@pytest.mark.parametrize('start_index', (0, 1, 5))
+def test_normalize_array_access(here, frontend, start_index):
+    """
+    Test flattening or arrays, meaning converting multi-dimensional
+    arrays to one-dimensional arrays including corresponding
+    index arithmetic.
+    """
+    fcode = f"""
+    subroutine transform_normalize_array_access(x1, x2, x3, x4, l1, l2, l3, l4)
+        implicit none
+        integer :: i1, i2, i3, i4, c1, c2, c3, c4
+        integer, intent(in) :: l1, l2, l3, l4
+        integer, intent(inout) :: x1({start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x2({start_index}:l2+{start_index}-1, &
+         & {start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x3({start_index}:l3+{start_index}-1, &
+         & {start_index}:l2+{start_index}-1, {start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x4({start_index}:l4+{start_index}-1, &
+         & {start_index}:l3+{start_index}-1, {start_index}:l2+{start_index}-1, &
+         & {start_index}:l1+{start_index}-1)
+        c1 = 1
+        c2 = 1
+        c3 = 1
+        c4 = 1
+        x1({start_index}:l4+{start_index}-1) = 0
+        do i1={start_index},l1+{start_index}-1
+            x1(i1) = c1
+            do i2={start_index},l2+{start_index}-1
+                x2(i2, i1) = c2*10 + c1
+                do i3={start_index},l3+{start_index}-1
+                    x3(i3, i2, i1) = c3*100 + c2*10 + c1
+                    do i4={start_index},l4+{start_index}-1
+                        x4(i4, i3, i2, i1) = c4*1000 + c3*100 + c2*10 + c1
+                        c4 = c4 + 1
+                    end do
+                    c3 = c3 + 1
+                end do
+                c2 = c2 + 1
+            end do
+            c1 = c1 + 1
+        end do
+
+    end subroutine transform_normalize_array_access
+    """
+    def init_arguments(l1, l2, l3, l4):
+        x1 = np.zeros(shape=(l1,), order='F', dtype=np.int32)
+        x2 = np.zeros(shape=(l2,l1,), order='F', dtype=np.int32)
+        x3 = np.zeros(shape=(l3,l2,l1,), order='F', dtype=np.int32)
+        x4 = np.zeros(shape=(l4,l3,l2,l1,), order='F', dtype=np.int32)
+        return x1, x2, x3, x4
+
+    def validate_routine(routine):
+        arrays = [var for var in FindVariables().visit(routine.body) if isinstance(var, Array)]
+        for arr in arrays:
+            assert all(not isinstance(shape, sym.RangeIndex) for shape in arr.shape)
+
+    l1 = 2
+    l2 = 3
+    l3 = 4
+    l4 = 5
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    normalize_range_indexing(routine) # Fix OMNI nonsense
+    filepath = here/(f'{routine.name}_{frontend}.f90')
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+    orig_x1, orig_x2, orig_x3, orig_x4 = init_arguments(l1, l2, l3, l4)
+    function(orig_x1, orig_x2, orig_x3, orig_x4, l1, l2, l3, l4)
+
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    normalize_array_access(routine)
+    normalize_range_indexing(routine)
+    filepath = here/(f'{routine.name}_normalized_{frontend}.f90')
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+    x1, x2, x3, x4 = init_arguments(l1, l2, l3, l4)
+    function(x1, x2, x3, x4, l1, l2, l3, l4)
+    validate_routine(routine)
+
+    assert (x1 == orig_x1).all()
+    assert (x2 == orig_x2).all()
+    assert (x3 == orig_x3).all()
+    assert (x4 == orig_x4).all()
+
 
 @pytest.mark.parametrize('frontend', available_frontends())
-def test_transform_flatten_arrays(here, frontend):
+@pytest.mark.parametrize('start_index', (0, 1, 5))
+def test_transform_flatten_arrays(here, frontend, builder, start_index):
     """
     Test flattening or arrays, meaning converting multi-dimensional
     arrays to one-dimensional arrays including corresponding
     index arithmetic.
     """
-    fcode = """
+    fcode = f"""
     subroutine transform_flatten_arrays(x1, x2, x3, x4, l1, l2, l3, l4)
         implicit none
-        integer :: i1, i2, i3, i4
+        integer :: i1, i2, i3, i4, c1, c2, c3, c4
         integer, intent(in) :: l1, l2, l3, l4
-        integer, intent(inout) :: x1(l1), x2(l2, l1), x3(l3, l2, l1), x4(l4, l3, l2, l1)
-        
-        do i1=1,l1
-            x1(i1) = 2 * l1
-            do i2=1,l2
-                x2(i2, i1) = 2 * l2
-                do i3=1,l3
-                    x3(i3, i2, i1) = 2 * l3
-                    do i4=1,l4
-                        x4(i4, i3, i2, i1) = 2 * l4
+        integer, intent(inout) :: x1({start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x2({start_index}:l2+{start_index}-1, &
+         & {start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x3({start_index}:l3+{start_index}-1, &
+         & {start_index}:l2+{start_index}-1, {start_index}:l1+{start_index}-1)
+        integer, intent(inout) :: x4({start_index}:l4+{start_index}-1, &
+         & {start_index}:l3+{start_index}-1, {start_index}:l2+{start_index}-1, &
+         & {start_index}:l1+{start_index}-1)
+        c1 = 1
+        c2 = 1
+        c3 = 1
+        c4 = 1
+        do i1={start_index},l1+{start_index}-1
+            x1(i1) = c1
+            do i2={start_index},l2+{start_index}-1
+                x2(i2, i1) = c2*10 + c1
+                do i3={start_index},l3+{start_index}-1
+                    x3(i3, i2, i1) = c3*100 + c2*10 + c1
+                    do i4={start_index},l4+{start_index}-1
+                        x4(i4, i3, i2, i1) = c4*1000 + c3*100 + c2*10 + c1
+                        c4 = c4 + 1
                     end do
+                    c3 = c3 + 1
                 end do
+                c2 = c2 + 1
             end do
+            c1 = c1 + 1
         end do
-        
-        
+
     end subroutine transform_flatten_arrays
     """
-
     def init_arguments(l1, l2, l3, l4, flattened=False):
         x1 = np.zeros(shape=(l1,), order='F', dtype=np.int32)
         x2 = np.zeros(shape=(l2*l1) if flattened else (l2,l1,), order='F', dtype=np.int32)
@@ -358,53 +460,75 @@ def init_arguments(l1, l2, l3, l4, flattened=False):
         x4 = np.zeros(shape=(l4*l3*l2*l1) if flattened else (l4,l3,l2,l1,), order='F', dtype=np.int32)
         return x1, x2, x3, x4
 
-    def validate_arguments(x1, x2, x3, x4, l1, l2, l3, l4):
-        assert np.all(x1 == 2 * l1)
-        assert np.all(x2 == 2 * l2)
-        assert np.all(x3 == 2 * l3)
-        assert np.all(x4 == 2 * l4)
-
     def validate_routine(routine):
-        assignments = FindNodes(Assignment).visit(routine.body)
-        lhs = [assignment.lhs for assignment in assignments]
-        dims = [_lhs.dimensions for _lhs in lhs]
-        assert all(len(dim) == 1 for dim in dims)
-        assert dims[1][0].children[0] == 'i2'
-        assert dims[2][0].children[0] == 'i3'
-        assert dims[3][0].children[0] == 'i4'
+        arrays = [var for var in FindVariables().visit(routine.body) if isinstance(var, Array)]
+        assert all(len(arr.dimensions) == 1 for arr in arrays)
+        assert all(len(arr.shape) == 1 for arr in arrays)
 
     l1 = 2
-    l2 = 4
-    l3 = 6
-    l4 = 8
+    l2 = 3
+    l3 = 4
+    l4 = 5
     # Test the original implementation
     routine = Subroutine.from_source(fcode, frontend=frontend)
     normalize_range_indexing(routine) # Fix OMNI nonsense
-    filepath = here/(f'{routine.name}_{frontend}.f90')
+    filepath = here/(f'{routine.name}_{start_index}_{frontend}.f90')
     function = jit_compile(routine, filepath=filepath, objname=routine.name)
-    x1, x2, x3, x4 = init_arguments(l1, l2, l3, l4)
-    function(x1, x2, x3, x4, l1, l2, l3, l4)
-    validate_arguments(x1, x2, x3, x4, l1, l2, l3, l4)
+    orig_x1, orig_x2, orig_x3, orig_x4 = init_arguments(l1, l2, l3, l4)
+    function(orig_x1, orig_x2, orig_x3, orig_x4, l1, l2, l3, l4)
 
+    # Test flattening order='F'
     f_routine = Subroutine.from_source(fcode, frontend=frontend)
+    normalize_array_access(f_routine)
     normalize_range_indexing(f_routine) # Fix OMNI nonsense
     flatten_arrays(routine=f_routine, order='F', start_index=1)
-    filepath = here/(f'{f_routine.name}_flattened_F_{frontend}.f90')
+    filepath = here/(f'{f_routine.name}_{start_index}_flattened_F_{frontend}.f90')
     function = jit_compile(f_routine, filepath=filepath, objname=routine.name)
-    x1, x2, x3, x4 = init_arguments(l1, l2, l3, l4, flattened=True)
-    function(x1, x2, x3, x4, l1, l2, l3, l4)
-    validate_arguments(x1, x2, x3, x4, l1, l2, l3, l4)
+    f_x1, f_x2, f_x3, f_x4 = init_arguments(l1, l2, l3, l4, flattened=True)
+    function(f_x1, f_x2, f_x3, f_x4, l1, l2, l3, l4)
     validate_routine(f_routine)
 
+    assert (f_x1 == orig_x1.flatten()).all()
+    assert (f_x2 == orig_x2.flatten()).all()
+    assert (f_x3 == orig_x3.flatten()).all()
+    assert (f_x4 == orig_x4.flatten()).all()
+
+    # Test flattening order='C'
     c_routine = Subroutine.from_source(fcode, frontend=frontend)
+    normalize_array_access(c_routine)
     normalize_range_indexing(c_routine) # Fix OMNI nonsense
     invert_array_indices(c_routine)
     flatten_arrays(routine=c_routine, order='C', start_index=1)
-    filepath = here/(f'{c_routine.name}_flattened_C_{frontend}.f90')
+    filepath = here/(f'{c_routine.name}_{start_index}_flattened_C_{frontend}.f90')
     function = jit_compile(c_routine, filepath=filepath, objname=routine.name)
-    x1, x2, x3, x4 = init_arguments(l1, l2, l3, l4, flattened=True)
-    function(x1, x2, x3, x4, l1, l2, l3, l4)
-    validate_arguments(x1, x2, x3, x4, l1, l2, l3, l4)
+    c_x1, c_x2, c_x3, c_x4 = init_arguments(l1, l2, l3, l4, flattened=True)
+    function(c_x1, c_x2, c_x3, c_x4, l1, l2, l3, l4)
     validate_routine(c_routine)
 
     assert f_routine.body == c_routine.body
+
+    assert (c_x1 == orig_x1.flatten()).all()
+    assert (c_x2 == orig_x2.flatten()).all()
+    assert (c_x3 == orig_x3.flatten()).all()
+    assert (c_x4 == orig_x4.flatten()).all()
+
+    # Test C transpilation (which includes flattening)
+    f2c_routine = Subroutine.from_source(fcode, frontend=frontend)
+    f2c = FortranCTransformation(order="C")
+    f2c.apply(source=f2c_routine, path=here)
+    libname = f'fc_{f2c_routine.name}_{start_index}_{frontend}'
+    c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=here, name=libname, builder=builder)
+    fc_function = c_kernel.transform_flatten_arrays_fc_mod.transform_flatten_arrays_fc
+    f2c_x1, f2c_x2, f2c_x3, f2c_x4 = init_arguments(l1, l2, l3, l4, flattened=True)
+    fc_function(f2c_x1, f2c_x2, f2c_x3, f2c_x4, l1, l2, l3, l4)
+    validate_routine(c_routine)
+
+    assert (f2c_x1 == orig_x1.flatten()).all()
+    assert (f2c_x2 == orig_x2.flatten()).all()
+    assert (f2c_x3 == orig_x3.flatten()).all()
+    assert (f2c_x4 == orig_x4.flatten()).all()
+
+    builder.clean()
+    clean_test(filepath)
+    f2c.wrapperpath.unlink()
+    f2c.c_path.unlink()
diff --git a/tests/test_transpile.py b/tests/test_transpile.py
index c4a9a2b43..17235bad9 100644
--- a/tests/test_transpile.py
+++ b/tests/test_transpile.py
@@ -940,7 +940,8 @@ def test_transpile_expressions(here, builder, frontend):
 
     # Make sure minus signs are represented correctly in the C code
     ccode = cgen(routine)
-    assert 'vector[i - 1 - 1]' in ccode  # double minus due to index shift to 0
+    # double minus due to index shift to 0
+    assert 'vector[i - 1 - 1]' in ccode or 'vector[-1 + i - 1]' in ccode
     assert 'vector[i - 1]' in ccode
     assert '-scalar' in ccode  # scalar with negative sign