Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[C vecextensions target] #577

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
AddressSpace,
TemporaryVariable,
SubstitutionRule,
CallMangleInfo)
CallMangleInfo,
OpenMPSIMDTag, VectorizeTag)
from loopy.kernel.function_interface import (
CallableKernel, ScalarCallable)
from loopy.translation_unit import (
Expand Down Expand Up @@ -153,6 +154,8 @@
from loopy.target.c import (CFamilyTarget, CTarget, ExecutableCTarget,
generate_header, CWithGNULibcTarget,
ExecutableCWithGNULibcTarget)
from loopy.target.c_vector_extensions import (CVectorExtensionsTarget,
ExecutableCVectorExtensionsTarget)
from loopy.target.cuda import CudaTarget
from loopy.target.opencl import OpenCLTarget
from loopy.target.pyopencl import PyOpenCLTarget
Expand Down Expand Up @@ -190,7 +193,7 @@
"AddressSpace",
"TemporaryVariable",
"SubstitutionRule",
"CallMangleInfo",
"CallMangleInfo", "OpenMPSIMDTag", "VectorizeTag",

"make_kernel", "UniqueName", "make_function",

Expand Down Expand Up @@ -300,6 +303,7 @@
"TargetBase",
"CFamilyTarget", "CTarget", "ExecutableCTarget", "generate_header",
"CWithGNULibcTarget", "ExecutableCWithGNULibcTarget",
"CVectorExtensionsTarget", "ExecutableCVectorExtensionsTarget",
"CudaTarget", "OpenCLTarget",
"PyOpenCLTarget", "ISPCTarget",
"NumbaTarget", "NumbaCudaTarget",
Expand Down
47 changes: 37 additions & 10 deletions loopy/codegen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,21 +395,48 @@ def try_vectorized(self, what, func):
return self.unvectorize(func)

def unvectorize(self, func):
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
from loopy.codegen.result import (merge_codegen_results,
CodeGenerationResult)
vinf = self.vectorization_info
vec_tag, = self.kernel.inames[vinf.iname].tags_of_type(VectorizeTag)
result = []
novec_self = self.copy(vectorization_info=False)

for i in range(vinf.length):
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
generated = func(new_codegen_state)

if isinstance(generated, list):
result.extend(generated)
if isinstance(vec_tag.fallback_impl_tag, UnrollTag):
novec_self = self.copy(vectorization_info=False)

for i in range(vinf.length):
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
generated = func(new_codegen_state)

if isinstance(generated, list):
result.extend(generated)
else:
result.append(generated)
elif isinstance(vec_tag.fallback_impl_tag, OpenMPSIMDTag):
novec_self = self.copy(vectorization_info=False)
astb = self.ast_builder
inner = func(novec_self)
if isinstance(inner, list):
inner = merge_codegen_results(novec_self, inner)
assert isinstance(inner, CodeGenerationResult)
if isinstance(inner.current_ast(novec_self),
astb.ast_comment_class):
# loop body is a comment => do not emit the loop
loop_cgr = inner
else:
result.append(generated)
result.append(astb.emit_pragma("omp simd"))
loop_cgr = inner.with_new_ast(
novec_self,
astb.emit_sequential_loop(
novec_self, vinf.iname, self.kernel.index_dtype,
0, vinf.length-1, inner.current_ast(novec_self)))
result.append(loop_cgr)
elif vec_tag.fallback_impl_tag is None:
raise RuntimeError("Could not vectorize all statements"
f" in name {vinf.iname}")

from loopy.codegen.result import merge_codegen_results
return merge_codegen_results(self, result)

@property
Expand Down
6 changes: 5 additions & 1 deletion loopy/codegen/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def generate_code_for_sched_index(codegen_state, sched_index):
elif isinstance(sched_item, EnterLoop):
from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
ForceSequentialTag, LoopedIlpTag, VectorizeTag,
OpenMPSIMDTag,
InameImplementationTag,
InOrderSequentialSequentialTag, filter_iname_tags_by_type)

Expand All @@ -117,12 +118,15 @@ def generate_code_for_sched_index(codegen_state, sched_index):
from loopy.codegen.loop import (
generate_unroll_loop,
generate_vectorize_loop,
generate_sequential_loop_dim_code)
generate_sequential_loop_dim_code,
generate_openmp_simd_loop)

if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
func = generate_unroll_loop
elif filter_iname_tags_by_type(tags, VectorizeTag):
func = generate_vectorize_loop
elif filter_iname_tags_by_type(tags, OpenMPSIMDTag):
func = generate_openmp_simd_loop
elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
ForceSequentialTag, InOrderSequentialSequentialTag)):
func = generate_sequential_loop_dim_code
Expand Down
7 changes: 7 additions & 0 deletions loopy/codegen/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ def generate_assignment_instruction_code(codegen_state, insn):
raise UnvectorizableError(
"LHS is scalar, RHS is vector, cannot assign")

if (lhs_is_vector
and (not rhs_is_vector)
and (not
kernel.target.broadcasts_scalar_assignment_to_vec_types)):
raise UnvectorizableError(
"LHS is vector, RHS is not vector, cannot assign")

is_vector = lhs_is_vector

del lhs_is_vector
Expand Down
34 changes: 32 additions & 2 deletions loopy/codegen/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,27 @@ def generate_unroll_loop(codegen_state, sched_index):

# {{{ vectorized loops

def raise_for_unvectorizable_loop(codegen_state, sched_index):
kernel = codegen_state.kernel
raise RuntimeError(f"Cannot vectorize {kernel.schedule[sched_index]}")


def generate_vectorize_loop(codegen_state, sched_index):
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
kernel = codegen_state.kernel

iname = kernel.linearization[sched_index].iname
vec_tag, = kernel.inames[iname].tags_of_type(VectorizeTag)
fallback_impl_tag = vec_tag.fallback_impl_tag

if isinstance(fallback_impl_tag, UnrollTag):
fallback_codegen_routine = generate_unroll_loop
elif isinstance(fallback_impl_tag, OpenMPSIMDTag):
fallback_codegen_routine = generate_openmp_simd_loop
elif fallback_impl_tag is None:
fallback_codegen_routine = raise_for_unvectorizable_loop
else:
raise NotImplementedError(fallback_impl_tag)

bounds = kernel.get_iname_bounds(iname, constants_only=True)

Expand All @@ -177,7 +194,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_upper_not_const",
"upper bound for vectorized loop '%s' is not a constant, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

length = int(pw_aff_to_expr(length_aff))

Expand All @@ -192,7 +209,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_lower_not_0",
"lower bound for vectorized loop '%s' is not zero, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

# {{{ 'implement' vectorization bounds

Expand Down Expand Up @@ -484,4 +501,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):

# }}}


# {{{ omp simd loop

def generate_openmp_simd_loop(codegen_state, sched_index):
return merge_codegen_results(
codegen_state,
[codegen_state.ast_builder.emit_pragma("omp simd"),
generate_sequential_loop_dim_code(codegen_state,
sched_index)])

# }}}


# vim: foldmethod=marker
10 changes: 9 additions & 1 deletion loopy/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ def combine(vectorizabilities):
return reduce(and_, vectorizabilities)

def map_sum(self, expr):
return any(self.rec(child) for child in expr.children)
possible = False
for child in expr.children:
possible = self.rec(child) if True else possible
return possible
kaushikcfd marked this conversation as resolved.
Show resolved Hide resolved

map_product = map_sum

Expand Down Expand Up @@ -176,6 +179,11 @@ def map_reduction(self, expr):
# FIXME: Do this more carefully
raise UnvectorizableError()

def map_if(self, expr):
raise UnvectorizableError("Emitting vector instructions with masks not"
" (yet) supported.")


# }}}

# vim: fdm=marker
4 changes: 3 additions & 1 deletion loopy/kernel/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,9 @@ def eval_expr_assert_integer_constant(i, expr):
# We'll do absolutely nothing here, which will result
# in the vector being returned.
pass

elif (vectorization_info is None
and target.allows_non_constant_indexing_for_vec_types):
vector_index = idx
else:
idx = eval_expr_assert_integer_constant(i, idx)

Expand Down
33 changes: 32 additions & 1 deletion loopy/kernel/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,29 @@ def __str__(self):
# }}}


class _NotProvided:
pass


class VectorizeTag(UniqueInameTag, HardwareConcurrentTag):
"""
.. attribute:: fallback_impl_tag

If the loop contains instructions that are not vectorizable, the code
generator will implement the loop as directed by `fallback_impl_tag`.
If *None*, then a :class:`RuntimeError` would be raised while
generating code for an unvectorizable instruction within the loop.
"""
def __init__(self, fallback_impl_tag=_NotProvided):
if fallback_impl_tag is _NotProvided:
from warnings import warn
warn("`fallback_impl_tag` not provided to VectorizeTag."
" This will be an error from 2023. To keep the current"
" behavior, instantiate as `VectorizeTag(UnrollTag())`",
DeprecationWarning, stacklevel=2)
fallback_impl_tag = UnrollTag()
super().__init__(fallback_impl_tag=fallback_impl_tag)

def __str__(self):
return "vec"

Expand All @@ -223,6 +245,15 @@ def __str__(self):
return "ord"


class OpenMPSIMDTag(InameImplementationTag):
"""
Directs the code generator to emit code with ``#pragma omp simd``
directive atop the loop.
"""
def __str__(self):
return "omp.simd"


def parse_tag(tag):
from pytools.tag import Tag as TagBase
if tag is None:
Expand All @@ -241,7 +272,7 @@ def parse_tag(tag):
elif tag in ["unr"]:
return UnrollTag()
elif tag in ["vec"]:
return VectorizeTag()
return VectorizeTag(UnrollTag())
elif tag in ["ilp", "ilp.unr"]:
return UnrolledIlpTag()
elif tag == "ilp.seq":
Expand Down
9 changes: 9 additions & 0 deletions loopy/target/c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,11 @@ def ast_block_class(self):
from cgen import Block
return Block

@property
def ast_comment_class(self):
from cgen import Comment
return Comment

@property
def ast_block_scope_class(self):
return ScopingBlock
Expand Down Expand Up @@ -1250,6 +1255,10 @@ def emit_comment(self, s):
from cgen import Comment
return Comment(s)

def emit_pragma(self, s):
from cgen import Pragma
return Pragma(s)

@property
def can_implement_conditionals(self):
return True
Expand Down
Loading