Skip to content

Commit

Permalink
[flang][cuda] Add c_devloc as intrinsic and inline it during lowering (
Browse files Browse the repository at this point in the history
…llvm#120648)

Add `c_devloc` as intrinsic and inline it during lowering. `c_devloc` is
used in CUDA Fortran to get the address of device variables.

For the moment, we borrow almost all semantic checks from `c_loc` except
for the pointer or target restriction. The specifications of `c_devloc`
are are pretty vague and we will relax/enforce the restrictions based on
library and apps usage comparing them to the reference compiler.
  • Loading branch information
clementval authored Jan 8, 2025
1 parent cdbba15 commit 878a574
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 5 deletions.
5 changes: 5 additions & 0 deletions flang/include/flang/Optimizer/Builder/FIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value cPtr, mlir::Type ty);

/// The type(C_DEVPTR) is defined as the derived type with only one
/// component of C_PTR type. Get the C address from the C_PTR component.
mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value cDevPtr, mlir::Type ty);

/// Get the C address value.
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value cPtr);
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
Expand Down
74 changes: 73 additions & 1 deletion flang/lib/Evaluate/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2663,6 +2663,8 @@ class IntrinsicProcTable::Implementation {
ActualArguments &, FoldingContext &) const;
std::optional<SpecificCall> HandleC_Loc(
ActualArguments &, FoldingContext &) const;
std::optional<SpecificCall> HandleC_Devloc(
ActualArguments &, FoldingContext &) const;
const std::string &ResolveAlias(const std::string &name) const {
auto iter{aliases_.find(name)};
return iter == aliases_.end() ? name : iter->second;
Expand Down Expand Up @@ -2690,7 +2692,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
return true;
}
// special cases
return name == "__builtin_c_loc" || name == "null";
return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
name == "null";
}
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
const std::string &name0) const {
Expand Down Expand Up @@ -3080,6 +3083,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
return std::nullopt;
}

// CUDA Fortran C_DEVLOC(x)
std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
ActualArguments &arguments, FoldingContext &context) const {
static const char *const keywords[]{"cptr", nullptr};

if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
CHECK(arguments.size() == 1);
const auto *expr{arguments[0].value().UnwrapExpr()};
if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
arguments[0], context)}) {
if (expr && !IsContiguous(*expr, context).value_or(true)) {
context.messages().Say(arguments[0]->sourceLocation(),
"C_DEVLOC() argument must be contiguous"_err_en_US);
}
if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
constExtents && GetSize(*constExtents) == 0) {
context.messages().Say(arguments[0]->sourceLocation(),
"C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
}
if (!(typeAndShape->type().category() != TypeCategory::Derived ||
typeAndShape->type().IsAssumedType() ||
(!typeAndShape->type().IsPolymorphic() &&
CountNonConstantLenParameters(
typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
context.messages().Say(arguments[0]->sourceLocation(),
"C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
} else if (typeAndShape->type().knownLength().value_or(1) == 0) {
context.messages().Say(arguments[0]->sourceLocation(),
"C_DEVLOC() argument may not be zero-length character"_err_en_US);
} else if (typeAndShape->type().category() != TypeCategory::Derived &&
!IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
if (typeAndShape->type().category() == TypeCategory::Character &&
typeAndShape->type().kind() == 1) {
// Default character kind, but length is not known to be 1
if (context.languageFeatures().ShouldWarn(
common::UsageWarning::CharacterInteroperability)) {
context.messages().Say(
common::UsageWarning::CharacterInteroperability,
arguments[0]->sourceLocation(),
"C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
}
} else if (context.languageFeatures().ShouldWarn(
common::UsageWarning::Interoperability)) {
context.messages().Say(common::UsageWarning::Interoperability,
arguments[0]->sourceLocation(),
"C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
}
}

characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
ddo.intent = common::Intent::In;
return SpecificCall{
SpecificIntrinsic{"__builtin_c_devloc"s,
characteristics::Procedure{
characteristics::FunctionResult{
DynamicType{GetBuiltinDerivedType(
builtinsScope_, "__builtin_c_devptr")}},
characteristics::DummyArguments{
characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
characteristics::Procedure::Attrs{
characteristics::Procedure::Attr::Pure}}},
std::move(arguments)};
}
}
return std::nullopt;
}

static bool CheckForNonPositiveValues(FoldingContext &context,
const ActualArgument &arg, const std::string &procName,
const std::string &argName) {
Expand Down Expand Up @@ -3270,6 +3340,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
} else { // function
if (call.name == "__builtin_c_loc") {
return HandleC_Loc(arguments, context);
} else if (call.name == "__builtin_c_devloc") {
return HandleC_Devloc(arguments, context);
} else if (call.name == "null") {
return HandleNull(arguments, context);
}
Expand Down
19 changes: 19 additions & 0 deletions flang/lib/Optimizer/Builder/FIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
cPtr, addrFieldIndex);
}

mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value cDevPtr, mlir::Type ty) {
auto recTy = mlir::cast<fir::RecordType>(ty);
assert(recTy.getTypeList().size() == 1);
auto cptrFieldName = recTy.getTypeList()[0].first;
mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
auto fieldIndexType = fir::FieldType::get(ty.getContext());
mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
loc, fieldIndexType, cptrFieldName, recTy,
/*typeParams=*/mlir::ValueRange{});
auto cptrCoord = builder.create<fir::CoordinateOp>(
loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
auto [addrFieldIndex, addrFieldTy] =
genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
cptrCoord, addrFieldIndex);
}

mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value cPtr) {
Expand Down
18 changes: 15 additions & 3 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
&I::genCAssociatedCPtr,
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
{"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
{"c_f_pointer",
&I::genCFPointer,
{{{"cptr", asValue},
Expand Down Expand Up @@ -2867,11 +2868,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
static fir::ExtendedValue
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
bool isFunc = false) {
bool isFunc = false, bool isDevLoc = false) {
assert(args.size() == 1);
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
mlir::Value resAddr =
fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
mlir::Value resAddr;
if (isDevLoc)
resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
else
resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
"argument must have been lowered to box type");
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
Expand Down Expand Up @@ -2928,6 +2932,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
return genCAssociated(builder, loc, resultType, args);
}

// C_DEVLOC
fir::ExtendedValue
IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
/*isDevLoc=*/true);
}

// C_F_POINTER
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
Expand Down
4 changes: 4 additions & 0 deletions flang/module/__fortran_builtins.f90
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
intrinsic :: __builtin_c_loc
public :: __builtin_c_loc

intrinsic :: __builtin_c_devloc
public :: __builtin_c_devloc

intrinsic :: __builtin_c_f_pointer
public :: __builtin_c_f_pointer

Expand Down Expand Up @@ -144,6 +147,7 @@

type :: __force_derived_type_instantiations
type(__builtin_c_ptr) :: c_ptr
type(__builtin_c_devptr) :: c_devptr
type(__builtin_c_funptr) :: c_funptr
type(__builtin_event_type) :: event_type
type(__builtin_lock_type) :: lock_type
Expand Down
2 changes: 1 addition & 1 deletion flang/module/__fortran_type_info.f90
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
module __fortran_type_info

use, intrinsic :: __fortran_builtins, &
only: __builtin_c_ptr, __builtin_c_funptr
only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
implicit none

! Set PRIVATE by default to explicitly only export what is meant
Expand Down
21 changes: 21 additions & 0 deletions flang/test/Lower/CUDA/cuda-cdevloc.cuf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s

attributes(global) subroutine testcdevloc(a)
use __fortran_builtins, only: c_devloc => __builtin_c_devloc
integer, device :: a(10)
print*, c_devloc(a(1))
end

! CHECK-LABEL: func.func @_QPtestcdevloc(
! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>

0 comments on commit 878a574

Please sign in to comment.