Skip to content

Commit

Permalink
xe: enable source debug information for OpenCL C kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
rjoursler committed Jan 3, 2025
1 parent ba9c30d commit eb5c3df
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 50 deletions.
23 changes: 13 additions & 10 deletions cmake/gen_gpu_kernel.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2019-2024 Intel Corporation
# Copyright 2019-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -22,17 +22,20 @@

file(READ ${CL_FILE} cl_file_lines)

# Remove C++ style comments
string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
# Remove repeated whitespaces
string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
# Remove leading whitespaces
string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
# Remove empty lines
string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
# Minify the kernel source only when it is explicitly requested through
# CL_MINIFY or when the unmodified source is longer than 65535 characters.
# Otherwise the text read from CL_FILE is left exactly as it was.
string(LENGTH "${cl_file_lines}" len)
if(CL_MINIFY STREQUAL "ON" OR len GREATER 65535)
# Remove C++ style comments
string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
# Remove repeated whitespaces
string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
# Remove leading whitespaces
string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
# Remove empty lines
string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
endif()

string(LENGTH "${cl_file_lines}" len)
if(len GREATER 65535)
if(MSVC AND len GREATER 65535)
message(WARNING "Windows requires string literals to fit in 65535 bytes. Please split ${CL_FILE}.")
endif()

Expand Down
9 changes: 8 additions & 1 deletion cmake/gen_gpu_kernel_list.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2020-2021 Intel Corporation
# Copyright 2020-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -46,6 +46,12 @@ endfunction()
function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
set(_sources "${SOURCES}")

# Select whether generated kernel sources are minified: minification is
# switched off when DNNL_DEV_MODE is set or the build type is Debug, and
# switched on for every other configuration.
# NOTE(review): presumably unminified sources keep line numbers usable for
# source-level debugging of the kernels - confirm.
if(DNNL_DEV_MODE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CL_MINIFY "OFF")
else()
set(CL_MINIFY "ON")
endif()

set(KER_LIST_EXTERN)
set(KER_LIST_ENTRIES)
set(KER_HEADERS_EXTERN)
Expand All @@ -62,6 +68,7 @@ function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
COMMAND ${CMAKE_COMMAND}
-DCL_FILE="${header_path}"
-DGEN_FILE="${gen_file}"
-DCL_MINIFY="${CL_MINIFY}"
-P ${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel.cmake
DEPENDS ${header_path}
)
Expand Down
5 changes: 3 additions & 2 deletions src/gpu/intel/compute/compute_engine.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -72,7 +72,8 @@ class compute_engine_t : public gpu::engine_t {
}

virtual status_t create_kernel_from_binary(compute::kernel_t &kernel,
const xpu::binary_t &binary, const char *kernel_name) const = 0;
const xpu::binary_t &binary, const char *kernel_name,
const program_src_t &src) const = 0;

virtual status_t create_kernels_from_cache_blob(
const cache_blob_t &cache_blob,
Expand Down
39 changes: 38 additions & 1 deletion src/gpu/intel/compute/kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,6 +34,43 @@ namespace gpu {
namespace intel {
namespace compute {

#if defined(__linux__) && (defined(DNNL_DEV_MODE) || !defined(NDEBUG))
// Owns a temporary file that holds the source text of a JIT-compiled program.
//
// A file is only created when the ZET_ENABLE_PROGRAM_DEBUGGING environment
// variable is set to a non-zero value; otherwise the object stays empty.
// Copies share ownership of the file, and it is unlinked when the last copy
// is destroyed.
struct program_src_t {
    // Creates an empty object that refers to no file.
    program_src_t() = default;

    // Writes `src_str` to a freshly created file under /tmp. On any failure
    // (file creation, write, or close) the object is left empty and no file
    // is left behind.
    program_src_t(const std::string &src_str) {
        // Only enable if gdb-oneapi debugging is active
        if (getenv_int("ZET_ENABLE_PROGRAM_DEBUGGING", 0) == 0) return;

        // mkstemp() replaces the trailing "XXXXXX" in place, so it needs a
        // writable copy of the template. The copy lives on the heap from the
        // start so ownership can be handed to name_ without another copy.
        const char name_template[] = "/tmp/dnnl_ocl_jit_src.XXXXXX";
        std::unique_ptr<char[]> path(new char[sizeof(name_template)]);
        std::memcpy(path.get(), name_template, sizeof(name_template));

        int fd = mkstemp(path.get());
        if (fd == -1) return;

        // write() may transfer fewer bytes than requested, so keep going
        // until the whole source is on disk or an error is reported.
        const char *data = src_str.c_str();
        size_t remaining = src_str.length();
        bool ok = true;
        while (remaining > 0) {
            const ssize_t written = write(fd, data, remaining);
            if (written <= 0) {
                ok = false;
                break;
            }
            data += written;
            remaining -= static_cast<size_t>(written);
        }
        if (close(fd) != 0) ok = false;

        // Never advertise a truncated source file to the debugger.
        if (!ok) {
            unlink(path.get());
            return;
        }

        auto deleter = [](char *p) {
            unlink(p);
            delete[] p;
        };

        // The buffer already holds the final path, so the deleter acts on a
        // valid name even if constructing the shared_ptr itself fails.
        name_ = std::shared_ptr<char>(path.release(), deleter);
    }

    // True when a source file was successfully created.
    operator bool() const { return name_ != nullptr; }

    // Path of the source file, or nullptr when no file was created.
    const char *name() const { return name_.get(); }

private:
    std::shared_ptr<char> name_;
};
#else
// No-op stand-in with the same interface as the file-backed variant selected
// by the preceding #if: it never creates a file, always converts to false,
// and has no name.
struct program_src_t {
    program_src_t() = default;
    // The source text is intentionally ignored.
    program_src_t(const std::string & /*src_str*/) {}
    // Always false: no source file is ever associated with this object.
    operator bool() const { return false; }
    // Always nullptr: there is no file path to report.
    const char *name() const { return nullptr; }
};
#endif

class kernel_impl_t {
public:
kernel_impl_t() = default;
Expand Down
35 changes: 21 additions & 14 deletions src/gpu/intel/ocl/ocl_gpu_engine.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -140,8 +140,8 @@ status_t create_ocl_kernel_from_cache_blob(const ocl_gpu_engine_t *ocl_engine,
OCL_CHECK(err);

std::shared_ptr<compute::kernel_impl_t> kernel_impl
= std::make_shared<ocl_gpu_kernel_t>(
std::move(ocl_kernel), arg_types);
= std::make_shared<ocl_gpu_kernel_t>(std::move(ocl_kernel),
arg_types, compute::program_src_t());
(*kernels)[i] = std::move(kernel_impl);
}

Expand Down Expand Up @@ -229,7 +229,8 @@ inline status_t fuse_microkernels(cl_context context, cl_device_id device,
} // namespace

status_t ocl_gpu_engine_t::build_program_from_source(
xpu::ocl::wrapper_t<cl_program> &program, const char *code_string,
xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
const char *code_string,
const compute::kernel_ctx_t &kernel_ctx) const {
std::string options = kernel_ctx.options();

Expand All @@ -249,6 +250,9 @@ status_t ocl_gpu_engine_t::build_program_from_source(
std::string pp_code_str = pp_code.str();
const char *pp_code_str_ptr = pp_code_str.c_str();

src = {pp_code_str};
if (src) { options += " -g -s " + std::string(src.name()); }

debugdump_processed_source(
pp_code_str, options, dev_info->get_cl_ext_options());

Expand All @@ -268,7 +272,8 @@ status_t ocl_gpu_engine_t::build_program_from_source(
}

status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,
const xpu::binary_t &binary, const char *kernel_name) const {
const xpu::binary_t &binary, const char *kernel_name,
const compute::program_src_t &src) const {
xpu::ocl::wrapper_t<cl_program> program;
CHECK(xpu::ocl::create_program(
program, this->device(), this->context(), binary));
Expand All @@ -283,7 +288,7 @@ status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,

std::shared_ptr<compute::kernel_impl_t> kernel_impl
= std::make_shared<ocl_gpu_kernel_t>(
std::move(ocl_kernel), arg_types);
std::move(ocl_kernel), arg_types, src);
kernel = std::move(kernel_impl);

return status::success;
Expand All @@ -301,14 +306,14 @@ status_t ocl_gpu_engine_t::create_kernel(
if (!jitter) return status::invalid_arguments;
xpu::binary_t binary = jitter->get_binary(context(), device());
if (binary.empty()) return status::runtime_error;
VCHECK_KERNEL(
create_kernel_from_binary(*kernel, binary, jitter->kernel_name()),
VCHECK_KERNEL(create_kernel_from_binary(
*kernel, binary, jitter->kernel_name(), {}),
VERBOSE_KERNEL_CREATION_FAIL, jitter->kernel_name());
return status::success;
}

status_t ocl_gpu_engine_t::create_program(
xpu::ocl::wrapper_t<cl_program> &program,
xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
const std::vector<const char *> &kernel_names,
const compute::kernel_ctx_t &kernel_ctx) const {

Expand Down Expand Up @@ -342,7 +347,7 @@ status_t ocl_gpu_engine_t::create_program(
"kernels in a single .cl source file or split creation in groups "
"based on their .cl source file.";

return build_program_from_source(program, source, kernel_ctx);
return build_program_from_source(program, src, source, kernel_ctx);
}

status_t ocl_gpu_engine_t::create_kernels(
Expand All @@ -354,13 +359,15 @@ status_t ocl_gpu_engine_t::create_kernels(
*kernels = std::vector<compute::kernel_t>(kernel_names.size());

xpu::ocl::wrapper_t<cl_program> program;
CHECK(create_program(program, kernel_names, kernel_ctx));
return create_kernels_from_program(kernels, kernel_names, program);
compute::program_src_t src;
CHECK(create_program(program, src, kernel_names, kernel_ctx));
return create_kernels_from_program(kernels, kernel_names, program, src);
}

status_t ocl_gpu_engine_t::create_kernels_from_program(
std::vector<compute::kernel_t> *kernels,
const std::vector<const char *> &kernel_names, cl_program program) {
const std::vector<const char *> &kernel_names, cl_program program,
const compute::program_src_t &src) {
*kernels = std::vector<compute::kernel_t>(kernel_names.size());
for (size_t i = 0; i < kernel_names.size(); ++i) {
if (!kernel_names[i]) continue;
Expand All @@ -373,7 +380,7 @@ status_t ocl_gpu_engine_t::create_kernels_from_program(

std::shared_ptr<compute::kernel_impl_t> kernel_impl
= std::make_shared<ocl_gpu_kernel_t>(
std::move(ocl_kernel), arg_types);
std::move(ocl_kernel), arg_types, src);
(*kernels)[i] = std::move(kernel_impl);
}

Expand Down
12 changes: 7 additions & 5 deletions src/gpu/intel/ocl/ocl_gpu_engine.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -48,8 +48,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
impl::stream_t **stream, impl::stream_impl_t *stream_impl) override;

status_t create_kernel_from_binary(compute::kernel_t &kernel,
const xpu::binary_t &binary,
const char *kernel_name) const override;
const xpu::binary_t &binary, const char *kernel_name,
const compute::program_src_t &src) const override;

status_t create_kernels_from_cache_blob(const cache_blob_t &cache_blob,
std::vector<compute::kernel_t> &kernels,
Expand All @@ -64,7 +64,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {

static status_t create_kernels_from_program(
std::vector<compute::kernel_t> *kernels,
const std::vector<const char *> &kernel_names, cl_program program);
const std::vector<const char *> &kernel_names, cl_program program,
const compute::program_src_t &src);

const impl_list_item_t *get_concat_implementation_list() const override {
return gpu_impl_list_t::get_concat_implementation_list();
Expand Down Expand Up @@ -100,6 +101,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
}

status_t create_program(xpu::ocl::wrapper_t<cl_program> &program,
compute::program_src_t &src,
const std::vector<const char *> &kernel_names,
const compute::kernel_ctx_t &kernel_ctx) const;

Expand All @@ -111,7 +113,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
}

status_t build_program_from_source(xpu::ocl::wrapper_t<cl_program> &program,
const char *code_string,
compute::program_src_t &src, const char *code_string,
const compute::kernel_ctx_t &kernel_ctx) const;

~ocl_gpu_engine_t() override = default;
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/intel/ocl/ocl_gpu_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -111,9 +111,11 @@ class ocl_gpu_kernel_cache_t {
};

ocl_gpu_kernel_t::ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
const std::vector<gpu::intel::compute::scalar_type_t> &arg_types)
const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
compute::program_src_t src)
: ocl_kernel_(std::move(ocl_kernel))
, arg_types_(arg_types)
, src_(src)
, save_events_(false) {
cache_ = std::make_shared<ocl_gpu_kernel_cache_t>(ocl_kernel_);
}
Expand Down
7 changes: 5 additions & 2 deletions src/gpu/intel/ocl/ocl_gpu_kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,7 +35,8 @@ class ocl_gpu_kernel_cache_t;
class ocl_gpu_kernel_t : public compute::kernel_impl_t {
public:
ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
const std::vector<gpu::intel::compute::scalar_type_t> &arg_types);
const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
compute::program_src_t src);
~ocl_gpu_kernel_t() override = default;

cl_kernel ocl_kernel() const { return ocl_kernel_; }
Expand All @@ -59,11 +60,13 @@ class ocl_gpu_kernel_t : public compute::kernel_impl_t {

status_t dump() const override;
std::string name() const override;
const compute::program_src_t &src() const { return src_; }

private:
xpu::ocl::wrapper_t<cl_kernel> ocl_kernel_;
std::vector<gpu::intel::compute::scalar_type_t> arg_types_;
std::shared_ptr<ocl_gpu_kernel_cache_t> cache_;
compute::program_src_t src_;
bool save_events_;
};

Expand Down
Loading

0 comments on commit eb5c3df

Please sign in to comment.