xe: enable source debug information for OpenCL C kernels

oneapi-src · Jan 3, 2025 · eb5c3df · eb5c3df
1 parent ba9c30d
commit eb5c3df
Show file tree

Hide file tree

Showing 10 changed files with 117 additions and 50 deletions.
diff --git a/cmake/gen_gpu_kernel.cmake b/cmake/gen_gpu_kernel.cmake
@@ -1,5 +1,5 @@
 #===============================================================================
-# Copyright 2019-2024 Intel Corporation
+# Copyright 2019-2025 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,17 +22,24 @@
 
 file(READ ${CL_FILE} cl_file_lines)
 
-# Remove C++ style comments
-string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
-# Remove repeated whitespaces
-string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
-# Remove leading whitespaces
-string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
-# Remove empty lines
-string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
+# Leave the kernel text untouched (readable for source-level debugging) unless
+# minification was requested or the text exceeds the 65535-byte limit below.
+string(LENGTH "${cl_file_lines}" len)
+if(CL_MINIFY STREQUAL "ON" OR len GREATER 65535)
+    # Remove C++ style comments
+    string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
+    # Remove repeated whitespaces
+    string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
+    # Remove leading whitespaces
+    string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
+    # Remove empty lines
+    string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
+endif()
 
 string(LENGTH "${cl_file_lines}" len)
-if(len GREATER 65535)
+# This file is executed in script mode (cmake -P), where no compiler is
+# detected and MSVC is never defined; check the host platform instead.
+if(CMAKE_HOST_WIN32 AND len GREATER 65535)
     message(WARNING "Windows requires string literals to fit in 65535 bytes. Please split ${CL_FILE}.")
 endif()
 

diff --git a/cmake/gen_gpu_kernel_list.cmake b/cmake/gen_gpu_kernel_list.cmake
@@ -1,5 +1,5 @@
 #===============================================================================
-# Copyright 2020-2021 Intel Corporation
+# Copyright 2020-2025 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -46,6 +46,12 @@ endfunction()
 function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
     set(_sources "${SOURCES}")
 
+    if(DNNL_DEV_MODE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(CL_MINIFY "OFF")
+    else()
+        set(CL_MINIFY "ON")
+    endif()
+
     set(KER_LIST_EXTERN)
     set(KER_LIST_ENTRIES)
     set(KER_HEADERS_EXTERN)
@@ -62,6 +68,7 @@ function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
             COMMAND ${CMAKE_COMMAND}
                 -DCL_FILE="${header_path}"
                 -DGEN_FILE="${gen_file}"
+                -DCL_MINIFY="${CL_MINIFY}"
                 -P ${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel.cmake
             DEPENDS ${header_path}
         )

diff --git a/src/gpu/intel/compute/compute_engine.hpp b/src/gpu/intel/compute/compute_engine.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -72,7 +72,8 @@ class compute_engine_t : public gpu::engine_t {
     }
 
     virtual status_t create_kernel_from_binary(compute::kernel_t &kernel,
-            const xpu::binary_t &binary, const char *kernel_name) const = 0;
+            const xpu::binary_t &binary, const char *kernel_name,
+            const program_src_t &src) const = 0;
 
     virtual status_t create_kernels_from_cache_blob(
             const cache_blob_t &cache_blob,

diff --git a/src/gpu/intel/compute/kernel.hpp b/src/gpu/intel/compute/kernel.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -34,6 +34,80 @@ namespace gpu {
 namespace intel {
 namespace compute {
 
+// Records an optional on-disk copy of OpenCL C program source so a debugger
+// can map a JIT-compiled kernel back to its source lines. The working
+// implementation is only compiled on Linux in DNNL_DEV_MODE or assert-enabled
+// (!NDEBUG) builds; every other configuration gets a no-op stub with the same
+// interface.
+#if defined(__linux__) && (defined(DNNL_DEV_MODE) || !defined(NDEBUG))
+struct program_src_t {
+    // Empty record: converts to false, name() returns nullptr.
+    program_src_t() = default;
+
+    // Writes src_str to a freshly created temporary file when the
+    // ZET_ENABLE_PROGRAM_DEBUGGING environment variable is non-zero. If the
+    // file cannot be created or fully written, the record stays empty and the
+    // partial file is removed. Deliberately non-explicit so existing callers
+    // can keep constructing it implicitly from a std::string.
+    program_src_t(const std::string &src_str) {
+        // Only enable if gdb-oneapi debugging is active
+        if (getenv_int("ZET_ENABLE_PROGRAM_DEBUGGING", 0) == 0) return;
+
+        // mkstemp() overwrites the trailing XXXXXX in place; sizeof(name)
+        // includes the terminating NUL.
+        char name[] = "/tmp/dnnl_ocl_jit_src.XXXXXX";
+        const int fd = mkstemp(name);
+        if (fd == -1) return;
+
+        // write() may transfer fewer bytes than requested, so loop until the
+        // whole source is on disk or an error is reported.
+        const char *data = src_str.c_str();
+        size_t left = src_str.length();
+        bool ok = true;
+        while (left > 0) {
+            const ssize_t written = write(fd, data, left);
+            if (written <= 0) {
+                ok = false;
+                break;
+            }
+            data += written;
+            left -= static_cast<size_t>(written);
+        }
+        if (close(fd) != 0) ok = false;
+        if (!ok) {
+            // Never hand a truncated source file to the compiler/debugger.
+            unlink(name);
+            return;
+        }
+
+        // The file is deleted when the last copy of this record goes away.
+        auto deleter = [](char *path) {
+            unlink(path);
+            delete[] path;
+        };
+
+        name_ = std::shared_ptr<char>(new char[sizeof(name)], deleter);
+        std::memcpy(name_.get(), name, sizeof(name));
+    }
+
+    // True when a source file was written for this program.
+    operator bool() const { return name_ != nullptr; }
+    // NUL-terminated path of the source file, or nullptr for an empty record.
+    const char *name() const { return name_.get(); }
+
+private:
+    std::shared_ptr<char> name_;
+};
+#else
+struct program_src_t {
+    program_src_t() = default;
+    program_src_t(const std::string &src_str) {}
+    // The stub never holds a source file.
+    operator bool() const { return false; }
+    const char *name() const { return nullptr; }
+};
+#endif
+
 class kernel_impl_t {
 public:
     kernel_impl_t() = default;

diff --git a/src/gpu/intel/ocl/ocl_gpu_engine.cpp b/src/gpu/intel/ocl/ocl_gpu_engine.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -140,8 +140,8 @@ status_t create_ocl_kernel_from_cache_blob(const ocl_gpu_engine_t *ocl_engine,
         OCL_CHECK(err);
 
         std::shared_ptr<compute::kernel_impl_t> kernel_impl
-                = std::make_shared<ocl_gpu_kernel_t>(
-                        std::move(ocl_kernel), arg_types);
+                = std::make_shared<ocl_gpu_kernel_t>(std::move(ocl_kernel),
+                        arg_types, compute::program_src_t());
         (*kernels)[i] = std::move(kernel_impl);
     }
 
@@ -229,7 +229,8 @@ inline status_t fuse_microkernels(cl_context context, cl_device_id device,
 } // namespace
 
 status_t ocl_gpu_engine_t::build_program_from_source(
-        xpu::ocl::wrapper_t<cl_program> &program, const char *code_string,
+        xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
+        const char *code_string,
         const compute::kernel_ctx_t &kernel_ctx) const {
     std::string options = kernel_ctx.options();
 
@@ -249,6 +250,9 @@ status_t ocl_gpu_engine_t::build_program_from_source(
     std::string pp_code_str = pp_code.str();
     const char *pp_code_str_ptr = pp_code_str.c_str();
 
+    src = {pp_code_str};
+    if (src) { options += " -g -s " + std::string(src.name()); }
+
     debugdump_processed_source(
             pp_code_str, options, dev_info->get_cl_ext_options());
 
@@ -268,7 +272,8 @@ status_t ocl_gpu_engine_t::build_program_from_source(
 }
 
 status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,
-        const xpu::binary_t &binary, const char *kernel_name) const {
+        const xpu::binary_t &binary, const char *kernel_name,
+        const compute::program_src_t &src) const {
     xpu::ocl::wrapper_t<cl_program> program;
     CHECK(xpu::ocl::create_program(
             program, this->device(), this->context(), binary));
@@ -283,7 +288,7 @@ status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,
 
     std::shared_ptr<compute::kernel_impl_t> kernel_impl
             = std::make_shared<ocl_gpu_kernel_t>(
-                    std::move(ocl_kernel), arg_types);
+                    std::move(ocl_kernel), arg_types, src);
     kernel = std::move(kernel_impl);
 
     return status::success;
@@ -301,14 +306,14 @@ status_t ocl_gpu_engine_t::create_kernel(
     if (!jitter) return status::invalid_arguments;
     xpu::binary_t binary = jitter->get_binary(context(), device());
     if (binary.empty()) return status::runtime_error;
-    VCHECK_KERNEL(
-            create_kernel_from_binary(*kernel, binary, jitter->kernel_name()),
+    VCHECK_KERNEL(create_kernel_from_binary(
+                          *kernel, binary, jitter->kernel_name(), {}),
             VERBOSE_KERNEL_CREATION_FAIL, jitter->kernel_name());
     return status::success;
 }
 
 status_t ocl_gpu_engine_t::create_program(
-        xpu::ocl::wrapper_t<cl_program> &program,
+        xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
         const std::vector<const char *> &kernel_names,
         const compute::kernel_ctx_t &kernel_ctx) const {
 
@@ -342,7 +347,7 @@ status_t ocl_gpu_engine_t::create_program(
             "kernels in a single .cl source file or split creation in groups "
             "based on their .cl source file.";
 
-    return build_program_from_source(program, source, kernel_ctx);
+    return build_program_from_source(program, src, source, kernel_ctx);
 }
 
 status_t ocl_gpu_engine_t::create_kernels(
@@ -354,13 +359,15 @@ status_t ocl_gpu_engine_t::create_kernels(
     *kernels = std::vector<compute::kernel_t>(kernel_names.size());
 
     xpu::ocl::wrapper_t<cl_program> program;
-    CHECK(create_program(program, kernel_names, kernel_ctx));
-    return create_kernels_from_program(kernels, kernel_names, program);
+    compute::program_src_t src;
+    CHECK(create_program(program, src, kernel_names, kernel_ctx));
+    return create_kernels_from_program(kernels, kernel_names, program, src);
 }
 
 status_t ocl_gpu_engine_t::create_kernels_from_program(
         std::vector<compute::kernel_t> *kernels,
-        const std::vector<const char *> &kernel_names, cl_program program) {
+        const std::vector<const char *> &kernel_names, cl_program program,
+        const compute::program_src_t &src) {
     *kernels = std::vector<compute::kernel_t>(kernel_names.size());
     for (size_t i = 0; i < kernel_names.size(); ++i) {
         if (!kernel_names[i]) continue;
@@ -373,7 +380,7 @@ status_t ocl_gpu_engine_t::create_kernels_from_program(
 
         std::shared_ptr<compute::kernel_impl_t> kernel_impl
                 = std::make_shared<ocl_gpu_kernel_t>(
-                        std::move(ocl_kernel), arg_types);
+                        std::move(ocl_kernel), arg_types, src);
         (*kernels)[i] = std::move(kernel_impl);
     }
 

diff --git a/src/gpu/intel/ocl/ocl_gpu_engine.hpp b/src/gpu/intel/ocl/ocl_gpu_engine.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -48,8 +48,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
             impl::stream_t **stream, impl::stream_impl_t *stream_impl) override;
 
     status_t create_kernel_from_binary(compute::kernel_t &kernel,
-            const xpu::binary_t &binary,
-            const char *kernel_name) const override;
+            const xpu::binary_t &binary, const char *kernel_name,
+            const compute::program_src_t &src) const override;
 
     status_t create_kernels_from_cache_blob(const cache_blob_t &cache_blob,
             std::vector<compute::kernel_t> &kernels,
@@ -64,7 +64,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
 
     static status_t create_kernels_from_program(
             std::vector<compute::kernel_t> *kernels,
-            const std::vector<const char *> &kernel_names, cl_program program);
+            const std::vector<const char *> &kernel_names, cl_program program,
+            const compute::program_src_t &src);
 
     const impl_list_item_t *get_concat_implementation_list() const override {
         return gpu_impl_list_t::get_concat_implementation_list();
@@ -100,6 +101,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
     }
 
     status_t create_program(xpu::ocl::wrapper_t<cl_program> &program,
+            compute::program_src_t &src,
             const std::vector<const char *> &kernel_names,
             const compute::kernel_ctx_t &kernel_ctx) const;
 
@@ -111,7 +113,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t {
     }
 
     status_t build_program_from_source(xpu::ocl::wrapper_t<cl_program> &program,
-            const char *code_string,
+            compute::program_src_t &src, const char *code_string,
             const compute::kernel_ctx_t &kernel_ctx) const;
 
     ~ocl_gpu_engine_t() override = default;

diff --git a/src/gpu/intel/ocl/ocl_gpu_kernel.cpp b/src/gpu/intel/ocl/ocl_gpu_kernel.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -111,9 +111,14 @@ class ocl_gpu_kernel_cache_t {
 };
 
+// Takes over ocl_kernel and stores the argument types together with the
+// program source record. src is a by-value sink parameter, so it is moved
+// into the member rather than copied a second time.
 ocl_gpu_kernel_t::ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
-        const std::vector<gpu::intel::compute::scalar_type_t> &arg_types)
+        const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
+        compute::program_src_t src)
     : ocl_kernel_(std::move(ocl_kernel))
     , arg_types_(arg_types)
+    , src_(std::move(src))
     , save_events_(false) {
     cache_ = std::make_shared<ocl_gpu_kernel_cache_t>(ocl_kernel_);
 }

diff --git a/src/gpu/intel/ocl/ocl_gpu_kernel.hpp b/src/gpu/intel/ocl/ocl_gpu_kernel.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -35,7 +35,8 @@ class ocl_gpu_kernel_cache_t;
 class ocl_gpu_kernel_t : public compute::kernel_impl_t {
 public:
     ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
-            const std::vector<gpu::intel::compute::scalar_type_t> &arg_types);
+            const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
+            compute::program_src_t src);
     ~ocl_gpu_kernel_t() override = default;
 
     cl_kernel ocl_kernel() const { return ocl_kernel_; }
@@ -59,11 +60,13 @@ class ocl_gpu_kernel_t : public compute::kernel_impl_t {
 
     status_t dump() const override;
     std::string name() const override;
+    const compute::program_src_t &src() const { return src_; }
 
 private:
     xpu::ocl::wrapper_t<cl_kernel> ocl_kernel_;
     std::vector<gpu::intel::compute::scalar_type_t> arg_types_;
     std::shared_ptr<ocl_gpu_kernel_cache_t> cache_;
+    compute::program_src_t src_;
     bool save_events_;
 };