From eb5c3df5585d76a3e42c576b9531a1286792917e Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Fri, 3 Jan 2025 14:07:34 -0800 Subject: [PATCH] xe: enable source debug information for OpenCL C kernels --- cmake/gen_gpu_kernel.cmake | 23 ++++++----- cmake/gen_gpu_kernel_list.cmake | 9 ++++- src/gpu/intel/compute/compute_engine.hpp | 5 ++- src/gpu/intel/compute/kernel.hpp | 39 ++++++++++++++++++- src/gpu/intel/ocl/ocl_gpu_engine.cpp | 35 ++++++++++------- src/gpu/intel/ocl/ocl_gpu_engine.hpp | 12 +++--- src/gpu/intel/ocl/ocl_gpu_kernel.cpp | 6 ++- src/gpu/intel/ocl/ocl_gpu_kernel.hpp | 7 +++- src/gpu/intel/sycl/engine.hpp | 23 ++++++----- .../intel/sycl/sycl_interop_gpu_kernel.hpp | 8 ++-- 10 files changed, 117 insertions(+), 50 deletions(-) diff --git a/cmake/gen_gpu_kernel.cmake b/cmake/gen_gpu_kernel.cmake index 672c88ef877..915354f4199 100644 --- a/cmake/gen_gpu_kernel.cmake +++ b/cmake/gen_gpu_kernel.cmake @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2019-2024 Intel Corporation +# Copyright 2019-2025 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,17 +22,20 @@ file(READ ${CL_FILE} cl_file_lines) -# Remove C++ style comments -string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}") -# Remove repeated whitespaces -string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}") -# Remove leading whitespaces -string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}") -# Remove empty lines -string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}") +string(LENGTH "${cl_file_lines}" len) +if(CL_MINIFY STREQUAL "ON" OR len GREATER 65535) + # Remove C++ style comments + string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}") + # Remove repeated whitespaces + string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}") + # Remove leading whitespaces + string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}") + # Remove empty lines + string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}") +endif() string(LENGTH "${cl_file_lines}" len) -if(len GREATER 65535) +if(MSVC AND len GREATER 65535) message(WARNING "Windows requires string literals to fit in 65535 bytes. Please split ${CL_FILE}.") endif() diff --git a/cmake/gen_gpu_kernel_list.cmake b/cmake/gen_gpu_kernel_list.cmake index 02f8cacb9bb..f5e1f254c37 100644 --- a/cmake/gen_gpu_kernel_list.cmake +++ b/cmake/gen_gpu_kernel_list.cmake @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2020-2021 Intel Corporation +# Copyright 2020-2025 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +46,12 @@ endfunction() function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers) set(_sources "${SOURCES}") + if(DNNL_DEV_MODE OR CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CL_MINIFY "OFF") + else() + set(CL_MINIFY "ON") + endif() + set(KER_LIST_EXTERN) set(KER_LIST_ENTRIES) set(KER_HEADERS_EXTERN) @@ -62,6 +68,7 @@ function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers) COMMAND ${CMAKE_COMMAND} -DCL_FILE="${header_path}" -DGEN_FILE="${gen_file}" + -DCL_MINIFY="${CL_MINIFY}" -P ${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel.cmake DEPENDS ${header_path} ) diff --git a/src/gpu/intel/compute/compute_engine.hpp b/src/gpu/intel/compute/compute_engine.hpp index b166afaa7a4..c832bdcfc7a 100644 --- a/src/gpu/intel/compute/compute_engine.hpp +++ b/src/gpu/intel/compute/compute_engine.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,8 @@ class compute_engine_t : public gpu::engine_t { } virtual status_t create_kernel_from_binary(compute::kernel_t &kernel, - const xpu::binary_t &binary, const char *kernel_name) const = 0; + const xpu::binary_t &binary, const char *kernel_name, + const program_src_t &src) const = 0; virtual status_t create_kernels_from_cache_blob( const cache_blob_t &cache_blob, diff --git a/src/gpu/intel/compute/kernel.hpp b/src/gpu/intel/compute/kernel.hpp index 7a78da0f8a9..4d22a23e1bb 100644 --- a/src/gpu/intel/compute/kernel.hpp +++ b/src/gpu/intel/compute/kernel.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,43 @@ namespace gpu { namespace intel { namespace compute { +#if defined(__linux__) && (defined(DNNL_DEV_MODE) || !defined(NDEBUG)) +struct program_src_t { + program_src_t() = default; + program_src_t(const std::string &src_str) { + // Only enable if gdb-oneapi debugging is active + if (getenv_int("ZET_ENABLE_PROGRAM_DEBUGGING", 0) == 0) return; + + const int name_size = 29; + char name[name_size] = "/tmp/dnnl_ocl_jit_src.XXXXXX"; + int fd = mkstemp(name); + if (fd == -1) return; + write(fd, src_str.c_str(), src_str.length()); + close(fd); + + auto deleter = [](char *name) { + unlink(name); + delete[] name; + }; + + name_ = std::shared_ptr(new char[name_size], deleter); + std::memcpy(name_.get(), name, name_size); + } + operator bool() const { return name_ != nullptr; }; + const char *name() const { return name_.get(); } + +private: + std::shared_ptr name_; +}; +#else +struct program_src_t { + program_src_t() = default; + program_src_t(const std::string &src_str) {} + operator bool() const {return false}; + const char *name() const { return nullptr; } +}; +#endif + class kernel_impl_t { public: kernel_impl_t() = default; diff --git a/src/gpu/intel/ocl/ocl_gpu_engine.cpp b/src/gpu/intel/ocl/ocl_gpu_engine.cpp index 446bd4c8c75..98d119b70bc 100644 --- a/src/gpu/intel/ocl/ocl_gpu_engine.cpp +++ b/src/gpu/intel/ocl/ocl_gpu_engine.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,8 +140,8 @@ status_t create_ocl_kernel_from_cache_blob(const ocl_gpu_engine_t *ocl_engine, OCL_CHECK(err); std::shared_ptr kernel_impl - = std::make_shared( - std::move(ocl_kernel), arg_types); + = std::make_shared(std::move(ocl_kernel), + arg_types, compute::program_src_t()); (*kernels)[i] = std::move(kernel_impl); } @@ -229,7 +229,8 @@ inline status_t fuse_microkernels(cl_context context, cl_device_id device, } // namespace status_t ocl_gpu_engine_t::build_program_from_source( - xpu::ocl::wrapper_t &program, const char *code_string, + xpu::ocl::wrapper_t &program, compute::program_src_t &src, + const char *code_string, const compute::kernel_ctx_t &kernel_ctx) const { std::string options = kernel_ctx.options(); @@ -249,6 +250,9 @@ status_t ocl_gpu_engine_t::build_program_from_source( std::string pp_code_str = pp_code.str(); const char *pp_code_str_ptr = pp_code_str.c_str(); + src = {pp_code_str}; + if (src) { options += " -g -s " + std::string(src.name()); } + debugdump_processed_source( pp_code_str, options, dev_info->get_cl_ext_options()); @@ -268,7 +272,8 @@ status_t ocl_gpu_engine_t::build_program_from_source( } status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel, - const xpu::binary_t &binary, const char *kernel_name) const { + const xpu::binary_t &binary, const char *kernel_name, + const compute::program_src_t &src) const { xpu::ocl::wrapper_t program; CHECK(xpu::ocl::create_program( program, this->device(), this->context(), binary)); @@ -283,7 +288,7 @@ status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel, std::shared_ptr kernel_impl = std::make_shared( - std::move(ocl_kernel), arg_types); + std::move(ocl_kernel), arg_types, src); kernel = std::move(kernel_impl); return status::success; @@ -301,14 +306,14 @@ status_t ocl_gpu_engine_t::create_kernel( if (!jitter) return status::invalid_arguments; xpu::binary_t binary = jitter->get_binary(context(), device()); if (binary.empty()) return status::runtime_error; - VCHECK_KERNEL( - create_kernel_from_binary(*kernel, binary, jitter->kernel_name()), + VCHECK_KERNEL(create_kernel_from_binary( + *kernel, binary, jitter->kernel_name(), {}), VERBOSE_KERNEL_CREATION_FAIL, jitter->kernel_name()); return status::success; } status_t ocl_gpu_engine_t::create_program( - xpu::ocl::wrapper_t &program, + xpu::ocl::wrapper_t &program, compute::program_src_t &src, const std::vector &kernel_names, const compute::kernel_ctx_t &kernel_ctx) const { @@ -342,7 +347,7 @@ status_t ocl_gpu_engine_t::create_program( "kernels in a single .cl source file or split creation in groups " "based on their .cl source file."; - return build_program_from_source(program, source, kernel_ctx); + return build_program_from_source(program, src, source, kernel_ctx); } status_t ocl_gpu_engine_t::create_kernels( @@ -354,13 +359,15 @@ status_t ocl_gpu_engine_t::create_kernels( *kernels = std::vector(kernel_names.size()); xpu::ocl::wrapper_t program; - CHECK(create_program(program, kernel_names, kernel_ctx)); - return create_kernels_from_program(kernels, kernel_names, program); + compute::program_src_t src; + CHECK(create_program(program, src, kernel_names, kernel_ctx)); + return create_kernels_from_program(kernels, kernel_names, program, src); } status_t ocl_gpu_engine_t::create_kernels_from_program( std::vector *kernels, - const std::vector &kernel_names, cl_program program) { + const std::vector &kernel_names, cl_program program, + const compute::program_src_t &src) { *kernels = std::vector(kernel_names.size()); for (size_t i = 0; i < kernel_names.size(); ++i) { if (!kernel_names[i]) continue; @@ -373,7 +380,7 @@ status_t ocl_gpu_engine_t::create_kernels_from_program( std::shared_ptr kernel_impl = std::make_shared( - std::move(ocl_kernel), arg_types); + std::move(ocl_kernel), arg_types, src); (*kernels)[i] = std::move(kernel_impl); } diff --git a/src/gpu/intel/ocl/ocl_gpu_engine.hpp b/src/gpu/intel/ocl/ocl_gpu_engine.hpp index 502bfecca2a..ed368626929 100644 --- a/src/gpu/intel/ocl/ocl_gpu_engine.hpp +++ b/src/gpu/intel/ocl/ocl_gpu_engine.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,8 +48,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t { impl::stream_t **stream, impl::stream_impl_t *stream_impl) override; status_t create_kernel_from_binary(compute::kernel_t &kernel, - const xpu::binary_t &binary, - const char *kernel_name) const override; + const xpu::binary_t &binary, const char *kernel_name, + const compute::program_src_t &src) const override; status_t create_kernels_from_cache_blob(const cache_blob_t &cache_blob, std::vector &kernels, @@ -64,7 +64,8 @@ class ocl_gpu_engine_t : public compute::compute_engine_t { static status_t create_kernels_from_program( std::vector *kernels, - const std::vector &kernel_names, cl_program program); + const std::vector &kernel_names, cl_program program, + const compute::program_src_t &src); const impl_list_item_t *get_concat_implementation_list() const override { return gpu_impl_list_t::get_concat_implementation_list(); @@ -100,6 +101,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t { } status_t create_program(xpu::ocl::wrapper_t &program, + compute::program_src_t &src, const std::vector &kernel_names, const compute::kernel_ctx_t &kernel_ctx) const; @@ -111,7 +113,7 @@ class ocl_gpu_engine_t : public compute::compute_engine_t { } status_t build_program_from_source(xpu::ocl::wrapper_t &program, - const char *code_string, + compute::program_src_t &src, const char *code_string, const compute::kernel_ctx_t &kernel_ctx) const; ~ocl_gpu_engine_t() override = default; diff --git a/src/gpu/intel/ocl/ocl_gpu_kernel.cpp b/src/gpu/intel/ocl/ocl_gpu_kernel.cpp index 0f5f320a1c3..e7b3df74192 100644 --- a/src/gpu/intel/ocl/ocl_gpu_kernel.cpp +++ b/src/gpu/intel/ocl/ocl_gpu_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,9 +111,11 @@ class ocl_gpu_kernel_cache_t { }; ocl_gpu_kernel_t::ocl_gpu_kernel_t(xpu::ocl::wrapper_t &&ocl_kernel, - const std::vector &arg_types) + const std::vector &arg_types, + compute::program_src_t src) : ocl_kernel_(std::move(ocl_kernel)) , arg_types_(arg_types) + , src_(src) , save_events_(false) { cache_ = std::make_shared(ocl_kernel_); } diff --git a/src/gpu/intel/ocl/ocl_gpu_kernel.hpp b/src/gpu/intel/ocl/ocl_gpu_kernel.hpp index a3fde1c967a..43ea09f92cf 100644 --- a/src/gpu/intel/ocl/ocl_gpu_kernel.hpp +++ b/src/gpu/intel/ocl/ocl_gpu_kernel.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,8 @@ class ocl_gpu_kernel_cache_t; class ocl_gpu_kernel_t : public compute::kernel_impl_t { public: ocl_gpu_kernel_t(xpu::ocl::wrapper_t &&ocl_kernel, - const std::vector &arg_types); + const std::vector &arg_types, + compute::program_src_t src); ~ocl_gpu_kernel_t() override = default; cl_kernel ocl_kernel() const { return ocl_kernel_; } @@ -59,11 +60,13 @@ class ocl_gpu_kernel_t : public compute::kernel_impl_t { status_t dump() const override; std::string name() const override; + const compute::program_src_t &src() const { return src_; } private: xpu::ocl::wrapper_t ocl_kernel_; std::vector arg_types_; std::shared_ptr cache_; + compute::program_src_t src_; bool save_events_; }; diff --git a/src/gpu/intel/sycl/engine.hpp b/src/gpu/intel/sycl/engine.hpp index 586101e6fe7..66a47298dd0 100644 --- a/src/gpu/intel/sycl/engine.hpp +++ b/src/gpu/intel/sycl/engine.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,7 +65,9 @@ class engine_t : public gpu::intel::compute::compute_engine_t { status_t convert_to_sycl( std::vector &kernels, - cl_program program, const std::vector &kernel_names, + cl_program program, + const gpu::intel::compute::program_src_t &program_src, + const std::vector &kernel_names, gpu::intel::ocl::ocl_gpu_engine_t *ocl_engine) const { kernels = std::vector( kernel_names.size()); @@ -80,7 +82,7 @@ class engine_t : public gpu::intel::compute::compute_engine_t { for (size_t i = 0; i < kernel_names.size(); i++) { std::shared_ptr kernel_impl = std::make_shared( - std::move(sycl_kernels[i])); + std::move(sycl_kernels[i]), program_src); kernels[i] = std::move(kernel_impl); } return status::success; @@ -100,14 +102,14 @@ class engine_t : public gpu::intel::compute::compute_engine_t { xpu::binary_t binary; CHECK(k->get_binary(ocl_engine, binary)); CHECK(create_kernel_from_binary( - kernels[i], binary, kernel_names[i])); + kernels[i], binary, kernel_names[i], k->src())); } return status::success; } status_t create_kernel_from_binary(gpu::intel::compute::kernel_t &kernel, - const xpu::binary_t &binary, - const char *kernel_name) const override { + const xpu::binary_t &binary, const char *kernel_name, + const gpu::intel::compute::program_src_t &src) const override { std::unique_ptr<::sycl::kernel> sycl_kernel; VCHECK_KERNEL(gpu::intel::sycl::compat::make_kernel( sycl_kernel, kernel_name, this, binary), @@ -115,7 +117,7 @@ class engine_t : public gpu::intel::compute::compute_engine_t { std::shared_ptr kernel_impl = std::make_shared( - std::move(sycl_kernel)); + std::move(sycl_kernel), src); kernel = std::move(kernel_impl); return status::success; } @@ -157,7 +159,7 @@ class engine_t : public gpu::intel::compute::compute_engine_t { xpu::binary_t binary = jitter->get_binary( ocl_engine->context(), ocl_engine->device()); - return create_kernel_from_binary(*kernel, binary, kernel_name); + return create_kernel_from_binary(*kernel, binary, kernel_name, {}); } status_t create_kernels(std::vector *kernels, @@ -174,10 +176,11 @@ class engine_t : public gpu::intel::compute::compute_engine_t { CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, this)); xpu::ocl::wrapper_t ocl_program; + gpu::intel::compute::program_src_t src; CHECK(ocl_engine->create_program( - ocl_program, kernel_names, kernel_ctx)); + ocl_program, src, kernel_names, kernel_ctx)); CHECK(convert_to_sycl( - *kernels, ocl_program, kernel_names, ocl_engine.get())); + *kernels, ocl_program, src, kernel_names, ocl_engine.get())); return status::success; } diff --git a/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp b/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp index 5b8a980a926..98553d201ec 100644 --- a/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp +++ b/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,9 @@ namespace sycl { class sycl_interop_gpu_kernel_t : public gpu::intel::compute::kernel_impl_t { public: - sycl_interop_gpu_kernel_t(std::unique_ptr<::sycl::kernel> &&sycl_kernel) - : sycl_kernel_(std::move(sycl_kernel)) {} + sycl_interop_gpu_kernel_t(std::unique_ptr<::sycl::kernel> &&sycl_kernel, + gpu::intel::compute::program_src_t src) + : sycl_kernel_(std::move(sycl_kernel)), src_(src) {} ::sycl::kernel sycl_kernel() const { return *sycl_kernel_; } @@ -52,6 +53,7 @@ class sycl_interop_gpu_kernel_t : public gpu::intel::compute::kernel_impl_t { private: std::unique_ptr<::sycl::kernel> sycl_kernel_; std::vector arg_types_; + gpu::intel::compute::program_src_t src_; }; } // namespace sycl