Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to wgpu 24 #436

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion crates/cubecl-core/src/compute/launcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ impl<R: Runtime> KernelLauncher<R> {
///
/// # Safety
///
/// Out-of-bounds reads and writes can happen.
/// The kernel must not:
/// - Contain any out-of-bounds reads or writes. Doing so is immediate undefined behaviour (UB).
/// - Contain any loops that never terminate. These may be optimized away entirely or cause
/// other unpredictable behaviour.
pub unsafe fn launch_unchecked<K: Kernel>(
self,
cube_count: CubeCount,
Expand Down
3 changes: 2 additions & 1 deletion crates/cubecl-runtime/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ pub enum ExecutionMode {
/// Checked kernels are safe.
#[default]
Checked,
/// Unchecked kernels are unsafe.
/// Unchecked kernels are unsafe: it is up to the user to guarantee that their kernel performs no
/// out-of-bounds reads or writes and contains no loops that never terminate.
Unchecked,
}

Expand Down
7 changes: 5 additions & 2 deletions crates/cubecl-wgpu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,14 @@ cfg-if = { workspace = true }

# wgpu dependency for platforms other than macOS
[target.'cfg(not(target_os = "macos"))'.dependencies]
wgpu = { version = "23.0.0", features = ["fragile-send-sync-non-atomic-wasm"] }
wgpu = { version = "24.0.0", features = ["fragile-send-sync-non-atomic-wasm"] }
# On macOS, the `vulkan-portability` feature is required due to the MoltenVK translation layer.
# To install MoltenVK, install the VulkanSDK: https://vulkan.lunarg.com/sdk/home#mac
[target.'cfg(target_os = "macos")'.dependencies]
wgpu = { version = "23.0.0", features = ["vulkan-portability", "fragile-send-sync-non-atomic-wasm"] }
wgpu = { version = "24.0.0", features = [
"vulkan-portability",
"fragile-send-sync-non-atomic-wasm",
] }

[dev-dependencies]
cubecl-core = { path = "../cubecl-core", version = "0.5.0", features = [
Expand Down
1 change: 1 addition & 0 deletions crates/cubecl-wgpu/src/compiler/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub trait WgpuCompiler: Compiler {
fn create_pipeline(
server: &mut WgpuServer<Self>,
kernel: CompiledKernel<Self>,
mode: ExecutionMode,
) -> Arc<ComputePipeline>;

#[allow(async_fn_in_trait)]
Expand Down
32 changes: 19 additions & 13 deletions crates/cubecl-wgpu/src/compiler/spirv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
fn create_pipeline(
server: &mut WgpuServer<Self>,
kernel: CompiledKernel<Self>,
mode: ExecutionMode,
) -> Arc<ComputePipeline> {
let (module, layout) = kernel
.repr
Expand Down Expand Up @@ -107,21 +108,28 @@ impl WgpuCompiler for SpirvCompiler<GLCompute> {
})
.unwrap_or_else(|| {
let source = &kernel.source;
// Cube always in principle uses unchecked modules. Certain operations like
// indexing are instead checked by cube. The WebGPU specification only makes
// incredibly loose guarantees that Cube can't rely on. Additionally, kernels
// can opt in/out per operation whether checks should be performed which can be faster.
//

let checks = wgpu::ShaderRuntimeChecks {
// Cube does not need wgpu bounds checks - OOB behaviour is instead
// checked by cube (if enabled).
// This is because the WebGPU specification only makes loose guarantees that Cube can't rely on.
bounds_checks: false,
// Loop bounds are only checked in checked mode.
force_loop_bounding: mode == ExecutionMode::Checked,
};

// SAFETY: Cube guarantees OOB safety when launching in checked mode. Launching in unchecked mode
// is only available through the use of unsafe code.
let module = unsafe {
server
.device
.create_shader_module_unchecked(wgpu::ShaderModuleDescriptor {
label: Some(&kernel.entrypoint_name),
server.device.create_shader_module_trusted(
wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
})
},
checks,
)
};

(module, None)
});

Expand Down Expand Up @@ -413,9 +421,7 @@ fn is_robust(device: &wgpu::Device) -> bool {
.contains(&EXT_ROBUSTNESS2_NAME)
}
unsafe {
device
.as_hal::<hal::api::Vulkan, _, _>(|device| device.map(is_robust).unwrap_or(false))
.unwrap_or(false)
device.as_hal::<hal::api::Vulkan, _, _>(|device| device.map(is_robust).unwrap_or(false))
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/cubecl-wgpu/src/compiler/wgsl/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ impl Elem {
}

pub fn is_atomic(&self) -> bool {
matches!(self, Self::AtomicI32 | Self::AtomicU32)
matches!(self, Self::AtomicI32 | Self::AtomicU32 | Self::AtomicF32)
}
}

Expand Down
25 changes: 16 additions & 9 deletions crates/cubecl-wgpu/src/compiler/wgsl/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,29 @@ impl WgpuCompiler for WgslCompiler {
fn create_pipeline(
server: &mut WgpuServer<Self>,
kernel: CompiledKernel<Self>,
mode: ExecutionMode,
) -> Arc<ComputePipeline> {
let source = &kernel.source;
// Cube always in principle uses unchecked modules. Certain operations like
// indexing are instead checked by cube. The WebGPU specification only makes
// incredibly loose guarantees that Cube can't rely on. Additionally, kernels
// can opt in/out per operation whether checks should be performed which can be faster.
//

let checks = wgpu::ShaderRuntimeChecks {
// Cube does not need wgpu bounds checks - OOB behaviour is instead
// checked by cube (if enabled).
// This is because the WebGPU specification only makes loose guarantees that Cube can't rely on.
bounds_checks: false,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add a comment that automatic bounds checks are done by the cubecl compiler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment above already said as much, but I've clarified it a bit.

// Loop bounds are only checked in checked mode.
force_loop_bounding: mode == ExecutionMode::Checked,
};

// SAFETY: Cube guarantees OOB safety when launching in checked mode. Launching in unchecked mode
// is only available through the use of unsafe code.
let module = unsafe {
server
.device
.create_shader_module_unchecked(ShaderModuleDescriptor {
server.device.create_shader_module_trusted(
ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
})
},
checks,
)
};

let layout = kernel.repr.map(|repr| {
Expand Down
5 changes: 1 addition & 4 deletions crates/cubecl-wgpu/src/compiler/wgsl/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -880,10 +880,7 @@ for (var {i}: {i_ty} = {start}; {i} {cmp} {end}; {increment}) {{
}
Instruction::AtomicSub { lhs, rhs, out } => {
let out = out.fmt_left();
match rhs.elem() {
Elem::F32 => write!(f, "{out} = atomicAdd({lhs}, -{rhs});"),
_ => write!(f, "{out} = atomicSub({lhs}, {rhs});"),
}
write!(f, "{out} = atomicSub({lhs}, {rhs});")
}
Instruction::AtomicMax { lhs, rhs, out } => {
let out = out.fmt_left();
Expand Down
2 changes: 1 addition & 1 deletion crates/cubecl-wgpu/src/compute/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl<C: WgpuCompiler> WgpuServer<C> {
}

let compile = self.logger.debug(compile);
let pipeline = C::create_pipeline(self, compile);
let pipeline = C::create_pipeline(self, compile, mode);

self.pipelines.insert(kernel_id.clone(), pipeline.clone());

Expand Down
14 changes: 12 additions & 2 deletions crates/cubecl-wgpu/src/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use crate::{
};
use alloc::sync::Arc;
use cubecl_common::future;
use cubecl_core::{Feature, Runtime};
use cubecl_core::{
ir::{Elem, FloatKind},
AtomicFeature, Feature, Runtime,
};
pub use cubecl_runtime::memory_management::MemoryConfiguration;
use cubecl_runtime::{
channel::MutexComputeChannel,
Expand Down Expand Up @@ -212,6 +215,13 @@ pub(crate) fn create_client_on_setup<C: WgpuCompiler>(
);
let channel = MutexComputeChannel::new(server);

if features.contains(wgpu::Features::SHADER_FLOAT32_ATOMIC) {
device_props.register_feature(Feature::Type(Elem::AtomicFloat(FloatKind::F32)));

device_props.register_feature(Feature::AtomicFloat(AtomicFeature::LoadStore));
device_props.register_feature(Feature::AtomicFloat(AtomicFeature::Add));
}

ComputeClient::new(channel, device_props)
}

Expand Down Expand Up @@ -244,7 +254,7 @@ async fn request_adapter<G: GraphicsApi>(device: &WgpuDevice) -> (wgpu::Instance
(_, false) => InstanceFlags::default(),
};
log::debug!("{instance_flags:?}");
let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
backends: G::backend().into(),
flags: instance_flags,
..Default::default()
Expand Down