Custom fusion (#2486)

tracel-ai · Nov 13, 2024 · c7233bf
1 parent a4567db
commit c7233bf
Show file tree

Hide file tree

Showing 35 changed files with 354 additions and 166 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -154,8 +154,8 @@ ahash = { version = "0.8.11", default-features = false }
 portable-atomic-util = { version = "0.2.2", features = ["alloc"] }
 
 ### For the main burn branch. ###
-cubecl = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "3882ed25b47506d49562c501a179b7468e61702e" }
-cubecl-common = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "3882ed25b47506d49562c501a179b7468e61702e" }
+cubecl = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "a1471a7ffa089ee2878bb8c140d09f66a2b2b664" }
+cubecl-common = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "a1471a7ffa089ee2878bb8c140d09f66a2b2b664" }
 ### For local development. ###
 # cubecl = { path = "../cubecl/crates/cubecl", default-features = false }
 # cubecl-common = { path = "../cubecl/crates/cubecl-common", default-features = false }

diff --git a/crates/burn-fusion/src/client/base.rs b/crates/burn-fusion/src/client/base.rs
@@ -39,31 +39,43 @@ where
         &self,
         tensor: TensorDescription,
         stream: StreamId,
-    ) -> impl Future<Output = TensorData> + Send
+    ) -> impl Future<Output = TensorData> + Send + 'static
     where
         B: FusionBackend<FusionRuntime = R>;
     /// Read the values contained by an int tensor.
     fn read_tensor_int<B>(
         &self,
         tensor: TensorDescription,
         stream: StreamId,
-    ) -> impl Future<Output = TensorData> + Send
+    ) -> impl Future<Output = TensorData> + Send + 'static
     where
         B: FusionBackend<FusionRuntime = R>;
     /// Read the values contained by a bool tensor.
     fn read_tensor_bool<B>(
         &self,
         tensor: TensorDescription,
         stream: StreamId,
-    ) -> impl Future<Output = TensorData> + Send
+    ) -> impl Future<Output = TensorData> + Send + 'static
     where
         B: FusionBackend<FusionRuntime = R>;
     /// Read the values contained by a quantized tensor.
     fn read_tensor_quantized<B>(
         &self,
         tensor: QuantizedTensorDescription,
         streams: Vec<StreamId>,
-    ) -> impl Future<Output = TensorData> + Send
+    ) -> impl Future<Output = TensorData> + Send + 'static
+    where
+        B: FusionBackend<FusionRuntime = R>;
+    /// Resolve the given float tensor to a primitive tensor.
+    fn resolve_tensor_float<B>(&self, tensor: FusionTensor<R>) -> B::FloatTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>;
+    /// Resolve the given int tensor to a primitive tensor.
+    fn resolve_tensor_int<B>(&self, tensor: FusionTensor<R>) -> B::IntTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>;
+    /// Resolve the given bool tensor to a primitive tensor.
+    fn resolve_tensor_bool<B>(&self, tensor: FusionTensor<R>) -> B::BoolTensorPrimitive
     where
         B: FusionBackend<FusionRuntime = R>;
     /// Change the client of the given float tensor.

diff --git a/crates/burn-fusion/src/client/mutex.rs b/crates/burn-fusion/src/client/mutex.rs
@@ -9,7 +9,7 @@ use burn_tensor::{
     DType,
 };
 use spin::Mutex;
-use std::sync::Arc;
+use std::{future::Future, sync::Arc};
 
 /// Use a mutex to communicate with the fusion server.
 pub struct MutexFusionClient<R: FusionRuntime> {
@@ -79,51 +79,49 @@ where
         FusionTensor::new(id, shape, dtype, self.clone(), stream)
     }
 
-    async fn read_tensor_float<B>(
+    fn read_tensor_float<B>(
         &self,
         tensor: TensorDescription,
         stream: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
-        self.server.lock().read_float::<B>(tensor, stream).await
+        let mut server = self.server.lock();
+        server.read_float::<B>(tensor, stream)
     }
 
-    async fn read_tensor_int<B>(
+    fn read_tensor_int<B>(
         &self,
         tensor: TensorDescription,
         id: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
-        self.server.lock().read_int::<B>(tensor, id).await
+        self.server.lock().read_int::<B>(tensor, id)
     }
 
-    async fn read_tensor_bool<B>(
+    fn read_tensor_bool<B>(
         &self,
         tensor: TensorDescription,
         stream: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
-        self.server.lock().read_bool::<B>(tensor, stream).await
+        self.server.lock().read_bool::<B>(tensor, stream)
     }
 
-    async fn read_tensor_quantized<B>(
+    fn read_tensor_quantized<B>(
         &self,
         tensor: QuantizedTensorDescription,
         streams: Vec<StreamId>,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
-        self.server
-            .lock()
-            .read_quantized::<B>(tensor, streams)
-            .await
+        self.server.lock().read_quantized::<B>(tensor, streams)
     }
 
     fn change_client_float<B>(
@@ -246,4 +244,31 @@ where
     fn register_orphan(&self, id: &TensorId) {
         self.server.lock().drop_tensor_handle(*id);
     }
+
+    fn resolve_tensor_float<B>(&self, tensor: FusionTensor<R>) -> B::FloatTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        let mut server = self.server.lock();
+        server.drain_stream(tensor.stream);
+        server.resolve_server_float::<B>(&tensor.into_description())
+    }
+
+    fn resolve_tensor_int<B>(&self, tensor: FusionTensor<R>) -> B::IntTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        let mut server = self.server.lock();
+        server.drain_stream(tensor.stream);
+        server.resolve_server_int::<B>(&tensor.into_description())
+    }
+
+    fn resolve_tensor_bool<B>(&self, tensor: FusionTensor<R>) -> B::BoolTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        let mut server = self.server.lock();
+        server.drain_stream(tensor.stream);
+        server.resolve_server_bool::<B>(&tensor.into_description())
+    }
 }
diff --git a/crates/burn-fusion/src/server.rs b/crates/burn-fusion/src/server.rs
@@ -6,7 +6,7 @@ use burn_tensor::repr::{
     HandleContainer, OperationDescription, QuantizedKind, QuantizedTensorDescription,
     TensorDescription, TensorId,
 };
-use std::sync::Arc;
+use std::{future::Future, sync::Arc};
 
 pub struct FusionServer<R: FusionRuntime> {
     streams: MultiStream<R>,
@@ -42,11 +42,11 @@ where
         self.handles.create_tensor_uninit()
     }
 
-    pub async fn read_float<B>(
+    pub fn read_float<B>(
         &mut self,
         tensor: TensorDescription,
         id: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
@@ -55,14 +55,14 @@ where
         self.drain_stream(id);
 
         let tensor = self.handles.get_float_tensor::<B>(&tensor);
-        B::float_into_data(tensor).await
+        B::float_into_data(tensor)
     }
 
-    pub async fn read_int<B>(
+    pub fn read_int<B>(
         &mut self,
         tensor: TensorDescription,
         id: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
@@ -71,14 +71,14 @@ where
         self.drain_stream(id);
 
         let tensor = self.handles.get_int_tensor::<B>(&tensor);
-        B::int_into_data(tensor).await
+        B::int_into_data(tensor)
     }
 
-    pub async fn read_bool<B>(
+    pub fn read_bool<B>(
         &mut self,
         tensor: TensorDescription,
         id: StreamId,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
@@ -87,14 +87,14 @@ where
         self.drain_stream(id);
 
         let tensor = self.handles.get_bool_tensor::<B>(&tensor);
-        B::bool_into_data(tensor).await
+        B::bool_into_data(tensor)
     }
 
-    pub async fn read_quantized<B>(
+    pub fn read_quantized<B>(
         &mut self,
         tensor: QuantizedTensorDescription,
         ids: Vec<StreamId>,
-    ) -> burn_tensor::TensorData
+    ) -> impl Future<Output = burn_tensor::TensorData> + 'static
     where
         B: FusionBackend<FusionRuntime = R>,
     {
@@ -105,7 +105,7 @@ where
         }
 
         let tensor = self.handles.get_quantized_tensor::<B>(&tensor);
-        B::q_into_data(tensor).await
+        B::q_into_data(tensor)
     }
 
     pub fn change_server_float<B>(
@@ -128,6 +128,27 @@ where
         id
     }
 
+    pub fn resolve_server_float<B>(&mut self, tensor: &TensorDescription) -> B::FloatTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        self.handles.get_float_tensor::<B>(tensor)
+    }
+
+    pub fn resolve_server_int<B>(&mut self, tensor: &TensorDescription) -> B::IntTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        self.handles.get_int_tensor::<B>(tensor)
+    }
+
+    pub fn resolve_server_bool<B>(&mut self, tensor: &TensorDescription) -> B::BoolTensorPrimitive
+    where
+        B: FusionBackend<FusionRuntime = R>,
+    {
+        self.handles.get_bool_tensor::<B>(tensor)
+    }
+
     pub fn change_server_int<B>(
         &mut self,
         tensor: &TensorDescription,