Add ops w/ default implementation for QTensorOps (#2125)
* Add q_* ops to match float ops

* Refactor q_* ops w/ dequant_op_quant macro

* Comparison ops are already implemented by default to compare dequantized values

* Add default arg min/max implementation and fix tch implementation

* Avoid division by zero scale

* Add default q_gather implementation (tch does not support gather on quantized tensors)

* Add warning instead for tch quantize_dynamic

* Call chunk backend implementation

* Add QFloat check for q_ ops

* Add tch q_min/max_dim_with_indices

* Add q_ ops tests

* Clippy fix

* Remove dead code/comments

* Fix quantization tests precision

* Set higher tolerance for ndarray backend

* Remove comment
laggui authored Sep 9, 2024
1 parent 9e9451b commit eb899db
Showing 67 changed files with 7,245 additions and 92 deletions.
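The commit message above mentions a `dequant_op_quant` macro and guarding against a zero quantization scale. Below is a minimal, self-contained sketch of that dequantize → float op → requantize pattern. The `QTensor` type, the `quantize`/`dequantize` helpers, and the 8-bit per-tensor affine scheme are hypothetical stand-ins for illustration only; Burn's actual macro works on backend tensor primitives and `QuantizationScheme`, not on these toy types.

```rust
/// Toy per-tensor affine quantization: q = round(x / scale) + zero_point, stored as i8.
#[derive(Clone, Debug)]
struct QTensor {
    values: Vec<i8>,
    scale: f32,
    zero_point: i32,
}

impl QTensor {
    /// Quantize float data. The scale is clamped away from zero so a constant
    /// tensor (max == min) cannot trigger a division by zero, mirroring the
    /// "avoid division by zero scale" fix in the commit message.
    fn quantize(data: &[f32]) -> Self {
        let (min, max) = data
            .iter()
            .fold((f32::MAX, f32::MIN), |(lo, hi), &x| (lo.min(x), hi.max(x)));
        let scale = ((max - min) / 255.0).max(f32::EPSILON);
        let zero_point = (-128.0 - min / scale).round() as i32;
        let values = data
            .iter()
            .map(|&x| ((x / scale).round() as i32 + zero_point).clamp(-128, 127) as i8)
            .collect();
        Self { values, scale, zero_point }
    }

    /// Map the stored integers back to f32.
    fn dequantize(&self) -> Vec<f32> {
        self.values
            .iter()
            .map(|&q| (q as i32 - self.zero_point) as f32 * self.scale)
            .collect()
    }
}

/// The "dequant -> op -> quant" wrapper: run an arbitrary float op on the
/// dequantized values, then re-quantize the result. Default q_* ops can be
/// expressed this way so a backend only has to provide the float op.
fn dequant_op_quant<F>(tensor: &QTensor, op: F) -> QTensor
where
    F: Fn(Vec<f32>) -> Vec<f32>,
{
    QTensor::quantize(&op(tensor.dequantize()))
}

fn main() {
    let q = QTensor::quantize(&[-1.0, 0.0, 0.5, 2.0]);
    // A hypothetical default "q_abs": dequantize, take |x| in float, re-quantize.
    let q_abs = dequant_op_quant(&q, |xs| xs.into_iter().map(f32::abs).collect());
    println!("{:?} -> {:?}", q.dequantize(), q_abs.dequantize());
}
```

The `.max(f32::EPSILON)` clamp corresponds to the "avoid division by zero scale" bullet: without it, a constant input would produce `scale == 0` and non-finite values when requantizing.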
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

84 changes: 83 additions & 1 deletion crates/burn-autodiff/src/ops/qtensor.rs
@@ -1,6 +1,8 @@
use std::ops::Range;

use burn_tensor::{
backend::Backend,
ops::{FloatTensor, QTensorOps, QuantizedTensor},
ops::{FloatTensor, IntTensor, QTensorOps, QuantizedTensor},
quantization::{QuantizationParametersPrimitive, QuantizationScheme},
Device, Shape, TensorData,
};
@@ -23,6 +25,13 @@ impl<B: Backend, C: CheckpointStrategy> QTensorOps<Self> for Autodiff<B, C> {
todo!() // required for QAT
}

fn quantize_dynamic<const D: usize>(
_tensor: FloatTensor<Self, D>,
_scheme: &QuantizationScheme,
) -> QuantizedTensor<Self, D> {
todo!()
}

fn dequantize<const D: usize>(_tensor: QuantizedTensor<Self, D>) -> FloatTensor<Self, D> {
todo!()
}
@@ -35,6 +44,13 @@ impl<B: Backend, C: CheckpointStrategy> QTensorOps<Self> for Autodiff<B, C> {
B::q_device(tensor)
}

fn q_to_device<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_device: &Device<Self>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_reshape<const D1: usize, const D2: usize>(
tensor: QuantizedTensor<Self, D1>,
shape: Shape<D2>,
@@ -45,4 +61,70 @@ impl<B: Backend, C: CheckpointStrategy> QTensorOps<Self> for Autodiff<B, C> {
async fn q_into_data<const D: usize>(tensor: QuantizedTensor<Self, D>) -> TensorData {
B::q_into_data(tensor).await
}

fn q_swap_dims<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim1: usize,
_dim2: usize,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_permute<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: [usize; D],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_flip<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: &[usize],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_gather<const D: usize>(
_dim: usize,
_tensor: QuantizedTensor<Self, D>,
_indices: IntTensor<Self, D>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_select<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim: usize,
_indices: IntTensor<Self, 1>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_slice<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_ranges: [Range<usize>; D2],
) -> QuantizedTensor<Self, D1> {
unimplemented!()
}

fn q_argmax<const D: usize>(
tensor: QuantizedTensor<Self, D>,
dim: usize,
) -> IntTensor<Self, D> {
B::q_argmax(tensor, dim)
}

fn q_argmin<const D: usize>(
tensor: QuantizedTensor<Self, D>,
dim: usize,
) -> IntTensor<Self, D> {
B::q_argmin(tensor, dim)
}

fn q_expand<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_shape: Shape<D2>,
) -> QuantizedTensor<Self, D2> {
unimplemented!()
}
}
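A note on the `q_argmax` / `q_argmin` forwarding above, related to the "default arg min/max implementation" bullet: for an affine scheme with a positive scale, dequantization is monotonically increasing, so the index of the maximum is the same whether it is computed on the quantized integers or on the dequantized floats (a default implementation may also simply dequantize and reuse the float op). A small illustrative snippet, independent of Burn's API and using the same toy parameters as the sketch above:

```rust
/// For q = round(x / scale) + zero_point with scale > 0, dequantization
/// x ≈ (q - zero_point) * scale is strictly increasing in q, so argmax over the
/// dequantized floats equals argmax over the raw quantized integers.
fn argmax_i8(values: &[i8]) -> usize {
    values
        .iter()
        .copied()
        .enumerate()
        .max_by_key(|&(_, v)| v)
        .map(|(i, _)| i)
        .expect("argmax is undefined for an empty tensor")
}

fn main() {
    // Toy affine parameters: scale = 3/255, zero_point = -43.
    let quantized: [i8; 4] = [-128, -43, 0, 127];
    let (scale, zero_point) = (3.0_f32 / 255.0, -43_i32);
    let dequantized: Vec<f32> = quantized
        .iter()
        .map(|&q| (q as i32 - zero_point) as f32 * scale)
        .collect();
    let float_argmax = dequantized
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.total_cmp(b.1))
        .map(|(i, _)| i)
        .unwrap();
    // Both views agree on the index of the maximum.
    assert_eq!(argmax_i8(&quantized), float_argmax);
    println!("argmax index: {}", float_argmax);
}
```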
63 changes: 62 additions & 1 deletion crates/burn-candle/src/ops/qtensor.rs
@@ -1,6 +1,8 @@
use std::ops::Range;

use burn_tensor::{
backend::Backend,
ops::{FloatTensor, QTensorOps, QuantizedTensor},
ops::{FloatTensor, IntTensor, QTensorOps, QuantizedTensor},
quantization::{QuantizationParametersPrimitive, QuantizationScheme, QuantizationStrategy},
DType, Device, Shape, TensorData,
};
@@ -38,6 +40,13 @@ impl<F: FloatCandleElement, I: IntCandleElement> QTensorOps<Self> for Candle<F,
super::base::device(&tensor.qtensor)
}

fn q_to_device<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_device: &Device<Self>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_reshape<const D1: usize, const D2: usize>(
tensor: QuantizedTensor<Self, D1>,
shape: Shape<D2>,
@@ -51,4 +60,56 @@ impl<F: FloatCandleElement, I: IntCandleElement> QTensorOps<Self> for Candle<F,
async fn q_into_data<const D: usize>(tensor: QuantizedTensor<Self, D>) -> TensorData {
unimplemented!()
}

fn q_swap_dims<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim1: usize,
_dim2: usize,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_permute<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: [usize; D],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_flip<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: &[usize],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_gather<const D: usize>(
_dim: usize,
_tensor: QuantizedTensor<Self, D>,
_indices: IntTensor<Self, D>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_select<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim: usize,
_indices: IntTensor<Self, 1>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_slice<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_ranges: [Range<usize>; D2],
) -> QuantizedTensor<Self, D1> {
unimplemented!()
}

fn q_expand<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_shape: Shape<D2>,
) -> QuantizedTensor<Self, D2> {
unimplemented!()
}
}
70 changes: 69 additions & 1 deletion crates/burn-fusion/src/ops/qtensor.rs
@@ -1,6 +1,8 @@
use std::ops::Range;

use burn_tensor::{
backend::Backend,
ops::{QTensorOps, QuantizedTensor},
ops::{IntTensor, QTensorOps, QuantizedTensor},
quantization::{QuantizationParametersPrimitive, QuantizationScheme},
Device, Shape, TensorData,
};
@@ -23,6 +25,13 @@ impl<B: FusionBackend> QTensorOps<Self> for Fusion<B> {
unimplemented!()
}

fn quantize_dynamic<const D: usize>(
_tensor: <Self as Backend>::FloatTensorPrimitive<D>,
_scheme: &QuantizationScheme,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn dequantize<const D: usize>(
_tensor: <Self as Backend>::QuantizedTensorPrimitive<D>,
) -> <Self as Backend>::FloatTensorPrimitive<D> {
@@ -37,6 +46,13 @@ impl<B: FusionBackend> QTensorOps<Self> for Fusion<B> {
tensor.qtensor.client.device().clone()
}

fn q_to_device<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_device: &Device<Self>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_reshape<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_shape: Shape<D2>,
@@ -47,4 +63,56 @@ impl<B: FusionBackend> QTensorOps<Self> for Fusion<B> {
async fn q_into_data<const D: usize>(_tensor: QuantizedTensor<Self, D>) -> TensorData {
unimplemented!()
}

fn q_swap_dims<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim1: usize,
_dim2: usize,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_permute<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: [usize; D],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_flip<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: &[usize],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_gather<const D: usize>(
_dim: usize,
_tensor: QuantizedTensor<Self, D>,
_indices: IntTensor<Self, D>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_select<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim: usize,
_indices: IntTensor<Self, 1>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_slice<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_ranges: [Range<usize>; D2],
) -> QuantizedTensor<Self, D1> {
unimplemented!()
}

fn q_expand<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_shape: Shape<D2>,
) -> QuantizedTensor<Self, D2> {
unimplemented!()
}
}
1 change: 1 addition & 0 deletions crates/burn-jit/src/ops/base.rs
@@ -7,6 +7,7 @@ pub(crate) fn from_data<R: JitRuntime, E: JitElement, const D: usize>(
data: TensorData,
device: &R::Device,
) -> JitTensor<R, E, D> {
// TODO: from_data QFloat should not convert
let shape: Shape<D> = (&data.shape).into();
let client = R::client(device);
let buffer = client.create(data.convert::<E>().as_bytes());
63 changes: 62 additions & 1 deletion crates/burn-jit/src/ops/qtensor.rs
@@ -1,5 +1,7 @@
use std::ops::Range;

use burn_tensor::{
ops::{FloatTensor, QTensorOps, QuantizedTensor},
ops::{FloatTensor, IntTensor, QTensorOps, QuantizedTensor},
quantization::{QuantizationParametersPrimitive, QuantizationScheme},
Device, Shape, TensorData,
};
@@ -39,6 +41,13 @@ where
tensor.qtensor.device.clone()
}

fn q_to_device<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_device: &Device<Self>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_reshape<const D1: usize, const D2: usize>(
tensor: QuantizedTensor<Self, D1>,
shape: Shape<D2>,
@@ -52,4 +61,56 @@ where
async fn q_into_data<const D: usize>(_tensor: QuantizedTensor<Self, D>) -> TensorData {
unimplemented!()
}

fn q_swap_dims<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim1: usize,
_dim2: usize,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_permute<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: [usize; D],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_flip<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_axes: &[usize],
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_gather<const D: usize>(
_dim: usize,
_tensor: QuantizedTensor<Self, D>,
_indices: IntTensor<Self, D>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_select<const D: usize>(
_tensor: QuantizedTensor<Self, D>,
_dim: usize,
_indices: IntTensor<Self, 1>,
) -> QuantizedTensor<Self, D> {
unimplemented!()
}

fn q_slice<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_ranges: [Range<usize>; D2],
) -> QuantizedTensor<Self, D1> {
unimplemented!()
}

fn q_expand<const D1: usize, const D2: usize>(
_tensor: QuantizedTensor<Self, D1>,
_shape: Shape<D2>,
) -> QuantizedTensor<Self, D2> {
unimplemented!()
}
}
1 change: 1 addition & 0 deletions crates/burn-ndarray/src/lib.rs
@@ -39,6 +39,7 @@ mod tests {
use alloc::vec;

burn_tensor::testgen_all!();
burn_tensor::testgen_quantization!();

#[cfg(feature = "std")]
burn_autodiff::testgen_all!();