Skip to content

Commit

Permalink
Use num-traits for float ops (#1584)
Browse files Browse the repository at this point in the history
  • Loading branch information
antimora authored Apr 8, 2024
1 parent ca3dcb9 commit 2f88548
Show file tree
Hide file tree
Showing 22 changed files with 124 additions and 110 deletions.
6 changes: 1 addition & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions crates/burn-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ std = [
"rmp-serde",
"serde/std",
"serde_json/std",
"num-traits/std",
]
doc = [
"std",
Expand Down Expand Up @@ -86,7 +87,7 @@ candle = ["burn-candle"]
wgpu = ["burn-wgpu"]

# Custom deserializer for Record that is helpful for importing data, such as PyTorch pt files.
record-item-custom-serde = ["thiserror", "regex", "num-traits"]
record-item-custom-serde = ["thiserror", "regex"]

# Serialization formats
experimental-named-tensor = ["burn-tensor/experimental-named-tensor"]
Expand All @@ -111,9 +112,9 @@ burn-tch = { path = "../burn-tch", version = "0.13.0", optional = true }
burn-candle = { path = "../burn-candle", version = "0.13.0", optional = true }

derive-new = { workspace = true }
libm = { workspace = true }
log = { workspace = true, optional = true }
rand = { workspace = true, features = ["std_rng"] } # Default enables std

# Used in place of std::sync::Mutex when std is disabled
spin = { workspace = true, features = ["mutex", "spin_mutex"] }

Expand All @@ -130,7 +131,7 @@ rmp-serde = { workspace = true, optional = true }
serde_json = { workspace = true, features = ["alloc"] } #Default enables std
thiserror = { workspace = true, optional = true }
regex = { workspace = true, optional = true }
num-traits = { workspace = true, optional = true }
num-traits = { workspace = true }

[dev-dependencies]
tempfile = { workspace = true }
Expand Down
8 changes: 5 additions & 3 deletions crates/burn-core/src/nn/attention/mha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ use crate::{
nn,
tensor::{activation, backend::Backend, Bool, Tensor},
};
use libm::sqrtf;

#[cfg(not(feature = "std"))]
use num_traits::Float;

/// Configuration to create a [Multi Head Attention](MultiHeadAttention) layer.
#[derive(Config)]
Expand All @@ -35,7 +37,7 @@ pub struct MultiHeadAttentionConfig {
quiet_softmax: bool,
/// The type of function used to initialize neural network parameters
#[config(
default = "Initializer::KaimingUniform{gain:1.0/libm::sqrt(3.0), fan_out_only:false}"
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
)]
pub initializer: Initializer,
}
Expand Down Expand Up @@ -207,7 +209,7 @@ impl<B: Backend> MultiHeadAttention<B> {
fn attn_scores(&self, query: Tensor<B, 4>, key: Tensor<B, 4>) -> Tensor<B, 4> {
let attn_scores = query
.matmul(key.transpose())
.div_scalar(sqrtf(self.d_k as f32));
.div_scalar((self.d_k as f32).sqrt());

self.dropout.forward(attn_scores)
}
Expand Down
7 changes: 4 additions & 3 deletions crates/burn-core/src/nn/conv/conv1d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv1d;
use burn_tensor::ops::ConvOptions;
use libm::sqrt;

use super::checks;

Expand Down Expand Up @@ -37,7 +36,9 @@ pub struct Conv1dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

Expand Down Expand Up @@ -132,7 +133,7 @@ mod tests {

let config = Conv1dConfig::new(5, 5, 5);
let k = (config.channels_in * config.kernel_size) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
Expand Down
11 changes: 6 additions & 5 deletions crates/burn-core/src/nn/conv/conv2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv2d;
use burn_tensor::ops::ConvOptions;
use libm::sqrt;

use super::checks;

Expand All @@ -36,7 +35,9 @@ pub struct Conv2dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

Expand Down Expand Up @@ -135,7 +136,7 @@ mod tests {

let config = Conv2dConfig::new([5, 1], [5, 5]);
let k = (config.channels[0] * config.kernel_size[0] * config.kernel_size[1]) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let device = Default::default();
let conv = config.init::<TestBackend>(&device);

Expand All @@ -161,7 +162,7 @@ mod tests {
TestBackend::seed(0);

let init = Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: true, // test that fan_out is passed to `init_with()`
};
let device = Default::default();
Expand All @@ -176,7 +177,7 @@ mod tests {
TestBackend::seed(0);

let init = Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: true,
};
let device = Default::default();
Expand Down
7 changes: 4 additions & 3 deletions crates/burn-core/src/nn/conv/conv_transpose1d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv_transpose1d;
use burn_tensor::ops::ConvTransposeOptions;
use libm::sqrt;

use super::checks;

Expand Down Expand Up @@ -38,7 +37,9 @@ pub struct ConvTranspose1dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

Expand Down Expand Up @@ -135,7 +136,7 @@ mod tests {

let config = ConvTranspose1dConfig::new([5, 1], 5);
let k = (config.channels[1] * config.kernel_size) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
Expand Down
11 changes: 6 additions & 5 deletions crates/burn-core/src/nn/conv/conv_transpose2d.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
use crate as burn;

use super::checks;
use crate::config::Config;
use crate::module::Module;
use crate::module::Param;
use crate::nn::Initializer;
use crate::tensor::backend::Backend;
use crate::tensor::Tensor;

use burn_tensor::module::conv_transpose2d;
use burn_tensor::ops::ConvTransposeOptions;
use libm::sqrt;

use super::checks;

/// Configuration to create a [2D transposed convolution](ConvTranspose2d) layer.
#[derive(Config, Debug)]
Expand Down Expand Up @@ -38,7 +37,9 @@ pub struct ConvTranspose2dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

Expand Down Expand Up @@ -136,7 +137,7 @@ mod tests {

let config = ConvTranspose2dConfig::new([5, 1], [5, 5]);
let k = (config.channels[1] * config.kernel_size[0] * config.kernel_size[1]) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
Expand Down
25 changes: 14 additions & 11 deletions crates/burn-core/src/nn/initializer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use burn_tensor::Shape;
use libm::sqrt;

use crate::config::Config;
use crate::module::{Param, ParamId};
Expand All @@ -8,6 +7,9 @@ use crate::tensor::{Distribution, Tensor};

use crate as burn;

#[cfg(not(feature = "std"))]
use num_traits::Float;

/// Enum specifying with what values a tensor should be initialized
#[derive(Config, Debug, PartialEq)]
pub enum Initializer {
Expand Down Expand Up @@ -129,15 +131,15 @@ impl Initializer {
Initializer::Uniform { min, max } => uniform_draw(shape, *min, *max, device),
Initializer::Normal { mean, std } => normal_draw(shape, *mean, *std, device),
Initializer::KaimingUniform { gain, fan_out_only } => {
let a = sqrt(3.0) * *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
let a = 3.0f64.sqrt() * *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
uniform_draw(shape, -a, a, device)
}
Initializer::KaimingNormal { gain, fan_out_only } => {
let std = *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
normal_draw(shape, 0.0, std, device)
}
Initializer::XavierUniform { gain } => {
let a = sqrt(3.0) * *gain * self.xavier_std(fan_in, fan_out);
let a = 3.0f64.sqrt() * *gain * self.xavier_std(fan_in, fan_out);
uniform_draw(shape, -a, a, device)
}
Initializer::XavierNormal { gain } => {
Expand All @@ -158,7 +160,7 @@ impl Initializer {
"Can't use Kaiming initialization without specifying fan. Use init_with method.",
);

1.0 / sqrt(fan as f64)
1.0 / (fan as f64).sqrt()
}

fn xavier_std(&self, fan_in: Option<usize>, fan_out: Option<usize>) -> f64 {
Expand All @@ -170,7 +172,7 @@ impl Initializer {
"Can't use Xavier initialization without specifying fan out. Use init_with method and \
provide fan_out.",
);
sqrt(2.0 / (fan_in + fan_out) as f64)
(2.0 / (fan_in + fan_out) as f64).sqrt()
}
}

Expand Down Expand Up @@ -199,6 +201,7 @@ mod tests {
use super::*;

use burn_tensor::{Data, ElementConversion};
use num_traits::Pow;

pub type TB = burn_ndarray::NdArray<f32>;

Expand Down Expand Up @@ -293,7 +296,7 @@ mod tests {

let gain = 2_f64;
let (fan_in, fan_out) = (5, 6);
let k = gain * sqrt(3.0 / fan_in as f64);
let k = gain * (3.0 / fan_in as f64).sqrt();

let tensor: Tensor<TB, 2> = Initializer::KaimingUniform {
gain,
Expand All @@ -312,7 +315,7 @@ mod tests {
let (fan_in, fan_out) = (1000, 10);
let expected_mean = 0_f64;

let expected_var = (gain * sqrt(1. / (fan_in as f64))).powf(2.);
let expected_var = (gain * (1. / (fan_in as f64)).sqrt()).pow(2.);
let tensor: Tensor<TB, 2> = Initializer::KaimingNormal {
gain,
fan_out_only: false,
Expand All @@ -329,7 +332,7 @@ mod tests {
let gain = 2_f64;
let shape = [3];
let fan_in = 5;
let k = gain * sqrt(3.0 / fan_in as f64);
let k = gain * (3.0 / fan_in as f64).sqrt();

let tensor: Tensor<TB, 1> = Initializer::KaimingUniform {
gain,
Expand All @@ -346,7 +349,7 @@ mod tests {

let gain = 2_f64;
let (fan_in, fan_out) = (5, 6);
let k = gain * sqrt(3.0 / fan_out as f64);
let k = gain * (3.0 / fan_out as f64).sqrt();

let tensor: Tensor<TB, 2> = Initializer::KaimingUniform {
gain,
Expand Down Expand Up @@ -379,7 +382,7 @@ mod tests {

let gain = 2.;
let (fan_in, fan_out) = (5, 6);
let bound = gain * sqrt(6. / (fan_in + fan_out) as f64);
let bound = gain * (6. / (fan_in + fan_out) as f64).sqrt();
let tensor: Tensor<TB, 2> = Initializer::XavierUniform { gain }
.init_with(
[fan_out, fan_in],
Expand All @@ -400,7 +403,7 @@ mod tests {
let (fan_in, fan_out) = (1000, 10);
let expected_mean = 0_f64;

let expected_var = (gain * sqrt(2. / (fan_in as f64 + fan_out as f64))).powf(2.);
let expected_var = (gain * (2. / (fan_in as f64 + fan_out as f64)).sqrt()).powf(2.);
let tensor: Tensor<TB, 2> = Initializer::XavierNormal { gain }
.init_with(
[fan_out, fan_in],
Expand Down
10 changes: 5 additions & 5 deletions crates/burn-core/src/nn/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use crate::config::Config;
use crate::module::Module;
use crate::module::Param;
use crate::tensor::{backend::Backend, Tensor};
use libm::sqrt;

use super::Initializer;

Expand All @@ -19,7 +18,9 @@ pub struct LinearConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0), fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
)]
pub initializer: Initializer,
}

Expand Down Expand Up @@ -80,21 +81,20 @@ mod tests {
use super::*;
use crate::TestBackend;
use burn_tensor::{Data, Shape};
use libm::sqrt;

#[test]
fn initializer_default() {
TestBackend::seed(0);

let config = LinearConfig::new(5, 5);
let k = sqrt(1.0 / config.d_input as f64) as f32;
let k = (1.0 / config.d_input as f64).sqrt() as f32;
let device = Default::default();
let linear = config.init::<TestBackend>(&device);

assert_eq!(
config.initializer,
Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: false
}
);
Expand Down
Loading

0 comments on commit 2f88548

Please sign in to comment.