From 6dfaed004f4d8ecabe8738a753d87c39ffbd93dc Mon Sep 17 00:00:00 2001
From: Jeremie Vandenplas
Date: Thu, 9 May 2024 13:11:49 -0400
Subject: [PATCH] Replacement of a matmul + use of merge (#181)

* dense_layer: replace a matmul(reshape) by a do concurrent

* nf_activation: replace some where statements by merge intrinsic

* Set correct size for self%gradient in dense_layer

* remove some unneeded pack()

* Remove notes on -fno-frontend-optimize (no longer necessary)

* Bump patch version

---------

Co-authored-by: Vandenplas, Jeremie
Co-authored-by: milancurcic
---
 README.md                            | 12 ++++------
 fpm.toml                             |  2 +-
 src/nf/nf_activation.f90             | 36 +++++-----------------
 src/nf/nf_conv2d_layer_submodule.f90 |  4 ++--
 src/nf/nf_dense_layer_submodule.f90  | 14 +++++++----
 5 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index 2b05e56..f8b2174 100644
--- a/README.md
+++ b/README.md
@@ -80,23 +80,21 @@ With gfortran, the following will create an optimized build of neural-fortran:
 
 ```
 fpm build \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```
 
 HDF5 is now a required dependency, so you have to provide it to fpm.
 The above command assumes that the `HDF5INC` and `HDF5LIB` environment
 variables are set to the include and library paths, respectively, of your
 HDF5 install.
 
-The `-fno-frontend-optimize` disables some optimizations that may be harmful
-when building neural-fortran.
 
 If you use Conda, the following instructions work:
 
 ```
 conda create -n nf hdf5
 conda activate nf
-fpm build --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
-fpm test --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
+fpm build --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
+fpm test --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
 ```
 
 #### Building in parallel mode
@@ -110,7 +108,7 @@ in parallel, respectively:
 fpm build \
   --compiler caf \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```
 
 #### Testing with fpm
@@ -118,7 +116,7 @@ fpm build \
 ```
 fpm test \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```
 
 For the time being, you need to specify the same compiler flags to `fpm test`
diff --git a/fpm.toml b/fpm.toml
index 5252ab8..4fc21b3 100644
--- a/fpm.toml
+++ b/fpm.toml
@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.16.0"
+version = "0.16.1"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "milancurcic@hey.com"
diff --git a/src/nf/nf_activation.f90 b/src/nf/nf_activation.f90
index 67034a3..e413243 100644
--- a/src/nf/nf_activation.f90
+++ b/src/nf/nf_activation.f90
@@ -295,11 +295,7 @@ pure function eval_1d_relu_prime(self, x) result(res)
     class(relu), intent(in) :: self
     real, intent(in) :: x(:)
     real :: res(size(x))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = 0
-    end where
+    res = merge(1., 0., x > 0)
   end function eval_1d_relu_prime
 
   pure function eval_3d_relu(self, x) result(res)
@@ -315,11 +311,7 @@ pure function eval_3d_relu_prime(self, x) result(res)
     class(relu), intent(in) :: self
     real, intent(in) :: x(:,:,:)
     real :: res(size(x,1),size(x,2),size(x,3))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = 0
-    end where
+    res = merge(1., 0., x > 0)
   end function eval_3d_relu_prime
 
   pure function eval_1d_leaky_relu(self, x) result(res)
@@ -335,11 +327,7 @@ pure function eval_1d_leaky_relu_prime(self, x) result(res)
     class(leaky_relu), intent(in) :: self
     real, intent(in) :: x(:)
     real :: res(size(x))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = self % alpha
-    end where
+    res = merge(1., self%alpha, x > 0)
   end function eval_1d_leaky_relu_prime
 
   pure function eval_3d_leaky_relu(self, x) result(res)
@@ -355,11 +343,7 @@ pure function eval_3d_leaky_relu_prime(self, x) result(res)
     class(leaky_relu), intent(in) :: self
     real, intent(in) :: x(:,:,:)
     real :: res(size(x,1),size(x,2),size(x,3))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = self % alpha
-    end where
+    res = merge(1., self%alpha, x > 0)
   end function eval_3d_leaky_relu_prime
 
   pure function eval_1d_sigmoid(self, x) result(res)
@@ -465,11 +449,7 @@ pure function eval_1d_step(self, x) result(res)
     class(step), intent(in) :: self
     real, intent(in) :: x(:)
    real :: res(size(x))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = 0
-    end where
+    res = merge(1., 0., x > 0)
   end function eval_1d_step
 
   pure function eval_1d_step_prime(self, x) result(res)
@@ -485,11 +465,7 @@ pure function eval_3d_step(self, x) result(res)
     class(step), intent(in) :: self
     real, intent(in) :: x(:,:,:)
     real :: res(size(x,1),size(x,2),size(x,3))
-    where (x > 0)
-      res = 1
-    elsewhere
-      res = 0
-    end where
+    res = merge(1., 0., x > 0)
   end function eval_3d_step
 
   pure function eval_3d_step_prime(self, x) result(res)
diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90
index a1733c4..b480424 100644
--- a/src/nf/nf_conv2d_layer_submodule.f90
+++ b/src/nf/nf_conv2d_layer_submodule.f90
@@ -195,7 +195,7 @@ pure module function get_params(self) result(params)
 
     params = [ &
       pack(self % kernel, .true.), &
-      pack(self % biases, .true.) &
+      self % biases &
     ]
 
   end function get_params
@@ -207,7 +207,7 @@ pure module function get_gradients(self) result(gradients)
 
     gradients = [ &
       pack(self % dw, .true.), &
-      pack(self % db, .true.) &
+      self % db &
     ]
 
   end function get_gradients
diff --git a/src/nf/nf_dense_layer_submodule.f90 b/src/nf/nf_dense_layer_submodule.f90
index 5944f0f..4be23e3 100644
--- a/src/nf/nf_dense_layer_submodule.f90
+++ b/src/nf/nf_dense_layer_submodule.f90
@@ -27,11 +27,15 @@ pure module subroutine backward(self, input, gradient)
     real, intent(in) :: gradient(:)
     real :: db(self % output_size)
     real :: dw(self % input_size, self % output_size)
+    integer :: i
 
     db = gradient * self % activation % eval_prime(self % z)
-    dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
+!   dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
+    do concurrent (i = 1:size(db))
+      self % dw(:,i) = self % dw(:,i) + input(:) * db(i)
+    enddo
     self % gradient = matmul(self % weights, db)
-    self % dw = self % dw + dw
+!   self % dw = self % dw + dw
     self % db = self % db + db
 
   end subroutine backward
@@ -63,7 +67,7 @@ pure module function get_params(self) result(params)
 
     params = [ &
      pack(self % weights, .true.), &
-      pack(self % biases, .true.) &
+      self % biases &
     ]
 
   end function get_params
@@ -75,7 +79,7 @@ pure module function get_gradients(self) result(gradients)
 
     gradients = [ &
      pack(self % dw, .true.), &
-      pack(self % db, .true.) &
+      self % db &
     ]
 
   end function get_gradients
@@ -135,7 +139,7 @@ module subroutine init(self, input_shape)
     allocate(self % db(self % output_size))
     self % db = 0
 
-    allocate(self % gradient(self % output_size))
+    allocate(self % gradient(self % input_size))
     self % gradient = 0
 
   end subroutine init
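
For reference, the two refactoring patterns applied in this patch (where/elsewhere blocks replaced by the merge intrinsic, and an outer product built with matmul(reshape(...)) replaced by a do concurrent loop) can be checked in isolation. The program below is a minimal illustrative sketch, not part of the patch; the program name and all variables are made up for the demonstration.

program demo_refactor_patterns
  ! Illustrative sketch only: verifies that the two replacements used in the
  ! patch reproduce the results of the constructs they replace.
  implicit none
  real :: x(5), res_where(5), res_merge(5)
  real :: input(3), db(2), dw_matmul(3,2), dw_loop(3,2)
  integer :: i

  x = [-2., -1., 0., 1., 2.]

  ! Original pattern: where/elsewhere assignment
  where (x > 0)
    res_where = 1
  elsewhere
    res_where = 0
  end where

  ! Replacement: a single elemental merge
  res_merge = merge(1., 0., x > 0)

  print *, 'where vs merge agree:          ', all(res_where == res_merge)

  ! Original pattern: outer product via matmul of reshaped arrays
  input = [1., 2., 3.]
  db = [10., 20.]
  dw_matmul = matmul(reshape(input, [3, 1]), reshape(db, [1, 2]))

  ! Replacement: accumulate the outer product column by column
  dw_loop = 0
  do concurrent (i = 1:size(db))
    dw_loop(:,i) = dw_loop(:,i) + input(:) * db(i)
  end do

  print *, 'matmul vs do concurrent agree: ', all(abs(dw_matmul - dw_loop) < 1e-6)

end program demo_refactor_patterns

Both comparisons should print T: merge(1., 0., x > 0) is elementally equivalent to the where/elsewhere construct, and the do concurrent loop builds the same rank-2 outer product column by column while also letting the accumulation into self % dw happen in place.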