Skip to content

Commit

Permalink
Replacement of a matmul + use of merge (#181)
Browse files Browse the repository at this point in the history
* dense_layer: replace a matmul(reshape) with a do concurrent loop

* nf_activation: replace some where constructs with the merge intrinsic

* Set correct size for self%gradient in dense_layer

* remove some unneeded pack()

* Remove notes on -fno-frontend-optimize (no longer necessary)

* Bump patch version

---------

Co-authored-by: Vandenplas, Jeremie <[email protected]>
Co-authored-by: milancurcic <[email protected]>
  • Loading branch information
3 people authored May 9, 2024
1 parent c3924b5 commit 6dfaed0
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 45 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,21 @@ With gfortran, the following will create an optimized build of neural-fortran:
```
fpm build \
--profile release \
--flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
--flag "-I$HDF5INC -L$HDF5LIB"
```

HDF5 is now a required dependency, so you have to provide it to fpm.
The above command assumes that the `HDF5INC` and `HDF5LIB` environment
variables are set to the include and library paths, respectively, of your
HDF5 install.
The `-fno-frontend-optimize` disables some optimizations that may be harmful
when building neural-fortran.

If you use Conda, the following instructions work:

```
conda create -n nf hdf5
conda activate nf
fpm build --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
fpm test --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
fpm build --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
fpm test --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
```

#### Building in parallel mode
Expand All @@ -110,15 +108,15 @@ in parallel, respectively:
fpm build \
--compiler caf \
--profile release \
--flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
--flag "-I$HDF5INC -L$HDF5LIB"
```

#### Testing with fpm

```
fpm test \
--profile release \
--flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
--flag "-I$HDF5INC -L$HDF5LIB"
```

For the time being, you need to specify the same compiler flags to `fpm test`
Expand Down
2 changes: 1 addition & 1 deletion fpm.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = "neural-fortran"
version = "0.16.0"
version = "0.16.1"
license = "MIT"
author = "Milan Curcic"
maintainer = "[email protected]"
Expand Down
36 changes: 6 additions & 30 deletions src/nf/nf_activation.f90
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,7 @@ pure function eval_1d_relu_prime(self, x) result(res)
class(relu), intent(in) :: self
real, intent(in) :: x(:)
real :: res(size(x))
where (x > 0)
res = 1
elsewhere
res = 0
end where
res = merge(1., 0., x > 0)
end function eval_1d_relu_prime

pure function eval_3d_relu(self, x) result(res)
Expand All @@ -315,11 +311,7 @@ pure function eval_3d_relu_prime(self, x) result(res)
class(relu), intent(in) :: self
real, intent(in) :: x(:,:,:)
real :: res(size(x,1),size(x,2),size(x,3))
where (x > 0)
res = 1
elsewhere
res = 0
end where
res = merge(1., 0., x > 0)
end function eval_3d_relu_prime

pure function eval_1d_leaky_relu(self, x) result(res)
Expand All @@ -335,11 +327,7 @@ pure function eval_1d_leaky_relu_prime(self, x) result(res)
class(leaky_relu), intent(in) :: self
real, intent(in) :: x(:)
real :: res(size(x))
where (x > 0)
res = 1
elsewhere
res = self % alpha
end where
res = merge(1., self%alpha, x > 0)
end function eval_1d_leaky_relu_prime

pure function eval_3d_leaky_relu(self, x) result(res)
Expand All @@ -355,11 +343,7 @@ pure function eval_3d_leaky_relu_prime(self, x) result(res)
class(leaky_relu), intent(in) :: self
real, intent(in) :: x(:,:,:)
real :: res(size(x,1),size(x,2),size(x,3))
where (x > 0)
res = 1
elsewhere
res = self % alpha
end where
res = merge(1., self%alpha, x > 0)
end function eval_3d_leaky_relu_prime

pure function eval_1d_sigmoid(self, x) result(res)
Expand Down Expand Up @@ -465,11 +449,7 @@ pure function eval_1d_step(self, x) result(res)
class(step), intent(in) :: self
real, intent(in) :: x(:)
real :: res(size(x))
where (x > 0)
res = 1
elsewhere
res = 0
end where
res = merge(1., 0., x > 0)
end function eval_1d_step

pure function eval_1d_step_prime(self, x) result(res)
Expand All @@ -485,11 +465,7 @@ pure function eval_3d_step(self, x) result(res)
class(step), intent(in) :: self
real, intent(in) :: x(:,:,:)
real :: res(size(x,1),size(x,2),size(x,3))
where (x > 0)
res = 1
elsewhere
res = 0
end where
res = merge(1., 0., x > 0)
end function eval_3d_step

pure function eval_3d_step_prime(self, x) result(res)
Expand Down
4 changes: 2 additions & 2 deletions src/nf/nf_conv2d_layer_submodule.f90
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ pure module function get_params(self) result(params)

params = [ &
pack(self % kernel, .true.), &
pack(self % biases, .true.) &
self % biases &
]

end function get_params
Expand All @@ -207,7 +207,7 @@ pure module function get_gradients(self) result(gradients)

gradients = [ &
pack(self % dw, .true.), &
pack(self % db, .true.) &
self % db &
]

end function get_gradients
Expand Down
14 changes: 9 additions & 5 deletions src/nf/nf_dense_layer_submodule.f90
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ pure module subroutine backward(self, input, gradient)
real, intent(in) :: gradient(:)
real :: db(self % output_size)
real :: dw(self % input_size, self % output_size)
integer :: i

db = gradient * self % activation % eval_prime(self % z)
dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
! dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
do concurrent (i = 1:size(db))
self % dw(:,i) = self % dw(:,i) + input(:) * db(i)
enddo
self % gradient = matmul(self % weights, db)
self % dw = self % dw + dw
! self % dw = self % dw + dw
self % db = self % db + db

end subroutine backward
Expand Down Expand Up @@ -63,7 +67,7 @@ pure module function get_params(self) result(params)

params = [ &
pack(self % weights, .true.), &
pack(self % biases, .true.) &
self % biases &
]

end function get_params
Expand All @@ -75,7 +79,7 @@ pure module function get_gradients(self) result(gradients)

gradients = [ &
pack(self % dw, .true.), &
pack(self % db, .true.) &
self % db &
]

end function get_gradients
Expand Down Expand Up @@ -135,7 +139,7 @@ module subroutine init(self, input_shape)
allocate(self % db(self % output_size))
self % db = 0

allocate(self % gradient(self % output_size))
allocate(self % gradient(self % input_size))
self % gradient = 0

end subroutine init
Expand Down

0 comments on commit 6dfaed0

Please sign in to comment.