Skip to content

Commit

Permalink
fix(par): first version of message cache for MPI traffic (MODFLOW-USG…
Browse files Browse the repository at this point in the history
…S#1582)

* - cache mpi headers and bodies
* - final step in implementing caching: only cache message bodies (skip routing of headers and maps when possible)
* - fix: initialize mpi request handles
* - add MPI error check
* - PETSc vec readonly access
* - add more strategic MPI error checks
  • Loading branch information
mjr-deltares authored Feb 8, 2024
1 parent e338e83 commit 967f168
Show file tree
Hide file tree
Showing 17 changed files with 691 additions and 159 deletions.
60 changes: 30 additions & 30 deletions make/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,36 @@ include ./makedefaults

# Define the source file directories
SOURCEDIR1=../src
SOURCEDIR2=../src/Exchange
SOURCEDIR3=../src/Model
SOURCEDIR4=../src/Model/Geometry
SOURCEDIR5=../src/Model/TransportModel
SOURCEDIR2=../src/Model
SOURCEDIR3=../src/Model/TransportModel
SOURCEDIR4=../src/Model/GroundWaterFlow
SOURCEDIR5=../src/Model/Geometry
SOURCEDIR6=../src/Model/ModelUtilities
SOURCEDIR7=../src/Model/Connection
SOURCEDIR8=../src/Model/GroundWaterTransport
SOURCEDIR9=../src/Model/GroundWaterFlow
SOURCEDIR10=../src/Distributed
SOURCEDIR11=../src/Solution
SOURCEDIR12=../src/Solution/PETSc
SOURCEDIR13=../src/Solution/LinearMethods
SOURCEDIR14=../src/Timing
SOURCEDIR15=../src/Utilities
SOURCEDIR16=../src/Utilities/TimeSeries
SOURCEDIR17=../src/Utilities/Libraries
SOURCEDIR18=../src/Utilities/Libraries/rcm
SOURCEDIR19=../src/Utilities/Libraries/sparsekit
SOURCEDIR20=../src/Utilities/Libraries/sparskit2
SOURCEDIR21=../src/Utilities/Libraries/blas
SOURCEDIR22=../src/Utilities/Libraries/daglib
SOURCEDIR23=../src/Utilities/Idm
SOURCEDIR24=../src/Utilities/Idm/selector
SOURCEDIR25=../src/Utilities/Idm/mf6blockfile
SOURCEDIR26=../src/Utilities/Matrix
SOURCEDIR27=../src/Utilities/Vector
SOURCEDIR28=../src/Utilities/Observation
SOURCEDIR29=../src/Utilities/OutputControl
SOURCEDIR30=../src/Utilities/Memory
SOURCEDIR31=../src/Utilities/ArrayRead
SOURCEDIR7=../src/Model/GroundWaterTransport
SOURCEDIR8=../src/Model/Connection
SOURCEDIR9=../src/Distributed
SOURCEDIR10=../src/Utilities
SOURCEDIR11=../src/Utilities/Idm
SOURCEDIR12=../src/Utilities/Idm/mf6blockfile
SOURCEDIR13=../src/Utilities/Idm/selector
SOURCEDIR14=../src/Utilities/Vector
SOURCEDIR15=../src/Utilities/Matrix
SOURCEDIR16=../src/Utilities/Observation
SOURCEDIR17=../src/Utilities/ArrayRead
SOURCEDIR18=../src/Utilities/OutputControl
SOURCEDIR19=../src/Utilities/Libraries
SOURCEDIR20=../src/Utilities/Libraries/blas
SOURCEDIR21=../src/Utilities/Libraries/rcm
SOURCEDIR22=../src/Utilities/Libraries/sparsekit
SOURCEDIR23=../src/Utilities/Libraries/sparskit2
SOURCEDIR24=../src/Utilities/Libraries/daglib
SOURCEDIR25=../src/Utilities/Memory
SOURCEDIR26=../src/Utilities/TimeSeries
SOURCEDIR27=../src/Timing
SOURCEDIR28=../src/Solution
SOURCEDIR29=../src/Solution/PETSc
SOURCEDIR30=../src/Solution/LinearMethods
SOURCEDIR31=../src/Exchange

VPATH = \
${SOURCEDIR1} \
Expand Down Expand Up @@ -237,6 +237,7 @@ $(OBJDIR)/SparseMatrix.o \
$(OBJDIR)/LinearSolverBase.o \
$(OBJDIR)/ims8reordering.o \
$(OBJDIR)/ModflowInput.o \
$(OBJDIR)/IdmLogger.o \
$(OBJDIR)/Integer2dReader.o \
$(OBJDIR)/VirtualExchange.o \
$(OBJDIR)/GridSorting.o \
Expand Down Expand Up @@ -268,7 +269,6 @@ $(OBJDIR)/RouterBase.o \
$(OBJDIR)/ImsLinearSolver.o \
$(OBJDIR)/ims8base.o \
$(OBJDIR)/StructVector.o \
$(OBJDIR)/IdmLogger.o \
$(OBJDIR)/DefinitionSelect.o \
$(OBJDIR)/InputLoadType.o \
$(OBJDIR)/Integer1dReader.o \
Expand Down
10 changes: 10 additions & 0 deletions msvs/mf6core.vfproj
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true"/></File>
<File RelativePath="..\src\Distributed\MpiMessageCache.f90">
<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true"/></File>
<File RelativePath="..\src\Distributed\MpiRouter.f90">
<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
Expand All @@ -64,6 +69,11 @@
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true"/></File>
<File RelativePath="..\src\Distributed\MpiUnitCache.f90">
<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Debug|x64" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|Win32" ExcludedFromBuild="true"/></File>
<File RelativePath="..\src\Distributed\MpiWorld.f90">
<FileConfiguration Name="Debug|Win32" ExcludedFromBuild="true"/>
<FileConfiguration Name="Release|x64" ExcludedFromBuild="true"/>
Expand Down
2 changes: 2 additions & 0 deletions pymake/excludefiles.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
../src/Utilities/Matrix/PetscMatrix.F90
../src/Utilities/Vector/PetscVector.F90
../src/Distributed/MpiMessageBuilder.f90
../src/Distributed/MpiMessageCache.f90
../src/Distributed/MpiRouter.f90
../src/Distributed/MpiRunControl.F90
../src/Distributed/MpiUnitCache.f90
../src/Distributed/MpiWorld.f90
40 changes: 6 additions & 34 deletions src/Distributed/MpiMessageBuilder.f90
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ module MpiMessageBuilderModule
procedure, private :: create_vdc_snd_map
procedure, private :: create_vdc_snd_body
procedure, private :: create_vdc_rcv_body
procedure, private :: create_element_map
end type

contains
Expand Down Expand Up @@ -114,7 +113,7 @@ subroutine create_header_snd(this, rank, stage, hdrs_snd_type)
class(MpiMessageBuilderType) :: this
integer(I4B) :: rank
integer(I4B) :: stage
integer :: hdrs_snd_type
integer, intent(out) :: hdrs_snd_type
! local
integer(I4B) :: i, offset, nr_types
class(VirtualDataContainerType), pointer :: vdc
Expand Down Expand Up @@ -186,7 +185,7 @@ end subroutine create_header_snd

subroutine create_header_rcv(this, hdr_rcv_type)
class(MpiMessageBuilderType) :: this
integer :: hdr_rcv_type
integer, intent(out) :: hdr_rcv_type
! local
integer :: ierr

Expand All @@ -203,7 +202,7 @@ subroutine create_map_snd(this, rank, stage, map_snd_type)
class(MpiMessageBuilderType) :: this
integer(I4B) :: rank
integer(I4B) :: stage
integer :: map_snd_type
integer, intent(out) :: map_snd_type
! local
integer(I4B) :: i, offset, nr_types
class(VirtualDataContainerType), pointer :: vdc
Expand Down Expand Up @@ -280,7 +279,7 @@ subroutine create_map_rcv(this, rcv_map, nr_headers, map_rcv_type)
class(MpiMessageBuilderType) :: this
type(VdcReceiverMapsType), dimension(:) :: rcv_map
integer(I4B) :: nr_headers
integer :: map_rcv_type
integer, intent(out) :: map_rcv_type
! local
integer(I4B) :: i, j, nr_elems, type_cnt
integer :: ierr, max_nr_maps
Expand Down Expand Up @@ -323,7 +322,7 @@ subroutine create_body_rcv(this, rank, stage, body_rcv_type)
class(MpiMessageBuilderType) :: this
integer(I4B) :: rank
integer(I4B) :: stage
integer :: body_rcv_type
integer, intent(out) :: body_rcv_type
! local
integer(I4B) :: i, nr_types, offset
class(VirtualDataContainerType), pointer :: vdc
Expand Down Expand Up @@ -400,7 +399,7 @@ subroutine create_body_snd(this, rank, stage, headers, maps, body_snd_type)
integer(I4B) :: stage
type(VdcHeaderType), dimension(:) :: headers
type(VdcReceiverMapsType), dimension(:) :: maps
integer :: body_snd_type
integer, intent(out) :: body_snd_type
! local
integer(I4B) :: i, nr_headers
class(VirtualDataContainerType), pointer :: vdc
Expand Down Expand Up @@ -627,33 +626,6 @@ function create_vdc_snd_body(this, vdc, vdc_maps, rank, stage) result(new_type)

end function create_vdc_snd_body

!> @brief Temp. function to generate a dummy (complete) map
!<
function create_element_map(this, rank, vdc, vd) result(el_map)
use MemoryManagerModule, only: get_mem_shape, get_mem_rank
use ConstantsModule, only: MAXMEMRANK
class(MpiMessageBuilderType) :: this
integer(I4B) :: rank
class(VirtualDataContainerType), pointer :: vdc
class(VirtualDataType), pointer :: vd
integer(I4B), dimension(:), pointer, contiguous :: el_map
! local
integer(I4B), dimension(MAXMEMRANK) :: mem_shp
integer(I4B) :: i, nrow, mem_rank

el_map => null()
call get_mem_rank(vd%virtual_mt%name, vd%virtual_mt%path, mem_rank)
call get_mem_shape(vd%virtual_mt%name, vd%virtual_mt%path, mem_shp)
if (mem_rank > 0) then
nrow = mem_shp(mem_rank)
allocate (el_map(nrow))
do i = 1, nrow
el_map(i) = i - 1
end do
end if

end function create_element_map

function get_vdc_from_hdr(this, header) result(vdc)
class(MpiMessageBuilderType) :: this
type(VdcHeaderType) :: header
Expand Down
130 changes: 130 additions & 0 deletions src/Distributed/MpiMessageCache.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
module MpiMessageCacheModule
use KindModule, only: I4B
use SimStagesModule, only: NR_SIM_STAGES
use ListModule
use STLVecIntModule
use MpiUnitCacheModule
implicit none
private

! the message types for caching during a simulation stage:
integer(I4B), public, parameter :: MPI_BDY_RCV = 1 !< receiving data (body) from ranks
integer(I4B), public, parameter :: MPI_BDY_SND = 2 !< sending data (body) to ranks
integer(I4B), public, parameter :: NR_MSG_TYPES = 2 !< the total number of message types to be cached

! expose this from the unit cache module
public :: NO_CACHED_VALUE

!> @brief Facility to cache the constructed MPI datatypes.
!! This will avoid having to construct them over and over
!! again for the communication inside the timestep loop.
!! This class deals with separate caches for different
!! units (solutions or global) and for different types of
!< messages within the communication stage.
type, public :: MpiMessageCacheType
type(STLVecInt) :: cached_ids !< a vector with ids for the cached units (solution ids)
type(ListType) :: unit_caches !< a list with caches per unit
contains
procedure :: init => mmc_init
procedure :: get => mmc_get
procedure :: put => mmc_put
procedure :: destroy => mmc_destroy
end type MpiMessageCacheType

contains

!< @brief Initialize the MPI type cache system.
!<
subroutine mmc_init(this)
class(MpiMessageCacheType) :: this !< the message cache

call this%cached_ids%init()

end subroutine mmc_init

!< @brief Get the cached mpi datatype for the given
!! unit, rank, stage, and message element. Returns
!< NO_CACHED_VALUE when not in cache.
function mmc_get(this, unit, rank, stage, msg_id) result(mpi_type)
class(MpiMessageCacheType) :: this !< the message cache
integer(I4B) :: unit !< the unit (solution or global)
integer(I4B) :: rank !< the rank of the MPI process to communicate with
integer(I4B) :: stage !< the simulation stage at which the message is sent
integer(I4B) :: msg_id !< the message type as an integer between 1 and NR_MSG_TYPES (see above for predefined values)
integer :: mpi_type !< the resulting mpi datatype
! local
integer(I4B) :: unit_idx
class(*), pointer :: obj_ptr

mpi_type = NO_CACHED_VALUE

unit_idx = this%cached_ids%get_index(unit)
if (unit_idx == -1) return ! not cached

obj_ptr => this%unit_caches%GetItem(unit_idx)
select type (obj_ptr)
class is (MpiUnitCacheType)
mpi_type = obj_ptr%get_cached(rank, stage, msg_id)
end select

end function mmc_get

!> @brief Put the mpi datatype for this particular unit,
!! rank, and stage in cache. The datatype should be
!< committed to the type database externally.
subroutine mmc_put(this, unit, rank, stage, msg_id, mpi_type)
class(MpiMessageCacheType) :: this !< the message cache
integer(I4B) :: unit !< the unit (solution or global)
integer(I4B) :: rank !< the rank of the MPI process to communicate with
integer(I4B) :: stage !< the simulation stage at which the message is sent
integer(I4B) :: msg_id !< the message type as an integer between 1 and NR_MSG_TYPES (see above for predefined values)
integer :: mpi_type !< the mpi datatype to cache
! local
integer(I4B) :: unit_idx
type(MpiUnitCacheType), pointer :: new_cache
class(*), pointer :: obj_ptr

unit_idx = this%cached_ids%get_index(unit)
if (unit_idx == -1) then
! add to vector with cached unit ids
call this%cached_ids%push_back(unit)
! create and add unit cache
allocate (new_cache)
call new_cache%init(NR_SIM_STAGES, NR_MSG_TYPES)
obj_ptr => new_cache
call this%unit_caches%Add(obj_ptr)
unit_idx = this%cached_ids%size
end if

! get the cache for this unit
obj_ptr => this%unit_caches%GetItem(unit_idx)
select type (obj_ptr)
class is (MpiUnitCacheType)
call obj_ptr%cache(rank, stage, msg_id, mpi_type)
end select

end subroutine mmc_put

!< @brief Destroy the MPI type cache system.
!<
subroutine mmc_destroy(this)
class(MpiMessageCacheType) :: this !< the message cache
! local
integer(I4B) :: i
class(*), pointer :: obj_ptr

! clear caches
do i = 1, this%cached_ids%size
obj_ptr => this%unit_caches%GetItem(i)
select type (obj_ptr)
class is (MpiUnitCacheType)
call obj_ptr%destroy()
end select
end do
call this%unit_caches%Clear(destroy=.true.)

call this%cached_ids%destroy()

end subroutine mmc_destroy

end module
Loading

0 comments on commit 967f168

Please sign in to comment.