diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000..e35d885 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1 @@ +_build diff --git a/doc/api.rst b/doc/api.rst index 4b99718..22be503 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -49,13 +49,19 @@ Three types of boundary condition are currently supported: Name Description =============== ========================================= GO_BC_NONE No boundary conditions are applied. -GO_BC_EXTERNAL Some external forcing is applied. This must be implemented by a kernel. The domain must be defined with a T-point mask (see :ref:`gocean1.0-grid-init`). +GO_BC_EXTERNAL Some external forcing is applied. This must be implemented by a kernel. + The domain can be specified using a ``tmask``, but if no ``tmask`` is + specified, a dummy ``tmask`` is created that defines an all-ocean + domain. GO_BC_PERIODIC Periodic boundary conditions are applied. =============== ========================================= The infrastructure requires this information in order to determine the extent of the model grid. +Note that at this stage ``GO_BC_PERIODIC`` is not supported when +using distributed memory. This is tracked in issue #54. + The index offset is required because a model (kernel) developer has choice in how they actually implement the staggering of variables on a grid. This comes down to a choice of which grid points in the vicinity @@ -98,11 +104,12 @@ object. This is done via a call to the ``grid_init`` subroutine:: !! wet (1), dry (0) or external (-1). integer, dimension(m,n), intent(in), optional :: tmask + If no T-mask is supplied then this routine configures the grid -appropriately for an all-wet domain with periodic boundary conditions -in both the *x*- and *y*-dimensions. It should also be noted that -currently only grids with constant resolution in *x* and *y* are -supported by this routine. +appropriately for an all-wet domain by allocating a default +T-mask. It should also be noted that currently only grids with +constant resolution in *x* and *y* are supported by this routine. + .. _gocean1.0-fields: @@ -128,11 +135,21 @@ constructor:: sshn_v = r2d_field(model_grid, GO_V_POINTS) sshn_t = r2d_field(model_grid, GO_T_POINTS) -The constructor takes two arguments: +The constructor takes two mandatory and two optional arguments: 1. The grid on which the field exists 2. The type of grid point at which the field is defined (``GO_U_POINTS``, ``GO_V_POINTS``, ``GO_T_POINTS`` or ``GO_F_POINTS``) + 3. ``do_tile``: whether the field should be tiled among all threads or + allocated as a single field (the latter is not currently + supported by PSyclone). + 4. ``init_global_data``: an optional global 2D Fortran array, which must be + provided on each rank. On each rank the field will be initialised + with the data from the corresponding subdomain. This is just a convenience + for users with a small problem size, since typically for large data sets + using a global array will create scalability problems. In general, it is + the responsibility of the user to initialise an array with the required + local data. Note that the grid object must have been fully configured (by a call to ``grid_init`` for instance) before it is passed into this diff --git a/doc/conf.py b/doc/conf.py index 5d3c782..2624c93 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases.
-language = None +# language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -89,7 +89,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -182,7 +182,7 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/', None)} # -- Options for todo extension ---------------------------------------------- diff --git a/finite_difference/src/Makefile b/finite_difference/src/Makefile index d9e3707..6baabb7 100644 --- a/finite_difference/src/Makefile +++ b/finite_difference/src/Makefile @@ -89,7 +89,7 @@ all: ${API_LIB} # Create the archive. ${API_LIB}: ${MODULES} - ${AR} ${ARFLAGS} ${API_LIB} *.o + ${AR} ${ARFLAGS} ${API_LIB} $(MODULES) install: ${MAKE} -C .. install diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index f6fc26e..59e577d 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -26,6 +26,7 @@ ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. !------------------------------------------------------------------------------ ! Author: A. R. Porter, STFC Daresbury Laboratory +! Modified: J. Henrichs, Australian Bureau of Meteorology !> Module for describing all aspects of a field (which exists on some !! grid). @@ -161,6 +162,7 @@ end subroutine write_to_device_f_interface procedure, pass :: read_from_device procedure, pass :: write_to_device procedure, public :: halo_exchange + procedure, public :: gather_inner_data end type r2d_field !> Interface for the copy_field operation. Overloaded to take @@ -237,9 +239,10 @@ end subroutine write_to_device_f_interface !=================================================== - function r2d_field_constructor(grid, & + function r2d_field_constructor(grid, & grid_points, & - do_tile) result(self) + do_tile, & + init_global_data) result(self) use parallel_mod, only: go_decompose, on_master !$ use omp_lib, only : omp_get_max_threads implicit none @@ -252,6 +255,9 @@ function r2d_field_constructor(grid, & !> a single field should be allocated (which is not currently !> supported by PSyclone) logical, intent(in), optional :: do_tile + !> An optional global array with which the field in each domain + !> will be initialised + real(go_wp), dimension(:,:), intent(in), optional :: init_global_data ! Local declarations type(r2d_field), target :: self logical :: local_do_tiling = .false. @@ -261,7 +267,7 @@ function r2d_field_constructor(grid, & !> The upper bounds actually used to allocate arrays (as opposed !! to the limits carried around with the field) integer :: upper_x_bound, upper_y_bound - integer :: itile, nthreads, ntilex, ntiley + integer :: itile, nthreads, ntilex, ntiley, dx, dy if (present(do_tile)) then local_do_tiling = do_tile @@ -349,7 +355,7 @@ function r2d_field_constructor(grid, & end if ! Since we're allocating the arrays to be larger than strictly - ! required we explicitly set all elements to -999 in case the code + ! required we explicitly set all elements to 0 in case the code ! does access 'out-of-bounds' elements during speculative ! execution. If we're running with OpenMP this also gives ! us the opportunity to do a 'first touch' policy to aid with @@ -368,6 +374,19 @@ function r2d_field_constructor(grid, & else self%data(:,:) = 0.0_go_wp end if + + if (present(init_global_data)) then + dx = grid%subdomain%global%xstart - self%internal%xstart + dy = grid%subdomain%global%ystart - self%internal%ystart + +!$OMP PARALLEL DO schedule(runtime), default(none), & +!$OMP private(ji,jj), shared(self, grid, init_global_data, dx, dy) + do jj = grid%subdomain%internal%ystart, grid%subdomain%internal%ystop + do ji = grid%subdomain%internal%xstart, grid%subdomain%internal%xstop + self%data(ji, jj) = init_global_data(ji+dx, jj+dy) + end do + end do + end if end function r2d_field_constructor !=================================================== @@ -506,6 +525,8 @@ subroutine write_to_device(self, startx, starty, nx, ny, blocking) end subroutine write_to_device + !=================================================== + function get_data(self) result(dptr) !> Getter for the data associated with a field. If the data is on a ! device it ensures that the host copy is up-to-date with that on @@ -1287,6 +1308,89 @@ end function array_checksum !=================================================== + !> Collect a (distributed) field into a global array + !! on the master node. + subroutine gather_inner_data(self, global_data) + use parallel_utils_mod, only: get_num_ranks, gather + use parallel_mod, only: on_master + + class(r2d_field), intent(in) :: self + real(go_wp), dimension(:,:), & + allocatable, intent(out) :: global_data + + real(go_wp), dimension(:), allocatable :: send_buffer, recv_buffer + + integer :: dx, dy, ji, jj, i, n, rank, halo_x, halo_y + integer :: x_start, x_stop, y_start, y_stop, ierr + + allocate(global_data(self%grid%global_nx, self%grid%global_ny), & + stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate global result array') + end if + + ! No MPI (or single process), just copy the data out + if (get_num_ranks() == 1) then + ! Compute size of inner area only + dx = self%internal%xstart - 1 + dy = self%internal%ystart - 1 + do jj= self%internal%ystart, self%internal%ystop + do ji = self%internal%xstart, self%internal%xstop + global_data(ji-dx,jj-dy) = self%data(ji,jj) + end do + end do + return + endif + + ! Determine maximum size of data to be sent. We don't need + ! to send the halo, so reduce max_width and max_height by + ! 2*halo size. + halo_x = self%internal%xstart-1 + halo_y = self%internal%ystart-1 + n = (self%grid%decomp%max_width - 2*halo_x) * & + (self%grid%decomp%max_height - 2*halo_y) + allocate(send_buffer(n), stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate send buffer') + end if + allocate(recv_buffer(n*get_num_ranks()), stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate receive buffer') + end if + + ! Copy data into 1D send buffer. + i = 0 + do jj= self%internal%ystart, self%internal%ystop + do ji = self%internal%xstart, self%internal%xstop + i = i + 1 + send_buffer(i) = self%data(ji,jj) + end do + end do + + ! Collect all send_buffers on the master: + call gather(send_buffer, recv_buffer) + + if (on_master()) then + !
Copy the data from each process into the global array + do rank=1, get_num_ranks() + x_start = self%grid%decomp%subdomains(rank)%global%xstart + x_stop = self%grid%decomp%subdomains(rank)%global%xstop + y_start = self%grid%decomp%subdomains(rank)%global%ystart + y_stop = self%grid%decomp%subdomains(rank)%global%ystop + i = (rank-1) * n + do jj= y_start, y_stop + do ji = x_start, x_stop + i = i + 1 + global_data(ji, jj) = recv_buffer(i) + end do + end do + enddo + endif + + end subroutine gather_inner_data + + !=================================================== + subroutine init_periodic_bc_halos(fld) implicit none class(field_type), intent(inout) :: fld diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 272f037..5cbdcc2 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -1,3 +1,33 @@ +!------------------------------------------------------------------------------ +! BSD 2-Clause License +! +! Copyright (c) 2017-2024, Science and Technology Facilities Council +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +!------------------------------------------------------------------------------ +! Author: A. R. Porter, STFC Daresbury Laboratory +! Modified: J. Henrichs, Australian Bureau of Meteorology + !> Module for describing and storing all information related to !! a finite-difference grid. module grid_mod @@ -66,10 +96,9 @@ module grid_mod !! -1 == wet outside simulated region !! This is the key quantity that determines the region that !! is actually simulated. However, we also support the - !! specification of a model consisting entirely of wet points - !! with periodic boundary conditions. Since this does not - !! require a T-mask, we do not allocate this array for that - !! case. + !! specification of a model consisting entirely of wet points. + !! In this case a dummy tmask will be allocated, set to indicate + !! an all wet domain. 
integer, allocatable :: tmask(:,:) !> Pointer to tmask on remote device (if any) type(c_ptr) :: tmask_device @@ -154,7 +183,8 @@ end function get_tmask subroutine decompose(self, domainx, domainy, & ndomains, ndomainx, ndomainy, & halo_width) - use parallel_mod, only: get_num_ranks, get_rank, go_decompose + use parallel_mod, only: get_num_ranks, get_rank, & + go_decompose implicit none class (grid_type), target, intent(inout) :: self !> Dimensions of the domain to decompose @@ -295,7 +325,8 @@ end function grid_constructor !! @param[in] dyarg Grid spacing in y dimension !! @param[in] tmask Array holding the T-point mask which defines !! the contents of the local domain. Need not be - !! supplied if domain is all wet and has PBCs. + !! supplied if domain is all wet, in which case a + !! dummy all-wet tmask will be created internally. subroutine grid_init(grid, dxarg, dyarg, tmask) use decomposition_mod, only: subdomain_type, decomposition_type use parallel_mod, only: map_comms, get_rank, get_num_ranks, on_master @@ -359,15 +390,18 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) ystart = grid%subdomain%internal%ystart ystop = grid%subdomain%internal%ystop - ! Copy-in the externally-supplied T-mask, if any. If using OpenMP - ! then apply first-touch policy for data locality. + ! Even if no tmask is supplied, we will create one (with all wet points) + ! so that kernels that query the tmask will get useful values + allocate(grid%tmask(grid%nx, grid%ny), stat=ierr(1)) + if( ierr(1) /= 0 )then + call gocean_stop('grid_init: failed to allocate array for T mask') + end if + if( present(tmask) )then - allocate(grid%tmask(grid%nx, grid%ny), stat=ierr(1)) - if( ierr(1) /= 0 )then - call gocean_stop('grid_init: failed to allocate array for T mask') - end if -!> TODO should use thread tiling here but that is currently only set-up -!! inside a field object. + ! Copy-in the externally-supplied T-mask, if any. If using OpenMP + ! then apply first-touch policy for data locality. + !> TODO should use thread tiling here but that is currently only set-up + !! inside a field object. !$OMP PARALLEL DO schedule(runtime), default(none), private(ji,jj), & !$OMP shared(grid, tmask, ystart, ystop, xstart, xstop) do jj = ystart-1, ystop+1 @@ -395,18 +429,29 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) do ji = xstop+2, grid%nx grid%tmask(ji, :) = grid%tmask(xstop+1, :) end do - else - ! No T-mask supplied. Check that grid has PBCs in both - ! x and y dimensions otherwise we won't know what to do. - if( .not. ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .and. & - (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) )then - call gocean_stop('grid_init: ERROR: No T-mask supplied and '// & - 'grid does not have periodic boundary conditions!') - end if !> TODO add support for PBCs in parallel - if(get_num_ranks() > 1)then - call gocean_stop('grid_init: PBCs not yet implemented with MPI') + else + ! No T-mask supplied. Check whether the grid has PBCs, which are + ! not yet supported when using distributed memory (i.e. when + ! there is more than one process). + if ( (get_num_ranks() > 1) .and. & + ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & + (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) ) then + call gocean_stop('grid_init: ERROR: Periodic boundary conditions ' & + & // 'are not yet supported with distributed memory.') end if + + ! Initialise an artificial all-wet tmask. If using OpenMP then apply + ! first-touch policy for data locality.
+ !$OMP PARALLEL DO schedule(runtime), default(none), private(ji,jj), & + !$OMP shared(grid, ystart, ystop, xstart, xstop) + do jj = ystart-1, ystop+1 + do ji = xstart-1, xstop+1 + grid%tmask(ji,jj) = 1 + end do + end do + !$OMP END PARALLEL DO + end if ! T-mask supplied ! For a regular, orthogonal mesh the spatial resolution is constant diff --git a/finite_difference/src/parallel/parallel_utils_mod.f90 b/finite_difference/src/parallel/parallel_utils_mod.f90 index 4efa9bc..333853e 100644 --- a/finite_difference/src/parallel/parallel_utils_mod.f90 +++ b/finite_difference/src/parallel/parallel_utils_mod.f90 @@ -64,7 +64,7 @@ module parallel_utils_mod public parallel_init, parallel_finalise, parallel_abort, get_max_tag public get_rank, get_num_ranks, post_receive, post_send, global_sum - public msg_wait, msg_wait_all + public msg_wait, msg_wait_all, gather public MSG_UNDEFINED, MSG_REQUEST_NULL, DIST_MEM_ENABLED contains @@ -103,7 +103,6 @@ end subroutine parallel_finalise !! @param[in] msg Message to print - reason we're stopping subroutine parallel_abort(msg) use iso_fortran_env, only : error_unit ! access computing environment - implicit none character(len=*), intent(in) :: msg write(error_unit, *) msg @@ -238,4 +237,21 @@ subroutine global_sum(var) end subroutine global_sum + !================================================ + + subroutine gather(send_buffer, recv_buffer) + !> Gathers the data in the send buffer from all + !> processes into the receive buffer. + real(go_wp), dimension(:) :: send_buffer, recv_buffer + + integer :: ierr + integer :: n + + n = size(send_buffer) + call MPI_Gather(send_buffer, n, MPI_DOUBLE_PRECISION, & + recv_buffer, n, MPI_DOUBLE_PRECISION, & + 0, MPI_COMM_WORLD, ierr) + + end subroutine gather + end module parallel_utils_mod diff --git a/finite_difference/src/parallel/parallel_utils_stub_mod.f90 b/finite_difference/src/parallel/parallel_utils_stub_mod.f90 index cc3fc82..ad04d47 100644 --- a/finite_difference/src/parallel/parallel_utils_stub_mod.f90 +++ b/finite_difference/src/parallel/parallel_utils_stub_mod.f90 @@ -45,7 +45,7 @@ module parallel_utils_mod logical, parameter :: DIST_MEM_ENABLED = .False. public parallel_init, parallel_finalise, parallel_abort - public get_rank, get_num_ranks, get_max_tag + public get_rank, get_num_ranks, get_max_tag, gather public msg_wait, msg_wait_all, post_receive, post_send, global_sum public MSG_UNDEFINED, MSG_REQUEST_NULL, DIST_MEM_ENABLED @@ -149,4 +149,16 @@ subroutine global_sum(var) real(go_wp), intent(inout) :: var end subroutine global_sum + !================================================ + + subroutine gather(send_buffer, recv_buffer) + !> Gathers the data in the send buffer from all + !> processes into the receive buffer. + real(go_wp), dimension(:) :: send_buffer, recv_buffer + + recv_buffer = send_buffer + + end subroutine gather + + end module parallel_utils_mod diff --git a/finite_difference/tests/dist_mem/Makefile b/finite_difference/tests/dist_mem/Makefile index a4e5a6c..b6b38d7 100644 --- a/finite_difference/tests/dist_mem/Makefile +++ b/finite_difference/tests/dist_mem/Makefile @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------ # BSD 2-Clause License # -# Copyright (c) 2019-2020, Science and Technology Facilities Council. +# Copyright (c) 2019-2024, Science and Technology Facilities Council. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #------------------------------------------------------------------------------ # Author: A. R. Porter, STFC Daresbury Laboratory # Modified: S. Siso, STFC Daresbury Laboratory +# Modified: J. Henrichs, Bureau of Meteorology # Makefile for dist_mem tests for the dl_esm_inf finite difference library. # @@ -39,6 +40,12 @@ # export AR=ar # make # + +# Set some useful default values +F90 ?= mpif90 +MPIRUN ?= mpirun +F90FLAGS ?= -g + all: tests # How we launch MPI jobs @@ -49,10 +56,10 @@ INF_DIR=../.. INF_INC = ${INF_DIR}/src INF_LIB = ${INF_DIR}/src/lib_fd.a -.PHONY: test_xsplit test_ysplit test_xysplit inf_lib +.PHONY: test_xsplit test_ysplit test_xysplit test_reduction inf_lib # Compile and run tests. This is a very basic target. -tests: test_xsplit test_ysplit test_xysplit test_gsum +tests: test_xsplit test_ysplit test_xysplit test_gsum test_reduction test_xsplit: inf_lib test_halos.exe JPIGLO=10 JPJGLO=4 ${MPIRUN} -np 2 ./test_halos.exe @@ -68,6 +75,10 @@ test_gsum: inf_lib test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 4 ./test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 6 ./test_gsum.exe +test_reduction: inf_lib test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 4 ./test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 6 ./test_reduction.exe + inf_lib: ${MAKE} -C ${INF_DIR} F90="${F90}" dm_fd_lib @@ -75,6 +86,9 @@ inf_lib: test_halos.exe: inf_lib test_halos.o ${F90} -o $@ ${LDFLAGS} test_halos.o ${INF_LIB} +test_reduction.exe: inf_lib test_reduction.o + ${F90} -o $@ ${LDFLAGS} test_reduction.o ${INF_LIB} + test_gsum.exe: inf_lib test_gsum.o ${F90} -o $@ ${LDFLAGS} test_gsum.o ${INF_LIB} diff --git a/finite_difference/tests/dist_mem/test_reduction.f90 b/finite_difference/tests/dist_mem/test_reduction.f90 new file mode 100644 index 0000000..f85a4c6 --- /dev/null +++ b/finite_difference/tests/dist_mem/test_reduction.f90 @@ -0,0 +1,194 @@ +!------------------------------------------------------------------------------ +! BSD 2-Clause License +! +! Copyright (c) 2024, Science and Technology Facilities Council +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +!------------------------------------------------------------------------------ +! Author: J. 
Henrichs, Bureau of Meteorology +! Author: A. R. Porter, STFC Daresbury Laboratory + +!> Tests for the reduction functionality of the dl_esm_inf library. +!! Domain size must be supplied via the JPIGLO and JPJGLO environment +!! variables. +program test_reduction + use kind_params_mod + use parallel_mod + use grid_mod + use field_mod + use gocean_mod + implicit none + ! Total size of the model domain + integer :: jpiglo = 0, jpjglo = 0 + ! (Uniform) grid resolution + real(go_wp) :: dx = 1.0 + real(go_wp) :: dy = 1.0 + !> The grid on which our fields are defined + type(grid_type), target :: model_grid + !> An example field + type(r2d_field) :: field + + real(go_wp), allocatable :: global_init_data(:,:) + real(go_wp), allocatable :: global_updated_data(:,:) + integer :: my_rank + integer :: ierr, i, j + character(len=10) :: env + + call get_environment_variable("JPIGLO", env) + read(env, '(i10)') jpiglo + call get_environment_variable("JPJGLO", env) + read(env, '(i10)') jpjglo + if(jpiglo < 1 .or. jpjglo < 1)then + stop 'Domain size must be set via $JPIGLO and $JPJGLO' + end if + + call gocean_initialise() + + !> Create our grid object + model_grid = grid_type(GO_ARAKAWA_C, & + (/GO_BC_EXTERNAL,GO_BC_EXTERNAL,GO_BC_NONE/), & + GO_OFFSET_NE) + + !> Generate a domain decomposition. This automatically uses the number + !! of MPI ranks available. + call model_grid%decompose(jpiglo, jpjglo) + my_rank = get_rank() + + if(my_rank == 1) then + write(*,"('Using global domain size of ',I4,'x',I4)") jpiglo, jpjglo + end if + + !> Create global array to hold the global initialisation data + allocate(global_init_data(jpiglo, jpjglo), stat=ierr) + if(ierr /= 0)then + call gocean_stop('Failed to allocate global init data') + end if + + do j=1, jpjglo + global_init_data(:,j) = (/ (unique_global_value(i, j, jpiglo), i=1, jpiglo) /) + enddo + + !> Complete the initialisation of the grid + call grid_init(model_grid, dx, dy) + + !> Create T point field + field = r2d_field(model_grid, GO_T_POINTS, & + init_global_data=global_init_data) + !> Check that the global data is set correctly in the local fields + call check_field_distribution (field) + + !> Update the local data + call update_field(field) + + !> This will allocate global_updated_data: + call field%gather_inner_data(global_updated_data) + + !> The result is only on rank 1: + if (my_rank == 1) then + call check_gathered_field(global_updated_data) + endif + + call gocean_finalise() + +contains + + !> Computes a unique value for each index pair (i,j) (1<=i,j) + !! given that there are n values in a row. It assigns the + !! numbers 0, 1, ..., n*(#rows)-1 to each cell: + function unique_global_value(i, j, n) + implicit none + integer, intent(in) :: i, j, n + integer :: unique_global_value + unique_global_value = (i-1) + (j-1)*n + end function unique_global_value + + ! ----------------------------------------------------------------------------------- + !> Checks that the local data in field corresponds to the original + !! 
global data: + subroutine check_field_distribution(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop, xmin, ymin + real(go_wp) :: global_value + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + xmin = field%grid%subdomain%global%xstart + ymin = field%grid%subdomain%global%ystart + + do jj = jstart, jstop + do ji = istart, istop + global_value = unique_global_value(xmin + ji-istart, & + ymin + jj-jstart, & + field%grid%decomp%global_nx) + if (field%data(ji, jj) /= global_value) then + print *,"Incorrect field data for ", ji, jj, field%data(ji, jj), global_value + call gocean_stop('Failed to distribute init data') + endif + + end do + end do + print *,"Field distributed correctly." + end subroutine check_field_distribution + + ! ----------------------------------------------------------------------------------- + !> Adds one to each entry in the local fields + subroutine update_field(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + + do jj = jstart, jstop + do ji = istart, istop + field%data(ji,jj) = field%data(ji,jj) + 1 + end do + end do + end subroutine update_field + + !! ----------------------------------------------------------------------------------- + !> Checks if the global data gathered after updating the local fields is correct: + subroutine check_gathered_field(global_field) + real(go_wp), allocatable, dimension(:,:) :: global_field + integer :: ji, jj + real(go_wp) :: global_value + + do jj = 1, size(global_field, 2) + do ji = 1, size(global_field, 1) + global_value = unique_global_value(ji, jj, size(global_field, 1)) + 1 + if (global_field(ji, jj) /= global_value) then + print *, "Incorrect global field data gathered for ", ji, jj, & + global_field(ji, jj), global_value + endif + + end do + end do + + print *,"Field gathered correctly." + end subroutine check_gathered_field + +end program test_reduction
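
Usage note (not part of the patch): the following is a minimal sketch of how the new init_global_data constructor argument and the gather_inner_data method might be used together, condensed from test_reduction.f90 above. The program name, domain size and initial values are made up for illustration; error handling and kernel calls are omitted.

program gather_example
  use kind_params_mod
  use parallel_mod
  use grid_mod
  use field_mod
  use gocean_mod
  implicit none
  type(grid_type), target :: model_grid
  type(r2d_field) :: field
  real(go_wp), allocatable :: init_data(:,:), gathered(:,:)
  integer, parameter :: nx = 10, ny = 10   ! made-up global domain size

  call gocean_initialise()
  model_grid = grid_type(GO_ARAKAWA_C, &
                         (/GO_BC_EXTERNAL, GO_BC_EXTERNAL, GO_BC_NONE/), &
                         GO_OFFSET_NE)
  ! Decompose over however many MPI ranks are available.
  call model_grid%decompose(nx, ny)
  ! No tmask argument: grid_init now allocates an all-wet dummy tmask.
  call grid_init(model_grid, 1.0_go_wp, 1.0_go_wp)

  ! Every rank supplies the full global array; the constructor copies
  ! this rank's subdomain into the local field storage.
  allocate(init_data(nx, ny))
  init_data(:,:) = 1.0_go_wp
  field = r2d_field(model_grid, GO_T_POINTS, init_global_data=init_data)

  ! ... kernel calls / halo exchanges on the distributed field ...

  ! Collect the inner (non-halo) points of all ranks; 'gathered' is
  ! allocated by the call and holds the full field on the master rank.
  call field%gather_inner_data(gathered)

  call gocean_finalise()
end program gather_example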