diff --git a/src/fmpi_task.c b/src/fmpi_task.c
index 59c29c72f16d74da61fe2b51079ea983a884a29e..6022fb12712e26d2ecd5b0570d8975b4327a62e0 100644
--- a/src/fmpi_task.c
+++ b/src/fmpi_task.c
@@ -138,34 +138,34 @@ int fmpi_task_run_sync(
     memcpy(dest, src, size);
 
     const size_t idx_1 = type_size;
-    const size_t idx_n = task->domains[0].halo.cnt * type_size;
-    const size_t idx_np1 = idx_n + idx_1;
+    const size_t idx_nm2 = (task->domains[0].halo.cnt-2) * type_size;
+    const size_t idx_nm1 = idx_nm2 + idx_1;
     void * const buf_0 = task->domains[0].halo.raw;
     const void * const buf_1 = (char *)buf_0 + idx_1;
-    const void * const buf_n = (char *)buf_0 + idx_n;
-    void * const buf_np1 = (char *)buf_0 + idx_np1;
+    const void * const buf_nm2 = (char *)buf_0 + idx_nm2;
+    void * const buf_nm1 = (char *)buf_0 + idx_nm1;
 
     MPI_Datatype type = fmpi_mpi_type(task->domains[0].halo.type.base);
 
     if((rank % 2) == 0) {
         int left = (rank != 0) ? (rank - 1) : MPI_PROC_NULL;
         int right = rank + 1;
-        MPI_Send(buf_n  , 1, type, right, 0, ctx->mpi->world);
+        MPI_Send(buf_nm2, 1, type, right, 0, ctx->mpi->world);
         MPI_Recv(buf_0  , 1, type, left, 1, ctx->mpi->world, MPI_STATUS_IGNORE);
-        MPI_Send(buf_1  , 1, type, left, 0, ctx->mpi->world);
-        MPI_Recv(buf_np1, 1, type, right, 1, ctx->mpi->world, MPI_STATUS_IGNORE);
+        MPI_Send(buf_1  , 1, type, left, 2, ctx->mpi->world);
+        MPI_Recv(buf_nm1, 1, type, right, 3, ctx->mpi->world, MPI_STATUS_IGNORE);
     } else {
         int left = rank - 1;
         int right = (rank != (ctx->mpi->size-1)) ? (rank + 1) : MPI_PROC_NULL;
         MPI_Recv(buf_0  , 1, type, left, 0, ctx->mpi->world, MPI_STATUS_IGNORE);
-        MPI_Send(buf_n  , 1, type, right, 1, ctx->mpi->world);
-        MPI_Recv(buf_np1, 1, type, right, 0, ctx->mpi->world, MPI_STATUS_IGNORE);
-        MPI_Send(buf_1  , 1, type, left, 1, ctx->mpi->world);
+        MPI_Send(buf_nm2, 1, type, right, 1, ctx->mpi->world);
+        MPI_Recv(buf_nm1, 1, type, right, 2, ctx->mpi->world, MPI_STATUS_IGNORE);
+        MPI_Send(buf_1  , 1, type, left, 3, ctx->mpi->world);
     }
 
     const int err = fmpi_futhark_free_data_sync(
         ctx->fut, task->args.in[0].gpu, task->domains[0].halo.type.base,
-        task->domains[0].halo.cnt
+        task->domains[0].halo.dim_cnt
    );
    if(err != FMPI_SUCCESS) {
        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
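Note: the hunk above uses the classic even/odd ordering of blocking sends and receives to avoid deadlock, and the tag changes make each send/receive pair unambiguous. Below is a minimal standalone sketch of that exchange pattern, not fmpi code: the halo_exchange helper, the plain double array with one halo cell per side, and the buffer size N are illustrative assumptions.

/*
 * Sketch of the even/odd halo exchange pattern: each rank holds n doubles,
 * elements 0 and n-1 are halo cells, elements 1 and n-2 are the boundary
 * interior cells shipped to the neighbours. Even ranks send first, odd
 * ranks receive first, so the blocking calls pair up without deadlock.
 */
#include <stdio.h>
#include <mpi.h>

static void halo_exchange(double *buf, int n, int rank, int size, MPI_Comm comm)
{
    if ((rank % 2) == 0) {
        int left  = (rank != 0) ? (rank - 1) : MPI_PROC_NULL;
        int right = (rank != size - 1) ? (rank + 1) : MPI_PROC_NULL;
        MPI_Send(&buf[n - 2], 1, MPI_DOUBLE, right, 0, comm);
        MPI_Recv(&buf[0],     1, MPI_DOUBLE, left,  1, comm, MPI_STATUS_IGNORE);
        MPI_Send(&buf[1],     1, MPI_DOUBLE, left,  2, comm);
        MPI_Recv(&buf[n - 1], 1, MPI_DOUBLE, right, 3, comm, MPI_STATUS_IGNORE);
    } else {
        int left  = rank - 1;
        int right = (rank != size - 1) ? (rank + 1) : MPI_PROC_NULL;
        MPI_Recv(&buf[0],     1, MPI_DOUBLE, left,  0, comm, MPI_STATUS_IGNORE);
        MPI_Send(&buf[n - 2], 1, MPI_DOUBLE, right, 1, comm);
        MPI_Recv(&buf[n - 1], 1, MPI_DOUBLE, right, 2, comm, MPI_STATUS_IGNORE);
        MPI_Send(&buf[1],     1, MPI_DOUBLE, left,  3, comm);
    }
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    enum { N = 8 };
    double buf[N];
    for (int i = 0; i < N; ++i)
        buf[i] = rank;   /* fill the whole buffer with the rank id */

    halo_exchange(buf, N, rank, size, MPI_COMM_WORLD);

    /* after the exchange, the halo cells hold the neighbours' rank ids */
    printf("rank %d: left halo = %g, right halo = %g\n", rank, buf[0], buf[N - 1]);

    MPI_Finalize();
    return 0;
}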