Newer
Older
/**
* Author: Baptiste Coudray
* School: HEPIA
* Class: ITI-3
* Year: 2020-2021
*/
#include <stdio.h>
#include <stdint.h>
#include <mpi.h>
#include <math.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "dispatch.h"
#define min(a, b) (((a) <= (b)) ? (a) : (b))
#define max(a, b) (((a) >= (b)) ? (a) : (b))
#define INDEX_2D_TO_1D(y, x, nb_columns) ((y) * (nb_columns) + (x))
#define NORTH_ROW_TAG 0
#define EAST_COLUMN_TAG 1
#define SOUTH_ROW_TAG 2
#define WEST_COLUMN_TAG 3
#define NORTH_EAST_CELLS_TAG 4
#define SOUTH_EAST_CELLS_TAG 5
#define SOUTH_WEST_CELLS_TAG 6
#define NORTH_WEST_CELLS_TAG 7
#define CHUNK_DATA_TAG 8
#define ROOT_RANK 0
struct dispatch_context {
int my_rank;
int world_size;
int my_cart_rank;
int coordinates[2];
MPI_Comm communicators[4]; /* cart_comm, row_comm, column_comm, depth_comm */
int network_dimensions[3];
int data_dimensions[3];
int n_dimensions;
chunk_info_t *chunk_info;
};
static void get_world_size(struct dispatch_context *dc) {
MPI_Comm_size(MPI_COMM_WORLD, &dc->world_size);
}
static void get_my_rank(struct dispatch_context *dc) {
MPI_Comm_rank(MPI_COMM_WORLD, &dc->my_rank);
}
static void find_best_factors(int n, int factors[2]) {
int result[2] = {0};
int limit = (int) sqrt(n);
bool first_pass = true;
for (int i = 1; i <= limit; ++i) {
if (n % i == 0) {
int factor1 = i;
int factor2 = n / i;
int current_difference = abs(result[0] - result[1]);
int new_difference = abs(factor1 - factor2);
if (first_pass || current_difference > new_difference) {
result[0] = factor1;
result[1] = factor2;
}
first_pass = false;
}
}
factors[0] = result[0];
factors[1] = result[1];
}
static void find_network_dimensions(struct dispatch_context *dc) {
/* 1D */
dc->network_dimensions[0] = 1;
dc->network_dimensions[1] = dc->world_size;
find_best_factors(dc->world_size, dc->network_dimensions);
dc->network_dimensions[2] = 1;
/* 3D */
int factors[2] = {0};
find_best_factors(dc->network_dimensions[1], factors);
if (factors[0] < factors[1]) {
dc->network_dimensions[1] = factors[1];
dc->network_dimensions[2] = factors[0];
} else {
dc->network_dimensions[1] = factors[0];
dc->network_dimensions[2] = factors[1];
}
}
}
static void create_network_communicators(struct dispatch_context *dc) {
int periods[3] = {true, true, true}; // Cyclic on row-column-depth
MPI_Cart_create(MPI_COMM_WORLD, 3, dc->network_dimensions, periods, 1, &dc->communicators[0]);
/* Create row communicator */
int remain_dims[3] = {false, true, false};
MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[1]);
/* Create column communicator */
remain_dims[0] = true; // row
remain_dims[1] = false; // column
MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[2]);
/* Create depth communicator */
remain_dims[0] = false; // row
remain_dims[1] = false; // column
remain_dims[2] = true; // depth
MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[3]);
MPI_Comm_rank(dc->communicators[0], &dc->my_cart_rank);
MPI_Cart_coords(dc->communicators[0], dc->my_cart_rank, 3, dc->coordinates);
}
static void divide_data(struct dispatch_context *dc) {
dc->chunks_info = calloc((size_t) dc->world_size, sizeof(chunk_info_t));
int nb_rows_per_process = dc->data_dimensions[0] / dc->network_dimensions[0];
int remaining_rows = dc->data_dimensions[0] % dc->network_dimensions[0];
int nb_columns_per_process = dc->data_dimensions[1] / dc->network_dimensions[1];
int remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];
int nb_depths_per_process = dc->data_dimensions[2] / dc->network_dimensions[2];
int remaining_depths = dc->data_dimensions[2] % dc->network_dimensions[2];
for (int i = 0, y = 0, x = 0, z = 0; i < dc->world_size; ++i) {
int nb_rows = nb_rows_per_process;
if (remaining_rows > 0) {
++nb_rows;
}
int nb_columns = nb_columns_per_process;
if (remaining_columns > 0) {
++nb_columns;
--remaining_columns;
}
int nb_depths = nb_depths_per_process;
if (remaining_depths > 0) {
++nb_depths;
}
int dimensions[3] = {nb_rows, nb_columns, nb_depths};
chunk_info_init(&dc->chunks_info[i], dc->type, dimensions, y, x, z, i == dc->my_rank);
x += nb_columns;
if (x >= dc->data_dimensions[1]) {
x = 0;
y += dc->chunks_info[max(i - 1, 0)].dimensions[0];
remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];
--remaining_rows;
}
if ((i + 1) % (dc->network_dimensions[0] * dc->network_dimensions[1]) == 0) {
z += nb_depths;
y = 0;
remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];
--remaining_depths;
}
}
dc->chunk_info = &dc->chunks_info[dc->my_rank];
}
extern struct dispatch_context *dispatch_context_new(const int *dimensions, MPI_Datatype datatype, int n_dimensions) {
struct dispatch_context *dc = calloc(1, sizeof(struct dispatch_context));
assert(dc != NULL);
get_world_size(dc);
get_my_rank(dc);
dc->n_dimensions = n_dimensions;
dc->datatype = datatype;
MPI_Type_size(dc->datatype, &dc->type);
switch (n_dimensions) {
case 1:
dc->data_dimensions[0] = 1;
dc->data_dimensions[1] = dimensions[0];
dc->data_dimensions[0] = dimensions[0];
dc->data_dimensions[1] = dimensions[1];
dc->data_dimensions[0] = dimensions[0];
dc->data_dimensions[1] = dimensions[1];
dc->data_dimensions[2] = dimensions[2];
break;
default:
fprintf(stderr, "Invalid dimensions size.");
MPI_Abort(MPI_COMM_WORLD, 1);
break;
}
dc->count = (size_t) dc->data_dimensions[0] * (size_t) dc->data_dimensions[1] * (size_t) dc->data_dimensions[2];
find_network_dimensions(dc);
create_network_communicators(dc);
divide_data(dc);
dc->active_domain = *dc->chunk_info;
}
extern void dispatch_context_print(struct dispatch_context *dc) {
printf("[dispatch_context] my_rank = %d, world_size = %d, network_dimensions = [%d][%d][%d], n_dimensions = %d, data_dimensions = [%d][%d][%d]\n",
dc->my_rank, dc->world_size, dc->network_dimensions[0], dc->network_dimensions[1], dc->network_dimensions[2],
dc->n_dimensions,
dc->data_dimensions[0], dc->data_dimensions[1], dc->data_dimensions[2]);
struct futhark_opaque_envelope_1d_t {
struct futhark_i8_1d *v0;
struct futhark_i8_1d *v1;
};
struct futhark_opaque_envelope_2d_t {
struct futhark_i8_2d *v0;
struct futhark_i8_2d *v1;
struct futhark_i8_2d *v2;
struct futhark_i8_2d *v3;
struct futhark_i8_2d *v4;
struct futhark_i8_2d *v5;
struct futhark_i8_2d *v6;
struct futhark_i8_2d *v7;
};
static envelope_t get_inner_envelope_1d(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
struct futhark_u8_1d *fut_chunk_data = futhark_new_u8_1d(fc, dc->chunk_info->data,
int thickness_x = min(thickness, dc->chunk_info->dimensions[1]);
int dimensions[3] = {1, thickness_x, 1};
struct futhark_opaque_envelope_1d_t *fut_inner_envelope;
futhark_entry_get_envelope_1d(fc, &fut_inner_envelope, fut_chunk_data, dimensions[1] * dc->type);
envelope_t inner_envelope = (envelope_t) {0};
chunk_info_init(&inner_envelope.west, dc->type, dimensions, 0, start_x, 0, true);
futhark_values_i8_1d(fc, fut_inner_envelope->v1, inner_envelope.west.data);
// East
{
int start_x = dc->chunk_info->x + dc->chunk_info->dimensions[1] - thickness_x;
chunk_info_init(&inner_envelope.east, dc->type, dimensions, 0, start_x, 0, true);
futhark_values_i8_1d(fc, fut_inner_envelope->v0, inner_envelope.east.data);
futhark_free_opaque_envelope_1d_t(fc, fut_inner_envelope);
futhark_free_u8_1d(fc, fut_chunk_data);
return inner_envelope;
}
static envelope_t get_inner_envelope_2d(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
struct futhark_u8_2d *fut_chunk_data = futhark_new_u8_2d(fc, dc->active_domain.data,
dc->active_domain.dimensions[0],
dc->active_domain.dimensions[1] * dc->type);
int thickness_y = min(thickness, dc->active_domain.dimensions[0]);
int thickness_x = min(thickness, dc->active_domain.dimensions[1]);
struct futhark_opaque_envelope_2d_t *fut_inner_envelope;
futhark_entry_get_envelope_2d(fc, &fut_inner_envelope, fut_chunk_data, thickness_y, thickness_x * dc->type);
envelope_t inner_envelope = (envelope_t) {0};
int dimensions[3] = {thickness_y, thickness_x, 1};
int start_y = dc->active_domain.y;
int start_x = dc->active_domain.x;
chunk_info_init(&inner_envelope.north_west, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v3, inner_envelope.north_west.data);
int dimensions[3] = {thickness_y, dc->active_domain.dimensions[1], 1};
int start_y = dc->active_domain.y;
int start_x = dc->active_domain.x;
chunk_info_init(&inner_envelope.north, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v1, inner_envelope.north.data);
int dimensions[3] = {thickness_y, thickness_x, 1};
int start_y = dc->active_domain.y;
int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
chunk_info_init(&inner_envelope.north_east, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v2, inner_envelope.north_east.data);
int dimensions[3] = {dc->active_domain.dimensions[0], thickness_x, 1};
int start_y = dc->active_domain.y;
int start_x = dc->active_domain.x;
chunk_info_init(&inner_envelope.west, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v7, inner_envelope.west.data);
int dimensions[3] = {dc->chunk_info->dimensions[0], thickness_x, 1};
int start_y = dc->active_domain.y;
int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
chunk_info_init(&inner_envelope.east, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v0, inner_envelope.east.data);
int dimensions[3] = {thickness_y, thickness_x, 1};
int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
int start_x = dc->active_domain.x;
chunk_info_init(&inner_envelope.south_west, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v6, inner_envelope.south_west.data);
int dimensions[3] = {thickness_y, dc->active_domain.dimensions[1], 1};
int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
int start_x = dc->active_domain.x;
chunk_info_init(&inner_envelope.south, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v4, inner_envelope.south.data);
int dimensions[3] = {thickness_y, thickness_x, 1};
int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
chunk_info_init(&inner_envelope.south_east, dc->type, dimensions, start_y, start_x, 0, true);
futhark_values_i8_2d(fc, fut_inner_envelope->v5, inner_envelope.south_east.data);
futhark_free_u8_2d(fc, fut_chunk_data);
futhark_free_opaque_envelope_2d_t(fc, fut_inner_envelope);
return inner_envelope;
}
static uint8_t *chunk_info_to_futhark(struct dispatch_context *dc, uint8_t *out, chunk_info_t *ci, char *fut_type) {
*out++ = 'b';
*out++ = 2;
*out++ = dc->n_dimensions;
memcpy(out, fut_type, 4);
out += 4;
int64_t dimensions64[3];
dimensions64[0] = (int64_t) ci->dimensions[0];
dimensions64[1] = (int64_t) ci->dimensions[1];
dimensions64[2] = (int64_t) ci->dimensions[2];
if (dc->n_dimensions == 1) {
memcpy(out, &dimensions64[1], 1 * sizeof(int64_t));
} else if (dc->n_dimensions == 2) {
memcpy(out, dimensions64, 2 * sizeof(int64_t));
}
out += dc->n_dimensions * sizeof(int64_t);
memcpy(out, ci->data, ci->count * dc->type);
out += ci->count * dc->type;
return out;
static void *
futhark_outer_envelope_1d_new(struct dispatch_context *dc, struct futhark_context *fc, envelope_t *outer_envelope,
void *f(struct futhark_context *, const void *), char *fut_type) {
int64_t size_0 = 7 + 1 * sizeof(int64_t) + outer_envelope->east.count * dc->type;
int64_t size_1 = 7 + 1 * sizeof(int64_t) + outer_envelope->west.count * dc->type;
void *opaque_struct = calloc(size_0 * size_1, sizeof(uint8_t));
uint8_t *out = (uint8_t *) opaque_struct;
// East
out = chunk_info_to_futhark(dc, out, &outer_envelope->east, fut_type);
// West
chunk_info_to_futhark(dc, out, &outer_envelope->west, fut_type);
void *fut_opaque_struct = f(fc, opaque_struct);
futhark_context_sync(fc);
free(opaque_struct);
return fut_opaque_struct;
}
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
static void *
futhark_outer_envelope_2d_new(struct dispatch_context *dc, struct futhark_context *fc, envelope_t *outer_envelope,
void *f(struct futhark_context *, const void *),
char *fut_type) {
int64_t size_0 = 7 + 2 * sizeof(int64_t) + outer_envelope->east.count * dc->type;
int64_t size_1 = 7 + 2 * sizeof(int64_t) + outer_envelope->north.count * dc->type;
int64_t size_2 = 7 + 2 * sizeof(int64_t) + outer_envelope->north_east.count * dc->type;
int64_t size_3 = 7 + 2 * sizeof(int64_t) + outer_envelope->north_west.count * dc->type;
int64_t size_4 = 7 + 2 * sizeof(int64_t) + outer_envelope->south.count * dc->type;
int64_t size_5 = 7 + 2 * sizeof(int64_t) + outer_envelope->south_east.count * dc->type;
int64_t size_6 = 7 + 2 * sizeof(int64_t) + outer_envelope->south_west.count * dc->type;
int64_t size_7 = 7 + 2 * sizeof(int64_t) + outer_envelope->west.count * dc->type;
void *opaque_struct = calloc(size_0 + size_1 + size_2 + size_3 + size_4 + size_5 + size_6 + size_7,
sizeof(uint8_t));
uint8_t *out = (uint8_t *) opaque_struct;
out = chunk_info_to_futhark(dc, out, &outer_envelope->east, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->north, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->north_east, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->north_west, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->south, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->south_east, fut_type);
out = chunk_info_to_futhark(dc, out, &outer_envelope->south_west, fut_type);
chunk_info_to_futhark(dc, out, &outer_envelope->west, fut_type);
void *fut_opaque_struct = f(fc, opaque_struct);
futhark_context_sync(fc);
free(opaque_struct);
return fut_opaque_struct;
extern void *futhark_outer_envelope_new(struct dispatch_context *dc, struct futhark_context *fc,
envelope_t *outer_envelope, void *f(struct futhark_context *, const void *),
char *fut_type) {
switch (dc->n_dimensions) {
case 1:
return futhark_outer_envelope_1d_new(dc, fc, outer_envelope, f, fut_type);
return futhark_outer_envelope_2d_new(dc, fc, outer_envelope, f, fut_type);
static envelope_t get_outer_envelope_1d(struct dispatch_context *dc, int thickness, envelope_t *inner_envelope) {
int coordinate_x = dc->coordinates[1];
MPI_Request requests[4];
int i_request = 0;
envelope_t outer_envelope = (envelope_t) {0};
// West-part
{
int dest_source_x = (coordinate_x - 1 >= 0) ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
int dest_source = INDEX_2D_TO_1D(0, dest_source_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[1]);
MPI_Isend(inner_envelope->west.data, send_count, dc->datatype, dest_source_x, WEST_COLUMN_TAG,
dc->communicators[1], &requests[i_request++]);
int dimensions[3] = {1, min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1};
int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - dimensions[1];
chunk_info_init(&outer_envelope.west, dc->type, dimensions, 0, start_x, 0, true);
MPI_Irecv(outer_envelope.west.data, (int) outer_envelope.west.count, dc->datatype,
dest_source_x, EAST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
}
// East-part
{
int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
int dest_source = INDEX_2D_TO_1D(0, dest_source_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[1]);
void *inner_envelope_east = ((uint8_t *) inner_envelope->east.data)
+ ((inner_envelope->east.dimensions[1] - send_count) * dc->type);
MPI_Isend(inner_envelope_east, send_count, dc->datatype, dest_source_x, EAST_COLUMN_TAG,
dc->communicators[1], &requests[i_request++]);
int dimensions[3] = {1, min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1};
int start_x = dc->chunks_info[dest_source].x + dimensions[1];
chunk_info_init(&outer_envelope.east, dc->type, dimensions, 0, start_x, 0, true);
MPI_Irecv(outer_envelope.east.data, (int) outer_envelope.east.count, dc->datatype,
dest_source_x, WEST_COLUMN_TAG, dc->communicators[1], &requests[i_request]);
}
MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
return outer_envelope;
}
static envelope_t get_outer_envelope_2d(struct dispatch_context *dc, int thickness, envelope_t *inner_envelope) {
int coordinate_y = dc->coordinates[0];
int coordinate_x = dc->coordinates[1];
MPI_Request requests[16] = {0};
int i_request = 0;
envelope_t outer_envelope = (envelope_t) {0};
// North
{
int dest_source_y = (coordinate_y - 1) >= 0 ? (coordinate_y - 1) : (dc->network_dimensions[0] - 1);
int dest_source = INDEX_2D_TO_1D(dest_source_y, coordinate_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[0]) * dc->chunk_info->dimensions[1];
MPI_Isend(inner_envelope->north.data, send_count, dc->datatype, dest_source_y, NORTH_ROW_TAG,
dc->communicators[2], &requests[i_request++]);
/* Neighbour send south row, which correspond to north envelope */
min(thickness, dc->chunks_info[dest_source].dimensions[0]),
dc->chunks_info[dest_source].dimensions[1], 1
};
int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] - dimensions[0];
int start_x = dc->chunks_info[dest_source].x;
chunk_info_init(&outer_envelope.north, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.north.data, (int) outer_envelope.north.count, dc->datatype,
dest_source_y, SOUTH_ROW_TAG, dc->communicators[2], &requests[i_request++]);
}
// East
{
int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
int dest_source = INDEX_2D_TO_1D(coordinate_y, dest_source_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[1]) * dc->chunk_info->dimensions[0];
MPI_Isend(inner_envelope->east.data, (int) send_count, dc->datatype, dest_source_x, EAST_COLUMN_TAG,
dc->communicators[1], &requests[i_request++]);
/* Neighbour send west column, which correspond to east envelope */
min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1
};
int start_y = dc->chunks_info[dest_source].y;
int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - dimensions[1];
chunk_info_init(&outer_envelope.east, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.east.data, (int) outer_envelope.east.count, dc->datatype, dest_source_x,
WEST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
}
// South
{
int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
int dest_source = INDEX_2D_TO_1D(dest_source_y, coordinate_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[0]) * dc->chunk_info->dimensions[1];
MPI_Isend(inner_envelope->south.data, send_count, dc->datatype, dest_source_y, SOUTH_ROW_TAG,
dc->communicators[2], &requests[i_request++]);
/* Neighbour send north row, which correspond to south envelope */
min(thickness, dc->chunks_info[dest_source].dimensions[0]),
dc->chunks_info[dest_source].dimensions[1], 1
};
int start_y = dc->chunks_info[dest_source].y;
int start_x = dc->chunks_info[dest_source].x;
chunk_info_init(&outer_envelope.south, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.south.data, (int) outer_envelope.south.count, dc->datatype, dest_source_y,
NORTH_ROW_TAG, dc->communicators[2], &requests[i_request++]);
}
// West
{
int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
int dest_source = INDEX_2D_TO_1D(coordinate_y, dest_source_x, dc->network_dimensions[1]);
int send_count = min(thickness, dc->chunk_info->dimensions[1]) * dc->chunk_info->dimensions[0];
MPI_Isend(inner_envelope->west.data, send_count, dc->datatype, dest_source_x, WEST_COLUMN_TAG,
/* Neighbour send west column, which correspond to east envelope */
dc->chunks_info[dest_source].dimensions[0],
min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1
int start_y = dc->chunks_info[dest_source].y;
int start_x = dc->chunks_info[dest_source].x;
chunk_info_init(&outer_envelope.west, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.west.data, (int) outer_envelope.west.count, dc->datatype, dest_source_x,
EAST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
}
// North-East
{
int dest_source_y = (coordinate_y - 1) >= 0 ? coordinate_y - 1 : dc->network_dimensions[0] - 1;
int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);
int send_count = thickness * thickness;
MPI_Isend(inner_envelope->north_east.data, send_count, dc->datatype, dest_source, NORTH_EAST_CELLS_TAG,
MPI_COMM_WORLD, &requests[i_request++]);
/* Neighbour send south-west cell, which correspond to north-east cell */
int dimensions[3] = {thickness, thickness, 1};
int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] + dimensions[0];
int start_x = dc->chunks_info[dest_source].x;
chunk_info_init(&outer_envelope.north_east, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.north_east.data, (int) outer_envelope.north_east.count, dc->datatype, dest_source,
SOUTH_WEST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
}
// South-East
{
int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);
int send_count = thickness * thickness;
MPI_Isend(inner_envelope->south_east.data, send_count, dc->datatype, dest_source, SOUTH_EAST_CELLS_TAG,
MPI_COMM_WORLD, &requests[i_request++]);
/* Neighbour send north-west cell, which correspond to south-east cell */
int dimensions[3] = {thickness, thickness, 1};
int start_y = dc->chunks_info[dest_source].y;
int start_x = dc->chunks_info[dest_source].x;
chunk_info_init(&outer_envelope.south_east, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.south_east.data, (int) outer_envelope.south_east.count, dc->datatype, dest_source,
NORTH_WEST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
}
// South-West
{
int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);
int send_count = thickness * thickness;
MPI_Isend(inner_envelope->south_west.data, send_count, dc->datatype, dest_source, SOUTH_WEST_CELLS_TAG,
MPI_COMM_WORLD, &requests[i_request++]);
/* Neighbour send north-east cell, which correspond to south-west cell */
int dimensions[3] = {thickness, thickness, 1};
int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - thickness;
chunk_info_init(&outer_envelope.south_west, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.south_west.data, (int) outer_envelope.south_west.count, dc->datatype, dest_source,
NORTH_EAST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
}
// North-West
{
int dest_source_y = (coordinate_y - 1) >= 0 ? coordinate_y - 1 : dc->network_dimensions[0] - 1;
int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);
int send_count = thickness * thickness;
MPI_Isend(inner_envelope->north_west.data, send_count, dc->datatype, dest_source, NORTH_WEST_CELLS_TAG,
MPI_COMM_WORLD, &requests[i_request++]);
/* Neighbour send south-east cell, which correspond to north-west cell */
int dimensions[3] = {thickness, thickness, 1};
int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] - thickness;
int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - thickness;
chunk_info_init(&outer_envelope.north_west, dc->type, dimensions, start_y, start_x, 0, true);
MPI_Irecv(outer_envelope.north_west.data, (int) outer_envelope.north_west.count, dc->datatype, dest_source,
SOUTH_EAST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
}
MPI_Waitall(i_request, requests, MPI_STATUSES_IGNORE);
return outer_envelope;
}
extern envelope_t get_inner_envelope(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
envelope_t inner_envelope = {0};
switch (dc->n_dimensions) {
case 1:
inner_envelope = get_inner_envelope_1d(dc, fc, thickness);
break;
case 2:
inner_envelope = get_inner_envelope_2d(dc, fc, thickness);
break;
case 3:
break;
default:
fprintf(stderr, "Invalid dimensions size.");
MPI_Abort(MPI_COMM_WORLD, 1);
break;
}
return inner_envelope;
}
extern envelope_t get_outer_envelope(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
envelope_t inner_envelope = get_inner_envelope(dc, fc, thickness);
envelope_t outer_envelope = {0};
switch (dc->n_dimensions) {
case 1:
outer_envelope = get_outer_envelope_1d(dc, thickness, &inner_envelope);
break;
case 2:
outer_envelope = get_outer_envelope_2d(dc, thickness, &inner_envelope);
break;
case 3:
break;
default:
fprintf(stderr, "Invalid dimensions size.");
MPI_Abort(MPI_COMM_WORLD, 1);
break;
}
envelope_free(&inner_envelope);
return outer_envelope;
}
static void chunk_data_to_data(struct dispatch_context *dc, void *chunk_data, void *data, int rank) {
int y = dc->chunks_info[rank].y;
int x = dc->chunks_info[rank].x;
uint8_t *data8 = (uint8_t *) data;
uint8_t *chunk_data8 = (uint8_t *) chunk_data;
for (int i = 0; i < dc->chunks_info[rank].dimensions[0]; ++i) {
for (int j = 0; j < dc->chunks_info[rank].dimensions[1]; ++j) {
uint8_t *src = chunk_data8 + (INDEX_2D_TO_1D(i, j, dc->chunks_info[rank].dimensions[1]) * dc->type);
uint8_t *dst = data8 + (INDEX_2D_TO_1D(y + i, x + j, dc->data_dimensions[1]) * dc->type);
void *data = NULL;
if (dc->my_rank == ROOT_RANK) {
chunk_data_to_data(dc, dc->chunk_info->data, data, dc->my_rank);
for (int i = 0; i < dc->world_size; ++i) {
if (i != dc->my_rank) {
chunk_info_allocate_data(&dc->chunks_info[i], dc->type);
MPI_Recv(dc->chunks_info[i].data, (int) dc->chunks_info[i].count, dc->datatype, i, CHUNK_DATA_TAG,
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
chunk_data_to_data(dc, dc->chunks_info[i].data, data, i);
MPI_Send(dc->chunk_info->data, (int) dc->chunk_info->count, dc->datatype, ROOT_RANK, CHUNK_DATA_TAG,
MPI_COMM_WORLD);
}
return data;
}
extern chunk_info_t get_chunk_info(struct dispatch_context *dc) {
return *dc->chunk_info;
}
extern void dispatch_context_free(struct dispatch_context *dc) {
free(dc->chunks_info);
MPI_Comm_free(&dc->communicators[2]);
MPI_Comm_free(&dc->communicators[1]);
MPI_Comm_free(&dc->communicators[0]);
}
static void set_active_domain_1d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[1], int x) {
chunk_info_init(&dc->active_domain, dc->type, dimensions, 0, x, 0, true);
int64_t dimensions64[1];
dimensions64[0] = dimensions[0] * dc->type;
struct futhark_u8_1d *fut_chunk_data = futhark_new_u8_1d(fc, dc->chunk_info->data, dimensions64[0] * dc->type);
struct futhark_i64_1d *fut_dimension = futhark_new_i64_1d(fc, dimensions64, 1);
struct futhark_u8_1d *subdomain;
futhark_context_sync(fc);
futhark_entry_get_subdomain_1d(fc, &subdomain, fut_chunk_data, x * dc->type, fut_dimension);
futhark_context_sync(fc);
futhark_values_u8_1d(fc, subdomain, dc->active_domain.data);
futhark_context_sync(fc);
futhark_free_u8_1d(fc, fut_chunk_data);
futhark_free_i64_1d(fc, fut_dimension);
futhark_free_u8_1d(fc, subdomain);
}
static void
set_active_domain_2d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[2], int y, int x) {
chunk_info_init(&dc->active_domain, dc->type, dimensions, y, x, 0, true);
int64_t dimensions64[2];
dimensions64[0] = dimensions[0];
dimensions64[1] = dimensions[1] * dc->type;
struct futhark_u8_2d *fut_chunk_data = futhark_new_u8_2d(fc, dc->chunk_info->data, dimensions64[0],
dimensions64[1]);
struct futhark_i64_1d *fut_dimensions = futhark_new_i64_1d(fc, dimensions64, 2);
struct futhark_u8_2d *subdomain;
futhark_context_sync(fc);
futhark_entry_get_subdomain_2d(fc, &subdomain, fut_chunk_data, y, x * dc->type, fut_dimensions);
futhark_context_sync(fc);
futhark_values_u8_2d(fc, subdomain, dc->active_domain.data);
futhark_context_sync(fc);
futhark_free_u8_2d(fc, fut_chunk_data);
futhark_free_i64_1d(fc, fut_dimensions);
futhark_free_u8_2d(fc, subdomain);
}
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
static void
set_active_domain_3d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[3], int y, int x, int z) {
chunk_info_init(&dc->active_domain, dc->type, dimensions, y, x, z, true);
int64_t dimensions64[3];
dimensions64[0] = dimensions[0];
dimensions64[1] = dimensions[1] * dc->type;
dimensions64[2] = dimensions[2];
struct futhark_u8_3d *fut_chunk_data = futhark_new_u8_3d(fc, dc->chunk_info->data, dimensions64[0],
dimensions64[1], dimensions64[2]);
struct futhark_i64_1d *fut_dimensions = futhark_new_i64_1d(fc, dimensions64, 3);
struct futhark_u8_3d *subdomain;
futhark_context_sync(fc);
futhark_entry_get_subdomain_3d(fc, &subdomain, fut_chunk_data, y, x * dc->type, z, fut_dimensions);
futhark_context_sync(fc);
futhark_values_u8_3d(fc, subdomain, dc->active_domain.data);
futhark_context_sync(fc);
futhark_free_u8_3d(fc, fut_chunk_data);
futhark_free_i64_1d(fc, fut_dimensions);
futhark_free_u8_3d(fc, subdomain);
}
extern void
set_active_domain(struct dispatch_context *dc, struct futhark_context *fc, int *dimensions, int y, int x, int z) {
switch (dc->n_dimensions) {
case 1:
set_active_domain_1d(dc, fc, dimensions, x);
break;
case 2:
set_active_domain_2d(dc, fc, dimensions, y, x);
break;
case 3:
set_active_domain_3d(dc, fc, dimensions, y, x, z);
break;
default:
break;
}
}
extern void restore_active_domain(struct dispatch_context *dc) {
chunk_info_free(&dc->active_domain);
dc->active_domain = *dc->chunk_info;
}
chunk_info_free(&envelope->north);
chunk_info_free(&envelope->north_east);
chunk_info_free(&envelope->east);
chunk_info_free(&envelope->south_east);
chunk_info_free(&envelope->south_west);
chunk_info_free(&envelope->west);
chunk_info_free(&envelope->north_west);