Skip to content
Snippets Groups Projects
dispatch.c 36.3 KiB
Newer Older
/**
 * Author: Baptiste Coudray
 * School: HEPIA
 * Class: ITI-3
 * Year: 2020-2021
 */

#include <stdio.h>
#include <stdint.h>
#include <mpi.h>
#include <math.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
baptiste.coudray's avatar
baptiste.coudray committed
#include <assert.h>
#include "dispatch.h"

#define min(a, b) (((a) <= (b)) ? (a) : (b))
#define max(a, b) (((a) >= (b)) ? (a) : (b))

#define INDEX_2D_TO_1D(y, x, nb_columns) ((y) * (nb_columns) + (x))

#define NORTH_ROW_TAG 0
#define EAST_COLUMN_TAG 1
#define SOUTH_ROW_TAG 2
#define WEST_COLUMN_TAG 3

#define NORTH_EAST_CELLS_TAG 4
#define SOUTH_EAST_CELLS_TAG 5
#define SOUTH_WEST_CELLS_TAG 6
#define NORTH_WEST_CELLS_TAG 7

#define CHUNK_DATA_TAG 8
#define ROOT_RANK 0

baptiste.coudray's avatar
baptiste.coudray committed
struct futhark_opaque_envelope_1d_t {
    struct futhark_i8_1d *v0;
    struct futhark_i8_1d *v1;
};

struct futhark_opaque_envelope_2d_t {
    struct futhark_i8_2d *v0;
    struct futhark_i8_2d *v1;
    struct futhark_i8_2d *v2;
    struct futhark_i8_2d *v3;
    struct futhark_i8_2d *v4;
    struct futhark_i8_2d *v5;
    struct futhark_i8_2d *v6;
    struct futhark_i8_2d *v7;
};

struct dispatch_context {
    int my_rank;
    int world_size;
    int my_cart_rank;
    int coordinates[2];
    MPI_Comm communicators[4]; /* cart_comm, row_comm, column_comm, depth_comm */
    int network_dimensions[3];
    int data_dimensions[3];
baptiste.coudray's avatar
baptiste.coudray committed
    MPI_Datatype datatype;
baptiste.coudray's avatar
baptiste.coudray committed
    size_t count;
    int n_dimensions;
    chunk_info_t *chunk_info;
baptiste.coudray's avatar
baptiste.coudray committed
    int type;
    chunk_info_t *chunks_info;
    chunk_info_t active_domain;
};

static void get_world_size(struct dispatch_context *dc) {
    MPI_Comm_size(MPI_COMM_WORLD, &dc->world_size);
}

static void get_my_rank(struct dispatch_context *dc) {
    MPI_Comm_rank(MPI_COMM_WORLD, &dc->my_rank);
}

static void find_best_factors(int n, int factors[2]) {
    int result[2] = {0};
    int limit = (int) sqrt(n);
    bool first_pass = true;
    for (int i = 1; i <= limit; ++i) {
        if (n % i == 0) {
            int factor1 = i;
            int factor2 = n / i;
            int current_difference = abs(result[0] - result[1]);
            int new_difference = abs(factor1 - factor2);
            if (first_pass || current_difference > new_difference) {
                result[0] = factor1;
                result[1] = factor2;
            }
            first_pass = false;
        }
    }
    factors[0] = result[0];
    factors[1] = result[1];
}

static void find_network_dimensions(struct dispatch_context *dc) {
    /* 1D */
baptiste.coudray's avatar
baptiste.coudray committed
    if (dc->n_dimensions == 1) {
        dc->network_dimensions[0] = 1;
        dc->network_dimensions[1] = dc->world_size;
        dc->network_dimensions[2] = 1;
        find_best_factors(dc->world_size, dc->network_dimensions);
        dc->network_dimensions[2] = 1;
        /* 3D */
baptiste.coudray's avatar
baptiste.coudray committed
        if (dc->n_dimensions == 3) {
            int factors[2] = {0};
            find_best_factors(dc->network_dimensions[1], factors);
            if (factors[0] < factors[1]) {
                dc->network_dimensions[1] = factors[1];
                dc->network_dimensions[2] = factors[0];
            } else {
                dc->network_dimensions[1] = factors[0];
                dc->network_dimensions[2] = factors[1];
            }
baptiste.coudray's avatar
baptiste.coudray committed
        }
    }
}

static void create_network_communicators(struct dispatch_context *dc) {
    int periods[3] = {true, true, true}; // Cyclic on row-column-depth
    MPI_Cart_create(MPI_COMM_WORLD, 3, dc->network_dimensions, periods, 1, &dc->communicators[0]);

    /* Create row communicator */
    int remain_dims[3] = {false, true, false};
    MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[1]);

    /* Create column communicator */
    remain_dims[0] = true; // row
    remain_dims[1] = false; // column
    remain_dims[2] = false; // depth
    MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[2]);

    /* Create depth communicator */
    remain_dims[0] = false; // row
    remain_dims[1] = false; // column
    remain_dims[2] = true; // depth
    MPI_Cart_sub(dc->communicators[0], remain_dims, &dc->communicators[3]);

    MPI_Comm_rank(dc->communicators[0], &dc->my_cart_rank);

    MPI_Cart_coords(dc->communicators[0], dc->my_cart_rank, 3, dc->coordinates);
}

static void divide_data(struct dispatch_context *dc) {
    dc->chunks_info = calloc((size_t) dc->world_size, sizeof(chunk_info_t));

    int nb_rows_per_process = dc->data_dimensions[0] / dc->network_dimensions[0];
    int remaining_rows = dc->data_dimensions[0] % dc->network_dimensions[0];

    int nb_columns_per_process = dc->data_dimensions[1] / dc->network_dimensions[1];
    int remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];

    int nb_depths_per_process = dc->data_dimensions[2] / dc->network_dimensions[2];
    int remaining_depths = dc->data_dimensions[2] % dc->network_dimensions[2];

    for (int i = 0, y = 0, x = 0, z = 0; i < dc->world_size; ++i) {
        int nb_rows = nb_rows_per_process;
        if (remaining_rows > 0) {
            ++nb_rows;
        }
        int nb_columns = nb_columns_per_process;
        if (remaining_columns > 0) {
            ++nb_columns;
            --remaining_columns;
        }
        int nb_depths = nb_depths_per_process;
        if (remaining_depths > 0) {
            ++nb_depths;
        }
        int dimensions[3] = {nb_rows, nb_columns, nb_depths};
        chunk_info_init(&dc->chunks_info[i], dc->type, dimensions, y, x, z, i == dc->my_rank);

        x += nb_columns;
        if (x >= dc->data_dimensions[1]) {
            x = 0;
            y += dc->chunks_info[max(i - 1, 0)].dimensions[0];
            remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];
            --remaining_rows;
        }
        if ((i + 1) % (dc->network_dimensions[0] * dc->network_dimensions[1]) == 0) {
            z += nb_depths;
            y = 0;
            remaining_columns = dc->data_dimensions[1] % dc->network_dimensions[1];
            --remaining_depths;
        }
    }
    dc->chunk_info = &dc->chunks_info[dc->my_rank];
}

baptiste.coudray's avatar
baptiste.coudray committed
extern struct dispatch_context *dispatch_context_new(const int *dimensions, MPI_Datatype datatype, int n_dimensions) {
baptiste.coudray's avatar
baptiste.coudray committed
    struct dispatch_context *dc = calloc(1, sizeof(struct dispatch_context));
    assert(dc != NULL);
    get_world_size(dc);
    get_my_rank(dc);
    dc->n_dimensions = n_dimensions;
    dc->datatype = datatype;
    MPI_Type_size(dc->datatype, &dc->type);

    switch (n_dimensions) {
        case 1:
baptiste.coudray's avatar
baptiste.coudray committed
            dc->data_dimensions[0] = 1;
            dc->data_dimensions[1] = dimensions[0];
            dc->data_dimensions[2] = 1;
baptiste.coudray's avatar
baptiste.coudray committed
            dc->data_dimensions[0] = dimensions[0];
            dc->data_dimensions[1] = dimensions[1];
            dc->data_dimensions[2] = 1;
            dc->data_dimensions[0] = dimensions[0];
            dc->data_dimensions[1] = dimensions[1];
            dc->data_dimensions[2] = dimensions[2];
            break;
        default:
            fprintf(stderr, "Invalid dimensions size.");
            MPI_Abort(MPI_COMM_WORLD, 1);
            break;
    }
    dc->count = (size_t) dc->data_dimensions[0] * (size_t) dc->data_dimensions[1] * (size_t) dc->data_dimensions[2];
baptiste.coudray's avatar
baptiste.coudray committed
    find_network_dimensions(dc);
    create_network_communicators(dc);
    divide_data(dc);
    dc->active_domain = *dc->chunk_info;
baptiste.coudray's avatar
baptiste.coudray committed
    return dc;
}

extern void dispatch_context_print(struct dispatch_context *dc) {
    printf("[dispatch_context] my_rank = %d, world_size = %d, network_dimensions = [%d][%d][%d], n_dimensions = %d, data_dimensions = [%d][%d][%d]\n",
           dc->my_rank, dc->world_size, dc->network_dimensions[0], dc->network_dimensions[1], dc->network_dimensions[2],
           dc->n_dimensions,
           dc->data_dimensions[0], dc->data_dimensions[1], dc->data_dimensions[2]);
}

static envelope_t get_inner_envelope_1d(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
    struct futhark_u8_1d *fut_chunk_data = futhark_new_u8_1d(fc, dc->chunk_info->data,
baptiste.coudray's avatar
baptiste.coudray committed
                                                             dc->chunk_info->dimensions[1] * dc->type);
    int thickness_x = min(thickness, dc->chunk_info->dimensions[1]);
    int dimensions[3] = {1, thickness_x, 1};
    struct futhark_opaque_envelope_1d_t *fut_inner_envelope;
baptiste.coudray's avatar
baptiste.coudray committed
    futhark_context_sync(fc);
    futhark_entry_get_envelope_1d(fc, &fut_inner_envelope, fut_chunk_data, dimensions[1] * dc->type);
baptiste.coudray's avatar
baptiste.coudray committed
    futhark_context_sync(fc);

    envelope_t inner_envelope = (envelope_t) {0};
baptiste.coudray's avatar
baptiste.coudray committed
    // West
    {
        int start_x = dc->chunk_info->x;
        chunk_info_init(&inner_envelope.west, dc->type, dimensions, 0, start_x, 0, true);
        futhark_values_i8_1d(fc, fut_inner_envelope->v1, inner_envelope.west.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // East
    {
        int start_x = dc->chunk_info->x + dc->chunk_info->dimensions[1] - thickness_x;
        chunk_info_init(&inner_envelope.east, dc->type, dimensions, 0, start_x, 0, true);
        futhark_values_i8_1d(fc, fut_inner_envelope->v0, inner_envelope.east.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    futhark_context_sync(fc);
    futhark_free_opaque_envelope_1d_t(fc, fut_inner_envelope);
    futhark_free_u8_1d(fc, fut_chunk_data);
    return inner_envelope;
}

static envelope_t get_inner_envelope_2d(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
    struct futhark_u8_2d *fut_chunk_data = futhark_new_u8_2d(fc, dc->active_domain.data,
                                                             dc->active_domain.dimensions[0],
                                                             dc->active_domain.dimensions[1] * dc->type);
    int thickness_y = min(thickness, dc->active_domain.dimensions[0]);
    int thickness_x = min(thickness, dc->active_domain.dimensions[1]);
    struct futhark_opaque_envelope_2d_t *fut_inner_envelope;

baptiste.coudray's avatar
baptiste.coudray committed
    futhark_context_sync(fc);
    futhark_entry_get_envelope_2d(fc, &fut_inner_envelope, fut_chunk_data, thickness_y, thickness_x * dc->type);
baptiste.coudray's avatar
baptiste.coudray committed
    futhark_context_sync(fc);

    envelope_t inner_envelope = (envelope_t) {0};
baptiste.coudray's avatar
baptiste.coudray committed
    // North-West
    {
        int dimensions[3] = {thickness_y, thickness_x, 1};
        int start_y = dc->active_domain.y;
        int start_x = dc->active_domain.x;
        chunk_info_init(&inner_envelope.north_west, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v3, inner_envelope.north_west.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // North
    {
        int dimensions[3] = {thickness_y, dc->active_domain.dimensions[1], 1};
        int start_y = dc->active_domain.y;
        int start_x = dc->active_domain.x;
        chunk_info_init(&inner_envelope.north, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v1, inner_envelope.north.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // North-East
    {
        int dimensions[3] = {thickness_y, thickness_x, 1};
        int start_y = dc->active_domain.y;
        int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
        chunk_info_init(&inner_envelope.north_east, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v2, inner_envelope.north_east.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // West
    {
        int dimensions[3] = {dc->active_domain.dimensions[0], thickness_x, 1};
        int start_y = dc->active_domain.y;
        int start_x = dc->active_domain.x;
        chunk_info_init(&inner_envelope.west, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v7, inner_envelope.west.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // East
    {
        int dimensions[3] = {dc->chunk_info->dimensions[0], thickness_x, 1};
        int start_y = dc->active_domain.y;
        int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
        chunk_info_init(&inner_envelope.east, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v0, inner_envelope.east.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // South-West
    {
        int dimensions[3] = {thickness_y, thickness_x, 1};
        int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
        int start_x = dc->active_domain.x;
        chunk_info_init(&inner_envelope.south_west, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v6, inner_envelope.south_west.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // South
    {
        int dimensions[3] = {thickness_y, dc->active_domain.dimensions[1], 1};
        int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
        int start_x = dc->active_domain.x;
        chunk_info_init(&inner_envelope.south, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v4, inner_envelope.south.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed
    // South-East
    {
        int dimensions[3] = {thickness_y, thickness_x, 1};
        int start_y = dc->active_domain.y + dc->active_domain.dimensions[0] - thickness_y;
        int start_x = dc->active_domain.x + dc->active_domain.dimensions[1] - thickness_x;
        chunk_info_init(&inner_envelope.south_east, dc->type, dimensions, start_y, start_x, 0, true);
        futhark_values_i8_2d(fc, fut_inner_envelope->v5, inner_envelope.south_east.data);
baptiste.coudray's avatar
baptiste.coudray committed
    }
baptiste.coudray's avatar
baptiste.coudray committed

    futhark_context_sync(fc);
    futhark_free_u8_2d(fc, fut_chunk_data);
    futhark_free_opaque_envelope_2d_t(fc, fut_inner_envelope);
    return inner_envelope;
}

baptiste.coudray's avatar
baptiste.coudray committed
static uint8_t *
chunk_info_to_futhark_struct(chunk_info_t *ci, struct dispatch_context *dc, uint8_t *out, char *futhark_type) {
    *out++ = 'b';
    *out++ = 2;
    *out++ = dc->n_dimensions;
baptiste.coudray's avatar
baptiste.coudray committed
    memcpy(out, futhark_type, 4);
    out += 4;

    int64_t dimensions64[3];
    dimensions64[0] = (int64_t) ci->dimensions[0];
    dimensions64[1] = (int64_t) ci->dimensions[1];
    dimensions64[2] = (int64_t) ci->dimensions[2];

    if (dc->n_dimensions == 1) {
        memcpy(out, &dimensions64[1], 1 * sizeof(int64_t));
    } else if (dc->n_dimensions == 2) {
        memcpy(out, dimensions64, 2 * sizeof(int64_t));
baptiste.coudray's avatar
baptiste.coudray committed
    } else {
        memcpy(out, dimensions64, 3 * sizeof(int64_t));
    }
    out += dc->n_dimensions * sizeof(int64_t);
    memcpy(out, ci->data, ci->count * dc->type);
    out += ci->count * dc->type;
    return out;
static void *
futhark_outer_envelope_1d_new(struct dispatch_context *dc, struct futhark_context *fc, envelope_t *outer_envelope,
baptiste.coudray's avatar
baptiste.coudray committed
                              void *f(struct futhark_context *, const void *), char *futhark_type) {
    int64_t size_0 = 7 + 1 * sizeof(int64_t) + outer_envelope->east.count * dc->type;
    int64_t size_1 = 7 + 1 * sizeof(int64_t) + outer_envelope->west.count * dc->type;
    void *opaque_struct = calloc(size_0 * size_1, sizeof(uint8_t));
baptiste.coudray's avatar
baptiste.coudray committed
    uint8_t *opaque_struct8 = (uint8_t *) opaque_struct;
baptiste.coudray's avatar
baptiste.coudray committed
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->east, dc, opaque_struct8, futhark_type);
baptiste.coudray's avatar
baptiste.coudray committed
    chunk_info_to_futhark_struct(&outer_envelope->west, dc, opaque_struct8, futhark_type);
    void *fut_opaque_struct = f(fc, opaque_struct);
    futhark_context_sync(fc);
    free(opaque_struct);
    return fut_opaque_struct;
}
static void *
futhark_outer_envelope_2d_new(struct dispatch_context *dc, struct futhark_context *fc, envelope_t *outer_envelope,
baptiste.coudray's avatar
baptiste.coudray committed
                              void *f(struct futhark_context *, const void *), char *futhark_type) {

    int64_t size_0 = 7 + 2 * sizeof(int64_t) + outer_envelope->east.count * dc->type;
    int64_t size_1 = 7 + 2 * sizeof(int64_t) + outer_envelope->north.count * dc->type;
    int64_t size_2 = 7 + 2 * sizeof(int64_t) + outer_envelope->north_east.count * dc->type;
    int64_t size_3 = 7 + 2 * sizeof(int64_t) + outer_envelope->north_west.count * dc->type;
    int64_t size_4 = 7 + 2 * sizeof(int64_t) + outer_envelope->south.count * dc->type;
    int64_t size_5 = 7 + 2 * sizeof(int64_t) + outer_envelope->south_east.count * dc->type;
    int64_t size_6 = 7 + 2 * sizeof(int64_t) + outer_envelope->south_west.count * dc->type;
    int64_t size_7 = 7 + 2 * sizeof(int64_t) + outer_envelope->west.count * dc->type;

    void *opaque_struct = calloc(size_0 + size_1 + size_2 + size_3 + size_4 + size_5 + size_6 + size_7,
                                 sizeof(uint8_t));
baptiste.coudray's avatar
baptiste.coudray committed
    uint8_t *opaque_struct8 = (uint8_t *) opaque_struct;
baptiste.coudray's avatar
baptiste.coudray committed
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->east, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->north, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->north_east, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->north_west, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->south, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->south_east, dc, opaque_struct8, futhark_type);
    opaque_struct8 = chunk_info_to_futhark_struct(&outer_envelope->south_west, dc, opaque_struct8, futhark_type);
    chunk_info_to_futhark_struct(&outer_envelope->west, dc, opaque_struct8, futhark_type);

    void *fut_opaque_struct = f(fc, opaque_struct);
    futhark_context_sync(fc);
    free(opaque_struct);
    return fut_opaque_struct;
extern void *futhark_outer_envelope_new(struct dispatch_context *dc, struct futhark_context *fc,
                                        envelope_t *outer_envelope, void *f(struct futhark_context *, const void *),
baptiste.coudray's avatar
baptiste.coudray committed
                                        char *futhark_type) {
    switch (dc->n_dimensions) {
        case 1:
baptiste.coudray's avatar
baptiste.coudray committed
            return futhark_outer_envelope_1d_new(dc, fc, outer_envelope, f, futhark_type);
baptiste.coudray's avatar
baptiste.coudray committed
            return futhark_outer_envelope_2d_new(dc, fc, outer_envelope, f, futhark_type);
            return NULL;
static envelope_t get_outer_envelope_1d(struct dispatch_context *dc, int thickness, envelope_t *inner_envelope) {
    int coordinate_x = dc->coordinates[1];
    MPI_Request requests[4];
    int i_request = 0;
    envelope_t outer_envelope = (envelope_t) {0};

    // West-part
    {
        int dest_source_x = (coordinate_x - 1 >= 0) ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
        int dest_source = INDEX_2D_TO_1D(0, dest_source_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[1]);

baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->west.data, send_count, dc->datatype, dest_source_x, WEST_COLUMN_TAG,
                  dc->communicators[1], &requests[i_request++]);

        int dimensions[3] = {1, min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - dimensions[1];
        chunk_info_init(&outer_envelope.west, dc->type, dimensions, 0, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.west.data, (int) outer_envelope.west.count, dc->datatype,
baptiste.coudray's avatar
baptiste.coudray committed
                  dest_source_x, EAST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
    }

    // East-part
    {
        int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
        int dest_source = INDEX_2D_TO_1D(0, dest_source_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[1]);

        void *inner_envelope_east = ((uint8_t *) inner_envelope->east.data)
baptiste.coudray's avatar
baptiste.coudray committed
                                    + ((inner_envelope->east.dimensions[1] - send_count) * dc->type);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope_east, send_count, dc->datatype, dest_source_x, EAST_COLUMN_TAG,
                  dc->communicators[1], &requests[i_request++]);

        int dimensions[3] = {1, min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_x = dc->chunks_info[dest_source].x + dimensions[1];
        chunk_info_init(&outer_envelope.east, dc->type, dimensions, 0, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.east.data, (int) outer_envelope.east.count, dc->datatype,
baptiste.coudray's avatar
baptiste.coudray committed
                  dest_source_x, WEST_COLUMN_TAG, dc->communicators[1], &requests[i_request]);
    }

    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
    return outer_envelope;
}

static envelope_t get_outer_envelope_2d(struct dispatch_context *dc, int thickness, envelope_t *inner_envelope) {
    int coordinate_y = dc->coordinates[0];
    int coordinate_x = dc->coordinates[1];
    MPI_Request requests[16] = {0};
    int i_request = 0;

    envelope_t outer_envelope = (envelope_t) {0};

    // North
    {
        int dest_source_y = (coordinate_y - 1) >= 0 ? (coordinate_y - 1) : (dc->network_dimensions[0] - 1);
        int dest_source = INDEX_2D_TO_1D(dest_source_y, coordinate_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[0]) * dc->chunk_info->dimensions[1];

baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->north.data, send_count, dc->datatype, dest_source_y, NORTH_ROW_TAG,
                  dc->communicators[2], &requests[i_request++]);

        /* Neighbour send south row, which correspond to north envelope */
        int dimensions[3] = {
baptiste.coudray's avatar
baptiste.coudray committed
                min(thickness, dc->chunks_info[dest_source].dimensions[0]),
                dc->chunks_info[dest_source].dimensions[1], 1
baptiste.coudray's avatar
baptiste.coudray committed
        };
        int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] - dimensions[0];
        int start_x = dc->chunks_info[dest_source].x;
        chunk_info_init(&outer_envelope.north, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.north.data, (int) outer_envelope.north.count, dc->datatype,
                  dest_source_y, SOUTH_ROW_TAG, dc->communicators[2], &requests[i_request++]);
    }

    // East
    {
        int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
        int dest_source = INDEX_2D_TO_1D(coordinate_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[1]) * dc->chunk_info->dimensions[0];

baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->east.data, (int) send_count, dc->datatype, dest_source_x, EAST_COLUMN_TAG,
                  dc->communicators[1], &requests[i_request++]);

        /* Neighbour send west column, which correspond to east envelope */
        int dimensions[3] = {
baptiste.coudray's avatar
baptiste.coudray committed
                dc->chunks_info[dest_source].dimensions[0],
                min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1
baptiste.coudray's avatar
baptiste.coudray committed
        };
        int start_y = dc->chunks_info[dest_source].y;
        int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - dimensions[1];
        chunk_info_init(&outer_envelope.east, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.east.data, (int) outer_envelope.east.count, dc->datatype, dest_source_x,
baptiste.coudray's avatar
baptiste.coudray committed
                  WEST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
    }

    // South
    {
        int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
        int dest_source = INDEX_2D_TO_1D(dest_source_y, coordinate_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[0]) * dc->chunk_info->dimensions[1];

baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->south.data, send_count, dc->datatype, dest_source_y, SOUTH_ROW_TAG,
                  dc->communicators[2], &requests[i_request++]);

baptiste.coudray's avatar
baptiste.coudray committed
        /* Neighbour send north row, which correspond to south envelope */
        int dimensions[3] = {
baptiste.coudray's avatar
baptiste.coudray committed
                min(thickness, dc->chunks_info[dest_source].dimensions[0]),
                dc->chunks_info[dest_source].dimensions[1], 1
baptiste.coudray's avatar
baptiste.coudray committed
        };
        int start_y = dc->chunks_info[dest_source].y;
        int start_x = dc->chunks_info[dest_source].x;
        chunk_info_init(&outer_envelope.south, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.south.data, (int) outer_envelope.south.count, dc->datatype, dest_source_y,
                  NORTH_ROW_TAG, dc->communicators[2], &requests[i_request++]);
    }

    // West
    {
        int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
        int dest_source = INDEX_2D_TO_1D(coordinate_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = min(thickness, dc->chunk_info->dimensions[1]) * dc->chunk_info->dimensions[0];
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->west.data, send_count, dc->datatype, dest_source_x, WEST_COLUMN_TAG,
baptiste.coudray's avatar
baptiste.coudray committed
                  dc->communicators[1], &requests[i_request++]);
baptiste.coudray's avatar
baptiste.coudray committed
        /* Neighbour send west column, which correspond to east envelope */
        int dimensions[3] = {
                dc->chunks_info[dest_source].dimensions[0],
                min(thickness, dc->chunks_info[dest_source].dimensions[1]), 1
baptiste.coudray's avatar
baptiste.coudray committed
        int start_y = dc->chunks_info[dest_source].y;
        int start_x = dc->chunks_info[dest_source].x;
        chunk_info_init(&outer_envelope.west, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.west.data, (int) outer_envelope.west.count, dc->datatype, dest_source_x,
                  EAST_COLUMN_TAG, dc->communicators[1], &requests[i_request++]);
    }

    // North-East
    {
        int dest_source_y = (coordinate_y - 1) >= 0 ? coordinate_y - 1 : dc->network_dimensions[0] - 1;
        int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
        int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = thickness * thickness;
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->north_east.data, send_count, dc->datatype, dest_source, NORTH_EAST_CELLS_TAG,
                  MPI_COMM_WORLD, &requests[i_request++]);
        /* Neighbour send south-west cell, which correspond to north-east cell */
        int dimensions[3] = {thickness, thickness, 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] + dimensions[0];
        int start_x = dc->chunks_info[dest_source].x;
        chunk_info_init(&outer_envelope.north_east, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.north_east.data, (int) outer_envelope.north_east.count, dc->datatype, dest_source,
                  SOUTH_WEST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
    }

    // South-East
    {
        int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
        int dest_source_x = (coordinate_x + 1) % dc->network_dimensions[1];
baptiste.coudray's avatar
baptiste.coudray committed
        int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = thickness * thickness;
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->south_east.data, send_count, dc->datatype, dest_source, SOUTH_EAST_CELLS_TAG,
                  MPI_COMM_WORLD, &requests[i_request++]);

        /* Neighbour send north-west cell, which correspond to south-east cell */
        int dimensions[3] = {thickness, thickness, 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_y = dc->chunks_info[dest_source].y;
        int start_x = dc->chunks_info[dest_source].x;
        chunk_info_init(&outer_envelope.south_east, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.south_east.data, (int) outer_envelope.south_east.count, dc->datatype, dest_source,
                  NORTH_WEST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
    }

    // South-West
    {
        int dest_source_y = (coordinate_y + 1) % dc->network_dimensions[0];
        int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
        int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = thickness * thickness;
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->south_west.data, send_count, dc->datatype, dest_source, SOUTH_WEST_CELLS_TAG,
                  MPI_COMM_WORLD, &requests[i_request++]);
        /* Neighbour send north-east cell, which correspond to south-west cell */
        int dimensions[3] = {thickness, thickness, 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_y = dc->chunks_info[dest_source].y;
baptiste.coudray's avatar
baptiste.coudray committed
        int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - thickness;
        chunk_info_init(&outer_envelope.south_west, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.south_west.data, (int) outer_envelope.south_west.count, dc->datatype, dest_source,
                  NORTH_EAST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
    }

    // North-West
    {
        int dest_source_y = (coordinate_y - 1) >= 0 ? coordinate_y - 1 : dc->network_dimensions[0] - 1;
        int dest_source_x = (coordinate_x - 1) >= 0 ? coordinate_x - 1 : dc->network_dimensions[1] - 1;
        int dest_source = INDEX_2D_TO_1D(dest_source_y, dest_source_x, dc->network_dimensions[1]);

        int send_count = thickness * thickness;
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Isend(inner_envelope->north_west.data, send_count, dc->datatype, dest_source, NORTH_WEST_CELLS_TAG,
                  MPI_COMM_WORLD, &requests[i_request++]);
        /* Neighbour send south-east cell, which correspond to north-west cell */
        int dimensions[3] = {thickness, thickness, 1};
baptiste.coudray's avatar
baptiste.coudray committed
        int start_y = dc->chunks_info[dest_source].y + dc->chunks_info[dest_source].dimensions[0] - thickness;
        int start_x = dc->chunks_info[dest_source].x + dc->chunks_info[dest_source].dimensions[1] - thickness;
        chunk_info_init(&outer_envelope.north_west, dc->type, dimensions, start_y, start_x, 0, true);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Irecv(outer_envelope.north_west.data, (int) outer_envelope.north_west.count, dc->datatype, dest_source,
                  SOUTH_EAST_CELLS_TAG, MPI_COMM_WORLD, &requests[i_request++]);
    }

    MPI_Waitall(i_request, requests, MPI_STATUSES_IGNORE);
    return outer_envelope;
}

baptiste.coudray's avatar
baptiste.coudray committed
extern envelope_t get_inner_envelope(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
    envelope_t inner_envelope = {0};
    switch (dc->n_dimensions) {
        case 1:
            inner_envelope = get_inner_envelope_1d(dc, fc, thickness);
            break;
        case 2:
            inner_envelope = get_inner_envelope_2d(dc, fc, thickness);
            break;
        case 3:
            break;
        default:
            fprintf(stderr, "Invalid dimensions size.");
            MPI_Abort(MPI_COMM_WORLD, 1);
            break;
    }
    return inner_envelope;
}

baptiste.coudray's avatar
baptiste.coudray committed
extern envelope_t get_outer_envelope(struct dispatch_context *dc, struct futhark_context *fc, int thickness) {
    envelope_t inner_envelope = get_inner_envelope(dc, fc, thickness);
    envelope_t outer_envelope = {0};
    switch (dc->n_dimensions) {
        case 1:
            outer_envelope = get_outer_envelope_1d(dc, thickness, &inner_envelope);
            break;
        case 2:
            outer_envelope = get_outer_envelope_2d(dc, thickness, &inner_envelope);
            break;
        case 3:
            break;
        default:
            fprintf(stderr, "Invalid dimensions size.");
            MPI_Abort(MPI_COMM_WORLD, 1);
            break;
    }
    envelope_free(&inner_envelope);
    return outer_envelope;
}

static void chunk_data_to_data(struct dispatch_context *dc, void *chunk_data, void *data, int rank) {
    int y = dc->chunks_info[rank].y;
    int x = dc->chunks_info[rank].x;
    uint8_t *data8 = (uint8_t *) data;
    uint8_t *chunk_data8 = (uint8_t *) chunk_data;

    for (int i = 0; i < dc->chunks_info[rank].dimensions[0]; ++i) {
baptiste.coudray's avatar
baptiste.coudray committed
        for (int j = 0; j < dc->chunks_info[rank].dimensions[1]; ++j) {
baptiste.coudray's avatar
baptiste.coudray committed
            uint8_t *src = chunk_data8 + (INDEX_2D_TO_1D(i, j, dc->chunks_info[rank].dimensions[1]) * dc->type);
            uint8_t *dst = data8 + (INDEX_2D_TO_1D(y + i, x + j, dc->data_dimensions[1]) * dc->type);
baptiste.coudray's avatar
baptiste.coudray committed
            memcpy(dst, src, dc->type);
        }
baptiste.coudray's avatar
baptiste.coudray committed
extern void *get_data(struct dispatch_context *dc) {
    void *data = NULL;
    if (dc->my_rank == ROOT_RANK) {
baptiste.coudray's avatar
baptiste.coudray committed
        data = calloc(dc->count, (size_t) dc->type);
        chunk_data_to_data(dc, dc->chunk_info->data, data, dc->my_rank);
        for (int i = 0; i < dc->world_size; ++i) {
            if (i != dc->my_rank) {
baptiste.coudray's avatar
baptiste.coudray committed
                chunk_info_allocate_data(&dc->chunks_info[i], dc->type);
baptiste.coudray's avatar
baptiste.coudray committed
                MPI_Recv(dc->chunks_info[i].data, (int) dc->chunks_info[i].count, dc->datatype, i, CHUNK_DATA_TAG,
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                chunk_data_to_data(dc, dc->chunks_info[i].data, data, i);
baptiste.coudray's avatar
baptiste.coudray committed
                chunk_info_free(&dc->chunks_info[i]);
baptiste.coudray's avatar
baptiste.coudray committed
        MPI_Send(dc->chunk_info->data, (int) dc->chunk_info->count, dc->datatype, ROOT_RANK, CHUNK_DATA_TAG,
                 MPI_COMM_WORLD);
    }
    return data;
}

baptiste.coudray's avatar
baptiste.coudray committed
extern chunk_info_t get_chunk_info(struct dispatch_context *dc) {
    return *dc->chunk_info;
}

baptiste.coudray's avatar
baptiste.coudray committed
extern void dispatch_context_free(struct dispatch_context *dc) {
baptiste.coudray's avatar
baptiste.coudray committed
    chunk_info_free(dc->chunk_info);
    free(dc->chunks_info);
    MPI_Comm_free(&dc->communicators[2]);
    MPI_Comm_free(&dc->communicators[1]);
    MPI_Comm_free(&dc->communicators[0]);
}

static void set_active_domain_1d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[1], int x) {
    chunk_info_init(&dc->active_domain, dc->type, dimensions, 0, x, 0, true);
    int64_t dimensions64[1];
    dimensions64[0] = dimensions[0] * dc->type;

    struct futhark_u8_1d *fut_chunk_data = futhark_new_u8_1d(fc, dc->chunk_info->data, dimensions64[0] * dc->type);
    struct futhark_i64_1d *fut_dimension = futhark_new_i64_1d(fc, dimensions64, 1);

    struct futhark_u8_1d *subdomain;
    futhark_context_sync(fc);
    futhark_entry_get_subdomain_1d(fc, &subdomain, fut_chunk_data, x * dc->type, fut_dimension);
    futhark_context_sync(fc);
    futhark_values_u8_1d(fc, subdomain, dc->active_domain.data);
    futhark_context_sync(fc);

    futhark_free_u8_1d(fc, fut_chunk_data);
    futhark_free_i64_1d(fc, fut_dimension);
    futhark_free_u8_1d(fc, subdomain);
}
static void
set_active_domain_2d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[2], int y, int x) {
    chunk_info_init(&dc->active_domain, dc->type, dimensions, y, x, 0, true);

    int64_t dimensions64[2];
    dimensions64[0] = dimensions[0];
    dimensions64[1] = dimensions[1] * dc->type;

    struct futhark_u8_2d *fut_chunk_data = futhark_new_u8_2d(fc, dc->chunk_info->data, dimensions64[0],
                                                             dimensions64[1]);

    struct futhark_i64_1d *fut_dimensions = futhark_new_i64_1d(fc, dimensions64, 2);
    struct futhark_u8_2d *subdomain;

    futhark_context_sync(fc);
    futhark_entry_get_subdomain_2d(fc, &subdomain, fut_chunk_data, y, x * dc->type, fut_dimensions);

    futhark_context_sync(fc);
    futhark_values_u8_2d(fc, subdomain, dc->active_domain.data);
    futhark_context_sync(fc);

    futhark_free_u8_2d(fc, fut_chunk_data);
    futhark_free_i64_1d(fc, fut_dimensions);
    futhark_free_u8_2d(fc, subdomain);
}

static void
set_active_domain_3d(struct dispatch_context *dc, struct futhark_context *fc, int dimensions[3], int y, int x, int z) {
    chunk_info_init(&dc->active_domain, dc->type, dimensions, y, x, z, true);

    int64_t dimensions64[3];
    dimensions64[0] = dimensions[0];
    dimensions64[1] = dimensions[1] * dc->type;
    dimensions64[2] = dimensions[2];

    struct futhark_u8_3d *fut_chunk_data = futhark_new_u8_3d(fc, dc->chunk_info->data, dimensions64[0],
                                                             dimensions64[1], dimensions64[2]);

    struct futhark_i64_1d *fut_dimensions = futhark_new_i64_1d(fc, dimensions64, 3);
    struct futhark_u8_3d *subdomain;

    futhark_context_sync(fc);
    futhark_entry_get_subdomain_3d(fc, &subdomain, fut_chunk_data, y, x * dc->type, z, fut_dimensions);

    futhark_context_sync(fc);
    futhark_values_u8_3d(fc, subdomain, dc->active_domain.data);
    futhark_context_sync(fc);

    futhark_free_u8_3d(fc, fut_chunk_data);
    futhark_free_i64_1d(fc, fut_dimensions);
    futhark_free_u8_3d(fc, subdomain);
}

extern void
set_active_domain(struct dispatch_context *dc, struct futhark_context *fc, int *dimensions, int y, int x, int z) {
    switch (dc->n_dimensions) {
        case 1:
            set_active_domain_1d(dc, fc, dimensions, x);
            break;
        case 2:
            set_active_domain_2d(dc, fc, dimensions, y, x);
            break;
        case 3:
            set_active_domain_3d(dc, fc, dimensions, y, x, z);
            break;
        default:
            break;
    }
}

extern void restore_active_domain(struct dispatch_context *dc) {
    chunk_info_free(&dc->active_domain);
    dc->active_domain = *dc->chunk_info;
}

baptiste.coudray's avatar
baptiste.coudray committed
extern void envelope_free(envelope_t *envelope) {
baptiste.coudray's avatar
baptiste.coudray committed
    chunk_info_free(&envelope->north);
    chunk_info_free(&envelope->north_east);
    chunk_info_free(&envelope->east);
    chunk_info_free(&envelope->south_east);
    chunk_info_free(&envelope->south_west);
    chunk_info_free(&envelope->west);
    chunk_info_free(&envelope->north_west);