/** * Author: Baptiste Coudray * School: HEPIA * Class: ITI-3 * Year: 2020-2021 */ #include <stdio.h> #include <stddef.h> #include "stdlib.h" #include <mpi.h> #include "lbm.h" #include "../futhark_mpi/dispatch.h" #define N_MEASURES 15 #if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl) #define N_ITERATIONS 3000 #else #define N_ITERATIONS 50 #endif #define NB_VALUES 27 #define ROOT_RANK 0 typedef struct lbm_values { float values[NB_VALUES]; } lbm_values_t; void init_chunk_lbm(chunk_info_t *ci) { lbm_values_t *data32 = ci->data; for (size_t i = 0; i < ci->count; ++i) { for (int l = 0; l < NB_VALUES; ++l) { data32[i].values[l] = ((float) rand() / (float) rand()); } } } struct futhark_f32_4d * convert_chunk_with_envelope(struct futhark_context *fc, const void *data, int64_t dim0, int64_t dim1, int64_t dim2) { return futhark_new_f32_4d(fc, data, dim0, dim1, dim2, NB_VALUES); } double compute_next_lbm(struct dispatch_context *dc, struct futhark_context *fc, chunk_info_t *ci) { double start = MPI_Wtime(); struct futhark_f32_4d *fut_chunk_with_envelope = get_chunk_with_envelope(dc, fc, 1, convert_chunk_with_envelope); struct futhark_f32_4d *fut_next_chunk_lbm; for (int i = 0; i < N_ITERATIONS; ++i) { futhark_entry_next_chunk_lbm(fc, &fut_next_chunk_lbm, fut_chunk_with_envelope); futhark_context_sync(fc); if (i + 1 < N_ITERATIONS) { futhark_free_f32_4d(fc, fut_next_chunk_lbm); futhark_context_sync(fc); } } futhark_values_f32_4d(fc, fut_next_chunk_lbm, ci->data); futhark_context_sync(fc); futhark_free_f32_4d(fc, fut_next_chunk_lbm); futhark_free_f32_4d(fc, fut_chunk_with_envelope); double finish = MPI_Wtime(); return finish - start; } int main(int argc, char *argv[]) { if (argc < 5) { printf("usage: mpirun -n <nb_proc> %s <nb_devices> <height> <width> <depth>\n", argv[0]); return EXIT_FAILURE; } MPI_Init(&argc, &argv); int my_rank; int world_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); struct futhark_context_config *fut_config = futhark_context_config_new(); int nb_devices = atoi(argv[1]); #if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl) #if defined(FUTHARK_BACKEND_opencl) futhark_context_config_list_devices(fut_config); #endif char device[4] = {0}; snprintf(device, sizeof(device), "#%d", my_rank % nb_devices); futhark_context_config_set_device(fut_config, "AMD"); #endif struct futhark_context *fut_context = futhark_context_new(fut_config); int lbm_dimensions[3] = {atoi(argv[2]), atoi(argv[3]), atoi(argv[4])}; int count = 1; int block_lengths[] = {NB_VALUES}; MPI_Aint displacements[] = {offsetof(struct lbm_values, values)}; MPI_Datatype types[] = {MPI_FLOAT}; MPI_Datatype lbm_type = create_type(count, block_lengths, displacements, types); struct dispatch_context *disp_context = dispatch_context_new(lbm_dimensions, lbm_type, 3); chunk_info_t ci = get_chunk_info(disp_context); init_chunk_lbm(&ci); for (int i = 0; i < N_MEASURES; ++i) { double time = compute_next_lbm(disp_context, fut_context, &ci); if (my_rank == ROOT_RANK) { printf("%d;%d;%d;%f\n", world_size, nb_devices, lbm_dimensions[0], time); } } dispatch_context_free(disp_context); futhark_context_config_free(fut_config); futhark_context_free(fut_context); MPI_Type_free(&lbm_type); return MPI_Finalize(); }