Skip to content
Snippets Groups Projects
Verified Commit fe5f2420 authored by raphael.bach
Browse files

Add `gpu` member to `struct fmpi_data`

parent 143c4592
No related branches found
No related tags found
No related merge requests found
...@@ -47,7 +47,8 @@ typedef struct fmpi_data { ...@@ -47,7 +47,8 @@ typedef struct fmpi_data {
size_t size; //!< Total size `(cnt * type.size)`. size_t size; //!< Total size `(cnt * type.size)`.
size_t dim_len[FMPI_DIM_MAX]; //!< Length of each dimension. size_t dim_len[FMPI_DIM_MAX]; //!< Length of each dimension.
size_t dim_cnt; //!< Number of dimensions. size_t dim_cnt; //!< Number of dimensions.
void * raw; //!< Pointer to the start of the data. void * raw; //!< Pointer to user data.
void * gpu; //!< Pointer to data returned by the GPU.
} fmpi_data; } fmpi_data;
/*============================================================================== /*==============================================================================
PUBLIC FUNCTION PUBLIC FUNCTION
......
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
typedef struct fmpi_task_args { typedef struct fmpi_task_args {
struct fmpi_data in[FMPI_TASK_ARGS_MAX]; //!< TODO struct fmpi_data in[FMPI_TASK_ARGS_MAX]; //!< TODO
struct fmpi_data out; //!< TODO struct fmpi_data out; //!< TODO
void * out_raw;
size_t cnt; //!< TODO size_t cnt; //!< TODO
} fmpi_task_args; } fmpi_task_args;
/*------------------------------------------------------------------------------ /*------------------------------------------------------------------------------
......
...@@ -51,15 +51,15 @@ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")\ ...@@ -51,15 +51,15 @@ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")\
if(args->out.type.derived == FMPI_TYPE_ARRAY) { \ if(args->out.type.derived == FMPI_TYPE_ARRAY) { \
if(args->out.dim_cnt == 1) { \ if(args->out.dim_cnt == 1) { \
CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \ CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \
(FUNC, N, 1, ctx->fut->ctx, args->out_raw, args->in), FMPI_TYPE_REAL) \ (FUNC, N, 1, ctx->fut->ctx, args->out.gpu, args->in), FMPI_TYPE_REAL) \
} \ } \
if(args->out.dim_cnt == 2) { \ if(args->out.dim_cnt == 2) { \
CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \ CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \
(FUNC, N, 2, ctx->fut->ctx, args->out_raw, args->in), FMPI_TYPE_REAL) \ (FUNC, N, 2, ctx->fut->ctx, args->out.gpu, args->in), FMPI_TYPE_REAL) \
} \ } \
if(args->out.dim_cnt == 3) { \ if(args->out.dim_cnt == 3) { \
CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \ CPL_MAP_FIXED(FMPI_PRIV_TASK_RET_FUNC, CPL_EMPTY, \
(FUNC, N, 3, ctx->fut->ctx, args->out_raw, args->in), FMPI_TYPE_REAL) \ (FUNC, N, 3, ctx->fut->ctx, args->out.gpu, args->in), FMPI_TYPE_REAL) \
} \ } \
} \ } \
return futhark_entry_##FUNC(ctx->fut->ctx, args->out.raw, FMPI_PRIV_TASK_ARGS_##N(args->in)); \ return futhark_entry_##FUNC(ctx->fut->ctx, args->out.raw, FMPI_PRIV_TASK_ARGS_##N(args->in)); \
...@@ -80,7 +80,6 @@ _Pragma("GCC diagnostic warning \"-Wincompatible-pointer-types\"")\ ...@@ -80,7 +80,6 @@ _Pragma("GCC diagnostic warning \"-Wincompatible-pointer-types\"")\
#define FMPI_PRIV_TASK_REGISTER_1(FUNC, TYPE, ctx, stencil, arg_out) \ #define FMPI_PRIV_TASK_REGISTER_1(FUNC, TYPE, ctx, stencil, arg_out) \
fmpi_task_register_##TYPE((ctx), FUNC##_0, #FUNC, (stencil), &(struct fmpi_task_args){ \ fmpi_task_register_##TYPE((ctx), FUNC##_0, #FUNC, (stencil), &(struct fmpi_task_args){ \
.out = (arg_out), \ .out = (arg_out), \
.out_raw = NULL, \
.cnt = 0 \ .cnt = 0 \
}) })
...@@ -88,42 +87,41 @@ _Pragma("GCC diagnostic warning \"-Wincompatible-pointer-types\"")\ ...@@ -88,42 +87,41 @@ _Pragma("GCC diagnostic warning \"-Wincompatible-pointer-types\"")\
fmpi_task_register_##TYPE((ctx), FUNC##_##N, #FUNC, (stencil), &(struct fmpi_task_args){ \ fmpi_task_register_##TYPE((ctx), FUNC##_##N, #FUNC, (stencil), &(struct fmpi_task_args){ \
.in = {__VA_ARGS__}, \ .in = {__VA_ARGS__}, \
.out = (arg_out), \ .out = (arg_out), \
.out_raw = NULL, \
.cnt = N \ .cnt = N \
}) })
#define FMPI_PRIV_TASK_ARGS_1(args_in) (args_in)[0].raw #define FMPI_PRIV_TASK_ARGS_1(args_in) (args_in)[0].gpu
#define FMPI_PRIV_TASK_ARGS_2(args_in) FMPI_PRIV_TASK_ARGS_1(args_in), (args_in)[1].raw #define FMPI_PRIV_TASK_ARGS_2(args_in) FMPI_PRIV_TASK_ARGS_1(args_in), (args_in)[1].gpu
#define FMPI_PRIV_TASK_ARGS_3(args_in) FMPI_PRIV_TASK_ARGS_2(args_in), (args_in)[2].raw #define FMPI_PRIV_TASK_ARGS_3(args_in) FMPI_PRIV_TASK_ARGS_2(args_in), (args_in)[2].gpu
#define FMPI_PRIV_TASK_ARGS_4(args_in) FMPI_PRIV_TASK_ARGS_3(args_in), (args_in)[3].raw #define FMPI_PRIV_TASK_ARGS_4(args_in) FMPI_PRIV_TASK_ARGS_3(args_in), (args_in)[3].gpu
#define FMPI_PRIV_TASK_ARGS_5(args_in) FMPI_PRIV_TASK_ARGS_4(args_in), (args_in)[4].raw #define FMPI_PRIV_TASK_ARGS_5(args_in) FMPI_PRIV_TASK_ARGS_4(args_in), (args_in)[4].gpu
#define FMPI_PRIV_TASK_ARGS_6(args_in) FMPI_PRIV_TASK_ARGS_5(args_in), (args_in)[5].raw #define FMPI_PRIV_TASK_ARGS_6(args_in) FMPI_PRIV_TASK_ARGS_5(args_in), (args_in)[5].gpu
#define FMPI_PRIV_TASK_ARGS_7(args_in) FMPI_PRIV_TASK_ARGS_6(args_in), (args_in)[6].raw #define FMPI_PRIV_TASK_ARGS_7(args_in) FMPI_PRIV_TASK_ARGS_6(args_in), (args_in)[6].gpu
#define FMPI_PRIV_TASK_ARGS_8(args_in) FMPI_PRIV_TASK_ARGS_7(args_in), (args_in)[7].raw #define FMPI_PRIV_TASK_ARGS_8(args_in) FMPI_PRIV_TASK_ARGS_7(args_in), (args_in)[7].gpu
#define FMPI_PRIV_TASK_ARGS_9(args_in) FMPI_PRIV_TASK_ARGS_8(args_in), (args_in)[8].raw #define FMPI_PRIV_TASK_ARGS_9(args_in) FMPI_PRIV_TASK_ARGS_8(args_in), (args_in)[8].gpu
#define FMPI_PRIV_TASK_ARGS_10(args_in) FMPI_PRIV_TASK_ARGS_9(args_in), (args_in)[9].raw #define FMPI_PRIV_TASK_ARGS_10(args_in) FMPI_PRIV_TASK_ARGS_9(args_in), (args_in)[9].gpu
#define FMPI_PRIV_TASK_ARGS_11(args_in) FMPI_PRIV_TASK_ARGS_10(args_in), (args_in)[10].raw #define FMPI_PRIV_TASK_ARGS_11(args_in) FMPI_PRIV_TASK_ARGS_10(args_in), (args_in)[10].gpu
#define FMPI_PRIV_TASK_ARGS_12(args_in) FMPI_PRIV_TASK_ARGS_11(args_in), (args_in)[11].raw #define FMPI_PRIV_TASK_ARGS_12(args_in) FMPI_PRIV_TASK_ARGS_11(args_in), (args_in)[11].gpu
#define FMPI_PRIV_TASK_ARGS_13(args_in) FMPI_PRIV_TASK_ARGS_12(args_in), (args_in)[12].raw #define FMPI_PRIV_TASK_ARGS_13(args_in) FMPI_PRIV_TASK_ARGS_12(args_in), (args_in)[12].gpu
#define FMPI_PRIV_TASK_ARGS_14(args_in) FMPI_PRIV_TASK_ARGS_13(args_in), (args_in)[13].raw #define FMPI_PRIV_TASK_ARGS_14(args_in) FMPI_PRIV_TASK_ARGS_13(args_in), (args_in)[13].gpu
#define FMPI_PRIV_TASK_ARGS_15(args_in) FMPI_PRIV_TASK_ARGS_14(args_in), (args_in)[14].raw #define FMPI_PRIV_TASK_ARGS_15(args_in) FMPI_PRIV_TASK_ARGS_14(args_in), (args_in)[14].gpu
#define FMPI_PRIV_TASK_ARGS_16(args_in) FMPI_PRIV_TASK_ARGS_15(args_in), (args_in)[15].raw #define FMPI_PRIV_TASK_ARGS_16(args_in) FMPI_PRIV_TASK_ARGS_15(args_in), (args_in)[15].gpu
#define FMPI_PRIV_TASK_ARGS_17(args_in) FMPI_PRIV_TASK_ARGS_16(args_in), (args_in)[16].raw #define FMPI_PRIV_TASK_ARGS_17(args_in) FMPI_PRIV_TASK_ARGS_16(args_in), (args_in)[16].gpu
#define FMPI_PRIV_TASK_ARGS_18(args_in) FMPI_PRIV_TASK_ARGS_17(args_in), (args_in)[17].raw #define FMPI_PRIV_TASK_ARGS_18(args_in) FMPI_PRIV_TASK_ARGS_17(args_in), (args_in)[17].gpu
#define FMPI_PRIV_TASK_ARGS_19(args_in) FMPI_PRIV_TASK_ARGS_18(args_in), (args_in)[18].raw #define FMPI_PRIV_TASK_ARGS_19(args_in) FMPI_PRIV_TASK_ARGS_18(args_in), (args_in)[18].gpu
#define FMPI_PRIV_TASK_ARGS_20(args_in) FMPI_PRIV_TASK_ARGS_19(args_in), (args_in)[19].raw #define FMPI_PRIV_TASK_ARGS_20(args_in) FMPI_PRIV_TASK_ARGS_19(args_in), (args_in)[19].gpu
#define FMPI_PRIV_TASK_ARGS_21(args_in) FMPI_PRIV_TASK_ARGS_20(args_in), (args_in)[20].raw #define FMPI_PRIV_TASK_ARGS_21(args_in) FMPI_PRIV_TASK_ARGS_20(args_in), (args_in)[20].gpu
#define FMPI_PRIV_TASK_ARGS_22(args_in) FMPI_PRIV_TASK_ARGS_21(args_in), (args_in)[21].raw #define FMPI_PRIV_TASK_ARGS_22(args_in) FMPI_PRIV_TASK_ARGS_21(args_in), (args_in)[21].gpu
#define FMPI_PRIV_TASK_ARGS_23(args_in) FMPI_PRIV_TASK_ARGS_22(args_in), (args_in)[22].raw #define FMPI_PRIV_TASK_ARGS_23(args_in) FMPI_PRIV_TASK_ARGS_22(args_in), (args_in)[22].gpu
#define FMPI_PRIV_TASK_ARGS_24(args_in) FMPI_PRIV_TASK_ARGS_23(args_in), (args_in)[23].raw #define FMPI_PRIV_TASK_ARGS_24(args_in) FMPI_PRIV_TASK_ARGS_23(args_in), (args_in)[23].gpu
#define FMPI_PRIV_TASK_ARGS_25(args_in) FMPI_PRIV_TASK_ARGS_24(args_in), (args_in)[24].raw #define FMPI_PRIV_TASK_ARGS_25(args_in) FMPI_PRIV_TASK_ARGS_24(args_in), (args_in)[24].gpu
#define FMPI_PRIV_TASK_ARGS_26(args_in) FMPI_PRIV_TASK_ARGS_25(args_in), (args_in)[25].raw #define FMPI_PRIV_TASK_ARGS_26(args_in) FMPI_PRIV_TASK_ARGS_25(args_in), (args_in)[25].gpu
#define FMPI_PRIV_TASK_ARGS_27(args_in) FMPI_PRIV_TASK_ARGS_26(args_in), (args_in)[26].raw #define FMPI_PRIV_TASK_ARGS_27(args_in) FMPI_PRIV_TASK_ARGS_26(args_in), (args_in)[26].gpu
#define FMPI_PRIV_TASK_ARGS_28(args_in) FMPI_PRIV_TASK_ARGS_27(args_in), (args_in)[27].raw #define FMPI_PRIV_TASK_ARGS_28(args_in) FMPI_PRIV_TASK_ARGS_27(args_in), (args_in)[27].gpu
#define FMPI_PRIV_TASK_ARGS_29(args_in) FMPI_PRIV_TASK_ARGS_28(args_in), (args_in)[28].raw #define FMPI_PRIV_TASK_ARGS_29(args_in) FMPI_PRIV_TASK_ARGS_28(args_in), (args_in)[28].gpu
#define FMPI_PRIV_TASK_ARGS_30(args_in) FMPI_PRIV_TASK_ARGS_29(args_in), (args_in)[29].raw #define FMPI_PRIV_TASK_ARGS_30(args_in) FMPI_PRIV_TASK_ARGS_29(args_in), (args_in)[29].gpu
#define FMPI_PRIV_TASK_ARGS_31(args_in) FMPI_PRIV_TASK_ARGS_30(args_in), (args_in)[30].raw #define FMPI_PRIV_TASK_ARGS_31(args_in) FMPI_PRIV_TASK_ARGS_30(args_in), (args_in)[30].gpu
#define FMPI_PRIV_TASK_ARGS_32(args_in) FMPI_PRIV_TASK_ARGS_31(args_in), (args_in)[31].raw #define FMPI_PRIV_TASK_ARGS_32(args_in) FMPI_PRIV_TASK_ARGS_31(args_in), (args_in)[31].gpu
#define FMPI_PRIV_TASK_REGISTER_2(...) FMPI_PRIV_TASK_REGISTER_N(1, __VA_ARGS__) #define FMPI_PRIV_TASK_REGISTER_2(...) FMPI_PRIV_TASK_REGISTER_N(1, __VA_ARGS__)
#define FMPI_PRIV_TASK_REGISTER_3(...) FMPI_PRIV_TASK_REGISTER_N(2, __VA_ARGS__) #define FMPI_PRIV_TASK_REGISTER_3(...) FMPI_PRIV_TASK_REGISTER_N(2, __VA_ARGS__)
......
...@@ -49,7 +49,8 @@ struct fmpi_data fmpi_data_out_##T( \ ...@@ -49,7 +49,8 @@ struct fmpi_data fmpi_data_out_##T( \
.size = sizeof(T), \ .size = sizeof(T), \
.dim_len = {0, 0, 0}, \ .dim_len = {0, 0, 0}, \
.dim_cnt = 0, \ .dim_cnt = 0, \
.raw = data \ .raw = data, \
.gpu = NULL \
}; \ }; \
} }
...@@ -74,7 +75,8 @@ struct fmpi_data fmpi_data_##D##d_in_##T( \ ...@@ -74,7 +75,8 @@ struct fmpi_data fmpi_data_##D##d_in_##T( \
.size = cnt * sizeof(T), \ .size = cnt * sizeof(T), \
.dim_len = {x, y, z}, \ .dim_len = {x, y, z}, \
.dim_cnt = (D), \ .dim_cnt = (D), \
.raw = data \ .raw = data, \
.gpu = NULL \
}; \ }; \
} \ } \
struct fmpi_data fmpi_data_##D##d_out_##T( \ struct fmpi_data fmpi_data_##D##d_out_##T( \
...@@ -95,7 +97,8 @@ struct fmpi_data fmpi_data_##D##d_out_##T( \ ...@@ -95,7 +97,8 @@ struct fmpi_data fmpi_data_##D##d_out_##T( \
.size = cnt * sizeof(T), \ .size = cnt * sizeof(T), \
.dim_len = {x, y, z}, \ .dim_len = {x, y, z}, \
.dim_cnt = (D), \ .dim_cnt = (D), \
.raw = data \ .raw = data, \
.gpu = NULL \
}; \ }; \
} }
......
...@@ -65,20 +65,21 @@ struct fmpi_task fmpi_task_register_sync( ...@@ -65,20 +65,21 @@ struct fmpi_task fmpi_task_register_sync(
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!"); FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue; continue;
} }
//! @todo Could fmpi_futhark_new_data_async() could be called here? //! @todo Could fmpi_futhark_new_data_async() be called here instead?
void * data = fmpi_futhark_new_data_sync( void * gpu_data = fmpi_futhark_new_data_sync(
ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base, ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
task.domains[i].data.dim_cnt, task.domains[i].data.dim_cnt,
task.domains[i].parts[rank].dim_len[0], task.domains[i].parts[rank].dim_len[0],
task.domains[i].parts[rank].dim_len[1], task.domains[i].parts[rank].dim_len[1],
task.domains[i].parts[rank].dim_len[2] task.domains[i].parts[rank].dim_len[2]
); );
if(data == NULL) { if(gpu_data == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
"fmpi_futhark_new_data_sync() failed!" "fmpi_futhark_new_data_sync() failed!"
); );
} }
task.args.in[i].raw = data; task.domains[i].parts[rank].gpu = gpu_data;
task.args.in[i].gpu = gpu_data;
} }
ctx->tasks[ctx->task_cnt++] = task; ctx->tasks[ctx->task_cnt++] = task;
fmpi_futhark_sync(ctx->fut); fmpi_futhark_sync(ctx->fut);
...@@ -110,19 +111,20 @@ struct fmpi_task fmpi_task_register_async( ...@@ -110,19 +111,20 @@ struct fmpi_task fmpi_task_register_async(
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!"); FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue; continue;
} }
void * data = fmpi_futhark_new_data_async( void * gpu_data = fmpi_futhark_new_data_async(
ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base, ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
task.domains[i].data.dim_cnt, task.domains[i].data.dim_cnt,
task.domains[i].parts[rank].dim_len[0], task.domains[i].parts[rank].dim_len[0],
task.domains[i].parts[rank].dim_len[1], task.domains[i].parts[rank].dim_len[1],
task.domains[i].parts[rank].dim_len[2] task.domains[i].parts[rank].dim_len[2]
); );
if(data == NULL) { if(gpu_data == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
"fmpi_futhark_new_data_async() failed!" "fmpi_futhark_new_data_async() failed!"
); );
} }
task.args.in[i].raw = data; task.domains[i].parts[rank].gpu = gpu_data;
task.args.in[i].gpu = gpu_data;
} }
ctx->tasks[ctx->task_cnt++] = task; ctx->tasks[ctx->task_cnt++] = task;
return task; return task;
...@@ -140,7 +142,7 @@ int fmpi_task_run_sync( ...@@ -140,7 +142,7 @@ int fmpi_task_run_sync(
fmpi_futhark_check_error(ctx->fut, "task->func"); fmpi_futhark_check_error(ctx->fut, "task->func");
if(task->args.out.type.derived == FMPI_TYPE_ARRAY) { if(task->args.out.type.derived == FMPI_TYPE_ARRAY) {
void * out = fmpi_futhark_get_data_sync( void * out = fmpi_futhark_get_data_sync(
ctx->fut, task->args.out_raw, task->args.out.raw, ctx->fut, task->args.out.gpu, task->args.out.raw,
task->args.out.type.base, task->args.out.dim_cnt task->args.out.type.base, task->args.out.dim_cnt
); );
if(out == NULL) { if(out == NULL) {
...@@ -149,7 +151,7 @@ int fmpi_task_run_sync( ...@@ -149,7 +151,7 @@ int fmpi_task_run_sync(
); );
} }
const int err = fmpi_futhark_free_data_sync( const int err = fmpi_futhark_free_data_sync(
ctx->fut, task->args.out_raw, task->args.out.type.base, ctx->fut, task->args.out.gpu, task->args.out.type.base,
task->args.out.dim_cnt task->args.out.dim_cnt
); );
if(err != 0) { if(err != 0) {
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment