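Updated bench: time the N_ITERATIONS loop inside compute_next_chunk_elems (after one warmup call) and return the elapsed time, raise the GPU N_ITERATIONS from 300 to 500, and request up to 8 rtx GPUs in the Slurm script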

6b15ec66 · baptiste.coudray · 96d63367 · 6b15ec66 · 6b15ec66
Verified Commit 6b15ec66 authored 3 years ago by baptiste.coudray
--- a/elementary/benchmark/main.c
+++ b/elementary/benchmark/main.c
@@ -17,7 +17,7 @@
 #define N_MEASURES 15
 #if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl)
-#define N_ITERATIONS 300
+#define N_ITERATIONS 500
 #else
 #define N_ITERATIONS 100
 #endif
@@ -29,18 +29,26 @@ void init_chunk_elems(chunk_info_t *ci) {
    }
 }
-void compute_next_chunk_elems(struct dispatch_context *dc, struct futhark_context *fc, chunk_info_t *ci) {
+double compute_next_chunk_elems(struct dispatch_context *dc, struct futhark_context *fc, chunk_info_t *ci) {
    struct futhark_i8_1d *fut_chunk_with_envelope = get_chunk_with_envelope(dc, fc, 1, futhark_new_i8_1d);
+    // Warmup
    struct futhark_i8_1d *fut_next_chunk_elems;
    futhark_entry_next_chunk_elems(fc, &fut_next_chunk_elems, fut_chunk_with_envelope);
    futhark_context_sync(fc);
+    double start = MPI_Wtime();
+    for (int i = 0; i < N_ITERATIONS; ++i) {
+        futhark_entry_next_chunk_elems(fc, &fut_next_chunk_elems, fut_chunk_with_envelope);
+        futhark_context_sync(fc);
+    }
    futhark_values_i8_1d(fc, fut_next_chunk_elems, ci->data);
    futhark_context_sync(fc);
+    double finish = MPI_Wtime();
    futhark_free_i8_1d(fc, fut_chunk_with_envelope);
    futhark_free_i8_1d(fc, fut_next_chunk_elems);
+    return finish - start;
 }
 int main(int argc, char *argv[]) {
@@ -74,13 +82,9 @@ int main(int argc, char *argv[]) {
    init_chunk_elems(&ci);
    for (int i = 0; i < N_MEASURES; ++i) {
-        double start = MPI_Wtime();
+        double time = compute_next_chunk_elems(disp_context, fut_context, &ci);
-        for (int j = 0; j < N_ITERATIONS; ++j) {
-            compute_next_chunk_elems(disp_context, fut_context, &ci);
-        }
-        double finish = MPI_Wtime();
        if (my_rank == ROOT_RANK) {
-            printf("%d;%d;%d;%f\n", world_size, nb_devices, elems_n, finish - start);
+            printf("%d;%d;%d;%f\n", world_size, nb_devices, elems_n, time);
        }
    }

--- a/slurm_gen.py
+++ b/slurm_gen.py
@@ -10,7 +10,7 @@ def slurm_gpu(file, version, ntasks, mem_per_cpu, time):
 #SBATCH --ntasks={ntasks}
 #SBATCH --cpus-per-task=1
 #SBATCH --partition=shared-gpu
-#SBATCH --gpus=ampere:{min(ntasks, 2)}
+#SBATCH --gpus=rtx:{min(ntasks, 8)}
 #SBATCH --mem-per-cpu={mem_per_cpu}
 #SBATCH --time={time}