From 92977c50c6f8f6080405de8a0be8d6793794e871 Mon Sep 17 00:00:00 2001 From: "baptiste.coudray" <baptiste.coudray@etu.hesge.ch> Date: Tue, 18 May 2021 12:42:56 +0200 Subject: [PATCH] Recreated GoL --- futmpi/.gitignore | 128 + futmpi/CMakeLists.txt | 29 + futmpi/gfx.c | 95 + futmpi/gfx.h | 43 + futmpi/gol.c | 4707 +++++++++++++++ futmpi/gol.fut | 48 + futmpi/gol.h | 122 + futmpi/main.c | 346 ++ game_of_life/CMakeLists.txt | 55 - game_of_life/Makefile | 34 - game_of_life/README.md | 12 - game_of_life/futhark.pkg | 3 - game_of_life/gol.c | 5273 ----------------- game_of_life/gol.fut | 34 - game_of_life/gol.h | 120 - .../lib/github.com/athas/matte/.gitignore | 3 - .../lib/github.com/athas/matte/colour.fut | 186 - .../github.com/athas/matte/colour_test.fut | 17 - .../diku-dk/lys/Inconsolata-Regular.ttf | Bin 95960 -> 0 bytes .../lib/github.com/diku-dk/lys/common.mk | 36 - .../github.com/diku-dk/lys/context_setup.c | 50 - .../github.com/diku-dk/lys/context_setup.h | 29 - .../lib/github.com/diku-dk/lys/default.nix | 5 - .../lib/github.com/diku-dk/lys/gen_printf.py | 40 - .../lib/github.com/diku-dk/lys/genlys.fut | 41 - .../lib/github.com/diku-dk/lys/liblys.c | 269 - .../lib/github.com/diku-dk/lys/liblys.h | 61 - .../lib/github.com/diku-dk/lys/lys.fut | 366 -- .../lib/github.com/diku-dk/lys/main.c | 355 -- .../lib/github.com/diku-dk/lys/setup_flags.mk | 43 - game_of_life/libfpmpi.a | Bin 24728 -> 0 bytes game_of_life/main.c | 153 - lib/.gitignore | 383 -- lib/CMakeLists.txt | 29 - lib/Makefile | 18 - lib/benchmark/benchmark.c | 149 - lib/dispatch.c | 36 - lib/dispatch.h | 15 - lib/fp.c | 164 - lib/fp.h | 25 - lib/fpmpi.c | 546 -- lib/fpmpi.h | 61 - lib/tests/tests.c | 453 -- 43 files changed, 5518 insertions(+), 9064 deletions(-) create mode 100644 futmpi/.gitignore create mode 100644 futmpi/CMakeLists.txt create mode 100644 futmpi/gfx.c create mode 100644 futmpi/gfx.h create mode 100644 futmpi/gol.c create mode 100644 futmpi/gol.fut create mode 100644 futmpi/gol.h create 
mode 100644 futmpi/main.c delete mode 100644 game_of_life/CMakeLists.txt delete mode 100644 game_of_life/Makefile delete mode 100644 game_of_life/README.md delete mode 100644 game_of_life/futhark.pkg delete mode 100644 game_of_life/gol.c delete mode 100644 game_of_life/gol.fut delete mode 100644 game_of_life/gol.h delete mode 100644 game_of_life/lib/github.com/athas/matte/.gitignore delete mode 100644 game_of_life/lib/github.com/athas/matte/colour.fut delete mode 100644 game_of_life/lib/github.com/athas/matte/colour_test.fut delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/Inconsolata-Regular.ttf delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/common.mk delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/context_setup.c delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/context_setup.h delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/default.nix delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/gen_printf.py delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/genlys.fut delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/liblys.c delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/liblys.h delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/lys.fut delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/main.c delete mode 100644 game_of_life/lib/github.com/diku-dk/lys/setup_flags.mk delete mode 100644 game_of_life/libfpmpi.a delete mode 100644 game_of_life/main.c delete mode 100644 lib/.gitignore delete mode 100644 lib/CMakeLists.txt delete mode 100644 lib/Makefile delete mode 100644 lib/benchmark/benchmark.c delete mode 100644 lib/dispatch.c delete mode 100644 lib/dispatch.h delete mode 100644 lib/fp.c delete mode 100644 lib/fp.h delete mode 100644 lib/fpmpi.c delete mode 100644 lib/fpmpi.h delete mode 100644 lib/tests/tests.c diff --git a/futmpi/.gitignore b/futmpi/.gitignore new file mode 100644 index 0000000..3c9d127 --- /dev/null +++ b/futmpi/.gitignore @@ -0,0 +1,128 @@ +### 
C template +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+.idea
diff --git a/futmpi/CMakeLists.txt b/futmpi/CMakeLists.txt
new file mode 100644
index 0000000..56ff990
--- /dev/null
+++ b/futmpi/CMakeLists.txt
@@ -0,0 +1,29 @@
+cmake_minimum_required(VERSION 3.19)
+project(futmpi C)
+
+set(CMAKE_C_STANDARD 11)
+
+# Debug builds enable warnings and sanitizers; LeakSanitizer is only requested
+# on Linux. Release builds only raise the optimisation level.
+if (CMAKE_BUILD_TYPE MATCHES Debug)
+    set(GCC_COMPILE_FLAGS "-DDEBUG -Wall -Wextra -Wconversion -pedantic -fsanitize=undefined -fsanitize=address")
+    if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+        set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -fsanitize=leak")
+    endif ()
+elseif (CMAKE_BUILD_TYPE MATCHES Release)
+    set(GCC_COMPILE_FLAGS "-O3")
+endif ()
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
+
+if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+    include_directories(/usr/local/include)
+endif ()
+
+# sdl2-config prints a trailing newline; strip it so the flags can be appended
+# to CMAKE_C_FLAGS (previously the variable was captured but never used).
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+    execute_process(COMMAND sdl2-config --cflags
+            OUTPUT_VARIABLE SDL2_C_FLAGS
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SDL2_C_FLAGS}")
+endif ()
+
+find_package(MPI REQUIRED)
+include_directories(${MPI_C_INCLUDE_PATH})
+
+# "-framework OpenCL" is a macOS-only linker flag and breaks the Linux link
+# step. The FindOpenCL imported target supplies the right include path and
+# library on each platform.
+find_package(OpenCL REQUIRED)
+
+add_executable(futmpi main.c gol.h gol.c gfx.c gfx.h)
+target_link_libraries(futmpi ${MPI_C_LIBRARIES} SDL2 m OpenCL::OpenCL)
diff --git a/futmpi/gfx.c b/futmpi/gfx.c
new file mode 100644
index 0000000..bc1ae9d
--- /dev/null
+++ b/futmpi/gfx.c
@@ -0,0 +1,95 @@
+/// @file gfx.c
+/// @author Florent
Gluck
+/// @date November 6, 2016
+/// Helper routines to render pixels in a graphic window.
+/// Uses the SDL2 library.
+
+#include "gfx.h"
+
+#include <limits.h>
+
+/// Create a resizable graphic window together with its renderer, its
+/// streaming texture and a CPU-side pixel buffer cleared to black.
+/// If any step fails, everything acquired so far is released again.
+/// @param title Title of the window.
+/// @param width Width of the window in pixels (must be non-zero).
+/// @param height Height of the window in pixels (must be non-zero).
+/// @return a pointer to the graphic context or NULL if it failed.
+struct gfx_context_t *gfx_create(char *title, uint width, uint height) {
+    SDL_Window *window = NULL;
+    SDL_Renderer *renderer = NULL;
+    SDL_Texture *texture = NULL;
+    uint32_t *pixels = NULL;
+    struct gfx_context_t *ctxt = NULL;
+
+    // The context stores its dimensions as int: reject sizes whose pixel
+    // count would not fit in an int (this also bounds each dimension).
+    if (width == 0 || height == 0 || width > (uint) INT_MAX / height) {
+        return NULL;
+    }
+
+    if (SDL_Init(SDL_INIT_VIDEO) != 0) {
+        return NULL;
+    }
+
+    // Check each SDL object before handing it to the next call.
+    window = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
+                              (int) width, (int) height, SDL_WINDOW_RESIZABLE);
+    if (!window) goto error;
+
+    renderer = SDL_CreateRenderer(window, -1, 0);
+    if (!renderer) goto error;
+
+    texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
+                                SDL_TEXTUREACCESS_STREAMING, (int) width, (int) height);
+    if (!texture) goto error;
+
+    // calloc() checks the count * size multiplication for overflow.
+    pixels = calloc((size_t) width * height, sizeof *pixels);
+    ctxt = malloc(sizeof *ctxt);
+    if (!pixels || !ctxt) goto error;
+
+    ctxt->renderer = renderer;
+    ctxt->texture = texture;
+    ctxt->window = window;
+    ctxt->width = (int) width;
+    ctxt->height = (int) height;
+    ctxt->pixels = pixels;
+
+    SDL_ShowCursor(SDL_DISABLE);
+    gfx_clear(ctxt, COLOR_BLACK);
+    return ctxt;
+
+error:
+    // free(NULL) is a no-op; the SDL objects are only destroyed if created.
+    free(ctxt);
+    free(pixels);
+    if (texture) SDL_DestroyTexture(texture);
+    if (renderer) SDL_DestroyRenderer(renderer);
+    if (window) SDL_DestroyWindow(window);
+    SDL_Quit();
+    return NULL;
+}
+
+/// Draw a pixel in the specified graphic context.
+/// Coordinates outside the context (including negative ones) are ignored.
+/// @param ctxt Graphic context where the pixel is to be drawn.
+/// @param x X coordinate of the pixel.
+/// @param y Y coordinate of the pixel.
+/// @param color Color of the pixel.
+void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color) {
+    // Negative coordinates used to pass the upper-bound test and write
+    // before the start of the pixel buffer.
+    if (x < 0 || y < 0 || x >= ctxt->width || y >= ctxt->height) {
+        return;
+    }
+    ctxt->pixels[(size_t) ctxt->width * (size_t) y + (size_t) x] = color;
+}
+
+/// Clear the specified graphic context.
+/// @param ctxt Graphic context to clear.
+/// @param color Color to use.
+void gfx_clear(struct gfx_context_t *ctxt, uint32_t color) {
+    // With a non-positive dimension there is nothing to fill; the previous
+    // count-down loop indexed before the buffer when the product was negative.
+    if (ctxt->width <= 0 || ctxt->height <= 0) {
+        return;
+    }
+    size_t count = (size_t) ctxt->width * (size_t) ctxt->height;
+    for (size_t i = 0; i < count; i++) {
+        ctxt->pixels[i] = color;
+    }
+}
+
+/// Display the graphic context: upload the pixel buffer to the texture,
+/// copy the texture to the renderer and present the result.
+/// @param ctxt Graphic context to display.
+void gfx_present(struct gfx_context_t *ctxt) {
+    // SDL_UpdateTexture() takes the pitch (bytes per row) as an int; make
+    // the narrowing from size_t explicit.
+    int pitch = (int) (ctxt->width * sizeof(uint32_t));
+    SDL_UpdateTexture(ctxt->texture, NULL, ctxt->pixels, pitch);
+    SDL_RenderCopy(ctxt->renderer, ctxt->texture, NULL, NULL);
+    SDL_RenderPresent(ctxt->renderer);
+}
+
+/// Destroy a graphic window and release everything owned by its context,
+/// then shut SDL down.
+/// Passing NULL is allowed and does nothing, so the result of a failed
+/// gfx_create() can be handed over unconditionally.
+/// @param ctxt Graphic context of the window to close (may be NULL).
+/// @return always NULL.
+void *gfx_destroy(struct gfx_context_t *ctxt) {
+    if (!ctxt) {
+        return NULL;
+    }
+    SDL_ShowCursor(SDL_ENABLE);
+    SDL_DestroyTexture(ctxt->texture);
+    SDL_DestroyRenderer(ctxt->renderer);
+    SDL_DestroyWindow(ctxt->window);
+    free(ctxt->pixels);
+    SDL_Quit();
+    // The context itself is released last; no field is touched afterwards.
+    free(ctxt);
+    return NULL;
+}
+
+/// Report whether the escape key is currently held down (non blocking call).
+/// List of key codes: https://wiki.libsdl.org/SDL_Keycode
+/// SDL_PumpEvents() must be called before.
+/// @return SDLK_ESCAPE if escape is pressed, 0 otherwise.
+SDL_Keycode gfx_keypressed(void) {
+    // Snapshot of the keyboard state, indexed by scancode.
+    const Uint8 *state = SDL_GetKeyboardState(NULL);
+    if (state && state[SDL_SCANCODE_ESCAPE]) {
+        return SDLK_ESCAPE;
+    }
+    return 0;
+}
diff --git a/futmpi/gfx.h b/futmpi/gfx.h
new file mode 100644
index 0000000..d6604ae
--- /dev/null
+++ b/futmpi/gfx.h
@@ -0,0 +1,43 @@
+#ifndef GFX_H
+#define GFX_H
+
+#include <stdint.h>
+#include <SDL2/SDL.h>
+
+// Pack 8-bit red/green/blue components into a 0x00RRGGBB value.
+// Every argument is parenthesised so that expressions such as
+// MAKE_COLOR(a + b, 0, 0) are cast and shifted as a whole.
+#define MAKE_COLOR(r, g, b) ((uint32_t)(b) | ((uint32_t)(g) << 8) | ((uint32_t)(r) << 16))
+
+// Predefined colors in the same 0x00RRGGBB layout.
+#define COLOR_BLACK 0x00000000
+#define COLOR_RED 0x00FF0000
+#define COLOR_GREEN 0x0000FF00
+#define COLOR_BLUE 0x000000FF
+#define COLOR_WHITE 0x00FFFFFF
+#define COLOR_YELLOW 0x00FFFF00
+
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+
+// Everything needed to draw into one window: the SDL objects plus a CPU-side
+// buffer of width * height 32-bit pixels.
+struct gfx_context_t {
+    SDL_Window *window;
+    SDL_Renderer *renderer;
+    SDL_Texture *texture;
+    uint32_t *pixels;
+    int width;
+    int height;
+};
+
+extern void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color);
+
+extern void gfx_clear(struct gfx_context_t *ctxt, uint32_t color);
+
+extern struct gfx_context_t *gfx_create(char *text, uint width, uint height);
+
+extern void *gfx_destroy(struct gfx_context_t *ctxt);
+
+extern void gfx_present(struct gfx_context_t *ctxt);
+
+// (void) makes these real prototypes; an empty list leaves the arguments
+// unchecked in C.
+extern SDL_Keycode gfx_keypressed(void);
+
+// NOTE(review): no definition of poll_event() is visible in gfx.c - confirm
+// it is provided elsewhere.
+extern SDL_EventType poll_event(void);
+
+#endif
diff --git a/futmpi/gol.c b/futmpi/gol.c
new file mode 100644
index 0000000..cc1a72e
--- /dev/null
+++ b/futmpi/gol.c
@@ -0,0 +1,4707 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#ifdef __GNUC__
+#pragma GCC diagnostic ignored "-Wunused-function"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wparentheses"
+#pragma GCC diagnostic ignored "-Wunused-label"
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wparentheses"
+#pragma clang
diagnostic ignored "-Wunused-label" +#endif +// Headers + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <stdio.h> +#include <float.h> +#define CL_TARGET_OPENCL_VERSION 120 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#define CL_SILENCE_DEPRECATION +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Initialisation + +struct futhark_context_config ; +struct futhark_context_config *futhark_context_config_new(void); +void futhark_context_config_free(struct futhark_context_config *cfg); +void futhark_context_config_add_build_option(struct futhark_context_config *cfg, + const char *opt); +void futhark_context_config_set_debugging(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_profiling(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_logging(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_device(struct futhark_context_config *cfg, const + char *s); +void futhark_context_config_set_platform(struct futhark_context_config *cfg, + const char *s); +void +futhark_context_config_select_device_interactively(struct futhark_context_config *cfg); +void futhark_context_config_list_devices(struct futhark_context_config *cfg); +void futhark_context_config_dump_program_to(struct futhark_context_config *cfg, + const char *path); +void +futhark_context_config_load_program_from(struct futhark_context_config *cfg, + const char *path); +void futhark_context_config_dump_binary_to(struct futhark_context_config *cfg, + const char *path); +void futhark_context_config_load_binary_from(struct futhark_context_config *cfg, + const char *path); +void +futhark_context_config_set_default_group_size(struct futhark_context_config *cfg, + int size); +void +futhark_context_config_set_default_num_groups(struct futhark_context_config *cfg, + int num); +void 
+futhark_context_config_set_default_tile_size(struct futhark_context_config *cfg, + int num); +void +futhark_context_config_set_default_reg_tile_size(struct futhark_context_config *cfg, + int num); +void +futhark_context_config_set_default_threshold(struct futhark_context_config *cfg, + int num); +int futhark_context_config_set_size(struct futhark_context_config *cfg, const + char *size_name, size_t size_value); +struct futhark_context ; +struct futhark_context *futhark_context_new(struct futhark_context_config *cfg); +struct futhark_context +*futhark_context_new_with_command_queue(struct futhark_context_config *cfg, + cl_command_queue queue); +void futhark_context_free(struct futhark_context *ctx); +cl_command_queue futhark_context_get_command_queue(struct futhark_context *ctx); +int futhark_get_num_sizes(void); +const char *futhark_get_size_name(int); +const char *futhark_get_size_class(int); + +// Arrays + +struct futhark_i8_2d ; +struct futhark_i8_2d *futhark_new_i8_2d(struct futhark_context *ctx, const + int8_t *data, int64_t dim0, + int64_t dim1); +struct futhark_i8_2d *futhark_new_raw_i8_2d(struct futhark_context *ctx, const + cl_mem data, int offset, + int64_t dim0, int64_t dim1); +int futhark_free_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr); +int futhark_values_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr, + int8_t *data); +cl_mem futhark_values_raw_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr); +const int64_t *futhark_shape_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr); + +// Opaque values + + +// Entry points + +int futhark_entry_get_envelope(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0); +int futhark_entry_next_chunk_board(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0, const + struct futhark_i8_2d *in1); + +// Miscellaneous + +int futhark_context_sync(struct futhark_context *ctx); +char 
*futhark_context_report(struct futhark_context *ctx);
+char *futhark_context_get_error(struct futhark_context *ctx);
+void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f);
+void futhark_context_pause_profiling(struct futhark_context *ctx);
+void futhark_context_unpause_profiling(struct futhark_context *ctx);
+int futhark_context_clear_caches(struct futhark_context *ctx);
+#define FUTHARK_BACKEND_opencl
+#ifdef __cplusplus
+}
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdint.h>
+#undef NDEBUG
+#include <assert.h>
+#include <stdarg.h>
+// Start of util.h.
+//
+// Various helper functions that are useful in all generated C code.
+
+#include <errno.h>
+#include <string.h>
+
+static const char *fut_progname = "(embedded Futhark)";
+
+// Print "<progname>: <formatted message>" to stderr and terminate the
+// process with exit status 'eval'.  Does not return.
+static void futhark_panic(int eval, const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  fprintf(stderr, "%s: ", fut_progname);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  exit(eval);
+}
+
+// For generating arbitrary-sized error messages.  It is the callers
+// responsibility to free the buffer at some point.  Returns NULL if
+// the message cannot be formatted or the buffer cannot be allocated.
+static char* msgprintf(const char *s, ...) {
+  va_list vl;
+
+  // First pass: measure.  Each va_start is paired with a va_end before
+  // the list is started again.
+  va_start(vl, s);
+  int len = vsnprintf(NULL, 0, s, vl);
+  va_end(vl);
+  if (len < 0) {
+    return NULL;
+  }
+
+  size_t needed = 1 + (size_t)len;
+  char *buffer = (char*) malloc(needed);
+  if (buffer == NULL) {
+    return NULL;
+  }
+
+  // Second pass: format into the buffer.
+  va_start(vl, s);
+  vsnprintf(buffer, needed, s, vl);
+  va_end(vl);
+  return buffer;
+}
+
+
+// If 'errval' is non-zero, print the formatted message together with the
+// calling function, the line and an error description to stderr, then exit
+// with status 'errval'.  The description is strerror(errno) when
+// 'sets_errno' is non-zero and the decimal value of 'errval' otherwise.
+static inline void check_err(int errval, int sets_errno, const char *fun, int line,
+                             const char *msg, ...) {
+  if (errval) {
+    // Large enough for any int in decimal, sign and terminator included.
+    // Previously this buffer was printed without ever being written.
+    char errnum[24];
+    snprintf(errnum, sizeof errnum, "%d", errval);
+
+    // Look errno up before the stdio calls below get a chance to change it.
+    const char *reason = sets_errno ? strerror(errno) : errnum;
+
+    va_list vl;
+    va_start(vl, msg);
+    fprintf(stderr, "ERROR: ");
+    vfprintf(stderr, msg, vl);
+    va_end(vl);
+
+    fprintf(stderr, " in %s() at line %d with error code %s\n",
+            fun, line, reason);
+    exit(errval);
+  }
+}
+
+#define CHECK_ERR(err, msg...) check_err(err, 0, __func__, __LINE__, msg)
+#define CHECK_ERRNO(err, msg...) check_err(err, 1, __func__, __LINE__, msg)
+
+// Read the rest of an open file into a NUL-terminated string; returns
+// NULL on error.  If 'size' is non-NULL it receives the number of bytes
+// read (0 on error).
+static void* fslurp_file(FILE *f, size_t *size) {
+  unsigned char *s = NULL;
+  size_t src_size = 0;
+
+  // ftell() reports failure as -1, so keep the positions signed until
+  // they have been validated.
+  long start = ftell(f);
+  long end = -1;
+  if (start >= 0 && fseek(f, 0, SEEK_END) == 0) {
+    end = ftell(f);
+  }
+
+  if (start >= 0 && end >= start && fseek(f, start, SEEK_SET) == 0) {
+    size_t want = (size_t)(end - start);
+    s = (unsigned char*) malloc(want + 1);
+    if (s != NULL) {
+      if (fread(s, 1, want, f) == want) {
+        s[want] = '\0';
+        src_size = want;
+      } else {
+        free(s);
+        s = NULL;
+      }
+    }
+  }
+
+  if (size) {
+    *size = src_size;
+  }
+
+  return s;
+}
+
+// Read a file into a NUL-terminated string; returns NULL on error.
+static void* slurp_file(const char *filename, size_t *size) {
+  FILE *f = fopen(filename, "rb"); // To avoid Windows messing with linebreaks.
+  if (f == NULL) return NULL;
+  unsigned char *s = fslurp_file(f, size);
+  fclose(f);
+  return s;
+}
+
+// Dump 'n' bytes from 'buf' into the file at the designated location.
+// Returns 0 on success.
+static int dump_file(const char *file, const void *buf, size_t n) {
+  // Binary mode, for the same reason slurp_file() reads with "rb".
+  FILE *f = fopen(file, "wb");
+
+  if (f == NULL) {
+    return 1;
+  }
+
+  int failed = fwrite(buf, sizeof(char), n, f) != n;
+
+  // Always close the stream, also after a short write, so it is not leaked.
+  if (fclose(f) != 0) {
+    failed = 1;
+  }
+
+  return failed;
+}
+
+struct str_builder {
+  char *str;
+  size_t capacity; // Size of buffer.
+  size_t used; // Bytes used, *not* including final zero.
+};
+
+// Initialise 'b' as an empty string with a small initial buffer.
+// Panics if the buffer cannot be allocated.
+static void str_builder_init(struct str_builder *b) {
+  b->capacity = 10;
+  b->used = 0;
+  b->str = malloc(b->capacity);
+  if (b->str == NULL) {
+    futhark_panic(1, "str_builder_init: out of memory\n");
+  }
+  b->str[0] = 0;
+}
+
+// Append a printf-style formatted string to 'b', growing the buffer by
+// doubling as needed.  Panics if the buffer cannot be grown; a formatting
+// error leaves the builder unchanged.
+static void str_builder(struct str_builder *b, const char *s, ...) {
+  va_list vl;
+  va_start(vl, s);
+  int len = vsnprintf(NULL, 0, s, vl);
+  va_end(vl);
+  if (len < 0) {
+    return;
+  }
+  size_t needed = (size_t)len;
+
+  if (b->capacity < b->used + needed + 1) {
+    size_t capacity = b->capacity;
+    while (capacity < b->used + needed + 1) {
+      capacity *= 2;
+    }
+    // Keep the old pointer until the reallocation is known to have worked.
+    char *grown = realloc(b->str, capacity);
+    if (grown == NULL) {
+      futhark_panic(1, "str_builder: out of memory\n");
+    }
+    b->str = grown;
+    b->capacity = capacity;
+  }
+
+  va_start(vl, s); // Must re-init.
+  vsnprintf(b->str+b->used, b->capacity-b->used, s, vl);
+  va_end(vl);
+  b->used += needed;
+}
+
+// End of util.h.
+
+// Start of timing.h.
+ +// The function get_wall_time() returns the wall time in microseconds +// (with an unspecified offset). + +#ifdef _WIN32 + +#include <windows.h> + +static int64_t get_wall_time(void) { + LARGE_INTEGER time,freq; + assert(QueryPerformanceFrequency(&freq)); + assert(QueryPerformanceCounter(&time)); + return ((double)time.QuadPart / freq.QuadPart) * 1000000; +} + +#else +// Assuming POSIX + +#include <time.h> +#include <sys/time.h> + +static int64_t get_wall_time(void) { + struct timeval time; + assert(gettimeofday(&time,NULL) == 0); + return time.tv_sec * 1000000 + time.tv_usec; +} + +static int64_t get_wall_time_ns(void) { + struct timespec time; + assert(clock_gettime(CLOCK_REALTIME, &time) == 0); + return time.tv_sec * 1000000000 + time.tv_nsec; +} + +#endif + +// End of timing.h. + +#ifdef _MSC_VER +#define inline __inline +#endif +#include <string.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <ctype.h> +#define CL_TARGET_OPENCL_VERSION 120 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#define CL_SILENCE_DEPRECATION +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +// Start of lock.h. + +// A very simple cross-platform implementation of locks. Uses +// pthreads on Unix and some Windows thing there. Futhark's +// host-level code is not multithreaded, but user code may be, so we +// need some mechanism for ensuring atomic access to API functions. +// This is that mechanism. It is not exposed to user code at all, so +// we do not have to worry about name collisions. + +#ifdef _WIN32 + +typedef HANDLE lock_t; + +static void create_lock(lock_t *lock) { + *lock = CreateMutex(NULL, // Default security attributes. + FALSE, // Initially unlocked. + NULL); // Unnamed. 
+} + +static void lock_lock(lock_t *lock) { + assert(WaitForSingleObject(*lock, INFINITE) == WAIT_OBJECT_0); +} + +static void lock_unlock(lock_t *lock) { + assert(ReleaseMutex(*lock)); +} + +static void free_lock(lock_t *lock) { + CloseHandle(*lock); +} + +#else +// Assuming POSIX + +#include <pthread.h> + +typedef pthread_mutex_t lock_t; + +static void create_lock(lock_t *lock) { + int r = pthread_mutex_init(lock, NULL); + assert(r == 0); +} + +static void lock_lock(lock_t *lock) { + int r = pthread_mutex_lock(lock); + assert(r == 0); +} + +static void lock_unlock(lock_t *lock) { + int r = pthread_mutex_unlock(lock); + assert(r == 0); +} + +static void free_lock(lock_t *lock) { + // Nothing to do for pthreads. + (void)lock; +} + +#endif + +// End of lock.h. + +static inline uint8_t add8(uint8_t x, uint8_t y) +{ + return x + y; +} +static inline uint16_t add16(uint16_t x, uint16_t y) +{ + return x + y; +} +static inline uint32_t add32(uint32_t x, uint32_t y) +{ + return x + y; +} +static inline uint64_t add64(uint64_t x, uint64_t y) +{ + return x + y; +} +static inline uint8_t sub8(uint8_t x, uint8_t y) +{ + return x - y; +} +static inline uint16_t sub16(uint16_t x, uint16_t y) +{ + return x - y; +} +static inline uint32_t sub32(uint32_t x, uint32_t y) +{ + return x - y; +} +static inline uint64_t sub64(uint64_t x, uint64_t y) +{ + return x - y; +} +static inline uint8_t mul8(uint8_t x, uint8_t y) +{ + return x * y; +} +static inline uint16_t mul16(uint16_t x, uint16_t y) +{ + return x * y; +} +static inline uint32_t mul32(uint32_t x, uint32_t y) +{ + return x * y; +} +static inline uint64_t mul64(uint64_t x, uint64_t y) +{ + return x * y; +} +static inline uint8_t udiv8(uint8_t x, uint8_t y) +{ + return x / y; +} +static inline uint16_t udiv16(uint16_t x, uint16_t y) +{ + return x / y; +} +static inline uint32_t udiv32(uint32_t x, uint32_t y) +{ + return x / y; +} +static inline uint64_t udiv64(uint64_t x, uint64_t y) +{ + return x / y; +} +static inline uint8_t 
udiv_up8(uint8_t x, uint8_t y) +{ + return (x + y - 1) / y; +} +static inline uint16_t udiv_up16(uint16_t x, uint16_t y) +{ + return (x + y - 1) / y; +} +static inline uint32_t udiv_up32(uint32_t x, uint32_t y) +{ + return (x + y - 1) / y; +} +static inline uint64_t udiv_up64(uint64_t x, uint64_t y) +{ + return (x + y - 1) / y; +} +static inline uint8_t umod8(uint8_t x, uint8_t y) +{ + return x % y; +} +static inline uint16_t umod16(uint16_t x, uint16_t y) +{ + return x % y; +} +static inline uint32_t umod32(uint32_t x, uint32_t y) +{ + return x % y; +} +static inline uint64_t umod64(uint64_t x, uint64_t y) +{ + return x % y; +} +static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint8_t umod_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint16_t umod_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint32_t umod_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint64_t umod_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int8_t sdiv8(int8_t x, int8_t y) +{ + int8_t q = x / y; + int8_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 
1 : 0); +} +static inline int16_t sdiv16(int16_t x, int16_t y) +{ + int16_t q = x / y; + int16_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int32_t sdiv32(int32_t x, int32_t y) +{ + int32_t q = x / y; + int32_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int64_t sdiv64(int64_t x, int64_t y) +{ + int64_t q = x / y; + int64_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int8_t sdiv_up8(int8_t x, int8_t y) +{ + return sdiv8(x + y - 1, y); +} +static inline int16_t sdiv_up16(int16_t x, int16_t y) +{ + return sdiv16(x + y - 1, y); +} +static inline int32_t sdiv_up32(int32_t x, int32_t y) +{ + return sdiv32(x + y - 1, y); +} +static inline int64_t sdiv_up64(int64_t x, int64_t y) +{ + return sdiv64(x + y - 1, y); +} +static inline int8_t smod8(int8_t x, int8_t y) +{ + int8_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int16_t smod16(int16_t x, int16_t y) +{ + int16_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int32_t smod32(int32_t x, int32_t y) +{ + int32_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int64_t smod64(int64_t x, int64_t y) +{ + int64_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int8_t sdiv_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : sdiv8(x, y); +} +static inline int16_t sdiv_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : sdiv16(x, y); +} +static inline int32_t sdiv_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : sdiv32(x, y); +} +static inline int64_t sdiv_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 
0 : sdiv64(x, y); +} +static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) +{ + return sdiv_safe8(x + y - 1, y); +} +static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) +{ + return sdiv_safe16(x + y - 1, y); +} +static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) +{ + return sdiv_safe32(x + y - 1, y); +} +static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) +{ + return sdiv_safe64(x + y - 1, y); +} +static inline int8_t smod_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : smod8(x, y); +} +static inline int16_t smod_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : smod16(x, y); +} +static inline int32_t smod_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : smod32(x, y); +} +static inline int64_t smod_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : smod64(x, y); +} +static inline int8_t squot8(int8_t x, int8_t y) +{ + return x / y; +} +static inline int16_t squot16(int16_t x, int16_t y) +{ + return x / y; +} +static inline int32_t squot32(int32_t x, int32_t y) +{ + return x / y; +} +static inline int64_t squot64(int64_t x, int64_t y) +{ + return x / y; +} +static inline int8_t srem8(int8_t x, int8_t y) +{ + return x % y; +} +static inline int16_t srem16(int16_t x, int16_t y) +{ + return x % y; +} +static inline int32_t srem32(int32_t x, int32_t y) +{ + return x % y; +} +static inline int64_t srem64(int64_t x, int64_t y) +{ + return x % y; +} +static inline int8_t squot_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int16_t squot_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int32_t squot_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int64_t squot_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int8_t srem_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int16_t srem_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 
0 : x % y; +} +static inline int32_t srem_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int64_t srem_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int8_t smin8(int8_t x, int8_t y) +{ + return x < y ? x : y; +} +static inline int16_t smin16(int16_t x, int16_t y) +{ + return x < y ? x : y; +} +static inline int32_t smin32(int32_t x, int32_t y) +{ + return x < y ? x : y; +} +static inline int64_t smin64(int64_t x, int64_t y) +{ + return x < y ? x : y; +} +static inline uint8_t umin8(uint8_t x, uint8_t y) +{ + return x < y ? x : y; +} +static inline uint16_t umin16(uint16_t x, uint16_t y) +{ + return x < y ? x : y; +} +static inline uint32_t umin32(uint32_t x, uint32_t y) +{ + return x < y ? x : y; +} +static inline uint64_t umin64(uint64_t x, uint64_t y) +{ + return x < y ? x : y; +} +static inline int8_t smax8(int8_t x, int8_t y) +{ + return x < y ? y : x; +} +static inline int16_t smax16(int16_t x, int16_t y) +{ + return x < y ? y : x; +} +static inline int32_t smax32(int32_t x, int32_t y) +{ + return x < y ? y : x; +} +static inline int64_t smax64(int64_t x, int64_t y) +{ + return x < y ? y : x; +} +static inline uint8_t umax8(uint8_t x, uint8_t y) +{ + return x < y ? y : x; +} +static inline uint16_t umax16(uint16_t x, uint16_t y) +{ + return x < y ? y : x; +} +static inline uint32_t umax32(uint32_t x, uint32_t y) +{ + return x < y ? y : x; +} +static inline uint64_t umax64(uint64_t x, uint64_t y) +{ + return x < y ? 
y : x; +} +static inline uint8_t shl8(uint8_t x, uint8_t y) +{ + return x << y; +} +static inline uint16_t shl16(uint16_t x, uint16_t y) +{ + return x << y; +} +static inline uint32_t shl32(uint32_t x, uint32_t y) +{ + return x << y; +} +static inline uint64_t shl64(uint64_t x, uint64_t y) +{ + return x << y; +} +static inline uint8_t lshr8(uint8_t x, uint8_t y) +{ + return x >> y; +} +static inline uint16_t lshr16(uint16_t x, uint16_t y) +{ + return x >> y; +} +static inline uint32_t lshr32(uint32_t x, uint32_t y) +{ + return x >> y; +} +static inline uint64_t lshr64(uint64_t x, uint64_t y) +{ + return x >> y; +} +static inline int8_t ashr8(int8_t x, int8_t y) +{ + return x >> y; +} +static inline int16_t ashr16(int16_t x, int16_t y) +{ + return x >> y; +} +static inline int32_t ashr32(int32_t x, int32_t y) +{ + return x >> y; +} +static inline int64_t ashr64(int64_t x, int64_t y) +{ + return x >> y; +} +static inline uint8_t and8(uint8_t x, uint8_t y) +{ + return x & y; +} +static inline uint16_t and16(uint16_t x, uint16_t y) +{ + return x & y; +} +static inline uint32_t and32(uint32_t x, uint32_t y) +{ + return x & y; +} +static inline uint64_t and64(uint64_t x, uint64_t y) +{ + return x & y; +} +static inline uint8_t or8(uint8_t x, uint8_t y) +{ + return x | y; +} +static inline uint16_t or16(uint16_t x, uint16_t y) +{ + return x | y; +} +static inline uint32_t or32(uint32_t x, uint32_t y) +{ + return x | y; +} +static inline uint64_t or64(uint64_t x, uint64_t y) +{ + return x | y; +} +static inline uint8_t xor8(uint8_t x, uint8_t y) +{ + return x ^ y; +} +static inline uint16_t xor16(uint16_t x, uint16_t y) +{ + return x ^ y; +} +static inline uint32_t xor32(uint32_t x, uint32_t y) +{ + return x ^ y; +} +static inline uint64_t xor64(uint64_t x, uint64_t y) +{ + return x ^ y; +} +static inline bool ult8(uint8_t x, uint8_t y) +{ + return x < y; +} +static inline bool ult16(uint16_t x, uint16_t y) +{ + return x < y; +} +static inline bool ult32(uint32_t x, 
uint32_t y) +{ + return x < y; +} +static inline bool ult64(uint64_t x, uint64_t y) +{ + return x < y; +} +static inline bool ule8(uint8_t x, uint8_t y) +{ + return x <= y; +} +static inline bool ule16(uint16_t x, uint16_t y) +{ + return x <= y; +} +static inline bool ule32(uint32_t x, uint32_t y) +{ + return x <= y; +} +static inline bool ule64(uint64_t x, uint64_t y) +{ + return x <= y; +} +static inline bool slt8(int8_t x, int8_t y) +{ + return x < y; +} +static inline bool slt16(int16_t x, int16_t y) +{ + return x < y; +} +static inline bool slt32(int32_t x, int32_t y) +{ + return x < y; +} +static inline bool slt64(int64_t x, int64_t y) +{ + return x < y; +} +static inline bool sle8(int8_t x, int8_t y) +{ + return x <= y; +} +static inline bool sle16(int16_t x, int16_t y) +{ + return x <= y; +} +static inline bool sle32(int32_t x, int32_t y) +{ + return x <= y; +} +static inline bool sle64(int64_t x, int64_t y) +{ + return x <= y; +} +static inline int8_t pow8(int8_t x, int8_t y) +{ + int8_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int16_t pow16(int16_t x, int16_t y) +{ + int16_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int32_t pow32(int32_t x, int32_t y) +{ + int32_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int64_t pow64(int64_t x, int64_t y) +{ + int64_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline bool itob_i8_bool(int8_t x) +{ + return x; +} +static inline bool itob_i16_bool(int16_t x) +{ + return x; +} +static inline bool itob_i32_bool(int32_t x) +{ + return x; +} +static inline bool itob_i64_bool(int64_t x) +{ + return x; +} +static inline int8_t btoi_bool_i8(bool x) +{ + return x; +} +static inline int16_t 
btoi_bool_i16(bool x) +{ + return x; +} +static inline int32_t btoi_bool_i32(bool x) +{ + return x; +} +static inline int64_t btoi_bool_i64(bool x) +{ + return x; +} +#define sext_i8_i8(x) ((int8_t) (int8_t) x) +#define sext_i8_i16(x) ((int16_t) (int8_t) x) +#define sext_i8_i32(x) ((int32_t) (int8_t) x) +#define sext_i8_i64(x) ((int64_t) (int8_t) x) +#define sext_i16_i8(x) ((int8_t) (int16_t) x) +#define sext_i16_i16(x) ((int16_t) (int16_t) x) +#define sext_i16_i32(x) ((int32_t) (int16_t) x) +#define sext_i16_i64(x) ((int64_t) (int16_t) x) +#define sext_i32_i8(x) ((int8_t) (int32_t) x) +#define sext_i32_i16(x) ((int16_t) (int32_t) x) +#define sext_i32_i32(x) ((int32_t) (int32_t) x) +#define sext_i32_i64(x) ((int64_t) (int32_t) x) +#define sext_i64_i8(x) ((int8_t) (int64_t) x) +#define sext_i64_i16(x) ((int16_t) (int64_t) x) +#define sext_i64_i32(x) ((int32_t) (int64_t) x) +#define sext_i64_i64(x) ((int64_t) (int64_t) x) +#define zext_i8_i8(x) ((int8_t) (uint8_t) x) +#define zext_i8_i16(x) ((int16_t) (uint8_t) x) +#define zext_i8_i32(x) ((int32_t) (uint8_t) x) +#define zext_i8_i64(x) ((int64_t) (uint8_t) x) +#define zext_i16_i8(x) ((int8_t) (uint16_t) x) +#define zext_i16_i16(x) ((int16_t) (uint16_t) x) +#define zext_i16_i32(x) ((int32_t) (uint16_t) x) +#define zext_i16_i64(x) ((int64_t) (uint16_t) x) +#define zext_i32_i8(x) ((int8_t) (uint32_t) x) +#define zext_i32_i16(x) ((int16_t) (uint32_t) x) +#define zext_i32_i32(x) ((int32_t) (uint32_t) x) +#define zext_i32_i64(x) ((int64_t) (uint32_t) x) +#define zext_i64_i8(x) ((int8_t) (uint64_t) x) +#define zext_i64_i16(x) ((int16_t) (uint64_t) x) +#define zext_i64_i32(x) ((int32_t) (uint64_t) x) +#define zext_i64_i64(x) ((int64_t) (uint64_t) x) +#if defined(__OPENCL_VERSION__) +static int32_t futrts_popc8(int8_t x) +{ + return popcount(x); +} +static int32_t futrts_popc16(int16_t x) +{ + return popcount(x); +} +static int32_t futrts_popc32(int32_t x) +{ + return popcount(x); +} +static int32_t futrts_popc64(int64_t x) +{ 
+ return popcount(x); +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_popc8(int8_t x) +{ + return __popc(zext_i8_i32(x)); +} +static int32_t futrts_popc16(int16_t x) +{ + return __popc(zext_i16_i32(x)); +} +static int32_t futrts_popc32(int32_t x) +{ + return __popc(x); +} +static int32_t futrts_popc64(int64_t x) +{ + return __popcll(x); +} +#else +static int32_t futrts_popc8(int8_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc16(int16_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc32(int32_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc64(int64_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +#endif +#if defined(__OPENCL_VERSION__) +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + return mul_hi(a, b); +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + return mul_hi(a, b); +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + return mul_hi(a, b); +} +static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + return mul_hi(a, b); +} +#elif defined(__CUDA_ARCH__) +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + uint16_t aa = a; + uint16_t bb = b; + + return aa * bb >> 8; +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + uint32_t aa = a; + uint32_t bb = b; + + return aa * bb >> 16; +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + return mulhi(a, b); +} +static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + return mul64hi(a, b); +} +#else +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + uint16_t aa = a; + uint16_t bb = b; + + return aa * bb >> 8; +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + uint32_t aa = a; + uint32_t bb = b; + + return aa * bb >> 16; +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + uint64_t aa = a; + uint64_t bb = b; + + return aa * bb >> 32; +} +static 
uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + __uint128_t aa = a; + __uint128_t bb = b; + + return aa * bb >> 64; +} +#endif +#if defined(__OPENCL_VERSION__) +static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) +{ + return mad_hi(a, b, c); +} +static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) +{ + return mad_hi(a, b, c); +} +static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) +{ + return mad_hi(a, b, c); +} +static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) +{ + return mad_hi(a, b, c); +} +#else +static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) +{ + return futrts_mul_hi8(a, b) + c; +} +static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) +{ + return futrts_mul_hi16(a, b) + c; +} +static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) +{ + return futrts_mul_hi32(a, b) + c; +} +static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) +{ + return futrts_mul_hi64(a, b) + c; +} +#endif +#if defined(__OPENCL_VERSION__) +static int32_t futrts_clzz8(int8_t x) +{ + return clz(x); +} +static int32_t futrts_clzz16(int16_t x) +{ + return clz(x); +} +static int32_t futrts_clzz32(int32_t x) +{ + return clz(x); +} +static int32_t futrts_clzz64(int64_t x) +{ + return clz(x); +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_clzz8(int8_t x) +{ + return __clz(zext_i8_i32(x)) - 24; +} +static int32_t futrts_clzz16(int16_t x) +{ + return __clz(zext_i16_i32(x)) - 16; +} +static int32_t futrts_clzz32(int32_t x) +{ + return __clz(x); +} +static int32_t futrts_clzz64(int64_t x) +{ + return __clzll(x); +} +#else +static int32_t futrts_clzz8(int8_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +static int32_t futrts_clzz16(int16_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; 
+} +static int32_t futrts_clzz32(int32_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +static int32_t futrts_clzz64(int64_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +#endif +#if defined(__OPENCL_VERSION__) +static int32_t futrts_ctzz8(int8_t x) +{ + int i = 0; + + for (; i < 8 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz16(int16_t x) +{ + int i = 0; + + for (; i < 16 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz32(int32_t x) +{ + int i = 0; + + for (; i < 32 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz64(int64_t x) +{ + int i = 0; + + for (; i < 64 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_ctzz8(int8_t x) +{ + int y = __ffs(x); + + return y == 0 ? 8 : y - 1; +} +static int32_t futrts_ctzz16(int16_t x) +{ + int y = __ffs(x); + + return y == 0 ? 16 : y - 1; +} +static int32_t futrts_ctzz32(int32_t x) +{ + int y = __ffs(x); + + return y == 0 ? 32 : y - 1; +} +static int32_t futrts_ctzz64(int64_t x) +{ + int y = __ffsll(x); + + return y == 0 ? 64 : y - 1; +} +#else +static int32_t futrts_ctzz8(int8_t x) +{ + return x == 0 ? 8 : __builtin_ctz((uint32_t) x); +} +static int32_t futrts_ctzz16(int16_t x) +{ + return x == 0 ? 16 : __builtin_ctz((uint32_t) x); +} +static int32_t futrts_ctzz32(int32_t x) +{ + return x == 0 ? 32 : __builtin_ctz(x); +} +static int32_t futrts_ctzz64(int64_t x) +{ + return x == 0 ? 
64 : __builtin_ctzll(x); +} +#endif +static inline float fdiv32(float x, float y) +{ + return x / y; +} +static inline float fadd32(float x, float y) +{ + return x + y; +} +static inline float fsub32(float x, float y) +{ + return x - y; +} +static inline float fmul32(float x, float y) +{ + return x * y; +} +static inline float fmin32(float x, float y) +{ + return fmin(x, y); +} +static inline float fmax32(float x, float y) +{ + return fmax(x, y); +} +static inline float fpow32(float x, float y) +{ + return pow(x, y); +} +static inline bool cmplt32(float x, float y) +{ + return x < y; +} +static inline bool cmple32(float x, float y) +{ + return x <= y; +} +static inline float sitofp_i8_f32(int8_t x) +{ + return (float) x; +} +static inline float sitofp_i16_f32(int16_t x) +{ + return (float) x; +} +static inline float sitofp_i32_f32(int32_t x) +{ + return (float) x; +} +static inline float sitofp_i64_f32(int64_t x) +{ + return (float) x; +} +static inline float uitofp_i8_f32(uint8_t x) +{ + return (float) x; +} +static inline float uitofp_i16_f32(uint16_t x) +{ + return (float) x; +} +static inline float uitofp_i32_f32(uint32_t x) +{ + return (float) x; +} +static inline float uitofp_i64_f32(uint64_t x) +{ + return (float) x; +} +static inline int8_t fptosi_f32_i8(float x) +{ + return (int8_t) x; +} +static inline int16_t fptosi_f32_i16(float x) +{ + return (int16_t) x; +} +static inline int32_t fptosi_f32_i32(float x) +{ + return (int32_t) x; +} +static inline int64_t fptosi_f32_i64(float x) +{ + return (int64_t) x; +} +static inline uint8_t fptoui_f32_i8(float x) +{ + return (uint8_t) x; +} +static inline uint16_t fptoui_f32_i16(float x) +{ + return (uint16_t) x; +} +static inline uint32_t fptoui_f32_i32(float x) +{ + return (uint32_t) x; +} +static inline uint64_t fptoui_f32_i64(float x) +{ + return (uint64_t) x; +} +static inline double fdiv64(double x, double y) +{ + return x / y; +} +static inline double fadd64(double x, double y) +{ + return x + y; +} +static 
inline double fsub64(double x, double y) +{ + return x - y; +} +static inline double fmul64(double x, double y) +{ + return x * y; +} +static inline double fmin64(double x, double y) +{ + return fmin(x, y); +} +static inline double fmax64(double x, double y) +{ + return fmax(x, y); +} +static inline double fpow64(double x, double y) +{ + return pow(x, y); +} +static inline bool cmplt64(double x, double y) +{ + return x < y; +} +static inline bool cmple64(double x, double y) +{ + return x <= y; +} +static inline double sitofp_i8_f64(int8_t x) +{ + return (double) x; +} +static inline double sitofp_i16_f64(int16_t x) +{ + return (double) x; +} +static inline double sitofp_i32_f64(int32_t x) +{ + return (double) x; +} +static inline double sitofp_i64_f64(int64_t x) +{ + return (double) x; +} +static inline double uitofp_i8_f64(uint8_t x) +{ + return (double) x; +} +static inline double uitofp_i16_f64(uint16_t x) +{ + return (double) x; +} +static inline double uitofp_i32_f64(uint32_t x) +{ + return (double) x; +} +static inline double uitofp_i64_f64(uint64_t x) +{ + return (double) x; +} +static inline int8_t fptosi_f64_i8(double x) +{ + return (int8_t) x; +} +static inline int16_t fptosi_f64_i16(double x) +{ + return (int16_t) x; +} +static inline int32_t fptosi_f64_i32(double x) +{ + return (int32_t) x; +} +static inline int64_t fptosi_f64_i64(double x) +{ + return (int64_t) x; +} +static inline uint8_t fptoui_f64_i8(double x) +{ + return (uint8_t) x; +} +static inline uint16_t fptoui_f64_i16(double x) +{ + return (uint16_t) x; +} +static inline uint32_t fptoui_f64_i32(double x) +{ + return (uint32_t) x; +} +static inline uint64_t fptoui_f64_i64(double x) +{ + return (uint64_t) x; +} +static inline float fpconv_f32_f32(float x) +{ + return (float) x; +} +static inline double fpconv_f32_f64(float x) +{ + return (double) x; +} +static inline float fpconv_f64_f32(double x) +{ + return (float) x; +} +static inline double fpconv_f64_f64(double x) +{ + return (double) x; 
+} +static inline bool futrts_isnan32(float x) +{ + return isnan(x); +} +static inline bool futrts_isinf32(float x) +{ + return isinf(x); +} +#ifdef __OPENCL_VERSION__ +static inline float futrts_log32(float x) +{ + return log(x); +} +static inline float futrts_log2_32(float x) +{ + return log2(x); +} +static inline float futrts_log10_32(float x) +{ + return log10(x); +} +static inline float futrts_sqrt32(float x) +{ + return sqrt(x); +} +static inline float futrts_exp32(float x) +{ + return exp(x); +} +static inline float futrts_cos32(float x) +{ + return cos(x); +} +static inline float futrts_sin32(float x) +{ + return sin(x); +} +static inline float futrts_tan32(float x) +{ + return tan(x); +} +static inline float futrts_acos32(float x) +{ + return acos(x); +} +static inline float futrts_asin32(float x) +{ + return asin(x); +} +static inline float futrts_atan32(float x) +{ + return atan(x); +} +static inline float futrts_cosh32(float x) +{ + return cosh(x); +} +static inline float futrts_sinh32(float x) +{ + return sinh(x); +} +static inline float futrts_tanh32(float x) +{ + return tanh(x); +} +static inline float futrts_acosh32(float x) +{ + return acosh(x); +} +static inline float futrts_asinh32(float x) +{ + return asinh(x); +} +static inline float futrts_atanh32(float x) +{ + return atanh(x); +} +static inline float futrts_atan2_32(float x, float y) +{ + return atan2(x, y); +} +static inline float futrts_hypot32(float x, float y) +{ + return hypot(x, y); +} +static inline float futrts_gamma32(float x) +{ + return tgamma(x); +} +static inline float futrts_lgamma32(float x) +{ + return lgamma(x); +} +static inline float fmod32(float x, float y) +{ + return fmod(x, y); +} +static inline float futrts_round32(float x) +{ + return rint(x); +} +static inline float futrts_floor32(float x) +{ + return floor(x); +} +static inline float futrts_ceil32(float x) +{ + return ceil(x); +} +static inline float futrts_lerp32(float v0, float v1, float t) +{ + return mix(v0, v1, 
t); +} +static inline float futrts_mad32(float a, float b, float c) +{ + return mad(a, b, c); +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fma(a, b, c); +} +#else +static inline float futrts_log32(float x) +{ + return logf(x); +} +static inline float futrts_log2_32(float x) +{ + return log2f(x); +} +static inline float futrts_log10_32(float x) +{ + return log10f(x); +} +static inline float futrts_sqrt32(float x) +{ + return sqrtf(x); +} +static inline float futrts_exp32(float x) +{ + return expf(x); +} +static inline float futrts_cos32(float x) +{ + return cosf(x); +} +static inline float futrts_sin32(float x) +{ + return sinf(x); +} +static inline float futrts_tan32(float x) +{ + return tanf(x); +} +static inline float futrts_acos32(float x) +{ + return acosf(x); +} +static inline float futrts_asin32(float x) +{ + return asinf(x); +} +static inline float futrts_atan32(float x) +{ + return atanf(x); +} +static inline float futrts_cosh32(float x) +{ + return coshf(x); +} +static inline float futrts_sinh32(float x) +{ + return sinhf(x); +} +static inline float futrts_tanh32(float x) +{ + return tanhf(x); +} +static inline float futrts_acosh32(float x) +{ + return acoshf(x); +} +static inline float futrts_asinh32(float x) +{ + return asinhf(x); +} +static inline float futrts_atanh32(float x) +{ + return atanhf(x); +} +static inline float futrts_atan2_32(float x, float y) +{ + return atan2f(x, y); +} +static inline float futrts_hypot32(float x, float y) +{ + return hypotf(x, y); +} +static inline float futrts_gamma32(float x) +{ + return tgammaf(x); +} +static inline float futrts_lgamma32(float x) +{ + return lgammaf(x); +} +static inline float fmod32(float x, float y) +{ + return fmodf(x, y); +} +static inline float futrts_round32(float x) +{ + return rintf(x); +} +static inline float futrts_floor32(float x) +{ + return floorf(x); +} +static inline float futrts_ceil32(float x) +{ + return ceilf(x); +} +static inline float 
futrts_lerp32(float v0, float v1, float t) +{ + return v0 + (v1 - v0) * t; +} +static inline float futrts_mad32(float a, float b, float c) +{ + return a * b + c; +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fmaf(a, b, c); +} +#endif +static inline int32_t futrts_to_bits32(float x) +{ + union { + float f; + int32_t t; + } p; + + p.f = x; + return p.t; +} +static inline float futrts_from_bits32(int32_t x) +{ + union { + int32_t f; + float t; + } p; + + p.f = x; + return p.t; +} +static inline float fsignum32(float x) +{ + return futrts_isnan32(x) ? x : (x > 0) - (x < 0); +} +static inline double futrts_log64(double x) +{ + return log(x); +} +static inline double futrts_log2_64(double x) +{ + return log2(x); +} +static inline double futrts_log10_64(double x) +{ + return log10(x); +} +static inline double futrts_sqrt64(double x) +{ + return sqrt(x); +} +static inline double futrts_exp64(double x) +{ + return exp(x); +} +static inline double futrts_cos64(double x) +{ + return cos(x); +} +static inline double futrts_sin64(double x) +{ + return sin(x); +} +static inline double futrts_tan64(double x) +{ + return tan(x); +} +static inline double futrts_acos64(double x) +{ + return acos(x); +} +static inline double futrts_asin64(double x) +{ + return asin(x); +} +static inline double futrts_atan64(double x) +{ + return atan(x); +} +static inline double futrts_cosh64(double x) +{ + return cosh(x); +} +static inline double futrts_sinh64(double x) +{ + return sinh(x); +} +static inline double futrts_tanh64(double x) +{ + return tanh(x); +} +static inline double futrts_acosh64(double x) +{ + return acosh(x); +} +static inline double futrts_asinh64(double x) +{ + return asinh(x); +} +static inline double futrts_atanh64(double x) +{ + return atanh(x); +} +static inline double futrts_atan2_64(double x, double y) +{ + return atan2(x, y); +} +static inline double futrts_hypot64(double x, double y) +{ + return hypot(x, y); +} +static inline double 
futrts_gamma64(double x) +{ + return tgamma(x); +} +static inline double futrts_lgamma64(double x) +{ + return lgamma(x); +} +static inline double futrts_fma64(double a, double b, double c) +{ + return fma(a, b, c); +} +static inline double futrts_round64(double x) +{ + return rint(x); +} +static inline double futrts_ceil64(double x) +{ + return ceil(x); +} +static inline double futrts_floor64(double x) +{ + return floor(x); +} +static inline bool futrts_isnan64(double x) +{ + return isnan(x); +} +static inline bool futrts_isinf64(double x) +{ + return isinf(x); +} +static inline int64_t futrts_to_bits64(double x) +{ + union { + double f; + int64_t t; + } p; + + p.f = x; + return p.t; +} +static inline double futrts_from_bits64(int64_t x) +{ + union { + int64_t f; + double t; + } p; + + p.f = x; + return p.t; +} +static inline double fmod64(double x, double y) +{ + return fmod(x, y); +} +static inline double fsignum64(double x) +{ + return futrts_isnan64(x) ? x : (x > 0) - (x < 0); +} +#ifdef __OPENCL_VERSION__ +static inline double futrts_lerp64(double v0, double v1, double t) +{ + return mix(v0, v1, t); +} +static inline double futrts_mad64(double a, double b, double c) +{ + return mad(a, b, c); +} +#else +static inline double futrts_lerp64(double v0, double v1, double t) +{ + return v0 + (v1 - v0) * t; +} +static inline double futrts_mad64(double a, double b, double c) +{ + return a * b + c; +} +#endif +static int init_constants(struct futhark_context *); +static int free_constants(struct futhark_context *); +struct memblock_device { + int *references; + cl_mem mem; + int64_t size; + const char *desc; +} ; +struct memblock { + int *references; + char *mem; + int64_t size; + const char *desc; +} ; +typedef cl_mem fl_mem_t; +// Start of free_list.h. + +// An entry in the free list. May be invalid, to avoid having to +// deallocate entries as soon as they are removed. There is also a +// tag, to help with memory reuse. 
+struct free_list_entry { + size_t size; + fl_mem_t mem; + const char *tag; + unsigned char valid; +}; + +struct free_list { + struct free_list_entry *entries; // Pointer to entries. + int capacity; // Number of entries. + int used; // Number of valid entries. +}; + +static void free_list_init(struct free_list *l) { + l->capacity = 30; // Picked arbitrarily. + l->used = 0; + l->entries = (struct free_list_entry*) malloc(sizeof(struct free_list_entry) * l->capacity); + for (int i = 0; i < l->capacity; i++) { + l->entries[i].valid = 0; + } +} + +// Remove invalid entries from the free list. +static void free_list_pack(struct free_list *l) { + int p = 0; + for (int i = 0; i < l->capacity; i++) { + if (l->entries[i].valid) { + l->entries[p] = l->entries[i]; + if (i > p) { + l->entries[i].valid = 0; + } + p++; + } + } + + // Now p is the number of used elements. We don't want it to go + // less than the default capacity (although in practice it's OK as + // long as it doesn't become 1). + if (p < 30) { + p = 30; + } + l->entries = realloc(l->entries, p * sizeof(struct free_list_entry)); + l->capacity = p; +} + +static void free_list_destroy(struct free_list *l) { + assert(l->used == 0); + free(l->entries); +} + +static int free_list_find_invalid(struct free_list *l) { + int i; + for (i = 0; i < l->capacity; i++) { + if (!l->entries[i].valid) { + break; + } + } + return i; +} + +static void free_list_insert(struct free_list *l, size_t size, fl_mem_t mem, const char *tag) { + int i = free_list_find_invalid(l); + + if (i == l->capacity) { + // List is full; so we have to grow it. + int new_capacity = l->capacity * 2 * sizeof(struct free_list_entry); + l->entries = realloc(l->entries, new_capacity); + for (int j = 0; j < l->capacity; j++) { + l->entries[j+l->capacity].valid = 0; + } + l->capacity *= 2; + } + + // Now 'i' points to the first invalid entry. 
+ l->entries[i].valid = 1; + l->entries[i].size = size; + l->entries[i].mem = mem; + l->entries[i].tag = tag; + + l->used++; +} + +// Find and remove a memory block of the indicated tag, or if that +// does not exist, another memory block with exactly the desired size. +// Returns 0 on success. +static int free_list_find(struct free_list *l, size_t size, + size_t *size_out, fl_mem_t *mem_out) { + int size_match = -1; + int i; + for (i = 0; i < l->capacity; i++) { + if (l->entries[i].valid && + size <= l->entries[i].size && + (size_match < 0 || l->entries[i].size < l->entries[size_match].size)) { + // If this entry is valid, has sufficient size, and is smaller than the + // best entry found so far, use this entry. + size_match = i; + } + } + + if (size_match >= 0) { + l->entries[size_match].valid = 0; + *size_out = l->entries[size_match].size; + *mem_out = l->entries[size_match].mem; + l->used--; + return 0; + } else { + return 1; + } +} + +// Remove the first block in the free list. Returns 0 if a block was +// removed, and nonzero if the free list was already empty. +static int free_list_first(struct free_list *l, fl_mem_t *mem_out) { + for (int i = 0; i < l->capacity; i++) { + if (l->entries[i].valid) { + l->entries[i].valid = 0; + *mem_out = l->entries[i].mem; + l->used--; + return 0; + } + } + + return 1; +} + +// End of free_list.h. + +// Start of opencl.h. + +#define OPENCL_SUCCEED_FATAL(e) opencl_succeed_fatal(e, #e, __FILE__, __LINE__) +#define OPENCL_SUCCEED_NONFATAL(e) opencl_succeed_nonfatal(e, #e, __FILE__, __LINE__) +// Take care not to override an existing error. +#define OPENCL_SUCCEED_OR_RETURN(e) { \ + char *serror = OPENCL_SUCCEED_NONFATAL(e); \ + if (serror) { \ + if (!ctx->error) { \ + ctx->error = serror; \ + return bad; \ + } else { \ + free(serror); \ + } \ + } \ + } + +// OPENCL_SUCCEED_OR_RETURN returns the value of the variable 'bad' in +// scope. By default, it will be this one. Create a local variable +// of some other type if needed. 
// This is a bit of a hack, but it
// saves effort in the code generator.
static const int bad = 1;

// User-adjustable settings for the OpenCL backend.  Every field is
// given its starting value by opencl_config_init().
struct opencl_config {
  int debugging;
  int profiling;
  int logging;
  int preferred_device_num;
  const char *preferred_platform;
  const char *preferred_device;
  int ignore_blacklist;

  const char* dump_program_to;
  const char* load_program_from;
  const char* dump_binary_to;
  const char* load_binary_from;

  size_t default_group_size;
  size_t default_num_groups;
  size_t default_tile_size;
  size_t default_reg_tile_size;
  size_t default_threshold;

  int default_group_size_changed;
  int default_tile_size_changed;

  // The four arrays below are supplied by the caller of
  // opencl_config_init() and are stored by pointer, not copied.
  int num_sizes;
  const char **size_names;
  const char **size_vars;
  int64_t *size_values;
  const char **size_classes;
};

// Fill in every field of *cfg: flags off, no preferred platform or
// device (empty strings), no dump/load paths, and the caller's size
// tables stored as given.
static void opencl_config_init(struct opencl_config *cfg,
                               int num_sizes,
                               const char *size_names[],
                               const char *size_vars[],
                               int64_t *size_values,
                               const char *size_classes[]) {
  *cfg = (struct opencl_config) {
    .debugging = 0,
    .profiling = 0,
    .logging = 0,
    .preferred_device_num = 0,
    .preferred_platform = "",
    .preferred_device = "",
    .ignore_blacklist = 0,

    .dump_program_to = NULL,
    .load_program_from = NULL,
    .dump_binary_to = NULL,
    .load_binary_from = NULL,

    // The following are dummy sizes that mean the concrete defaults
    // will be set during initialisation via hardware-inspection-based
    // heuristics.
    .default_group_size = 0,
    .default_num_groups = 0,
    .default_tile_size = 0,
    .default_reg_tile_size = 0,
    .default_threshold = 0,

    .default_group_size_changed = 0,
    .default_tile_size_changed = 0,

    .num_sizes = num_sizes,
    .size_names = size_names,
    .size_vars = size_vars,
    .size_values = size_values,
    .size_classes = size_classes,
  };
}

// A record of something that happened.
+struct profiling_record { + cl_event *event; + int *runs; + int64_t *runtime; +}; + +struct opencl_context { + cl_device_id device; + cl_context ctx; + cl_command_queue queue; + + struct opencl_config cfg; + + struct free_list free_list; + + size_t max_group_size; + size_t max_num_groups; + size_t max_tile_size; + size_t max_threshold; + size_t max_local_memory; + + size_t lockstep_width; + + struct profiling_record *profiling_records; + int profiling_records_capacity; + int profiling_records_used; +}; + +struct opencl_device_option { + cl_platform_id platform; + cl_device_id device; + cl_device_type device_type; + char *platform_name; + char *device_name; +}; + +// This function must be defined by the user. It is invoked by +// setup_opencl() after the platform and device has been found, but +// before the program is loaded. Its intended use is to tune +// constants based on the selected platform and device. +static void post_opencl_setup(struct opencl_context*, struct opencl_device_option*); + +static char *strclone(const char *str) { + size_t size = strlen(str) + 1; + char *copy = (char*) malloc(size); + if (copy == NULL) { + return NULL; + } + + memcpy(copy, str, size); + return copy; +} + +static const char* opencl_error_string(cl_int err) +{ + switch (err) { + case CL_SUCCESS: return "Success!"; + case CL_DEVICE_NOT_FOUND: return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; + case CL_OUT_OF_RESOURCES: return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; + case 
CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; + case CL_MAP_FAILURE: return "Map failure"; + case CL_INVALID_VALUE: return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; + case CL_INVALID_PLATFORM: return "Invalid platform"; + case CL_INVALID_DEVICE: return "Invalid device"; + case CL_INVALID_CONTEXT: return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; + case CL_INVALID_HOST_PTR: return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; + case CL_INVALID_SAMPLER: return "Invalid sampler"; + case CL_INVALID_BINARY: return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; + case CL_INVALID_PROGRAM: return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; + case CL_INVALID_KERNEL: return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; + case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; + case CL_INVALID_EVENT: return "Invalid event"; + case CL_INVALID_OPERATION: return "Invalid operation"; + case CL_INVALID_GL_OBJECT: return "Invalid OpenGL 
object"; + case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; + default: return "Unknown"; + } +} + +static void opencl_succeed_fatal(unsigned int ret, + const char *call, + const char *file, + int line) { + if (ret != CL_SUCCESS) { + futhark_panic(-1, "%s:%d: OpenCL call\n %s\nfailed with error code %d (%s)\n", + file, line, call, ret, opencl_error_string(ret)); + } +} + +static char* opencl_succeed_nonfatal(unsigned int ret, + const char *call, + const char *file, + int line) { + if (ret != CL_SUCCESS) { + return msgprintf("%s:%d: OpenCL call\n %s\nfailed with error code %d (%s)\n", + file, line, call, ret, opencl_error_string(ret)); + } else { + return NULL; + } +} + +static void set_preferred_platform(struct opencl_config *cfg, const char *s) { + cfg->preferred_platform = s; + cfg->ignore_blacklist = 1; +} + +static void set_preferred_device(struct opencl_config *cfg, const char *s) { + int x = 0; + if (*s == '#') { + s++; + while (isdigit(*s)) { + x = x * 10 + (*s++)-'0'; + } + // Skip trailing spaces. 
+ while (isspace(*s)) { + s++; + } + } + cfg->preferred_device = s; + cfg->preferred_device_num = x; + cfg->ignore_blacklist = 1; +} + +static char* opencl_platform_info(cl_platform_id platform, + cl_platform_info param) { + size_t req_bytes; + char *info; + + OPENCL_SUCCEED_FATAL(clGetPlatformInfo(platform, param, 0, NULL, &req_bytes)); + + info = (char*) malloc(req_bytes); + + OPENCL_SUCCEED_FATAL(clGetPlatformInfo(platform, param, req_bytes, info, NULL)); + + return info; +} + +static char* opencl_device_info(cl_device_id device, + cl_device_info param) { + size_t req_bytes; + char *info; + + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device, param, 0, NULL, &req_bytes)); + + info = (char*) malloc(req_bytes); + + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device, param, req_bytes, info, NULL)); + + return info; +} + +static void opencl_all_device_options(struct opencl_device_option **devices_out, + size_t *num_devices_out) { + size_t num_devices = 0, num_devices_added = 0; + + cl_platform_id *all_platforms; + cl_uint *platform_num_devices; + + cl_uint num_platforms; + + // Find the number of platforms. + OPENCL_SUCCEED_FATAL(clGetPlatformIDs(0, NULL, &num_platforms)); + + // Make room for them. + all_platforms = calloc(num_platforms, sizeof(cl_platform_id)); + platform_num_devices = calloc(num_platforms, sizeof(cl_uint)); + + // Fetch all the platforms. + OPENCL_SUCCEED_FATAL(clGetPlatformIDs(num_platforms, all_platforms, NULL)); + + // Count the number of devices for each platform, as well as the + // total number of devices. + for (cl_uint i = 0; i < num_platforms; i++) { + if (clGetDeviceIDs(all_platforms[i], CL_DEVICE_TYPE_ALL, + 0, NULL, &platform_num_devices[i]) == CL_SUCCESS) { + num_devices += platform_num_devices[i]; + } else { + platform_num_devices[i] = 0; + } + } + + // Make room for all the device options. 
+ struct opencl_device_option *devices = + calloc(num_devices, sizeof(struct opencl_device_option)); + + // Loop through the platforms, getting information about their devices. + for (cl_uint i = 0; i < num_platforms; i++) { + cl_platform_id platform = all_platforms[i]; + cl_uint num_platform_devices = platform_num_devices[i]; + + if (num_platform_devices == 0) { + continue; + } + + char *platform_name = opencl_platform_info(platform, CL_PLATFORM_NAME); + cl_device_id *platform_devices = + calloc(num_platform_devices, sizeof(cl_device_id)); + + // Fetch all the devices. + OPENCL_SUCCEED_FATAL(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, + num_platform_devices, platform_devices, NULL)); + + // Loop through the devices, adding them to the devices array. + for (cl_uint i = 0; i < num_platform_devices; i++) { + char *device_name = opencl_device_info(platform_devices[i], CL_DEVICE_NAME); + devices[num_devices_added].platform = platform; + devices[num_devices_added].device = platform_devices[i]; + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(platform_devices[i], CL_DEVICE_TYPE, + sizeof(cl_device_type), + &devices[num_devices_added].device_type, + NULL)); + // We don't want the structs to share memory, so copy the platform name. + // Each device name is already unique. + devices[num_devices_added].platform_name = strclone(platform_name); + devices[num_devices_added].device_name = device_name; + num_devices_added++; + } + free(platform_devices); + free(platform_name); + } + free(all_platforms); + free(platform_num_devices); + + *devices_out = devices; + *num_devices_out = num_devices; +} + +// Returns 0 on success. 
+static int list_devices(void) { + struct opencl_device_option *devices; + size_t num_devices; + + opencl_all_device_options(&devices, &num_devices); + + const char *cur_platform = ""; + for (size_t i = 0; i < num_devices; i++) { + struct opencl_device_option device = devices[i]; + if (strcmp(cur_platform, device.platform_name) != 0) { + printf("Platform: %s\n", device.platform_name); + cur_platform = device.platform_name; + } + printf("[%d]: %s\n", (int)i, device.device_name); + } + + // Free all the platform and device names. + for (size_t j = 0; j < num_devices; j++) { + free(devices[j].platform_name); + free(devices[j].device_name); + } + free(devices); + + return 0; +} + +// Returns 0 on success. +static int select_device_interactively(struct opencl_config *cfg) { + struct opencl_device_option *devices; + size_t num_devices; + int ret = 1; + + opencl_all_device_options(&devices, &num_devices); + + printf("Choose OpenCL device:\n"); + const char *cur_platform = ""; + for (size_t i = 0; i < num_devices; i++) { + struct opencl_device_option device = devices[i]; + if (strcmp(cur_platform, device.platform_name) != 0) { + printf("Platform: %s\n", device.platform_name); + cur_platform = device.platform_name; + } + printf("[%d] %s\n", (int)i, device.device_name); + } + + int selection; + printf("Choice: "); + if (scanf("%d", &selection) == 1) { + ret = 0; + cfg->preferred_platform = ""; + cfg->preferred_device = ""; + cfg->preferred_device_num = selection; + cfg->ignore_blacklist = 1; + } + + // Free all the platform and device names. 
+ for (size_t j = 0; j < num_devices; j++) { + free(devices[j].platform_name); + free(devices[j].device_name); + } + free(devices); + + return ret; +} + +static int is_blacklisted(const char *platform_name, const char *device_name, + const struct opencl_config *cfg) { + if (strcmp(cfg->preferred_platform, "") != 0 || + strcmp(cfg->preferred_device, "") != 0) { + return 0; + } else if (strstr(platform_name, "Apple") != NULL && + strstr(device_name, "Intel(R) Core(TM)") != NULL) { + return 1; + } else { + return 0; + } +} + +static struct opencl_device_option get_preferred_device(const struct opencl_config *cfg) { + struct opencl_device_option *devices; + size_t num_devices; + + opencl_all_device_options(&devices, &num_devices); + + int num_device_matches = 0; + + for (size_t i = 0; i < num_devices; i++) { + struct opencl_device_option device = devices[i]; + if (strstr(device.platform_name, cfg->preferred_platform) != NULL && + strstr(device.device_name, cfg->preferred_device) != NULL && + (cfg->ignore_blacklist || + !is_blacklisted(device.platform_name, device.device_name, cfg)) && + num_device_matches++ == cfg->preferred_device_num) { + // Free all the platform and device names, except the ones we have chosen. 
+ for (size_t j = 0; j < num_devices; j++) { + if (j != i) { + free(devices[j].platform_name); + free(devices[j].device_name); + } + } + free(devices); + return device; + } + } + + futhark_panic(1, "Could not find acceptable OpenCL device.\n"); + exit(1); // Never reached +} + +static void describe_device_option(struct opencl_device_option device) { + fprintf(stderr, "Using platform: %s\n", device.platform_name); + fprintf(stderr, "Using device: %s\n", device.device_name); +} + +static cl_build_status build_opencl_program(cl_program program, cl_device_id device, const char* options) { + cl_int clBuildProgram_error = clBuildProgram(program, 1, &device, options, NULL, NULL); + + // Avoid termination due to CL_BUILD_PROGRAM_FAILURE + if (clBuildProgram_error != CL_SUCCESS && + clBuildProgram_error != CL_BUILD_PROGRAM_FAILURE) { + OPENCL_SUCCEED_FATAL(clBuildProgram_error); + } + + cl_build_status build_status; + OPENCL_SUCCEED_FATAL(clGetProgramBuildInfo(program, + device, + CL_PROGRAM_BUILD_STATUS, + sizeof(cl_build_status), + &build_status, + NULL)); + + if (build_status != CL_SUCCESS) { + char *build_log; + size_t ret_val_size; + OPENCL_SUCCEED_FATAL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size)); + + build_log = (char*) malloc(ret_val_size+1); + OPENCL_SUCCEED_FATAL(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL)); + + // The spec technically does not say whether the build log is zero-terminated, so let's be careful. + build_log[ret_val_size] = '\0'; + + fprintf(stderr, "Build log:\n%s\n", build_log); + + free(build_log); + } + + return build_status; +} + +// Fields in a bitmask indicating which types we must be sure are +// available. +enum opencl_required_type { OPENCL_F64 = 1 }; + +// We take as input several strings representing the program, because +// C does not guarantee that the compiler supports particularly large +// literals. 
Notably, Visual C has a limit of 2048 characters. The +// array must be NULL-terminated. +static cl_program setup_opencl_with_command_queue(struct opencl_context *ctx, + cl_command_queue queue, + const char *srcs[], + int required_types, + const char *extra_build_opts[]) { + int error; + + free_list_init(&ctx->free_list); + ctx->queue = queue; + + OPENCL_SUCCEED_FATAL(clGetCommandQueueInfo(ctx->queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx->ctx, NULL)); + + // Fill out the device info. This is redundant work if we are + // called from setup_opencl() (which is the common case), but I + // doubt it matters much. + struct opencl_device_option device_option; + OPENCL_SUCCEED_FATAL(clGetCommandQueueInfo(ctx->queue, CL_QUEUE_DEVICE, + sizeof(cl_device_id), + &device_option.device, + NULL)); + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device_option.device, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), + &device_option.platform, + NULL)); + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device_option.device, CL_DEVICE_TYPE, + sizeof(cl_device_type), + &device_option.device_type, + NULL)); + device_option.platform_name = opencl_platform_info(device_option.platform, CL_PLATFORM_NAME); + device_option.device_name = opencl_device_info(device_option.device, CL_DEVICE_NAME); + + ctx->device = device_option.device; + + if (required_types & OPENCL_F64) { + cl_uint supported; + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device_option.device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, + sizeof(cl_uint), &supported, NULL)); + if (!supported) { + futhark_panic(1, "Program uses double-precision floats, but this is not supported on the chosen device: %s\n", + device_option.device_name); + } + } + + size_t max_group_size; + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device_option.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(size_t), &max_group_size, NULL)); + + size_t max_tile_size = sqrt(max_group_size); + + cl_ulong max_local_memory; + OPENCL_SUCCEED_FATAL(clGetDeviceInfo(device_option.device, 
CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(size_t), &max_local_memory, NULL)); + + // Futhark reserves 4 bytes for bookkeeping information. + max_local_memory -= 4; + + // The OpenCL implementation may reserve some local memory bytes for + // various purposes. In principle, we should use + // clGetKernelWorkGroupInfo() to figure out for each kernel how much + // is actually available, but our current code generator design + // makes this infeasible. Instead, we have this nasty hack where we + // arbitrarily subtract some bytes, based on empirical measurements + // (but which might be arbitrarily wrong). Fortunately, we rarely + // try to really push the local memory usage. + if (strstr(device_option.platform_name, "NVIDIA CUDA") != NULL) { + max_local_memory -= 12; + } else if (strstr(device_option.platform_name, "AMD") != NULL) { + max_local_memory -= 16; + } + + // Make sure this function is defined. + post_opencl_setup(ctx, &device_option); + + if (max_group_size < ctx->cfg.default_group_size) { + if (ctx->cfg.default_group_size_changed) { + fprintf(stderr, "Note: Device limits default group size to %zu (down from %zu).\n", + max_group_size, ctx->cfg.default_group_size); + } + ctx->cfg.default_group_size = max_group_size; + } + + if (max_tile_size < ctx->cfg.default_tile_size) { + if (ctx->cfg.default_tile_size_changed) { + fprintf(stderr, "Note: Device limits default tile size to %zu (down from %zu).\n", + max_tile_size, ctx->cfg.default_tile_size); + } + ctx->cfg.default_tile_size = max_tile_size; + } + + ctx->max_group_size = max_group_size; + ctx->max_tile_size = max_tile_size; // No limit. + ctx->max_threshold = ctx->max_num_groups = 0; // No limit. + ctx->max_local_memory = max_local_memory; + + // Now we go through all the sizes, clamp them to the valid range, + // or set them to the default. 
+ for (int i = 0; i < ctx->cfg.num_sizes; i++) { + const char *size_class = ctx->cfg.size_classes[i]; + int64_t *size_value = &ctx->cfg.size_values[i]; + const char* size_name = ctx->cfg.size_names[i]; + int64_t max_value = 0, default_value = 0; + + if (strstr(size_class, "group_size") == size_class) { + max_value = max_group_size; + default_value = ctx->cfg.default_group_size; + } else if (strstr(size_class, "num_groups") == size_class) { + max_value = max_group_size; // Futhark assumes this constraint. + default_value = ctx->cfg.default_num_groups; + // XXX: as a quick and dirty hack, use twice as many threads for + // histograms by default. We really should just be smarter + // about sizes somehow. + if (strstr(size_name, ".seghist_") != NULL) { + default_value *= 2; + } + } else if (strstr(size_class, "tile_size") == size_class) { + max_value = sqrt(max_group_size); + default_value = ctx->cfg.default_tile_size; + } else if (strstr(size_class, "reg_tile_size") == size_class) { + max_value = 0; // No limit. + default_value = ctx->cfg.default_reg_tile_size; + } else if (strstr(size_class, "threshold") == size_class) { + // Threshold can be as large as it takes. + default_value = ctx->cfg.default_threshold; + } else { + // Bespoke sizes have no limit or default. 
+ } + if (*size_value == 0) { + *size_value = default_value; + } else if (max_value > 0 && *size_value > max_value) { + fprintf(stderr, "Note: Device limits %s to %d (down from %d)\n", + size_name, (int)max_value, (int)*size_value); + *size_value = max_value; + } + } + + if (ctx->lockstep_width == 0) { + ctx->lockstep_width = 1; + } + + if (ctx->cfg.logging) { + fprintf(stderr, "Lockstep width: %d\n", (int)ctx->lockstep_width); + fprintf(stderr, "Default group size: %d\n", (int)ctx->cfg.default_group_size); + fprintf(stderr, "Default number of groups: %d\n", (int)ctx->cfg.default_num_groups); + } + + char *fut_opencl_src = NULL; + cl_program prog; + error = CL_SUCCESS; + + if (ctx->cfg.load_binary_from == NULL) { + size_t src_size = 0; + + // Maybe we have to read OpenCL source from somewhere else (used for debugging). + if (ctx->cfg.load_program_from != NULL) { + fut_opencl_src = slurp_file(ctx->cfg.load_program_from, NULL); + assert(fut_opencl_src != NULL); + } else { + // Construct the OpenCL source concatenating all the fragments. 
+ for (const char **src = srcs; src && *src; src++) { + src_size += strlen(*src); + } + + fut_opencl_src = (char*) malloc(src_size + 1); + + size_t n, i; + for (i = 0, n = 0; srcs && srcs[i]; i++) { + strncpy(fut_opencl_src+n, srcs[i], src_size-n); + n += strlen(srcs[i]); + } + fut_opencl_src[src_size] = 0; + } + + if (ctx->cfg.dump_program_to != NULL) { + if (ctx->cfg.debugging) { + fprintf(stderr, "Dumping OpenCL source to %s...\n", ctx->cfg.dump_program_to); + } + + dump_file(ctx->cfg.dump_program_to, fut_opencl_src, strlen(fut_opencl_src)); + } + + if (ctx->cfg.debugging) { + fprintf(stderr, "Creating OpenCL program...\n"); + } + + const char* src_ptr[] = {fut_opencl_src}; + prog = clCreateProgramWithSource(ctx->ctx, 1, src_ptr, &src_size, &error); + OPENCL_SUCCEED_FATAL(error); + } else { + if (ctx->cfg.debugging) { + fprintf(stderr, "Loading OpenCL binary from %s...\n", ctx->cfg.load_binary_from); + } + size_t binary_size; + unsigned char *fut_opencl_bin = + (unsigned char*) slurp_file(ctx->cfg.load_binary_from, &binary_size); + assert(fut_opencl_bin != NULL); + const unsigned char *binaries[1] = { fut_opencl_bin }; + cl_int status = 0; + + prog = clCreateProgramWithBinary(ctx->ctx, 1, &device_option.device, + &binary_size, binaries, + &status, &error); + + OPENCL_SUCCEED_FATAL(status); + OPENCL_SUCCEED_FATAL(error); + } + + int compile_opts_size = 1024; + + for (int i = 0; i < ctx->cfg.num_sizes; i++) { + compile_opts_size += strlen(ctx->cfg.size_names[i]) + 20; + } + + for (int i = 0; extra_build_opts[i] != NULL; i++) { + compile_opts_size += strlen(extra_build_opts[i] + 1); + } + + char *compile_opts = (char*) malloc(compile_opts_size); + + int w = snprintf(compile_opts, compile_opts_size, + "-DLOCKSTEP_WIDTH=%d ", + (int)ctx->lockstep_width); + + for (int i = 0; i < ctx->cfg.num_sizes; i++) { + w += snprintf(compile_opts+w, compile_opts_size-w, + "-D%s=%d ", + ctx->cfg.size_vars[i], + (int)ctx->cfg.size_values[i]); + } + + for (int i = 0; 
extra_build_opts[i] != NULL; i++) { + w += snprintf(compile_opts+w, compile_opts_size-w, + "%s ", extra_build_opts[i]); + } + + if (ctx->cfg.debugging) { + fprintf(stderr, "OpenCL compiler options: %s\n", compile_opts); + fprintf(stderr, "Building OpenCL program...\n"); + } + OPENCL_SUCCEED_FATAL(build_opencl_program(prog, device_option.device, compile_opts)); + + free(compile_opts); + free(fut_opencl_src); + + if (ctx->cfg.dump_binary_to != NULL) { + if (ctx->cfg.debugging) { + fprintf(stderr, "Dumping OpenCL binary to %s...\n", ctx->cfg.dump_binary_to); + } + + size_t binary_size; + OPENCL_SUCCEED_FATAL(clGetProgramInfo(prog, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), &binary_size, NULL)); + unsigned char *binary = (unsigned char*) malloc(binary_size); + unsigned char *binaries[1] = { binary }; + OPENCL_SUCCEED_FATAL(clGetProgramInfo(prog, CL_PROGRAM_BINARIES, + sizeof(unsigned char*), binaries, NULL)); + + dump_file(ctx->cfg.dump_binary_to, binary, binary_size); + } + + return prog; +} + +static cl_program setup_opencl(struct opencl_context *ctx, + const char *srcs[], + int required_types, + const char *extra_build_opts[]) { + + ctx->lockstep_width = 0; // Real value set later. + + struct opencl_device_option device_option = get_preferred_device(&ctx->cfg); + + if (ctx->cfg.logging) { + describe_device_option(device_option); + } + + // Note that NVIDIA's OpenCL requires the platform property + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)device_option.platform, + 0 + }; + + cl_int clCreateContext_error; + ctx->ctx = clCreateContext(properties, 1, &device_option.device, NULL, NULL, &clCreateContext_error); + OPENCL_SUCCEED_FATAL(clCreateContext_error); + + cl_int clCreateCommandQueue_error; + cl_command_queue queue = + clCreateCommandQueue(ctx->ctx, + device_option.device, + ctx->cfg.profiling ? 
CL_QUEUE_PROFILING_ENABLE : 0, + &clCreateCommandQueue_error); + OPENCL_SUCCEED_FATAL(clCreateCommandQueue_error); + + return setup_opencl_with_command_queue(ctx, queue, srcs, required_types, extra_build_opts); +} + +// Count up the runtime all the profiling_records that occured during execution. +// Also clears the buffer of profiling_records. +static cl_int opencl_tally_profiling_records(struct opencl_context *ctx) { + cl_int err; + for (int i = 0; i < ctx->profiling_records_used; i++) { + struct profiling_record record = ctx->profiling_records[i]; + + cl_ulong start_t, end_t; + + if ((err = clGetEventProfilingInfo(*record.event, + CL_PROFILING_COMMAND_START, + sizeof(start_t), + &start_t, + NULL)) != CL_SUCCESS) { + return err; + } + + if ((err = clGetEventProfilingInfo(*record.event, + CL_PROFILING_COMMAND_END, + sizeof(end_t), + &end_t, + NULL)) != CL_SUCCESS) { + return err; + } + + // OpenCL provides nanosecond resolution, but we want + // microseconds. + *record.runs += 1; + *record.runtime += (end_t - start_t)/1000; + + if ((err = clReleaseEvent(*record.event)) != CL_SUCCESS) { + return err; + } + free(record.event); + } + + ctx->profiling_records_used = 0; + + return CL_SUCCESS; +} + +// If profiling, produce an event associated with a profiling record. +static cl_event* opencl_get_event(struct opencl_context *ctx, int *runs, int64_t *runtime) { + if (ctx->profiling_records_used == ctx->profiling_records_capacity) { + ctx->profiling_records_capacity *= 2; + ctx->profiling_records = + realloc(ctx->profiling_records, + ctx->profiling_records_capacity * + sizeof(struct profiling_record)); + } + cl_event *event = malloc(sizeof(cl_event)); + ctx->profiling_records[ctx->profiling_records_used].event = event; + ctx->profiling_records[ctx->profiling_records_used].runs = runs; + ctx->profiling_records[ctx->profiling_records_used].runtime = runtime; + ctx->profiling_records_used++; + return event; +} + +// Allocate memory from driver. 
The problem is that OpenCL may perform +// lazy allocation, so we cannot know whether an allocation succeeded +// until the first time we try to use it. Hence we immediately +// perform a write to see if the allocation succeeded. This is slow, +// but the assumption is that this operation will be rare (most things +// will go through the free list). +static int opencl_alloc_actual(struct opencl_context *ctx, size_t size, cl_mem *mem_out) { + int error; + *mem_out = clCreateBuffer(ctx->ctx, CL_MEM_READ_WRITE, size, NULL, &error); + + if (error != CL_SUCCESS) { + return error; + } + + int x = 2; + error = clEnqueueWriteBuffer(ctx->queue, *mem_out, 1, 0, sizeof(x), &x, 0, NULL, NULL); + + // No need to wait for completion here. clWaitForEvents() cannot + // return mem object allocation failures. This implies that the + // buffer is faulted onto the device on enqueue. (Observation by + // Andreas Kloeckner.) + + return error; +} + +static int opencl_alloc(struct opencl_context *ctx, size_t min_size, const char *tag, cl_mem *mem_out) { + (void)tag; + if (min_size < sizeof(int)) { + min_size = sizeof(int); + } + + size_t size; + + if (free_list_find(&ctx->free_list, min_size, &size, mem_out) == 0) { + // Successfully found a free block. Is it big enough? + // + // FIXME: we might also want to check whether the block is *too + // big*, to avoid internal fragmentation. However, this can + // sharply impact performance on programs where arrays change size + // frequently. Fortunately, such allocations are usually fairly + // short-lived, as they are necessarily within a loop, so the risk + // of internal fragmentation resulting in an OOM situation is + // limited. However, it would be preferable if we could go back + // and *shrink* oversize allocations when we encounter an OOM + // condition. That is technically feasible, since we do not + // expose OpenCL pointer values directly to the application, but + // instead rely on a level of indirection. 
+ if (size >= min_size) { + if (ctx->cfg.debugging) { + fprintf(stderr, "No need to allocate: Found a block in the free list.\n"); + } + + return CL_SUCCESS; + } else { + if (ctx->cfg.debugging) { + fprintf(stderr, "Found a free block, but it was too small.\n"); + } + + // Not just right - free it. + int error = clReleaseMemObject(*mem_out); + if (error != CL_SUCCESS) { + return error; + } + } + } + + // We have to allocate a new block from the driver. If the + // allocation does not succeed, then we might be in an out-of-memory + // situation. We now start freeing things from the free list until + // we think we have freed enough that the allocation will succeed. + // Since we don't know how far the allocation is from fitting, we + // have to check after every deallocation. This might be pretty + // expensive. Let's hope that this case is hit rarely. + + if (ctx->cfg.debugging) { + fprintf(stderr, "Actually allocating the desired block.\n"); + } + + int error = opencl_alloc_actual(ctx, min_size, mem_out); + + while (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) { + if (ctx->cfg.debugging) { + fprintf(stderr, "Out of OpenCL memory: releasing entry from the free list...\n"); + } + cl_mem mem; + if (free_list_first(&ctx->free_list, &mem) == 0) { + error = clReleaseMemObject(mem); + if (error != CL_SUCCESS) { + return error; + } + } else { + break; + } + error = opencl_alloc_actual(ctx, min_size, mem_out); + } + + return error; +} + +static int opencl_free(struct opencl_context *ctx, cl_mem mem, const char *tag) { + size_t size; + cl_mem existing_mem; + + // If there is already a block with this tag, then remove it. 
+ if (free_list_find(&ctx->free_list, -1, &size, &existing_mem) == 0) { + int error = clReleaseMemObject(existing_mem); + if (error != CL_SUCCESS) { + return error; + } + } + + int error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size_t), &size, NULL); + + if (error == CL_SUCCESS) { + free_list_insert(&ctx->free_list, size, mem, tag); + } + + return error; +} + +static int opencl_free_all(struct opencl_context *ctx) { + cl_mem mem; + free_list_pack(&ctx->free_list); + while (free_list_first(&ctx->free_list, &mem) == 0) { + int error = clReleaseMemObject(mem); + if (error != CL_SUCCESS) { + return error; + } + } + + return CL_SUCCESS; +} + +// Free everything that belongs to 'ctx', but do not free 'ctx' +// itself. +static void teardown_opencl(struct opencl_context *ctx) { + (void)opencl_tally_profiling_records(ctx); + free(ctx->profiling_records); + (void)opencl_free_all(ctx); + (void)clReleaseCommandQueue(ctx->queue); + (void)clReleaseContext(ctx->ctx); +} + +// End of opencl.h. + +static const char *opencl_program[] = + {"#ifdef cl_clang_storage_class_specifiers\n#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable\n#endif\n#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n__kernel void dummy_kernel(__global unsigned char *dummy, int n)\n{\n const int thread_gid = get_global_id(0);\n \n if (thread_gid >= n)\n return;\n}\n#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\ntypedef char int8_t;\ntypedef short int16_t;\ntypedef int int32_t;\ntypedef long int64_t;\ntypedef uchar uint8_t;\ntypedef ushort uint16_t;\ntypedef uint uint32_t;\ntypedef ulong uint64_t;\n#ifdef cl_nv_pragma_unroll\nstatic inline void mem_fence_global()\n{\n asm(\"membar.gl;\");\n}\n#else\nstatic inline void mem_fence_global()\n{\n mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);\n}\n#endif\nstatic inline void mem_fence_local()\n{\n mem_fence(CLK_LOCAL_MEM_FENCE);\n}\nstatic inline 
uint8_t add8(uint8_t x, uint8_t y)\n{\n return x + y;\n}\nstatic inline uint16_t add16(uint16_t x, uint16_t y)\n{\n return x + y;\n}\nstatic inline uint32_t add32(uint32_t x, uint32_t y)\n{\n return x + y;\n}\nstatic inline uint64_t add64(uint64_t x, uint64_t y)\n{\n return x + y;\n}\nstatic inline uint8_t sub8(uint8_t x, uint8_t y)\n{\n return x - y;\n}\nstatic inline uint16_t sub16(uint16_t x, uint16_t y)\n{\n return x - y;\n}\nstatic inline uint32_t sub32(uint32_t x, uint32_t y)\n{\n return x - y;\n}\nstatic inline uint64_t sub64(uint64_t x, uint64_t y)\n{\n return x - y;\n}\nstatic inline uint8_t mul8(uint8_t x, uint8_t y)\n{\n return x * y;\n}\nstatic inline uint16_t mul16(uint16_t x, uint16_t y)\n{\n return x * y;\n}\nstatic inline uint32_t mul32(uint32_t x, uint32_t y)\n{\n return x * y;\n}\nstatic inline uint64_t mul64(uint64_t x, uint64_t y)\n{\n return x * y;\n}\nstatic inline uint8_t udiv8(uint8_t x, uint8_t y)\n{\n return x / y;\n}\nstatic inline uint16_t udiv16(uint16_t x, uint16_t y)\n{\n return x / y;\n}\nstatic inline uint32_t udiv32(uint3", + "2_t x, uint32_t y)\n{\n return x / y;\n}\nstatic inline uint64_t udiv64(uint64_t x, uint64_t y)\n{\n return x / y;\n}\nstatic inline uint8_t udiv_up8(uint8_t x, uint8_t y)\n{\n return (x + y - 1) / y;\n}\nstatic inline uint16_t udiv_up16(uint16_t x, uint16_t y)\n{\n return (x + y - 1) / y;\n}\nstatic inline uint32_t udiv_up32(uint32_t x, uint32_t y)\n{\n return (x + y - 1) / y;\n}\nstatic inline uint64_t udiv_up64(uint64_t x, uint64_t y)\n{\n return (x + y - 1) / y;\n}\nstatic inline uint8_t umod8(uint8_t x, uint8_t y)\n{\n return x % y;\n}\nstatic inline uint16_t umod16(uint16_t x, uint16_t y)\n{\n return x % y;\n}\nstatic inline uint32_t umod32(uint32_t x, uint32_t y)\n{\n return x % y;\n}\nstatic inline uint64_t umod64(uint64_t x, uint64_t y)\n{\n return x % y;\n}\nstatic inline uint8_t udiv_safe8(uint8_t x, uint8_t y)\n{\n return y == 0 ? 
0 : x / y;\n}\nstatic inline uint16_t udiv_safe16(uint16_t x, uint16_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline uint32_t udiv_safe32(uint32_t x, uint32_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline uint64_t udiv_safe64(uint64_t x, uint64_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y)\n{\n return y == 0 ? 0 : (x + y - 1) / y;\n}\nstatic inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y)\n{\n return y == 0 ? 0 : (x + y - 1) / y;\n}\nstatic inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y)\n{\n return y == 0 ? 0 : (x + y - 1) / y;\n}\nstatic inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y)\n{\n return y == 0 ? 0 : (x + y - 1) / y;\n}\nstatic inline uint8_t umod_safe8(uint8_t x, uint8_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline uint16_t umod_safe16(uint16_t x, uint16_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline uint32_t umod_safe32(uint32_t x, uint32_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline uint64_t umod_safe64(uint64_t x, uint64_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline int8_t sdiv8(int8_t x, int8_t y)\n{\n int8_t q =", + " x / y;\n int8_t r = x % y;\n \n return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);\n}\nstatic inline int16_t sdiv16(int16_t x, int16_t y)\n{\n int16_t q = x / y;\n int16_t r = x % y;\n \n return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);\n}\nstatic inline int32_t sdiv32(int32_t x, int32_t y)\n{\n int32_t q = x / y;\n int32_t r = x % y;\n \n return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);\n}\nstatic inline int64_t sdiv64(int64_t x, int64_t y)\n{\n int64_t q = x / y;\n int64_t r = x % y;\n \n return q - ((r != 0 && r < 0 != y < 0) ? 
1 : 0);\n}\nstatic inline int8_t sdiv_up8(int8_t x, int8_t y)\n{\n return sdiv8(x + y - 1, y);\n}\nstatic inline int16_t sdiv_up16(int16_t x, int16_t y)\n{\n return sdiv16(x + y - 1, y);\n}\nstatic inline int32_t sdiv_up32(int32_t x, int32_t y)\n{\n return sdiv32(x + y - 1, y);\n}\nstatic inline int64_t sdiv_up64(int64_t x, int64_t y)\n{\n return sdiv64(x + y - 1, y);\n}\nstatic inline int8_t smod8(int8_t x, int8_t y)\n{\n int8_t r = x % y;\n \n return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);\n}\nstatic inline int16_t smod16(int16_t x, int16_t y)\n{\n int16_t r = x % y;\n \n return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);\n}\nstatic inline int32_t smod32(int32_t x, int32_t y)\n{\n int32_t r = x % y;\n \n return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);\n}\nstatic inline int64_t smod64(int64_t x, int64_t y)\n{\n int64_t r = x % y;\n \n return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);\n}\nstatic inline int8_t sdiv_safe8(int8_t x, int8_t y)\n{\n return y == 0 ? 0 : sdiv8(x, y);\n}\nstatic inline int16_t sdiv_safe16(int16_t x, int16_t y)\n{\n return y == 0 ? 0 : sdiv16(x, y);\n}\nstatic inline int32_t sdiv_safe32(int32_t x, int32_t y)\n{\n return y == 0 ? 0 : sdiv32(x, y);\n}\nstatic inline int64_t sdiv_safe64(int64_t x, int64_t y)\n{\n return y == 0 ? 0 : sdiv64(x, y);\n}\nstatic inline int8_t sdiv_up_safe8(int8_t x, int8_t y)\n{\n return ", + "sdiv_safe8(x + y - 1, y);\n}\nstatic inline int16_t sdiv_up_safe16(int16_t x, int16_t y)\n{\n return sdiv_safe16(x + y - 1, y);\n}\nstatic inline int32_t sdiv_up_safe32(int32_t x, int32_t y)\n{\n return sdiv_safe32(x + y - 1, y);\n}\nstatic inline int64_t sdiv_up_safe64(int64_t x, int64_t y)\n{\n return sdiv_safe64(x + y - 1, y);\n}\nstatic inline int8_t smod_safe8(int8_t x, int8_t y)\n{\n return y == 0 ? 0 : smod8(x, y);\n}\nstatic inline int16_t smod_safe16(int16_t x, int16_t y)\n{\n return y == 0 ? 
0 : smod16(x, y);\n}\nstatic inline int32_t smod_safe32(int32_t x, int32_t y)\n{\n return y == 0 ? 0 : smod32(x, y);\n}\nstatic inline int64_t smod_safe64(int64_t x, int64_t y)\n{\n return y == 0 ? 0 : smod64(x, y);\n}\nstatic inline int8_t squot8(int8_t x, int8_t y)\n{\n return x / y;\n}\nstatic inline int16_t squot16(int16_t x, int16_t y)\n{\n return x / y;\n}\nstatic inline int32_t squot32(int32_t x, int32_t y)\n{\n return x / y;\n}\nstatic inline int64_t squot64(int64_t x, int64_t y)\n{\n return x / y;\n}\nstatic inline int8_t srem8(int8_t x, int8_t y)\n{\n return x % y;\n}\nstatic inline int16_t srem16(int16_t x, int16_t y)\n{\n return x % y;\n}\nstatic inline int32_t srem32(int32_t x, int32_t y)\n{\n return x % y;\n}\nstatic inline int64_t srem64(int64_t x, int64_t y)\n{\n return x % y;\n}\nstatic inline int8_t squot_safe8(int8_t x, int8_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline int16_t squot_safe16(int16_t x, int16_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline int32_t squot_safe32(int32_t x, int32_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline int64_t squot_safe64(int64_t x, int64_t y)\n{\n return y == 0 ? 0 : x / y;\n}\nstatic inline int8_t srem_safe8(int8_t x, int8_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline int16_t srem_safe16(int16_t x, int16_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline int32_t srem_safe32(int32_t x, int32_t y)\n{\n return y == 0 ? 0 : x % y;\n}\nstatic inline int64_t srem_safe64(int64_t x, int64_t y)\n{\n return ", + "y == 0 ? 0 : x % y;\n}\nstatic inline int8_t smin8(int8_t x, int8_t y)\n{\n return x < y ? x : y;\n}\nstatic inline int16_t smin16(int16_t x, int16_t y)\n{\n return x < y ? x : y;\n}\nstatic inline int32_t smin32(int32_t x, int32_t y)\n{\n return x < y ? x : y;\n}\nstatic inline int64_t smin64(int64_t x, int64_t y)\n{\n return x < y ? x : y;\n}\nstatic inline uint8_t umin8(uint8_t x, uint8_t y)\n{\n return x < y ? 
x : y;\n}\nstatic inline uint16_t umin16(uint16_t x, uint16_t y)\n{\n return x < y ? x : y;\n}\nstatic inline uint32_t umin32(uint32_t x, uint32_t y)\n{\n return x < y ? x : y;\n}\nstatic inline uint64_t umin64(uint64_t x, uint64_t y)\n{\n return x < y ? x : y;\n}\nstatic inline int8_t smax8(int8_t x, int8_t y)\n{\n return x < y ? y : x;\n}\nstatic inline int16_t smax16(int16_t x, int16_t y)\n{\n return x < y ? y : x;\n}\nstatic inline int32_t smax32(int32_t x, int32_t y)\n{\n return x < y ? y : x;\n}\nstatic inline int64_t smax64(int64_t x, int64_t y)\n{\n return x < y ? y : x;\n}\nstatic inline uint8_t umax8(uint8_t x, uint8_t y)\n{\n return x < y ? y : x;\n}\nstatic inline uint16_t umax16(uint16_t x, uint16_t y)\n{\n return x < y ? y : x;\n}\nstatic inline uint32_t umax32(uint32_t x, uint32_t y)\n{\n return x < y ? y : x;\n}\nstatic inline uint64_t umax64(uint64_t x, uint64_t y)\n{\n return x < y ? y : x;\n}\nstatic inline uint8_t shl8(uint8_t x, uint8_t y)\n{\n return x << y;\n}\nstatic inline uint16_t shl16(uint16_t x, uint16_t y)\n{\n return x << y;\n}\nstatic inline uint32_t shl32(uint32_t x, uint32_t y)\n{\n return x << y;\n}\nstatic inline uint64_t shl64(uint64_t x, uint64_t y)\n{\n return x << y;\n}\nstatic inline uint8_t lshr8(uint8_t x, uint8_t y)\n{\n return x >> y;\n}\nstatic inline uint16_t lshr16(uint16_t x, uint16_t y)\n{\n return x >> y;\n}\nstatic inline uint32_t lshr32(uint32_t x, uint32_t y)\n{\n return x >> y;\n}\nstatic inline uint64_t lshr64(uint64_t x, uint64_t y)\n{\n return x >> y;\n}\nstatic inline int8_t ashr8(int8_t x, int8_t y)\n{\n return x >> y;\n}\n", + "static inline int16_t ashr16(int16_t x, int16_t y)\n{\n return x >> y;\n}\nstatic inline int32_t ashr32(int32_t x, int32_t y)\n{\n return x >> y;\n}\nstatic inline int64_t ashr64(int64_t x, int64_t y)\n{\n return x >> y;\n}\nstatic inline uint8_t and8(uint8_t x, uint8_t y)\n{\n return x & y;\n}\nstatic inline uint16_t and16(uint16_t x, uint16_t y)\n{\n return x & y;\n}\nstatic 
inline uint32_t and32(uint32_t x, uint32_t y)\n{\n return x & y;\n}\nstatic inline uint64_t and64(uint64_t x, uint64_t y)\n{\n return x & y;\n}\nstatic inline uint8_t or8(uint8_t x, uint8_t y)\n{\n return x | y;\n}\nstatic inline uint16_t or16(uint16_t x, uint16_t y)\n{\n return x | y;\n}\nstatic inline uint32_t or32(uint32_t x, uint32_t y)\n{\n return x | y;\n}\nstatic inline uint64_t or64(uint64_t x, uint64_t y)\n{\n return x | y;\n}\nstatic inline uint8_t xor8(uint8_t x, uint8_t y)\n{\n return x ^ y;\n}\nstatic inline uint16_t xor16(uint16_t x, uint16_t y)\n{\n return x ^ y;\n}\nstatic inline uint32_t xor32(uint32_t x, uint32_t y)\n{\n return x ^ y;\n}\nstatic inline uint64_t xor64(uint64_t x, uint64_t y)\n{\n return x ^ y;\n}\nstatic inline bool ult8(uint8_t x, uint8_t y)\n{\n return x < y;\n}\nstatic inline bool ult16(uint16_t x, uint16_t y)\n{\n return x < y;\n}\nstatic inline bool ult32(uint32_t x, uint32_t y)\n{\n return x < y;\n}\nstatic inline bool ult64(uint64_t x, uint64_t y)\n{\n return x < y;\n}\nstatic inline bool ule8(uint8_t x, uint8_t y)\n{\n return x <= y;\n}\nstatic inline bool ule16(uint16_t x, uint16_t y)\n{\n return x <= y;\n}\nstatic inline bool ule32(uint32_t x, uint32_t y)\n{\n return x <= y;\n}\nstatic inline bool ule64(uint64_t x, uint64_t y)\n{\n return x <= y;\n}\nstatic inline bool slt8(int8_t x, int8_t y)\n{\n return x < y;\n}\nstatic inline bool slt16(int16_t x, int16_t y)\n{\n return x < y;\n}\nstatic inline bool slt32(int32_t x, int32_t y)\n{\n return x < y;\n}\nstatic inline bool slt64(int64_t x, int64_t y)\n{\n return x < y;\n}\nstatic inline bool sle8(int8_t x, int8_t y)\n{\n retur", + "n x <= y;\n}\nstatic inline bool sle16(int16_t x, int16_t y)\n{\n return x <= y;\n}\nstatic inline bool sle32(int32_t x, int32_t y)\n{\n return x <= y;\n}\nstatic inline bool sle64(int64_t x, int64_t y)\n{\n return x <= y;\n}\nstatic inline int8_t pow8(int8_t x, int8_t y)\n{\n int8_t res = 1, rem = y;\n \n while (rem != 0) {\n if (rem & 1)\n res 
*= x;\n rem >>= 1;\n x *= x;\n }\n return res;\n}\nstatic inline int16_t pow16(int16_t x, int16_t y)\n{\n int16_t res = 1, rem = y;\n \n while (rem != 0) {\n if (rem & 1)\n res *= x;\n rem >>= 1;\n x *= x;\n }\n return res;\n}\nstatic inline int32_t pow32(int32_t x, int32_t y)\n{\n int32_t res = 1, rem = y;\n \n while (rem != 0) {\n if (rem & 1)\n res *= x;\n rem >>= 1;\n x *= x;\n }\n return res;\n}\nstatic inline int64_t pow64(int64_t x, int64_t y)\n{\n int64_t res = 1, rem = y;\n \n while (rem != 0) {\n if (rem & 1)\n res *= x;\n rem >>= 1;\n x *= x;\n }\n return res;\n}\nstatic inline bool itob_i8_bool(int8_t x)\n{\n return x;\n}\nstatic inline bool itob_i16_bool(int16_t x)\n{\n return x;\n}\nstatic inline bool itob_i32_bool(int32_t x)\n{\n return x;\n}\nstatic inline bool itob_i64_bool(int64_t x)\n{\n return x;\n}\nstatic inline int8_t btoi_bool_i8(bool x)\n{\n return x;\n}\nstatic inline int16_t btoi_bool_i16(bool x)\n{\n return x;\n}\nstatic inline int32_t btoi_bool_i32(bool x)\n{\n return x;\n}\nstatic inline int64_t btoi_bool_i64(bool x)\n{\n return x;\n}\n#define sext_i8_i8(x) ((int8_t) (int8_t) x)\n#define sext_i8_i16(x) ((int16_t) (int8_t) x)\n#define sext_i8_i32(x) ((int32_t) (int8_t) x)\n#define sext_i8_i64(x) ((int64_t) (int8_t) x)\n#define sext_i16_i8(x) ((int8_t) (int16_t) x)\n#define sext_i16_i16(x) ((int16_t) (int16_t) x)\n#define sext_i16_i32(x) ((int32_t) (int16_t) x)\n#define sext_i16_i64(x) ((int64_t) (int16_t) x)\n#define sext_i32_i8(x) ((int8_t) (int32_t) x)\n#define sext_i32_i16(x) (", + "(int16_t) (int32_t) x)\n#define sext_i32_i32(x) ((int32_t) (int32_t) x)\n#define sext_i32_i64(x) ((int64_t) (int32_t) x)\n#define sext_i64_i8(x) ((int8_t) (int64_t) x)\n#define sext_i64_i16(x) ((int16_t) (int64_t) x)\n#define sext_i64_i32(x) ((int32_t) (int64_t) x)\n#define sext_i64_i64(x) ((int64_t) (int64_t) x)\n#define zext_i8_i8(x) ((int8_t) (uint8_t) x)\n#define zext_i8_i16(x) ((int16_t) (uint8_t) x)\n#define zext_i8_i32(x) ((int32_t) (uint8_t) 
x)\n#define zext_i8_i64(x) ((int64_t) (uint8_t) x)\n#define zext_i16_i8(x) ((int8_t) (uint16_t) x)\n#define zext_i16_i16(x) ((int16_t) (uint16_t) x)\n#define zext_i16_i32(x) ((int32_t) (uint16_t) x)\n#define zext_i16_i64(x) ((int64_t) (uint16_t) x)\n#define zext_i32_i8(x) ((int8_t) (uint32_t) x)\n#define zext_i32_i16(x) ((int16_t) (uint32_t) x)\n#define zext_i32_i32(x) ((int32_t) (uint32_t) x)\n#define zext_i32_i64(x) ((int64_t) (uint32_t) x)\n#define zext_i64_i8(x) ((int8_t) (uint64_t) x)\n#define zext_i64_i16(x) ((int16_t) (uint64_t) x)\n#define zext_i64_i32(x) ((int32_t) (uint64_t) x)\n#define zext_i64_i64(x) ((int64_t) (uint64_t) x)\n#if defined(__OPENCL_VERSION__)\nstatic int32_t futrts_popc8(int8_t x)\n{\n return popcount(x);\n}\nstatic int32_t futrts_popc16(int16_t x)\n{\n return popcount(x);\n}\nstatic int32_t futrts_popc32(int32_t x)\n{\n return popcount(x);\n}\nstatic int32_t futrts_popc64(int64_t x)\n{\n return popcount(x);\n}\n#elif defined(__CUDA_ARCH__)\nstatic int32_t futrts_popc8(int8_t x)\n{\n return __popc(zext_i8_i32(x));\n}\nstatic int32_t futrts_popc16(int16_t x)\n{\n return __popc(zext_i16_i32(x));\n}\nstatic int32_t futrts_popc32(int32_t x)\n{\n return __popc(x);\n}\nstatic int32_t futrts_popc64(int64_t x)\n{\n return __popcll(x);\n}\n#else\nstatic int32_t futrts_popc8(int8_t x)\n{\n int c = 0;\n \n for (; x; ++c)\n x &= x - 1;\n return c;\n}\nstatic int32_t futrts_popc16(int16_t x)\n{\n int c = 0;\n \n for (; x; ++c)\n x &= x - 1;\n return c;\n}\nstatic int32_t futrts_popc32(int32_t x)\n{\n int c = 0;\n \n ", + " for (; x; ++c)\n x &= x - 1;\n return c;\n}\nstatic int32_t futrts_popc64(int64_t x)\n{\n int c = 0;\n \n for (; x; ++c)\n x &= x - 1;\n return c;\n}\n#endif\n#if defined(__OPENCL_VERSION__)\nstatic uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)\n{\n return mul_hi(a, b);\n}\nstatic uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)\n{\n return mul_hi(a, b);\n}\nstatic uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)\n{\n return 
mul_hi(a, b);\n}\nstatic uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)\n{\n return mul_hi(a, b);\n}\n#elif defined(__CUDA_ARCH__)\nstatic uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)\n{\n uint16_t aa = a;\n uint16_t bb = b;\n \n return aa * bb >> 8;\n}\nstatic uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)\n{\n uint32_t aa = a;\n uint32_t bb = b;\n \n return aa * bb >> 16;\n}\nstatic uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)\n{\n return mulhi(a, b);\n}\nstatic uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)\n{\n return mul64hi(a, b);\n}\n#else\nstatic uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)\n{\n uint16_t aa = a;\n uint16_t bb = b;\n \n return aa * bb >> 8;\n}\nstatic uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)\n{\n uint32_t aa = a;\n uint32_t bb = b;\n \n return aa * bb >> 16;\n}\nstatic uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)\n{\n uint64_t aa = a;\n uint64_t bb = b;\n \n return aa * bb >> 32;\n}\nstatic uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)\n{\n __uint128_t aa = a;\n __uint128_t bb = b;\n \n return aa * bb >> 64;\n}\n#endif\n#if defined(__OPENCL_VERSION__)\nstatic uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)\n{\n return mad_hi(a, b, c);\n}\nstatic uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)\n{\n return mad_hi(a, b, c);\n}\nstatic uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)\n{\n return mad_hi(a, b, c);\n}\nstatic uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)\n{\n return mad_hi(a, b, c);\n}\n#else\nstatic uint8_t ", + "futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)\n{\n return futrts_mul_hi8(a, b) + c;\n}\nstatic uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)\n{\n return futrts_mul_hi16(a, b) + c;\n}\nstatic uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)\n{\n return futrts_mul_hi32(a, b) + c;\n}\nstatic uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)\n{\n return futrts_mul_hi64(a, b) + c;\n}\n#endif\n#if 
defined(__OPENCL_VERSION__)\nstatic int32_t futrts_clzz8(int8_t x)\n{\n return clz(x);\n}\nstatic int32_t futrts_clzz16(int16_t x)\n{\n return clz(x);\n}\nstatic int32_t futrts_clzz32(int32_t x)\n{\n return clz(x);\n}\nstatic int32_t futrts_clzz64(int64_t x)\n{\n return clz(x);\n}\n#elif defined(__CUDA_ARCH__)\nstatic int32_t futrts_clzz8(int8_t x)\n{\n return __clz(zext_i8_i32(x)) - 24;\n}\nstatic int32_t futrts_clzz16(int16_t x)\n{\n return __clz(zext_i16_i32(x)) - 16;\n}\nstatic int32_t futrts_clzz32(int32_t x)\n{\n return __clz(x);\n}\nstatic int32_t futrts_clzz64(int64_t x)\n{\n return __clzll(x);\n}\n#else\nstatic int32_t futrts_clzz8(int8_t x)\n{\n int n = 0;\n int bits = sizeof(x) * 8;\n \n for (int i = 0; i < bits; i++) {\n if (x < 0)\n break;\n n++;\n x <<= 1;\n }\n return n;\n}\nstatic int32_t futrts_clzz16(int16_t x)\n{\n int n = 0;\n int bits = sizeof(x) * 8;\n \n for (int i = 0; i < bits; i++) {\n if (x < 0)\n break;\n n++;\n x <<= 1;\n }\n return n;\n}\nstatic int32_t futrts_clzz32(int32_t x)\n{\n int n = 0;\n int bits = sizeof(x) * 8;\n \n for (int i = 0; i < bits; i++) {\n if (x < 0)\n break;\n n++;\n x <<= 1;\n }\n return n;\n}\nstatic int32_t futrts_clzz64(int64_t x)\n{\n int n = 0;\n int bits = sizeof(x) * 8;\n \n for (int i = 0; i < bits; i++) {\n if (x < 0)\n break;\n n++;\n x <<= 1;\n }\n return n;\n}\n#endif\n#if defined(__OPENCL_VERSION__)\nstatic int32_t futrts_ctzz8(int8_t x)\n{\n int", + " i = 0;\n \n for (; i < 8 && (x & 1) == 0; i++, x >>= 1)\n ;\n return i;\n}\nstatic int32_t futrts_ctzz16(int16_t x)\n{\n int i = 0;\n \n for (; i < 16 && (x & 1) == 0; i++, x >>= 1)\n ;\n return i;\n}\nstatic int32_t futrts_ctzz32(int32_t x)\n{\n int i = 0;\n \n for (; i < 32 && (x & 1) == 0; i++, x >>= 1)\n ;\n return i;\n}\nstatic int32_t futrts_ctzz64(int64_t x)\n{\n int i = 0;\n \n for (; i < 64 && (x & 1) == 0; i++, x >>= 1)\n ;\n return i;\n}\n#elif defined(__CUDA_ARCH__)\nstatic int32_t futrts_ctzz8(int8_t x)\n{\n int y = __ffs(x);\n \n return y 
== 0 ? 8 : y - 1;\n}\nstatic int32_t futrts_ctzz16(int16_t x)\n{\n int y = __ffs(x);\n \n return y == 0 ? 16 : y - 1;\n}\nstatic int32_t futrts_ctzz32(int32_t x)\n{\n int y = __ffs(x);\n \n return y == 0 ? 32 : y - 1;\n}\nstatic int32_t futrts_ctzz64(int64_t x)\n{\n int y = __ffsll(x);\n \n return y == 0 ? 64 : y - 1;\n}\n#else\nstatic int32_t futrts_ctzz8(int8_t x)\n{\n return x == 0 ? 8 : __builtin_ctz((uint32_t) x);\n}\nstatic int32_t futrts_ctzz16(int16_t x)\n{\n return x == 0 ? 16 : __builtin_ctz((uint32_t) x);\n}\nstatic int32_t futrts_ctzz32(int32_t x)\n{\n return x == 0 ? 32 : __builtin_ctz(x);\n}\nstatic int32_t futrts_ctzz64(int64_t x)\n{\n return x == 0 ? 64 : __builtin_ctzll(x);\n}\n#endif\nstatic inline float fdiv32(float x, float y)\n{\n return x / y;\n}\nstatic inline float fadd32(float x, float y)\n{\n return x + y;\n}\nstatic inline float fsub32(float x, float y)\n{\n return x - y;\n}\nstatic inline float fmul32(float x, float y)\n{\n return x * y;\n}\nstatic inline float fmin32(float x, float y)\n{\n return fmin(x, y);\n}\nstatic inline float fmax32(float x, float y)\n{\n return fmax(x, y);\n}\nstatic inline float fpow32(float x, float y)\n{\n return pow(x, y);\n}\nstatic inline bool cmplt32(float x, float y)\n{\n return x < y;\n}\nstatic inline bool cmple32(float x, float y)\n{\n return x <= y;\n}\nstatic inline float sitofp_i8_f32(int8_t x)\n{\n return (floa", + "t) x;\n}\nstatic inline float sitofp_i16_f32(int16_t x)\n{\n return (float) x;\n}\nstatic inline float sitofp_i32_f32(int32_t x)\n{\n return (float) x;\n}\nstatic inline float sitofp_i64_f32(int64_t x)\n{\n return (float) x;\n}\nstatic inline float uitofp_i8_f32(uint8_t x)\n{\n return (float) x;\n}\nstatic inline float uitofp_i16_f32(uint16_t x)\n{\n return (float) x;\n}\nstatic inline float uitofp_i32_f32(uint32_t x)\n{\n return (float) x;\n}\nstatic inline float uitofp_i64_f32(uint64_t x)\n{\n return (float) x;\n}\nstatic inline int8_t fptosi_f32_i8(float x)\n{\n return (int8_t) 
x;\n}\nstatic inline int16_t fptosi_f32_i16(float x)\n{\n return (int16_t) x;\n}\nstatic inline int32_t fptosi_f32_i32(float x)\n{\n return (int32_t) x;\n}\nstatic inline int64_t fptosi_f32_i64(float x)\n{\n return (int64_t) x;\n}\nstatic inline uint8_t fptoui_f32_i8(float x)\n{\n return (uint8_t) x;\n}\nstatic inline uint16_t fptoui_f32_i16(float x)\n{\n return (uint16_t) x;\n}\nstatic inline uint32_t fptoui_f32_i32(float x)\n{\n return (uint32_t) x;\n}\nstatic inline uint64_t fptoui_f32_i64(float x)\n{\n return (uint64_t) x;\n}\nstatic inline bool futrts_isnan32(float x)\n{\n return isnan(x);\n}\nstatic inline bool futrts_isinf32(float x)\n{\n return isinf(x);\n}\n#ifdef __OPENCL_VERSION__\nstatic inline float futrts_log32(float x)\n{\n return log(x);\n}\nstatic inline float futrts_log2_32(float x)\n{\n return log2(x);\n}\nstatic inline float futrts_log10_32(float x)\n{\n return log10(x);\n}\nstatic inline float futrts_sqrt32(float x)\n{\n return sqrt(x);\n}\nstatic inline float futrts_exp32(float x)\n{\n return exp(x);\n}\nstatic inline float futrts_cos32(float x)\n{\n return cos(x);\n}\nstatic inline float futrts_sin32(float x)\n{\n return sin(x);\n}\nstatic inline float futrts_tan32(float x)\n{\n return tan(x);\n}\nstatic inline float futrts_acos32(float x)\n{\n return acos(x);\n}\nstatic inline float futrts_asin32(float x)\n{\n return asin(x);\n}\nstatic inline float futrts_atan32(float x)\n{\n return atan(x);\n}", + "\nstatic inline float futrts_cosh32(float x)\n{\n return cosh(x);\n}\nstatic inline float futrts_sinh32(float x)\n{\n return sinh(x);\n}\nstatic inline float futrts_tanh32(float x)\n{\n return tanh(x);\n}\nstatic inline float futrts_acosh32(float x)\n{\n return acosh(x);\n}\nstatic inline float futrts_asinh32(float x)\n{\n return asinh(x);\n}\nstatic inline float futrts_atanh32(float x)\n{\n return atanh(x);\n}\nstatic inline float futrts_atan2_32(float x, float y)\n{\n return atan2(x, y);\n}\nstatic inline float futrts_hypot32(float x, float 
y)\n{\n return hypot(x, y);\n}\nstatic inline float futrts_gamma32(float x)\n{\n return tgamma(x);\n}\nstatic inline float futrts_lgamma32(float x)\n{\n return lgamma(x);\n}\nstatic inline float fmod32(float x, float y)\n{\n return fmod(x, y);\n}\nstatic inline float futrts_round32(float x)\n{\n return rint(x);\n}\nstatic inline float futrts_floor32(float x)\n{\n return floor(x);\n}\nstatic inline float futrts_ceil32(float x)\n{\n return ceil(x);\n}\nstatic inline float futrts_lerp32(float v0, float v1, float t)\n{\n return mix(v0, v1, t);\n}\nstatic inline float futrts_mad32(float a, float b, float c)\n{\n return mad(a, b, c);\n}\nstatic inline float futrts_fma32(float a, float b, float c)\n{\n return fma(a, b, c);\n}\n#else\nstatic inline float futrts_log32(float x)\n{\n return logf(x);\n}\nstatic inline float futrts_log2_32(float x)\n{\n return log2f(x);\n}\nstatic inline float futrts_log10_32(float x)\n{\n return log10f(x);\n}\nstatic inline float futrts_sqrt32(float x)\n{\n return sqrtf(x);\n}\nstatic inline float futrts_exp32(float x)\n{\n return expf(x);\n}\nstatic inline float futrts_cos32(float x)\n{\n return cosf(x);\n}\nstatic inline float futrts_sin32(float x)\n{\n return sinf(x);\n}\nstatic inline float futrts_tan32(float x)\n{\n return tanf(x);\n}\nstatic inline float futrts_acos32(float x)\n{\n return acosf(x);\n}\nstatic inline float futrts_asin32(float x)\n{\n return asinf(x);\n}\nstatic inline float futrts_atan32(float x)\n{\n return ata", + "nf(x);\n}\nstatic inline float futrts_cosh32(float x)\n{\n return coshf(x);\n}\nstatic inline float futrts_sinh32(float x)\n{\n return sinhf(x);\n}\nstatic inline float futrts_tanh32(float x)\n{\n return tanhf(x);\n}\nstatic inline float futrts_acosh32(float x)\n{\n return acoshf(x);\n}\nstatic inline float futrts_asinh32(float x)\n{\n return asinhf(x);\n}\nstatic inline float futrts_atanh32(float x)\n{\n return atanhf(x);\n}\nstatic inline float futrts_atan2_32(float x, float y)\n{\n return atan2f(x, 
y);\n}\nstatic inline float futrts_hypot32(float x, float y)\n{\n return hypotf(x, y);\n}\nstatic inline float futrts_gamma32(float x)\n{\n return tgammaf(x);\n}\nstatic inline float futrts_lgamma32(float x)\n{\n return lgammaf(x);\n}\nstatic inline float fmod32(float x, float y)\n{\n return fmodf(x, y);\n}\nstatic inline float futrts_round32(float x)\n{\n return rintf(x);\n}\nstatic inline float futrts_floor32(float x)\n{\n return floorf(x);\n}\nstatic inline float futrts_ceil32(float x)\n{\n return ceilf(x);\n}\nstatic inline float futrts_lerp32(float v0, float v1, float t)\n{\n return v0 + (v1 - v0) * t;\n}\nstatic inline float futrts_mad32(float a, float b, float c)\n{\n return a * b + c;\n}\nstatic inline float futrts_fma32(float a, float b, float c)\n{\n return fmaf(a, b, c);\n}\n#endif\nstatic inline int32_t futrts_to_bits32(float x)\n{\n union {\n float f;\n int32_t t;\n } p;\n \n p.f = x;\n return p.t;\n}\nstatic inline float futrts_from_bits32(int32_t x)\n{\n union {\n int32_t f;\n float t;\n } p;\n \n p.f = x;\n return p.t;\n}\nstatic inline float fsignum32(float x)\n{\n return futrts_isnan32(x) ? 
x : (x > 0) - (x < 0);\n}\n// Start of atomics.h\n\ninline int32_t atomic_xchg_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicExch((int32_t*)p, x);\n#else\n return atomic_xchg(p, x);\n#endif\n}\n\ninline int32_t atomic_xchg_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicExch(", + "(int32_t*)p, x);\n#else\n return atomic_xchg(p, x);\n#endif\n}\n\ninline int32_t atomic_cmpxchg_i32_global(volatile __global int32_t *p,\n int32_t cmp, int32_t val) {\n#ifdef FUTHARK_CUDA\n return atomicCAS((int32_t*)p, cmp, val);\n#else\n return atomic_cmpxchg(p, cmp, val);\n#endif\n}\n\ninline int32_t atomic_cmpxchg_i32_local(volatile __local int32_t *p,\n int32_t cmp, int32_t val) {\n#ifdef FUTHARK_CUDA\n return atomicCAS((int32_t*)p, cmp, val);\n#else\n return atomic_cmpxchg(p, cmp, val);\n#endif\n}\n\ninline int32_t atomic_add_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((int32_t*)p, x);\n#else\n return atomic_add(p, x);\n#endif\n}\n\ninline int32_t atomic_add_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((int32_t*)p, x);\n#else\n return atomic_add(p, x);\n#endif\n}\n\ninline float atomic_fadd_f32_global(volatile __global float *p, float x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((float*)p, x);\n#else\n union { int32_t i; float f; } old;\n union { int32_t i; float f; } assumed;\n old.f = *p;\n do {\n assumed.f = old.f;\n old.f = old.f + x;\n old.i = atomic_cmpxchg_i32_global((volatile __global int32_t*)p, assumed.i, old.i);\n } while (assumed.i != old.i);\n return old.f;\n#endif\n}\n\ninline float atomic_fadd_f32_local(volatile __local float *p, float x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((float*)p, x);\n#else\n union { int32_t i; float f; } old;\n union { int32_t i; float f; } assumed;\n old.f = *p;\n do {\n assumed.f = old.f;\n old.f = old.f + x;\n old.i = atomic_cmpxchg_i32_local((volatile __local
int32_t*)p, assumed.i, old.i);\n } while (assumed.i != old.i);\n return old.f;\n#endif\n}\n\ninline int32_t atomic_smax_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((int32_t*)p, x);\n#else\n return atomic_max(p, x);\n#endif\n}\n\ninline int32_t atomic_smax_i32_local(volatile", + " __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((int32_t*)p, x);\n#else\n return atomic_max(p, x);\n#endif\n}\n\ninline int32_t atomic_smin_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((int32_t*)p, x);\n#else\n return atomic_min(p, x);\n#endif\n}\n\ninline int32_t atomic_smin_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((int32_t*)p, x);\n#else\n return atomic_min(p, x);\n#endif\n}\n\ninline uint32_t atomic_umax_i32_global(volatile __global uint32_t *p, uint32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((uint32_t*)p, x);\n#else\n return atomic_max(p, x);\n#endif\n}\n\ninline uint32_t atomic_umax_i32_local(volatile __local uint32_t *p, uint32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((uint32_t*)p, x);\n#else\n return atomic_max(p, x);\n#endif\n}\n\ninline uint32_t atomic_umin_i32_global(volatile __global uint32_t *p, uint32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((uint32_t*)p, x);\n#else\n return atomic_min(p, x);\n#endif\n}\n\ninline uint32_t atomic_umin_i32_local(volatile __local uint32_t *p, uint32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((uint32_t*)p, x);\n#else\n return atomic_min(p, x);\n#endif\n}\n\ninline int32_t atomic_and_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAnd((int32_t*)p, x);\n#else\n return atomic_and(p, x);\n#endif\n}\n\ninline int32_t atomic_and_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAnd((int32_t*)p, x);\n#else\n return atomic_and(p, x);\n#endif\n}\n\ninline int32_t 
atomic_or_i32_global(volatile __global int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicOr((int32_t*)p, x);\n#else\n return atomic_or(p, x);\n#endif\n}\n\ninline int32_t atomic_or_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicOr((int32_t*)p, x);\n#else\n return atomic_or(p, x);\n#endif\n}\n\ninline int32_t atomic_xor_i32_global(volatile __global int32_t *p, int3", + "2_t x) {\n#ifdef FUTHARK_CUDA\n return atomicXor((int32_t*)p, x);\n#else\n return atomic_xor(p, x);\n#endif\n}\n\ninline int32_t atomic_xor_i32_local(volatile __local int32_t *p, int32_t x) {\n#ifdef FUTHARK_CUDA\n return atomicXor((int32_t*)p, x);\n#else\n return atomic_xor(p, x);\n#endif\n}\n\n// Start of 64 bit atomics\n\ninline int64_t atomic_xchg_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicExch((uint64_t*)p, x);\n#else\n return atom_xchg(p, x);\n#endif\n}\n\ninline int64_t atomic_xchg_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicExch((uint64_t*)p, x);\n#else\n return atom_xchg(p, x);\n#endif\n}\n\ninline int64_t atomic_cmpxchg_i64_global(volatile __global int64_t *p,\n int64_t cmp, int64_t val) {\n#ifdef FUTHARK_CUDA\n return atomicCAS((uint64_t*)p, cmp, val);\n#else\n return atom_cmpxchg(p, cmp, val);\n#endif\n}\n\ninline int64_t atomic_cmpxchg_i64_local(volatile __local int64_t *p,\n int64_t cmp, int64_t val) {\n#ifdef FUTHARK_CUDA\n return atomicCAS((uint64_t*)p, cmp, val);\n#else\n return atom_cmpxchg(p, cmp, val);\n#endif\n}\n\ninline int64_t atomic_add_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((uint64_t*)p, x);\n#else\n return atom_add(p, x);\n#endif\n}\n\ninline int64_t atomic_add_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAdd((uint64_t*)p, x);\n#else\n return atom_add(p, x);\n#endif\n}\n\n#ifdef FUTHARK_F64_ENABLED\n\ninline double
atomic_fadd_f64_global(volatile __global double *p, double x) {\n#if defined(FUTHARK_CUDA) && __CUDA_ARCH__ >= 600\n return atomicAdd((double*)p, x);\n#else\n union { int64_t i; double f; } old;\n union { int64_t i; double f; } assumed;\n old.f = *p;\n do {\n assumed.f = old.f;\n old.f = old.f + x;\n old.i = atomic_cmpxchg_i64_global((volatile __global int64_t*)p, assumed.i, old.i);\n } while (assumed.i != old.i);\n ", + "return old.f;\n#endif\n}\n\ninline double atomic_fadd_f64_local(volatile __local double *p, double x) {\n#if defined(FUTHARK_CUDA) && __CUDA_ARCH__ >= 600\n return atomicAdd((double*)p, x);\n#else\n union { int64_t i; double f; } old;\n union { int64_t i; double f; } assumed;\n old.f = *p;\n do {\n assumed.f = old.f;\n old.f = old.f + x;\n old.i = atomic_cmpxchg_i64_local((volatile __local int64_t*)p, assumed.i, old.i);\n } while (assumed.i != old.i);\n return old.f;\n#endif\n}\n\n#endif\n\ninline int64_t atomic_smax_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((int64_t*)p, x);\n#else\n return atom_max(p, x);\n#endif\n}\n\ninline int64_t atomic_smax_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((int64_t*)p, x);\n#else\n return atom_max(p, x);\n#endif\n}\n\ninline int64_t atomic_smin_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((int64_t*)p, x);\n#else\n return atom_min(p, x);\n#endif\n}\n\ninline int64_t atomic_smin_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((int64_t*)p, x);\n#else\n return atom_min(p, x);\n#endif\n}\n\ninline uint64_t atomic_umax_i64_global(volatile __global uint64_t *p, uint64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((uint64_t*)p, x);\n#else\n return atom_max(p, x);\n#endif\n}\n\ninline uint64_t atomic_umax_i64_local(volatile __local uint64_t *p, uint64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMax((uint64_t*)p, x);\n#else\n return 
atom_max(p, x);\n#endif\n}\n\ninline uint64_t atomic_umin_i64_global(volatile __global uint64_t *p, uint64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((uint64_t*)p, x);\n#else\n return atom_min(p, x);\n#endif\n}\n\ninline uint64_t atomic_umin_i64_local(volatile __local uint64_t *p, uint64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicMin((uint64_t*)p, x);\n#else\n return atom_min(p, x);\n#endif\n}\n\ninline int64_t atomic_and_i64_global(volatile __global int64_t *p, int64_t x) ", + "{\n#ifdef FUTHARK_CUDA\n return atomicAnd((int64_t*)p, x);\n#else\n return atom_and(p, x);\n#endif\n}\n\ninline int64_t atomic_and_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicAnd((int64_t*)p, x);\n#else\n return atom_and(p, x);\n#endif\n}\n\ninline int64_t atomic_or_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicOr((int64_t*)p, x);\n#else\n return atom_or(p, x);\n#endif\n}\n\ninline int64_t atomic_or_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicOr((int64_t*)p, x);\n#else\n return atom_or(p, x);\n#endif\n}\n\ninline int64_t atomic_xor_i64_global(volatile __global int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicXor((int64_t*)p, x);\n#else\n return atom_xor(p, x);\n#endif\n}\n\ninline int64_t atomic_xor_i64_local(volatile __local int64_t *p, int64_t x) {\n#ifdef FUTHARK_CUDA\n return atomicXor((int64_t*)p, x);\n#else\n return atom_xor(p, x);\n#endif\n}\n\n// End of atomics.h\n\n\n\n\n__kernel void get_envelopezicopy_9955(int64_t n_9485, int64_t i_9490, __global\n unsigned char *chunk_board_mem_9941,\n __global unsigned char *mem_9943)\n{\n const int block_dim0 = 0;\n const int block_dim1 = 1;\n const int block_dim2 = 2;\n int32_t copy_gtid_9955;\n int32_t copy_ltid_9956;\n int32_t copy_gid_9957;\n \n copy_gtid_9955 = get_global_id(0);\n copy_ltid_9956 = get_local_id(0);\n copy_gid_9957 = get_group_id(0);\n if (slt64(sext_i32_i64(copy_gtid_9955), 
n_9485)) {\n ((__global int8_t *) mem_9943)[n_9485 + sext_i32_i64(copy_gtid_9955)] =\n ((__global int8_t *) chunk_board_mem_9941)[i_9490 +\n sext_i32_i64(copy_gtid_9955) *\n n_9485];\n }\n \n error_0:\n return;\n}\n__kernel void get_envelopezicopy_9960(int64_t n_9485, __global\n ", + " unsigned char *chunk_board_mem_9941,\n __global unsigned char *mem_9943)\n{\n const int block_dim0 = 0;\n const int block_dim1 = 1;\n const int block_dim2 = 2;\n int32_t copy_gtid_9960;\n int32_t copy_ltid_9961;\n int32_t copy_gid_9962;\n \n copy_gtid_9960 = get_global_id(0);\n copy_ltid_9961 = get_local_id(0);\n copy_gid_9962 = get_group_id(0);\n if (slt64(sext_i32_i64(copy_gtid_9960), n_9485)) {\n ((__global int8_t *) mem_9943)[(int64_t) 3 * n_9485 +\n sext_i32_i64(copy_gtid_9960)] =\n ((__global\n int8_t *) chunk_board_mem_9941)[sext_i32_i64(copy_gtid_9960) *\n n_9485];\n }\n \n error_0:\n return;\n}\n__kernel void next_chunk_boardzicopy_9965(int64_t n_9500, int64_t m_9501,\n __global unsigned char *mem_9948,\n __global unsigned char *mem_9950)\n{\n const int block_dim0 = 0;\n const int block_dim1 = 1;\n const int block_dim2 = 2;\n int32_t copy_gtid_9965;\n int32_t copy_ltid_9966;\n int32_t copy_gid_9967;\n \n copy_gtid_9965 = get_global_id(0);\n copy_ltid_9966 = get_local_id(0);\n copy_gid_9967 = get_group_id(0);\n if (slt64(sext_i32_i64(copy_gtid_9965), n_9500 * n_9500)) {\n ((__global int8_t *) mem_9950)[squot64(sext_i32_i64(copy_gtid_9965),\n n_9500) * n_9500 +\n (sext_i32_i64(copy_gtid_9965) -\n squot64(sext_i32_i64(copy_gtid_9965),\n n_9500) * n_9500)] = ((__global\n int8_t *) mem_9948)[m_9501 +\n (int64_t) 1 +\n ", + " (squot64(sext_i32_i64(copy_gtid_9965),\n n_9500) *\n m_9501 +\n (sext_i32_i64(copy_gtid_9965) -\n squot64(sext_i32_i64(copy_gtid_9965),\n n_9500) *\n n_9500))];\n }\n \n error_0:\n return;\n}\n__kernel void next_chunk_boardzisegmap_9619(__global int *global_failure,\n int failure_is_an_option, __global\n int64_t *global_failure_args,\n int64_t n_9500, int64_t 
m_9501,\n __global\n unsigned char *chunk_board_mem_9941,\n __global\n unsigned char *envelope_board_mem_9942,\n __global unsigned char *mem_9945)\n{\n #define segmap_group_sizze_9734 (next_chunk_boardzisegmap_group_sizze_9622)\n \n const int block_dim0 = 0;\n const int block_dim1 = 1;\n const int block_dim2 = 2;\n \n if (*global_failure >= 0)\n return;\n \n int32_t global_tid_9955;\n int32_t local_tid_9956;\n int64_t group_sizze_9959;\n int32_t wave_sizze_9958;\n int32_t group_tid_9957;\n \n global_tid_9955 = get_global_id(0);\n local_tid_9956 = get_local_id(0);\n group_sizze_9959 = get_local_size(0);\n wave_sizze_9958 = LOCKSTEP_WIDTH;\n group_tid_99", + "57 = get_group_id(0);\n \n int32_t phys_tid_9619;\n \n phys_tid_9619 = global_tid_9955;\n \n int64_t gtid_9617;\n \n gtid_9617 = squot64(sext_i32_i64(group_tid_9957) * segmap_group_sizze_9734 +\n sext_i32_i64(local_tid_9956), m_9501);\n \n int64_t gtid_9618;\n \n gtid_9618 = sext_i32_i64(group_tid_9957) * segmap_group_sizze_9734 +\n sext_i32_i64(local_tid_9956) - squot64(sext_i32_i64(group_tid_9957) *\n segmap_group_sizze_9734 +\n sext_i32_i64(local_tid_9956),\n m_9501) * m_9501;\n if (slt64(gtid_9617, m_9501) && slt64(gtid_9618, m_9501)) {\n bool index_primexp_9904 = gtid_9617 == (int64_t) 0;\n int8_t defunc_0_f_res_9740;\n \n if (index_primexp_9904) {\n int8_t defunc_0_f_res_t_res_9745 = ((__global\n int8_t *) envelope_board_mem_9942)[gtid_9618];\n \n defunc_0_f_res_9740 = defunc_0_f_res_t_res_9745;\n } else {\n int64_t y_9746 = sub64(m_9501, (int64_t) 1);\n bool cond_9747 = gtid_9618 == y_9746;\n int8_t defunc_0_f_res_f_res_9748;\n \n if (cond_9747) {\n int8_t defunc_0_f_res_f_res_t_res_9753 = ((__global\n int8_t *) envelope_board_mem_9942)[m_9501 +\n gtid_9617];\n \n defunc_0_f_res_f_res_9748 = defunc_0_f_res_f_res_t_res_9753;\n } else {\n bool cond_9754 = gtid_9617 == y_9746;\n int8_t defunc_0_f_res_f_res_f_res_9755;\n \n if (cond_9754) {\n int8_t defunc_0_f_res_f_res_f_res_t_res_9760 = ((__global\n ", + " 
int8_t *) envelope_board_mem_9942)[(int64_t) 2 *\n m_9501 +\n gtid_9618];\n \n defunc_0_f_res_f_res_f_res_9755 =\n defunc_0_f_res_f_res_f_res_t_res_9760;\n } else {\n bool cond_9761 = gtid_9618 == (int64_t) 0;\n int8_t defunc_0_f_res_f_res_f_res_f_res_9762;\n \n if (cond_9761) {\n int8_t defunc_0_f_res_f_res_f_res_f_res_t_res_9767 =\n ((__global\n int8_t *) envelope_board_mem_9942)[(int64_t) 3 *\n m_9501 +\n gtid_9617];\n \n defunc_0_f_res_f_res_f_res_f_res_9762 =\n defunc_0_f_res_f_res_f_res_f_res_t_res_9767;\n } else {\n int64_t i_9768 = sub64(gtid_9617, (int64_t) 1);\n bool x_9769 = sle64((int64_t) 0, i_9768);\n bool y_9770 = slt64(i_9768, n_9500);\n bool bounds_check_9771 = x_9769 && y_9770;\n int64_t i_9772 = sub64(gtid_9618, (int64_t) 1);\n bool x_9773 = sle64((int64_t) 0, i_9772);\n bool y_9774 = slt64(i_9772, n_9500);\n bool bounds_check_9775 = x_9773 && y_9774;\n bool index_ok_9776 = bounds_check_9771 &&\n bounds_check_9775;\n bool index_certs_9777;\n \n if (!index_ok_9776) {\n {\n ", + " if (atomic_cmpxchg_i32_global(global_failure,\n -1, 0) == -1) {\n global_failure_args[0] = i_9768;\n global_failure_args[1] = i_9772;\n global_failure_args[2] = n_9500;\n global_failure_args[3] = n_9500;\n ;\n }\n return;\n }\n }\n \n int8_t defunc_0_f_res_f_res_f_res_f_res_f_res_9778 =\n ((__global\n int8_t *) chunk_board_mem_9941)[i_9768 *\n n_9500 +\n i_9772];\n \n defunc_0_f_res_f_res_f_res_f_res_9762 =\n defunc_0_f_res_f_res_f_res_f_res_f_res_9778;\n }\n defunc_0_f_res_f_res_f_res_9755 =\n defunc_0_f_res_f_res_f_res_f_res_9762;\n }\n defunc_0_f_res_f_res_9748 = defunc_0_f_res_f_res_f_res_9755;\n }\n defunc_0_f_res_9740 = defunc_0_f_res_f_res_9748;\n }\n ((__global int8_t *) mem_9945)[gtid_9617 * m_9501 + gtid_9618] =\n defunc_0_f_res_9740;\n }\n \n error_0:\n return;\n #undef segmap_group_sizze_9734\n}\n__kernel void next_chunk_boardzisegmap_9790(__global int *global_failure,\n int64_t m_9501, __global\n unsigned char *mem_9945, __global\n unsigned char 
*mem_9948)\n{\n #define segmap_group_sizze_9865 (next_chunk_boardzisegmap_group_sizze_9793)\n \n const int ", + "block_dim0 = 0;\n const int block_dim1 = 1;\n const int block_dim2 = 2;\n \n if (*global_failure >= 0)\n return;\n \n int32_t global_tid_9960;\n int32_t local_tid_9961;\n int64_t group_sizze_9964;\n int32_t wave_sizze_9963;\n int32_t group_tid_9962;\n \n global_tid_9960 = get_global_id(0);\n local_tid_9961 = get_local_id(0);\n group_sizze_9964 = get_local_size(0);\n wave_sizze_9963 = LOCKSTEP_WIDTH;\n group_tid_9962 = get_group_id(0);\n \n int32_t phys_tid_9790;\n \n phys_tid_9790 = global_tid_9960;\n \n int64_t gtid_9788;\n \n gtid_9788 = squot64(sext_i32_i64(group_tid_9962) * segmap_group_sizze_9865 +\n sext_i32_i64(local_tid_9961), m_9501);\n \n int64_t gtid_9789;\n \n gtid_9789 = sext_i32_i64(group_tid_9962) * segmap_group_sizze_9865 +\n sext_i32_i64(local_tid_9961) - squot64(sext_i32_i64(group_tid_9962) *\n segmap_group_sizze_9865 +\n sext_i32_i64(local_tid_9961),\n m_9501) * m_9501;\n if (slt64(gtid_9788, m_9501) && slt64(gtid_9789, m_9501)) {\n int64_t i_p_o_9935 = add64((int64_t) -1, gtid_9788);\n int64_t rot_i_9936 = smod64(i_p_o_9935, m_9501);\n int64_t i_p_o_9937 = add64((int64_t) -1, gtid_9789);\n int64_t rot_i_9938 = smod64(i_p_o_9937, m_9501);\n int8_t x_9868 = ((__global int8_t *) mem_9945)[rot_i_9936 * m_9501 +\n rot_i_9938];\n int64_t rot_i_9934 = smod64(gtid_9789, m_9501);\n int8_t x_9869 = ((__global int8_t *) mem_9945)[rot_i_9936 * m_9501 +\n rot_i_9934];\n int64_t i_p_o_9929 = add64((int64_t) 1, gtid_9789);\n int64_t rot_i_9930 = smod64(i_p_o_9929, m_9501);\n int8_t x_9870 = ((__global int8_t *) mem_9945)[rot_i_9936 * m_9501 +\n ", + " rot_i_9930];\n int64_t rot_i_9924 = smod64(gtid_9788, m_9501);\n int8_t x_9871 = ((__global int8_t *) mem_9945)[rot_i_9924 * m_9501 +\n rot_i_9938];\n int8_t x_9872 = ((__global int8_t *) mem_9945)[rot_i_9924 * m_9501 +\n rot_i_9930];\n int64_t i_p_o_9915 = add64((int64_t) 1, gtid_9788);\n int64_t 
rot_i_9916 = smod64(i_p_o_9915, m_9501);\n int8_t x_9873 = ((__global int8_t *) mem_9945)[rot_i_9916 * m_9501 +\n rot_i_9938];\n int8_t x_9874 = ((__global int8_t *) mem_9945)[rot_i_9916 * m_9501 +\n rot_i_9934];\n int8_t x_9875 = ((__global int8_t *) mem_9945)[rot_i_9916 * m_9501 +\n rot_i_9930];\n int8_t x_9876 = ((__global int8_t *) mem_9945)[gtid_9788 * m_9501 +\n gtid_9789];\n int8_t x_9877 = add8(x_9868, x_9869);\n int8_t x_9878 = add8(x_9870, x_9877);\n int8_t x_9879 = add8(x_9871, x_9878);\n int8_t x_9880 = add8(x_9872, x_9879);\n int8_t x_9881 = add8(x_9873, x_9880);\n int8_t x_9882 = add8(x_9874, x_9881);\n int8_t defunc_2_f_res_9883 = add8(x_9875, x_9882);\n bool cond_9884 = x_9876 == (int8_t) 1;\n bool cond_9885 = defunc_2_f_res_9883 == (int8_t) 2;\n bool cond_t_res_f_res_9886 = defunc_2_f_res_9883 == (int8_t) 3;\n bool x_9887 = !cond_9885;\n bool y_9888 = cond_t_res_f_res_9886 && x_9887;\n bool cond_t_res_9889 = cond_9885 || y_9888;\n bool x_9890 = cond_9884 && cond_t_res_9889;\n bool cond_9891 = x_9876 == (int8_t) 0;\n bool x_9892 = cond_t_res_f_res_9886 && cond_9891;\n bool x_9893 = !x_9890;\n bool y_9894 = x_9892 && x_9893;\n bool cond_9895 = x_9890 || y_9894", + ";\n int8_t defunc_1_f_res_9896 = btoi_bool_i8(cond_9895);\n \n ((__global int8_t *) mem_9948)[gtid_9788 * m_9501 + gtid_9789] =\n defunc_1_f_res_9896;\n }\n \n error_0:\n return;\n #undef segmap_group_sizze_9865\n}\n", + NULL}; +static const char *size_names[] = {"get_envelope.group_size_9958", + "get_envelope.group_size_9963", + "next_chunk_board.group_size_9968", + "next_chunk_board.segmap_group_size_9622", + "next_chunk_board.segmap_group_size_9793"}; +static const char *size_vars[] = {"get_envelopezigroup_sizze_9958", + "get_envelopezigroup_sizze_9963", + "next_chunk_boardzigroup_sizze_9968", + "next_chunk_boardzisegmap_group_sizze_9622", + "next_chunk_boardzisegmap_group_sizze_9793"}; +static const char *size_classes[] = {"group_size", "group_size", "group_size", + "group_size", 
"group_size"}; +struct sizes { + int64_t get_envelopezigroup_sizze_9958; + int64_t get_envelopezigroup_sizze_9963; + int64_t next_chunk_boardzigroup_sizze_9968; + int64_t next_chunk_boardzisegmap_group_sizze_9622; + int64_t next_chunk_boardzisegmap_group_sizze_9793; +} ; +struct futhark_context_config { + struct opencl_config opencl; + int64_t sizes[5]; + int num_build_opts; + const char **build_opts; +} ; +struct futhark_context_config *futhark_context_config_new(void) +{ + struct futhark_context_config *cfg = + (struct futhark_context_config *) malloc(sizeof(struct futhark_context_config)); + + if (cfg == NULL) + return NULL; + cfg->num_build_opts = 0; + cfg->build_opts = (const char **) malloc(sizeof(const char *)); + cfg->build_opts[0] = NULL; + cfg->sizes[0] = 0; + cfg->sizes[1] = 0; + cfg->sizes[2] = 0; + cfg->sizes[3] = 0; + cfg->sizes[4] = 0; + opencl_config_init(&cfg->opencl, 5, size_names, size_vars, cfg->sizes, + size_classes); + return cfg; +} +void futhark_context_config_free(struct futhark_context_config *cfg) +{ + free(cfg->build_opts); + free(cfg); +} +void futhark_context_config_add_build_option(struct futhark_context_config *cfg, + const char *opt) +{ + cfg->build_opts[cfg->num_build_opts] = opt; + cfg->num_build_opts++; + cfg->build_opts = (const char **) realloc(cfg->build_opts, + (cfg->num_build_opts + 1) * + sizeof(const char *)); + cfg->build_opts[cfg->num_build_opts] = NULL; +} +void futhark_context_config_set_debugging(struct futhark_context_config *cfg, + int flag) +{ + cfg->opencl.profiling = cfg->opencl.logging = cfg->opencl.debugging = flag; +} +void futhark_context_config_set_profiling(struct futhark_context_config *cfg, + int flag) +{ + cfg->opencl.profiling = flag; +} +void futhark_context_config_set_logging(struct futhark_context_config *cfg, + int flag) +{ + cfg->opencl.logging = flag; +} +void futhark_context_config_set_device(struct futhark_context_config *cfg, const + char *s) +{ + set_preferred_device(&cfg->opencl, s); +} +void 
futhark_context_config_set_platform(struct futhark_context_config *cfg, + const char *s) +{ + set_preferred_platform(&cfg->opencl, s); +} +void futhark_context_config_select_device_interactively(struct futhark_context_config *cfg) +{ + select_device_interactively(&cfg->opencl); +} +void futhark_context_config_list_devices(struct futhark_context_config *cfg) +{ + (void) cfg; + list_devices(); +} +void futhark_context_config_dump_program_to(struct futhark_context_config *cfg, + const char *path) +{ + cfg->opencl.dump_program_to = path; +} +void futhark_context_config_load_program_from(struct futhark_context_config *cfg, + const char *path) +{ + cfg->opencl.load_program_from = path; +} +void futhark_context_config_dump_binary_to(struct futhark_context_config *cfg, + const char *path) +{ + cfg->opencl.dump_binary_to = path; +} +void futhark_context_config_load_binary_from(struct futhark_context_config *cfg, + const char *path) +{ + cfg->opencl.load_binary_from = path; +} +void futhark_context_config_set_default_group_size(struct futhark_context_config *cfg, + int size) +{ + cfg->opencl.default_group_size = size; + cfg->opencl.default_group_size_changed = 1; +} +void futhark_context_config_set_default_num_groups(struct futhark_context_config *cfg, + int num) +{ + cfg->opencl.default_num_groups = num; +} +void futhark_context_config_set_default_tile_size(struct futhark_context_config *cfg, + int size) +{ + cfg->opencl.default_tile_size = size; + cfg->opencl.default_tile_size_changed = 1; +} +void futhark_context_config_set_default_reg_tile_size(struct futhark_context_config *cfg, + int size) +{ + cfg->opencl.default_reg_tile_size = size; +} +void futhark_context_config_set_default_threshold(struct futhark_context_config *cfg, + int size) +{ + cfg->opencl.default_threshold = size; +} +int futhark_context_config_set_size(struct futhark_context_config *cfg, const + char *size_name, size_t size_value) +{ + for (int i = 0; i < 5; i++) { + if (strcmp(size_name, size_names[i]) 
== 0) { + cfg->sizes[i] = size_value; + return 0; + } + } + if (strcmp(size_name, "default_group_size") == 0) { + cfg->opencl.default_group_size = size_value; + return 0; + } + if (strcmp(size_name, "default_num_groups") == 0) { + cfg->opencl.default_num_groups = size_value; + return 0; + } + if (strcmp(size_name, "default_threshold") == 0) { + cfg->opencl.default_threshold = size_value; + return 0; + } + if (strcmp(size_name, "default_tile_size") == 0) { + cfg->opencl.default_tile_size = size_value; + return 0; + } + if (strcmp(size_name, "default_reg_tile_size") == 0) { + cfg->opencl.default_reg_tile_size = size_value; + return 0; + } + return 1; +} +struct futhark_context { + int detail_memory; + int debugging; + int profiling; + int profiling_paused; + int logging; + lock_t lock; + char *error; + FILE *log; + int64_t peak_mem_usage_device; + int64_t cur_mem_usage_device; + int64_t peak_mem_usage_default; + int64_t cur_mem_usage_default; + struct { + int dummy; + } constants; + int total_runs; + long total_runtime; + cl_kernel get_envelopezicopy_9955; + cl_kernel get_envelopezicopy_9960; + cl_kernel next_chunk_boardzicopy_9965; + cl_kernel next_chunk_boardzisegmap_9619; + cl_kernel next_chunk_boardzisegmap_9790; + int64_t copy_dev_to_dev_total_runtime; + int copy_dev_to_dev_runs; + int64_t copy_dev_to_host_total_runtime; + int copy_dev_to_host_runs; + int64_t copy_host_to_dev_total_runtime; + int copy_host_to_dev_runs; + int64_t copy_scalar_to_dev_total_runtime; + int copy_scalar_to_dev_runs; + int64_t copy_scalar_from_dev_total_runtime; + int copy_scalar_from_dev_runs; + int64_t get_envelopezicopy_9955_total_runtime; + int get_envelopezicopy_9955_runs; + int64_t get_envelopezicopy_9960_total_runtime; + int get_envelopezicopy_9960_runs; + int64_t next_chunk_boardzicopy_9965_total_runtime; + int next_chunk_boardzicopy_9965_runs; + int64_t next_chunk_boardzisegmap_9619_total_runtime; + int next_chunk_boardzisegmap_9619_runs; + int64_t 
next_chunk_boardzisegmap_9790_total_runtime; + int next_chunk_boardzisegmap_9790_runs; + cl_mem global_failure; + cl_mem global_failure_args; + struct opencl_context opencl; + struct sizes sizes; + cl_int failure_is_an_option; +} ; +void post_opencl_setup(struct opencl_context *ctx, + struct opencl_device_option *option) +{ + if ((ctx->lockstep_width == 0 && strstr(option->platform_name, + "NVIDIA CUDA") != NULL) && + (option->device_type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) { + ctx->lockstep_width = 32; + } + if ((ctx->lockstep_width == 0 && strstr(option->platform_name, + "AMD Accelerated Parallel Processing") != + NULL) && (option->device_type & CL_DEVICE_TYPE_GPU) == + CL_DEVICE_TYPE_GPU) { + ctx->lockstep_width = 32; + } + if ((ctx->lockstep_width == 0 && strstr(option->platform_name, "") != + NULL) && (option->device_type & CL_DEVICE_TYPE_GPU) == + CL_DEVICE_TYPE_GPU) { + ctx->lockstep_width = 1; + } + if ((ctx->cfg.default_num_groups == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) { + size_t MAX_COMPUTE_UNITS_val = 0; + + clGetDeviceInfo(ctx->device, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(MAX_COMPUTE_UNITS_val), &MAX_COMPUTE_UNITS_val, + NULL); + ctx->cfg.default_num_groups = 4 * MAX_COMPUTE_UNITS_val; + } + if ((ctx->cfg.default_group_size == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) { + ctx->cfg.default_group_size = 256; + } + if ((ctx->cfg.default_tile_size == 0 && strstr(option->platform_name, "") != + NULL) && (option->device_type & CL_DEVICE_TYPE_GPU) == + CL_DEVICE_TYPE_GPU) { + ctx->cfg.default_tile_size = 32; + } + if ((ctx->cfg.default_reg_tile_size == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) { + ctx->cfg.default_reg_tile_size = 2; + } + if ((ctx->cfg.default_threshold == 0 && strstr(option->platform_name, "") 
!= + NULL) && (option->device_type & CL_DEVICE_TYPE_GPU) == + CL_DEVICE_TYPE_GPU) { + ctx->cfg.default_threshold = 32768; + } + if ((ctx->lockstep_width == 0 && strstr(option->platform_name, "") != + NULL) && (option->device_type & CL_DEVICE_TYPE_CPU) == + CL_DEVICE_TYPE_CPU) { + ctx->lockstep_width = 1; + } + if ((ctx->cfg.default_num_groups == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU) { + size_t MAX_COMPUTE_UNITS_val = 0; + + clGetDeviceInfo(ctx->device, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(MAX_COMPUTE_UNITS_val), &MAX_COMPUTE_UNITS_val, + NULL); + ctx->cfg.default_num_groups = MAX_COMPUTE_UNITS_val; + } + if ((ctx->cfg.default_group_size == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU) { + ctx->cfg.default_group_size = 32; + } + if ((ctx->cfg.default_tile_size == 0 && strstr(option->platform_name, "") != + NULL) && (option->device_type & CL_DEVICE_TYPE_CPU) == + CL_DEVICE_TYPE_CPU) { + ctx->cfg.default_tile_size = 4; + } + if ((ctx->cfg.default_reg_tile_size == 0 && strstr(option->platform_name, + "") != NULL) && + (option->device_type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU) { + ctx->cfg.default_reg_tile_size = 1; + } + if ((ctx->cfg.default_threshold == 0 && strstr(option->platform_name, "") != + NULL) && (option->device_type & CL_DEVICE_TYPE_CPU) == + CL_DEVICE_TYPE_CPU) { + size_t MAX_COMPUTE_UNITS_val = 0; + + clGetDeviceInfo(ctx->device, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(MAX_COMPUTE_UNITS_val), &MAX_COMPUTE_UNITS_val, + NULL); + ctx->cfg.default_threshold = MAX_COMPUTE_UNITS_val; + } +} +static void init_context_early(struct futhark_context_config *cfg, + struct futhark_context *ctx) +{ + ctx->opencl.cfg = cfg->opencl; + ctx->detail_memory = cfg->opencl.debugging; + ctx->debugging = cfg->opencl.debugging; + ctx->profiling = cfg->opencl.profiling; + ctx->profiling_paused = 0; + ctx->logging = 
cfg->opencl.logging; + ctx->error = NULL; + ctx->log = stderr; + ctx->opencl.profiling_records_capacity = 200; + ctx->opencl.profiling_records_used = 0; + ctx->opencl.profiling_records = + malloc(ctx->opencl.profiling_records_capacity * + sizeof(struct profiling_record)); + create_lock(&ctx->lock); + ctx->failure_is_an_option = 0; + ctx->peak_mem_usage_device = 0; + ctx->cur_mem_usage_device = 0; + ctx->peak_mem_usage_default = 0; + ctx->cur_mem_usage_default = 0; + ctx->total_runs = 0; + ctx->total_runtime = 0; + ctx->copy_dev_to_dev_total_runtime = 0; + ctx->copy_dev_to_dev_runs = 0; + ctx->copy_dev_to_host_total_runtime = 0; + ctx->copy_dev_to_host_runs = 0; + ctx->copy_host_to_dev_total_runtime = 0; + ctx->copy_host_to_dev_runs = 0; + ctx->copy_scalar_to_dev_total_runtime = 0; + ctx->copy_scalar_to_dev_runs = 0; + ctx->copy_scalar_from_dev_total_runtime = 0; + ctx->copy_scalar_from_dev_runs = 0; + ctx->get_envelopezicopy_9955_total_runtime = 0; + ctx->get_envelopezicopy_9955_runs = 0; + ctx->get_envelopezicopy_9960_total_runtime = 0; + ctx->get_envelopezicopy_9960_runs = 0; + ctx->next_chunk_boardzicopy_9965_total_runtime = 0; + ctx->next_chunk_boardzicopy_9965_runs = 0; + ctx->next_chunk_boardzisegmap_9619_total_runtime = 0; + ctx->next_chunk_boardzisegmap_9619_runs = 0; + ctx->next_chunk_boardzisegmap_9790_total_runtime = 0; + ctx->next_chunk_boardzisegmap_9790_runs = 0; +} +static int init_context_late(struct futhark_context_config *cfg, + struct futhark_context *ctx, cl_program prog) +{ + cl_int error; + cl_int no_error = -1; + + ctx->global_failure = clCreateBuffer(ctx->opencl.ctx, CL_MEM_READ_WRITE | + CL_MEM_COPY_HOST_PTR, sizeof(cl_int), + &no_error, &error); + OPENCL_SUCCEED_OR_RETURN(error); + // The +1 is to avoid zero-byte allocations. 
+ ctx->global_failure_args = clCreateBuffer(ctx->opencl.ctx, + CL_MEM_READ_WRITE, + sizeof(int64_t) * (4 + 1), NULL, + &error); + OPENCL_SUCCEED_OR_RETURN(error); + { + ctx->get_envelopezicopy_9955 = clCreateKernel(prog, + "get_envelopezicopy_9955", + &error); + OPENCL_SUCCEED_FATAL(error); + if (ctx->debugging) + fprintf(ctx->log, "Created kernel %s.\n", "get_envelope.copy_9955"); + } + { + ctx->get_envelopezicopy_9960 = clCreateKernel(prog, + "get_envelopezicopy_9960", + &error); + OPENCL_SUCCEED_FATAL(error); + if (ctx->debugging) + fprintf(ctx->log, "Created kernel %s.\n", "get_envelope.copy_9960"); + } + { + ctx->next_chunk_boardzicopy_9965 = clCreateKernel(prog, + "next_chunk_boardzicopy_9965", + &error); + OPENCL_SUCCEED_FATAL(error); + if (ctx->debugging) + fprintf(ctx->log, "Created kernel %s.\n", + "next_chunk_board.copy_9965"); + } + { + ctx->next_chunk_boardzisegmap_9619 = clCreateKernel(prog, + "next_chunk_boardzisegmap_9619", + &error); + OPENCL_SUCCEED_FATAL(error); + OPENCL_SUCCEED_FATAL(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 0, sizeof(cl_mem), + &ctx->global_failure)); + OPENCL_SUCCEED_FATAL(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 2, sizeof(cl_mem), + &ctx->global_failure_args)); + if (ctx->debugging) + fprintf(ctx->log, "Created kernel %s.\n", + "next_chunk_board.segmap_9619"); + } + { + ctx->next_chunk_boardzisegmap_9790 = clCreateKernel(prog, + "next_chunk_boardzisegmap_9790", + &error); + OPENCL_SUCCEED_FATAL(error); + OPENCL_SUCCEED_FATAL(clSetKernelArg(ctx->next_chunk_boardzisegmap_9790, + 0, sizeof(cl_mem), + &ctx->global_failure)); + if (ctx->debugging) + fprintf(ctx->log, "Created kernel %s.\n", + "next_chunk_board.segmap_9790"); + } + ctx->sizes.get_envelopezigroup_sizze_9958 = cfg->sizes[0]; + ctx->sizes.get_envelopezigroup_sizze_9963 = cfg->sizes[1]; + ctx->sizes.next_chunk_boardzigroup_sizze_9968 = cfg->sizes[2]; + ctx->sizes.next_chunk_boardzisegmap_group_sizze_9622 = cfg->sizes[3]; + 
ctx->sizes.next_chunk_boardzisegmap_group_sizze_9793 = cfg->sizes[4]; + init_constants(ctx); + // Clear the free list of any deallocations that occurred while initialising constants. + OPENCL_SUCCEED_OR_RETURN(opencl_free_all(&ctx->opencl)); + // The program will be properly freed after all the kernels have also been freed. + OPENCL_SUCCEED_OR_RETURN(clReleaseProgram(prog)); + return futhark_context_sync(ctx); +} +struct futhark_context *futhark_context_new(struct futhark_context_config *cfg) +{ + struct futhark_context *ctx = + (struct futhark_context *) malloc(sizeof(struct futhark_context)); + + if (ctx == NULL) + return NULL; + + int required_types = 0; + + init_context_early(cfg, ctx); + + cl_program prog = setup_opencl(&ctx->opencl, opencl_program, required_types, + cfg->build_opts); + + init_context_late(cfg, ctx, prog); + return ctx; +} +struct futhark_context *futhark_context_new_with_command_queue(struct futhark_context_config *cfg, + cl_command_queue queue) +{ + struct futhark_context *ctx = + (struct futhark_context *) malloc(sizeof(struct futhark_context)); + + if (ctx == NULL) + return NULL; + + int required_types = 0; + + init_context_early(cfg, ctx); + + cl_program prog = setup_opencl_with_command_queue(&ctx->opencl, queue, + opencl_program, + required_types, + cfg->build_opts); + + init_context_late(cfg, ctx, prog); + return ctx; +} +void futhark_context_free(struct futhark_context *ctx) +{ + free_constants(ctx); + free_lock(&ctx->lock); + OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->get_envelopezicopy_9955)); + OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->get_envelopezicopy_9960)); + OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->next_chunk_boardzicopy_9965)); + OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->next_chunk_boardzisegmap_9619)); + OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->next_chunk_boardzisegmap_9790)); + teardown_opencl(&ctx->opencl); + free(ctx); +} +int futhark_context_sync(struct futhark_context *ctx) +{ + cl_int failure_idx = -1; + + if 
(ctx->failure_is_an_option) { + OPENCL_SUCCEED_OR_RETURN(clEnqueueReadBuffer(ctx->opencl.queue, + ctx->global_failure, + CL_FALSE, 0, + sizeof(cl_int), + &failure_idx, 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_scalar_from_dev_runs, + &ctx->copy_scalar_from_dev_total_runtime))); + ctx->failure_is_an_option = 0; + } + OPENCL_SUCCEED_OR_RETURN(clFinish(ctx->opencl.queue)); + if (failure_idx >= 0) { + cl_int no_failure = -1; + + OPENCL_SUCCEED_OR_RETURN(clEnqueueWriteBuffer(ctx->opencl.queue, + ctx->global_failure, + CL_TRUE, 0, + sizeof(cl_int), + &no_failure, 0, NULL, + NULL)); + + int64_t args[4 + 1]; + + OPENCL_SUCCEED_OR_RETURN(clEnqueueReadBuffer(ctx->opencl.queue, + ctx->global_failure_args, + CL_TRUE, 0, sizeof(args), + &args, 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_dev_to_host_runs, + &ctx->copy_dev_to_host_total_runtime))); + switch (failure_idx) { + + case 0: + { + ctx->error = + msgprintf("Index [%lld, %lld] out of bounds for array of shape [%lld][%lld].\n-> #0 gol.fut:27:36-55\n #1 /prelude/soacs.fut:59:3-10\n #2 /prelude/array.fut:195:3-17\n #3 /prelude/functional.fut:39:59-65\n #4 /prelude/soacs.fut:59:3-10\n #5 /prelude/array.fut:203:3-34\n #6 gol.fut:18:5-27:56\n #7 gol.fut:31:27-66\n #8 gol.fut:30:1-40:43\n", + args[0], args[1], args[2], args[3]); + break; + } + } + return 1; + } + return 0; +} +cl_command_queue futhark_context_get_command_queue(struct futhark_context *ctx) +{ + return ctx->opencl.queue; +} +static int memblock_unref_device(struct futhark_context *ctx, + struct memblock_device *block, const + char *desc) +{ + if (block->references != NULL) { + *block->references -= 1; + if (ctx->detail_memory) + fprintf(ctx->log, + "Unreferencing block %s (allocated as %s) in %s: %d references remaining.\n", + desc, block->desc, "space 'device'", *block->references); + if (*block->references == 0) { + 
ctx->cur_mem_usage_device -= block->size; + OPENCL_SUCCEED_OR_RETURN(opencl_free(&ctx->opencl, block->mem, + desc)); + free(block->references); + if (ctx->detail_memory) + fprintf(ctx->log, + "%lld bytes freed (now allocated: %lld bytes)\n", + (long long) block->size, + (long long) ctx->cur_mem_usage_device); + } + block->references = NULL; + } + return 0; +} +static int memblock_alloc_device(struct futhark_context *ctx, + struct memblock_device *block, int64_t size, + const char *desc) +{ + if (size < 0) + futhark_panic(1, + "Negative allocation of %lld bytes attempted for %s in %s.\n", + (long long) size, desc, "space 'device'", + ctx->cur_mem_usage_device); + + int ret = memblock_unref_device(ctx, block, desc); + + ctx->cur_mem_usage_device += size; + if (ctx->detail_memory) + fprintf(ctx->log, + "Allocating %lld bytes for %s in %s (then allocated: %lld bytes)", + (long long) size, desc, "space 'device'", + (long long) ctx->cur_mem_usage_device); + if (ctx->cur_mem_usage_device > ctx->peak_mem_usage_device) { + ctx->peak_mem_usage_device = ctx->cur_mem_usage_device; + if (ctx->detail_memory) + fprintf(ctx->log, " (new peak).\n"); + } else if (ctx->detail_memory) + fprintf(ctx->log, ".\n"); + OPENCL_SUCCEED_OR_RETURN(opencl_alloc(&ctx->opencl, size, desc, + &block->mem)); + block->references = (int *) malloc(sizeof(int)); + *block->references = 1; + block->size = size; + block->desc = desc; + return ret; +} +static int memblock_set_device(struct futhark_context *ctx, + struct memblock_device *lhs, + struct memblock_device *rhs, const + char *lhs_desc) +{ + int ret = memblock_unref_device(ctx, lhs, lhs_desc); + + if (rhs->references != NULL) + (*rhs->references)++; + *lhs = *rhs; + return ret; +} +static int memblock_unref(struct futhark_context *ctx, struct memblock *block, + const char *desc) +{ + if (block->references != NULL) { + *block->references -= 1; + if (ctx->detail_memory) + fprintf(ctx->log, + "Unreferencing block %s (allocated as %s) in %s: %d 
references remaining.\n", + desc, block->desc, "default space", *block->references); + if (*block->references == 0) { + ctx->cur_mem_usage_default -= block->size; + free(block->mem); + free(block->references); + if (ctx->detail_memory) + fprintf(ctx->log, + "%lld bytes freed (now allocated: %lld bytes)\n", + (long long) block->size, + (long long) ctx->cur_mem_usage_default); + } + block->references = NULL; + } + return 0; +} +static int memblock_alloc(struct futhark_context *ctx, struct memblock *block, + int64_t size, const char *desc) +{ + if (size < 0) + futhark_panic(1, + "Negative allocation of %lld bytes attempted for %s in %s.\n", + (long long) size, desc, "default space", + ctx->cur_mem_usage_default); + + int ret = memblock_unref(ctx, block, desc); + + ctx->cur_mem_usage_default += size; + if (ctx->detail_memory) + fprintf(ctx->log, + "Allocating %lld bytes for %s in %s (then allocated: %lld bytes)", + (long long) size, desc, "default space", + (long long) ctx->cur_mem_usage_default); + if (ctx->cur_mem_usage_default > ctx->peak_mem_usage_default) { + ctx->peak_mem_usage_default = ctx->cur_mem_usage_default; + if (ctx->detail_memory) + fprintf(ctx->log, " (new peak).\n"); + } else if (ctx->detail_memory) + fprintf(ctx->log, ".\n"); + block->mem = (char *) malloc(size); + block->references = (int *) malloc(sizeof(int)); + *block->references = 1; + block->size = size; + block->desc = desc; + return ret; +} +static int memblock_set(struct futhark_context *ctx, struct memblock *lhs, + struct memblock *rhs, const char *lhs_desc) +{ + int ret = memblock_unref(ctx, lhs, lhs_desc); + + if (rhs->references != NULL) + (*rhs->references)++; + *lhs = *rhs; + return ret; +} +int futhark_get_num_sizes(void) +{ + return sizeof(size_names) / sizeof(size_names[0]); +} +const char *futhark_get_size_name(int i) +{ + return size_names[i]; +} +const char *futhark_get_size_class(int i) +{ + return size_classes[i]; +} +char *futhark_context_report(struct futhark_context *ctx) +{ 
+ if (futhark_context_sync(ctx) != 0) + return NULL; + + struct str_builder builder; + + str_builder_init(&builder); + if (ctx->detail_memory || ctx->profiling || ctx->logging) { + str_builder(&builder, + "Peak memory usage for space 'device': %lld bytes.\n", + (long long) ctx->peak_mem_usage_device); + { } + } + if (ctx->profiling) { + OPENCL_SUCCEED_FATAL(opencl_tally_profiling_records(&ctx->opencl)); + str_builder(&builder, + "copy_dev_to_dev ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->copy_dev_to_dev_runs, + (long) ctx->copy_dev_to_dev_total_runtime / + (ctx->copy_dev_to_dev_runs != + 0 ? ctx->copy_dev_to_dev_runs : 1), + (long) ctx->copy_dev_to_dev_total_runtime); + ctx->total_runtime += ctx->copy_dev_to_dev_total_runtime; + ctx->total_runs += ctx->copy_dev_to_dev_runs; + str_builder(&builder, + "copy_dev_to_host ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->copy_dev_to_host_runs, + (long) ctx->copy_dev_to_host_total_runtime / + (ctx->copy_dev_to_host_runs != + 0 ? ctx->copy_dev_to_host_runs : 1), + (long) ctx->copy_dev_to_host_total_runtime); + ctx->total_runtime += ctx->copy_dev_to_host_total_runtime; + ctx->total_runs += ctx->copy_dev_to_host_runs; + str_builder(&builder, + "copy_host_to_dev ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->copy_host_to_dev_runs, + (long) ctx->copy_host_to_dev_total_runtime / + (ctx->copy_host_to_dev_runs != + 0 ? ctx->copy_host_to_dev_runs : 1), + (long) ctx->copy_host_to_dev_total_runtime); + ctx->total_runtime += ctx->copy_host_to_dev_total_runtime; + ctx->total_runs += ctx->copy_host_to_dev_runs; + str_builder(&builder, + "copy_scalar_to_dev ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->copy_scalar_to_dev_runs, + (long) ctx->copy_scalar_to_dev_total_runtime / + (ctx->copy_scalar_to_dev_runs != + 0 ? 
ctx->copy_scalar_to_dev_runs : 1), + (long) ctx->copy_scalar_to_dev_total_runtime); + ctx->total_runtime += ctx->copy_scalar_to_dev_total_runtime; + ctx->total_runs += ctx->copy_scalar_to_dev_runs; + str_builder(&builder, + "copy_scalar_from_dev ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->copy_scalar_from_dev_runs, + (long) ctx->copy_scalar_from_dev_total_runtime / + (ctx->copy_scalar_from_dev_runs != + 0 ? ctx->copy_scalar_from_dev_runs : 1), + (long) ctx->copy_scalar_from_dev_total_runtime); + ctx->total_runtime += ctx->copy_scalar_from_dev_total_runtime; + ctx->total_runs += ctx->copy_scalar_from_dev_runs; + str_builder(&builder, + "get_envelope.copy_9955 ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->get_envelopezicopy_9955_runs, + (long) ctx->get_envelopezicopy_9955_total_runtime / + (ctx->get_envelopezicopy_9955_runs != + 0 ? ctx->get_envelopezicopy_9955_runs : 1), + (long) ctx->get_envelopezicopy_9955_total_runtime); + ctx->total_runtime += ctx->get_envelopezicopy_9955_total_runtime; + ctx->total_runs += ctx->get_envelopezicopy_9955_runs; + str_builder(&builder, + "get_envelope.copy_9960 ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->get_envelopezicopy_9960_runs, + (long) ctx->get_envelopezicopy_9960_total_runtime / + (ctx->get_envelopezicopy_9960_runs != + 0 ? ctx->get_envelopezicopy_9960_runs : 1), + (long) ctx->get_envelopezicopy_9960_total_runtime); + ctx->total_runtime += ctx->get_envelopezicopy_9960_total_runtime; + ctx->total_runs += ctx->get_envelopezicopy_9960_runs; + str_builder(&builder, + "next_chunk_board.copy_9965 ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->next_chunk_boardzicopy_9965_runs, + (long) ctx->next_chunk_boardzicopy_9965_total_runtime / + (ctx->next_chunk_boardzicopy_9965_runs != + 0 ? 
ctx->next_chunk_boardzicopy_9965_runs : 1), + (long) ctx->next_chunk_boardzicopy_9965_total_runtime); + ctx->total_runtime += ctx->next_chunk_boardzicopy_9965_total_runtime; + ctx->total_runs += ctx->next_chunk_boardzicopy_9965_runs; + str_builder(&builder, + "next_chunk_board.segmap_9619 ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->next_chunk_boardzisegmap_9619_runs, + (long) ctx->next_chunk_boardzisegmap_9619_total_runtime / + (ctx->next_chunk_boardzisegmap_9619_runs != + 0 ? ctx->next_chunk_boardzisegmap_9619_runs : 1), + (long) ctx->next_chunk_boardzisegmap_9619_total_runtime); + ctx->total_runtime += ctx->next_chunk_boardzisegmap_9619_total_runtime; + ctx->total_runs += ctx->next_chunk_boardzisegmap_9619_runs; + str_builder(&builder, + "next_chunk_board.segmap_9790 ran %5d times; avg: %8ldus; total: %8ldus\n", + ctx->next_chunk_boardzisegmap_9790_runs, + (long) ctx->next_chunk_boardzisegmap_9790_total_runtime / + (ctx->next_chunk_boardzisegmap_9790_runs != + 0 ? ctx->next_chunk_boardzisegmap_9790_runs : 1), + (long) ctx->next_chunk_boardzisegmap_9790_total_runtime); + ctx->total_runtime += ctx->next_chunk_boardzisegmap_9790_total_runtime; + ctx->total_runs += ctx->next_chunk_boardzisegmap_9790_runs; + str_builder(&builder, "%d operations with cumulative runtime: %6ldus\n", + ctx->total_runs, ctx->total_runtime); + } + return builder.str; +} +char *futhark_context_get_error(struct futhark_context *ctx) +{ + char *error = ctx->error; + + ctx->error = NULL; + return error; +} +void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f) +{ + ctx->log = f; +} +void futhark_context_pause_profiling(struct futhark_context *ctx) +{ + ctx->profiling_paused = 1; +} +void futhark_context_unpause_profiling(struct futhark_context *ctx) +{ + ctx->profiling_paused = 0; +} +int futhark_context_clear_caches(struct futhark_context *ctx) +{ + lock_lock(&ctx->lock); + ctx->peak_mem_usage_device = 0; + ctx->peak_mem_usage_default = 0; + if (ctx->error == 
NULL) + ctx->error = OPENCL_SUCCEED_NONFATAL(opencl_free_all(&ctx->opencl)); + lock_unlock(&ctx->lock); + return ctx->error != NULL; +} +static int futrts_get_envelope(struct futhark_context *ctx, + struct memblock_device *out_mem_p_9970, + struct memblock_device chunk_board_mem_9941, + int64_t n_9485); +static int futrts_next_chunk_board(struct futhark_context *ctx, + struct memblock_device *out_mem_p_9981, + struct memblock_device chunk_board_mem_9941, + struct memblock_device envelope_board_mem_9942, + int64_t n_9500, int64_t m_9501); +static int init_constants(struct futhark_context *ctx) +{ + (void) ctx; + + int err = 0; + + + cleanup: + return err; +} +static int free_constants(struct futhark_context *ctx) +{ + (void) ctx; + return 0; +} +static int futrts_get_envelope(struct futhark_context *ctx, + struct memblock_device *out_mem_p_9970, + struct memblock_device chunk_board_mem_9941, + int64_t n_9485) +{ + (void) ctx; + + int err = 0; + struct memblock_device out_mem_9954; + + out_mem_9954.references = NULL; + + bool y_9487 = slt64((int64_t) 0, n_9485); + bool index_certs_9488; + + if (!y_9487) { + ctx->error = msgprintf("Error: %s%lld%s%lld%s\n\nBacktrace:\n%s", + "Index [", (int64_t) 0, + "] out of bounds for array of shape [", n_9485, + "].", + "-> #0 gol.fut:43:17-30\n #1 gol.fut:42:1-48:33\n"); + if (memblock_unref_device(ctx, &out_mem_9954, "out_mem_9954") != 0) + return 1; + return 1; + } + + int64_t i_9490 = sub64(n_9485, (int64_t) 1); + bool x_9491 = sle64((int64_t) 0, i_9490); + bool y_9492 = slt64(i_9490, n_9485); + bool bounds_check_9493 = x_9491 && y_9492; + bool index_certs_9494; + + if (!bounds_check_9493) { + ctx->error = msgprintf("Error: %s%lld%s%lld%s\n\nBacktrace:\n%s", + "Index [", i_9490, + "] out of bounds for array of shape [", n_9485, + "].", + "-> #0 gol.fut:44:17-32\n #1 gol.fut:42:1-48:33\n"); + if (memblock_unref_device(ctx, &out_mem_9954, "out_mem_9954") != 0) + return 1; + return 1; + } + + int64_t bytes_9942 = (int64_t) 4 * 
n_9485; + struct memblock_device mem_9943; + + mem_9943.references = NULL; + if (memblock_alloc_device(ctx, &mem_9943, bytes_9942, "mem_9943")) { + err = 1; + goto cleanup; + } + if (n_9485 > 0) { + OPENCL_SUCCEED_OR_RETURN(clEnqueueCopyBuffer(ctx->opencl.queue, + chunk_board_mem_9941.mem, + mem_9943.mem, (int64_t) 0, + (int64_t) 0, n_9485, 0, + NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_dev_to_dev_runs, + &ctx->copy_dev_to_dev_total_runtime))); + if (ctx->debugging) + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + } + + int64_t group_sizze_9958; + + group_sizze_9958 = ctx->sizes.get_envelopezigroup_sizze_9958; + + int64_t num_groups_9959; + + num_groups_9959 = sdiv_up64(n_9485, group_sizze_9958); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9955, 0, + sizeof(n_9485), &n_9485)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9955, 1, + sizeof(i_9490), &i_9490)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9955, 2, + sizeof(chunk_board_mem_9941.mem), + &chunk_board_mem_9941.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9955, 3, + sizeof(mem_9943.mem), + &mem_9943.mem)); + if (1 * ((size_t) num_groups_9959 * (size_t) group_sizze_9958) != 0) { + const size_t global_work_sizze_9971[1] = {(size_t) num_groups_9959 * + (size_t) group_sizze_9958}; + const size_t local_work_sizze_9975[1] = {group_sizze_9958}; + int64_t time_start_9972 = 0, time_end_9973 = 0; + + if (ctx->debugging) { + fprintf(ctx->log, "Launching %s with global work size [", + "get_envelope.copy_9955"); + fprintf(ctx->log, "%zu", global_work_sizze_9971[0]); + fprintf(ctx->log, "] and local work size ["); + fprintf(ctx->log, "%zu", local_work_sizze_9975[0]); + fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", + (int) 0); + time_start_9972 = get_wall_time(); + } + OPENCL_SUCCEED_OR_RETURN(clEnqueueNDRangeKernel(ctx->opencl.queue, + 
ctx->get_envelopezicopy_9955, + 1, NULL, + global_work_sizze_9971, + local_work_sizze_9975, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->get_envelopezicopy_9955_runs, + &ctx->get_envelopezicopy_9955_total_runtime))); + if (ctx->debugging) { + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + time_end_9973 = get_wall_time(); + + long time_diff_9974 = time_end_9973 - time_start_9972; + + fprintf(ctx->log, "kernel %s runtime: %ldus\n", + "get_envelope.copy_9955", time_diff_9974); + } + } + if (n_9485 > 0) { + OPENCL_SUCCEED_OR_RETURN(clEnqueueCopyBuffer(ctx->opencl.queue, + chunk_board_mem_9941.mem, + mem_9943.mem, i_9490 * + n_9485, (int64_t) 2 * + n_9485, n_9485, 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_dev_to_dev_runs, + &ctx->copy_dev_to_dev_total_runtime))); + if (ctx->debugging) + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + } + + int64_t group_sizze_9963; + + group_sizze_9963 = ctx->sizes.get_envelopezigroup_sizze_9963; + + int64_t num_groups_9964; + + num_groups_9964 = sdiv_up64(n_9485, group_sizze_9963); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9960, 0, + sizeof(n_9485), &n_9485)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9960, 1, + sizeof(chunk_board_mem_9941.mem), + &chunk_board_mem_9941.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->get_envelopezicopy_9960, 2, + sizeof(mem_9943.mem), + &mem_9943.mem)); + if (1 * ((size_t) num_groups_9964 * (size_t) group_sizze_9963) != 0) { + const size_t global_work_sizze_9976[1] = {(size_t) num_groups_9964 * + (size_t) group_sizze_9963}; + const size_t local_work_sizze_9980[1] = {group_sizze_9963}; + int64_t time_start_9977 = 0, time_end_9978 = 0; + + if (ctx->debugging) { + fprintf(ctx->log, "Launching %s with global work size [", + "get_envelope.copy_9960"); + fprintf(ctx->log, "%zu", global_work_sizze_9976[0]); + 
fprintf(ctx->log, "] and local work size ["); + fprintf(ctx->log, "%zu", local_work_sizze_9980[0]); + fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", + (int) 0); + time_start_9977 = get_wall_time(); + } + OPENCL_SUCCEED_OR_RETURN(clEnqueueNDRangeKernel(ctx->opencl.queue, + ctx->get_envelopezicopy_9960, + 1, NULL, + global_work_sizze_9976, + local_work_sizze_9980, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->get_envelopezicopy_9960_runs, + &ctx->get_envelopezicopy_9960_total_runtime))); + if (ctx->debugging) { + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + time_end_9978 = get_wall_time(); + + long time_diff_9979 = time_end_9978 - time_start_9977; + + fprintf(ctx->log, "kernel %s runtime: %ldus\n", + "get_envelope.copy_9960", time_diff_9979); + } + } + if (memblock_set_device(ctx, &out_mem_9954, &mem_9943, "mem_9943") != 0) + return 1; + (*out_mem_p_9970).references = NULL; + if (memblock_set_device(ctx, &*out_mem_p_9970, &out_mem_9954, + "out_mem_9954") != 0) + return 1; + if (memblock_unref_device(ctx, &mem_9943, "mem_9943") != 0) + return 1; + if (memblock_unref_device(ctx, &out_mem_9954, "out_mem_9954") != 0) + return 1; + + cleanup: + { } + return err; +} +static int futrts_next_chunk_board(struct futhark_context *ctx, + struct memblock_device *out_mem_p_9981, + struct memblock_device chunk_board_mem_9941, + struct memblock_device envelope_board_mem_9942, + int64_t n_9500, int64_t m_9501) +{ + (void) ctx; + + int err = 0; + struct memblock_device out_mem_9954; + + out_mem_9954.references = NULL; + + int64_t nest_sizze_9733 = m_9501 * m_9501; + int64_t segmap_group_sizze_9734; + + segmap_group_sizze_9734 = + ctx->sizes.next_chunk_boardzisegmap_group_sizze_9622; + + int64_t segmap_usable_groups_9735 = sdiv_up64(nest_sizze_9733, + segmap_group_sizze_9734); + struct memblock_device mem_9945; + + mem_9945.references = NULL; + if (memblock_alloc_device(ctx, &mem_9945, 
nest_sizze_9733, "mem_9945")) { + err = 1; + goto cleanup; + } + if (ctx->debugging) + fprintf(ctx->log, "%s\n", "\n# SegMap"); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 1, + sizeof(ctx->failure_is_an_option), + &ctx->failure_is_an_option)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 3, sizeof(n_9500), &n_9500)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 4, sizeof(m_9501), &m_9501)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 5, sizeof(chunk_board_mem_9941.mem), + &chunk_board_mem_9941.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 6, + sizeof(envelope_board_mem_9942.mem), + &envelope_board_mem_9942.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9619, + 7, sizeof(mem_9945.mem), + &mem_9945.mem)); + if (1 * ((size_t) segmap_usable_groups_9735 * + (size_t) segmap_group_sizze_9734) != 0) { + const size_t global_work_sizze_9982[1] = + {(size_t) segmap_usable_groups_9735 * + (size_t) segmap_group_sizze_9734}; + const size_t local_work_sizze_9986[1] = {segmap_group_sizze_9734}; + int64_t time_start_9983 = 0, time_end_9984 = 0; + + if (ctx->debugging) { + fprintf(ctx->log, "Launching %s with global work size [", + "next_chunk_board.segmap_9619"); + fprintf(ctx->log, "%zu", global_work_sizze_9982[0]); + fprintf(ctx->log, "] and local work size ["); + fprintf(ctx->log, "%zu", local_work_sizze_9986[0]); + fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", + (int) 0); + time_start_9983 = get_wall_time(); + } + OPENCL_SUCCEED_OR_RETURN(clEnqueueNDRangeKernel(ctx->opencl.queue, + ctx->next_chunk_boardzisegmap_9619, + 1, NULL, + global_work_sizze_9982, + local_work_sizze_9986, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? 
NULL : opencl_get_event(&ctx->opencl, + &ctx->next_chunk_boardzisegmap_9619_runs, + &ctx->next_chunk_boardzisegmap_9619_total_runtime))); + if (ctx->debugging) { + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + time_end_9984 = get_wall_time(); + + long time_diff_9985 = time_end_9984 - time_start_9983; + + fprintf(ctx->log, "kernel %s runtime: %ldus\n", + "next_chunk_board.segmap_9619", time_diff_9985); + } + } + ctx->failure_is_an_option = 1; + + int64_t segmap_group_sizze_9865; + + segmap_group_sizze_9865 = + ctx->sizes.next_chunk_boardzisegmap_group_sizze_9793; + + int64_t segmap_usable_groups_9866 = sdiv_up64(nest_sizze_9733, + segmap_group_sizze_9865); + struct memblock_device mem_9948; + + mem_9948.references = NULL; + if (memblock_alloc_device(ctx, &mem_9948, nest_sizze_9733, "mem_9948")) { + err = 1; + goto cleanup; + } + if (ctx->debugging) + fprintf(ctx->log, "%s\n", "\n# SegMap"); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9790, + 1, sizeof(m_9501), &m_9501)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9790, + 2, sizeof(mem_9945.mem), + &mem_9945.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzisegmap_9790, + 3, sizeof(mem_9948.mem), + &mem_9948.mem)); + if (1 * ((size_t) segmap_usable_groups_9866 * + (size_t) segmap_group_sizze_9865) != 0) { + const size_t global_work_sizze_9987[1] = + {(size_t) segmap_usable_groups_9866 * + (size_t) segmap_group_sizze_9865}; + const size_t local_work_sizze_9991[1] = {segmap_group_sizze_9865}; + int64_t time_start_9988 = 0, time_end_9989 = 0; + + if (ctx->debugging) { + fprintf(ctx->log, "Launching %s with global work size [", + "next_chunk_board.segmap_9790"); + fprintf(ctx->log, "%zu", global_work_sizze_9987[0]); + fprintf(ctx->log, "] and local work size ["); + fprintf(ctx->log, "%zu", local_work_sizze_9991[0]); + fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", + (int) 0); + time_start_9988 = get_wall_time(); + } 
+ OPENCL_SUCCEED_OR_RETURN(clEnqueueNDRangeKernel(ctx->opencl.queue, + ctx->next_chunk_boardzisegmap_9790, + 1, NULL, + global_work_sizze_9987, + local_work_sizze_9991, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->next_chunk_boardzisegmap_9790_runs, + &ctx->next_chunk_boardzisegmap_9790_total_runtime))); + if (ctx->debugging) { + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + time_end_9989 = get_wall_time(); + + long time_diff_9990 = time_end_9989 - time_start_9988; + + fprintf(ctx->log, "kernel %s runtime: %ldus\n", + "next_chunk_board.segmap_9790", time_diff_9990); + } + } + if (memblock_unref_device(ctx, &mem_9945, "mem_9945") != 0) + return 1; + + int64_t j_9597 = add64((int64_t) 1, n_9500); + bool empty_slice_9598 = n_9500 == (int64_t) 0; + bool zzero_leq_i_p_m_t_s_9599 = sle64((int64_t) 0, n_9500); + bool i_p_m_t_s_leq_w_9600 = slt64(n_9500, m_9501); + bool i_lte_j_9601 = sle64((int64_t) 1, j_9597); + bool y_9602 = zzero_leq_i_p_m_t_s_9599 && i_p_m_t_s_leq_w_9600; + bool y_9603 = i_lte_j_9601 && y_9602; + bool ok_or_empty_9604 = empty_slice_9598 || y_9603; + bool index_ok_9605 = ok_or_empty_9604 && ok_or_empty_9604; + bool index_certs_9606; + + if (!index_ok_9605) { + ctx->error = + msgprintf("Error: %s%lld%s%lld%s%lld%s%lld%s%lld%s%lld%s\n\nBacktrace:\n%s", + "Index [", (int64_t) 1, ":", j_9597, ", ", (int64_t) 1, + ":", j_9597, "] out of bounds for array of shape [", + m_9501, "][", m_9501, "].", + "-> #0 gol.fut:40:8-31\n #1 gol.fut:30:1-40:43\n"); + if (memblock_unref_device(ctx, &mem_9948, "mem_9948") != 0) + return 1; + if (memblock_unref_device(ctx, &mem_9945, "mem_9945") != 0) + return 1; + if (memblock_unref_device(ctx, &out_mem_9954, "out_mem_9954") != 0) + return 1; + return 1; + } + + int64_t bytes_9949 = n_9500 * n_9500; + struct memblock_device mem_9950; + + mem_9950.references = NULL; + if (memblock_alloc_device(ctx, &mem_9950, bytes_9949, "mem_9950")) { + err = 1; + goto cleanup; 
+ } + + int64_t group_sizze_9968; + + group_sizze_9968 = ctx->sizes.next_chunk_boardzigroup_sizze_9968; + + int64_t num_groups_9969; + + num_groups_9969 = sdiv_up64(n_9500 * n_9500, group_sizze_9968); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzicopy_9965, 0, + sizeof(n_9500), &n_9500)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzicopy_9965, 1, + sizeof(m_9501), &m_9501)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzicopy_9965, 2, + sizeof(mem_9948.mem), + &mem_9948.mem)); + OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->next_chunk_boardzicopy_9965, 3, + sizeof(mem_9950.mem), + &mem_9950.mem)); + if (1 * ((size_t) num_groups_9969 * (size_t) group_sizze_9968) != 0) { + const size_t global_work_sizze_9992[1] = {(size_t) num_groups_9969 * + (size_t) group_sizze_9968}; + const size_t local_work_sizze_9996[1] = {group_sizze_9968}; + int64_t time_start_9993 = 0, time_end_9994 = 0; + + if (ctx->debugging) { + fprintf(ctx->log, "Launching %s with global work size [", + "next_chunk_board.copy_9965"); + fprintf(ctx->log, "%zu", global_work_sizze_9992[0]); + fprintf(ctx->log, "] and local work size ["); + fprintf(ctx->log, "%zu", local_work_sizze_9996[0]); + fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", + (int) 0); + time_start_9993 = get_wall_time(); + } + OPENCL_SUCCEED_OR_RETURN(clEnqueueNDRangeKernel(ctx->opencl.queue, + ctx->next_chunk_boardzicopy_9965, + 1, NULL, + global_work_sizze_9992, + local_work_sizze_9996, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? 
NULL : opencl_get_event(&ctx->opencl, + &ctx->next_chunk_boardzicopy_9965_runs, + &ctx->next_chunk_boardzicopy_9965_total_runtime))); + if (ctx->debugging) { + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + time_end_9994 = get_wall_time(); + + long time_diff_9995 = time_end_9994 - time_start_9993; + + fprintf(ctx->log, "kernel %s runtime: %ldus\n", + "next_chunk_board.copy_9965", time_diff_9995); + } + } + if (memblock_unref_device(ctx, &mem_9948, "mem_9948") != 0) + return 1; + if (memblock_set_device(ctx, &out_mem_9954, &mem_9950, "mem_9950") != 0) + return 1; + (*out_mem_p_9981).references = NULL; + if (memblock_set_device(ctx, &*out_mem_p_9981, &out_mem_9954, + "out_mem_9954") != 0) + return 1; + if (memblock_unref_device(ctx, &mem_9950, "mem_9950") != 0) + return 1; + if (memblock_unref_device(ctx, &mem_9948, "mem_9948") != 0) + return 1; + if (memblock_unref_device(ctx, &mem_9945, "mem_9945") != 0) + return 1; + if (memblock_unref_device(ctx, &out_mem_9954, "out_mem_9954") != 0) + return 1; + + cleanup: + { } + return err; +} +struct futhark_i8_2d { + struct memblock_device mem; + int64_t shape[2]; +} ; +struct futhark_i8_2d *futhark_new_i8_2d(struct futhark_context *ctx, const + int8_t *data, int64_t dim0, + int64_t dim1) +{ + struct futhark_i8_2d *bad = NULL; + struct futhark_i8_2d *arr = + (struct futhark_i8_2d *) malloc(sizeof(struct futhark_i8_2d)); + + if (arr == NULL) + return bad; + lock_lock(&ctx->lock); + arr->mem.references = NULL; + if (memblock_alloc_device(ctx, &arr->mem, (size_t) (dim0 * dim1) * 1, + "arr->mem")) + return NULL; + arr->shape[0] = dim0; + arr->shape[1] = dim1; + if ((size_t) (dim0 * dim1) * 1 > 0) + OPENCL_SUCCEED_OR_RETURN(clEnqueueWriteBuffer(ctx->opencl.queue, + arr->mem.mem, CL_TRUE, 0, + (size_t) (dim0 * dim1) * + 1, data + 0, 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? 
NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_dev_to_host_runs, + &ctx->copy_dev_to_host_total_runtime))); + lock_unlock(&ctx->lock); + return arr; +} +struct futhark_i8_2d *futhark_new_raw_i8_2d(struct futhark_context *ctx, const + cl_mem data, int offset, + int64_t dim0, int64_t dim1) +{ + struct futhark_i8_2d *bad = NULL; + struct futhark_i8_2d *arr = + (struct futhark_i8_2d *) malloc(sizeof(struct futhark_i8_2d)); + + if (arr == NULL) + return bad; + lock_lock(&ctx->lock); + arr->mem.references = NULL; + if (memblock_alloc_device(ctx, &arr->mem, (size_t) (dim0 * dim1) * 1, + "arr->mem")) + return NULL; + arr->shape[0] = dim0; + arr->shape[1] = dim1; + if ((size_t) (dim0 * dim1) * 1 > 0) { + OPENCL_SUCCEED_OR_RETURN(clEnqueueCopyBuffer(ctx->opencl.queue, data, + arr->mem.mem, offset, 0, + (size_t) (dim0 * dim1) * 1, + 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_dev_to_dev_runs, + &ctx->copy_dev_to_dev_total_runtime))); + if (ctx->debugging) + OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue)); + } + lock_unlock(&ctx->lock); + return arr; +} +int futhark_free_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr) +{ + lock_lock(&ctx->lock); + if (memblock_unref_device(ctx, &arr->mem, "arr->mem") != 0) + return 1; + lock_unlock(&ctx->lock); + free(arr); + return 0; +} +int futhark_values_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr, + int8_t *data) +{ + lock_lock(&ctx->lock); + if ((size_t) (arr->shape[0] * arr->shape[1]) * 1 > 0) { + OPENCL_SUCCEED_OR_RETURN(clEnqueueReadBuffer(ctx->opencl.queue, + arr->mem.mem, + ctx->failure_is_an_option ? CL_FALSE : CL_TRUE, + 0, + (size_t) (arr->shape[0] * + arr->shape[1]) * + 1, data + 0, 0, NULL, + ctx->profiling_paused || + !ctx->profiling ? 
NULL : opencl_get_event(&ctx->opencl, + &ctx->copy_host_to_dev_runs, + &ctx->copy_host_to_dev_total_runtime))); + if (ctx->failure_is_an_option && futhark_context_sync(ctx) != 0) + return 1; + } + lock_unlock(&ctx->lock); + return 0; +} +cl_mem futhark_values_raw_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr) +{ + (void) ctx; + return arr->mem.mem; +} +const int64_t *futhark_shape_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr) +{ + (void) ctx; + return arr->shape; +} +int futhark_entry_get_envelope(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0) +{ + struct memblock_device chunk_board_mem_9941; + + chunk_board_mem_9941.references = NULL; + + int64_t n_9485; + struct memblock_device out_mem_9954; + + out_mem_9954.references = NULL; + + int ret = 0; + + lock_lock(&ctx->lock); + chunk_board_mem_9941 = in0->mem; + n_9485 = in0->shape[0]; + n_9485 = in0->shape[1]; + if (!(n_9485 == in0->shape[0] && n_9485 == in0->shape[1])) { + ret = 1; + if (!ctx->error) + ctx->error = + msgprintf("Error: entry point arguments have invalid sizes.\n"); + } else { + ret = futrts_get_envelope(ctx, &out_mem_9954, chunk_board_mem_9941, + n_9485); + if (ret == 0) { + assert((*out0 = + (struct futhark_i8_2d *) malloc(sizeof(struct futhark_i8_2d))) != + NULL); + (*out0)->mem = out_mem_9954; + (*out0)->shape[0] = 4; + (*out0)->shape[1] = n_9485; + } + } + lock_unlock(&ctx->lock); + return ret; +} +int futhark_entry_next_chunk_board(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0, const + struct futhark_i8_2d *in1) +{ + struct memblock_device chunk_board_mem_9941; + + chunk_board_mem_9941.references = NULL; + + struct memblock_device envelope_board_mem_9942; + + envelope_board_mem_9942.references = NULL; + + int64_t n_9500; + int64_t m_9501; + struct memblock_device out_mem_9954; + + out_mem_9954.references = NULL; + + int ret = 0; + + lock_lock(&ctx->lock); + 
chunk_board_mem_9941 = in0->mem; + n_9500 = in0->shape[0]; + n_9500 = in0->shape[1]; + envelope_board_mem_9942 = in1->mem; + m_9501 = in1->shape[1]; + if (!((n_9500 == in0->shape[0] && n_9500 == in0->shape[1]) && (4 == + in1->shape[0] && + m_9501 == + in1->shape[1]))) { + ret = 1; + if (!ctx->error) + ctx->error = + msgprintf("Error: entry point arguments have invalid sizes.\n"); + } else { + ret = futrts_next_chunk_board(ctx, &out_mem_9954, chunk_board_mem_9941, + envelope_board_mem_9942, n_9500, m_9501); + if (ret == 0) { + assert((*out0 = + (struct futhark_i8_2d *) malloc(sizeof(struct futhark_i8_2d))) != + NULL); + (*out0)->mem = out_mem_9954; + (*out0)->shape[0] = n_9500; + (*out0)->shape[1] = n_9500; + } + } + lock_unlock(&ctx->lock); + return ret; +} diff --git a/futmpi/gol.fut b/futmpi/gol.fut new file mode 100644 index 0000000..25eae32 --- /dev/null +++ b/futmpi/gol.fut @@ -0,0 +1,48 @@ +let count_neighbours [n] (board: [n][n]i8) : [n][n]i8 = + let north = rotate (-1) board + let south = rotate 1 board + let east = map(rotate 1) board + let west = map(rotate (-1)) board + + let north_east = map(rotate 1) north + let north_west = map(rotate (-1)) north + let south_east = map(rotate 1) south + let south_west = map(rotate (-1)) south + + in map3 (\(nwr,nr,ner) (wr, br, er) (swr, sr, ser) -> + map3 (\(nw,n,ne) (w, _, e) (sw, s, se) -> nw + n + ne + w + e + sw + s + se) + (zip3 nwr nr ner) (zip3 wr br er) (zip3 swr sr ser)) + (zip3 north_west north north_east) (zip3 west board east) (zip3 south_west south south_east) + +let augment_board [n][m] (chunk_board :[n][n]i8) (envelope_board: [4][m]i8): [m][m]i8 = + tabulate_2d (m) (m) (\i j -> + -- North + if (i == 0) then envelope_board[0,j] + -- East + else if (j == m-1) then envelope_board[1,i] + -- South + else if (i == m-1) then envelope_board[2,j] + -- West + else if (j == 0) then envelope_board[3,i] + else chunk_board[i-1,j-1]) + + +entry next_chunk_board [n][m] (chunk_board :[n][n]i8) (envelope_board: 
[4][m]i8) :[n][n]i8 = + let augmented_board = augment_board chunk_board envelope_board + let neighbours = count_neighbours augmented_board + let next_board = map2 (\augmented_board_r neighbours_r -> + map2(\cell nb_alive_cells -> + if (cell == 1 && (nb_alive_cells == 2 || nb_alive_cells == 3)) || (cell == 0 && nb_alive_cells == 3) + then 1 + else 0) + augmented_board_r neighbours_r) + augmented_board neighbours + in next_board[1:n+1, 1:n+1] :> [n][n]i8 + +entry get_envelope [n] (chunk_board: [n][n]i8): [4][n]i8 = + let north = chunk_board[0] + let south = chunk_board[n-1] + let tr_chunk_board = transpose chunk_board + let east = tr_chunk_board[n-1] + let west = tr_chunk_board[0] + in [north, east, south, west] diff --git a/futmpi/gol.h b/futmpi/gol.h new file mode 100644 index 0000000..4d300c1 --- /dev/null +++ b/futmpi/gol.h @@ -0,0 +1,122 @@ +#pragma once + +// Headers + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <stdio.h> +#include <float.h> +#define CL_TARGET_OPENCL_VERSION 120 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#define CL_SILENCE_DEPRECATION +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Initialisation + +struct futhark_context_config ; +struct futhark_context_config *futhark_context_config_new(void); +void futhark_context_config_free(struct futhark_context_config *cfg); +void futhark_context_config_add_build_option(struct futhark_context_config *cfg, + const char *opt); +void futhark_context_config_set_debugging(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_profiling(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_logging(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_device(struct futhark_context_config *cfg, const + char *s); +void futhark_context_config_set_platform(struct futhark_context_config *cfg, + const char *s); +void 
+futhark_context_config_select_device_interactively(struct futhark_context_config *cfg); +void futhark_context_config_list_devices(struct futhark_context_config *cfg); +void futhark_context_config_dump_program_to(struct futhark_context_config *cfg, + const char *path); +void +futhark_context_config_load_program_from(struct futhark_context_config *cfg, + const char *path); +void futhark_context_config_dump_binary_to(struct futhark_context_config *cfg, + const char *path); +void futhark_context_config_load_binary_from(struct futhark_context_config *cfg, + const char *path); +void +futhark_context_config_set_default_group_size(struct futhark_context_config *cfg, + int size); +void +futhark_context_config_set_default_num_groups(struct futhark_context_config *cfg, + int num); +void +futhark_context_config_set_default_tile_size(struct futhark_context_config *cfg, + int num); +void +futhark_context_config_set_default_reg_tile_size(struct futhark_context_config *cfg, + int num); +void +futhark_context_config_set_default_threshold(struct futhark_context_config *cfg, + int num); +int futhark_context_config_set_size(struct futhark_context_config *cfg, const + char *size_name, size_t size_value); +struct futhark_context ; +struct futhark_context *futhark_context_new(struct futhark_context_config *cfg); +struct futhark_context +*futhark_context_new_with_command_queue(struct futhark_context_config *cfg, + cl_command_queue queue); +void futhark_context_free(struct futhark_context *ctx); +cl_command_queue futhark_context_get_command_queue(struct futhark_context *ctx); +int futhark_get_num_sizes(void); +const char *futhark_get_size_name(int); +const char *futhark_get_size_class(int); + +// Arrays + +struct futhark_i8_2d ; +struct futhark_i8_2d *futhark_new_i8_2d(struct futhark_context *ctx, const + int8_t *data, int64_t dim0, + int64_t dim1); +struct futhark_i8_2d *futhark_new_raw_i8_2d(struct futhark_context *ctx, const + cl_mem data, int offset, + int64_t dim0, int64_t dim1); 
+int futhark_free_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr); +int futhark_values_i8_2d(struct futhark_context *ctx, struct futhark_i8_2d *arr, + int8_t *data); +cl_mem futhark_values_raw_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr); +const int64_t *futhark_shape_i8_2d(struct futhark_context *ctx, + struct futhark_i8_2d *arr); + +// Opaque values + + +// Entry points + +int futhark_entry_get_envelope(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0); +int futhark_entry_next_chunk_board(struct futhark_context *ctx, + struct futhark_i8_2d **out0, const + struct futhark_i8_2d *in0, const + struct futhark_i8_2d *in1); + +// Miscellaneous + +int futhark_context_sync(struct futhark_context *ctx); +char *futhark_context_report(struct futhark_context *ctx); +char *futhark_context_get_error(struct futhark_context *ctx); +void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f); +void futhark_context_pause_profiling(struct futhark_context *ctx); +void futhark_context_unpause_profiling(struct futhark_context *ctx); +int futhark_context_clear_caches(struct futhark_context *ctx); +#define FUTHARK_BACKEND_opencl +#ifdef __cplusplus +} +#endif diff --git a/futmpi/main.c b/futmpi/main.c new file mode 100644 index 0000000..ab2413a --- /dev/null +++ b/futmpi/main.c @@ -0,0 +1,346 @@ +#include <stdio.h> +#include <stdint.h> +#include <mpi.h> +#include <math.h> +#include <stdbool.h> +#include <unistd.h> +#include "gol.h" +#include "gfx.h" + +#define BOARD_N 800 + +#define INDEX_2D_TO_1D(y, x, nb_columns) ((y) * nb_columns + (x)) + +#define NORTH_INDEX 0 +#define EAST_INDEX 1 +#define SOUTH_INDEX 2 +#define WEST_INDEX 3 + +#define NORTH_ROW_TAG 0 +#define EAST_COLUMN_TAG 1 +#define SOUTH_ROW_TAG 2 +#define WEST_COLUMN_TAG 3 + +#define NORTH_EAST_CELL_TAG 4 +#define SOUTH_EAST_CELL_TAG 5 +#define SOUTH_WEST_CELL_TAG 6 +#define NORTH_WEST_CELL_TAG 7 + +#define CHUNK_BOARD_TAG 8 + +//void 
printChunkBoard(int8_t *chunkBoard, int n1, int n2) { +// for (int i = 0; i < n1; ++i) { +// for (int j = 0; j < n2; ++j) { +// printf("%d ", chunkBoard[INDEX_2D_TO_1D(i, j, n2)]); +// } +// printf("\n"); +// } +//} + +int createGridCommunicators(MPI_Comm *cartComm, MPI_Comm *rowComm, MPI_Comm *colComm, int nProc) { + int gridN = (int) sqrt(nProc); + int dimensions[2] = {gridN, gridN}; + int periods[2] = {true, true}; // Cyclic on column for B matrix + + MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, periods, 1, cartComm); + + /* Create row communicator */ + int remainDims[2] = {false, true}; + MPI_Cart_sub(*cartComm, remainDims, rowComm); + + /* Create column communicator */ + remainDims[0] = true; // rows + remainDims[1] = false; // columns + MPI_Cart_sub(*cartComm, remainDims, colComm); + return gridN; +} + +int *divideBoard(int n, int chunkN, int nProc) { + int *indexes = calloc((size_t) nProc * 2, sizeof(int)); + for (int i = 0, y = 0, x = 0; i < nProc; ++i) { + indexes[i * 2] = y; + indexes[i * 2 + 1] = x; + + x += (int) chunkN; + if (x >= (int) n) { + x = 0; + y += (int) chunkN; + } + } + return indexes; +} + +void initChunkBoard(int8_t *chunkBoard, int chunkN) { + for (int i = 0; i < chunkN; ++i) { + for (int j = 0; j < chunkN; ++j) { + chunkBoard[INDEX_2D_TO_1D(i, j, chunkN)] = rand() % 2; + } + } +} + +void shareAndBuildEnvelope(int8_t *chunkBoardMyEnvelope, int8_t *chunkBoardEnvelope, MPI_Comm rowComm, + MPI_Comm colComm, int gridN, int coordinates[2], int chunkN, int chunkM) { + int coordinateY = coordinates[0]; + int coordinateX = coordinates[1]; + MPI_Request requests[16] = {0}; + int iRequest = 0; + + // North + { + int8_t *chunkBoardMyEnvelopeNorth = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, 0, chunkN)]; + int8_t *chunkBoardEnvelopeNorth = &chunkBoardEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, 1, chunkM)]; + int destSource = (coordinateY - 1) < 0 ? 
(gridN - 1) : (coordinateY - 1); + + MPI_Isend(chunkBoardMyEnvelopeNorth, chunkN, MPI_INT8_T, destSource, NORTH_ROW_TAG, colComm, + &requests[iRequest++]); + /* Neighbour send south row, which correspond to north envelope */ + MPI_Irecv(chunkBoardEnvelopeNorth, chunkN, MPI_INT8_T, destSource, SOUTH_ROW_TAG, colComm, + &requests[iRequest++]); + } + + // East + { + int8_t *chunkBoardMyEnvelopeEast = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(EAST_INDEX, 0, chunkN)]; + int8_t *chunkBoardEnvelopeEast = &chunkBoardEnvelope[INDEX_2D_TO_1D(EAST_INDEX, 1, chunkM)]; + int destSource = (coordinateX + 1) % gridN; + + MPI_Isend(chunkBoardMyEnvelopeEast, chunkN, MPI_INT8_T, destSource, EAST_COLUMN_TAG, rowComm, + &requests[iRequest++]); + /* Neighbour send west column, which correspond to east envelope */ + MPI_Irecv(chunkBoardEnvelopeEast, chunkN, MPI_INT8_T, destSource, WEST_COLUMN_TAG, rowComm, + &requests[iRequest++]); + } + + // South + { + int8_t *chunkBoardMyEnvelopeSouth = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, 0, chunkN)]; + int8_t *chunkBoardEnvelopeSouth = &chunkBoardEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, 1, chunkM)]; + int destSource = (coordinateY + 1) % gridN; + + MPI_Isend(chunkBoardMyEnvelopeSouth, chunkN, MPI_INT8_T, destSource, SOUTH_ROW_TAG, colComm, + &requests[iRequest++]); + /* Neighbour send north row, which correspond to south envelope */ + MPI_Irecv(chunkBoardEnvelopeSouth, chunkN, MPI_INT8_T, destSource, NORTH_ROW_TAG, colComm, + &requests[iRequest++]); + } + + // West + { + int8_t *chunkBoardMyEnvelopeWest = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(WEST_INDEX, 0, chunkN)]; + int8_t *chunkBoardEnvelopeWest = &chunkBoardEnvelope[INDEX_2D_TO_1D(WEST_INDEX, 1, chunkM)]; + int destSource = (coordinateX - 1) < 0 ? 
(gridN - 1) : (coordinateX - 1); + + MPI_Isend(chunkBoardMyEnvelopeWest, chunkN, MPI_INT8_T, destSource, WEST_COLUMN_TAG, rowComm, + &requests[iRequest++]); + /* Neighbour send east column, which correspond to west envelope */ + MPI_Irecv(chunkBoardEnvelopeWest, chunkN, MPI_INT8_T, destSource, EAST_COLUMN_TAG, rowComm, + &requests[iRequest++]); + } + + int8_t missingCells[4] = {0}; + + // North-East + { + int8_t *chunkBoardMyEnvelopeNorthEast = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, chunkN - 1, chunkN)]; + int destSrcY = (coordinateY - 1) < 0 ? gridN - 1 : coordinateY - 1; + int destSrcX = (coordinateX + 1) % gridN; + int destSource = INDEX_2D_TO_1D(destSrcY, destSrcX, gridN); + + MPI_Isend(chunkBoardMyEnvelopeNorthEast, 1, MPI_INT8_T, destSource, NORTH_EAST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + MPI_Irecv(&missingCells[1], 1, MPI_INT8_T, destSource, SOUTH_WEST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + } + + // South-East + { + int8_t *chunkBoardMyEnvelopeSouthEast = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, chunkN - 1, chunkN)]; + int destSrcY = (coordinateY + 1) % gridN; + int destSrcX = (coordinateX + 1) % gridN; + int destSource = INDEX_2D_TO_1D(destSrcY, destSrcX, gridN); + + MPI_Isend(chunkBoardMyEnvelopeSouthEast, 1, MPI_INT8_T, destSource, SOUTH_EAST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + MPI_Irecv(&missingCells[2], 1, MPI_INT8_T, destSource, NORTH_WEST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + } + + // South-West + { + int8_t *chunkBoardMyEnvelopeSouthWest = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, 0, chunkN)]; + int destSrcY = (coordinateY + 1) % gridN; + int destSrcX = (coordinateX - 1) < 0 ? 
gridN - 1 : coordinateX - 1; + int destSource = INDEX_2D_TO_1D(destSrcY, destSrcX, gridN); + + MPI_Isend(chunkBoardMyEnvelopeSouthWest, 1, MPI_INT8_T, destSource, SOUTH_WEST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + MPI_Irecv(&missingCells[3], 1, MPI_INT8_T, destSource, NORTH_EAST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + } + + // North-West + { + int8_t *chunkBoardMyEnvelopeNorthWest = &chunkBoardMyEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, 0, chunkN)]; + int destSrcY = (coordinateY - 1) < 0 ? gridN - 1 : coordinateY - 1; + int destSrcX = (coordinateX - 1) < 0 ? gridN - 1 : coordinateX - 1; + int destSource = INDEX_2D_TO_1D(destSrcY, destSrcX, gridN); + + MPI_Isend(chunkBoardMyEnvelopeNorthWest, 1, MPI_INT8_T, destSource, NORTH_WEST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest++]); + MPI_Irecv(&missingCells[0], 1, MPI_INT8_T, destSource, SOUTH_EAST_CELL_TAG, MPI_COMM_WORLD, + &requests[iRequest]); + } + + MPI_Waitall(16, requests, MPI_STATUSES_IGNORE); + + chunkBoardEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, chunkN, chunkM)] = chunkBoardEnvelope[INDEX_2D_TO_1D( + EAST_INDEX, 0, chunkM)] = missingCells[1]; + chunkBoardEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, chunkN, chunkM)] = chunkBoardEnvelope[INDEX_2D_TO_1D(EAST_INDEX, + chunkN, + chunkM)] = missingCells[2]; + chunkBoardEnvelope[INDEX_2D_TO_1D(SOUTH_INDEX, 0, chunkM)] = chunkBoardEnvelope[INDEX_2D_TO_1D(WEST_INDEX, + chunkN, + chunkM)] = missingCells[3]; + chunkBoardEnvelope[INDEX_2D_TO_1D(NORTH_INDEX, 0, chunkM)] = chunkBoardEnvelope[INDEX_2D_TO_1D(WEST_INDEX, + 0, + chunkM)] = missingCells[0]; +} + +void chunkBoardToBoard(int8_t *board, int n, const int8_t *chunkBoard, int chunkN, const int *indexes, int rank) { + int y = indexes[rank * 2]; + int x = indexes[rank * 2 + 1]; + + for (int i = 0; i < chunkN; ++i) { + for (int j = 0; j < chunkN; ++j) { + board[INDEX_2D_TO_1D(y + i, x + j, n)] = chunkBoard[INDEX_2D_TO_1D(i, j, chunkN)]; + } + } +} + +int main(int argc, char *argv[]) { + int myRank; + int 
nProc; + + /* MPI Initialization */ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + MPI_Comm_size(MPI_COMM_WORLD, &nProc); + srand((unsigned int) myRank); + + MPI_Comm cartComm, rowComm, colComm; + int gridN = createGridCommunicators(&cartComm, &rowComm, &colComm, nProc); + + int myCartRank; + MPI_Comm_rank(cartComm, &myCartRank); + + int coordinates[2] = {0}; + MPI_Cart_coords(cartComm, myCartRank, 2, coordinates); + + /* Futhark Initialization */ + struct futhark_context_config *contextConfig = futhark_context_config_new(); + futhark_context_config_set_device(contextConfig, "AMD"); + struct futhark_context *futharkContext = futhark_context_new(contextConfig); + + /* GFX Initialization */ + struct gfx_context_t *gfxContext = myRank == 0 ? gfx_create("Game of Life", BOARD_N, BOARD_N) : NULL; + if (myRank == 0 && !gfxContext) { + fprintf(stderr, "Graphic mode initialization failed!\n"); + return EXIT_FAILURE; + } + if (myRank == 0) { + SDL_ShowCursor(SDL_ENABLE); + } + + /* GoL Initialization */ + int chunkN = (int) (BOARD_N / sqrt(nProc)); + int chunkNN = chunkN * chunkN; + int chunkM = chunkN + 2; + int *indexes = divideBoard(BOARD_N, chunkN, nProc); + + int8_t *board = myRank == 0 ? 
calloc(BOARD_N * BOARD_N, sizeof(int8_t)) : NULL; + int8_t *chunkBoard = calloc((size_t) chunkNN, sizeof(int8_t)); + int8_t *chunkBoardMyEnvelope = calloc(((size_t) (4 * chunkN)), sizeof(int8_t)); + int8_t *chunkBoardEnvelope = calloc(((size_t) (4 * chunkM)), sizeof(int8_t)); + + initChunkBoard(chunkBoard, chunkN); + + bool exit = false; + while (!exit) { + struct futhark_i8_2d *futChunkBoard = futhark_new_i8_2d(futharkContext, chunkBoard, chunkN, chunkN); + futhark_context_sync(futharkContext); + struct futhark_i8_2d *futChunkBoardMyEnvelope; + futhark_entry_get_envelope(futharkContext, &futChunkBoardMyEnvelope, futChunkBoard); + futhark_context_sync(futharkContext); + futhark_values_i8_2d(futharkContext, futChunkBoardMyEnvelope, chunkBoardMyEnvelope); + futhark_context_sync(futharkContext); + + shareAndBuildEnvelope(chunkBoardMyEnvelope, chunkBoardEnvelope, rowComm, colComm, gridN, coordinates, chunkN, + chunkM); + + struct futhark_i8_2d *futChunkBoardEnvelope = futhark_new_i8_2d(futharkContext, chunkBoardEnvelope, 4, chunkM); + futhark_context_sync(futharkContext); + struct futhark_i8_2d *futNextChunkBoard; + futhark_entry_next_chunk_board(futharkContext, &futNextChunkBoard, futChunkBoard, futChunkBoardEnvelope); + futhark_context_sync(futharkContext); + futhark_values_i8_2d(futharkContext, futNextChunkBoard, chunkBoard); + futhark_context_sync(futharkContext); + + if (myRank == 0) { + chunkBoardToBoard(board, BOARD_N, chunkBoard, chunkN, indexes, myRank); + int8_t *tmpChunkBoard = calloc((size_t) chunkNN, sizeof(int8_t)); + MPI_Status status = {0}; + for (int i = 0; i < nProc - 1; ++i) { + MPI_Recv(tmpChunkBoard, chunkNN, MPI_INT8_T, MPI_ANY_SOURCE, CHUNK_BOARD_TAG, MPI_COMM_WORLD, &status); + chunkBoardToBoard(board, BOARD_N, tmpChunkBoard, chunkN, indexes, status.MPI_SOURCE); + } + free(tmpChunkBoard); + } else { + MPI_Send(chunkBoard, chunkNN, MPI_INT8_T, 0, CHUNK_BOARD_TAG, MPI_COMM_WORLD); + } + + if (myRank == 0) { + SDL_PumpEvents(); + SDL_Event event; + 
SDL_PollEvent(&event); + + exit = gfx_keypressed() == SDLK_ESCAPE || + (event.type == SDL_WINDOWEVENT && event.window.event == SDL_WINDOWEVENT_CLOSE); + + gfx_clear(gfxContext, COLOR_BLACK); + for (int y = 0; y < BOARD_N; ++y) { + for (int x = 0; x < BOARD_N; ++x) { + int cell = (int) board[INDEX_2D_TO_1D(y, x, BOARD_N)]; + gfx_putpixel(gfxContext, x, y, MAKE_COLOR(cell * 255, cell * 255, cell * 255)); + } + } + gfx_present(gfxContext); + } + + futhark_context_sync(futharkContext); + futhark_free_i8_2d(futharkContext, futChunkBoard); + futhark_free_i8_2d(futharkContext, futChunkBoardMyEnvelope); + futhark_free_i8_2d(futharkContext, futChunkBoardEnvelope); + futhark_free_i8_2d(futharkContext, futNextChunkBoard); + + MPI_Bcast(&exit, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + usleep(16666); + } + + free(chunkBoard); + free(chunkBoardEnvelope); + free(chunkBoardMyEnvelope); + + if (myRank == 0) { + free(board); + gfx_destroy(gfxContext); + } + + futhark_context_free(futharkContext); + futhark_context_config_free(contextConfig); + + MPI_Comm_free(&cartComm); + MPI_Comm_free(&rowComm); + MPI_Comm_free(&colComm); + MPI_Finalize(); + return 0; +} diff --git a/game_of_life/CMakeLists.txt b/game_of_life/CMakeLists.txt deleted file mode 100644 index 1722508..0000000 --- a/game_of_life/CMakeLists.txt +++ /dev/null @@ -1,55 +0,0 @@ -cmake_minimum_required(VERSION 3.17) -project(game_of_life C) - -set(CMAKE_C_STANDARD 11) - -include_directories(".") - -if (CMAKE_BUILD_TYPE MATCHES Debug) - set(GCC_COMPILE_FLAGS "-Wall -Wextra -pedantic -fsanitize=address -fsanitize=null") - if (CMAKE_SYSTEM_NAME MATCHES "Linux") - set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -fsanitize=leak") - endif () -elseif (CMAKE_BUILD_TYPE MATCHES Release) - set(GCC_COMPILE_FLAGS "-g") -endif () - -if (CMAKE_SYSTEM_NAME MATCHES "Linux") - execute_process(COMMAND sdl2-config --cflags OUTPUT_VARIABLE SDL2_C_FLAGS) - include_directories(${SDL2_C_FLAGS}) -endif () - -if (CMAKE_SYSTEM_NAME MATCHES "Darwin") - 
include_directories(/usr/local/include) -endif () - -find_package(MPI REQUIRED) -include_directories(${MPI_C_INCLUDE_PATH}) - -set(CMAKE_MACRO_FLAGS -DPROGHEADER='\"${CMAKE_CURRENT_SOURCE_DIR}/gol.h\"') - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS} ${CMAKE_MACRO_FLAGS}") - -add_custom_target( - futhark_opencl - COMMAND futhark opencl ${CMAKE_CURRENT_SOURCE_DIR}/gol.fut --library -) -add_executable(game_of_life_opencl gol.c gol.h main.c lib/github.com/diku-dk/lys/liblys.c lib/github.com/diku-dk/lys/liblys.h lib/github.com/diku-dk/lys/context_setup.c lib/github.com/diku-dk/lys/context_setup.h ../lib/fpmpi.c ../lib/fpmpi.h ../lib/fp.h ../lib/fp.c ../lib/dispatch.c ../lib/dispatch.h) - -if (CMAKE_SYSTEM_NAME MATCHES "Darwin") - target_link_libraries(game_of_life_opencl "-framework OpenCL" m SDL2 ${MPI_C_LIBRARIES}) -endif () - -if (CMAKE_SYSTEM_NAME MATCHES "Linux") - target_link_libraries(game_of_life_opencl OpenCL m SDL2 ${MPI_C_LIBRARIES}) -endif () - -add_dependencies(game_of_life_opencl futhark_opencl) - -add_custom_target( - futhark_multicore - COMMAND futhark multicore ${CMAKE_CURRENT_SOURCE_DIR}/gol.fut --library -) -add_executable(game_of_life_multicore gol.c gol.h main.c lib/github.com/diku-dk/lys/liblys.c lib/github.com/diku-dk/lys/liblys.h lib/github.com/diku-dk/lys/context_setup.c lib/github.com/diku-dk/lys/context_setup.h ../lib/fpmpi.c ../lib/fpmpi.h ../lib/fp.h ../lib/fp.c ../lib/dispatch.c ../lib/dispatch.h) -add_dependencies(game_of_life_multicore futhark_multicore) -target_link_libraries(game_of_life_multicore m pthread SDL2 ${MPI_C_LIBRARIES}) diff --git a/game_of_life/Makefile b/game_of_life/Makefile deleted file mode 100644 index 7cc239a..0000000 --- a/game_of_life/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -all: release debug - -release: - mkdir -p "cmake-build-release" - cmake -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release - $(MAKE) -C cmake-build-release all - -release/multicore: - mkdir -p "cmake-build-release" - cmake 
-DCMAKE_BUILD_TYPE=Release -Bcmake-build-release - $(MAKE) -C cmake-build-release game_of_life_multicore - -release/opencl: - mkdir -p "cmake-build-release" - cmake -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release - $(MAKE) -C cmake-build-release game_of_life_opencl - -debug: - mkdir -p "cmake-build-debug" - cmake -DCMAKE_BUILD_TYPE=Debug -Bcmake-build-debug - $(MAKE) -C cmake-build-release all - - -debug/multicore: - mkdir -p "cmake-build-debug" - cmake -DCMAKE_BUILD_TYPE=Debug -Bcmake-build-debug - $(MAKE) -C cmake-build-debug game_of_life_multicore - -debug/opencl: - mkdir -p "cmake-build-debug" - cmake -DCMAKE_BUILD_TYPE=Debug -Bcmake-build-debug - $(MAKE) -C cmake-build-debug game_of_life_opencl - -.PHONY: release release/multicore release/opencl debug debug/multicore debug/opencl diff --git a/game_of_life/README.md b/game_of_life/README.md deleted file mode 100644 index d31463c..0000000 --- a/game_of_life/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Jeu de la vie en Futhark/C - -Le but de ce projet est de créer le jeu de la vie en Futhark + C avec l'affichage du monde dans une fenêtre SDL gérée par Futhark. -La contrainte de cette version est que le monde est représenté dans un tableau en une dimension. 
- -## Construire le projet - -* Exécuter la commande `futhark pkg sync` -* Exécuter la commande `make` -* Les exécutables sont présents dans le dossier `cmake-build-debug` et/ou `cmake-build-release` - * `./game_of_life_opencl` - * `./game_of_life_multicore` diff --git a/game_of_life/futhark.pkg b/game_of_life/futhark.pkg deleted file mode 100644 index 80bc4b6..0000000 --- a/game_of_life/futhark.pkg +++ /dev/null @@ -1,3 +0,0 @@ -require { - github.com/diku-dk/lys 0.1.12 #34e5ff985fefac9a9627d49e26a19ef5352e7019 -} diff --git a/game_of_life/gol.c b/game_of_life/gol.c deleted file mode 100644 index 90ca1da..0000000 --- a/game_of_life/gol.c +++ /dev/null @@ -1,5273 +0,0 @@ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#ifdef __GNUC__ -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wparentheses" -#pragma GCC diagnostic ignored "-Wunused-label" -#pragma GCC diagnostic ignored "-Wunused-but-set-variable" -#endif -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wparentheses" -#pragma clang diagnostic ignored "-Wunused-label" -#endif -// Headers - -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <stdio.h> -#include <float.h> - -#ifdef __cplusplus -extern "C" { -#endif - -// Initialisation - -struct futhark_context_config ; -struct futhark_context_config *futhark_context_config_new(void); -void futhark_context_config_free(struct futhark_context_config *cfg); -void futhark_context_config_set_debugging(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_profiling(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_logging(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_num_threads(struct futhark_context_config *cfg, - int n); -struct futhark_context ; 
-struct futhark_context *futhark_context_new(struct futhark_context_config *cfg); -void futhark_context_free(struct futhark_context *ctx); -int futhark_context_sync(struct futhark_context *ctx); -int futhark_context_config_set_size(struct futhark_context_config *cfg, const - char *size_name, size_t size_value); -int futhark_get_num_sizes(void); -const char *futhark_get_size_name(int); -const char *futhark_get_size_class(int); - -// Arrays - -struct futhark_i8_1d ; -struct futhark_i8_1d *futhark_new_i8_1d(struct futhark_context *ctx, const - int8_t *data, int64_t dim0); -struct futhark_i8_1d *futhark_new_raw_i8_1d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0); -int futhark_free_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr); -int futhark_values_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr, - int8_t *data); -char *futhark_values_raw_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr); -const int64_t *futhark_shape_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr); -struct futhark_u32_2d ; -struct futhark_u32_2d *futhark_new_u32_2d(struct futhark_context *ctx, const - uint32_t *data, int64_t dim0, - int64_t dim1); -struct futhark_u32_2d *futhark_new_raw_u32_2d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0, int64_t dim1); -int futhark_free_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); -int futhark_values_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr, uint32_t *data); -char *futhark_values_raw_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); -const int64_t *futhark_shape_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); - -// Opaque values - -struct futhark_opaque_state ; -int futhark_free_opaque_state(struct futhark_context *ctx, - struct futhark_opaque_state *obj); -int futhark_store_opaque_state(struct futhark_context *ctx, const - struct futhark_opaque_state *obj, void **p, - 
size_t *n); -struct futhark_opaque_state -*futhark_restore_opaque_state(struct futhark_context *ctx, const void *p); - -// Entry points - -int futhark_entry_init(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const - struct futhark_i8_1d *in0, const int64_t in1, const - int64_t in2, const int64_t in3); -int futhark_entry_key(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const - struct futhark_opaque_state *in2); -int futhark_entry_mouse(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const int32_t in2, const - struct futhark_opaque_state *in3); -int futhark_entry_render(struct futhark_context *ctx, - struct futhark_u32_2d **out0, const - struct futhark_opaque_state *in0); -int futhark_entry_resize(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int64_t in0, - const int64_t in1, const - struct futhark_opaque_state *in2); -int futhark_entry_step(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const float in0, - const struct futhark_opaque_state *in1); -int futhark_entry_wheel(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const - struct futhark_opaque_state *in2); - -// Miscellaneous - -char *futhark_context_report(struct futhark_context *ctx); -char *futhark_context_get_error(struct futhark_context *ctx); -void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f); -void futhark_context_pause_profiling(struct futhark_context *ctx); -void futhark_context_unpause_profiling(struct futhark_context *ctx); -int futhark_context_clear_caches(struct futhark_context *ctx); -#define FUTHARK_BACKEND_multicore -#ifdef __cplusplus -} -#endif -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <math.h> -#include <stdint.h> -#undef NDEBUG -#include <assert.h> -#include <stdarg.h> -// Start of util.h. 
-// -// Various helper functions that are useful in all generated C code. - -#include <errno.h> -#include <string.h> - -static const char *fut_progname = "(embedded Futhark)"; - -static void futhark_panic(int eval, const char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - fprintf(stderr, "%s: ", fut_progname); - vfprintf(stderr, fmt, ap); - va_end(ap); - exit(eval); -} - -// For generating arbitrary-sized error messages. It is the callers -// responsibility to free the buffer at some point. -static char* msgprintf(const char *s, ...) { - va_list vl; - va_start(vl, s); - size_t needed = 1 + (size_t)vsnprintf(NULL, 0, s, vl); - char *buffer = (char*) malloc(needed); - va_start(vl, s); // Must re-init. - vsnprintf(buffer, needed, s, vl); - return buffer; -} - - -static inline void check_err(int errval, int sets_errno, const char *fun, int line, - const char *msg, ...) { - if (errval) { - char errnum[10]; - - va_list vl; - va_start(vl, msg); - - fprintf(stderr, "ERROR: "); - vfprintf(stderr, msg, vl); - fprintf(stderr, " in %s() at line %d with error code %s\n", - fun, line, - sets_errno ? strerror(errno) : errnum); - exit(errval); - } -} - -#define CHECK_ERR(err, msg...) check_err(err, 0, __func__, __LINE__, msg) -#define CHECK_ERRNO(err, msg...) check_err(err, 1, __func__, __LINE__, msg) - -// Read the rest of an open file into a NUL-terminated string; returns -// NULL on error. -static void* fslurp_file(FILE *f, size_t *size) { - size_t start = ftell(f); - fseek(f, 0, SEEK_END); - size_t src_size = ftell(f)-start; - fseek(f, start, SEEK_SET); - unsigned char *s = (unsigned char*) malloc(src_size + 1); - if (fread(s, 1, src_size, f) != src_size) { - free(s); - s = NULL; - } else { - s[src_size] = '\0'; - } - - if (size) { - *size = src_size; - } - - return s; -} - -// Read a file into a NUL-terminated string; returns NULL on error. 
-static void* slurp_file(const char *filename, size_t *size) { - FILE *f = fopen(filename, "rb"); // To avoid Windows messing with linebreaks. - if (f == NULL) return NULL; - unsigned char *s = fslurp_file(f, size); - fclose(f); - return s; -} - -// Dump 'n' bytes from 'buf' into the file at the designated location. -// Returns 0 on success. -static int dump_file(const char *file, const void *buf, size_t n) { - FILE *f = fopen(file, "w"); - - if (f == NULL) { - return 1; - } - - if (fwrite(buf, sizeof(char), n, f) != n) { - return 1; - } - - if (fclose(f) != 0) { - return 1; - } - - return 0; -} - -struct str_builder { - char *str; - size_t capacity; // Size of buffer. - size_t used; // Bytes used, *not* including final zero. -}; - -static void str_builder_init(struct str_builder *b) { - b->capacity = 10; - b->used = 0; - b->str = malloc(b->capacity); - b->str[0] = 0; -} - -static void str_builder(struct str_builder *b, const char *s, ...) { - va_list vl; - va_start(vl, s); - size_t needed = (size_t)vsnprintf(NULL, 0, s, vl); - - while (b->capacity < b->used + needed + 1) { - b->capacity *= 2; - b->str = realloc(b->str, b->capacity); - } - - va_start(vl, s); // Must re-init. - vsnprintf(b->str+b->used, b->capacity-b->used, s, vl); - b->used += needed; -} - -// End of util.h. - -// Start of timing.h. - -// The function get_wall_time() returns the wall time in microseconds -// (with an unspecified offset). 
- -#ifdef _WIN32 - -#include <windows.h> - -static int64_t get_wall_time(void) { - LARGE_INTEGER time,freq; - assert(QueryPerformanceFrequency(&freq)); - assert(QueryPerformanceCounter(&time)); - return ((double)time.QuadPart / freq.QuadPart) * 1000000; -} - -#else -// Assuming POSIX - -#include <time.h> -#include <sys/time.h> - -static int64_t get_wall_time(void) { - struct timeval time; - assert(gettimeofday(&time,NULL) == 0); - return time.tv_sec * 1000000 + time.tv_usec; -} - -static int64_t get_wall_time_ns(void) { - struct timespec time; - assert(clock_gettime(CLOCK_REALTIME, &time) == 0); - return time.tv_sec * 1000000000 + time.tv_nsec; -} - -#endif - -// End of timing.h. - -#ifdef _MSC_VER -#define inline __inline -#endif -#include <string.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <ctype.h> - -// Start of lock.h. - -// A very simple cross-platform implementation of locks. Uses -// pthreads on Unix and some Windows thing there. Futhark's -// host-level code is not multithreaded, but user code may be, so we -// need some mechanism for ensuring atomic access to API functions. -// This is that mechanism. It is not exposed to user code at all, so -// we do not have to worry about name collisions. - -#ifdef _WIN32 - -typedef HANDLE lock_t; - -static void create_lock(lock_t *lock) { - *lock = CreateMutex(NULL, // Default security attributes. - FALSE, // Initially unlocked. - NULL); // Unnamed. 
-} - -static void lock_lock(lock_t *lock) { - assert(WaitForSingleObject(*lock, INFINITE) == WAIT_OBJECT_0); -} - -static void lock_unlock(lock_t *lock) { - assert(ReleaseMutex(*lock)); -} - -static void free_lock(lock_t *lock) { - CloseHandle(*lock); -} - -#else -// Assuming POSIX - -#include <pthread.h> - -typedef pthread_mutex_t lock_t; - -static void create_lock(lock_t *lock) { - int r = pthread_mutex_init(lock, NULL); - assert(r == 0); -} - -static void lock_lock(lock_t *lock) { - int r = pthread_mutex_lock(lock); - assert(r == 0); -} - -static void lock_unlock(lock_t *lock) { - int r = pthread_mutex_unlock(lock); - assert(r == 0); -} - -static void free_lock(lock_t *lock) { - // Nothing to do for pthreads. - (void)lock; -} - -#endif - -// End of lock.h. - -static inline uint8_t add8(uint8_t x, uint8_t y) -{ - return x + y; -} -static inline uint16_t add16(uint16_t x, uint16_t y) -{ - return x + y; -} -static inline uint32_t add32(uint32_t x, uint32_t y) -{ - return x + y; -} -static inline uint64_t add64(uint64_t x, uint64_t y) -{ - return x + y; -} -static inline uint8_t sub8(uint8_t x, uint8_t y) -{ - return x - y; -} -static inline uint16_t sub16(uint16_t x, uint16_t y) -{ - return x - y; -} -static inline uint32_t sub32(uint32_t x, uint32_t y) -{ - return x - y; -} -static inline uint64_t sub64(uint64_t x, uint64_t y) -{ - return x - y; -} -static inline uint8_t mul8(uint8_t x, uint8_t y) -{ - return x * y; -} -static inline uint16_t mul16(uint16_t x, uint16_t y) -{ - return x * y; -} -static inline uint32_t mul32(uint32_t x, uint32_t y) -{ - return x * y; -} -static inline uint64_t mul64(uint64_t x, uint64_t y) -{ - return x * y; -} -static inline uint8_t udiv8(uint8_t x, uint8_t y) -{ - return x / y; -} -static inline uint16_t udiv16(uint16_t x, uint16_t y) -{ - return x / y; -} -static inline uint32_t udiv32(uint32_t x, uint32_t y) -{ - return x / y; -} -static inline uint64_t udiv64(uint64_t x, uint64_t y) -{ - return x / y; -} -static inline uint8_t 
udiv_up8(uint8_t x, uint8_t y) -{ - return (x + y - 1) / y; -} -static inline uint16_t udiv_up16(uint16_t x, uint16_t y) -{ - return (x + y - 1) / y; -} -static inline uint32_t udiv_up32(uint32_t x, uint32_t y) -{ - return (x + y - 1) / y; -} -static inline uint64_t udiv_up64(uint64_t x, uint64_t y) -{ - return (x + y - 1) / y; -} -static inline uint8_t umod8(uint8_t x, uint8_t y) -{ - return x % y; -} -static inline uint16_t umod16(uint16_t x, uint16_t y) -{ - return x % y; -} -static inline uint32_t umod32(uint32_t x, uint32_t y) -{ - return x % y; -} -static inline uint64_t umod64(uint64_t x, uint64_t y) -{ - return x % y; -} -static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) -{ - return y == 0 ? 0 : (x + y - 1) / y; -} -static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) -{ - return y == 0 ? 0 : (x + y - 1) / y; -} -static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) -{ - return y == 0 ? 0 : (x + y - 1) / y; -} -static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) -{ - return y == 0 ? 0 : (x + y - 1) / y; -} -static inline uint8_t umod_safe8(uint8_t x, uint8_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline uint16_t umod_safe16(uint16_t x, uint16_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline uint32_t umod_safe32(uint32_t x, uint32_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline uint64_t umod_safe64(uint64_t x, uint64_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline int8_t sdiv8(int8_t x, int8_t y) -{ - int8_t q = x / y; - int8_t r = x % y; - - return q - ((r != 0 && r < 0 != y < 0) ? 
1 : 0); -} -static inline int16_t sdiv16(int16_t x, int16_t y) -{ - int16_t q = x / y; - int16_t r = x % y; - - return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); -} -static inline int32_t sdiv32(int32_t x, int32_t y) -{ - int32_t q = x / y; - int32_t r = x % y; - - return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); -} -static inline int64_t sdiv64(int64_t x, int64_t y) -{ - int64_t q = x / y; - int64_t r = x % y; - - return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); -} -static inline int8_t sdiv_up8(int8_t x, int8_t y) -{ - return sdiv8(x + y - 1, y); -} -static inline int16_t sdiv_up16(int16_t x, int16_t y) -{ - return sdiv16(x + y - 1, y); -} -static inline int32_t sdiv_up32(int32_t x, int32_t y) -{ - return sdiv32(x + y - 1, y); -} -static inline int64_t sdiv_up64(int64_t x, int64_t y) -{ - return sdiv64(x + y - 1, y); -} -static inline int8_t smod8(int8_t x, int8_t y) -{ - int8_t r = x % y; - - return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); -} -static inline int16_t smod16(int16_t x, int16_t y) -{ - int16_t r = x % y; - - return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); -} -static inline int32_t smod32(int32_t x, int32_t y) -{ - int32_t r = x % y; - - return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); -} -static inline int64_t smod64(int64_t x, int64_t y) -{ - int64_t r = x % y; - - return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); -} -static inline int8_t sdiv_safe8(int8_t x, int8_t y) -{ - return y == 0 ? 0 : sdiv8(x, y); -} -static inline int16_t sdiv_safe16(int16_t x, int16_t y) -{ - return y == 0 ? 0 : sdiv16(x, y); -} -static inline int32_t sdiv_safe32(int32_t x, int32_t y) -{ - return y == 0 ? 0 : sdiv32(x, y); -} -static inline int64_t sdiv_safe64(int64_t x, int64_t y) -{ - return y == 0 ? 
0 : sdiv64(x, y); -} -static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) -{ - return sdiv_safe8(x + y - 1, y); -} -static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) -{ - return sdiv_safe16(x + y - 1, y); -} -static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) -{ - return sdiv_safe32(x + y - 1, y); -} -static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) -{ - return sdiv_safe64(x + y - 1, y); -} -static inline int8_t smod_safe8(int8_t x, int8_t y) -{ - return y == 0 ? 0 : smod8(x, y); -} -static inline int16_t smod_safe16(int16_t x, int16_t y) -{ - return y == 0 ? 0 : smod16(x, y); -} -static inline int32_t smod_safe32(int32_t x, int32_t y) -{ - return y == 0 ? 0 : smod32(x, y); -} -static inline int64_t smod_safe64(int64_t x, int64_t y) -{ - return y == 0 ? 0 : smod64(x, y); -} -static inline int8_t squot8(int8_t x, int8_t y) -{ - return x / y; -} -static inline int16_t squot16(int16_t x, int16_t y) -{ - return x / y; -} -static inline int32_t squot32(int32_t x, int32_t y) -{ - return x / y; -} -static inline int64_t squot64(int64_t x, int64_t y) -{ - return x / y; -} -static inline int8_t srem8(int8_t x, int8_t y) -{ - return x % y; -} -static inline int16_t srem16(int16_t x, int16_t y) -{ - return x % y; -} -static inline int32_t srem32(int32_t x, int32_t y) -{ - return x % y; -} -static inline int64_t srem64(int64_t x, int64_t y) -{ - return x % y; -} -static inline int8_t squot_safe8(int8_t x, int8_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline int16_t squot_safe16(int16_t x, int16_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline int32_t squot_safe32(int32_t x, int32_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline int64_t squot_safe64(int64_t x, int64_t y) -{ - return y == 0 ? 0 : x / y; -} -static inline int8_t srem_safe8(int8_t x, int8_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline int16_t srem_safe16(int16_t x, int16_t y) -{ - return y == 0 ? 
0 : x % y; -} -static inline int32_t srem_safe32(int32_t x, int32_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline int64_t srem_safe64(int64_t x, int64_t y) -{ - return y == 0 ? 0 : x % y; -} -static inline int8_t smin8(int8_t x, int8_t y) -{ - return x < y ? x : y; -} -static inline int16_t smin16(int16_t x, int16_t y) -{ - return x < y ? x : y; -} -static inline int32_t smin32(int32_t x, int32_t y) -{ - return x < y ? x : y; -} -static inline int64_t smin64(int64_t x, int64_t y) -{ - return x < y ? x : y; -} -static inline uint8_t umin8(uint8_t x, uint8_t y) -{ - return x < y ? x : y; -} -static inline uint16_t umin16(uint16_t x, uint16_t y) -{ - return x < y ? x : y; -} -static inline uint32_t umin32(uint32_t x, uint32_t y) -{ - return x < y ? x : y; -} -static inline uint64_t umin64(uint64_t x, uint64_t y) -{ - return x < y ? x : y; -} -static inline int8_t smax8(int8_t x, int8_t y) -{ - return x < y ? y : x; -} -static inline int16_t smax16(int16_t x, int16_t y) -{ - return x < y ? y : x; -} -static inline int32_t smax32(int32_t x, int32_t y) -{ - return x < y ? y : x; -} -static inline int64_t smax64(int64_t x, int64_t y) -{ - return x < y ? y : x; -} -static inline uint8_t umax8(uint8_t x, uint8_t y) -{ - return x < y ? y : x; -} -static inline uint16_t umax16(uint16_t x, uint16_t y) -{ - return x < y ? y : x; -} -static inline uint32_t umax32(uint32_t x, uint32_t y) -{ - return x < y ? y : x; -} -static inline uint64_t umax64(uint64_t x, uint64_t y) -{ - return x < y ? 
y : x; -} -static inline uint8_t shl8(uint8_t x, uint8_t y) -{ - return x << y; -} -static inline uint16_t shl16(uint16_t x, uint16_t y) -{ - return x << y; -} -static inline uint32_t shl32(uint32_t x, uint32_t y) -{ - return x << y; -} -static inline uint64_t shl64(uint64_t x, uint64_t y) -{ - return x << y; -} -static inline uint8_t lshr8(uint8_t x, uint8_t y) -{ - return x >> y; -} -static inline uint16_t lshr16(uint16_t x, uint16_t y) -{ - return x >> y; -} -static inline uint32_t lshr32(uint32_t x, uint32_t y) -{ - return x >> y; -} -static inline uint64_t lshr64(uint64_t x, uint64_t y) -{ - return x >> y; -} -static inline int8_t ashr8(int8_t x, int8_t y) -{ - return x >> y; -} -static inline int16_t ashr16(int16_t x, int16_t y) -{ - return x >> y; -} -static inline int32_t ashr32(int32_t x, int32_t y) -{ - return x >> y; -} -static inline int64_t ashr64(int64_t x, int64_t y) -{ - return x >> y; -} -static inline uint8_t and8(uint8_t x, uint8_t y) -{ - return x & y; -} -static inline uint16_t and16(uint16_t x, uint16_t y) -{ - return x & y; -} -static inline uint32_t and32(uint32_t x, uint32_t y) -{ - return x & y; -} -static inline uint64_t and64(uint64_t x, uint64_t y) -{ - return x & y; -} -static inline uint8_t or8(uint8_t x, uint8_t y) -{ - return x | y; -} -static inline uint16_t or16(uint16_t x, uint16_t y) -{ - return x | y; -} -static inline uint32_t or32(uint32_t x, uint32_t y) -{ - return x | y; -} -static inline uint64_t or64(uint64_t x, uint64_t y) -{ - return x | y; -} -static inline uint8_t xor8(uint8_t x, uint8_t y) -{ - return x ^ y; -} -static inline uint16_t xor16(uint16_t x, uint16_t y) -{ - return x ^ y; -} -static inline uint32_t xor32(uint32_t x, uint32_t y) -{ - return x ^ y; -} -static inline uint64_t xor64(uint64_t x, uint64_t y) -{ - return x ^ y; -} -static inline bool ult8(uint8_t x, uint8_t y) -{ - return x < y; -} -static inline bool ult16(uint16_t x, uint16_t y) -{ - return x < y; -} -static inline bool ult32(uint32_t x, 
uint32_t y) -{ - return x < y; -} -static inline bool ult64(uint64_t x, uint64_t y) -{ - return x < y; -} -static inline bool ule8(uint8_t x, uint8_t y) -{ - return x <= y; -} -static inline bool ule16(uint16_t x, uint16_t y) -{ - return x <= y; -} -static inline bool ule32(uint32_t x, uint32_t y) -{ - return x <= y; -} -static inline bool ule64(uint64_t x, uint64_t y) -{ - return x <= y; -} -static inline bool slt8(int8_t x, int8_t y) -{ - return x < y; -} -static inline bool slt16(int16_t x, int16_t y) -{ - return x < y; -} -static inline bool slt32(int32_t x, int32_t y) -{ - return x < y; -} -static inline bool slt64(int64_t x, int64_t y) -{ - return x < y; -} -static inline bool sle8(int8_t x, int8_t y) -{ - return x <= y; -} -static inline bool sle16(int16_t x, int16_t y) -{ - return x <= y; -} -static inline bool sle32(int32_t x, int32_t y) -{ - return x <= y; -} -static inline bool sle64(int64_t x, int64_t y) -{ - return x <= y; -} -static inline int8_t pow8(int8_t x, int8_t y) -{ - int8_t res = 1, rem = y; - - while (rem != 0) { - if (rem & 1) - res *= x; - rem >>= 1; - x *= x; - } - return res; -} -static inline int16_t pow16(int16_t x, int16_t y) -{ - int16_t res = 1, rem = y; - - while (rem != 0) { - if (rem & 1) - res *= x; - rem >>= 1; - x *= x; - } - return res; -} -static inline int32_t pow32(int32_t x, int32_t y) -{ - int32_t res = 1, rem = y; - - while (rem != 0) { - if (rem & 1) - res *= x; - rem >>= 1; - x *= x; - } - return res; -} -static inline int64_t pow64(int64_t x, int64_t y) -{ - int64_t res = 1, rem = y; - - while (rem != 0) { - if (rem & 1) - res *= x; - rem >>= 1; - x *= x; - } - return res; -} -static inline bool itob_i8_bool(int8_t x) -{ - return x; -} -static inline bool itob_i16_bool(int16_t x) -{ - return x; -} -static inline bool itob_i32_bool(int32_t x) -{ - return x; -} -static inline bool itob_i64_bool(int64_t x) -{ - return x; -} -static inline int8_t btoi_bool_i8(bool x) -{ - return x; -} -static inline int16_t 
btoi_bool_i16(bool x) -{ - return x; -} -static inline int32_t btoi_bool_i32(bool x) -{ - return x; -} -static inline int64_t btoi_bool_i64(bool x) -{ - return x; -} -#define sext_i8_i8(x) ((int8_t) (int8_t) x) -#define sext_i8_i16(x) ((int16_t) (int8_t) x) -#define sext_i8_i32(x) ((int32_t) (int8_t) x) -#define sext_i8_i64(x) ((int64_t) (int8_t) x) -#define sext_i16_i8(x) ((int8_t) (int16_t) x) -#define sext_i16_i16(x) ((int16_t) (int16_t) x) -#define sext_i16_i32(x) ((int32_t) (int16_t) x) -#define sext_i16_i64(x) ((int64_t) (int16_t) x) -#define sext_i32_i8(x) ((int8_t) (int32_t) x) -#define sext_i32_i16(x) ((int16_t) (int32_t) x) -#define sext_i32_i32(x) ((int32_t) (int32_t) x) -#define sext_i32_i64(x) ((int64_t) (int32_t) x) -#define sext_i64_i8(x) ((int8_t) (int64_t) x) -#define sext_i64_i16(x) ((int16_t) (int64_t) x) -#define sext_i64_i32(x) ((int32_t) (int64_t) x) -#define sext_i64_i64(x) ((int64_t) (int64_t) x) -#define zext_i8_i8(x) ((int8_t) (uint8_t) x) -#define zext_i8_i16(x) ((int16_t) (uint8_t) x) -#define zext_i8_i32(x) ((int32_t) (uint8_t) x) -#define zext_i8_i64(x) ((int64_t) (uint8_t) x) -#define zext_i16_i8(x) ((int8_t) (uint16_t) x) -#define zext_i16_i16(x) ((int16_t) (uint16_t) x) -#define zext_i16_i32(x) ((int32_t) (uint16_t) x) -#define zext_i16_i64(x) ((int64_t) (uint16_t) x) -#define zext_i32_i8(x) ((int8_t) (uint32_t) x) -#define zext_i32_i16(x) ((int16_t) (uint32_t) x) -#define zext_i32_i32(x) ((int32_t) (uint32_t) x) -#define zext_i32_i64(x) ((int64_t) (uint32_t) x) -#define zext_i64_i8(x) ((int8_t) (uint64_t) x) -#define zext_i64_i16(x) ((int16_t) (uint64_t) x) -#define zext_i64_i32(x) ((int32_t) (uint64_t) x) -#define zext_i64_i64(x) ((int64_t) (uint64_t) x) -#if defined(__OPENCL_VERSION__) -static int32_t futrts_popc8(int8_t x) -{ - return popcount(x); -} -static int32_t futrts_popc16(int16_t x) -{ - return popcount(x); -} -static int32_t futrts_popc32(int32_t x) -{ - return popcount(x); -} -static int32_t futrts_popc64(int64_t x) -{ 
- return popcount(x); -} -#elif defined(__CUDA_ARCH__) -static int32_t futrts_popc8(int8_t x) -{ - return __popc(zext_i8_i32(x)); -} -static int32_t futrts_popc16(int16_t x) -{ - return __popc(zext_i16_i32(x)); -} -static int32_t futrts_popc32(int32_t x) -{ - return __popc(x); -} -static int32_t futrts_popc64(int64_t x) -{ - return __popcll(x); -} -#else -static int32_t futrts_popc8(int8_t x) -{ - int c = 0; - - for (; x; ++c) - x &= x - 1; - return c; -} -static int32_t futrts_popc16(int16_t x) -{ - int c = 0; - - for (; x; ++c) - x &= x - 1; - return c; -} -static int32_t futrts_popc32(int32_t x) -{ - int c = 0; - - for (; x; ++c) - x &= x - 1; - return c; -} -static int32_t futrts_popc64(int64_t x) -{ - int c = 0; - - for (; x; ++c) - x &= x - 1; - return c; -} -#endif -#if defined(__OPENCL_VERSION__) -static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) -{ - return mul_hi(a, b); -} -static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) -{ - return mul_hi(a, b); -} -static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) -{ - return mul_hi(a, b); -} -static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) -{ - return mul_hi(a, b); -} -#elif defined(__CUDA_ARCH__) -static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) -{ - uint16_t aa = a; - uint16_t bb = b; - - return aa * bb >> 8; -} -static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) -{ - uint32_t aa = a; - uint32_t bb = b; - - return aa * bb >> 16; -} -static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) -{ - return mulhi(a, b); -} -static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) -{ - return mul64hi(a, b); -} -#else -static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) -{ - uint16_t aa = a; - uint16_t bb = b; - - return aa * bb >> 8; -} -static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) -{ - uint32_t aa = a; - uint32_t bb = b; - - return aa * bb >> 16; -} -static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) -{ - uint64_t aa = a; - uint64_t bb = b; - - return aa * bb >> 32; -} -static 
uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) -{ - __uint128_t aa = a; - __uint128_t bb = b; - - return aa * bb >> 64; -} -#endif -#if defined(__OPENCL_VERSION__) -static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) -{ - return mad_hi(a, b, c); -} -static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) -{ - return mad_hi(a, b, c); -} -static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) -{ - return mad_hi(a, b, c); -} -static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) -{ - return mad_hi(a, b, c); -} -#else -static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) -{ - return futrts_mul_hi8(a, b) + c; -} -static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) -{ - return futrts_mul_hi16(a, b) + c; -} -static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) -{ - return futrts_mul_hi32(a, b) + c; -} -static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) -{ - return futrts_mul_hi64(a, b) + c; -} -#endif -#if defined(__OPENCL_VERSION__) -static int32_t futrts_clzz8(int8_t x) -{ - return clz(x); -} -static int32_t futrts_clzz16(int16_t x) -{ - return clz(x); -} -static int32_t futrts_clzz32(int32_t x) -{ - return clz(x); -} -static int32_t futrts_clzz64(int64_t x) -{ - return clz(x); -} -#elif defined(__CUDA_ARCH__) -static int32_t futrts_clzz8(int8_t x) -{ - return __clz(zext_i8_i32(x)) - 24; -} -static int32_t futrts_clzz16(int16_t x) -{ - return __clz(zext_i16_i32(x)) - 16; -} -static int32_t futrts_clzz32(int32_t x) -{ - return __clz(x); -} -static int32_t futrts_clzz64(int64_t x) -{ - return __clzll(x); -} -#else -static int32_t futrts_clzz8(int8_t x) -{ - int n = 0; - int bits = sizeof(x) * 8; - - for (int i = 0; i < bits; i++) { - if (x < 0) - break; - n++; - x <<= 1; - } - return n; -} -static int32_t futrts_clzz16(int16_t x) -{ - int n = 0; - int bits = sizeof(x) * 8; - - for (int i = 0; i < bits; i++) { - if (x < 0) - break; - n++; - x <<= 1; - } - return n; 
-} -static int32_t futrts_clzz32(int32_t x) -{ - int n = 0; - int bits = sizeof(x) * 8; - - for (int i = 0; i < bits; i++) { - if (x < 0) - break; - n++; - x <<= 1; - } - return n; -} -static int32_t futrts_clzz64(int64_t x) -{ - int n = 0; - int bits = sizeof(x) * 8; - - for (int i = 0; i < bits; i++) { - if (x < 0) - break; - n++; - x <<= 1; - } - return n; -} -#endif -#if defined(__OPENCL_VERSION__) -static int32_t futrts_ctzz8(int8_t x) -{ - int i = 0; - - for (; i < 8 && (x & 1) == 0; i++, x >>= 1) - ; - return i; -} -static int32_t futrts_ctzz16(int16_t x) -{ - int i = 0; - - for (; i < 16 && (x & 1) == 0; i++, x >>= 1) - ; - return i; -} -static int32_t futrts_ctzz32(int32_t x) -{ - int i = 0; - - for (; i < 32 && (x & 1) == 0; i++, x >>= 1) - ; - return i; -} -static int32_t futrts_ctzz64(int64_t x) -{ - int i = 0; - - for (; i < 64 && (x & 1) == 0; i++, x >>= 1) - ; - return i; -} -#elif defined(__CUDA_ARCH__) -static int32_t futrts_ctzz8(int8_t x) -{ - int y = __ffs(x); - - return y == 0 ? 8 : y - 1; -} -static int32_t futrts_ctzz16(int16_t x) -{ - int y = __ffs(x); - - return y == 0 ? 16 : y - 1; -} -static int32_t futrts_ctzz32(int32_t x) -{ - int y = __ffs(x); - - return y == 0 ? 32 : y - 1; -} -static int32_t futrts_ctzz64(int64_t x) -{ - int y = __ffsll(x); - - return y == 0 ? 64 : y - 1; -} -#else -static int32_t futrts_ctzz8(int8_t x) -{ - return x == 0 ? 8 : __builtin_ctz((uint32_t) x); -} -static int32_t futrts_ctzz16(int16_t x) -{ - return x == 0 ? 16 : __builtin_ctz((uint32_t) x); -} -static int32_t futrts_ctzz32(int32_t x) -{ - return x == 0 ? 32 : __builtin_ctz(x); -} -static int32_t futrts_ctzz64(int64_t x) -{ - return x == 0 ? 
64 : __builtin_ctzll(x); -} -#endif -static inline float fdiv32(float x, float y) -{ - return x / y; -} -static inline float fadd32(float x, float y) -{ - return x + y; -} -static inline float fsub32(float x, float y) -{ - return x - y; -} -static inline float fmul32(float x, float y) -{ - return x * y; -} -static inline float fmin32(float x, float y) -{ - return fmin(x, y); -} -static inline float fmax32(float x, float y) -{ - return fmax(x, y); -} -static inline float fpow32(float x, float y) -{ - return pow(x, y); -} -static inline bool cmplt32(float x, float y) -{ - return x < y; -} -static inline bool cmple32(float x, float y) -{ - return x <= y; -} -static inline float sitofp_i8_f32(int8_t x) -{ - return (float) x; -} -static inline float sitofp_i16_f32(int16_t x) -{ - return (float) x; -} -static inline float sitofp_i32_f32(int32_t x) -{ - return (float) x; -} -static inline float sitofp_i64_f32(int64_t x) -{ - return (float) x; -} -static inline float uitofp_i8_f32(uint8_t x) -{ - return (float) x; -} -static inline float uitofp_i16_f32(uint16_t x) -{ - return (float) x; -} -static inline float uitofp_i32_f32(uint32_t x) -{ - return (float) x; -} -static inline float uitofp_i64_f32(uint64_t x) -{ - return (float) x; -} -static inline int8_t fptosi_f32_i8(float x) -{ - return (int8_t) x; -} -static inline int16_t fptosi_f32_i16(float x) -{ - return (int16_t) x; -} -static inline int32_t fptosi_f32_i32(float x) -{ - return (int32_t) x; -} -static inline int64_t fptosi_f32_i64(float x) -{ - return (int64_t) x; -} -static inline uint8_t fptoui_f32_i8(float x) -{ - return (uint8_t) x; -} -static inline uint16_t fptoui_f32_i16(float x) -{ - return (uint16_t) x; -} -static inline uint32_t fptoui_f32_i32(float x) -{ - return (uint32_t) x; -} -static inline uint64_t fptoui_f32_i64(float x) -{ - return (uint64_t) x; -} -static inline double fdiv64(double x, double y) -{ - return x / y; -} -static inline double fadd64(double x, double y) -{ - return x + y; -} -static 
inline double fsub64(double x, double y) -{ - return x - y; -} -static inline double fmul64(double x, double y) -{ - return x * y; -} -static inline double fmin64(double x, double y) -{ - return fmin(x, y); -} -static inline double fmax64(double x, double y) -{ - return fmax(x, y); -} -static inline double fpow64(double x, double y) -{ - return pow(x, y); -} -static inline bool cmplt64(double x, double y) -{ - return x < y; -} -static inline bool cmple64(double x, double y) -{ - return x <= y; -} -static inline double sitofp_i8_f64(int8_t x) -{ - return (double) x; -} -static inline double sitofp_i16_f64(int16_t x) -{ - return (double) x; -} -static inline double sitofp_i32_f64(int32_t x) -{ - return (double) x; -} -static inline double sitofp_i64_f64(int64_t x) -{ - return (double) x; -} -static inline double uitofp_i8_f64(uint8_t x) -{ - return (double) x; -} -static inline double uitofp_i16_f64(uint16_t x) -{ - return (double) x; -} -static inline double uitofp_i32_f64(uint32_t x) -{ - return (double) x; -} -static inline double uitofp_i64_f64(uint64_t x) -{ - return (double) x; -} -static inline int8_t fptosi_f64_i8(double x) -{ - return (int8_t) x; -} -static inline int16_t fptosi_f64_i16(double x) -{ - return (int16_t) x; -} -static inline int32_t fptosi_f64_i32(double x) -{ - return (int32_t) x; -} -static inline int64_t fptosi_f64_i64(double x) -{ - return (int64_t) x; -} -static inline uint8_t fptoui_f64_i8(double x) -{ - return (uint8_t) x; -} -static inline uint16_t fptoui_f64_i16(double x) -{ - return (uint16_t) x; -} -static inline uint32_t fptoui_f64_i32(double x) -{ - return (uint32_t) x; -} -static inline uint64_t fptoui_f64_i64(double x) -{ - return (uint64_t) x; -} -static inline float fpconv_f32_f32(float x) -{ - return (float) x; -} -static inline double fpconv_f32_f64(float x) -{ - return (double) x; -} -static inline float fpconv_f64_f32(double x) -{ - return (float) x; -} -static inline double fpconv_f64_f64(double x) -{ - return (double) x; 
-} -static inline bool futrts_isnan32(float x) -{ - return isnan(x); -} -static inline bool futrts_isinf32(float x) -{ - return isinf(x); -} -#ifdef __OPENCL_VERSION__ -static inline float futrts_log32(float x) -{ - return log(x); -} -static inline float futrts_log2_32(float x) -{ - return log2(x); -} -static inline float futrts_log10_32(float x) -{ - return log10(x); -} -static inline float futrts_sqrt32(float x) -{ - return sqrt(x); -} -static inline float futrts_exp32(float x) -{ - return exp(x); -} -static inline float futrts_cos32(float x) -{ - return cos(x); -} -static inline float futrts_sin32(float x) -{ - return sin(x); -} -static inline float futrts_tan32(float x) -{ - return tan(x); -} -static inline float futrts_acos32(float x) -{ - return acos(x); -} -static inline float futrts_asin32(float x) -{ - return asin(x); -} -static inline float futrts_atan32(float x) -{ - return atan(x); -} -static inline float futrts_cosh32(float x) -{ - return cosh(x); -} -static inline float futrts_sinh32(float x) -{ - return sinh(x); -} -static inline float futrts_tanh32(float x) -{ - return tanh(x); -} -static inline float futrts_acosh32(float x) -{ - return acosh(x); -} -static inline float futrts_asinh32(float x) -{ - return asinh(x); -} -static inline float futrts_atanh32(float x) -{ - return atanh(x); -} -static inline float futrts_atan2_32(float x, float y) -{ - return atan2(x, y); -} -static inline float futrts_gamma32(float x) -{ - return tgamma(x); -} -static inline float futrts_lgamma32(float x) -{ - return lgamma(x); -} -static inline float fmod32(float x, float y) -{ - return fmod(x, y); -} -static inline float futrts_round32(float x) -{ - return rint(x); -} -static inline float futrts_floor32(float x) -{ - return floor(x); -} -static inline float futrts_ceil32(float x) -{ - return ceil(x); -} -static inline float futrts_lerp32(float v0, float v1, float t) -{ - return mix(v0, v1, t); -} -static inline float futrts_mad32(float a, float b, float c) -{ - return 
mad(a, b, c); -} -static inline float futrts_fma32(float a, float b, float c) -{ - return fma(a, b, c); -} -#else -static inline float futrts_log32(float x) -{ - return logf(x); -} -static inline float futrts_log2_32(float x) -{ - return log2f(x); -} -static inline float futrts_log10_32(float x) -{ - return log10f(x); -} -static inline float futrts_sqrt32(float x) -{ - return sqrtf(x); -} -static inline float futrts_exp32(float x) -{ - return expf(x); -} -static inline float futrts_cos32(float x) -{ - return cosf(x); -} -static inline float futrts_sin32(float x) -{ - return sinf(x); -} -static inline float futrts_tan32(float x) -{ - return tanf(x); -} -static inline float futrts_acos32(float x) -{ - return acosf(x); -} -static inline float futrts_asin32(float x) -{ - return asinf(x); -} -static inline float futrts_atan32(float x) -{ - return atanf(x); -} -static inline float futrts_cosh32(float x) -{ - return coshf(x); -} -static inline float futrts_sinh32(float x) -{ - return sinhf(x); -} -static inline float futrts_tanh32(float x) -{ - return tanhf(x); -} -static inline float futrts_acosh32(float x) -{ - return acoshf(x); -} -static inline float futrts_asinh32(float x) -{ - return asinhf(x); -} -static inline float futrts_atanh32(float x) -{ - return atanhf(x); -} -static inline float futrts_atan2_32(float x, float y) -{ - return atan2f(x, y); -} -static inline float futrts_gamma32(float x) -{ - return tgammaf(x); -} -static inline float futrts_lgamma32(float x) -{ - return lgammaf(x); -} -static inline float fmod32(float x, float y) -{ - return fmodf(x, y); -} -static inline float futrts_round32(float x) -{ - return rintf(x); -} -static inline float futrts_floor32(float x) -{ - return floorf(x); -} -static inline float futrts_ceil32(float x) -{ - return ceilf(x); -} -static inline float futrts_lerp32(float v0, float v1, float t) -{ - return v0 + (v1 - v0) * t; -} -static inline float futrts_mad32(float a, float b, float c) -{ - return a * b + c; -} -static 
inline float futrts_fma32(float a, float b, float c) -{ - return fmaf(a, b, c); -} -#endif -static inline int32_t futrts_to_bits32(float x) -{ - union { - float f; - int32_t t; - } p; - - p.f = x; - return p.t; -} -static inline float futrts_from_bits32(int32_t x) -{ - union { - int32_t f; - float t; - } p; - - p.f = x; - return p.t; -} -static inline float fsignum32(float x) -{ - return futrts_isnan32(x) ? x : (x > 0) - (x < 0); -} -static inline double futrts_log64(double x) -{ - return log(x); -} -static inline double futrts_log2_64(double x) -{ - return log2(x); -} -static inline double futrts_log10_64(double x) -{ - return log10(x); -} -static inline double futrts_sqrt64(double x) -{ - return sqrt(x); -} -static inline double futrts_exp64(double x) -{ - return exp(x); -} -static inline double futrts_cos64(double x) -{ - return cos(x); -} -static inline double futrts_sin64(double x) -{ - return sin(x); -} -static inline double futrts_tan64(double x) -{ - return tan(x); -} -static inline double futrts_acos64(double x) -{ - return acos(x); -} -static inline double futrts_asin64(double x) -{ - return asin(x); -} -static inline double futrts_atan64(double x) -{ - return atan(x); -} -static inline double futrts_cosh64(double x) -{ - return cosh(x); -} -static inline double futrts_sinh64(double x) -{ - return sinh(x); -} -static inline double futrts_tanh64(double x) -{ - return tanh(x); -} -static inline double futrts_acosh64(double x) -{ - return acosh(x); -} -static inline double futrts_asinh64(double x) -{ - return asinh(x); -} -static inline double futrts_atanh64(double x) -{ - return atanh(x); -} -static inline double futrts_atan2_64(double x, double y) -{ - return atan2(x, y); -} -static inline double futrts_gamma64(double x) -{ - return tgamma(x); -} -static inline double futrts_lgamma64(double x) -{ - return lgamma(x); -} -static inline double futrts_fma64(double a, double b, double c) -{ - return fma(a, b, c); -} -static inline double futrts_round64(double 
x) -{ - return rint(x); -} -static inline double futrts_ceil64(double x) -{ - return ceil(x); -} -static inline double futrts_floor64(double x) -{ - return floor(x); -} -static inline bool futrts_isnan64(double x) -{ - return isnan(x); -} -static inline bool futrts_isinf64(double x) -{ - return isinf(x); -} -static inline int64_t futrts_to_bits64(double x) -{ - union { - double f; - int64_t t; - } p; - - p.f = x; - return p.t; -} -static inline double futrts_from_bits64(int64_t x) -{ - union { - int64_t f; - double t; - } p; - - p.f = x; - return p.t; -} -static inline double fmod64(double x, double y) -{ - return fmod(x, y); -} -static inline double fsignum64(double x) -{ - return futrts_isnan64(x) ? x : (x > 0) - (x < 0); -} -#ifdef __OPENCL_VERSION__ -static inline double futrts_lerp64(double v0, double v1, double t) -{ - return mix(v0, v1, t); -} -static inline double futrts_mad64(double a, double b, double c) -{ - return mad(a, b, c); -} -#else -static inline double futrts_lerp64(double v0, double v1, double t) -{ - return v0 + (v1 - v0) * t; -} -static inline double futrts_mad64(double a, double b, double c) -{ - return a * b + c; -} -#endif -static int init_constants(struct futhark_context *); -static int free_constants(struct futhark_context *); -struct memblock { - int *references; - char *mem; - int64_t size; - const char *desc; -} ; -// start of scheduler.h - -// First, the API that the generated code will access. In principle, -// we could then compile the scheduler separately and link an object -// file with the generated code. In practice, we will embed all of -// this in the generated code. - -// Scheduler handle. -struct scheduler; - -// Initialise a scheduler (and start worker threads). -static int scheduler_init(struct scheduler *scheduler, - int num_workers, - double kappa); - -// Shut down a scheduler (and destroy worker threads). 
-static int scheduler_destroy(struct scheduler *scheduler); - -// Figure out the smallest amount of work that amortises task -// creation. -static int determine_kappa(double *kappa); - -// How a segop should be scheduled. -enum scheduling { - DYNAMIC, - STATIC -}; - -// How a given task should be executed. Filled out by the scheduler -// and passed to the segop function -struct scheduler_info { - int64_t iter_pr_subtask; - int64_t remainder; - int nsubtasks; - enum scheduling sched; - int wake_up_threads; - - int64_t *task_time; - int64_t *task_iter; -}; - -// A segop function. This is what you hand the scheduler for -// execution. -typedef int (*segop_fn)(void* args, - int64_t iterations, - int tid, - struct scheduler_info info); - -// A task for the scheduler to execute. -struct scheduler_segop { - void *args; - segop_fn top_level_fn; - segop_fn nested_fn; - int64_t iterations; - enum scheduling sched; - - // Pointers to timer and iter associated with the task - int64_t *task_time; - int64_t *task_iter; - - // For debugging - const char* name; -}; - -static inline int scheduler_prepare_task(struct scheduler *scheduler, - struct scheduler_segop *task); - -typedef int (*parloop_fn)(void* args, - int64_t start, - int64_t end, - int subtask_id, - int tid); - -// A parallel parloop task. -struct scheduler_parloop { - void* args; - parloop_fn fn; - int64_t iterations; - struct scheduler_info info; - - // For debugging - const char* name; -}; - -static inline int scheduler_execute_task(struct scheduler *scheduler, - struct scheduler_parloop *task); - -// Then the API implementation. 
- -#include <signal.h> - -#if defined(_WIN32) -#include <windows.h> -#elif defined(__APPLE__) -#include <sys/sysctl.h> -// For getting cpu usage of threads -#include <mach/mach.h> -#include <sys/resource.h> -#elif defined(__linux__) -#include <sys/sysinfo.h> -#include <sys/resource.h> -#include <signal.h> -#endif - -/* Multicore Utility functions */ - -/* A wrapper for getting rusage on Linux and MacOS */ -/* TODO maybe figure out this for windows */ -static inline int getrusage_thread(struct rusage *rusage) -{ - int err = -1; -#if defined(__APPLE__) - thread_basic_info_data_t info = { 0 }; - mach_msg_type_number_t info_count = THREAD_BASIC_INFO_COUNT; - kern_return_t kern_err; - - kern_err = thread_info(mach_thread_self(), - THREAD_BASIC_INFO, - (thread_info_t)&info, - &info_count); - if (kern_err == KERN_SUCCESS) { - memset(rusage, 0, sizeof(struct rusage)); - rusage->ru_utime.tv_sec = info.user_time.seconds; - rusage->ru_utime.tv_usec = info.user_time.microseconds; - rusage->ru_stime.tv_sec = info.system_time.seconds; - rusage->ru_stime.tv_usec = info.system_time.microseconds; - err = 0; - } else { - errno = EINVAL; - } -#elif defined(__linux__) - err = getrusage(RUSAGE_THREAD, rusage); -#endif - return err; -} - -/* returns the number of logical cores */ -static int num_processors() -{ -#if defined(_WIN32) -/* https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/ns-sysinfoapi-system_info */ - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - int ncores = sysinfo.dwNumberOfProcessors; - fprintf(stderr, "Found %d cores on your Windows machine\n Is that correct?\n", ncores); - return ncores; -#elif defined(__APPLE__) - int ncores; - size_t ncores_size = sizeof(ncores); - CHECK_ERRNO(sysctlbyname("hw.logicalcpu", &ncores, &ncores_size, NULL, 0), - "sysctlbyname (hw.logicalcpu)"); - return ncores; -#elif defined(__linux__) - return get_nprocs(); -#else - fprintf(stderr, "operating system not recognised\n"); - return -1; -#endif -} - -static unsigned int 
g_seed; - -// Used to seed the generator. -static inline void fast_srand(unsigned int seed) { - g_seed = seed; -} - -// Compute a pseudorandom integer. -// Output value in range [0, 32767] -static inline unsigned int fast_rand(void) { - g_seed = (214013*g_seed+2531011); - return (g_seed>>16)&0x7FFF; -} - -struct subtask_queue { - int capacity; // Size of the buffer. - int first; // Index of the start of the ring buffer. - int num_used; // Number of used elements in the buffer. - struct subtask **buffer; - - pthread_mutex_t mutex; // Mutex used for synchronisation. - pthread_cond_t cond; // Condition variable used for synchronisation. - int dead; - -#if defined(MCPROFILE) - /* Profiling fields */ - uint64_t time_enqueue; - uint64_t time_dequeue; - uint64_t n_dequeues; - uint64_t n_enqueues; -#endif -}; - -/* A subtask that can be executed by a worker */ -struct subtask { - /* The parloop function */ - parloop_fn fn; - /* Execution parameters */ - void* args; - int64_t start, end; - int id; - - /* Dynamic scheduling parameters */ - int chunkable; - int64_t chunk_size; - - /* Shared variables across subtasks */ - volatile int *counter; // Counter for ongoing subtasks - // Shared task timers and iterators - int64_t *task_time; - int64_t *task_iter; - - /* For debugging */ - const char *name; -}; - - -struct worker { - pthread_t thread; - struct scheduler *scheduler; /* Reference to the scheduler struct the worker belongs to*/ - struct subtask_queue q; - int dead; - int tid; /* Just a thread id */ - - /* "thread local" time fields used for online algorithm */ - uint64_t timer; - uint64_t total; - int nested; /* How nested the current computation is */ - - // Profiling fields - int output_usage; /* Whether to dump thread usage */ - uint64_t time_spent_working; /* Time spent in parloop functions */ -}; - -static inline void output_worker_usage(struct worker *worker) -{ - struct rusage usage; - CHECK_ERRNO(getrusage_thread(&usage), "getrusage_thread"); - struct timeval 
user_cpu_time = usage.ru_utime; - struct timeval sys_cpu_time = usage.ru_stime; - fprintf(stderr, "tid: %2d - work time %10llu us - user time: %10llu us - sys: %10llu us\n", - worker->tid, - (long long unsigned)worker->time_spent_working / 1000, - (long long unsigned)(user_cpu_time.tv_sec * 1000000 + user_cpu_time.tv_usec), - (long long unsigned)(sys_cpu_time.tv_sec * 1000000 + sys_cpu_time.tv_usec)); -} - -/* Doubles the size of the queue */ -static inline int subtask_queue_grow_queue(struct subtask_queue *subtask_queue) { - - int new_capacity = 2 * subtask_queue->capacity; -#ifdef MCDEBUG - fprintf(stderr, "Growing queue to %d\n", subtask_queue->capacity * 2); -#endif - - struct subtask **new_buffer = calloc(new_capacity, sizeof(struct subtask*)); - for (int i = 0; i < subtask_queue->num_used; i++) { - new_buffer[i] = subtask_queue->buffer[(subtask_queue->first + i) % subtask_queue->capacity]; - } - - free(subtask_queue->buffer); - subtask_queue->buffer = new_buffer; - subtask_queue->capacity = new_capacity; - subtask_queue->first = 0; - - return 0; -} - -// Initialise a job queue with the given capacity. The queue starts out -// empty. Returns non-zero on error. -static inline int subtask_queue_init(struct subtask_queue *subtask_queue, int capacity) -{ - assert(subtask_queue != NULL); - memset(subtask_queue, 0, sizeof(struct subtask_queue)); - - subtask_queue->capacity = capacity; - subtask_queue->buffer = calloc(capacity, sizeof(struct subtask*)); - if (subtask_queue->buffer == NULL) { - return -1; - } - - CHECK_ERRNO(pthread_mutex_init(&subtask_queue->mutex, NULL), "pthread_mutex_init"); - CHECK_ERRNO(pthread_cond_init(&subtask_queue->cond, NULL), "pthread_cond_init"); - - return 0; -} - -// Destroy the job queue. Blocks until the queue is empty before it -// is destroyed. 
-static inline int subtask_queue_destroy(struct subtask_queue *subtask_queue) -{ - assert(subtask_queue != NULL); - - CHECK_ERR(pthread_mutex_lock(&subtask_queue->mutex), "pthread_mutex_lock"); - - while (subtask_queue->num_used != 0) { - CHECK_ERR(pthread_cond_wait(&subtask_queue->cond, &subtask_queue->mutex), "pthread_cond_wait"); - } - - // Queue is now empty. Let's kill it! - subtask_queue->dead = 1; - free(subtask_queue->buffer); - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - - return 0; -} - -static inline void dump_queue(struct worker *worker) -{ - struct subtask_queue *subtask_queue = &worker->q; - CHECK_ERR(pthread_mutex_lock(&subtask_queue->mutex), "pthread_mutex_lock"); - for (int i = 0; i < subtask_queue->num_used; i++) { - struct subtask * subtask = subtask_queue->buffer[(subtask_queue->first + i) % subtask_queue->capacity]; - printf("queue tid %d with %d task %s\n", worker->tid, i, subtask->name); - } - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); -} - -// Push an element onto the end of the job queue. Blocks if the -// subtask_queue is full (its size is equal to its capacity). Returns -// non-zero on error. It is an error to push a job onto a queue that -// has been destroyed. -static inline int subtask_queue_enqueue(struct worker *worker, struct subtask *subtask ) -{ - assert(worker != NULL); - struct subtask_queue *subtask_queue = &worker->q; - -#ifdef MCPROFILE - uint64_t start = get_wall_time(); -#endif - - CHECK_ERR(pthread_mutex_lock(&subtask_queue->mutex), "pthread_mutex_lock"); - // Wait until there is room in the subtask_queue. 
- while (subtask_queue->num_used == subtask_queue->capacity && !subtask_queue->dead) { - if (subtask_queue->num_used == subtask_queue->capacity) { - CHECK_ERR(subtask_queue_grow_queue(subtask_queue), "subtask_queue_grow_queue"); - continue; - } - CHECK_ERR(pthread_cond_wait(&subtask_queue->cond, &subtask_queue->mutex), "pthread_cond_wait"); - } - - if (subtask_queue->dead) { - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - return -1; - } - - // If we made it past the loop, there is room in the subtask_queue. - subtask_queue->buffer[(subtask_queue->first + subtask_queue->num_used) % subtask_queue->capacity] = subtask; - subtask_queue->num_used++; - -#ifdef MCPROFILE - uint64_t end = get_wall_time(); - subtask_queue->time_enqueue += (end - start); - subtask_queue->n_enqueues++; -#endif - // Broadcast a reader (if any) that there is now an element. - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - - return 0; -} - - -/* Like subtask_queue_dequeue, but with two differences: - 1) the subtask is stolen from the __front__ of the queue - 2) returns immediately if there is no subtasks queued, - as we dont' want to block on another workers queue and -*/ -static inline int subtask_queue_steal(struct worker *worker, - struct subtask **subtask) -{ - struct subtask_queue *subtask_queue = &worker->q; - assert(subtask_queue != NULL); - -#ifdef MCPROFILE - uint64_t start = get_wall_time(); -#endif - CHECK_ERR(pthread_mutex_lock(&subtask_queue->mutex), "pthread_mutex_lock"); - - if (subtask_queue->num_used == 0) { - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - return 1; - } - - if (subtask_queue->dead) { - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - return -1; - } - - // 
Tasks gets stolen from the "front" - struct subtask *cur_back = subtask_queue->buffer[subtask_queue->first]; - struct subtask *new_subtask = NULL; - int remaining_iter = cur_back->end - cur_back->start; - // If subtask is chunkable, we steal half of the iterations - if (cur_back->chunkable && remaining_iter > 1) { - int64_t half = remaining_iter / 2; - new_subtask = malloc(sizeof(struct subtask)); - *new_subtask = *cur_back; - new_subtask->start = cur_back->end - half; - cur_back->end = new_subtask->start; - __atomic_fetch_add(cur_back->counter, 1, __ATOMIC_RELAXED); - } else { - new_subtask = cur_back; - subtask_queue->num_used--; - subtask_queue->first = (subtask_queue->first + 1) % subtask_queue->capacity; - } - *subtask = new_subtask; - - if (*subtask == NULL) { - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthred_mutex_unlock"); - return 1; - } - -#ifdef MCPROFILE - uint64_t end = get_wall_time(); - subtask_queue->time_dequeue += (end - start); - subtask_queue->n_dequeues++; -#endif - - // Broadcast a writer (if any) that there is now room for more. - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - - return 0; -} - - -// Pop an element from the back of the job queue. 
-// Optional argument can be provided to block or not -static inline int subtask_queue_dequeue(struct worker *worker, - struct subtask **subtask, int blocking) -{ - assert(worker != NULL); - struct subtask_queue *subtask_queue = &worker->q; - -#ifdef MCPROFILE - uint64_t start = get_wall_time(); -#endif - - CHECK_ERR(pthread_mutex_lock(&subtask_queue->mutex), "pthread_mutex_lock"); - if (subtask_queue->num_used == 0 && !blocking) { - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - return 1; - } - // Try to steal some work while the subtask_queue is empty - while (subtask_queue->num_used == 0 && !subtask_queue->dead) { - pthread_cond_wait(&subtask_queue->cond, &subtask_queue->mutex); - } - - if (subtask_queue->dead) { - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - return -1; - } - - // dequeue pops from the back - *subtask = subtask_queue->buffer[(subtask_queue->first + subtask_queue->num_used - 1) % subtask_queue->capacity]; - subtask_queue->num_used--; - - if (*subtask == NULL) { - assert(!"got NULL ptr"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthred_mutex_unlock"); - return -1; - } - -#ifdef MCPROFILE - uint64_t end = get_wall_time(); - subtask_queue->time_dequeue += (end - start); - subtask_queue->n_dequeues++; -#endif - - // Broadcast a writer (if any) that there is now room for more. - CHECK_ERR(pthread_cond_broadcast(&subtask_queue->cond), "pthread_cond_broadcast"); - CHECK_ERR(pthread_mutex_unlock(&subtask_queue->mutex), "pthread_mutex_unlock"); - - return 0; -} - -static inline int subtask_queue_is_empty(struct subtask_queue *subtask_queue) -{ - return subtask_queue->num_used == 0; -} - -/* Scheduler definitions */ - -struct scheduler { - struct worker *workers; - int num_threads; - - // If there is work to steal => active_work > 0 - volatile int active_work; - - // Only one error can be returned at the time now. 
Maybe we can - // provide a stack like structure for pushing errors onto if we wish - // to backpropagte multiple errors - volatile int error; - - // kappa time unit in nanoseconds - double kappa; -}; - - -// Thread local variable worker struct -// Note that, accesses to tls variables are expensive -// Minimize direct references to this variable -__thread struct worker* worker_local = NULL; - -static int64_t total_now(int64_t total, int64_t time) { - return total + (get_wall_time_ns() - time); -} - -static int random_other_worker(struct scheduler *scheduler, int my_id) { - int my_num_workers = scheduler->num_threads; - assert(my_num_workers != 1); - int i = fast_rand() % (my_num_workers - 1); - if (i >= my_id) { - i++; - } -#ifdef MCDEBUG - assert(i >= 0); - assert(i < my_num_workers); - assert(i != my_id); -#endif - - return i; -} - - -static inline int64_t compute_chunk_size(double kappa, struct subtask* subtask) -{ - double C = (double)*subtask->task_time / (double)*subtask->task_iter; - if (C == 0.0F) C += DBL_EPSILON; - return smax64((int64_t)(kappa / C), 1); -} - -/* Takes a chunk from subtask and enqueues the remaining iterations onto the worker's queue */ -/* A no-op if the subtask is not chunkable */ -static inline struct subtask* chunk_subtask(struct worker* worker, struct subtask *subtask) -{ - if (subtask->chunkable) { - // Do we have information from previous runs avaliable - if (*subtask->task_iter > 0) { - subtask->chunk_size = compute_chunk_size(worker->scheduler->kappa, subtask); - assert(subtask->chunk_size > 0); - } - int64_t remaining_iter = subtask->end - subtask->start; - assert(remaining_iter > 0); - if (remaining_iter > subtask->chunk_size) { - struct subtask *new_subtask = malloc(sizeof(struct subtask)); - *new_subtask = *subtask; - // increment the subtask join counter to account for new subtask - __atomic_fetch_add(subtask->counter, 1, __ATOMIC_RELAXED); - // Update range parameters - subtask->end = subtask->start + subtask->chunk_size; - 
new_subtask->start = subtask->end; - subtask_queue_enqueue(worker, new_subtask); - } - } - return subtask; -} - -static inline int run_subtask(struct worker* worker, struct subtask* subtask) -{ - assert(subtask != NULL); - assert(worker != NULL); - - subtask = chunk_subtask(worker, subtask); - worker->total = 0; - worker->timer = get_wall_time_ns(); -#if defined(MCPROFILE) - int64_t start = worker->timer; -#endif - worker->nested++; - int err = subtask->fn(subtask->args, subtask->start, subtask->end, - subtask->chunkable ? worker->tid : subtask->id, - worker->tid); - worker->nested--; - // Some error occured during some other subtask - // so we just clean-up and return - if (worker->scheduler->error != 0) { - // Even a failed task counts as finished. - __atomic_fetch_sub(subtask->counter, 1, __ATOMIC_RELAXED); - free(subtask); - return 0; - } - if (err != 0) { - __atomic_store_n(&worker->scheduler->error, err, __ATOMIC_RELAXED); - } - // Total sequential time spent - int64_t time_elapsed = total_now(worker->total, worker->timer); -#if defined(MCPROFILE) - worker->time_spent_working += get_wall_time_ns() - start; -#endif - int64_t iter = subtask->end - subtask->start; - // report measurements - // These updates should really be done using a single atomic CAS operation - __atomic_fetch_add(subtask->task_time, time_elapsed, __ATOMIC_RELAXED); - __atomic_fetch_add(subtask->task_iter, iter, __ATOMIC_RELAXED); - // We need a fence here, since if the counter is decremented before either - // of the two above are updated bad things can happen, e.g. 
if they are stack-allocated - __atomic_thread_fence(__ATOMIC_SEQ_CST); - __atomic_fetch_sub(subtask->counter, 1, __ATOMIC_RELAXED); - free(subtask); - return 0; -} - - -static inline int is_small(struct scheduler_segop *task, struct scheduler *scheduler, int *nsubtasks) -{ - int64_t time = *task->task_time; - int64_t iter = *task->task_iter; - - if (task->sched == DYNAMIC || iter == 0) { - *nsubtasks = scheduler->num_threads; - return 0; - } - - // Estimate the constant C - double C = (double)time / (double)iter; - double cur_task_iter = (double) task->iterations; - - // Returns true if the task is small i.e. - // if the number of iterations times C is smaller - // than the overhead of subtask creation - if (C == 0.0F || C * cur_task_iter < scheduler->kappa) { - *nsubtasks = 1; - return 1; - } - - // Else compute how many subtasks this tasks should create - int64_t min_iter_pr_subtask = smax64(scheduler->kappa / C, 1); - *nsubtasks = smin64(smax64(task->iterations / min_iter_pr_subtask, 1), scheduler->num_threads); - - return 0; -} - -// TODO make this prettier -static inline struct subtask* create_subtask(parloop_fn fn, - void* args, - const char* name, - volatile int* counter, - int64_t *timer, - int64_t *iter, - int64_t start, int64_t end, - int chunkable, - int64_t chunk_size, - int id) -{ - struct subtask* subtask = malloc(sizeof(struct subtask)); - if (subtask == NULL) { - assert(!"malloc failed in create_subtask"); - return NULL; - } - subtask->fn = fn; - subtask->args = args; - - subtask->counter = counter; - subtask->task_time = timer; - subtask->task_iter = iter; - - subtask->start = start; - subtask->end = end; - subtask->id = id; - subtask->chunkable = chunkable; - subtask->chunk_size = chunk_size; - - subtask->name = name; - return subtask; -} - -static int dummy_counter = 0; -static int64_t dummy_timer = 0; -static int64_t dummy_iter = 0; - -static int dummy_fn(void *args, int64_t start, int64_t end, int subtask_id, int tid) { - (void)args; - 
(void)start; - (void)end; - (void)subtask_id; - (void)tid; - return 0; -} - -// Wake up threads, who are blocking by pushing a dummy task -// onto their queue -static inline void wake_up_threads(struct scheduler *scheduler, int start_tid, int end_tid) { - -#if defined(MCDEBUG) - assert(start_tid >= 1); - assert(end_tid <= scheduler->num_threads); -#endif - for (int i = start_tid; i < end_tid; i++) { - struct subtask *subtask = create_subtask(dummy_fn, NULL, "dummy_fn", - &dummy_counter, - &dummy_timer, &dummy_iter, - 0, 0, - 0, 0, - 0); - CHECK_ERR(subtask_queue_enqueue(&scheduler->workers[i], subtask), "subtask_queue_enqueue"); - } -} - -static inline int is_finished(struct worker *worker) { - return worker->dead && subtask_queue_is_empty(&worker->q); -} - -// Try to steal from a random queue -static inline int steal_from_random_worker(struct worker* worker) -{ - int my_id = worker->tid; - struct scheduler* scheduler = worker->scheduler; - int k = random_other_worker(scheduler, my_id); - struct worker *worker_k = &scheduler->workers[k]; - struct subtask* subtask = NULL; - int retval = subtask_queue_steal(worker_k, &subtask); - if (retval == 0) { - subtask_queue_enqueue(worker, subtask); - return 1; - } - return 0; -} - - -static inline void *scheduler_worker(void* args) -{ - struct worker *worker = (struct worker*) args; - struct scheduler *scheduler = worker->scheduler; - worker_local = worker; - struct subtask *subtask = NULL; - - while(!is_finished(worker)) { - if (!subtask_queue_is_empty(&worker->q)) { - int retval = subtask_queue_dequeue(worker, &subtask, 0); - if (retval == 0) { - assert(subtask != NULL); - CHECK_ERR(run_subtask(worker, subtask), "run_subtask"); - } // else someone stole our work - - } else if (scheduler->active_work) { /* steal */ - while (!is_finished(worker) && scheduler->active_work) { - if (steal_from_random_worker(worker)) { - break; - } - } - } else { /* go back to sleep and wait for work */ - int retval = 
subtask_queue_dequeue(worker, &subtask, 1); - if (retval == 0) { - assert(subtask != NULL); - CHECK_ERR(run_subtask(worker, subtask), "run_subtask"); - } - } - } - - assert(subtask_queue_is_empty(&worker->q)); -#if defined(MCPROFILE) - if (worker->output_usage) - output_worker_usage(worker); -#endif - return NULL; -} - - -static inline int scheduler_execute_parloop(struct scheduler *scheduler, - struct scheduler_parloop *task, - int64_t *timer) -{ - - struct worker *worker = worker_local; - - struct scheduler_info info = task->info; - int64_t iter_pr_subtask = info.iter_pr_subtask; - int64_t remainder = info.remainder; - int nsubtasks = info.nsubtasks; - volatile int join_counter = nsubtasks; - - // Shared timer used to sum up all - // sequential work from each subtask - int64_t task_timer = 0; - int64_t task_iter = 0; - - enum scheduling sched = info.sched; - /* If each subtasks should be processed in chunks */ - int chunkable = sched == STATIC ? 0 : 1; - int64_t chunk_size = 1; // The initial chunk size when no info is avaliable - - - if (info.wake_up_threads || sched == DYNAMIC) - __atomic_add_fetch(&scheduler->active_work, nsubtasks, __ATOMIC_RELAXED); - - int64_t start = 0; - int64_t end = iter_pr_subtask + (int64_t)(remainder != 0); - for (int subtask_id = 0; subtask_id < nsubtasks; subtask_id++) { - struct subtask *subtask = create_subtask(task->fn, task->args, task->name, - &join_counter, - &task_timer, &task_iter, - start, end, - chunkable, chunk_size, - subtask_id); - assert(subtask != NULL); - // In most cases we will never have more subtasks than workers, - // but there can be exceptions (e.g. the kappa tuning function). - struct worker *subtask_worker = - worker->nested - ? 
&scheduler->workers[worker->tid] - : &scheduler->workers[subtask_id % scheduler->num_threads]; - CHECK_ERR(subtask_queue_enqueue(subtask_worker, subtask), - "subtask_queue_enqueue"); - // Update range params - start = end; - end += iter_pr_subtask + ((subtask_id + 1) < remainder); - } - - if (info.wake_up_threads) { - wake_up_threads(scheduler, nsubtasks, scheduler->num_threads); - } - - // Join (wait for subtasks to finish) - while(join_counter != 0) { - if (!subtask_queue_is_empty(&worker->q)) { - struct subtask *subtask = NULL; - int err = subtask_queue_dequeue(worker, &subtask, 0); - if (err == 0 ) { - CHECK_ERR(run_subtask(worker, subtask), "run_subtask"); - } - } else { - if (steal_from_random_worker(worker)) { - struct subtask *subtask = NULL; - int err = subtask_queue_dequeue(worker, &subtask, 0); - if (err == 0) { - CHECK_ERR(run_subtask(worker, subtask), "run_subtask"); - } - } - } - } - - - if (info.wake_up_threads || sched == DYNAMIC) { - __atomic_sub_fetch(&scheduler->active_work, nsubtasks, __ATOMIC_RELAXED); - } - - // Write back timing results of all sequential work - (*timer) += task_timer; - return scheduler->error; -} - - -static inline int scheduler_execute_task(struct scheduler *scheduler, - struct scheduler_parloop *task) -{ - - struct worker *worker = worker_local; - - int err = 0; - - // How much sequential work was performed by the task - int64_t task_timer = 0; - - /* Execute task sequential or parallel based on decision made earlier */ - if (task->info.nsubtasks == 1) { - int64_t start = get_wall_time_ns(); - err = task->fn(task->args, 0, task->iterations, 0, worker->tid); - int64_t end = get_wall_time_ns(); - task_timer = end - start; - worker->time_spent_working += task_timer; - // Report time measurements - // TODO the update of both of these should really be a single atomic!! 
- __atomic_fetch_add(task->info.task_time, task_timer, __ATOMIC_RELAXED); - __atomic_fetch_add(task->info.task_iter, task->iterations, __ATOMIC_RELAXED); - } else { - // Add "before" time if we already are inside a task - int64_t time_before = 0; - if (worker->nested > 0) { - time_before = total_now(worker->total, worker->timer); - } - - err = scheduler_execute_parloop(scheduler, task, &task_timer); - - // Report time measurements - // TODO the update of both of these should really be a single atomic!! - __atomic_fetch_add(task->info.task_time, task_timer, __ATOMIC_RELAXED); - __atomic_fetch_add(task->info.task_iter, task->iterations, __ATOMIC_RELAXED); - - // Update timers to account for new timings - worker->total = time_before + task_timer; - worker->timer = get_wall_time_ns(); - } - - - return err; -} - -/* Decide on how schedule the incoming task i.e. how many subtasks and - to run sequential or (potentially nested) parallel code body */ -static inline int scheduler_prepare_task(struct scheduler* scheduler, - struct scheduler_segop *task) -{ - assert(task != NULL); - - struct worker *worker = worker_local; - struct scheduler_info info; - info.task_time = task->task_time; - info.task_iter = task->task_iter; - - int nsubtasks; - // Decide if task should be scheduled sequentially - if (is_small(task, scheduler, &nsubtasks)) { - info.iter_pr_subtask = task->iterations; - info.remainder = 0; - info.nsubtasks = nsubtasks; - return task->top_level_fn(task->args, task->iterations, worker->tid, info); - } else { - info.iter_pr_subtask = task->iterations / nsubtasks; - info.remainder = task->iterations % nsubtasks; - info.sched = task->sched; - switch (task->sched) { - case STATIC: - info.nsubtasks = info.iter_pr_subtask == 0 ? 
info.remainder : ((task->iterations - info.remainder) / info.iter_pr_subtask); - break; - case DYNAMIC: - // As any thread can take any subtasks, we are being safe with using - // an upper bound on the number of tasks such that the task allocate enough memory - info.nsubtasks = info.iter_pr_subtask == 0 ? info.remainder : nsubtasks; - break; - default: - assert(!"Got unknown scheduling"); - } - } - - info.wake_up_threads = 0; - // We only use the nested parallel segop function if we can't exchaust all cores - // using the outer most level - if (task->nested_fn != NULL && info.nsubtasks < scheduler->num_threads && info.nsubtasks == task->iterations) { - if (worker->nested == 0) - info.wake_up_threads = 1; - return task->nested_fn(task->args, task->iterations, worker->tid, info); - } - - return task->top_level_fn(task->args, task->iterations, worker->tid, info); -} - -// Now some code for finding the proper value of kappa on a given -// machine (the smallest amount of work that amortises the cost of -// task creation). - -struct tuning_struct { - int32_t *free_tuning_res; - int32_t *array; -}; - -// Reduction function over an integer array -static int tuning_loop(void *args, int64_t start, int64_t end, - int flat_tid, int tid) { - (void)flat_tid; - (void)tid; - - int err = 0; - struct tuning_struct *tuning_struct = (struct tuning_struct *) args; - int32_t *array = tuning_struct->array; - int32_t *tuning_res = tuning_struct->free_tuning_res; - - int32_t sum = 0; - for (int i = start; i < end; i++) { - int32_t y = array[i]; - sum = add32(sum, y); - } - *tuning_res = sum; - return err; -} - -// The main entry point for the tuning process. Sets the provided -// variable ``kappa``. 
-static int determine_kappa(double *kappa) { - int err = 0; - - int64_t iterations = 100000000; - int64_t tuning_time = 0; - int64_t tuning_iter = 0; - - int32_t *array = malloc(sizeof(int32_t) * iterations); - for (int64_t i = 0; i < iterations; i++) { - array[i] = fast_rand(); - } - - int64_t start_tuning = get_wall_time_ns(); - /* **************************** */ - /* Run sequential reduce first' */ - /* **************************** */ - int64_t tuning_sequentiual_start = get_wall_time_ns(); - struct tuning_struct tuning_struct; - int32_t tuning_res; - tuning_struct.free_tuning_res = &tuning_res; - tuning_struct.array = array; - - err = tuning_loop(&tuning_struct, 0, iterations, 0, 0); - int64_t tuning_sequentiual_end = get_wall_time_ns(); - int64_t sequential_elapsed = tuning_sequentiual_end - tuning_sequentiual_start; - - double C = (double)sequential_elapsed / (double)iterations; - fprintf(stderr, " Time for sequential run is %lld - Found C %f\n", (long long)sequential_elapsed, C); - - /* ********************** */ - /* Now run tuning process */ - /* ********************** */ - // Setup a scheduler with a single worker - struct scheduler scheduler; - scheduler.num_threads = 1; - scheduler.workers = malloc(sizeof(struct worker)); - worker_local = &scheduler.workers[0]; - worker_local->tid = 0; - CHECK_ERR(subtask_queue_init(&scheduler.workers[0].q, 1024), - "failed to init queue for worker %d\n", 0); - - // Start tuning for kappa - double kappa_tune = 1000; // Initial kappa is 1 us - double ratio; - int64_t time_elapsed; - while(1) { - int64_t min_iter_pr_subtask = (int64_t) (kappa_tune / C) == 0 ? 
1 : (kappa_tune / C); - int nsubtasks = iterations / min_iter_pr_subtask; - struct scheduler_info info; - info.iter_pr_subtask = min_iter_pr_subtask; - - info.nsubtasks = iterations / min_iter_pr_subtask; - info.remainder = iterations % min_iter_pr_subtask; - info.task_time = &tuning_time; - info.task_iter = &tuning_iter; - info.sched = STATIC; - - struct scheduler_parloop parloop; - parloop.name = "tuning_loop"; - parloop.fn = tuning_loop; - parloop.args = &tuning_struct; - parloop.iterations = iterations; - parloop.info = info; - - int64_t tuning_chunked_start = get_wall_time_ns(); - int determine_kappa_err = - scheduler_execute_task(&scheduler, - &parloop); - assert(determine_kappa_err == 0); - int64_t tuning_chunked_end = get_wall_time_ns(); - time_elapsed = tuning_chunked_end - tuning_chunked_start; - - ratio = (double)time_elapsed / (double)sequential_elapsed; - if (ratio < 1.055) { - break; - } - kappa_tune += 100; // Increase by 100 ns at the time - fprintf(stderr, "nsubtask %d - kappa %f - ratio %f\n", nsubtasks, kappa_tune, ratio); - } - - int64_t end_tuning = get_wall_time_ns(); - fprintf(stderr, "tuning took %lld ns and found kappa %f - time %lld - ratio %f\n", - (long long)end_tuning - start_tuning, - kappa_tune, - (long long)time_elapsed, - ratio); - *kappa = kappa_tune; - - // Clean-up - CHECK_ERR(subtask_queue_destroy(&scheduler.workers[0].q), "failed to destroy queue"); - free(array); - free(scheduler.workers); - return err; -} - -static int scheduler_init(struct scheduler *scheduler, - int num_workers, - double kappa) { - assert(num_workers > 0); - - scheduler->kappa = kappa; - scheduler->num_threads = num_workers; - scheduler->active_work = 0; - scheduler->error = 0; - - scheduler->workers = calloc(num_workers, sizeof(struct worker)); - - const int queue_capacity = 1024; - - worker_local = &scheduler->workers[0]; - worker_local->tid = 0; - worker_local->scheduler = scheduler; - CHECK_ERR(subtask_queue_init(&worker_local->q, queue_capacity), - 
"failed to init queue for worker %d\n", 0); - - for (int i = 1; i < num_workers; i++) { - struct worker *cur_worker = &scheduler->workers[i]; - memset(cur_worker, 0, sizeof(struct worker)); - cur_worker->tid = i; - cur_worker->output_usage = 0; - cur_worker->scheduler = scheduler; - CHECK_ERR(subtask_queue_init(&cur_worker->q, queue_capacity), - "failed to init queue for worker %d\n", i); - - CHECK_ERR(pthread_create(&cur_worker->thread, - NULL, - &scheduler_worker, - cur_worker), - "Failed to create worker %d\n", i); - } - - return 0; -} - -static int scheduler_destroy(struct scheduler *scheduler) { - // First mark them all as dead. - for (int i = 1; i < scheduler->num_threads; i++) { - struct worker *cur_worker = &scheduler->workers[i]; - cur_worker->dead = 1; - } - - // Then destroy their task queues (this will wake up the threads and - // make them do their shutdown). - for (int i = 1; i < scheduler->num_threads; i++) { - struct worker *cur_worker = &scheduler->workers[i]; - subtask_queue_destroy(&cur_worker->q); - } - - // Then actually wait for them to stop. 
- for (int i = 1; i < scheduler->num_threads; i++) { - struct worker *cur_worker = &scheduler->workers[i]; - CHECK_ERR(pthread_join(scheduler->workers[i].thread, NULL), "pthread_join"); - } - - free(scheduler->workers); - - return 0; -} - -// End of scheduler.h - -struct futhark_context_config { - int debugging; - int profiling; - int num_threads; -} ; -struct futhark_context_config *futhark_context_config_new(void) -{ - struct futhark_context_config *cfg = - (struct futhark_context_config *) malloc(sizeof(struct futhark_context_config)); - - if (cfg == NULL) - return NULL; - cfg->debugging = 0; - cfg->profiling = 0; - cfg->num_threads = 0; - return cfg; -} -void futhark_context_config_free(struct futhark_context_config *cfg) -{ - free(cfg); -} -void futhark_context_config_set_debugging(struct futhark_context_config *cfg, - int detail) -{ - cfg->debugging = detail; -} -void futhark_context_config_set_profiling(struct futhark_context_config *cfg, - int flag) -{ - cfg->profiling = flag; -} -void futhark_context_config_set_logging(struct futhark_context_config *cfg, - int detail) -{ - /* Does nothing for this backend. 
*/ - (void) cfg; - (void) detail; -} -void futhark_context_config_set_num_threads(struct futhark_context_config *cfg, - int n) -{ - cfg->num_threads = n; -} -struct futhark_context { - struct scheduler scheduler; - int detail_memory; - int debugging; - int profiling; - int profiling_paused; - int logging; - lock_t lock; - char *error; - FILE *log; - int total_runs; - long total_runtime; - int64_t peak_mem_usage_default; - int64_t cur_mem_usage_default; - struct { - int dummy; - } constants; - int64_t *futhark_mc_segmap_parloop_6011_total_runtime; - int *futhark_mc_segmap_parloop_6011_runs; - int64_t *futhark_mc_segmap_parloop_6011_iter; - int64_t futhark_mc_segmap_parloop_6011_total_total_runtime; - int futhark_mc_segmap_parloop_6011_total_runs; - int64_t futhark_mc_segmap_parloop_6011_total_iter; - int64_t *futhark_mc_segmap_task_6009_total_runtime; - int *futhark_mc_segmap_task_6009_runs; - int64_t *futhark_mc_segmap_task_6009_iter; - int64_t futhark_mc_segmap_task_6009_total_time; - int64_t futhark_mc_segmap_task_6009_total_iter; - int64_t *futhark_mc_segmap_parloop_6020_total_runtime; - int *futhark_mc_segmap_parloop_6020_runs; - int64_t *futhark_mc_segmap_parloop_6020_iter; - int64_t futhark_mc_segmap_parloop_6020_total_total_runtime; - int futhark_mc_segmap_parloop_6020_total_runs; - int64_t futhark_mc_segmap_parloop_6020_total_iter; - int64_t *futhark_mc_segmap_task_6018_total_runtime; - int *futhark_mc_segmap_task_6018_runs; - int64_t *futhark_mc_segmap_task_6018_iter; - int64_t futhark_mc_segmap_task_6018_total_time; - int64_t futhark_mc_segmap_task_6018_total_iter; - int64_t *futhark_mc_segmap_parloop_6015_total_runtime; - int *futhark_mc_segmap_parloop_6015_runs; - int64_t *futhark_mc_segmap_parloop_6015_iter; - int64_t futhark_mc_segmap_parloop_6015_total_total_runtime; - int futhark_mc_segmap_parloop_6015_total_runs; - int64_t futhark_mc_segmap_parloop_6015_total_iter; - int64_t *futhark_mc_segmap_nested_task_6013_total_runtime; - int 
*futhark_mc_segmap_nested_task_6013_runs; - int64_t *futhark_mc_segmap_nested_task_6013_iter; - int64_t tuning_timing; - int64_t tuning_iter; -} ; -struct futhark_context *futhark_context_new(struct futhark_context_config *cfg) -{ - struct futhark_context *ctx = - (struct futhark_context *) malloc(sizeof(struct futhark_context)); - - if (ctx == NULL) - return NULL; - fast_srand(time(0)); - ctx->detail_memory = cfg->debugging; - ctx->debugging = cfg->debugging; - ctx->profiling = cfg->profiling; - ctx->profiling_paused = 0; - ctx->logging = 0; - ctx->error = NULL; - ctx->log = stderr; - create_lock(&ctx->lock); - - int tune_kappa = 0; - double kappa = 5.1f * 1000; - - if (tune_kappa) { - if (determine_kappa(&kappa) != 0) - return NULL; - } - if (scheduler_init(&ctx->scheduler, cfg->num_threads > - 0 ? cfg->num_threads : num_processors(), kappa) != 0) - return NULL; - ctx->peak_mem_usage_default = 0; - ctx->cur_mem_usage_default = 0; - ctx->futhark_mc_segmap_parloop_6011_total_runtime = calloc(sizeof(int64_t), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6011_runs = calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6011_iter = calloc(sizeof(sizeof(int64_t)), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6011_total_total_runtime = 0; - ctx->futhark_mc_segmap_parloop_6011_total_runs = 0; - ctx->futhark_mc_segmap_parloop_6011_total_iter = 0; - ctx->futhark_mc_segmap_task_6009_total_runtime = calloc(sizeof(int64_t), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6009_runs = calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6009_iter = calloc(sizeof(sizeof(int64_t)), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6009_total_time = 0; - ctx->futhark_mc_segmap_task_6009_total_iter = 0; - ctx->futhark_mc_segmap_parloop_6020_total_runtime = calloc(sizeof(int64_t), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6020_runs = 
calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6020_iter = calloc(sizeof(sizeof(int64_t)), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6020_total_total_runtime = 0; - ctx->futhark_mc_segmap_parloop_6020_total_runs = 0; - ctx->futhark_mc_segmap_parloop_6020_total_iter = 0; - ctx->futhark_mc_segmap_task_6018_total_runtime = calloc(sizeof(int64_t), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6018_runs = calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6018_iter = calloc(sizeof(sizeof(int64_t)), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_task_6018_total_time = 0; - ctx->futhark_mc_segmap_task_6018_total_iter = 0; - ctx->futhark_mc_segmap_parloop_6015_total_runtime = calloc(sizeof(int64_t), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6015_runs = calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6015_iter = calloc(sizeof(sizeof(int64_t)), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_parloop_6015_total_total_runtime = 0; - ctx->futhark_mc_segmap_parloop_6015_total_runs = 0; - ctx->futhark_mc_segmap_parloop_6015_total_iter = 0; - ctx->futhark_mc_segmap_nested_task_6013_total_runtime = - calloc(sizeof(int64_t), ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_nested_task_6013_runs = calloc(sizeof(int), - ctx->scheduler.num_threads); - ctx->futhark_mc_segmap_nested_task_6013_iter = - calloc(sizeof(sizeof(int64_t)), ctx->scheduler.num_threads); - init_constants(ctx); - return ctx; -} -void futhark_context_free(struct futhark_context *ctx) -{ - free_constants(ctx); - (void) scheduler_destroy(&ctx->scheduler); - free_lock(&ctx->lock); - free(ctx); -} -int futhark_context_sync(struct futhark_context *ctx) -{ - (void) ctx; - return 0; -} -static const char *size_names[0]; -static const char *size_vars[0]; -static const char *size_classes[0]; -int futhark_context_config_set_size(struct 
futhark_context_config *cfg, const - char *size_name, size_t size_value) -{ - (void) cfg; - (void) size_name; - (void) size_value; - return 1; -} -static int memblock_unref(struct futhark_context *ctx, struct memblock *block, - const char *desc) -{ - if (block->references != NULL) { - *block->references -= 1; - if (ctx->detail_memory) - fprintf(ctx->log, - "Unreferencing block %s (allocated as %s) in %s: %d references remaining.\n", - desc, block->desc, "default space", *block->references); - if (*block->references == 0) { - ctx->cur_mem_usage_default -= block->size; - free(block->mem); - free(block->references); - if (ctx->detail_memory) - fprintf(ctx->log, - "%lld bytes freed (now allocated: %lld bytes)\n", - (long long) block->size, - (long long) ctx->cur_mem_usage_default); - } - block->references = NULL; - } - return 0; -} -static int memblock_alloc(struct futhark_context *ctx, struct memblock *block, - int64_t size, const char *desc) -{ - if (size < 0) - futhark_panic(1, - "Negative allocation of %lld bytes attempted for %s in %s.\n", - (long long) size, desc, "default space", - ctx->cur_mem_usage_default); - - int ret = memblock_unref(ctx, block, desc); - - ctx->cur_mem_usage_default += size; - if (ctx->detail_memory) - fprintf(ctx->log, - "Allocating %lld bytes for %s in %s (then allocated: %lld bytes)", - (long long) size, desc, "default space", - (long long) ctx->cur_mem_usage_default); - if (ctx->cur_mem_usage_default > ctx->peak_mem_usage_default) { - ctx->peak_mem_usage_default = ctx->cur_mem_usage_default; - if (ctx->detail_memory) - fprintf(ctx->log, " (new peak).\n"); - } else if (ctx->detail_memory) - fprintf(ctx->log, ".\n"); - block->mem = (char *) malloc(size); - block->references = (int *) malloc(sizeof(int)); - *block->references = 1; - block->size = size; - block->desc = desc; - return ret; -} -static int memblock_set(struct futhark_context *ctx, struct memblock *lhs, - struct memblock *rhs, const char *lhs_desc) -{ - int ret = 
memblock_unref(ctx, lhs, lhs_desc); - - if (rhs->references != NULL) - (*rhs->references)++; - *lhs = *rhs; - return ret; -} -int futhark_get_num_sizes(void) -{ - return sizeof(size_names) / sizeof(size_names[0]); -} -const char *futhark_get_size_name(int i) -{ - return size_names[i]; -} -const char *futhark_get_size_class(int i) -{ - return size_classes[i]; -} -char *futhark_context_report(struct futhark_context *ctx) -{ - struct str_builder builder; - - str_builder_init(&builder); - if (ctx->detail_memory || ctx->profiling || ctx->logging) { - { } - } - if (ctx->profiling) { - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_parloop_6011 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_parloop_6011_runs[i], - (long) ctx->futhark_mc_segmap_parloop_6011_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6011_runs[i] != - 0 ? ctx->futhark_mc_segmap_parloop_6011_runs[i] : 1), - (long) ctx->futhark_mc_segmap_parloop_6011_total_runtime[i], - (double) ctx->futhark_mc_segmap_parloop_6011_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6011_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6011_iter[i]), - (long) ctx->futhark_mc_segmap_parloop_6011_iter[i], - (long) ctx->futhark_mc_segmap_parloop_6011_iter[i] / - (ctx->futhark_mc_segmap_parloop_6011_runs[i] != - 0 ? ctx->futhark_mc_segmap_parloop_6011_runs[i] : 1)); - fprintf(ctx->log, - " futhark_mc_segmap_parloop_6011_total ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - ctx->futhark_mc_segmap_parloop_6011_total_runs, - (long) ctx->futhark_mc_segmap_parloop_6011_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6011_total_runs != - 0 ? 
ctx->futhark_mc_segmap_parloop_6011_total_runs : 1), - (long) ctx->futhark_mc_segmap_parloop_6011_total_total_runtime, - (double) ctx->futhark_mc_segmap_parloop_6011_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6011_total_iter == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6011_total_iter), - (long) ctx->futhark_mc_segmap_parloop_6011_total_iter, - (long) ctx->futhark_mc_segmap_parloop_6011_total_iter / - (ctx->futhark_mc_segmap_parloop_6011_total_runs != - 0 ? ctx->futhark_mc_segmap_parloop_6011_total_runs : 1)); - ctx->total_runtime += - ctx->futhark_mc_segmap_parloop_6011_total_total_runtime; - ctx->total_runs += ctx->futhark_mc_segmap_parloop_6011_total_runs; - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_parloop_6020 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_parloop_6020_runs[i], - (long) ctx->futhark_mc_segmap_parloop_6020_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6020_runs[i] != - 0 ? ctx->futhark_mc_segmap_parloop_6020_runs[i] : 1), - (long) ctx->futhark_mc_segmap_parloop_6020_total_runtime[i], - (double) ctx->futhark_mc_segmap_parloop_6020_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6020_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6020_iter[i]), - (long) ctx->futhark_mc_segmap_parloop_6020_iter[i], - (long) ctx->futhark_mc_segmap_parloop_6020_iter[i] / - (ctx->futhark_mc_segmap_parloop_6020_runs[i] != - 0 ? ctx->futhark_mc_segmap_parloop_6020_runs[i] : 1)); - fprintf(ctx->log, - " futhark_mc_segmap_parloop_6020_total ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - ctx->futhark_mc_segmap_parloop_6020_total_runs, - (long) ctx->futhark_mc_segmap_parloop_6020_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6020_total_runs != - 0 ? 
ctx->futhark_mc_segmap_parloop_6020_total_runs : 1), - (long) ctx->futhark_mc_segmap_parloop_6020_total_total_runtime, - (double) ctx->futhark_mc_segmap_parloop_6020_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6020_total_iter == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6020_total_iter), - (long) ctx->futhark_mc_segmap_parloop_6020_total_iter, - (long) ctx->futhark_mc_segmap_parloop_6020_total_iter / - (ctx->futhark_mc_segmap_parloop_6020_total_runs != - 0 ? ctx->futhark_mc_segmap_parloop_6020_total_runs : 1)); - ctx->total_runtime += - ctx->futhark_mc_segmap_parloop_6020_total_total_runtime; - ctx->total_runs += ctx->futhark_mc_segmap_parloop_6020_total_runs; - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_task_6018 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_task_6018_runs[i], - (long) ctx->futhark_mc_segmap_task_6018_total_runtime[i] / - (ctx->futhark_mc_segmap_task_6018_runs[i] != - 0 ? ctx->futhark_mc_segmap_task_6018_runs[i] : 1), - (long) ctx->futhark_mc_segmap_task_6018_total_runtime[i], - (double) ctx->futhark_mc_segmap_task_6018_total_runtime[i] / - (ctx->futhark_mc_segmap_task_6018_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_task_6018_iter[i]), - (long) ctx->futhark_mc_segmap_task_6018_iter[i], - (long) ctx->futhark_mc_segmap_task_6018_iter[i] / - (ctx->futhark_mc_segmap_task_6018_runs[i] != - 0 ? ctx->futhark_mc_segmap_task_6018_runs[i] : 1)); - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_parloop_6015 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_parloop_6015_runs[i], - (long) ctx->futhark_mc_segmap_parloop_6015_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6015_runs[i] != - 0 ? 
ctx->futhark_mc_segmap_parloop_6015_runs[i] : 1), - (long) ctx->futhark_mc_segmap_parloop_6015_total_runtime[i], - (double) ctx->futhark_mc_segmap_parloop_6015_total_runtime[i] / - (ctx->futhark_mc_segmap_parloop_6015_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6015_iter[i]), - (long) ctx->futhark_mc_segmap_parloop_6015_iter[i], - (long) ctx->futhark_mc_segmap_parloop_6015_iter[i] / - (ctx->futhark_mc_segmap_parloop_6015_runs[i] != - 0 ? ctx->futhark_mc_segmap_parloop_6015_runs[i] : 1)); - fprintf(ctx->log, - " futhark_mc_segmap_parloop_6015_total ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - ctx->futhark_mc_segmap_parloop_6015_total_runs, - (long) ctx->futhark_mc_segmap_parloop_6015_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6015_total_runs != - 0 ? ctx->futhark_mc_segmap_parloop_6015_total_runs : 1), - (long) ctx->futhark_mc_segmap_parloop_6015_total_total_runtime, - (double) ctx->futhark_mc_segmap_parloop_6015_total_total_runtime / - (ctx->futhark_mc_segmap_parloop_6015_total_iter == - 0 ? 1 : (double) ctx->futhark_mc_segmap_parloop_6015_total_iter), - (long) ctx->futhark_mc_segmap_parloop_6015_total_iter, - (long) ctx->futhark_mc_segmap_parloop_6015_total_iter / - (ctx->futhark_mc_segmap_parloop_6015_total_runs != - 0 ? ctx->futhark_mc_segmap_parloop_6015_total_runs : 1)); - ctx->total_runtime += - ctx->futhark_mc_segmap_parloop_6015_total_total_runtime; - ctx->total_runs += ctx->futhark_mc_segmap_parloop_6015_total_runs; - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_task_6009 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_task_6009_runs[i], - (long) ctx->futhark_mc_segmap_task_6009_total_runtime[i] / - (ctx->futhark_mc_segmap_task_6009_runs[i] != - 0 ? 
ctx->futhark_mc_segmap_task_6009_runs[i] : 1), - (long) ctx->futhark_mc_segmap_task_6009_total_runtime[i], - (double) ctx->futhark_mc_segmap_task_6009_total_runtime[i] / - (ctx->futhark_mc_segmap_task_6009_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_task_6009_iter[i]), - (long) ctx->futhark_mc_segmap_task_6009_iter[i], - (long) ctx->futhark_mc_segmap_task_6009_iter[i] / - (ctx->futhark_mc_segmap_task_6009_runs[i] != - 0 ? ctx->futhark_mc_segmap_task_6009_runs[i] : 1)); - for (int i = 0; i < ctx->scheduler.num_threads; i++) - fprintf(ctx->log, - "tid %2d - futhark_mc_segmap_nested_task_6013 ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n", - i, ctx->futhark_mc_segmap_nested_task_6013_runs[i], - (long) ctx->futhark_mc_segmap_nested_task_6013_total_runtime[i] / - (ctx->futhark_mc_segmap_nested_task_6013_runs[i] != - 0 ? ctx->futhark_mc_segmap_nested_task_6013_runs[i] : 1), - (long) ctx->futhark_mc_segmap_nested_task_6013_total_runtime[i], - (double) ctx->futhark_mc_segmap_nested_task_6013_total_runtime[i] / - (ctx->futhark_mc_segmap_nested_task_6013_iter[i] == - 0 ? 1 : (double) ctx->futhark_mc_segmap_nested_task_6013_iter[i]), - (long) ctx->futhark_mc_segmap_nested_task_6013_iter[i], - (long) ctx->futhark_mc_segmap_nested_task_6013_iter[i] / - (ctx->futhark_mc_segmap_nested_task_6013_runs[i] != - 0 ? 
ctx->futhark_mc_segmap_nested_task_6013_runs[i] : 1)); - } - return builder.str; -} -char *futhark_context_get_error(struct futhark_context *ctx) -{ - char *error = ctx->error; - - ctx->error = NULL; - return error; -} -void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f) -{ - ctx->log = f; -} -void futhark_context_pause_profiling(struct futhark_context *ctx) -{ - ctx->profiling_paused = 1; -} -void futhark_context_unpause_profiling(struct futhark_context *ctx) -{ - ctx->profiling_paused = 0; -} -int futhark_context_clear_caches(struct futhark_context *ctx) -{ - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - ctx->peak_mem_usage_default = 0; - lock_unlock(&ctx->lock); - return ctx->error != NULL; -} -static int futrts_init(struct futhark_context *ctx, - struct memblock *out_mem_p_5993, - int64_t *out_scalar_out_5994, - int64_t *out_scalar_out_5995, - int64_t *out_scalar_out_5996, - struct memblock board_mem_5945, int64_t n_5861, - int64_t nb_rows_5863, int64_t nb_columns_5864, - int64_t sizze_5865); -static int futrts_key(struct futhark_context *ctx, - struct memblock *out_mem_p_5997, - int64_t *out_scalar_out_5998, - int64_t *out_scalar_out_5999, - int64_t *out_scalar_out_6000, - struct memblock board_mem_5945, int64_t implz2080U_5881, - int32_t e_5882, int32_t key_5883, int64_t nb_columns_5885, - int64_t nb_rows_5886, int64_t sizze_5887); -static int futrts_mouse(struct futhark_context *ctx, - struct memblock *out_mem_p_6001, - int64_t *out_scalar_out_6002, - int64_t *out_scalar_out_6003, - int64_t *out_scalar_out_6004, - struct memblock board_mem_5945, int64_t implz2080U_5866, - int32_t buttons_5867, int32_t x_5868, int32_t y_5869, - int64_t nb_columns_5871, int64_t nb_rows_5872, - int64_t sizze_5873); -static int futrts_render(struct futhark_context *ctx, - struct memblock *out_mem_p_6005, - int64_t *out_out_arrsizze_6006, - int64_t *out_out_arrsizze_6007, - struct memblock board_mem_5945, - int64_t implz2080U_5888, 
int64_t nb_columns_5890, - int64_t nb_rows_5891, int64_t sizze_5892); -static int futrts_resizze(struct futhark_context *ctx, - struct memblock *out_mem_p_6021, - int64_t *out_scalar_out_6022, - int64_t *out_scalar_out_6023, - int64_t *out_scalar_out_6024, - struct memblock board_mem_5945, - int64_t implz2080U_5848, int64_t h_5849, - int64_t w_5850, int64_t nb_columns_5852, - int64_t nb_rows_5853, int64_t sizze_5854); -static int futrts_step(struct futhark_context *ctx, - struct memblock *out_mem_p_6025, - int64_t *out_scalar_out_6026, - int64_t *out_scalar_out_6027, - int64_t *out_scalar_out_6028, - struct memblock board_mem_5945, int64_t implz2080U_5855, - float nameless_5856, int64_t nb_columns_5858, - int64_t nb_rows_5859, int64_t sizze_5860); -static int futrts_wheel(struct futhark_context *ctx, - struct memblock *out_mem_p_6029, - int64_t *out_scalar_out_6030, - int64_t *out_scalar_out_6031, - int64_t *out_scalar_out_6032, - struct memblock board_mem_5945, int64_t implz2080U_5874, - int32_t dx_5875, int32_t dy_5876, - int64_t nb_columns_5878, int64_t nb_rows_5879, - int64_t sizze_5880); -static int init_constants(struct futhark_context *ctx) -{ - (void) ctx; - - int err = 0; - - - cleanup: - return err; -} -static int free_constants(struct futhark_context *ctx) -{ - (void) ctx; - return 0; -} -struct futhark_mc_task_6008 { - struct futhark_context *ctx; - int64_t free_implz2080U_5888; - int64_t free_nb_columns_5890; - char *free_board_mem_5945; - int64_t free_bytes_5946; - char *free_mem_5964; -} ; -struct futhark_mc_segmap_parloop_struct_6010 { - struct futhark_context *ctx; - int64_t free_implz2080U_5888; - int64_t free_nb_columns_5890; - char *free_board_mem_5945; - int64_t free_bytes_5946; - char *free_mem_5964; -} ; -static int futhark_mc_segmap_parloop_6011(void *args, int64_t start, - int64_t end, int flat_tid_5915, - int tid) -{ - int err = 0; - struct futhark_mc_segmap_parloop_struct_6010 - *futhark_mc_segmap_parloop_struct_6010 = - (struct 
futhark_mc_segmap_parloop_struct_6010 *) args; - struct futhark_context *ctx = futhark_mc_segmap_parloop_struct_6010->ctx; - uint64_t futhark_mc_segmap_parloop_6011_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6011_start = get_wall_time(); - - int64_t implz2080U_5888 = - futhark_mc_segmap_parloop_struct_6010->free_implz2080U_5888; - int64_t nb_columns_5890 = - futhark_mc_segmap_parloop_struct_6010->free_nb_columns_5890; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_segmap_parloop_struct_6010->free_board_mem_5945, - .size =0, .references =NULL}; - int64_t bytes_5946 = futhark_mc_segmap_parloop_struct_6010->free_bytes_5946; - struct memblock mem_5964 = {.desc ="mem_5964", .mem = - futhark_mc_segmap_parloop_struct_6010->free_mem_5964, - .size =0, .references =NULL}; - size_t mem_5949_cached_sizze_6012 = 0; - char *mem_5949 = NULL; - int64_t iterations = end - start; - int64_t iter_5969 = start; - - if (mem_5949_cached_sizze_6012 < (size_t) bytes_5946) { - mem_5949 = realloc(mem_5949, bytes_5946); - mem_5949_cached_sizze_6012 = bytes_5946; - } - for (; iter_5969 < end; iter_5969++) { - if (ctx->debugging) - fprintf(ctx->log, "%s\n", "SegMap fbody"); - - int64_t gtid_5916; - - gtid_5916 = iter_5969; - - int64_t x_5903; - - x_5903 = mul64(nb_columns_5890, gtid_5916); - for (int64_t i_5970 = 0; i_5970 < nb_columns_5890; i_5970++) { - int64_t get_cell_index_res_5972 = add64(x_5903, i_5970); - bool x_5973 = sle64((int64_t) 0, get_cell_index_res_5972); - bool y_5974 = slt64(get_cell_index_res_5972, implz2080U_5888); - bool bounds_check_5975 = x_5973 && y_5974; - bool index_certs_5976; - - if (!bounds_check_5975) { - ctx->error = - msgprintf("Error: %s%lld%s%lld%s\n\nBacktrace:\n%s", - "Index [", get_cell_index_res_5972, - "] out of bounds for array of shape [", - implz2080U_5888, "].", - "-> #0 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:30:24-63\n #1 
/home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:30:10-107\n #2 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:29:8-31:12\n #3 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:26:1-31:12\n"); - return 1; - } - - int8_t x_5977 = - ((int8_t *) board_mem_5945.mem)[get_cell_index_res_5972]; - bool cond_5978 = x_5977 == (int8_t) 1; - int32_t defunc_0_f_res_5979; - - if (cond_5978) { - defunc_0_f_res_5979 = -1; - } else { - defunc_0_f_res_5979 = -16777216; - } - ((int32_t *) mem_5949)[i_5970] = defunc_0_f_res_5979; - } - memmove(mem_5964.mem + gtid_5916 * nb_columns_5890 * (int64_t) 4, - mem_5949 + (int64_t) 0, nb_columns_5890 * - (int64_t) sizeof(int32_t)); - } - - cleanup: - { } - free(mem_5949); - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6011_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6011_end - - futhark_mc_segmap_parloop_6011_start; - - ctx->futhark_mc_segmap_parloop_6011_runs[tid]++; - ctx->futhark_mc_segmap_parloop_6011_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_parloop_6011_iter[tid] += iterations; - } - return err; -} -int futhark_mc_segmap_task_6009(void *args, int64_t iterations, int tid, - struct scheduler_info info) -{ - int err = 0; - int flat_tid_5915 = tid; - int num_tasks_5968 = info.nsubtasks; - struct futhark_mc_task_6008 *futhark_mc_task_6008 = - (struct futhark_mc_task_6008 *) args; - struct futhark_context *ctx = futhark_mc_task_6008->ctx; - uint64_t futhark_mc_segmap_task_6009_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_task_6009_start = get_wall_time(); - - int64_t implz2080U_5888 = futhark_mc_task_6008->free_implz2080U_5888; - int64_t nb_columns_5890 = futhark_mc_task_6008->free_nb_columns_5890; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_task_6008->free_board_mem_5945, - .size =0, .references =NULL}; - int64_t 
bytes_5946 = futhark_mc_task_6008->free_bytes_5946; - struct memblock mem_5964 = {.desc ="mem_5964", .mem = - futhark_mc_task_6008->free_mem_5964, .size =0, - .references =NULL}; - int64_t iter_5969; - struct futhark_mc_segmap_parloop_struct_6010 - futhark_mc_segmap_parloop_struct_6010; - - futhark_mc_segmap_parloop_struct_6010.ctx = ctx; - futhark_mc_segmap_parloop_struct_6010.free_implz2080U_5888 = - implz2080U_5888; - futhark_mc_segmap_parloop_struct_6010.free_nb_columns_5890 = - nb_columns_5890; - futhark_mc_segmap_parloop_struct_6010.free_board_mem_5945 = - board_mem_5945.mem; - futhark_mc_segmap_parloop_struct_6010.free_bytes_5946 = bytes_5946; - futhark_mc_segmap_parloop_struct_6010.free_mem_5964 = mem_5964.mem; - - struct scheduler_parloop futhark_mc_segmap_parloop_6011_task; - - futhark_mc_segmap_parloop_6011_task.name = "futhark_mc_segmap_parloop_6011"; - futhark_mc_segmap_parloop_6011_task.fn = futhark_mc_segmap_parloop_6011; - futhark_mc_segmap_parloop_6011_task.args = - &futhark_mc_segmap_parloop_struct_6010; - futhark_mc_segmap_parloop_6011_task.iterations = iterations; - futhark_mc_segmap_parloop_6011_task.info = info; - - uint64_t futhark_mc_segmap_parloop_6011_total_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6011_total_start = get_wall_time(); - - int futhark_mc_segmap_parloop_6011_err = - scheduler_execute_task(&ctx->scheduler, - &futhark_mc_segmap_parloop_6011_task); - - if (futhark_mc_segmap_parloop_6011_err != 0) { - err = 1; - goto cleanup; - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6011_total_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6011_total_end - - futhark_mc_segmap_parloop_6011_total_start; - - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6011_total_runs, 1, - __ATOMIC_RELAXED); - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6011_total_total_runtime, - elapsed, __ATOMIC_RELAXED); - 
__atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6011_total_iter, - iterations, __ATOMIC_RELAXED); - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_task_6009_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_task_6009_end - - futhark_mc_segmap_task_6009_start; - - ctx->futhark_mc_segmap_task_6009_runs[tid]++; - ctx->futhark_mc_segmap_task_6009_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_task_6009_iter[tid] += iterations; - } - - cleanup: - { } - return err; -} -struct futhark_mc_segmap_parloop_struct_6014 { - struct futhark_context *ctx; - int64_t free_implz2080U_5888; - int64_t free_nb_columns_5890; - char *free_board_mem_5945; - int64_t free_bytes_5946; - char *free_mem_5964; -} ; -struct futhark_mc_task_6017 { - struct futhark_context *ctx; - int64_t free_implz2080U_5888; - int64_t free_x_5922; - char *free_board_mem_5945; - char *free_mem_5947; -} ; -struct futhark_mc_segmap_parloop_struct_6019 { - struct futhark_context *ctx; - int64_t free_implz2080U_5888; - int64_t free_x_5922; - char *free_board_mem_5945; - char *free_mem_5947; -} ; -static int futhark_mc_segmap_parloop_6020(void *args, int64_t start, - int64_t end, int flat_tid_5919, - int tid) -{ - int err = 0; - struct futhark_mc_segmap_parloop_struct_6019 - *futhark_mc_segmap_parloop_struct_6019 = - (struct futhark_mc_segmap_parloop_struct_6019 *) args; - struct futhark_context *ctx = futhark_mc_segmap_parloop_struct_6019->ctx; - uint64_t futhark_mc_segmap_parloop_6020_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6020_start = get_wall_time(); - - int64_t implz2080U_5888 = - futhark_mc_segmap_parloop_struct_6019->free_implz2080U_5888; - int64_t x_5922 = futhark_mc_segmap_parloop_struct_6019->free_x_5922; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_segmap_parloop_struct_6019->free_board_mem_5945, - .size =0, .references =NULL}; - struct memblock mem_5947 = {.desc 
="mem_5947", .mem = - futhark_mc_segmap_parloop_struct_6019->free_mem_5947, - .size =0, .references =NULL}; - int64_t iterations = end - start; - int64_t iter_5992 = start; - - for (; iter_5992 < end; iter_5992++) { - if (ctx->debugging) - fprintf(ctx->log, "%s\n", "SegMap fbody"); - - int64_t gtid_5920; - - gtid_5920 = iter_5992; - - int64_t get_cell_index_res_5983; - - get_cell_index_res_5983 = add64(gtid_5920, x_5922); - - bool x_5984 = sle64((int64_t) 0, get_cell_index_res_5983); - bool y_5985 = slt64(get_cell_index_res_5983, implz2080U_5888); - bool bounds_check_5986 = x_5984 && y_5985; - bool index_certs_5987; - - if (!bounds_check_5986) { - ctx->error = msgprintf("Error: %s%lld%s%lld%s\n\nBacktrace:\n%s", - "Index [", get_cell_index_res_5983, - "] out of bounds for array of shape [", - implz2080U_5888, "].", - "-> #0 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:30:24-63\n #1 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:30:10-107\n #2 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:29:8-31:12\n #3 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:26:1-31:12\n"); - return 1; - } - - int8_t x_5988 = - ((int8_t *) board_mem_5945.mem)[get_cell_index_res_5983]; - bool cond_5989 = x_5988 == (int8_t) 1; - int32_t defunc_0_f_res_5990; - - if (cond_5989) { - defunc_0_f_res_5990 = -1; - } else { - defunc_0_f_res_5990 = -16777216; - } - ((int32_t *) mem_5947.mem)[gtid_5920] = defunc_0_f_res_5990; - } - - cleanup: - { } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6020_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6020_end - - futhark_mc_segmap_parloop_6020_start; - - ctx->futhark_mc_segmap_parloop_6020_runs[tid]++; - ctx->futhark_mc_segmap_parloop_6020_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_parloop_6020_iter[tid] += iterations; - } - return err; -} -int 
futhark_mc_segmap_task_6018(void *args, int64_t iterations, int tid, - struct scheduler_info info) -{ - int err = 0; - int flat_tid_5919 = tid; - int num_tasks_5991 = info.nsubtasks; - struct futhark_mc_task_6017 *futhark_mc_task_6017 = - (struct futhark_mc_task_6017 *) args; - struct futhark_context *ctx = futhark_mc_task_6017->ctx; - uint64_t futhark_mc_segmap_task_6018_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_task_6018_start = get_wall_time(); - - int64_t implz2080U_5888 = futhark_mc_task_6017->free_implz2080U_5888; - int64_t x_5922 = futhark_mc_task_6017->free_x_5922; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_task_6017->free_board_mem_5945, - .size =0, .references =NULL}; - struct memblock mem_5947 = {.desc ="mem_5947", .mem = - futhark_mc_task_6017->free_mem_5947, .size =0, - .references =NULL}; - int64_t iter_5992; - struct futhark_mc_segmap_parloop_struct_6019 - futhark_mc_segmap_parloop_struct_6019; - - futhark_mc_segmap_parloop_struct_6019.ctx = ctx; - futhark_mc_segmap_parloop_struct_6019.free_implz2080U_5888 = - implz2080U_5888; - futhark_mc_segmap_parloop_struct_6019.free_x_5922 = x_5922; - futhark_mc_segmap_parloop_struct_6019.free_board_mem_5945 = - board_mem_5945.mem; - futhark_mc_segmap_parloop_struct_6019.free_mem_5947 = mem_5947.mem; - - struct scheduler_parloop futhark_mc_segmap_parloop_6020_task; - - futhark_mc_segmap_parloop_6020_task.name = "futhark_mc_segmap_parloop_6020"; - futhark_mc_segmap_parloop_6020_task.fn = futhark_mc_segmap_parloop_6020; - futhark_mc_segmap_parloop_6020_task.args = - &futhark_mc_segmap_parloop_struct_6019; - futhark_mc_segmap_parloop_6020_task.iterations = iterations; - futhark_mc_segmap_parloop_6020_task.info = info; - - uint64_t futhark_mc_segmap_parloop_6020_total_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6020_total_start = get_wall_time(); - - int futhark_mc_segmap_parloop_6020_err = - 
scheduler_execute_task(&ctx->scheduler, - &futhark_mc_segmap_parloop_6020_task); - - if (futhark_mc_segmap_parloop_6020_err != 0) { - err = 1; - goto cleanup; - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6020_total_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6020_total_end - - futhark_mc_segmap_parloop_6020_total_start; - - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6020_total_runs, 1, - __ATOMIC_RELAXED); - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6020_total_total_runtime, - elapsed, __ATOMIC_RELAXED); - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6020_total_iter, - iterations, __ATOMIC_RELAXED); - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_task_6018_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_task_6018_end - - futhark_mc_segmap_task_6018_start; - - ctx->futhark_mc_segmap_task_6018_runs[tid]++; - ctx->futhark_mc_segmap_task_6018_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_task_6018_iter[tid] += iterations; - } - - cleanup: - { } - return err; -} -static int futhark_mc_segmap_parloop_6015(void *args, int64_t start, - int64_t end, int flat_tid_5917, - int tid) -{ - int err = 0; - struct futhark_mc_segmap_parloop_struct_6014 - *futhark_mc_segmap_parloop_struct_6014 = - (struct futhark_mc_segmap_parloop_struct_6014 *) args; - struct futhark_context *ctx = futhark_mc_segmap_parloop_struct_6014->ctx; - uint64_t futhark_mc_segmap_parloop_6015_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6015_start = get_wall_time(); - - int64_t implz2080U_5888 = - futhark_mc_segmap_parloop_struct_6014->free_implz2080U_5888; - int64_t nb_columns_5890 = - futhark_mc_segmap_parloop_struct_6014->free_nb_columns_5890; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_segmap_parloop_struct_6014->free_board_mem_5945, - .size =0, .references =NULL}; - int64_t 
bytes_5946 = futhark_mc_segmap_parloop_struct_6014->free_bytes_5946; - struct memblock mem_5964 = {.desc ="mem_5964", .mem = - futhark_mc_segmap_parloop_struct_6014->free_mem_5964, - .size =0, .references =NULL}; - size_t mem_5947_cached_sizze_6016 = 0; - char *mem_5947 = NULL; - int64_t iterations = end - start; - int64_t iter_5982 = start; - - if (mem_5947_cached_sizze_6016 < (size_t) bytes_5946) { - mem_5947 = realloc(mem_5947, bytes_5946); - mem_5947_cached_sizze_6016 = bytes_5946; - } - for (; iter_5982 < end; iter_5982++) { - if (ctx->debugging) - fprintf(ctx->log, "%s\n", "SegMap fbody"); - - int64_t gtid_5918; - - gtid_5918 = iter_5982; - - int64_t x_5922; - - x_5922 = mul64(nb_columns_5890, gtid_5918); - - int64_t flat_tid_5919 = (int64_t) 0; - int32_t num_tasks_5991; - struct futhark_mc_task_6017 futhark_mc_task_6017; - - futhark_mc_task_6017.ctx = ctx; - futhark_mc_task_6017.free_implz2080U_5888 = implz2080U_5888; - futhark_mc_task_6017.free_x_5922 = x_5922; - futhark_mc_task_6017.free_board_mem_5945 = board_mem_5945.mem; - futhark_mc_task_6017.free_mem_5947 = mem_5947; - - struct scheduler_segop futhark_mc_task_6017_task; - - futhark_mc_task_6017_task.args = &futhark_mc_task_6017; - futhark_mc_task_6017_task.top_level_fn = futhark_mc_segmap_task_6018; - futhark_mc_task_6017_task.name = "futhark_mc_segmap_task_6018"; - futhark_mc_task_6017_task.iterations = nb_columns_5890; - futhark_mc_task_6017_task.task_time = - &ctx->futhark_mc_segmap_task_6018_total_time; - futhark_mc_task_6017_task.task_iter = - &ctx->futhark_mc_segmap_task_6018_total_iter; - futhark_mc_task_6017_task.sched = STATIC; - futhark_mc_task_6017_task.nested_fn = NULL; - - int futhark_mc_segmap_task_6018_err = - scheduler_prepare_task(&ctx->scheduler, &futhark_mc_task_6017_task); - - if (futhark_mc_segmap_task_6018_err != 0) { - err = 1; - goto cleanup; - } - memmove(mem_5964.mem + gtid_5918 * nb_columns_5890 * (int64_t) 4, - mem_5947 + (int64_t) 0, nb_columns_5890 * - (int64_t) 
sizeof(int32_t)); - } - - cleanup: - { } - free(mem_5947); - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6015_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6015_end - - futhark_mc_segmap_parloop_6015_start; - - ctx->futhark_mc_segmap_parloop_6015_runs[tid]++; - ctx->futhark_mc_segmap_parloop_6015_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_parloop_6015_iter[tid] += iterations; - } - return err; -} -int futhark_mc_segmap_nested_task_6013(void *args, int64_t iterations, int tid, - struct scheduler_info info) -{ - int err = 0; - int flat_tid_5917 = tid; - int num_tasks_5968 = info.nsubtasks; - struct futhark_mc_task_6008 *futhark_mc_task_6008 = - (struct futhark_mc_task_6008 *) args; - struct futhark_context *ctx = futhark_mc_task_6008->ctx; - uint64_t futhark_mc_segmap_nested_task_6013_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_nested_task_6013_start = get_wall_time(); - - int64_t implz2080U_5888 = futhark_mc_task_6008->free_implz2080U_5888; - int64_t nb_columns_5890 = futhark_mc_task_6008->free_nb_columns_5890; - struct memblock board_mem_5945 = {.desc ="board_mem_5945", .mem = - futhark_mc_task_6008->free_board_mem_5945, - .size =0, .references =NULL}; - int64_t bytes_5946 = futhark_mc_task_6008->free_bytes_5946; - struct memblock mem_5964 = {.desc ="mem_5964", .mem = - futhark_mc_task_6008->free_mem_5964, .size =0, - .references =NULL}; - int64_t iter_5982; - struct futhark_mc_segmap_parloop_struct_6014 - futhark_mc_segmap_parloop_struct_6014; - - futhark_mc_segmap_parloop_struct_6014.ctx = ctx; - futhark_mc_segmap_parloop_struct_6014.free_implz2080U_5888 = - implz2080U_5888; - futhark_mc_segmap_parloop_struct_6014.free_nb_columns_5890 = - nb_columns_5890; - futhark_mc_segmap_parloop_struct_6014.free_board_mem_5945 = - board_mem_5945.mem; - futhark_mc_segmap_parloop_struct_6014.free_bytes_5946 = bytes_5946; - 
futhark_mc_segmap_parloop_struct_6014.free_mem_5964 = mem_5964.mem; - - struct scheduler_parloop futhark_mc_segmap_parloop_6015_task; - - futhark_mc_segmap_parloop_6015_task.name = "futhark_mc_segmap_parloop_6015"; - futhark_mc_segmap_parloop_6015_task.fn = futhark_mc_segmap_parloop_6015; - futhark_mc_segmap_parloop_6015_task.args = - &futhark_mc_segmap_parloop_struct_6014; - futhark_mc_segmap_parloop_6015_task.iterations = iterations; - futhark_mc_segmap_parloop_6015_task.info = info; - - uint64_t futhark_mc_segmap_parloop_6015_total_start = 0; - - if (ctx->profiling && !ctx->profiling_paused) - futhark_mc_segmap_parloop_6015_total_start = get_wall_time(); - - int futhark_mc_segmap_parloop_6015_err = - scheduler_execute_task(&ctx->scheduler, - &futhark_mc_segmap_parloop_6015_task); - - if (futhark_mc_segmap_parloop_6015_err != 0) { - err = 1; - goto cleanup; - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_parloop_6015_total_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_parloop_6015_total_end - - futhark_mc_segmap_parloop_6015_total_start; - - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6015_total_runs, 1, - __ATOMIC_RELAXED); - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6015_total_total_runtime, - elapsed, __ATOMIC_RELAXED); - __atomic_fetch_add(&ctx->futhark_mc_segmap_parloop_6015_total_iter, - iterations, __ATOMIC_RELAXED); - } - if (ctx->profiling && !ctx->profiling_paused) { - uint64_t futhark_mc_segmap_nested_task_6013_end = get_wall_time(); - uint64_t elapsed = futhark_mc_segmap_nested_task_6013_end - - futhark_mc_segmap_nested_task_6013_start; - - ctx->futhark_mc_segmap_nested_task_6013_runs[tid]++; - ctx->futhark_mc_segmap_nested_task_6013_total_runtime[tid] += elapsed; - ctx->futhark_mc_segmap_nested_task_6013_iter[tid] += iterations; - } - - cleanup: - { } - return err; -} -static int futrts_init(struct futhark_context *ctx, - struct memblock *out_mem_p_5993, - int64_t 
*out_scalar_out_5994, - int64_t *out_scalar_out_5995, - int64_t *out_scalar_out_5996, - struct memblock board_mem_5945, int64_t n_5861, - int64_t nb_rows_5863, int64_t nb_columns_5864, - int64_t sizze_5865) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5864; - scalar_out_5967 = nb_rows_5863; - scalar_out_5968 = sizze_5865; - (*out_mem_p_5993).references = NULL; - if (memblock_set(ctx, &*out_mem_p_5993, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_scalar_out_5994 = scalar_out_5966; - *out_scalar_out_5995 = scalar_out_5967; - *out_scalar_out_5996 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_key(struct futhark_context *ctx, - struct memblock *out_mem_p_5997, - int64_t *out_scalar_out_5998, - int64_t *out_scalar_out_5999, - int64_t *out_scalar_out_6000, - struct memblock board_mem_5945, int64_t implz2080U_5881, - int32_t e_5882, int32_t key_5883, int64_t nb_columns_5885, - int64_t nb_rows_5886, int64_t sizze_5887) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5885; - scalar_out_5967 = nb_rows_5886; - scalar_out_5968 = sizze_5887; - (*out_mem_p_5997).references = NULL; - if (memblock_set(ctx, &*out_mem_p_5997, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_scalar_out_5998 = scalar_out_5966; - *out_scalar_out_5999 = scalar_out_5967; - *out_scalar_out_6000 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, 
"out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_mouse(struct futhark_context *ctx, - struct memblock *out_mem_p_6001, - int64_t *out_scalar_out_6002, - int64_t *out_scalar_out_6003, - int64_t *out_scalar_out_6004, - struct memblock board_mem_5945, int64_t implz2080U_5866, - int32_t buttons_5867, int32_t x_5868, int32_t y_5869, - int64_t nb_columns_5871, int64_t nb_rows_5872, - int64_t sizze_5873) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5871; - scalar_out_5967 = nb_rows_5872; - scalar_out_5968 = sizze_5873; - (*out_mem_p_6001).references = NULL; - if (memblock_set(ctx, &*out_mem_p_6001, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_scalar_out_6002 = scalar_out_5966; - *out_scalar_out_6003 = scalar_out_5967; - *out_scalar_out_6004 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_render(struct futhark_context *ctx, - struct memblock *out_mem_p_6005, - int64_t *out_out_arrsizze_6006, - int64_t *out_out_arrsizze_6007, - struct memblock board_mem_5945, - int64_t implz2080U_5888, int64_t nb_columns_5890, - int64_t nb_rows_5891, int64_t sizze_5892) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t out_arrsizze_5966; - int64_t out_arrsizze_5967; - bool bounds_invalid_upwards_5893 = slt64(nb_rows_5891, (int64_t) 0); - bool valid_5894 = !bounds_invalid_upwards_5893; - bool range_valid_c_5895; - - if (!valid_5894) { - ctx->error = msgprintf("Error: %s%lld%s%lld%s%lld%s\n\nBacktrace:\n%s", - "Range ", (int64_t) 0, "..", (int64_t) 1, "..<", - nb_rows_5891, " is invalid.", - "-> #0 
/prelude/array.fut:90:3-10\n #1 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:27:17-30\n #2 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:26:1-31:12\n"); - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - return 1; - } - - bool bounds_invalid_upwards_5897 = slt64(nb_columns_5890, (int64_t) 0); - bool valid_5898 = !bounds_invalid_upwards_5897; - bool range_valid_c_5899; - - if (!valid_5898) { - ctx->error = msgprintf("Error: %s%lld%s%lld%s%lld%s\n\nBacktrace:\n%s", - "Range ", (int64_t) 0, "..", (int64_t) 1, "..<", - nb_columns_5890, " is invalid.", - "-> #0 /prelude/array.fut:90:3-10\n #1 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:28:17-33\n #2 /home/baptistecdr/Documents/Cours/projet-de-bachelor/game_of_life/gol.fut:26:1-31:12\n"); - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - return 1; - } - - int64_t binop_x_5963 = nb_columns_5890 * nb_rows_5891; - int64_t bytes_5962 = (int64_t) 4 * binop_x_5963; - struct memblock mem_5964; - - mem_5964.references = NULL; - if (memblock_alloc(ctx, &mem_5964, bytes_5962, "mem_5964")) { - err = 1; - goto cleanup; - } - - int64_t bytes_5946 = (int64_t) 4 * nb_columns_5890; - int64_t flat_tid_5915 = (int64_t) 0; - int32_t num_tasks_5968; - int64_t flat_tid_5917; - - flat_tid_5917 = (int64_t) 0; - - struct futhark_mc_task_6008 futhark_mc_task_6008; - - futhark_mc_task_6008.ctx = ctx; - futhark_mc_task_6008.free_implz2080U_5888 = implz2080U_5888; - futhark_mc_task_6008.free_nb_columns_5890 = nb_columns_5890; - futhark_mc_task_6008.free_board_mem_5945 = board_mem_5945.mem; - futhark_mc_task_6008.free_bytes_5946 = bytes_5946; - futhark_mc_task_6008.free_mem_5964 = mem_5964.mem; - - struct scheduler_segop futhark_mc_task_6008_task; - - futhark_mc_task_6008_task.args = &futhark_mc_task_6008; - futhark_mc_task_6008_task.top_level_fn = futhark_mc_segmap_task_6009; - futhark_mc_task_6008_task.name = 
"futhark_mc_segmap_task_6009"; - futhark_mc_task_6008_task.iterations = nb_rows_5891; - futhark_mc_task_6008_task.task_time = - &ctx->futhark_mc_segmap_task_6009_total_time; - futhark_mc_task_6008_task.task_iter = - &ctx->futhark_mc_segmap_task_6009_total_iter; - futhark_mc_task_6008_task.sched = STATIC; - futhark_mc_task_6008_task.nested_fn = futhark_mc_segmap_nested_task_6013; - - int futhark_mc_segmap_task_6009_err = - scheduler_prepare_task(&ctx->scheduler, &futhark_mc_task_6008_task); - - if (futhark_mc_segmap_task_6009_err != 0) { - err = 1; - goto cleanup; - } - out_arrsizze_5966 = nb_rows_5891; - out_arrsizze_5967 = nb_columns_5890; - if (memblock_set(ctx, &out_mem_5965, &mem_5964, "mem_5964") != 0) - return 1; - (*out_mem_p_6005).references = NULL; - if (memblock_set(ctx, &*out_mem_p_6005, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_out_arrsizze_6006 = out_arrsizze_5966; - *out_out_arrsizze_6007 = out_arrsizze_5967; - if (memblock_unref(ctx, &mem_5964, "mem_5964") != 0) - return 1; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_resizze(struct futhark_context *ctx, - struct memblock *out_mem_p_6021, - int64_t *out_scalar_out_6022, - int64_t *out_scalar_out_6023, - int64_t *out_scalar_out_6024, - struct memblock board_mem_5945, - int64_t implz2080U_5848, int64_t h_5849, - int64_t w_5850, int64_t nb_columns_5852, - int64_t nb_rows_5853, int64_t sizze_5854) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5852; - scalar_out_5967 = nb_rows_5853; - scalar_out_5968 = sizze_5854; - (*out_mem_p_6021).references = NULL; - if (memblock_set(ctx, &*out_mem_p_6021, &out_mem_5965, "out_mem_5965") != 0) - return 1; - 
*out_scalar_out_6022 = scalar_out_5966; - *out_scalar_out_6023 = scalar_out_5967; - *out_scalar_out_6024 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_step(struct futhark_context *ctx, - struct memblock *out_mem_p_6025, - int64_t *out_scalar_out_6026, - int64_t *out_scalar_out_6027, - int64_t *out_scalar_out_6028, - struct memblock board_mem_5945, int64_t implz2080U_5855, - float nameless_5856, int64_t nb_columns_5858, - int64_t nb_rows_5859, int64_t sizze_5860) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5858; - scalar_out_5967 = nb_rows_5859; - scalar_out_5968 = sizze_5860; - (*out_mem_p_6025).references = NULL; - if (memblock_set(ctx, &*out_mem_p_6025, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_scalar_out_6026 = scalar_out_5966; - *out_scalar_out_6027 = scalar_out_5967; - *out_scalar_out_6028 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -static int futrts_wheel(struct futhark_context *ctx, - struct memblock *out_mem_p_6029, - int64_t *out_scalar_out_6030, - int64_t *out_scalar_out_6031, - int64_t *out_scalar_out_6032, - struct memblock board_mem_5945, int64_t implz2080U_5874, - int32_t dx_5875, int32_t dy_5876, - int64_t nb_columns_5878, int64_t nb_rows_5879, - int64_t sizze_5880) -{ - (void) ctx; - - int err = 0; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - - if (memblock_set(ctx, &out_mem_5965, &board_mem_5945, "board_mem_5945") != - 0) - return 1; - scalar_out_5966 = nb_columns_5878; - 
scalar_out_5967 = nb_rows_5879; - scalar_out_5968 = sizze_5880; - (*out_mem_p_6029).references = NULL; - if (memblock_set(ctx, &*out_mem_p_6029, &out_mem_5965, "out_mem_5965") != 0) - return 1; - *out_scalar_out_6030 = scalar_out_5966; - *out_scalar_out_6031 = scalar_out_5967; - *out_scalar_out_6032 = scalar_out_5968; - if (memblock_unref(ctx, &out_mem_5965, "out_mem_5965") != 0) - return 1; - - cleanup: - { } - return err; -} -struct futhark_u32_2d { - struct memblock mem; - int64_t shape[2]; -} ; -struct futhark_u32_2d *futhark_new_u32_2d(struct futhark_context *ctx, const - uint32_t *data, int64_t dim0, - int64_t dim1) -{ - struct futhark_u32_2d *bad = NULL; - struct futhark_u32_2d *arr = - (struct futhark_u32_2d *) malloc(sizeof(struct futhark_u32_2d)); - - if (arr == NULL) - return bad; - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - arr->mem.references = NULL; - if (memblock_alloc(ctx, &arr->mem, (size_t) (dim0 * dim1) * - sizeof(uint32_t), "arr->mem")) - return NULL; - arr->shape[0] = dim0; - arr->shape[1] = dim1; - memmove(arr->mem.mem + 0, data + 0, (size_t) (dim0 * dim1) * - sizeof(uint32_t)); - lock_unlock(&ctx->lock); - return arr; -} -struct futhark_u32_2d *futhark_new_raw_u32_2d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0, int64_t dim1) -{ - struct futhark_u32_2d *bad = NULL; - struct futhark_u32_2d *arr = - (struct futhark_u32_2d *) malloc(sizeof(struct futhark_u32_2d)); - - if (arr == NULL) - return bad; - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - arr->mem.references = NULL; - if (memblock_alloc(ctx, &arr->mem, (size_t) (dim0 * dim1) * - sizeof(uint32_t), "arr->mem")) - return NULL; - arr->shape[0] = dim0; - arr->shape[1] = dim1; - memmove(arr->mem.mem + 0, data + offset, (size_t) (dim0 * dim1) * - sizeof(uint32_t)); - lock_unlock(&ctx->lock); - return arr; -} -int futhark_free_u32_2d(struct futhark_context *ctx, struct futhark_u32_2d *arr) -{ - 
lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - if (memblock_unref(ctx, &arr->mem, "arr->mem") != 0) - return 1; - lock_unlock(&ctx->lock); - free(arr); - return 0; -} -int futhark_values_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr, uint32_t *data) -{ - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - memmove(data + 0, arr->mem.mem + 0, (size_t) (arr->shape[0] * - arr->shape[1]) * - sizeof(uint32_t)); - lock_unlock(&ctx->lock); - return 0; -} -char *futhark_values_raw_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr) -{ - (void) ctx; - return arr->mem.mem; -} -const int64_t *futhark_shape_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr) -{ - (void) ctx; - return arr->shape; -} -struct futhark_i8_1d { - struct memblock mem; - int64_t shape[1]; -} ; -struct futhark_i8_1d *futhark_new_i8_1d(struct futhark_context *ctx, const - int8_t *data, int64_t dim0) -{ - struct futhark_i8_1d *bad = NULL; - struct futhark_i8_1d *arr = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d)); - - if (arr == NULL) - return bad; - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - arr->mem.references = NULL; - if (memblock_alloc(ctx, &arr->mem, (size_t) dim0 * sizeof(int8_t), - "arr->mem")) - return NULL; - arr->shape[0] = dim0; - memmove(arr->mem.mem + 0, data + 0, (size_t) dim0 * sizeof(int8_t)); - lock_unlock(&ctx->lock); - return arr; -} -struct futhark_i8_1d *futhark_new_raw_i8_1d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0) -{ - struct futhark_i8_1d *bad = NULL; - struct futhark_i8_1d *arr = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d)); - - if (arr == NULL) - return bad; - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - arr->mem.references = NULL; - if (memblock_alloc(ctx, &arr->mem, (size_t) dim0 * sizeof(int8_t), - "arr->mem")) - return NULL; - arr->shape[0] = dim0; - 
memmove(arr->mem.mem + 0, data + offset, (size_t) dim0 * sizeof(int8_t)); - lock_unlock(&ctx->lock); - return arr; -} -int futhark_free_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr) -{ - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - if (memblock_unref(ctx, &arr->mem, "arr->mem") != 0) - return 1; - lock_unlock(&ctx->lock); - free(arr); - return 0; -} -int futhark_values_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr, - int8_t *data) -{ - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - memmove(data + 0, arr->mem.mem + 0, (size_t) arr->shape[0] * - sizeof(int8_t)); - lock_unlock(&ctx->lock); - return 0; -} -char *futhark_values_raw_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr) -{ - (void) ctx; - return arr->mem.mem; -} -const int64_t *futhark_shape_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr) -{ - (void) ctx; - return arr->shape; -} -struct futhark_opaque_state { - struct futhark_i8_1d *v0; - int64_t v1; - int64_t v2; - int64_t v3; -} ; -int futhark_free_opaque_state(struct futhark_context *ctx, - struct futhark_opaque_state *obj) -{ - int ret = 0, tmp; - - if (obj->v0 != NULL && (tmp = futhark_free_i8_1d(ctx, obj->v0)) != 0) - ret = tmp; - free(obj); - return ret; -} -int futhark_store_opaque_state(struct futhark_context *ctx, const - struct futhark_opaque_state *obj, void **p, - size_t *n) -{ - int ret = 0; - int64_t size_0 = 7 + 1 * sizeof(int64_t) + futhark_shape_i8_1d(ctx, - obj->v0)[0] * - 1; - int64_t size_1 = 7 + 0 * sizeof(int64_t) + 1 * 8; - int64_t size_2 = 7 + 0 * sizeof(int64_t) + 1 * 8; - int64_t size_3 = 7 + 0 * sizeof(int64_t) + 1 * 8; - - *n = size_0 + size_1 + size_2 + size_3; - if (p != NULL && *p == NULL) - *p = malloc(*n); - if (p != NULL) { - unsigned char *out = *p; - - *out++ = 'b'; - *out++ = 2; - *out++ = 1; - memcpy(out, " i8", 4); - out += 4; - memcpy(out, futhark_shape_i8_1d(ctx, obj->v0), 1 * sizeof(int64_t)); - out += 1 * 
sizeof(int64_t); - ret |= futhark_values_i8_1d(ctx, obj->v0, (void *) out); - out += futhark_shape_i8_1d(ctx, obj->v0)[0] * sizeof(int8_t); - *out++ = 'b'; - *out++ = 2; - *out++ = 0; - memcpy(out, " i64", 4); - out += 4; - memcpy(out, &obj->v1, sizeof(obj->v1)); - out += sizeof(obj->v1); - *out++ = 'b'; - *out++ = 2; - *out++ = 0; - memcpy(out, " i64", 4); - out += 4; - memcpy(out, &obj->v2, sizeof(obj->v2)); - out += sizeof(obj->v2); - *out++ = 'b'; - *out++ = 2; - *out++ = 0; - memcpy(out, " i64", 4); - out += 4; - memcpy(out, &obj->v3, sizeof(obj->v3)); - out += sizeof(obj->v3); - } - return ret; -} -struct futhark_opaque_state *futhark_restore_opaque_state(struct futhark_context *ctx, - const void *p) -{ - int err = 0; - const unsigned char *src = p; - struct futhark_opaque_state *obj = - malloc(sizeof(struct futhark_opaque_state)); - int64_t shape_0[1]; - - err |= *src++ != 'b'; - err |= *src++ != 2; - err |= *src++ != 1; - err |= memcmp(src, " i8", 4) != 0; - src += 4; - if (err == 0) { - memcpy(shape_0, src, 1 * sizeof(int64_t)); - src += 1 * sizeof(int64_t); - } - - const void *data_0 = src; - - obj->v0 = NULL; - src += shape_0[0] * sizeof(int8_t); - err |= *src++ != 'b'; - err |= *src++ != 2; - err |= *src++ != 0; - err |= memcmp(src, " i64", 4) != 0; - src += 4; - if (err == 0) { - src += 0 * sizeof(int64_t); - } - - const void *data_1 = src; - - src += sizeof(obj->v1); - err |= *src++ != 'b'; - err |= *src++ != 2; - err |= *src++ != 0; - err |= memcmp(src, " i64", 4) != 0; - src += 4; - if (err == 0) { - src += 0 * sizeof(int64_t); - } - - const void *data_2 = src; - - src += sizeof(obj->v2); - err |= *src++ != 'b'; - err |= *src++ != 2; - err |= *src++ != 0; - err |= memcmp(src, " i64", 4) != 0; - src += 4; - if (err == 0) { - src += 0 * sizeof(int64_t); - } - - const void *data_3 = src; - - src += sizeof(obj->v3); - if (err == 0) { - obj->v0 = futhark_new_i8_1d(ctx, data_0, shape_0[0]); - if (obj->v0 == NULL) - err = 1; - memcpy(&obj->v1, data_1, 
sizeof(obj->v1)); - memcpy(&obj->v2, data_2, sizeof(obj->v2)); - memcpy(&obj->v3, data_3, sizeof(obj->v3)); - } - if (err != 0) { - int ret = 0, tmp; - - if (obj->v0 != NULL && (tmp = futhark_free_i8_1d(ctx, obj->v0)) != 0) - ret = tmp; - free(obj); - obj = NULL; - } - return obj; -} -int futhark_entry_init(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const - struct futhark_i8_1d *in0, const int64_t in1, const - int64_t in2, const int64_t in3) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t n_5861; - int64_t nb_rows_5863; - int64_t nb_columns_5864; - int64_t sizze_5865; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - board_mem_5945 = in0->mem; - n_5861 = in0->shape[0]; - nb_rows_5863 = in1; - nb_columns_5864 = in2; - sizze_5865 = in3; - if (!(n_5861 == in0->shape[0] && (true && (true && true)))) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_init(ctx, &out_mem_5965, &scalar_out_5966, - &scalar_out_5967, &scalar_out_5968, board_mem_5945, - n_5861, nb_rows_5863, nb_columns_5864, sizze_5865); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = n_5861; - (*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_key(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const struct futhark_opaque_state *in2) -{ - struct memblock 
board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t implz2080U_5881; - int32_t e_5882; - int32_t key_5883; - int64_t nb_columns_5885; - int64_t nb_rows_5886; - int64_t sizze_5887; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - e_5882 = in0; - key_5883 = in1; - board_mem_5945 = in2->v0->mem; - implz2080U_5881 = in2->v0->shape[0]; - nb_columns_5885 = in2->v1; - nb_rows_5886 = in2->v2; - sizze_5887 = in2->v3; - if (!(true && (true && implz2080U_5881 == in2->v0->shape[0]))) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_key(ctx, &out_mem_5965, &scalar_out_5966, &scalar_out_5967, - &scalar_out_5968, board_mem_5945, implz2080U_5881, - e_5882, key_5883, nb_columns_5885, nb_rows_5886, - sizze_5887); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = implz2080U_5881; - (*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_mouse(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const int32_t in2, const - struct futhark_opaque_state *in3) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t implz2080U_5866; - int32_t buttons_5867; - int32_t x_5868; - int32_t y_5869; - int64_t nb_columns_5871; - int64_t nb_rows_5872; - int64_t sizze_5873; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t 
scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - buttons_5867 = in0; - x_5868 = in1; - y_5869 = in2; - board_mem_5945 = in3->v0->mem; - implz2080U_5866 = in3->v0->shape[0]; - nb_columns_5871 = in3->v1; - nb_rows_5872 = in3->v2; - sizze_5873 = in3->v3; - if (!(true && (true && (true && implz2080U_5866 == in3->v0->shape[0])))) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_mouse(ctx, &out_mem_5965, &scalar_out_5966, - &scalar_out_5967, &scalar_out_5968, board_mem_5945, - implz2080U_5866, buttons_5867, x_5868, y_5869, - nb_columns_5871, nb_rows_5872, sizze_5873); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = implz2080U_5866; - (*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_render(struct futhark_context *ctx, - struct futhark_u32_2d **out0, const - struct futhark_opaque_state *in0) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t implz2080U_5888; - int64_t nb_columns_5890; - int64_t nb_rows_5891; - int64_t sizze_5892; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t out_arrsizze_5966; - int64_t out_arrsizze_5967; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - board_mem_5945 = in0->v0->mem; - implz2080U_5888 = in0->v0->shape[0]; - nb_columns_5890 = in0->v1; - nb_rows_5891 = in0->v2; - sizze_5892 = in0->v3; - if (!(implz2080U_5888 == in0->v0->shape[0])) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point 
arguments have invalid sizes.\n"); - } else { - ret = futrts_render(ctx, &out_mem_5965, &out_arrsizze_5966, - &out_arrsizze_5967, board_mem_5945, implz2080U_5888, - nb_columns_5890, nb_rows_5891, sizze_5892); - if (ret == 0) { - assert((*out0 = - (struct futhark_u32_2d *) malloc(sizeof(struct futhark_u32_2d))) != - NULL); - (*out0)->mem = out_mem_5965; - (*out0)->shape[0] = out_arrsizze_5966; - (*out0)->shape[1] = out_arrsizze_5967; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_resize(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int64_t in0, - const int64_t in1, const - struct futhark_opaque_state *in2) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t implz2080U_5848; - int64_t h_5849; - int64_t w_5850; - int64_t nb_columns_5852; - int64_t nb_rows_5853; - int64_t sizze_5854; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - h_5849 = in0; - w_5850 = in1; - board_mem_5945 = in2->v0->mem; - implz2080U_5848 = in2->v0->shape[0]; - nb_columns_5852 = in2->v1; - nb_rows_5853 = in2->v2; - sizze_5854 = in2->v3; - if (!(true && (true && implz2080U_5848 == in2->v0->shape[0]))) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_resizze(ctx, &out_mem_5965, &scalar_out_5966, - &scalar_out_5967, &scalar_out_5968, board_mem_5945, - implz2080U_5848, h_5849, w_5850, nb_columns_5852, - nb_rows_5853, sizze_5854); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = implz2080U_5848; - 
(*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_step(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const float in0, - const struct futhark_opaque_state *in1) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - int64_t implz2080U_5855; - float nameless_5856; - int64_t nb_columns_5858; - int64_t nb_rows_5859; - int64_t sizze_5860; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - nameless_5856 = in0; - board_mem_5945 = in1->v0->mem; - implz2080U_5855 = in1->v0->shape[0]; - nb_columns_5858 = in1->v1; - nb_rows_5859 = in1->v2; - sizze_5860 = in1->v3; - if (!(true && implz2080U_5855 == in1->v0->shape[0])) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_step(ctx, &out_mem_5965, &scalar_out_5966, - &scalar_out_5967, &scalar_out_5968, board_mem_5945, - implz2080U_5855, nameless_5856, nb_columns_5858, - nb_rows_5859, sizze_5860); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = implz2080U_5855; - (*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} -int futhark_entry_wheel(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const - struct futhark_opaque_state *in2) -{ - struct memblock board_mem_5945; - - board_mem_5945.references = NULL; - - 
int64_t implz2080U_5874; - int32_t dx_5875; - int32_t dy_5876; - int64_t nb_columns_5878; - int64_t nb_rows_5879; - int64_t sizze_5880; - struct memblock out_mem_5965; - - out_mem_5965.references = NULL; - - int64_t scalar_out_5966; - int64_t scalar_out_5967; - int64_t scalar_out_5968; - int ret = 0; - - lock_lock(&ctx->lock); - worker_local = &ctx->scheduler.workers[0]; - dx_5875 = in0; - dy_5876 = in1; - board_mem_5945 = in2->v0->mem; - implz2080U_5874 = in2->v0->shape[0]; - nb_columns_5878 = in2->v1; - nb_rows_5879 = in2->v2; - sizze_5880 = in2->v3; - if (!(true && (true && implz2080U_5874 == in2->v0->shape[0]))) { - ret = 1; - if (!ctx->error) - ctx->error = - msgprintf("Error: entry point arguments have invalid sizes.\n"); - } else { - ret = futrts_wheel(ctx, &out_mem_5965, &scalar_out_5966, - &scalar_out_5967, &scalar_out_5968, board_mem_5945, - implz2080U_5874, dx_5875, dy_5876, nb_columns_5878, - nb_rows_5879, sizze_5880); - if (ret == 0) { - assert((*out0 = - (struct futhark_opaque_state *) malloc(sizeof(struct futhark_opaque_state))) != - NULL); - assert(((*out0)->v0 = - (struct futhark_i8_1d *) malloc(sizeof(struct futhark_i8_1d))) != - NULL); - (*out0)->v0->mem = out_mem_5965; - (*out0)->v0->shape[0] = implz2080U_5874; - (*out0)->v1 = scalar_out_5966; - (*out0)->v2 = scalar_out_5967; - (*out0)->v3 = scalar_out_5968; - } - } - lock_unlock(&ctx->lock); - return ret; -} diff --git a/game_of_life/gol.fut b/game_of_life/gol.fut deleted file mode 100644 index da907c6..0000000 --- a/game_of_life/gol.fut +++ /dev/null @@ -1,34 +0,0 @@ -import "./lib/github.com/diku-dk/lys/lys" - -type sized_state [n] = {board: [n]i8, nb_rows: i64, nb_columns:i64, size:i64} - -type^ state = sized_state [] - -let keydown (key: i32) (s: state) = s -let event (e: event) (s: state): state = s - -entry mouse (buttons: i32) (x: i32) (y: i32) (s: state): state = - event (#mouse {buttons, x, y}) s - -entry wheel (dx: i32) (dy: i32) (s: state): state = - event (#wheel {dx, dy}) s - 
-entry key (e: i32) (key: i32) (s: state): state = - let e' = if e == 0 then #keydown {key} else #keyup {key} - in event e' s - -entry resize (h: i64) (w: i64) (s: state): state = s - -let get_cell_index (x:i64) (y:i64) (nb_columns:i64) :i64 = (y * nb_columns + x) - -entry step (_: f32) (s: state): state = s - -entry render (s: state): [][]argb.colour = - let ridxs = iota s.nb_rows - let cidxs = iota s.nb_columns - in map (\y -> - map (\x -> if s.board[get_cell_index x y s.nb_columns] == 1 then argb.white else argb.black) cidxs) - ridxs - -entry init [n] (board: [n]i8) (nb_rows: i64) (nb_columns: i64) (size:i64) : state = - { board = board, nb_rows = nb_rows, nb_columns = nb_columns, size = size } diff --git a/game_of_life/gol.h b/game_of_life/gol.h deleted file mode 100644 index dca2718..0000000 --- a/game_of_life/gol.h +++ /dev/null @@ -1,120 +0,0 @@ -#pragma once - -// Headers - -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <stdio.h> -#include <float.h> - -#ifdef __cplusplus -extern "C" { -#endif - -// Initialisation - -struct futhark_context_config ; -struct futhark_context_config *futhark_context_config_new(void); -void futhark_context_config_free(struct futhark_context_config *cfg); -void futhark_context_config_set_debugging(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_profiling(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_logging(struct futhark_context_config *cfg, - int flag); -void futhark_context_config_set_num_threads(struct futhark_context_config *cfg, - int n); -struct futhark_context ; -struct futhark_context *futhark_context_new(struct futhark_context_config *cfg); -void futhark_context_free(struct futhark_context *ctx); -int futhark_context_sync(struct futhark_context *ctx); -int futhark_context_config_set_size(struct futhark_context_config *cfg, const - char *size_name, size_t size_value); -int futhark_get_num_sizes(void); -const char 
*futhark_get_size_name(int); -const char *futhark_get_size_class(int); - -// Arrays - -struct futhark_i8_1d ; -struct futhark_i8_1d *futhark_new_i8_1d(struct futhark_context *ctx, const - int8_t *data, int64_t dim0); -struct futhark_i8_1d *futhark_new_raw_i8_1d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0); -int futhark_free_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr); -int futhark_values_i8_1d(struct futhark_context *ctx, struct futhark_i8_1d *arr, - int8_t *data); -char *futhark_values_raw_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr); -const int64_t *futhark_shape_i8_1d(struct futhark_context *ctx, - struct futhark_i8_1d *arr); -struct futhark_u32_2d ; -struct futhark_u32_2d *futhark_new_u32_2d(struct futhark_context *ctx, const - uint32_t *data, int64_t dim0, - int64_t dim1); -struct futhark_u32_2d *futhark_new_raw_u32_2d(struct futhark_context *ctx, const - char *data, int offset, - int64_t dim0, int64_t dim1); -int futhark_free_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); -int futhark_values_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr, uint32_t *data); -char *futhark_values_raw_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); -const int64_t *futhark_shape_u32_2d(struct futhark_context *ctx, - struct futhark_u32_2d *arr); - -// Opaque values - -struct futhark_opaque_state ; -int futhark_free_opaque_state(struct futhark_context *ctx, - struct futhark_opaque_state *obj); -int futhark_store_opaque_state(struct futhark_context *ctx, const - struct futhark_opaque_state *obj, void **p, - size_t *n); -struct futhark_opaque_state -*futhark_restore_opaque_state(struct futhark_context *ctx, const void *p); - -// Entry points - -int futhark_entry_init(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const - struct futhark_i8_1d *in0, const int64_t in1, const - int64_t in2, const int64_t in3); -int futhark_entry_key(struct 
futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const - struct futhark_opaque_state *in2); -int futhark_entry_mouse(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const int32_t in2, const - struct futhark_opaque_state *in3); -int futhark_entry_render(struct futhark_context *ctx, - struct futhark_u32_2d **out0, const - struct futhark_opaque_state *in0); -int futhark_entry_resize(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int64_t in0, - const int64_t in1, const - struct futhark_opaque_state *in2); -int futhark_entry_step(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const float in0, - const struct futhark_opaque_state *in1); -int futhark_entry_wheel(struct futhark_context *ctx, - struct futhark_opaque_state **out0, const int32_t in0, - const int32_t in1, const - struct futhark_opaque_state *in2); - -// Miscellaneous - -char *futhark_context_report(struct futhark_context *ctx); -char *futhark_context_get_error(struct futhark_context *ctx); -void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f); -void futhark_context_pause_profiling(struct futhark_context *ctx); -void futhark_context_unpause_profiling(struct futhark_context *ctx); -int futhark_context_clear_caches(struct futhark_context *ctx); -#define FUTHARK_BACKEND_multicore -#ifdef __cplusplus -} -#endif diff --git a/game_of_life/lib/github.com/athas/matte/.gitignore b/game_of_life/lib/github.com/athas/matte/.gitignore deleted file mode 100644 index 3d8fd0f..0000000 --- a/game_of_life/lib/github.com/athas/matte/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!.gitignore -!*.fut diff --git a/game_of_life/lib/github.com/athas/matte/colour.fut b/game_of_life/lib/github.com/athas/matte/colour.fut deleted file mode 100644 index 4d47177..0000000 --- a/game_of_life/lib/github.com/athas/matte/colour.fut +++ /dev/null @@ -1,186 +0,0 @@ --- | 
Colour manipulation library. --- --- Adapted from the [Gloss](https://hackage.haskell.org/package/gloss) --- library by Ben Lippmeier. - --- | A colour that can be converted back and forth between an RGBA --- representation. Not very useful by itself, but using just this --- interface one can generate a lot of other useful functions via the --- colourspace parametric module. -module type colour = { - type colour - - -- | Construct a colour from R, G, B and A channels, each of which - -- must be a floating-point number between 0.0 and 1.0. The - -- concrete representation need not be able to handle the full - -- precision of each channel. Thus, `from_rgba` and `to_rgba` need - -- not be inverse of each other (but should be close). - val from_rgba: f32 -> f32 -> f32 -> f32 -> colour - - -- | Convert a colour to four R, G, B and A channels, each of which - -- is a floating-point number between 0.0 and 1.0. - val to_rgba: colour -> (f32, f32, f32, f32) -} - --- | A colour representation that encodes the four RGBA channels as a --- byte each in a 32-bit word, using the order A-R-G-B. -module argb_colour: colour with colour = u32 = { - -- ARGB storage. - type colour = u32 - - let clamp_channel (x: f32): f32 = - if x < 0f32 then 0f32 else if x > 1f32 then 1f32 else x - - let from_rgba (r: f32) (g: f32) (b: f32) (a: f32): colour = - ((u32.f32 (clamp_channel a * 255) << 24) | - (u32.f32 (clamp_channel r * 255) << 16) | - (u32.f32 (clamp_channel g * 255) << 8) | - (u32.f32 (clamp_channel b * 255))) - - let to_rgba (x: colour): (f32,f32,f32,f32) = - (f32.u32 ((x>>16) & 0xFF) / 255, - f32.u32 ((x>>8) & 0xFF) / 255, - f32.u32 ((x>>0) & 0xFF) / 255, - f32.u32 ((x>>24) & 0xFF) / 255) -} - --- | A colour representation and a host of useful functions and constants. -module type colourspace = { - include colour - - -- | Add RGB components of a color component-wise, then normalise - -- them to the highest resulting one. The alpha components are - -- averaged. 
- val add: colour -> colour -> colour - - -- | Add RGBA components of a color component-wise, capping them at - -- the maximum. - val add_linear: colour -> colour -> colour - - val mult: colour -> colour -> colour - val scale: colour -> f32 -> colour - val mix: f32 -> colour -> f32 -> colour -> colour - - -- | Brighten 20%. - val bright: colour -> colour - -- | Dim 20%. - val dim: colour -> colour - -- | 20% lighter. - val light: colour -> colour - -- | 20% darker. - val dark: colour -> colour - - -- Basic colours - val black: colour - val red: colour - val green: colour - val blue: colour - val white: colour - val brown: colour - - -- Derived colours - val yellow: colour - val orange: colour - val magenta: colour - val violet: colour - - -- | Grayness from 0-1. - val gray: f32 -> colour -} - --- | Given a colour representation, construct a colourspace with all --- the handy functions and constants. -module colourspace(C: colour): colourspace with colour = C.colour = { - open C - - let from_rgb_normalised (r: f32) (g: f32) (b: f32): colour = - let m = f32.max r (f32.max g b) - in from_rgba (r / m) (g / m) (b / m) 1f32 - - -- Normalise a color to the value of its largest RGB component. 
- let normalised_colour (r: f32) (g: f32) (b: f32) (a: f32): colour = - let m = f32.max r (f32.max g b) - in from_rgba (r / m) (g / m) (b / m) a - - let add (x: colour) (y: colour): colour = - let (r1,g1,b1,a1) = to_rgba x - let (r2,g2,b2,a2) = to_rgba y - in normalised_colour - (f32.max r1 r2) - (f32.max g1 g2) - (f32.max b1 b2) - ((a1+a2)/2f32) - - let add_linear (x: colour) (y: colour): colour = - let (r1,g1,b1,a1) = to_rgba x - let (r2,g2,b2,a2) = to_rgba y - in from_rgba (r1+r2) (g1+g2) (b1+b2) (a1+a2) - - let mult (x: colour) (y: colour): colour = - let (r1,g1,b1,a1) = to_rgba x - let (r2,g2,b2,a2) = to_rgba y - in from_rgba (r1*r2) (g1*g2) (b1*b2) (a1*a2) - - let scale (x: colour) (s: f32): colour = - let (r,g,b,a) = to_rgba x - in from_rgba (r*s) (g*s) (b*s) (a*s) - - let mix (m1: f32) (c1: colour) (m2: f32) (c2: colour): colour = - let (r1,g1,b1,a1) = to_rgba c1 - let (r2,g2,b2,a2) = to_rgba c2 - - let m12 = m1 + m2 - let m1' = m1 / m12 - let m2' = m2 / m12 - - let r1s = r1 * r1 - let r2s = r2 * r2 - - let g1s = g1 * g1 - let g2s = g2 * g2 - - let b1s = b1 * b1 - let b2s = b2 * b2 - - in from_rgba (f32.sqrt (m1' * r1s + m2' * r2s)) - (f32.sqrt (m1' * g1s + m2' * g2s)) - (f32.sqrt (m1' * b1s + m2' * b2s)) - ((m1 * a1 + m2 * a2) / m12) - - - let bright (c: colour): colour = - let (r,g,b,a) = to_rgba c - in from_rgba (r * 1.2f32) (g * 1.2f32) (b * 1.2f32) a - - let dim (c: colour): colour = - let (r,g,b,a) = to_rgba c - in from_rgba (r * 0.8f32) (g * 0.8f32) (b * 0.8f32) a - - let light (c: colour): colour = - let (r,g,b,a) = to_rgba c - in from_rgba (r + 0.2f32) (g + 0.2f32) (b + 0.2f32) a - - let dark (c: colour): colour = - let (r,g,b,a) = to_rgba c - in from_rgba (r - 0.2f32) (g - 0.2f32) (b - 0.2f32) a - - -- Basic colours - let black: colour = from_rgba 0f32 0f32 0f32 1f32 - let red: colour = from_rgba 1f32 0f32 0f32 1f32 - let green: colour = from_rgba 0f32 1f32 0f32 1f32 - let blue: colour = from_rgba 0f32 0f32 1f32 1f32 - let white: colour = 
from_rgba 1f32 1f32 1f32 1f32 - let brown: colour = from_rgba 0.49f32 0.19f32 0.11f32 1f32 - - -- Derived colours - let yellow: colour = add red green - let orange: colour = add yellow red - let magenta: colour = add red blue - let violet: colour = add magenta blue - - let gray (d: f32): colour = from_rgba d d d 1f32 -} - --- | An ARGB colour space - simply `colourspace`@term applied to --- `argb_colour`@term. -module argb: colourspace with colour = argb_colour.colour = colourspace argb_colour diff --git a/game_of_life/lib/github.com/athas/matte/colour_test.fut b/game_of_life/lib/github.com/athas/matte/colour_test.fut deleted file mode 100644 index f2e5eed..0000000 --- a/game_of_life/lib/github.com/athas/matte/colour_test.fut +++ /dev/null @@ -1,17 +0,0 @@ --- | ignore - --- Proper tests of this library require drawing colours to the screen, --- I think. - -import "colour" - --- == --- entry: basic_mix --- input {} output {0.7058824f32 0.7058824f32 0.7058824f32 1.0f32} -entry basic_mix = - argb.to_rgba (argb.mix 0.5f32 argb.white 0.5f32 argb.black) - --- == --- entry: is_argb --- input {} output {0xFF000000u32} -entry is_argb: u32 = argb.black diff --git a/game_of_life/lib/github.com/diku-dk/lys/Inconsolata-Regular.ttf b/game_of_life/lib/github.com/diku-dk/lys/Inconsolata-Regular.ttf deleted file mode 100644 index 592ccd20073f76a663c56fe0176397149782565c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 95960 zcmdSC2Y6h?)jvElceSf#S=&`tt6gbTEvr{sS~uC2WXm?jxZqxK!45Xvp(ca?p@p7= zKtf1Dnjt`d03jqV0YVEQ5Wq3ufFXtep&El%-|w8c_wGuTL&)=f-~a#F&pjHQJN3+& zGiS~@b0wq@!UNzC;i*T>oQ~h5kS9HbpXt-5PV4QzYSw(9YVlM~pEG#WpI=(rA%p@h za&Y=lM^8TW-0iD`D4!$5*!_b?jc;6i=*opcv`j$x6|1+b-1hK=3pe8TZwrwXUVYMz zaLaO|QOFHD@&1~1+tzP6Y0a6Z33;4Ph%tfnE4Ob0J{RwIQ2XmQpSo_zp5xF~{9YkG zxNXDQm1{D3YX^jEp?s|yfG}Qi6yf(D;rFr)TXvjsM|9(_@%v*!WbfR(b@j^Y*S~P0 zkdtQ!;f!xtdCE4&+MG-wXPl4x;p10sSsVFm<F)u5<(*r%ZQZ_O>N(9pAqVq>$Uk=5 
z32V1?w9eiNdSLwG`=AV}#UsQULWwfa3~h^$r~s_Oe<^Cj6u@a>72q2Dmtr0MOR+(0 z20R}BrPwCE4R}5NOL2qvCEy*>5K^Yev49>~4OlD30oKc-0gsW#04|b?0GCMAEZ4}h z0ne3R1N^%DI^ad}GQe-jD*>;THvoQ5J_q<W`8OfeQ3~~_g=!(-614<yrCJHNTA_z( zy;=|WNA<2S43AMP42&%kzdtFIdP+Sd4D~1V0@5$47m?nrUPA6y)Ly{X)Z4(nquv4j zU6fZ`USUK+WQyD6M?y{Q9hfh2Hm}@qJZR-0(hUuvetO?5(kCsR78c!8Ce06v8B^u~ z&g-AFI4q8xH8>C!>*fv43X5&?>FsIr=k<rhSIp8<WSJ>N^T9P5VvLz`h#WJOCdQg6 zC%8)Ia|ySZN*5k8l_9*sw`JuCo5X1h&tZ50!^;_7$MAa$Z((>F!+RJ$xMkCpP2y38 z&obQ0@NI?%2}&nJ55o|{YKE<#1<`|O_P_Phftn0ZI}LUIf0uTG?g6ZZn5Yx2Jl9gx z>!08}Ck7}BCkQ77DGP5AP6I!S@6o>(zba(Of|AY1oh_ag4dC25Jk5AI%_lAl`BS8O z@jwE|d3eCjav2`Nfs^G%JcQqdytkox%()EUDZ&%MQ^}8#Hw${CJPFTEJm=s!AJ4^j zu1I<4{Yj|nF8ri+sBY`I4r%HKJ=9miQ+_Ic1OLnr2Sh?H#D9m(q@VWhxw1$GWJFfV zI)2|QJNaka{JmEW@b8s!9+#7ePl-c@(g`LGaJrBottdI<l8*ppF?175e9CDj!)%5* z4976cC7Ae_(}kQqApcDDD9E~@4u-u9^BLALbTagCsZ370WEbjrnlaZfyovA<(i1R; z(;>)2DMRuDaQJIt6Vek^gwpbPq^~3>4pM$`kn)R9IQ<EymvVYN*XCf%5JMoqJM)TH zfy1-Kv!YQbPZ&=OPp$bh3Ww)<q}%ay<C%)556>Jtgj;~;SUiMZg}gTj;r<L;A=uLc zI(u@2^c3JHb|yTA=Nf*LX9Mydk7oy-)9{>y=e(4M-fuvCx8NtWMfF?HMMzU$=%M}+ zp7K-q%ju`Q%e~M2mixWD+`L>)9k5dgE9G%e%9DwFNgOb`gePF9itJRxPE}Hh%6gKx zI{R(2o$9nxaXZyZDQ?&9pJ%{+JI_unv{TEhRB}X~H5SgZ(N1ln6#e&{q;VJvx8d0d z+&TP)Qs>*Li#c_L{gzV6x7XSEZlIKVAKw1dbF-PcjZ&UW3+K7ZO1bxW?$@nxitFRl z?{zJBtJ{4PsU(hD<9ttXyP0^K#63H_Ea%&8<4kIx#9kZ6efCf-JaR7ahW++!JN1E` zI%uW5!cIA@RI(Otw#Hf1^?LGhy?KP=RLD-1*{Nzy)ep<pYQLR8DWb0XJ&U?J<(-_0 z^F%C)Tlu^*yt8##q~_bH#dd0iomyw7HruHa?9?fg@?@sg=ef;(dxo9*ik-T^P9=MA zsm5WHSJ|m=Q_A~2f;R5w-XD2yr}294u~UrmJ^)<uEvJaXG>$kd`PSrYytP)vu+#yo zHBU38sODro??Z&6nv*!QKD3d<dA8|VypIvZy-&OMA$7n`C9D+9jDG8>q!cLmSPHJr ze%oxPI_*^4PW4(T?_cfI9!gPt$sTwHthb(dRx0^c_wVydc(z$(Jqt%kEwgamSMAh( zE0xmPD0E5I?0wfN;r+-?eMTvtN~Lp>;wh<rJA<D1;WOx2$T)ijed&3*z8u1F%4eqv z>{QrJ#W+<vtVAQ@+K1t~88_9&QHpWQ8zwbqY`#7~@OU!iX(pWKQ46Pel5xz5oZ{Z= zvc5S)4c`LyKBNxVsf3lHy+^+#YM7~BN_jF<%hK**zMV%Y-?5(ieXEe#z^UUowPP6W zH2dvYcIrGzQD2D8W_`)jG7IPZtCdRjo^~s<1Xh$zdEeD3q6X9SBK&^2ow~+OU2msu 
zvQxKEN>i6{yX;yhb-=>u-Ps462XsOGNa?ljyEC<f-hVkCD5UdwFd{RxjZ^p9{iBrb zAEh3&-#*N#C#-z5<1h_~YR~vyptqcQ*-n8!Ox#<P!n{4>8-e2zOd(G3*f<3~&}ID& zyKE8%jxyikr<n@asUkZC9x~gt`{%E;-`3eFmZH?Izth4cN8|_pnQbtgIUl%A|MxRZ zX|}8q-pBkbbNryOAG`rbsf~7O8>ddP-%=|1cBh^197^e${pb5HPNqn<`(7X%?HXok zr~eB7byUJbsT=In&35XioVo&UlepXLx2OsIxF2tmxZm4vAGK3Y*{Nsk)NVVqmr`6y zvM;QcaIfCB^L=2a4qB;zuv1Pel^jtZn{WZBu7MDz%J5tDD5-iI*J`IG*r~~OY6hi< zrh)l^*{LbgOHy!)=`B&*!Ua|Y*6Ff=71o>&OQBAxFBRuEc`_9j*lgv~^?5Q!ui0<% zX<&ul<Wr<fK20qVIDzU5oPy^JJM|Sib%AaTZ!e`3$!DEn8eC<+{a)bPfgdsM=XQ#5 zT!K?f4V?nl1nd%;8k#$~-P`f@9y^5=@jKxj;?!fq-agH^zuLGxc8W^8N^e2a{k|8_ zt9Lo|vYpyzrxI4m@1PWDy5E<?W!i6Z?Nq={71^nXl?r@hr#_<;*Jt;@UunJd*IB8N z{rmh9{sF6OP+2K|vz_X+Qo(dPm1CuJYeXAM4h8n7)aSGE1q<v{m{Pub^KyeRDn}_j zZ=8DB!f9>_)~3GIT<m*d7>;=|*qDmb^(A{nsbD)nPIcR<sr-AN^*jCaB?u;UYR)l- z;R1qID#$qE)8MM$u|e!Yho?vz(Qk=&Q}P{8C5TToj-;qg1$P8bquvM4;uNT1rl@v} zL;rM&s9~leoH{SHEbTMqTdZHCE(%^AyauW3Idv1KZW)H#Wxu`CPSHGFPkjk0q__tp z1<VwrfKCNIvQo+3)4pZq8?aJAAEk&^dX7e<c=j2`vp^|7#-^pCwR2i!gAbyvhj~6I z1+D<3)H8Mplr(W%bC9T*Eb*58_B}gwz)smcNa?BFKh2%V-X~>Fo+Eg99?k6aENNK! 
zkgbcXK}WQWkqz|!It!V$c**OjZ>HVrpn8qch8I?`JYznkhhViAuN&}Zh>wh`06#R2 z!rRMXji;O;hBq>V&jOgM0rwe&fCtnez}F4nr7))PTlf`_Cxap7IcUIV00@7G97C-S zv)kn`CWmXt8ydvho2gxyP4<6rP__f^W^3GT%7fl6#@h#B*N5egMga_!%AtM&qMv~1 zA!?w$eabcasBgfdZ-D3<;F<jPR!&oGnM5V&p|+%l>Eq$Dw{U+l6Ym3`OTEHd9^KGX zq*pR+-{Bs<!?b;e%YKci_aam2pWKsw8r#w0KY3gqGOgaB{)l(Dws#CzD`g+IzL(3s zMJ<X=Oo4~F{@cm+DIV4{$ZrpDe@d7dr>hxwdw^%>fN?6)2YHOI8yF+fXYt!>06#Ry zGo-DaAM>cwxZO0K3&S99P#TZjFyQ+UA2Wr&!@c^L>0t0EK4#AOfO-hqG|})O?<+ z$xB@4HH>+Q`@DzqKgRR;8dKmU;tG-AKCtCgI=BztW|}Le+rPLc?6Z)HYLM`|06H|D zDUieXv$@@aL?v(<JWt?x*xE7nFpa_h=O8Wd_CiH`bx{3+;S-E6XGmV9ed<KOPt{Vu zXViC*Ul2t8uP|KA@D6Iln8j(b?&oofhhzuRA?{&_d(uF78B)g*WJ>;mS_cO8%MA+c z%4a!eH`CC;=^&SKQ2#*_DtjfDa!{XT9*@_d&~wm%YH%3T^LMG|GL36?aR2vk|1%jn zsQ;)9{g*-YDe~_zsJ9M<{)=~+dUwj-AibOVAl@B<H$v>?^j_}6cBb22qmuAk|6Zz1 zoI|4!C#h>uTaMuXT&(AUdy=C*#M^^Z16J!~yq&@L89dVGxy}t#R&3z54#I*@`Kh*- zHO3f=G<zXr2A3*g`ebmg9L$p$T&F`*hNVOXjSF)@oVi}!jygY8A9KxsGNK^yz$Zk9 zXP6FS3?Jn&;28#=1n`!<88V0K&*3`rXe{6dnh^nCq?BW6PQ?&Y;~i>U{Ds%-zf?co zwsQa9<Mr}7bH(`#cQO8T&cB;6OPFH&cy#CU+fQhWVmIe`fI09jhWohn6FC1N&Y#QF zxR@!K;GP^}YMj7jpX1p#xa=XGuR}cJPjC&-a9c<UnHEWd6oRiKCY<;kek}Od1U#rh zbmEEg;}o^<gtUupF;(<^-oBXllzK3fPP9uLAUX~e61|2TBs~FvciijP!>!(oc7=EW z&&zoBna^AJ`5qqfM?q2$7qF}b&hor5KGpk%Uc;*=;MEf{p7KPHriWxbNgeZnXHVBp zZBYBv4&{sE8Q`BK;x2K&_`P^kJOxkCZm}1>`M1Rf*r9}U%53=4^Wc3dlhyF1x5^1} zvYa7j%lUG#Tp`!V%^2(dDeqZ2al%j*!Ni-y14CuREkjY_|DmgiXNIzgqlR)w+6;|R zcL3%RhYUH1TM~!3l#65w@<4_p4p4N(P#VeSp*G^}p(c{XL-{1Vhw5lw8@irk_>hzF zPTCWPT(lbkVm+dMv?xAgN&6aeua-gy-klvPA*3OAcgPk6@G=Vl4;fNIz@r5Y^`IAr zc?8u7>g5zpx*fUi!SjImJcOT*;dvU*U-9h0^C}*~?Z-nBknkVjfsalqJn+%cek0Mk zMC;Z=@A;wfRK|Q{7-_1X9%`TPR2P+v@y~2=IsRXRxQv^`En=6rQ`{>a6b}o?^8aHz zl4FF?dX|CJIOL%n5*&ej29SG2JIqiH?J$@<=vkQ68v!%TxxE@QVjz;^B_R;8@viW5 zd<N|Z8Hfp)uZ~7c$P&bYAPxjEAPTV@6a@lDC!#>$OBEEcfz`_qF8Lhc66FY&GLCSW z$Pq3}5Z|&+9Lo_dOF6=26-T(N<_MQdIl|>Kj&S(_N4R{z5iUa%;ewTk2p3sE5iT-L z5iW8PMYv#%BEm)XQG|=!KoKr-3q`od<0-;L?w|-4c``+~AQvKB<gd8z6q!Qf+>2f- 
zk;V}$f@4>pKOs_OEXSzi^XLkYBjZRp)O+eZQDESoC^XWIbWvnvg5!$~x8VjYpim<& z=0!xkV58xmpg-3NQUQn97I}L{_ORHOW{!iP2o&%v_B|i!sT7@%)U(AxL<?;eCyBGf z1$Io#0*;9}jbmc2=a`rWIVNTw#l*-=iiweR6cdBE16?b{#P~%CW~mFaGbk2`)nbb{ zS)45{<Q!u$;%vlcghe%Wi3y@t%n^$*gU5?g#5v+3GY>`41Vt%kxk*eE)5Tn|M64BC z#i`<4@eMN%#n<GC2zHxh>@_pQJk0(&%=u}E&$-ylLy<T6A}Yp-7SSVSild;_t`}bt zr;D$OOUyhJlT#qdI9eo*9c#W=1}-{5>=fsTOU*nKty3t<#dy&sCW(G=v{){V6Whg? z#n;4TW*&;;DFVONWAB?R2E+ofLTnT}#2MmztY@OmSV0j!#fZ~t5FLonnk9}AD-nxu zqBv80U3^nqv2x|+9mZ)4&tZ50!^;_7$MAa$Z((>F!+RJ$xO(OGwZ@|ipJMnd!`%$` zGJJ#K+YCQoc#xn&tX{oko5RU4hhZMWFvCiQ^$gn?#u?6FIA_iATemnCGF--R4a1EL zw=q14;ZBC<Fg%~(#p_O3x!Q3B!|NITh~aGv?`8N9!>1U&!0=UuZzHO1v*Q561i>^1 z!%T*`3<C^{7)Fjiamxv5l?>|`HZ$yG7-!haaDd@Fh6@=k+j_#9<I~nK+{|zX!<`Jj z!tf%7S1|lG!<!iXbUWrW?RJLuFnoaFLku5d_%y@6GTg)PRfhYwZ?A7idzaxy3_m02 zR1DJ@<}mazEMORB7z1o{)-r5l*v_z<;Z%lw4CgRh!0=dxs{osv8yFtXa0kQF7@o!O zJcbuByqw`R46omQ;;QY=n;720a2LZn8Q#nAL52@Ae1hRK3}4uO;<oM1ml^J3_!h(W z7#?7lAn0;1%w(9$FmT%16SlgF7)BUYGOT0R%&?PToMA7+0fvxWkV;x+{%`%1ka1Mw ze*>`tvPMG@kQ6^hz0;Wg_NSz{L-gjqfd*us2XYG=^8afoI#W>p*h)rGl}JOg0(3ES zGR$Deb{LAN#M}P{D(rfGl>XoPDX|MtU;aCo&t?BRNZPJso%f4Fj@!eYMm2m<hzM7d z`tKm=9g?+=FAjrT_KU+}=owS7Ba@y%8qOKm*)JAXK{LEr{2coA{o)~L*Uv)Ve*-7U zgHj<*(gS@kB5_WkHjPeRkJ9)ypL-oRdmg}<F2uhn<kNbUl!z-uekHF*+$Wcm_nJQ+ zG=Dy9{(Q*%`TG&?A25Hv-~4Hnf6DwF`Zo1QK4$)W)NJu#`GT3+ix@|e8B%_x^Qd(5 zXRG<M+x$7%{E4_#%CBbYpT;V4v>1bN595kV%ngXGzRUQLjgtnfCU{U959N{udTM+x z1%r3SrFeADbsmh$IK$2(jR#ON8yeUg^gI(%Dga$K2(9*USXq>M!pH~wi;)NTTuKfv z=kRb2Kj-jq4g<0yNBD4>RO)pDI<|TPCkX}JoBFo`&pQ3n?(;(YJ`WFmGENzNXlIwW zM}CK*bd3oqzY#Zpq6Xr|0qYo6GK`oII=2xMSt>(isw|bQ#;6=MR^=+U@~AB+BSaX~ zK8R6DBWLLENWTpX^^I!OE5xFv6L0A(sr!XI3LJyeZz=}!q&~1QQtd}>)Fd409z+eV zQ`H!IrpUwTW1o6my`lc0_NzD5Tk4<cZ3XKxP8C^3wlT)YF~%CXI9(JNg+>um>}HWB z_p3rxq>5DtwcLa=N0a)xdIV`iqT1;*knbM~4kVSY3ecPLAh(q|Uo^@W<!+|qi*lCS zP1KYh$&cj$`H4IzKb4=!Lo$I{8dbAuQC_u$>i)w0r1r)_gRdS{^QMzD=1@+R6A}IW zrpk~*@<X{_{zEwATbLn%`vDI5Pt25*A0zcPW=zQgNWFuZGvp^oy^9%i$b(4z3p1G} 
zKSk<2%%~G<%pu>$%(~<uq&~n5r^^J!UZ{wFi&P$F;6qT4I5-5_D3quos^FQ(H2H88 ziZp(48ldJ0uth;bNIg&T?=PyqK`6bXUImXS;j-WV1*cQGO_8ZS14m^U5|-H<!v!w% z7y+y~tS0<-knKb6k$c4J_{~7i-hnERfiq|pIHef5=p_0l^=IZgjz9+olmbh)PQZnb zv3JXF$sZ5~5)uTLFUdE+uc%$8)K+)_VD&+{dWKlp$TbAXMY$yAOLIK+=1t^K;&e%M zOPtf7HK3L^k!MI{;5S2+sM8e5O{K=EI{ZWoHcou#g?JMY?dzy7)U;B{P!5%*oXVxr zQL2>kt9tYX<w2##)Z^+2^(4mjG)DJ~`U|M_nL4Brpp?R~+(EPgrGiEts0D2VGw_mp zS-v9o%D>B3<!f@Ed|keg(zhz^$(uA&=*Q3~y(s;{?c#2Q{6Jnxx|6y<Ed<5-^-sAB z>(7JVMH*>N_!kzYKE=NYF=f%bY2pE#O%EAMV0RbBTe1<N>6`RgK7u)+B4k@LA4PLb z|5%EnJs(!o0sNjJKIT01j`Du~KSRYp?J9MQI#2x^RPR(>n1SO}nTo4wRX-XXRaA{v zC&GreL)ECCfs!TYT?Nke(_lrIh4cSP*c!+JZ~|ugdRTmJ6hFr9exG<o`~_Cq7sYF; zU4>N#=C%Pgl{aCRO_yV(Uyg_Ur;X=i5H}r`$fcN@ugeQz>$wWnom=E@<%9AM@?rV3 zYE^frHgy_?=Y`%C#g26vEC~;aw_wA2ADV+9rOd*eiZW;qrLtT0$~de@YvEnkA&-;W z<X7ZR<gM~&@)zn?>ULPZ)~Q`8qJAN7RJW;H)i2d&hE&^OA-G!YRHvwu)k$iD`ilCh zI!B!iYM;T<6c%oIj;d7^>U4FkTC7f0XQ?S_sw!7=pw||OYsGiu`D&t?pk}C<s!#Q+ zFR2^kAH^QX*F|cY>XpyS7u4P0_5n3reGU6=BY39+I?_?FYa9bh%T8!W0v3t;!5hDq z6JcjfH(<*J&%4wRxZVkA;ecfc)=#j53ww=#HaK3KAsb-TK0{t2FPGQI8|5x}w|oRE z;7v%wagg$DY8HBMy1GVPhdKBS*4N)qTQ+tb4|bd~qtd7|8jM!s#85_PY$zBi4wZ(= zLp7oCp|((0XlCfB(2~$`p{=3QLthPD68dK7>d^I}Pr_N@+^|1f5DtYS;aIpn+!XE% zPYrJipBg?rd}jFT;Tyv@g?||SarkH9+roE*?+M>m>MaeFMoP;{W2JSa*Ou-p-5*gA zXCyr`HsXo+Bf&^PBowKM%!sUrtc_e5Rnfd?MYJY*Vf2ZzX!#$?|5*NHtSh!6wmP=1 z!e5bB<Epu`&hx1nIs^+3{Tl<WtQR-PW>^k2C2o+vkavND_F>mKNZdV)5_hZn)kEs9 zpt2K`aDx(2BgT|y0VUEy*&$!3C=?Erg{nh!q1I4mXmV&Ev^caZv^jK2=&aDiq05;P zH-ufFgeROAE((X45{=;wro;}W#95%k_ro`X5<dyw3QF7^zIPZUE(IlC2PM*&5^knM zzD<cWCMC*kN<38lC@9gvlvrEgtq9hnF(o8ZLJob%)L<|a6AvdIPyTMDh5|z~63+|; z6I&Coq9y+gJu>w0&;vtv58XBN%b{C`ehweefAsI~xqIonE%r9xX{6LELwk?ed*$Bj zSDt+3TKs?Sm1VDNdIkFGE5Cl_etv&F!ymkI(<_&~e9tS3UV(M-<y-Nbi)YQtCjc&Z z8TNsfm%V({%MCB%6#UZrFWvXj&+zl=mzwq**mK?PzY4MY@!cDCPu)Fv_oNrEeDMk) zo^w3I{m{}9>op?pS1)5<<$uOTW3zF*vCY_FoMfD0++#eT@#b6nZaio_Vf@v2ITdfd zeZkmm>^0sn_8V^*ZyWC#?-?H$A2}QjkHhDvcGNps9dQRFIW~p3<2^t~XUAu0hgZZH 
z$J?CqEeEXaj&~`4@}C1cv@Xqg4l*QJ&;P8vx&{-65~=?jyXiN_Zg}soEByai-m5u~ zwj`|=Kpx+zc0o$tDqn=uJzY&!w?Q`E4rxuA{%4TZzkp;NQkil$WU^oRR6x~2F4GOM zBHlHMv2RSk&d~^&yTp{amDoqV&T{u!@l~}=Tn$emN#6(IN&K$75xdJ<;(Z((4q|_C zV23Hg9z#1!FZLMPVREs@?7%K_9Cny(>Pzw*>@q*0eFpOP0?7V6F@)3MKSe&|b*an{ z<#LRO$s7@Zr?X1BMK$z<9$6vA%b@6#5iuDj-+GxRX38cpT{eh**&_OI%AF%S#cbIw zR>*#_OwJVZWsg`c`@|yX)~n=fu}033bK#{wN^F!zi{s>c#HTC}TjgT09ooo=a)sC- zmy1*6YVl2ZhWN7FATE<TX}=QRliv_G$xFod<;CI_dA0b3{I>YH{FeBsycXKy55!&a zCh;rzeetyXjd()-TKopLF#ja)7mvxi#Z&S=@tpj<_?vtbXZU@%L->aLi+Ei=BQC^U z$cN-F#Z*}byZ;usP|T2x;ws!*E`;|kB-6w-@?2QAK7qF(A=)Kw0n3<JASa2Xa)$Vd z+$t`YUlu=rcJ)JOMZc9l7mv$(#4h<AoD6Rk=gBXLZ{zm!*W?M}@A4`2k$N9Kn}4YT z>SHL{3H7P^Kz*V<R0s9m3HhFGI9R9r6LiUEp;JBvZSOf~mQSjuMVZVNHPS0;;Y*w( zt3+H@iUyf424t(4E4#!XB1aa=DPprcMjS5}i7&}x#Yu=SIvIDdPsP2{>*QC(x8!-^ zN_m#JLY^sZ#$D_m$t%RI@_KQb{I2+kyhi*?UMGGjZxFwhKN9!KAB#WB2gH-GQ9Lgn z63@!tiNDJK5ih~E@DKSc^!iV6LOq22>;s&xKEkQ|W7z)=;AH)waKX2o0e@(w_!q2o z??D52M|8=km?+D|6j>{pWT9x0#iA89k~Uc)nq`qVR!$d-<uq}=+%7JV@Oj9M;%s=n z&yicix$<~%mfR#Rk|&Fc<*DKld78LXo-V#2PZ7V9yTt#%p7IAoXgnhC6pzBs_D9@> zeOTThUXf3Tm*wM#L3vWVD*puiVo=Rhv(-G+%^FY->p;_?ZQTt$Yl&J0PvauB6rRRo z)mpV&t$@ezQfSK;sf*R+>YM5^b)ou(x<p-}u2fg42h^9LTb+aRz(#0Vq>cSv{Z9Q> z-JrgszC~ILX;KEx#bj5NuR<?$fg)w`(hP?(W;n23>9D^R=SnD<1L^7MndzBccW#<1 zSW_N#MZHblXhnrfE`6kD+P?ICGv>~lcjukzv_t2;_4eD)CAdylK_bv}Nq)W|Utuqj z#(Qo2Td?<#cS+;VGx1Tyc<Dm9A$ZQxZ#Cqut-G3Pbl#yA@>TfUb=^_58+oXW;dTAm zes9(FI_BUeUDr)^T{L#%SBK?&4kzc2b>5GM=e^>vyzk1_j*vGw{{6{ussiI|yN><x zee?k1R5!xk#^c;*x5wkO>PU_=r7raS#+151OVsr`+PXm3b%9+MkMnyedEe!{cT z?ZNHt9iI2<l)Sp$cjbP>Tyoxn!}HED^H!+GA?a>LA0AgzVYkS=Rmoj9BQMP#{Y;r_ zzgY%na-sq80F4)XXz>istIZo09Sn@`_LO?o>HKK(Ic1>DbgRvjJk@rd=jB^R$Yb&o z>UkFQ8&(gkk)dMln-8*fe7rVW;`rVdaAT!7)0CV(a=G~Z{-8hTbr;ieaX9_HK+qM7 zwKTW3Rivz)TWU*6a+@YLjPKEl$oJKG^Fosg10hGftFAmcJ$#3?qKLoEHBtzPAA)ze zE#4f?#c{@g9Soj&m;%<jBn-HC)(M9r&`-xXtlW~4lG2jWvPdB2&P{g}=*7!@X=)7k zeNLA*ssc@otu4*5XvFFFnrr%%Xnk9GdF%M-D>MF>?{??qxjlKO@fzRcXpEFKH<y<- 
zIu2c_b`=zOeFX)+MAh4GW3B6T{5$kE0GgMJiSf=dUpn{FP%u2q__uwHMx*7?a_*~- z`s%G{3wnuQ|F*xk2Cu0!K*1w)*@#y!>OItPr}{S?PvMnfoB!S>zC?fFr-3zX8-99e zJir(k%2=BTRmADYaIQ@Up*i5x*AK#{Ep^GXG=JJaZf>p@mxA5;Uv8GGxW-%RkNTVR z(}HoexueFPWzkPAyX?pN&S{Mw$U07K>wY}`diRfi{C7E+fGV#&?`WT8X)(*WH%OL% z!u9AE**WNo5yjLqG^}KwQ)+fti%*f5-Ev8XLsCE{_#rQtJGQ7GSe#ev_l^zVj!-z= z6{zu=%n%@!h(^7Q#12tE{lt&@TQ{t4sA-<G<HTJ*{O07|Ehm5TrgduT+}^rbV?B$S z=Pi|6R*!4@L*l8E&ioLkhN_{D<X_Yd@MwK}T&Or_Om?Od+ejbUN8|zQFa0oX8OyO| zjACN|h*A+Qua41D3GhU>qBB94tD>#BBF0SV3lN941)a|5b6Y0$ZtkAEGJisbp>l%F zC4)=q2J6eG`kPDUPsp5h@uuyUbWiRma=6knJtg^HKW)}2ldGbUrY6V{?ibTBO!ULv zf>Yio_&3>dN8{+?aWsiv#)B2H@~EMl&NP=)My`}<EF^%VuYXtWoQ3fU*v!G)+eBKL zV>x)Bs2@wwX)MPK6jPie54%(h6!z~5B73FI4khyQ^2f{VtWbWXSUp+^V_?yuI5`uW zL}PWiI~uK}wNu9Qa=G-3Mk?AsLY~yNSgfMCbyOP59?-FHdP7=u&4l)ry8Ku}V_{w> zbKRHaC^T;bUB@*#V~+B?vWAlCP+6|K-+99|htr?d5YrO0HxPsJn|N_ebybDS32J76 znm$m|%hV(h;h6D%4Nd(xnzlJw8^)JML&aD#eh(z5kEN3OFR1%JkW=Ho0D;vNB)C@o z=VTs*m-iDdkAmNaGX~8|>rg^I#`bHEh{dBxk>P-a*%BGqS?;k}o@`GnoMwuOV8HKm zRz%wxTd+W*?$*m@EiRqtJnO=*p7Vq}c3r>PT2(vmimSgX+YX-j)dbn$LqkKbK$5H~ z6s_?lr<5)j*Cpbj(kP%?DPMm&gp{!qJ@6X?xnm3R^MV22*uvbxSgAWV!<AR#jkYwo zO)&)#)uy*BEl&p<8ZJERl)<U7{xf$jU%O=4^y7|`<2q&fkFH#LQ;E0poRfE+_ju<A zphGMAK{5IfQNc3cO`H{oK34w}#RYS|6-*Z^pQVem4y_%A-wXc>EzB-}yCDFwL{&US zI|P*IwY2i-B7(UuV5ry@css~-((YF3xBqv^GZMd-Y53+$M*?SP-UO&0Trc-`9QW6f zSsG6|i^jiY&hyCpEX$}K$g;axXQup7^^!!KAL-0TC};5v>Qi;Buj+F96=E8oBfWt* zTGCgc-`V<V7xcHg?Y^eytHatLebuT9yotJA$9vLOQJ1Z+(m0-lCpMWE=LpvEE?|8X zd2M}FkD2t<;q|@?zYFQB$ZPAXIxp#~l$Z3?yG(s`A?vHs)K^i))>m~I(pRYr`H(<^ zQRar%6w?}X054eiE#A@f5bx=FUY8H2)I)kK@I1yK>z`&{Qu2@<tMfds#1ZnCJb`+i z1zkyxJ-nV<%|2A<JjhFYbshBel<{9@>Fc`RhnJ!LAF<5PoD>=;1B$6Vj{D$uvuN-( z(Gu%t`p}2+XOIsiq9fjl?bQesE9kU+S@25C#Cu1YbgYMLFl7an67Q+SVxY|9_5@04 zJ(WA75v@P8RkT3U3i@NDd6VYQ+6qll{%q2Uo<-vt7Pjw_4S)aXvFm%giz=X($u!r5 z>5WG<wk&Q__xAMM+nm-ryR$bpH!CN@=|bDH(ZM?~pBW+=50ARG{SIE%I2`BeRSg}_ z9i1)LCtmu;KYG=+iNBwic!I|ng>Lj1&Rnpf#XF!F84g)mM6Nt?*~6iyg@&yqr7d?t 
znJi8tEh#D@Zf{w2%!NisPm0-lcx%kolW4Fl5R`sr^p9IQW#8tu_6-FM=?>}2%4>-9 zFI_S>G&%64+ZHX$oPF`KotJfVj>kTno|9b=-0{`3F7$e@nE1gY;_Wi(C(DaoG)-~> z9upf+Jgo6=9s$4K!YBU8<F)edQJzuj*@tgfCF>_1M*Ymg*~57l^U;VmzXh*&MQNNO zi5-S?tW(gBeEn(ADY1RH-F~-U%UDpt>Gxyfpr3BciaXjk&uM&sr|<olYOBkc<K81P z>Bph_WL4rh4EL}$9Mp<jN45L<sU^P{Kof2^j!y+OQ1#53bt}}G{4LkdEvXH<@<3w0 z-5!m#acC}&BLe>)(fAeGO9)TnV>znv)_K1X@|DJI<=1u(iTGl8J%P_6{-jf%xk#|R zinOI8cb&E-ca&BK`dT{o^c}M0^s^K9cgat>x)RP#ZlCMp_UCcG-Z1w_ji<TM__q*+ zK>Zz=pP~~8-$VT}NY+w*lC{^GvbF<rI!xBsvR2ncvR1e88u{AoHhy5@qp%qO&vNS- zL_@Jn;$ll)jqJ}p*xk4e(2wY3)osb;k@;VTZ86y<$z}8fZ9NBX@kn{9%aUByd7g)z zYxF!O-H4|_pJ92xU2|bypuXji{*dQPGX!i3kPD;_5nD^GNf6)BzarwhVRnU5C`Z=3 z$}QTkP+yn$K+kZBRiQU*Pd4Z}2P3=Am}~amX}I4k(>q2PQB#Tr$YnZX21Q-G2A%_` zG<_*7hIw@?hiG;bJ4i$YMZoR$Ryv`r1uELwf`LGg#BYn)vjEO-8#^jpX)--KTG2G_ z6j{7N)%MqAO>0bdWaeZRPG2C`fDgYju&NGyqqg5*K3vIsxF0qpbXQaleI}p7`O+;) z;zeUKCE|*3w-hAu!REnh(^KL0I7nr5VZCb!AOlTJhSzm-L;rjy<VAjA!CRj#mhG*H zgkF>H%d)h@`-$txT#2@;A-AZF2##E|HuvzE+iH6|wAhAtU4aK-H))8G>BEjS12ZL; z>uzA@;(nlLp(t#vaC?a)X@-lJ9SJ0o-9h|H>T|^DlJd*XanFvuZmMsrbfxFyI6}*h zmM(|vn)$Zw;h`HZa?Hp~cg;8<aWT^Yo-+9>%uR;Kj|YH(nu)c)49c`$K|JLeI8b}F z+=HGu8L~*$CN4B9`(y%2C^32HIKJ14`x#=*omrXKi6joSB{-#);vg%Qrh%d<758J5 z&gFEv&36tILi`#?8_w0E<y8YXmS$vR6lD~GDcn9bKY5#cke1jV!63vQ7JoFk1J3B= zc^S97|CO%sPM0&Iwy>f8oIe=LNk;pfhu)od^|X#Ozcbr6bH04H>(H~?Hq$&Y4VXvP zVLWuE|AihG(`E;3Hu++(e^=d{g?Ef4?(Hk2pWv58jEuXnuuOx*G7JrSM`7HL8epR! 
zej-DKiC=W5+wZR;u9B@VmQ|27k5416a+}7oEm-Ug6#4_<qZ99#^p?N$HH^)2dvbk+ zv7*UGKf^oCCljde;4zxN2n=@2V+vejjmgF<RZ3Guc*w8R{967Sz|WJvRF^>h`*3<$ z5I-syfbK+QL7b{GNOX`s5$G3Ar)#Njxq>u_j%AoKe?NAI^yQf{J-sx2z>k6ZEBzH^ z(MV~i$OqHam~2#<S(fGU*9d#O&S-hc9EOy=#G7b4lp90E0e?w}Kk>VH88`f6S@U>D zx;u~;9N&J?Uslgkl_7s1RO0uCrt3-ldA!>ZaA#)4C(E5(iCL?bnd@&d&RMBrwa!^S zoZs#TZG3cE%^jPS34Rp_<iKc$@hKKSk&hNYo2#V_r$#b#y4zaY{4P3k&1`9zi=e5S z{f^(zl6c=e(IdN+3Zy4KZB1A4iBC=>Ia3Aj81%kuQ5i4KbRlSpn&G&1M6>X^AQ_Ft z=7A2XmA@vh7joF`idMLzRc+UOr)Bcj3eT9NwkDpGjfXCk&whF!F^=Z43Uf(yM-cHt zb8qeCjcAkZ!;cZ6cwrWTywId>3c{D0f)Rp6Ta(n1%^j6cX{Ji&fYWG8{BsK0YLriy z`po_W2?n&s_3}D7f!nkEF(dJBlANRb;7#gBMCkl`Ox_%sf1muV&X4h==C{W)G@a#3 zo>&;q4SL)N!^wn@%D{{nkTbQAGeJm8+T=)X4KtR-IbaJ$uapcVQ^sHfc_Pp4E02*_ zl_XGLw1kN(q)7h-J)nTO<wLHy)5ljvojJMQaZ#1{WD1C+wi>zVhkm+bS$?<5NuM%H zK9dM&ikWl*-=&U&`Oc;vom(_LEZcA)_NeC(^^}FZuQpzVG4Nv3ny7@0Q_5mFkPYQI zJD45t<nnrQVm$>?))UDS)6Us+$&M2@Zai_vrWu2CrsG@TGqP^J_R3psx$@eZC!V+S z<O?r^A%p47?ect{K5RZUo^%b3w|rtF^IJM4@TNbC@<-Lb^G6k8j-R*NOgZ11Jfqv7 z^SxD<_8X1nkJ5SRd~fA_7o1N%oWuQ5Ixn5?t-QM4(fm<5FX^0=_doMT=`y5qQkkSb zO5>B`VLuh?Z|})wk#Va7aZ$~rCV@Y6-gE6f^13kd29OuuKb8SIFX#xKvD&lrP+bR| zt#m&uKbOXn9tu3uXoQ~&`#9%OH$yIl#nO0YHjd3c`fLK$SRvLJ1(Ra~APt50HB#BE z19Tz1mFnEl-(tDJnHBbzRQk)JwA=(a2qKz$gWakHdx1-(FtB`n$EHnS;=bnG97m_G zWVy-6^5HA4y_uM~M&*`OPMesK2wEI%(~riEo0hzfzhd$^(duR!e>kml{=;bn{AjeY z+oH9s%ko;b@ubUZ`u_v51j)HI@u+h&AEKTkbKUX0kMJSd^^+Zi>WA$G>xO)#B#X(8 zV%nU@S8Cf{biId{q4ADdhIyfa`6~}m<?!djL5WMK_S9P<icBXJWi_;yq#PvLtf^(! 
zV2rghEt_Plg1wbfoHV-|Pwboi+TaW=r3xk%m_Ex1?OI%&_MO5z&(qV3p<>T09C{Xg zx&{1MgkDXK_qd^jMM`1f&2$=wKS%%iydEWIWTaDXVSCn$N=_n+QHzU<%Zj6wm1X29 z66~moRKQDQ@?u-iC4yd-qaISwj6jpTqin~f{^nTi_=zWOoZMVr-IjQ&=&7=X>2qEh zj6NM}SB^91UVC%Z4Bw)8-@WpdvL%7_i5n)#^U5wfIgvPT=e8+{6RUaN(07)5SC8OL z$bPnEX?_{bN4J^t994`r=lNUutokkMtg7dC*I{`{)^J|6YeZgJ3z~mO{t^E?E8jtM zjXQ;Z?vfqc0PbfGeFDGrDddfnUH!W<APYP$sYC(XRrxZ_;gilZM<4uB#rjS9$TyTd z9pPGb_6$egA}G&P6Wj$b%o&cg>CjCON(Ggus6Q)1rKLHSLtnx<2WJFb^*Af5NKdzN zGAACXP$}wf9aadmXU`#fC7>=OTj|e|85t=}#+yE`;8N(qelF&M_Um#LyWm4x6weFd zepWCNEG;es#mK<pc6-XQU4_<m9fV~q>A`hITU*=Q*3P|r!O?@`OJD!-C71kgU%0-s z(iLu~ZEb6EefNg%e80SyE&3&~iPfcY&7xDjar*hRE)?7MN#DqXHLE&a0SgnZ5g2kU zpQn&Rt3_%cJIj-s<<0iG%fe1R*0Jv%t{1p!S@=f%T@SBbwPM9rXZ+9P?s(4`YU`@m zvz9}Po7>UU(1T#cWLs{WiQ3~W@WBY|5os_z;ueBKrmb^IT>6LSVy$kNk3;yXb-<JF z&3Bi@a$S%W-X@j|esa;`I3M-H5d)7R5?!;QQlGPGxx;bnim&xPzBR)!vHL89A4!$# zTC{R-_Ok0=+|t;M%N*2~;-O8D2PZ*mD#y9gjrEQLOaHEX%z6pBLAN>J9yQlKAI22y z3<@`Na6l_c=D_C=(7<XtCw!rCe2yCmBD|8}5LqNDtKbB!al~5c6pn0kW(&BmXW``8 z=8D3I3f;?^LRmk%d2!p+wJj6YO;AghsM3nLs$%n$+VRuM`{o?ocV*9@tN-gOc3j#$ z_0mlj+yMPEZR*1fn~vIWTEivlPq~7|)s9`~Dc0WuqA@-mOAX^<x=Nxe%+}f=WCmx9 z&GCD40%Kt{34}Fut(XiWb!yx1+PHRl`=zrlICc61le&B2r>d>X=C`fNN$i(iJ;jaP zm}1B(Zl7h<ji7Tf?rJ37((~I<ocbHVeFMsG>G?W8=~0CL&-AEv=!dCwS$nmvi}p{{ z1$i~v{@D(_JGHEJ-fM;(`VZ=4d~*Qyr@{hQZlJ5s$(HvAFa|oE5=qb?>Z@z2IFf+u zPuAfSGYkv4KClN}75Z1$UEm!c69hCGXY`D|GKWhVuJrb!+WLYMCLBAsZei1+(s77; zbOl>u^<DXsdlpR{w`^MGU{8j_$Qk3T?;clN5i2M=wxf4`Rrv&m${FJdcv@n`HC6fL zv%Bi%HKTW!6CMN43G9kICvT<TzX7@sJ_<bmc+h7b^p2CTPe30y$=WAI*6}9AyxDdC z#;$|$W*tA$b^M6yz#4Cc50Gd%Rs1;aZ)_;Yb2tz^q>M2*kY%StGr)hTqZ#0zUk;W9 zmEoB^QZR!V`+(K@Ioac7BV!p3FTkM;nV3REF<{vi)|A0!F<J=27nxo8huC1}h>(U8 zdjFHzK5L-WS&<sw;OPI)=00U^NT=2FzTcWNtT~zo3%|$06R)T63GsoR;}yg5@5U`W z8*k^Q^BC~pFD{4qZbN-fsqaAF`ihpU?y{)Qi^DbomPqBo3A`{q90zraEMp{w5gAHc zz*3mm0Otqn7+7pVKzA&RM@GrOCMC*+jv&*bct+G)hJR$Pwlp@F{m3Q^tuorurgcjk z*tIduCATkKH#ng#;I8sE_fB09x}d4GdB)VObLKBTUv68yaK-YH_U!Dwp7vQCr;hKL 
z+?yzuPi$JVwtqX(L!W`RLkg3AB;iXG@=4fOT%s_ZM_wCCb1;2GZf!`>R`cjnx5{02 zO;-&)J%^q|@s#po1tR`1Y{+bwW7ej-NWmcu!rGjXd#o7C+Jf7i0j`x4<b^@O5eH{n zsb2=a`qk;DoPriq!{0g*ugSuWzxDJ`pSh1bhwxEg?ku@ng0)J0kKhg%;n`nL`N2D> zcuQWFK=!8ME!kTFxtfZ%<Z2`A7W6$szkm--`_3@CsiM>`eBKJL4$5jVO-<}f#V=%T zxn|vLr_nuqM$eQ@E3!`7EUzJxQ{tO*7A!bQUX|Fi9d;7*m)qg--N)mw?M~D_`xiCd zvQv!AZ`mm{p7tZkfB1fs;<vVaNxBT3>vbDeoGJOyXzUi=qSeT@tuv*@)A<|qK(<<O zrnX-lyfO~s`ImZJgv9LrT{&1jMYx`-(vC-G6_yNHVyz<>>_K$oPE%>PrZd`GtQy2t zd!rs+HQ1Wix)mEa35`%)n&iK-%PUqcSafV}n$aGg)ZXZIr**0;x(XBD-hRS~TZYfd z*z#(c6YeX|$wTIx;3wl(@E9Waq5$#SZuxf;kFllXe;t0jWIaR|<R`koCpXNmWw%9V zZC&Tv@~<g;K{|n+8|$pCX>H<Br==^{w6=MQ&f0oDta#nS^ZdfBKY7+B9vBw0pv<#2 z=CE<7g88D1d~k4uzy$~Sr@a7{L<3=Ps4gwZ3;Nt?hy{~m^Tt^s8Q{ytk>Tq_KGf0> z(=NGasVir-wQ^v1+%#uHUeY;NJfTRATuNxs9&@2huYZ~`)0$pHUrk5r465;T1||IA zXHe)C`do=|=fj`22eQVg<z&+23^j{9NgmUa1k<(=&7XP0)zkhIR@)M#a?hb}keqY_ zyn*$k?c`}N=5nwn&S3aXf|f0GjE8RpX7Us}IoD@E(VaA9O<-+D*XqJeTZ%!GCGzCN z*W?cmT_&$i9EY)U{mg$9vqn1Yn@p3D`2BRx+RjhwUgv)ex7(9=+K)B<b=<(A{*#^m z8H_)R{D~P(1eqLWbccbTjqXNMunlg+XoV~K=O6Z;dv@>jw+>!#-T*E#HU9ITi6;}k z{rF>4i8_k_o(E0RMRB|Uf2J*^PEA$R$nFV)JO+k8y8D4S*Iv+f>%d3yRDAlcF7dUG zxX)Y<kNrh+?5~W9x8$VGPh;2lEtxm69?NcDL^fTPg*yLkGq!1DeoGc=Jgs@;A59j5 z{yGn>Pn~D9^{Mj^-|BXmZ$UTmFFc9<j1lAF)$l#yV<vElK@Y(<E%1|Kn0eQ=0tI~$ zH_uF)ETw(66y|@#H8jdj5b^B9A7y^-Sf?wox<L;0G~nK^a|W#?YJ+L|nn_d5Pb2Yf zn*1~pzYllP^qQnSm-Z1|&u)bqmKX!c*K2|I3+FGf2I|RpFkB$;2KseNW=~wXAZw(k z)*Gl|ANYN^6Gq>GdlDX$034FJ2=gw&#s4(&pwM0uhRT!X#03CsF%CFq9gY%2Q#zg6 z(1Mr&7-1+(84CDe`OsrOXXXPH?M*sH`Sa`i&#EJ{ioXCK`zeSPL`sS=ki2Ml#7hC4 zPze3e7M7QME)ViK+*Q%q<Y{fwNAf1QVEyds69yJ^AJ@}S=bSb*(o+zPudZL<7}q?o zaHah7y3^k8oja|geny3JR!>2GT|BYqn0c<orhd#b#Sh#En_~oDT_Cyfgc+Bj@fX|p zKhwF(%5RJ_@lnNi@c0w3)1IsCcIR5>u?DQ0`|w3kU3bzK@_BVVK5AX)r_p2AWwrYp z<kp$G-7}NzQeIz5UL6PXu6$jbtn;2cEHAz_V2%Gzrk{q!zt4!-bugaBi8IMgUB}L3 z9W+j>j^sE~>$=bAN~sHS3w1pXd3u?y>#}5BAIgV~vXs0Za^C0EPJG*lV|do2=DiPJ zTD9o=CyLcaz3*bKzM=DeV_05$o}+vRiRPQ`AldVZ?=4$&_@lXZ@*HAqS$Ini9LYD2 
z596B$?EIt$ChPx-jVGNYiQi)5Ne)8c17FUOw5F?Y({OG)FH));X@xS~QJbqW(w#VG zARI)bVQrI*wO*(UxSY2kJTN`Ok-nbIo;Z7y^yB_XhJn@2cML$B6_Aakynab0G*CHQ zQ#s>{6eJ<hB9sfCN*JvmVh8+Yvye2n*)XhCy4mnwwEAB*2)VK-?y0V>ZmMo9^WxhE z5nnV)cWfLS^4aFsa;2?lQ~?7k8)9#gQf6LmHoOb5it0I8hfAAP>x1W*0;j()J2zMt zj1)Fb8ZLGqhibLp0Z}Yjf*5Iwx1N#zOyZ5JOng)^o@LBG#9QENhurnnaK8SDUB_Qw z?+0ItOswCx!9Ov?0=J6Ww1>}^h0|#+eGn*0k+UIf*N+cJXKFM~_~aCfjmf|tDTW1D zIM&l0CbgVm3`+WQve<&3O&0uOit?!uj!fY=nzD<`G?Dr5axaR9>*^{HTwm8(*V0g5 zF|J}<O%=LY9*uesSmGON8PoabPr@FCWU|Wo5t>2gOWg8-52^yk9;uB3&ej%Iu$%fP zZ9FbkHLfIDGqbEK*WD31>#6qoy25B}Z)}RsJ)sQ#r1Dtx;^KLeB4t^96J`!NrcA1- zZ7VG<F8xEyRo~fLUfoh24wuV{c+a$+*N&d=sA{eZ>-l1uk_?d)fI@qPUaJ#8>qj6v z{30ywx)T8z6j-Bo-D0da*hCx-wHzv439kH5P%^w(9QtK$xuqjAp>jXxG!WU#_n*xD z_H$~Be||-@AYji#q)#|dQdEF0`w(~r<0bSD=&M=-;lML(@JvP>G`-TBmtTi?r9gkh zf_1G68^+C;($U_#Gpjr@L0wDd`u*j#r);0SwJm=1l7)-b4E~&GWZPRP_5ru2aWl@5 zjz6v7fF5FpL6CTo7%GQB<mFKyL|!y65-LgwhsgLh;SiS9mYmKbLnCqv^1WVMKRi4{ z;)Mc_x1iADf!YCmtexgiK8+cK9Vb+bsB2u4gAv)ZQ<aRojusXrvC*!hHOF+AHJgn! 
zqrSMIt82rgw#7x`TxqUse`BO#N@dl|anl;Br>x5A-ZFLS=I(KowXU>`vA8>3GrOs2 zUR`?w>7d*v=C^dsZwgT@HXi;!sqs%Mk2!`e9z&D3EgrxPNyHsG)8P9sa<Fd4Ku<o> zRW#zuFLD*_i2373x{8)sV%Sx*7>u~pd=(9AdNeDp?U13%e#5CN*ELpm^o=mu_N-|i zjoPj44HGg`uc7s+_@>FDkRE$)JM3zYfbMh;?XGxHRb>ohbf#s1k{(df&6LEc|A^Pn zQb#*1TImQ^`;M6Z3(!+vL#rlw))74=E;FeAf~LXP{I<Y<PFMM9!IhPVZvT&|o8({U zizfg63Ez-Q;xD!FkMUVa;jHu#V%o12g}9Hk;m*8FQ@)Y`+LnlHdm%dFsu;;vwn`Kx zC8GJB&eg+3E97Aj4w^+3MX^X2O%&vLV_F`PFNk*@Efu{m-yjqh`wTp~$yltr7qqr7 zY^a;&L1-O3L(^SGU|P-0&YqqL@hLr-ZO6|Z+}KoIF1;LV_d;GtZf;3ltfptxoVlwO z&04sgbiK-<4-n^f0~)UqJ@E<PGuRd&fAJ*++>6c16u1x#*E!aB3142YL&mt{RYjG$ z;|O%4kTJ8{LCAB2Js^NKh@c0O=61KWY<g&QSLf2E*$t&FXL&Z%x6ZAv*z7*~OPf}< z4a{kcbXS(gW7!ioPoA`GO7mc6S$z@y6jx!ixHkW+`%b&0``E>cj;<VRYdNZpWPCp6 zXAkBlAM?{BPKjrSOE7{!CQhFeTg3j_bO*#Kc0%(`P6=X8oldpfWzSUfupB&Bl+9FN z$!%LuSxM)_%BIT3y4r$@f*73@1KwCB?_eo2hIl`(5A)SR^F>|-NRPJG7IN>U1|nTB zy?T6KRatA!tW23U!{MF~l0JW0Q+19r=y|-MuBxfIxjwV?_(5|Loj%v0Uj}_yxam7i zeczXxF=l6OtuGYz`VW13-N4*c3un`e(wq{{egK+8#dvYboxu!f?UwI41|}s>K^j7O zolejsJ)IqAT3grQ;kbic7_UBJMr=!3$!4{pm;xR>@Suz@t1Cx9f;U#~<E!K{n!>c^ zwI@=g=<*L;|Dj7Ssnn1kblWW4VDa@gz5&ke{;kQ;Dm~F-F|ozm=X5M2?yf!bL6WO! 
zJ&(irm)31W(#2!46%X+`9|wO!Dqi~==*;sG&OFZ`{-=;M1rK@rWgm!uaECM<?w}U| zIKx(rI3T&uUzi_6Id8;Qt{1Plb}{dz<Vqri(aWl$>y}g7mv<etUL}6*STK1;U1jIG zHIrHz`!X_jeyM*;$MKu80`nI%RrJcDcz^FSQy)jacpvujIG(W2hJ?rYUTXZG&AT}~ z4n_O3#y`zp5Hv8pF3vwjwBY+l2jc|+KSJsArPCF`PF)Hm+tHVa8|w~XUT<y}PN)n{ zME^o@$M;UxXX9%|2#!UNKt@LTaxf0`62ovU$Ds=+E%Tyysl&3*bm;PFXMFJr5Ld%( zk6IRofi%bJQTjj;GVyS0s}QXdS|@b2H#OAOR9BWq!y%t1cPtEc+2vy(q&y^+Ff01D zzQ>-T(e{5!o(25a{plLSvonLuxwWBCZEkaLX5HQPJl4$|p$V)#nJ0QC&#o${ug!~= zmPYex>l4rN6t`tprn+tA&TTyZ!4dOL`;?x?Kbf{Zn%{THHp2L*Vm#YM&OWkj#LT-} z6*CW|*2T6Ft1g@jv~A?HQEVe0$*0LSV&$dvgm#}(=Z#_;0dJCRWJKO~!J~)UMpF5k zY$L3@!!|NIh5u<CYdZYVw3TZ<r~O*vACsS$`C$*jI=1jUW<KwxwgcPCuyq8RiCNEc zrk<<oht0&qKR;}J!DeFOpG}#U`;4h3K8lz(*!ub0O#U3U9oaFHT=yt>sZWRJC0mR& z9;NT8LdLfZ9gt7rjO`OOqBlOJ6c%U-I>KUhz!I&)-t1$J)=lhV4vVcCe{~E2NQLwv zT5nCdy?t}EBB>sS9l{#h{B|JH0}C6*S4|i<yS{sQbL+}@&p==IxVj~{K;uuZ?~FI3 zS5B+0?#zgEM%%*^H&2_kY2t)s^9NUzmvqZ1QsI;`o@BJ9G3*B>E&pPkY1?of`W=tW zlEso@YH%Z;qQKVL5*Pv~86`ZbEM}L!X=o{>&eM6`&{u9{pDB#xGzen^;#<O4{&~1G zPP|I168&8=G$?<IQ;tha6+Zm!tPK2Vzq^JOvpIy$aXkRftM4MPV8NZ)nb6T`*QFZ- z_@Jx7yJY}66f!NPuzTEc!6r!N3w*a2zI$b?Km6sQcqaDLny4GGZH4@W0V_x-HA+Z( z^S`%x^^#e&{dqBHE$QiF!};S|uexfQytE^>aACNl1v<`{99K?udv~lO0kLj~*wB1@ z^YjILEw2-Qy=AYyaOA7v3#%>>#@#Vjya+LKh`)>zI?ce)Q_-fKrWz57*d|?aJ&c3q zeLFt1@-~4x6J!Sj2}*H!NjFkym%z}|GgSKUk}3J)jYr6esW0iL_Bi$f%_R$wonid= z>C<{9j_(}b5sQv5E02Y<KtBNn&}<#4AVm~hZU>7K-y=>?@7-kZ2?k-<(H0)$gl>xq z9NNx<;1Kz=yFTo$m{}hy^-LUGIeo>tRa+}dN~`h<fSXyH*Hq%pc%q}DrUuWo?#9L$ zHF@5FsgozkZ}}rt<7QUp1blh^u8yw337uulV*?eHwSn&P+L_hk1Af`{yG|?~dfqA= zGbT4u1Zz`-)(g)U<ZVROLr^PqC<8xiJni{}H`LSSeOR5J_7IK#6TTL1=QnCJ9y00$ zxqs**eFtFZBjRSePLi*>>|+uak8M23KaGEu>ICo41lR?AD^FC!%bZ|lELgtNWyW=v z^gCd?gSXZf^;Vcykyr`jV-8(UW>V8d<(Y#{qi}BZq`uy|Sh%~YS6g^KtZqDZb>dCw zo!T9pRGs()ZBoP3W>|E^+lz5B=14|ZbkZHhI%p&u5R|?=1GmRYtXt721M`W#8y)p# zx{AolliE7R#F+XiLTS-Gxz6gzOzPU1XQFfRnPHt#>82XmacMQYaiG&o+?}SrMPwVA zwC_$2A-X{BGA3y}Xun@>6V=!s5F1c!*;QvkcTFv8>8@)~_98`dGLh?iOuj=T!2Sz7 
z{*GXV;QQwKdUcTRnmU$ZouuCW=iAkE+q`tmiCcfQDe>1%d30yuOP$nbUH{!?{Yrgy z0M*-g(lvFvmJdniC;lQloDFa*P=3rj`boA*!XNJM(B+6Wx*V&hRy#(wU5@=Ayoc@= zSLi+T3X4X%juEt=IzYQ5jiO4^D5?MJa`^V1Zd-l$MCuqx571+@d?z|T=>ZyV`A#&R z)`7-bzLPb$Cr#~V!7APz?~2fkvgz=b*moT116<(EAY_;MwFoQ<Axgs~_=Ha(zAD2v zuxLN_kT5bulqWTUgT6CEPm}*u^Dd)(pZYzTR8?hNVmx$r@@s9qVQZ?BbNU^-{}iiF z_<G#cJe_%L76@bD8=5pe$@njU#{zVV%mI^tHc%@1G!Aw$WaTJVh<KzaG~HXRL$fFl zlA~BDrjEM}Yrs_l1(m!@g{#N6-}3&dSw9blTX}<W=EwC-%hfCG{=7u~J*oTiCpDS- zGZ`@<FJXPTMKm7fJJzfs&@rDnG~%9hn|-m_-G+PCF0bnk6K2Am{91ld!2>TZeXadx zTF4|WcqZ+R#5)c&0b0=gS85|72Wfyh_87F_w()JuT!DCuoD2ycy1w6?Y&WLwck6ax z5Adfno3RhA{+3&nkKoh7cRiPOmu?evqb2zRv^i`o)3@@lu^xFX4}R_JN6$;#(y7Wj z58cRXnd@OWGY>WDJ3J%h4CQ$Q`btD1561T|e0`F}J&18rA1pk_@L{9IiBvwoee(<4 zE*JRJm=pf`^%y?$!SHToW@h>`edQE&$QP@Vk|*`5?zHV&?z}E>i;P90dBO5%zBvTx zDb33(jRb=c>JQd*YJXTBLVxjsS8p@>QzgE8XI@a@K00(Ravsqi`PCu$0o<b?dk=-@ zv&AE0DU^j&`Ph#$Z02$ta5GCYp^xD&xmW|Dl2&kOdiuXFxF}v22xMjkssfee@FbNK zC&xbqM$ys7ujo=}kj|sl;&6E<b$w-L;-Fk$F77?UuPB=oN?GJpN4TRr2wO)g-K^M3 zi^tp)vra}noEh*bo|F|md?xv0CG_bSI4kM4GRYpgsEkcy@N0vEXFs}R2{GT_3MUsz zXJ_L1(rG0hc7m5<;Fsrcb}18=+?fuuiS3#4<EVn`eYhBb+bsPqipZji?K%jA{#p}B zM|=^GzgB?5OELrbaF0+re&mAWdCKxdp?Z`>!HY%o)d6hmreiNaA7QYyk7Oz~Ik&Me zG|oAuJu+^bnjG-s`}I@&#bZl+ljS4h@=IdHq4G)1i5unAj!2}X00BMS?G=R$;lz{R z*%)~CFMMuW!fQ+Otb{pL;JeXeXFB5!jGvDP<PP^hKi1(eK}%CI=>dls=z-T90AuS> zrdw{ba`7V+0U3S;8bmGii%Bp{`I*sMIInKvOekI@JypH(k-624OIK;7t87vglrEMJ zJO<w1m-5)2pbZ>j)_BrGHU4SytUd$sgvM;)_n6<_(fMf)*ZCha&(bqMyVU%<@s~$H zHZc`(3A<2FiMTVK<Ch>G{t5~H;I^>hjEca+dLIJ8$l8u#`U^#+X%>5hK}o#Dm%iyk zh<r>rEFW>%zppF)xz!NQ<)Hl%5kh=ySvh|dN#6j6C77&9{81j05&d)yd8#I&vJ3K8 zLhbFT?CUFl4m_}Q)~?7D>CL%r-_&lN0-ls(SO4wA?V9FT<EgaTOEw&HE^b$Jw$`-$ zi=@d@q5^h<$8okED~eO^gO4!o=jIBLTbx@I^dn==aO1vd7{JY1>u?lkx^G2m>xzjJ z8CFb;4s>-5L?>3HPr%WC^F%TLY@RToZf?`;)vISW&8?&UaX*-c)}dd?*e8uAT~p&N zT~p&p572lkzK8G}E5vxRhrusSI^!s@LKWbv$61%4Iu4H&;x;H&$gb-nu4}YdA)U7@ zCGUrvceGd`otNxky4`;lE5vzix*V=M>-V%bYdTo5Lb?rUw_)kd&mt~~bZ5kE*t+w` 
zysRUGW~9T8)U%mp;1{bs>nt+7pJY$_2lJiAlOC=6$9lBIci3d;KHa)If@I0TMokM1 zD@hvXYBiE%K9(jO?2{i*D~#hFCuhMOJmo;Z&}g}^+Vb+~QeBnD9plwz8-J3Adh;@P z@?4KAxVIMBtlM3mJ0|WeJjv!@xp962@h<amiHtOKc4Q@PN#W#_G1GbHq!GMK{FZLx z52u~Ze>m+l-a3P5n$lX<c#gBN*ASg$G_9W`o2-qeGmFMsXBJ%#omn)V&n#Acwnq}4 zY?tO5!kmt@M{*sb<fSnT%gb|K!Lo*YizDxOv*2aFBCg?-SH(uV=WWg#3mjLBjrP{e z$*LN@>^(h03mw3zox34(OzGpZ&pi{mY$-HrN5PNh)|s;h{jqZB0r);$2t*0Ndl-*T z%+A2Uro1eKgPBjlerNPil>H39#K-mVMmvBgRNRV1#kZ>~N@8VI{B7W%WjeK-o-jOC zxcI_1PH({gd2Z>_cW-T9*P6~)WqC_QU)$1-#+u4I^9##*$|K*HSXTa>;&A!g=^J~u z#og12XHMPGzqKpSCm$J8JG)_^VM6s_^Sp+{D+ny7dDi_WzB|X{JItFkpDg?JJP+d+ z1SaeGk1EFV`T8H?m->ADOY3~C=Xv0;yd+mRui7;tZ}_mh#7D#G1<jrCH@}TomqMJ6 zo5iK^Txi&`JP#ih%!f7z+qE_jmSw@DXgFL3u5LS>mLiDxA-xgdG3FJoIy)O|sa7ty zrmW<M0*m6IXp|h+(dKB=_;E!rt%XD6caM4_Wsxz^a2*wxG=1(4^4Z%TrEj(*U3h*N z1^LrT_U_jt$2ZorPdsMflnIfF?s)I3GFTIt7>zdfcExHc#}5`8nHfdC%C@Gu?5fHw z=bXO1u5Rh#?%w|5SWT;pOz-aPc{Sh;c1`K%PV?r^J9-&$nGzF*7O{PwzNNn6&Ri?5 zi~QO6gY6&!EDEe9Q_KOF6D~Nz$fvJEg;5>~&M+Z4EDI#1*Xs>=i_5{Auq)$}kfs)5 z+D2m)e%oK~$FiF+b8KH@^D)h>iyEtD1p5y4PoCH@T}|$gk1TF%Y}q(4YioNf^7j{y zT6Xl3<tqp2ENS*1XEWU2qPyH)+$Ms=$19w`j@-3CAW#|zqhEBFoBBmZXI4{>)U`RT z;H(Pj+rq|b?%ULf9n(}#C(iTvS#OrrCc8I#(Zba$7B7ZdiTYO!`rd<Y411slt++Es zm~kBx(Mt5S7$~_9rO=lx9>WHq1(}&e)3?}LR0eN&gvL>hIDk|aILtRO2!kkJ@NJSO z`1{5+FKBE%wr%`m&(s;yr=2)qU|QGAirR@=dS`CwsESQFbm{J;%YVH1n4?yb9(*#s zG4&|)V88f!9L`yxJO~I96z-r;5*Xidoanfl{7H5DBr(iZ0}*V!rG3~#bQw_&Cy7yu zkaCMiISS!(`zrZ!7Zf5zx0o>~%o(EjT$nd`U40mfIDJe(X<58-X20?dj+f6gwr4o9 z#-zK;X12XAADut05i*O%#Cp#~s5hw(uA;HBKU?FUmT$9+I~ivRqCrGz{2moxd>`j0 znMnA<^{11e1E!X<?nLCnxATU46lYdg?Xo~m{sdG6ww*G#)f{xAn@<hNnbzMGgu@%{ zQ23~?x|}{g<A}Aj@pPx$07mRb6QUs?NANv{HaHfda^Wl~J$<z$IAJ<)*3C%whf2F* zGgVnpR(4L7;Y{;JJ8q57IJ2%Pub@4{Fvey(bIST!-%pH*Rv3;knMOu^jAZ?09xv_n znhw@pe=^PmspH(MGSN>dq9*SYi#wQioCT@8Q-lvWLdSwLYAtd*m*Xm_(}{7Xp3lQ6 zdH5y%NZs;ue4_&VWaZ-nR7$?X>xzGFHN;~$hG2ab@y%6VCEZ*V!}*N9b`jMfC5Q9d zm%cP}{`|vvZcN9|e%A3S*pBGY0e|{k;G;Uezn?$i{yv5TW=y%iUnj;@S82<pZP;Yv 
z4MhhgU2}Y$UoUTceLwoO*=+E11-oinyBjC{^q9HbN8<*08oqlQ>?o>fYoEDb`pRQ7 zXH3Okx5>$I1@kK+#W8=w(w6RMd1HDSKK1MI!dYAGFPPZSJr}*g+;iV~&0Iy@)G@&p z-lzmm5I(B$*ykUEZ~GeT^SE1gjkV9~I_S)<>v)=UKf8{oT?gZ-4xA^xsq6SA*TFRG zVj527d;9f#Z=d{`_#-4LD<$-E<h^}R_lw-yKT`Jik?!rM6fp1YS5=orqm}yJ{%8Ro z^o=&tT>bg8D(~u@JR<BPW5<6m&GQDbt5SnM{-Zg!=Y-F%dakT9a0g@|@vt?A{4G#x z4%wEk^FL<K?TTUfcc<d*{B%ABFMzMO9Ol`&uU{j!#&gOrx!85!*}!+9vGs7I9X3`n z$<RG7%W6svVV4tn8@y0-k~=&H-^nKlL4m-C=huHLqDEB1Tvu8irGQfU7AH)kto_6K z2xF<XJ&@t96?Qz*4)j+ToY9x+qM@3imhh}up|Z;E`1tnjsmtd!AA`R<(J^`4QEize z?Rn+#iq07wT}@5BYmZJGBY$|x84YvmYG!ynD|=dwX~LML=*4%5?p1SPzClcO3BKYY z`#d?>`1>&Ax>rlV-n1Vi52@wuJhwmjm2dcx+tAS-g>TTfT#!;O?Qh1%|L&bUIMAaq z)*k1YLtplhS=W3!LzP#B8pG8M$8K;}U)6Qm80pN+8M~zGTa{zxbOaEfD#b*zKT+DB zq7q-t?DOU3APN{?aKYV(3?<i+L&<^fp*kGsSw@C+xGF9Af2ey8z_zM;fBc>+Ygn@O zwk%nD*pe(uwq<$B+p(R!6S4>iArO+XM+j*WM%e^PfdXwQq-??pvlJ)=+EQMjtoBh> z`*@U6etmTDN+bV1-*Y8fb{s<A_>aJ`q<haj_nhxJ=X<{U3-swgG5geTKjA^ho2hft zI*3jxB*Q4Fi|m+VmF1mOUVo#g54fd%Zt&;Mob2bBx&C1n`iV71aQlV$4A$b!=(Z9% zg?oem@CEdnffBwLge@Auwpn;I?`~j9S;{D?l+74ODe!}Q1Qz%cCeBDlqGBp>+mrz+ zmE5{urt~F<a!B>rjw3&i2Tqde0>=1%{fv*O$h52~NOSB61cH_1#Uoh`@O4RxF_6uW z@xb9`oqR@+xF}zqwnU>yMrT|Hw%)lKxjZwOwq&t><s!oj3nSOaVgz&Se}X56IreKQ z<^o^CGH_bOx%eq`e7k5a#QnQU_<s=eQ2DbZ8O0#zhw|$s-9a~Oa~^{p&>>v-5cpyk z3rkd|nf$0Zg(O?U!(mPe^zFr4APywCYcU>kQGz|=llj9vL{tGh5%7My#T?*ai*2B| zGTfSj=q%%;sr1*EFgBQLXc`)nDF<L1J(|!Qzj=Io!L0eS|54ZhOH#ep9<E1YfMK4- z#m~|^IL<5iVoKLZ9w}Wf$s?ufB#)G?mvHgab`GUKrTZkmlb$Qd@A8-jf@|seTSXte zsqIb5*`(_vXOr3~$%>*F#|N<U^OL;>*22HCKl1TO*NHBs>jCLHrx{)=-d|hx+~obY z5c`kdbGYY!MtNE$?o)rJw*Lm%C`{ZZee=}&e=ORn(0CpfNtPvzrzFdg#!2+8biG91 zO4mu2C0#GcviNmbrg(i)mPK$T`IJ}|PoW#SdV+aKcu`u!RaypWc*GBHZegg%G#QH5 zu+(RPr($%S$lIxP^M_Q!LM`%RAgJlADCu`5-JNa4JLGbO+a;k=@dc(Cp35KR5xneS z?1qq&v~(2{x!j5_1O7uc2v{_>i3`wDNj={g!)1FV9KW)d;cd{%x5(68gK!Y#dj>v) z#W^VX5K41H@=9rLO7bUZZb<$_*AJCHfi9I~N<hR>VB_a8H);VkEffm~Vch|JvYO-W zYxt`c)JAg!LZ`uqSr$N?f-Fwg=M9;ytq%=fI+D~uQ*wG?oc*|P{q36;6LL3z_jwO( 
z8U(~k*3A$Hf>dvug)x~B#h53{py&r?5FX7Lg))Fx5QgrNIq`Khx;{^`f6=@iw0<S) zFMR*;QycUjzR#kCH?h|FdpIt62Hl+GWu@zcGo<S!oFQE&oFH8<;e_e!a5*u?(SX6c zfN>lW4}x^s+{nsQj0cg8P&^2wo#H|K{HD35pEPpyZ3}jszTm_6ZomEQ-`{^fx`*~` z(4#+&_FMvD8o<K<jZpk!ab!}6Ago1ABnZP_1dKOFxxf?Tno>r*E)mVeQ2JRu515F@ z6;As}_K{3JRtx33T4%C_;?~DUFYB;b<?P3?dA)@v^KFsF$_lksu4w9DFBbMbGB7Jj zxPteG!|w&WkKp$)$05>nf}eE#W%jqS>jXdP`cI3tx>9}8FG|;6WS>sf$9PNE--51a zruY7A%6kd7N$tELD$42;4wA0FKBYgj{-x`$k#8;FObq8Uc<&d)j==JUqP&=7>Tt0m zTrdt;kI;G|j~%hCku68;h<%f>Bc9=~d!-0FNiW0S&E+W4^^zP#x=t{n>u?FcUxEe2 zAfmoX*NgU37()bj`f^<V|HJuz#(c(lTN3HMUYW92A+rwEYc?N0)A^5abptoN20fP( zF)<t}#Ke%yHAtv{JO~qxm1Jq#-P8RwnQeaKK%>8^%HVPP^3k;)wy@=G%M#6v1C?5% zv2lK4f0h6#!=4e?0od47Lq-%AcIVYF7gB1Nibp;s+u%B2elDK1jaI=>k^58FMk`6g z%~q+&bIH!_SGc7xC}BaSf~D}0sxHslT`u@P#G(;b*cE~mjyVb=VlCM|fb{2*iZV$G zd~qlqSh-=264i`>;Yehpb0F%)7FKBu7H{;7GX_|wXqe)SePx(k-UjpHdhBD3PN~Xg z!!RbCG)=j_t-#?RayZ-xIM{(dZXVqq#>myki~;!o{=mMNegH1?1Lr2)M(PFt2J8)t zcFGN-Ng*J`Z6)yeVY&m6@&pWLs+so^*dUNNmRFvk3X%6DWHN^UBqHx8fW*&IlH^Db zViHW|_WD~*5wjgQjQ~<TYnEhY?Xb608zlp4@w$UxgMl%xWw(nb!X(ZII-`)+S82cj z!|z6JVKSuPkr?L~V!)}Cs&zQ#_*&?gU@eCb&j6NkYbfcltdkqb1TgHx7d!x9bn>Sv zVES57(aBwMQg|pOAkV2n1`X;Lzisk47<rW*De>*aM~j@V)V+xZh&p#tj9M~)vis~_ zxOu}E(2N9^no0+^XvB6R-kl8b!M5}GDmY$AY)mA)63Gwl=)Xtr_eZ<D{`8{3uI^-O zbH?P&WdojXvQ>reGS(SCMQn`6W36I;->-c2Ep1t`7V*E2YoYyCg0~Jk&LgZI-A@De zAjX8eLcR{Tm2<@%aKj{ei`>3D$=eMoA+lc;k1bZ5a(j8+rq>GsnP=*&^-il9NdmQD zRV7(ymQ<9GKTP^KmIm@-f3$8emS0pi5Q{b;rYHEsJNOWU#Qdg~C8>3bS{EhSdfM6t z2iw|uPiP^$JB)2l_8cO&+YtGgL2*a5uyj;Cq}L(nL8_iHh(IWgP%}eRA;toX{T%5F zrBFgl2BPN%<Z}?)3;z(i4t`vrXlfMA^#>mps}FuaFv@XSn{onBwClEfw%UnMgGBj~ ziJau&Y2o&%B^iR%N{;c?8mp_yQf1~As!CX>at(V?UxrO-L3HbIz*JOovLzdiS+(kz z4M*kjm9OeE{#;)9s<&4>nQ{eC5bn;4Pwz=HQz~=tALnmxt1EoE3_};_PaM%iim|@1 zZNXw<b`VsFD=Um<5fq;p4wcx6AM7xEw{cK(h9Zj(UP1Vl!IOb<&YTRH185nq5dh<u z6tp9?!b$9@@tGoZ5sI9Z%@z$>NKk_itP!qtqG^TrMgsP%`Sa(e+ESxwEvryvJezvv zt#6t?-2Wj5``*?C^N(8D*xAx;s8Yz}@@QN4+665ej~W`NPz<481AzIR=vSQd%x=o4 
z7Q$YqV3z_yT9!%0xLFdqSh8G_a;wFK`asYWtm9Th@L-eZt)h$)yQ#S-5~FP;Md&S> z={&jIn}n2Ep)uEc+B!!2)wbn5jnSUQy3W<3OVg=rKE9}36AWZka<yJ<F!krUy2RGr zTwOX@mtQ=X8Pl8Qb*2|KqC29H0s!B{+^De{VgFghBnvEY1irS=83+|3(q;1aS6tes z(P*?9oQ8<c#+6`Ltz@%I0(+y3brn8k?)G`}M!tVthTT$F-+t|@%u{%u=8*V=?_<pe z$rH000!FI^tCKu2DG42AIwJuiH#t27P(dN!N8$~{W~C%_@IzNn0Ns-85H5TX%4(@3 zC`l^Cr`FZxD*zu$I^b>f&B}Va>~SmWD2%Hcn_Ct)W|p^V(#aG8Bxr2bP+y{dUTuwu zb#_1C$gJt^UJdd|6#Ae;cvw6Q_$Ddz=X)T;65vP8%#tq_wAykJxdCyNRJ)XVS1zA$ zA+4>2iCD5W5f0*UcPKK&g%s<sq*CLANU_UY4oVo43l(jd+c&nX@0-(}figjB3;Oyq z%sLcpn3HT+(3tA-rd@$vt619>Qs4Ns6K|N8Ym=*WmHOE6s{+x?s*cXp&0Vwm8+yDA zA%i-;pqb!L_M!U$f0Y3HW}KE}7>Y|1vCUwxZZ`7ha3(WFH4|llPmeA9m90B_bS|=q z7<waxpA?{`BV5w~*gOjQ+KJPzQE1Mm>q905$2BEnTr<_LJRF8PtudVPddR)He$ua8 za?Y8$x(U}5j}ANkUi8p;dTFvhSKqW`?t+CaiI(1mrMZ?=Q$s^zOKYraxHH+_lU2vZ z8d_FY+vX<vmM-i`%y&3fwWQ`G*m-^M+s=gQT0Dh+>1;^OHk-%d4S5=i2>U~t=aZq4 ziV4H{0Wap)1n*$TVj)AM2p8lojKCLkj36E{i4r;5Dq&HN2d7BP6Ak)FsTGvwTHuqa z1P?XALJ><<JXuT#0aQ90S>w`%6F01APg8m_SG0db>1+>Y7B-|7G$wNPiuU%VuHKY* za^D%VA9-BN<uJ`E<DT_4dunkux2m~tz)^GJ@nb!0gM2KLXTbkZUYHw!>5l@&&*k$u zDu)I=;e18uy<yR({{))^+)2@o?h{Eoj5Tw{_~-Cv*@I)o3~CYQlhbj?`IMSGpBjZR zi-pc7i_217Wv((w=aXuN^9fPg`0=EK-B;hNp}xMM;a(usU6x2FR<YCKXM2uYxn|?W zHAijAj`h^JZjTlIZKU68`7Smo_QhG4|9W;3&Zd!vA=BhXljPv1!Tl(mP%$3gDh-;0 zF7Yixmq?L`{SPSwzb3HR%a=chqf9k%l>L64;k<aB#RIh?g*~7HwxZ@0Y&Rt6XlPtk zgpo{w5$<mr|CD`$osIh#c;R#De&O?p`z!IjYuGus4{7Om1>F}P8Lue9m<eYC#=F=# zf|qct2dmiW(5O{({v#TE*(xShl#h%rVt^V$@WO%09E=5(D(3_VKt~dx+LCUD4rpjS zU;>cu9cz#ES(23FQ){XA#{!<lnuz`v-%`|s85?Nr91D5<h;X7)8<NoX)aajTSxN>@ zqHreqcn;Qq3(_F$H^QrGj#n$h0QMd!<)`A-R2gn<l#THa715fA6rv&!3#wEUYiO#P zme6SN%q$tbNV=M*hv)VBQg&i}Dz!R;KO+wXTL(t6Az!4{7hv^%`O%BkZ!_ECDi|}! 
z6}gmipuKHYE))#a*_GN<Cx(DEvlsJl65u=oE(KUcF(%<sbgYH+F;+;K-dOOBcb6u5 zcm1fP_QzKf4m}sXE~f$x$dQ2)1~@e8Hvqwq!%LB%O_kx$Mm81*K)jX=B%~}6F$J~) z4f{|GS{gCY-gX-y(-H#h9d^`qCE`8aYJaG|)n~1-J6ts$r#Bp`@%Vie?TMa<KIx3* zvJq#>U`~Y-?SFFF5!1+Oaq9Pbs@(ximEYy?^LgBhdECM0k>gDd<_;m<;Y|wIFt{p9 zVHN~Aj`d=&sua;b5(X{JBSMV;Z^Bk97>MzCq!m|moyZcxoOG6gk;UY>koiHKn=C9e zP@o6G6`pvqv!|iGXSmB1)G|0}_`ObfBm?$JtC6ePI(>u9EhEd9Zu-h|Hj6?Nu8;Wq z-1oAClf+_d*Agxei!!1k@7H&*UpE!^>xaet`r((`uh{RU{VEpa`K7%fvj5<CwY1K} zqKvDwX2hbOdua~DqOSK0;cxI$+ld8K$)7(<ILPY7Gx1x^e{aR_EBJlFKfpJr{|){d z_y)gM;{G+fKH(Si5BGQR-#Y&LO#D8F*9Yr_`+HIUBz_;L5WmkAK4Yiy``GLJ{@>UR z{@X-TFO>QxodK)zl@n)x#L_t?S(X0DqhNsIzC%V%SvSD&!>hQMp#wL-#?5q0fQ>tZ zjXBaWA?`f<F+t}<5ta&}vFw~6MwAW(@b*QnlYa3d0z4=`4l1V|2V#aF2ZTpT^AQln z^11*5S|9}pWjaOnX$c@!w8~`%{an>RR{xy@4Kt7=q$-?b7K&GwABe>w#tLxc3_Ron z<D;0$_ki^w&WBx5_}$zEQBQTouc-j%6L-wdv~`DCn>5wh?}GPXo%xoY(X743?NjSC z3Pm)U%xAp0bhOf}sZdbgOKT@CY{?rZcpvnM^FC7dy*Ot$>x1qW2QoxRKyu$0fcpmL z;wb|)_ctS($Eg)0cMD>a0*y^EWu>#8a%ywe!^u;rS^6r;AEq!$x93uztCaeXE0jz{ zR5qnH8pvlO(WZs12tMbFMEj$E8_3EjhlWn)bJcpqEUR&4Qt{5Fcx|0dmGb+tUUV4X zF6|BClNM2~vl5>q4b?W4UBY^hgf#hSH@Lqaq&FB?IV**MF(f9)kU$EoRI`%RJpGe& zcX#_PwzTP!KeQ~r{2DU%0NhJ^(kpDt>!zOZQcpaG9QAy0@}r(_B?K<O35x_ulNF0> z4Bt@L>XXC(`c>LU|3zZp;9-dY&;mt#LbSk1dDEe^fHb1o=_Ema8A&ie=CN)94YmY= zV@SlqBs+%trHEWpsD?@$&i@lsgWnIT;crKx8i;0)aHUFUeMn-zq`?7mO85e^B#NDs zX-J1@2Fn4yn_pP|5HV)xuW1@F!FCj>2~JNGaj!&AEX`L=&=b;o>}ZUyf&MbGVt~k4 zD|*1?WRz)K5)|P&0zgftIDl0NWdCy%$7f%f;t&Bp=`10?gB$XuL+Opt{dGXbls~wH zF>zu*)GU{G6V8zc4_Yn7MMZoa!e30%CqPo=)v%~07gMoj*?kB_r8sX>Xc$^m|8q3V z-;PAXpj%_earY2xvSEe)08Wooy`9<F7|W#!&l$wqT?d!pj(q%K_KA2acpNq25)9m@ zIHo_}2J0dST?8jmL}?_U1{NZa_Xa!wgiQP{;y*10Jju7cLO$E#2*)X>banlG3ZcI! 
zt&`zxS&(KDZKlNWQW91{kU>Wc5ZapA+T;BhGw;(?b=~1=pDXDUE5uM_X)LD=M&r)C zqPF5)aihiAR(QE^7VmaNYlFvaaJu0=Cv)~?LpG1M##^I=As58H<Bzdl7sq3T)@m*v zQGanncG&i6;Zp<&+{B0=Azuu;*9wpj3QCEEeA39JB2&zUak|4shEyhax05aEw6TRI zv2wOqSw0dbUit0XbUMjaPwjZx{j(p1=A3*w)Q{9x4e#q+fO}HN2_3?jyssxFYFq}$ z34_Q!DC+s4*^Yy%kttMil7~r0Vo5wNR57_yK!j7JO08V1)rcxYu~VxQ8_30Zm7rFt zXSYE{+Sbw5k!vL~Ig?H`;El0B&>9TuAT^^=nPAsc&dlN1%+4%f4DeZ<GV)w9EGlHx zA$P(THmDRbt*6#sop8JB^6?I*rP?>#80YhQumlu#T2mct*sI-0jK-oOa~MsYuqT}k z+Wg9jh%?esoaYH}fnGdg{68QgI1BVNLh=><F7da8zfaK9N4yUlA<@5h9=msv-n3Ib zZsOoAKyn}@Wo&&3AJ!1cCF6$@4#eH16R1$e<^mJWkV?07c#w~0arJX$#}6QV=z$a> z6srpc>tdn5k>G(S%j`&h&)8T`KdnmY+k`yCE+7YA+53w)7!mtscrCGf(>U81Un@W4 zwH&^Z3`P_&Uiv)&F9@-)?MuI<*t%Cr8QE*0H-S#29AXIFUknXb%Uy5@u<nUCyoc5& z2nT6|6<ixrjMs~L1nLR341pQ}8NVQ2@8Mw1<!X`;F)$teTB}W`vsv|r&cjzMdY#3h z)6w}y`8dvoPl5w#pz6GxN6D;IDTyDi5{6LW6f1*nB%1~G8ObOS025@ja?!54t8X^U zF@3B0&K=nkwterK=AN<p?i=oG`bPb}@1f27blhytGG)G<wY7_9)gD{{s)Y#;FcYie zaxvuI#swxUrT&nYyud?LP%d=Yw8$Z^`K7=LlMxm88158aV~g2#h{}%8RyiPlozD0J z>2%<=Mt`6&?e~*f0PFJs$m{Fa_s~{xePY*06c$1y()Mu6W?G-+@lC|CwfaUd;BW+k z4kvo<2nL+2$!~Y~{SLdo@HC=A@Naw!w~jAkY2jFC#`O6rM8@U7uP5N!`8I>_ytD(9 zx7LKzS`hBluGsMxRd<*xtE#jnt<&ESoO2?rXC=n)y0D3l0dX^tPYW~<V5^eD#0A#B zNIdZctF3&ir6*ph2$X-1BL1Hv;CEhC9M;wJiQgKZ!ycf~mBv>3<gk4Z{i+vlfX3D= zgz|oxbkZU~<qeff$uU8tQkhkzkjYP;IA+@PR$0JO){iG$j*KfEkl-pV2t*bY&ia|3 zL)i{l5$5p0@q6)}B^bY1NaYilK|qBG2uTLjnQSr5BGF1RaW2O+$n<H@p|Tp8)21@e zcC&?qF=nR36vA58QSHh&TuyOGz)q9u@c*oE)}ly&U4hqPO}{X{gUvynW4++d6Y(yY zkK@qgYn#G`39`91qlKqJvrA`Cb4hpeg10W_3&Yp0Gd$Yh^QD@DU7=uSh{ofDsPHZE zBnXUZDFQti6hsYrjZ9XBc?Ipl{R%CkIyEfQ<*O<wWtx2yQYl8!Pm4jy@Ka!Wj`S3n zwI`&if<`8278~?%eK`DEz|g@Dv8JY`wgw@8DB_PDnkci0><{Rl!AMrx0i`kvn_^zF z%1jnT8Q?_;8)GF4Znd^s*Hx^yY>{oXbXrfebXis^R$AA~*V{T>)~%MOA7J+<?o8e( z-uXSYui^H@fr{VWaR;DF>nVagqY<2W8_fWZv4Pc%?<Mq6zM5a>+s$v+P%IWIfGz3g zDDH|E+RT=G2EquQJqUL|4$2VlQ_{0yjS^2^wD$G+d~siW$XOR9FR5(P1R)6GZ;Idn zKVcS7-iP;LOyKWo47b_TXiSw1EE}A+E*kdL_G*f~d%v;6?{7^cvNnfCr&cJ!O-+l2 
zbNxYIIud(!qUW?O^5f^B?=g%uE_CE`o*EbECkRq;+(3W113Mj*7}h-oA`_R=CR+;% zOEyms;`M&7%`)?vFXh-Gc}WY29bon^i6n{KMHV!9v7>>*4n>TW`uT@17z{s)FMuR* z3*he;lMi7`Vv@KK%<U_<?#K1Y(%i1X$-rz}W?v~(TIgtC=&CY6n34GrSUiR3;^K9S z<~F2KiFu0w1CCQUE{B#YA9gbluFp)|gZ4|u4US@d+#u>Lt(b~w2afCz+*?BEv&wLA z8B*^(1RY~6hn$DRtK?a(qwt(o+*f<>DAWmz?`N-!{{gGc%PD1?&gj}=f(3MyP_h^e zWGp}gtsP3=0K2y^-`t7Ypl3>FsVMq5>|(;hIMWyXch2<ROgYo>j<2xm*ls>g&EKVY zYVOCY(6N_ry&Ko71Y=%@#HMs^VU|L4a&cQz5aryX<t4p3ZnfJ3?Dk}TFxcX9`bQBS zL1-Rd$F3F65qzWzaA2TVzolhcG=!=lp^_v!bININ`vh;y7xaZmFJ2LbK`{9&a8H^@ zQwEHe-)grYHlh+XIQYdPs}r({c+z5SNQwu#7pxMOE|#_RXM*4Ij13&q-PoFG9lj^G zdGN$=&AE&Enp>rDwV+?umHMRuYlVJ6egr@636UN8Rg&pV^b6gJ`KVt=UJV2vfz$wb zH+k<${ef|i-G(id&Gfg)7B3Z7E$BWVCL7F_WV}L_SUvWw*5<xN=hlQz9Ne6{XBfR} z>^^2-%mb+G9{(P@S-c77XO~3Pn+Q#bvURjnNE~V-xdvQpxKIdU6aWzWfZJe-vf0ZI z5Ju*y)qLdCYW3}Ap10pAyeHn&ad1aY8u#Qo2Bmi(%@z!a1^yC|BBb$`IPqpW-r!n^ z<BgW741qvVZMNi0s}z)Y86DvO1cEI2jJw}?n|YS!#9bW-^+@{11?(oQ8$I}Uy%PL8 zCK3cr3p8(Bfum%(?R~vx&NCl<>KPl%7w*}&Y}w7{hP#@d+Pdx8<_@W^C!vjN%G)TC zN{B+mja^RKaK@=eo9CU`+qZqeNzKo0+xk><SNPnUmo3}3CroQs4coPk*io2Q2T6kf z#6$KQ8Ca;<y9F;hs*m6*4B?r3`7^L5AYxTuiwQFk{FyMD03pa=PXO#hCQbJiuEmq| zDDGPD?7K&Pp9S{AP5gb>ccngxA`QCy{a@JUA-0LXnd662pG7hMUuy%C-6wRiCqd{a zAO}`Gq^(qlu-(cwbB>_+(RXd+%$-rkH|XyN_N!v2T@t-r`oQzUc>afz&#RGI0M8R& zVFeMuKZbXn8TrPIm*VfWH*L7&wAfYl+oRG4#(gE8U&^0XJfINCrd4|PmHef}*G;b{ zqk2-~SC%)6{&a%gzK+w|0l}O%1dJl47PEOut-D^t>>WyV)9J_aW=zoGo$jEfN~`x( z*F-np<qk?TIlgn6T&IEho@nv~n>|wwg$0}G?4HZEvOB~bp!Wj;sKsi#O(lnhVVF!i z(F+qaN2Y+ukNeoM7H(}=+8W%*f|KJ@ubdv;?01E$G&;S%`oXgAY%6u5*3o5F=_{)& zS_kh``ET@R0s3=yu|L7QuhgF*sXsNs*daXvR)^4QI!vZqIZ)P%kIKG_JC4{RTI&bU zr#{{%Tiy&uEMb_>5Y{lDBF+evcVe3ZuCO!{ljD{cBxrD%a4XtO^EPo_75OZTj&oXc zUQL~0a9Yv~gVSP{(lky?E3sPC-UQ!f$ZHWF^GJ-FMy<z!@dY^Jrm9kDEe5AQ5k_mG z5F3A#y(WGa##13-C~xxnJZ=!wR+ApF?6p9)FtGK3_BY_Q;GD^p7+v7$p$&Li+$~*~ zl0iQN7f#A~8RQrhAtRe%eFZ`lYI#V*D2YQ;Bpfk@ZFVCNE}PS8YO0ytoI7eOJ9TNU zZK<sT@6`DlpC0ST59h+Q{#07lmDk;L`L)+-(<ymRQ%wbga;665!M}g!5(x8j=beDQ 
zpck!>x&9dTRz64;HswtSK;uMS36Vi54L^(u3hYqD#U==$2WoW(`Yf=mAcgT!0iG3R zMx@!O!QoZ9&-P5aEqTrPV7Xy81&9I<Vk3}GVnCA7BQ8M(9<(${nof#SgK?U5qLnh; z^c`(f>6#3Ki(1!r85;F<gEpQ|s3ww7=)RSTCBb*CL*5{^`BeK^3wk%_#gk4|*7>Bo zLe<K`t?cY2RyJ1_pPeFHUz)Q7BEYl>JM%8D+a*)VvZ?xTC<y1PN`=y()u<{UbuCVx zpNlwXOcj_wrILq}t|{h_EKR5%w`CQ(rK&SmTAqitIo&8Egt)~Rv6w=Y5K&9hU4(@M z5X^c@CPNS*_nC>gw-=#;*#~R@B-Y(;ZK_?=;i)l&tU8@DR2xsR!4tYFwIQ8DVF+1T zs`;YOvz-?6-A>!-vzIT?@6wpgO~+sool*47k*imo9Nlx&$(MIEWE8TFF>jZ&N=&Q} zQ2fj40Gu$tcE-P}*}o|(3L&hOe&H6_=IO!LMub`UL0y#!LTbURr?HFGHVZVuvJtHk z``%?08Bvy8RtY+T1Zenx$PoN5n+paMX=ftw7!e#<9{n083PcBXDP;>ut@r}r8@y<q z-QQZ{&IMaz=~S#W*y?d-gYYm)*JYn>X=zBdv?QO;MH@4bT%grm1J1QBosQwd?P;Cg zR#)HFR$m9nGv>cz{AI*lx)`gyL1+=?=It%b4RzkyYNuUa3C9TNv?7on(Xw~*t&8t_ zKxy!;OWt!7ssuV!0VqinchMc_pbo^Mwa5c(tLJ%VY<8O+yBEg}b}y%;F=I+Hri@@O zb4mx~9Fa{$yFu&kkMFOCsH1Ug-q_NCJc1IfYORNnjlXln?ATbkt-fK!aoL8fPC2VX zG>Xltw_LMYvh&m(FqqQF!|J{k(IAUzo^}s*goo0KZgD^!ugwJuAGHMo^`_@BY>c}H z-F{R&7pGSPGAo)Xn~;>5g@)Mx%Wj0o;i*})<j>9EDsn8s8o`8cB1wy&R%=!X8jV#$ zLJC4`h*gUGE==4p^>IN|S3)iGMNg9(0TOM<6~KXC(8P@I&Nm(IK@3w>DN`MbAr$*C z*=6z$s0l*4n@KmOC}B)p6tD=u?E*<-j5=sknDxk6W6G1pkh=hT6p3TxIl0V~^+yKk z<3o{9R{za!cFJX&lIct;(~>@q^%Q;}=2{BhF*~(dr`Z#TFYD@F67{>+whSsNs<eT^ zQFHS>a}bMZ;Tm`mV44IUWE?*iPr({poL``a(#wFCgqd21FpatbHgZUU30;f`ITD=l zL2^O`TNHGI$HABpPFACFYl!!X2}ncY^O}vOu-}Lj(^?34V4F-X<0Rji;$lQ19&$EO z2FPleaP;qX>e{qXu_CL@*mliZ(Ck!KYQypN6~@2WJ`!KD-={mOucN`;wm@_GiEY7n z+m_>6`sGHeZSI^lZB3+Y!O~bdgYlRUkNjQnJAn6B@@6xTMU6#-UX7SXri2NWBqH;Y z(hCFwF)78+9DLVF0bYDc>^->U;79^XS5~zY0R|PMm@S~sFsT9f(~zDF(Hkg;R!f$= zYCsZ_N0lEU2r|8+*a$L3AmfY4v?fDMNNzjx9HnYre}CKu4l?G5)+rQc?{t`U-W$j> zbJajEtIr*E%+bxgYP&nXbV0803T-ZiNJIj3o7+I;WU)B$1H2Zj7LdC~hTd+V81Twf zI25p(xN$nF7zGly@qZKt-pmau&>fun<oo2aUj4h@U4HpsF2{})GtYJ&o7=lL$8IJ0 z0q}4DbjORJJ8I$AwIXj0ILs!5=&2C>E~i{TL{h?Dv_Bo_8wd|D!3b_fTKY)>!~rEo zi2y0xmk5!EjUFd)f)qJQB14+PVDpM}2#^@45d1_h;p@w3krx*(%dI?)wJ&eXWmh;_ z6>3Ms-;wOzS-5eat#v5dS<%^J+;{GlOL|qQq#|3NQyKgL`N4wRcXj8I3(V|4wWpm# 
z<6?piwuZmQx_6Ki*Z{&BI1#-rE_jS<BOSt)QKBk=i;czz0BGhxb$v+7ihvyG6*>yk zYGo8^o1_W9U-3rcXezY`S;{J$uIe|w|APwF(NZX|q0C<>d()p-Q2rOFYr(rkpx1jH zw2&xn<o!WVU-*S@1<3|G)sm<bA&a4LQOFHA)JEuNqn!w{NJ_metj3e*8~(Zlca4)v z#Y51&1&oMZOc9JJ45ZacA&jX&=D;4m>@0Hcn7^R%ar|4m<%5$>VRm<?yA@@h(yBPV zx3jku<xiv3IN+4VpXb0wR|}l-2epE3Kn%lVWHD<ir5mtDtZ-50sib+3;RyZ$Gze0F z<3}Pb*_P#fscc_UQaQGGL7&)p(x$?V?3h`LDhtcm_tXmq3y*3{GPZI3$?Am*>D@BO zs08*qz{HNXy9H!zb0US4o3`ypmAX<{A=hxX66E+IUpWh$g1K1R;iz#rsvTfcimRUd z0o^brG6s<2o===HV<s03n!JoJVzmeC?04Ob7jNmvW;?H4)GwEH7k<C+4{dGCjF1z& zWLT{528;0wbGO`*!77poEwD}gJ^H6cbhV>cMmokF5u9ydrV{2d?C-vuOrwNrhekev zgN&r}<@a?{?w4+9>Cq{-$i<${&PF{01E`V-$07x+3Nn4AMx`VSCX(=l+u&Q_U8&JX z5;3^c6kpmh@t^?jMcq<*96?!*_;fzYAJpQF;O%uvt?pQ0m@n}pPC$Q^X4#V2qdna% zO%2IJoIK^+$RK9aYfM!ppV4OkJtoj%g}4b~KxLX-90Jx-z!6EV3>^T1adTdl&Wd7m z8HAfD1ru)}EFp2-AmreD76G~?D_^D`?6c=a2WDNlEM9M5OTw+mdS8t{3h853neKvP zVuz|l$#h1&*-`<Cg+gWaI-JbmPQ)4`zLYr#bLdK)E$r~HtCkghByL@tKmNE_%M1S& zbVpKFb7Qkvvoe*eQX+s&RceveR_mw=Rm$XgtpXVZb#}Er=<$VZ&Qyg6e5rO+88lea z6@a3^KE;};5&SqWlft_E>JVAokTp7+1!6{7r9g6crLe)E73DgdTp)Qj7?~dQtsYhy z8jWfd=p@WFci^w9Lu$`tT_Ro|hy~yT>-UoTo{uklzs+Fc>z=rApeM)`i_4!dDrAs0 zd&Y-oE`b0D)22wkYRd+!8TKjM?&>qfm80hkC-LuvIjfE7!j&KEvauciVB+U5yFWRH zy`n&homi$FyW^{|XzY}(7Q1V=0v3bO<teZ}I5!{~WUpYHHk=$dsbHJ|6*q{f4Mh5J zwdn>>52d3@q>L%GZl^scb=}BQ>yc%>bV%`os;JFI-zD`};L@9J8cyTi3+s<wyY{?u zdb{%7JN~un#v8j@yO`(bIkVSv<uZ-kgiDwJtjk_Pf2tu<bgCiyVF3)EfTA=5EuhUT zTzm)^a5Q}B(zJHYY*l~b6>~1S@WM^z!atZ?)z$Jf_DwPE>F7+3R>3RX+%?h}N%o@c z5-x8NM)Sj<pOC$j$?BL=(Nqt!lnP*O%&mYNJ{uq+LuT<x?$$+?otVZ_d>_?VrZEs~ zq7d3fD+Ms(GNlN5#$}&GOK?g8lsvjkQwsV5<bRQRlIRR&q^m*S+oWumpBl}^t#z?j zTD9vGZBU`HCp;dXG32jqYBILhr7ZEGM$fFlaA(fzZC;qj!1bgzXRFUO$4A?NKh&z4 zraH4$3sET6>{LAL73Sq<0~4#fZsJWw1Qlp&m3#+0ek+x7xlUUFn+DAFMsnyaL&~+# zK}l$NiWbnom#7SLK>syo5LW>VsU3f18H96C2JBhF&a-2V{<eYY*icVBy5C{xOaz<j zhTbw;vfip+)LO}<PAXAC)SrJ&f9KDSE?TrintxE^SdZIq-Xn*O!)k)32^)bo6H=Cd zg4#GqK~Xk$ObaKT>^2MR45fckvPW8xQ&wd&`iriz;Q9F1e+m9QG3UzjM($yzTzBiy 
zN4Iw8tZY`{b5>D!gs;Pxa3S<BA;47efKFbLK_1qA;=w7p7Ob_)Af-A6zByHR9$-#Q zSZorr7kIHNU*}5Kb|-G_sITjAHPqB~(%K*2$nKU^;>0Smhs&_xbm5WnWtUt3aT4l8 zA<6$f`w={46%QCH(XL5BOb6GJK+VnN1lGV)n-uK$!q+RG{?5I}pPf6l|Gw|A8<)(F z-W)yc*#2X4QP4FBR*je)q)>#X0NPCmo7fBR?a>I~d;s*eTwILp4<vvcL1ZEs(uwlf z9L5GRq~jjxMWf%h@4K)2&O5tVp!3c<J0F+YE4F_)xjdPcqu&Yk&B6w@l!#c8++WG= zLCgvb#-<^dlc64j09-&_I48biN5l3Vsa@wLneD6b-RC8D?~b2;eiDxYVnf17>|R(% zXrPU7(b_=(x02Nd6h0uixlQ0wEEsNCSs|axgUY)M5K`!2G{bd=)6?8=&=!v^nzuhx zyNo7RwfPYe&3~_^#$<F=o7g^N&vw-yd-h1*_o_{nYWhmJH6gqO7#~3IxVDF~fK1o+ zz~1r8YI^{!61)gz2Rhf%nn?&*%-bg5ap+(OW{i#yzztiJ7dw8lNb4{Ev8I0h#;;|c z`t8R0WjolJYdddsT#6>{cU}kJ)4FQFzRLpfK{n52?x9e#+$xX4LHt&BTVcuB&)zrm zP6|Q$8F;n^#1SvnB{{yiIb@Z~L0Eqebyu@!>Ybtco+Wx0e2HMhK7xh_)R*cZ5XoF; zAhm6WuV#R)#53OmZOOiQ_YLO`-JN=VMK?bSraxPWsD02k6yGHWFr@dbEF8lZQ95<^ z(78ACy69noy%93ZZoVFtNmQA;d_f~mjEK~II97-Yvl1uipXt6*saRaBrBFgKrYWHr zp0)!(25-R;4K0=8KG<Je)YuG9j8b>7--d+85PSS(AuOIHtY8ZK-7grCuWvu4>q9iU zN5=P0_`(e_HR_U&Y6sUNHzGGGq_2pxL1>)Rg-bCD+#CiwlUv=0vblH<98`m$APLY& z$1D)ZMLaF!`K2R>OOgYuFJia34340o(b$Dlpbm@2XmeYV)#(sZ8+27w)@oZYPP+nj z!gW5H^OeGl>?XDntR(c76qOi}cme0F)DgBAd`3}uMmE`!d~E9V<qtt`Ny2Q35P(@< zT{y`4@~o!&N%R{#L@nIIK4$lVPx4{z)DOo3Zo7ubQuRR)m=TG{<fGtYfoUiPMsW?u zz=02v4p=Zw9}hIoYRL384-7Viz@N2+n)?gCia6bYfZG){z8<Klj+)Iehdc1u$lSTJ z5au*g7mW<HMZ6J&P7ee;5q~h~kJR!$X@L`eCx*e-ctHcI9}a?<wyDu4#wB(zsEA;r z6_0&LX6+RSxsHA*ticF8Iw?LRyRD&PLq}}PldsMv+c&hwW_x;_g)OyzZ0Jag#@UGP zkIBx&Sc3hmW3+u^yT{dD(~fT&+TE_sn*WTy9f^;{TjK9VQ3ZgWHEv)Jil2d2Q#=6u zYYKu4$$!KM1-;$ML1F}!+U#@tqKR+#VbBwg%^moowoBX5EuQ*VRi`F5dhn9v%h8qv z)AJ#h7vOwTpe-q*7sxmsH;_OLgh#lfSrWMJUp_nc;K`k%+qZ2wdsg=;2l@t<&K1{Q zxO3IleFJ^btB=0vmhXOh{ng>V0srOecApRUiGUhI?8WP_W>nbwN(BOlrRqaashTrN znUDRVF#F!6^;fY#GLaNFw;y~PT>`Jx%YJ}mrN$URZezdu&<)}Sje!>SH8~sl(=2hT zTwC#v--(|d1jU1Ag5!&UV{YK*1}t(gKR&Yz+p*a!1CjWX!nHqtgKaIW_~8%PK5<>) z`oh=S=d*=u2^76ds6u?mj{pZ1-iNpZcpo4@ltN&`@*QiQZ~LH$)np3?#f1lnKw*Ld zak@W2yrCe;BYZ9^v_-Z`n4yUyB!Q5^B}ry*6fy-i1)vlC4h#W7Lt)+wISzOpK-7pY 
zw0ODJgS;GRHCdDaN2R8>Dbd)JkLuhCg)5ZtCM%46k#Lueb?x4@-!)$NkFoBGirM|F zvT)~sU00RK#(g2UB7UQYZy_iAh4^()^GU!w=*4ctIsqo&qydEKP#N+4!foB{(n5wT zhhHS|GxpIXt1HFi>i*SPcqtCga(CO6t-YJKYAYJIES@`0Vb9k!_WML-M}ubFLWiz< z>%bDNv3mJBovz)n<8*Iq<##q_uiKbOwyjL7EgjvM{TlcwzYmP31OM&8?#{6mDHL5j ztoPLnEFKA#A*WTSHsO4xL`)Qy03uuD>4Y4)6vf#9w9_!dBb+{`=P*MvJVw#+{>e8{ z@YsL)P5I0b`$o(lAD0s1Smddu9MNFG!^vcAtr@IXGM8*^O4lZ8<B<?W#CO_FM&yd7 z_%1qb)>J|^oQlk57<V7%8y~pv3&Ovg*yQo>|MRKObA?-4POkA-EpCtX$NXZl$CL5c z-EIdw6>RKOe@7_T9td;<bF=<_@Je<^I2sLyqmf8q%-<fwow1f+yFVQb2NID;Gzy@D zzOi84y(^x9JxCFiBH-h?g;C+YyuR9r-QfTw>0(HO2`OYPVA}`>STzIBrGjCb<l9pN zfe6^KH3)Brd`L!(K~`BQU!zy*5WIv0B>4v1K^0i>z>V_Irw@5zc|M)bH#J40H8o@+ znjg)N4E8s5H+8kOMl;cLGEVv?zqiU>WiuH#^=INBmV^{U?UieqlrY3Bw*f4-q@~#T zr8)4HeP7zySzq7To-kFr%v4S!qmaJh#h(;Ee^b24R8FZDJ8!TlGdPrK?r);Ah|0HE zMQh<7e<)p-KAQ$6?o*ur{^1;r3G0Ci<)DIVk%?LTa9z-5PzX4)TDbUy@Vd0;xtAsN znvPLhh8WlqDf85t*48z-+}hUGwYlW%B>qp(--PMplVjVq#n$7pbQ6E0$vFw>ZemXI z+UVA;(KCzoDzAV>?1n$YSy&%0AkrmhofnA2VM05&xktqT*aa#cP!X-k9daNRG6sHi zO8w)uyVINd*mG9|KK{gad7+oq7v#Xgaqyc-O+Levk&s;$$wD``_7MfEHzeDu3;R>- zBTF`5D7;;@zi@8~&l&LCFWJBI=j24IGH$05ffRwQD8AxhHe7mb@m&%2YsB@Z{z+#L z9x^LKSv%`u^QzmEhQe!#M+*0_PYU<$uPVH42xKj6zSPfl!Hu!4<j-=xj~T(f1n^QL zb8k-HZF{`_6meyt_j2DS9|x{rKcKN`pv3uA@qHW~I2GiG=A~rz3m;JwUZbJ0d8sWl zGUl(^&*qcthGC~22U{d*Y@B9iJ^+zyYJ4zvdbs$C5hgFbS9;fG_A9Y*a%>@HhpJny zj4*abVLuIy)hixh3pTU)`>U9r##Xpj8k<_!3|p%YxxE+CNduNvVl;~H%wR4)y5G0z z&#Qd*?+>j0D|E#Pn#$tKPpRj9ZWmes9|vy(DgI#FL$GN^I~lZN&Dsl7o@t%ZAZQ9d z2Jx!69elMJ=bKT7+$-ouz#frAX1o<8WX4AJDnz2o-r4-3>!;1P+_LI&wnZi@T*bD; zfAbsJqmLH$vJ=rC5p+f$#`KqBduE-6cnpjy(5&PNjpmFU*+)R)vyXlgm&sm!Iey^L zS6*Q;*H5M9e~G4#I;3A_y|UC!1V=vp%R)jzG>o0S`cv17o8MU`W8tGNUv&%9qeF4F zrErx@7GozC_C89mp58B$7RgA7cB0N;hKc7;V#A`B%qf#m6EFY$l~*3U3(e|z`%Tn1 z)<_9nw3d~8&L#lGUOkS3hhEO%LhGXh530{?w9G&Yu<wEnO8qi_54)IPdfc?O`9ATX zZ+x^?R!M_AP8wN!TDvf6rm>U;!koO_yN=@>N8L#rhCWELlk#&<5hzDcQ?QW)yUFKk zu|^300d_~gf8|Y<<Gd-&PeFF{F8Ck{&B$5)H(=hP%m#QV%l`Gf_ugYR{NZysSqH)D 
z9zwJsqJycPF^d@7RQI;qZsXMpr{f!crU8w=DSj2yQzf2(5`~@tU22vgU$xTS@T~9I zXMHzadf8<#LWVZE@D$5`^wEdZCwUqC=nTjTedLeQ-kORBy#x&T5rI*X)YBnYe$Xk% zW<la8Glv7%JhQo~3S~#}Gtz3Y#1hVWU3IeS@W6aPdWasBN^Na*ZKlTV%N}k#ZT}PF zA-6)V)t<Vh$``=&@$61k>*}4!sKwIi`6tHn)vV86lfj@Mr;Xx_{xLtJB}z$~SJy|q zH5SZG#i4UkPF+X>b&2y;v}8G7OswV)%1-RoA`Gu7n}BF?h6(uB60A#~U!8zC<NA+( z7ncKnC?2S$_@kT;hd~ym1nh!B@+N4ZD0PUN2sMwD{d-rUKUOt1BrEKdjg4vQ2NPYj z@lc?yCMs)AhvJ^+djcb~DoiHXtU2MHrg(3{Qxj_oHxI@<iM}}PB{}~5A8{7eui^op z7Vnf|QQ=(?dU2rq#hH@0oyZwwhV5<l``i5u0a<mm><Ri4NH&l9+XHd~{@{*5$u7o6 zpxy78JuE{fhR58a&A8&G=jeYo_73<#{P|Fv3q7Xd0X5+sDQlk+hGzlNS|yQYk~7B{ ze6W4Jo&;Pa-BC9@CKI0C6zl+o8^pOo&CQFt>SFD4QmHxZvAV8B&CNp{ElpjtKN-$G zk^LKBt0bI3?_g&5N6Fx$q<iBH&wKBQ%L^ZR3hywVhrtjW!P~|Wm0UcXpV^f}FF}3; z>|t*)W!dYo+m5-mbLFh#ed6-R3O_vQ_NL=EGJy_k@KE^kDPo)uFRh+<jvj2uvQM*5 zoU)>ElW+8Eb>f023U6#l{mFjkQGdT0vZ|6CR)id0YNv|*Sv-}?7<IH31TsMb(+bHF zw{EgSti&!`-ZE!kq<QYTft6>~RWEY5N7gm4hc@m#zW>DYj=m~)!9_bRbe^?x&gvX* zvts<9Xb`WI+N?NSo8MlZoxOOhwP)_?O-t%L+iae>D`v5~H+}8ou{GP5Y+tx{(`nz1 zo_zF@xg`7&g<9|+FJrt>a3B9ilKI&jNas)w_hcPoDk$(+7~Cdwy!q)jMCnFRfm;XW z1Qt(n6&TYA#<FC>2PPk$Du`vuvx7i$OV0a^0-jL8?g>vKd6x>tPDgq$-%xr6r-xD{ zQyx3hvt_RWr)ZDGV84@zfqlU+p+gP&En#S`!QxLIk&xLG_b0?coHLPVd~Uu2riM^B z$<E$Cb7b(uj?D6wj!0W>Y~vb>dRAxSf>g)4Y+IwNXLKOuQ1=YPBAVg7n~%F<D6?_+ zcj4q{u4g`Lo85ZyqLt@#`5I%to3k3hPbq5wGE*U!-g{Ule(?A$WFL`gNI=i&a@f^s zndpN;Q5^A;dkF>gglwpWhd1YJJ1k(K!)!@5$07)11*^Hq6lD_vHsV{9w04^yG=Vwb zq~Vp_1XRd}#Ez?7k%|vT``d;>-JwXAzq4VR!z@<xgd&6O-;Tvx?r3LS=edz&CO8&N z#s*?t$NQEHggX5Azwv16jqZB4q9W0gSs7=0`~AK|%$pi3JUwu?V_r0dh0EXv^aNyZ z*MT1i<h>X)$z*p>Wc*#=?RoGVn+#Sc#*h@QT|yV|o}mB!1mK?b)pOd$+F7rdp5HLM z@?b!`1+GI_yFKGSfnUhCaas=wOY&8njkO-gmXwmkFfA}h2Vl}6$kvyjXCv2SC%Oc^ z7*-ozUvyFA)!_O~!m+H-5+a$W9IP#*K(KqV<G`CVk)8l9?Po3nB5u&m<w9`3p(wH@ zvZt27i|h8sR(hJ0GPTj0YG`fjpPf&H`szm$jSC$)=o4Flp(a;TtK+DcFCU+iI<BF$ z-r{V`T3i(^@mQy*HyPXsf1nP*5)!_wHaaWZ6KSYRS14U|39rjvZLN$CdYZNM{q=(p zmW+F2pridZU%f%0B40k8U<N)j0e{vCUBYTE8(-KCGtLS~kjOW^=%fwVegz^9C}95} 
zQz&5x0l~g(DL!Cp3iCJ^n|i&Z{PA{qJ6l`Qsc6_xYw$R1Xjq5MWaZNHDfFUJ5tzm= z^NFQ{z^+V+*oV@Nf<Vs>kUhDzr)OJF&(_}Ip<}WuTbft4w5)8#-(X)P+T(8?R5w+y zO0&V?Ys*F)QBB<18#E7`Ixuh={l9s1q`vLwT<++$+|liA>vA1+L$O$YBwcS-nDkn8 zLo(j&u)4ialuA0^NtlrR@)i!2qw86PG7c-j1gr!ROackj5jJNBfe6Gf7q%%Cs$(?_ zrgm~=#RdesCJ^BRAVT~|fT$r!Aj&i*n;M$JunF;b%Yan%<$we`1<7cHHm))P%Mlok z91b|@-sI}8HV*_nV?!g2hlj^AXR}~3QeCe)M|o>Jz6{IX1RD4^;@KpH0<El7NqQm( zH1PI-nKBN*2uv9P>$GXfuU8~)2@H?=>N*VBI&on%3e$-8E3Yj4=#3ND@ox}J5!(k5 z@-7h1#d2^7tsI^Sx{jSr?#BcuazEzbW&o&3H6mX>iXvzxg>lk0Di<dGYgaJT6(r~2 zu8_ALnPcPiwd^E#3gar>=nQ2WeBQ=JuNRCy6Mi{<9IFv8!48N@;xjT)f!!X1FA{Ko z2326v$9?t?wu}21!fYz=%f3GGl7IXfH!!BjHk3lU)CxuDl3dQX^ETOu@r0ezb(Gwi z87NJ1J43@(sc~w)@XhswTd9wY=;Jf!qed8zTBo2rHXwN1M0P4hi88o6+?BV3(34BP z4`sJ0AX+6?u1!-fV<5NP*0*n8AG@^Uhd=B%_!b|J9eK9z7H@}r4zf8t<a3)y4gkyf z47MNi;LQqUGb|D%Qq7{I)6Nv|TdmN@^n_&n6|)wPj<yF{!=uB4!~YVEHTLxEYHG_a zUbbpMpH<bN*S|Aw;k?-oFIe8yDt4^?dHxc-b8?JkisMaUOdqTgV4DDYTQcslA?_R0 zamaYZGrCt+Ix8_dK9Fsddk$Yw#JF{3-qzft1Z%fww@7Q4>lXn84h8_Cvx8X(6orpF zUL_nri{ur^*)*`42!bs5B(F9@s_tO2%2E=}2oxMJgRb0wEw2<-$su$_2y4jRnPLsp zo3*7xBcNtT%3fcp4x?Hazwn17>H3V1kc9n9zu}*1oTTF(rxVdkBjJ!U=nT}l>1;9? 
ziwb)}MCDlmB4kMk`#~JH88cUDNv9oL)uD7;p2`G*4e8)Reah5^uI^>GR6}#`beb05 zemlFnHJ)s5X-Ks`BQvND-o@@O%n`R1wz8A`H8oyuO^yHAI@(80=!@QAzlQ9MWDA~Z zE?X!kzI>FSws;Ou_;J6|>U-$e=H5-Yo@3h!H(5b!%<+1H%SU%w`szlSgmGo>)}DD= zn%hsDb)BuYF=Nbiu}dwr=`(2GVt=xw>?-KP6hbw}M0y3`a?nt4Ff)>aE@^rp7XW%y zuDZ^4LG!v3;T<K6JMWcvfYkVt>{-wh#A|R_3S>t<U`Ai6fi<TJCk(D!c_EFfBr|dH z{p5G^aZT`FATBc`C6-d&g@H|ZgdGz7u{4(x=hi?JJUbzA&UXdX>(cw2H;eKKQ8A>$ zlB76x+}g_8+Z_Aw&KeBxP4+KX!6f5Q$$?-~^*$d2rp#xAWaQ04|J!dDPGBb-IKYN4 zxS;SZQ*GIT+Wzr3CuD%6Z2<pV?XbaOUo8j!%!Pp<fvJZC70Ak9{9`5i#K3f+;KT2{ z6TO^^25-C(LMR9ZZ@Q`Q>u(|wD$%o@!W_g`R^uB*yHG!@ujCF9c!kktq+%}GrjQz# zJzO}SEhxNR=UlL$^uMSs^s^^u1cj?1-=DlebBocU{g==_jo*s)jT+D~B&$c;Xwc|W z+NZqXrNSe7_AqbZvn^Z78>6>Oz^VKOy~BogP_EU<cOU{1>D8IiXY|=4_=Db~|6mIt zIA-tfDLlf47jG$i1`GAF9<%t{?1{Gz6s|s|>>@^hf9%iDhbn=6R1YDr6SvVI9V$Rh zVE^h@Z+Lyzjh6fFV>i_oK$rZO<iHAyR|UEJPlX1dO&Abn35&o6z?*OR;@KlZgZcJM zN-HQx7D6Z-6UR-PNGFZSEFyJSNm?Z>HAnaX5>}Zf6*0aESqUVi0ZNmNRydC!sc2B& zmKlSi5i%j*Z@6UvD88vbn>;pKZLN)|>95TbT`=fr;upO5jhiZZZ2r{8%6_smuWj>4 z%`}}kfBu#HIjK3^1pIu0kG%-^QSJlPL#8Ue2Qc*|>1eZ^Oe>Wzm4Wi;Q`5y4TXr7P z@zXhDaPs(Z;r<ohtiE*FlhG%e=P!W)IN^)u!CM7IvN<Zn(UJV2!ObE70jiW-o6T?} zd)_wGD2fk02%i69JhvIo`Nii4fe*yzpG93<UxP@$Ul%Wh#Ni3iBWZCwlA#I{?jC=T zrODq9xCN~vejD1nyK89q*n<zEjly=ZY5XeiN~9A^bL<s?W)~e<N(x@K27w$A(uGM8 zTqf0UB?_t>ua`?93)g#~y!3c1H@e;YQ*82BEZ~<c9=FxX?{QcZ-VkHsUC18_n>POT zFN{$nS<Fl>uEJl6S!E`($Dq%JeeCn`M&vV^l=ouS6!v8og~eUbuT$(UR`@OIY(1pT zZx>}Zh}N%1FF+l!aFM7Ue**dC2)DpCLN3XK(N>n)yJ%VWhT~Ag=v8$Wcu;t|zhv)> zw?ns&ts&{9L>z@m>|LyFY>Ho5<(WF2!fq13ZZTA9w;SRXkJk)G6zbDfHa>nMa<zWp zIl?52S=f59<6@}tMBgQs_)jD~2`ii;isLsUUsKuhBCDYEAi^n#ts|X7GV~qloWE?1 zQUhY|iGDO7y?>`zJ${3D8TwwF3oT5+*f3VC04qeS36K|v^dIt<B4o%oAtwoFTe8R4 zXC;%f>`jKa#}hY*r^Qn#*e&^d0fOayqI`Ul_z+)bN2b?jh)gzp?#pTQ!iU*xFx-+2 znVj%}aX5^kyeSxL%7lVV^)9p7MFsHrir6*&xOk@kSwCNEbxzn9YpaMiN6-mib(mUW zfFdKCVj8SWm<wAiKRIL2>x;wXEcjG|tpQui0|QU9U1zGUYSd>!R*%(Ktv6V8;@9eY zdi@k@Ek0Klh|S~YiCgfBBgP?Nz|e&Y3I%Zs3?^xwZxuD;W8za7N1TttXQmxHV!{dq 
z(J^hdf(@nF5*11aV1TKbAf)9;xHt&wgF4ueW!zO7lh&^enPM(~gSg(RMZ6|ujm3#1 zO<KJ-Y_mjZos5qcj>q1)27AXZ{_IDDhkl0W6x6Ssq5<n?57tkF_p3%*1xo19E`?y3 zB86BHL3>TuzA10x3#X~LjKmC<j)N~Ew0*xA9`BYl!vV|A+osrEd|~5|EG}#=U4m^4 z9WrzlWuGpK9V4?(2@U{1a5jD|yLSAqq8oJdAhjvkyVy7Yv7tbIQ>Zj?Ef$41k8I`u z<=xY-BYLWd+syF=g7XZ-ts=0=sLku5QTAhhx+x2U!wS!iuN|)+fBwtkxe9))atp6O z;eDJ>gf+kIv07_ufk62@Y#(1Se%1I7puhjAARG)!@eB`!OE!vyCklTUZy$dTFmrI2 zP4k<l9?SVkfK;i3RF#D%oc_uxZI#FF46gmm-eOc~G*yNwTW!5}aJ@9o5201!w#-5o zZ#RQTRydll21)ou){oLAnlA3*kC=>sK*{>C0;nS+`Kpl1G@rS`Z<5YXI_+|JG;u{8 za^L=_w$2kY7#BD2{MQ7_?~c32vltcOSWYi1dH+eQ1A@W+hfHU8n=2fzt^JKM8H&#H z8<Bm|=BdEzu|~(|kKZu<qc4x+uxGHEX2x*~%TOHJzF>UA_<7?uewjTjFYf8}t$6M{ z<{7^V81~EH0+`I9z7C0SmXfTqu#h>&ALp@7j*Lg>*@Y7UOT@z2;{NfyGAqZ)h!Oc^ z&;7%m`PbpRr&W5B+s!}4{WG5V<8tx7@e^be393Z@9402AII%1hfww?B(ApR8!zW%s zdjR8qUc7qzLbNsYtbaawon3r+a5>mP)Oi$j&OD^fqt`}n7oS`{NOtW+2c0+mj;s#t z%yiao-`%ix@9yN4;yv5*8)7KD-A&@<<F}w)g<#4b!t<5R=}nj0zTw$*w)oJ|>w|;D z5;DB&^6@K5&%p)1bVl<Skc`v#fx>p*vD<2P+r<9}5B2+w;m@5Rerx<{nHg&raQmXO zdFr`*!qjv5^Rh)N7VZ>}A3s{=Dy;#61<BChW&=Afl9E8OG{b3Jz6Pe8#P8u04ncik z!YOQWIE~`*bP_j(f|>eiv!%M)VwUKy6R`FU$lN%?kaO#wIEN2^29F&64E}t^GkE-Z z&;tj>pMkS$l5oGVi2IA?ZWfZ>^C%#@=`<kW3N8WTn>ogp5vR@3{YJA<-c}Wkh+Flc zx?DTa8au>>@o$MYf~p?P>+Hafh}i|ziUz6`DgDbt?0xRsD?`vinMI0IAS1$KJ+P@r zZcSXW7Yqh{M1{cX8MvrKfRJQc5@f6krmQ}r!KBukm5ufxYptouSgAFs#nXq(2#eva z)vYxdG#ay=){*cadya*WSMdwZg+oKMJZN`olvNs+)#hEy-nOLmaIVwnG!|F5rkB_k zw0l3hfHgwKJEh&JhcxtYgrdt1Y1UYF-iT`tIdBfg_uzeJi_PrJFGoWWr+c<ULSg)( znvHV0zGx2u4qo|S_p-EjW!+VTR`AZ}i#!^*EIzhp*)tg1b=Z3;e&&VfDh@i>M#9~h ze00ie{I!DCVziV_IccxhCC1juWZn5VL*!07d!{Rq=uU<ZQWMfajOT}VM~c6rj=$ru zu`SU!7xdBzZkBlHtVAh1Yxnu>ES~R9uul;<GnDL3B)X{WN5mvs0h&=J)bh5;wp^gN zC+rlwyTowv25}TWvU2D&F}ZgyZlT)iu=lQoRj{nY7vN$khSwoHD>Y$<@apwhPrgNB zU_Ae8ctZs-t}p$(8PETEh<%?oz=St3U+cw3@qF2Qy-BQEP-4~ajt$sgmx|{Dk0to{ z{0PS_;Z@>gsT+reyrr)4sMSUDl6^fvOS?axt*MzE6RTz?Y9oVnArgh4{qqsy;7st| z6TH=Qye)x_<THWulXK>Wx7x(30>^F)5TS(UZy#@GJMrzmO*0`d!XY%%8%bxl@e4>M 
zynnkWLtMcryd}Yh4x~xmknEixj}yk%1kbdc5f`^?_FQr);cHRYCN@Fe@?Vw{n95G_ zayh|eTomwyVge@oRE!~()IT93D1Y1UW)Kko9@n$afdk8A1cU>wFB1`9%sa5|_D|>k zsUGqFJ7!@2(e8f{Co#+TJa!u(tru4C^@|K3Fz}%)ATY-!Uml=tTmjnx#6--vxRyc~ z5MzRntk}wj+<+exZ%-GrP{MOspTtu@`$AH+bk?c}u&%m!y^9Q0ZnrzU-|tHIjJm1} z_9WCo!mWscanyg8UoW4Baw_4iFTt(@2X7F&;4gS5&dt{-0}=i~o+VxlE73A~kLKg< zGExrDZ4<xFE|FD&FZu;CADt~p#LkxNnm3A<v$GCId!a3~7T7qQ^a4D-BkpI9{PVPO zIRwhr{SS^zDl_3f#rxP=^!12zayd1|95WEgOn4k{+dds`kC&0je2<^UeleW}F0J=% zGmyY|=5g_OcG5pjKbFI#d}aK{mm(jr4mOBSv3q5Sf0}*-0a8vvwM^1b?B2r?kTT7o z1SI4jTwaDg_{CpfX)48+AW7Jb)yCNV-JA)Zq0Y|^Tc^9&jzWCHc&^mSxo8D#oCCSs zO`r~?_b9|aj-M{wKV7<iD&&Tr$-=n*GT342$Jd1IQjP7Uew>av56If74ote0pn7*9 z)+6uhm8CjgL7mrRL8=4FMk&65c8-zi98;=u2I}m?8bKYnw<*OxVpm=&)w#4(2Yitb zk$n|)KH_lsP`F3fEY;auRtLP(cP8rmJKDKOs&i3!omXTxPt<usIA3^Qs`Gw%om*tz zn5gr|@xKaxm+BM~JIH`jOSXLE^P+w@9yLM-0{w~^UT$R8M@s0DObk{K@DRjLNpHbL zX0|ei5&e4}7y#Bf>~?!QBAQj#@EnCTK6lt^bvmLpn@(@B=ve@Ef!spyKBp&Su?F3V ztk-8#Dk}9VlgH=qTAUHD^DL`QZ?)=qiU6!>I%j+O?zs1+HTsUffFJk}SA<s|K9OJ} zy!w|juP7`m{6S30eu$s2I-;xihv#G~$9wVR&xM8K&xuLil5~&%mDLupCu&eFUJUsB z-!6K*gGve|_64N{_dF=&pY*-`f3Xaq?Kvm`l;B}X8rP?n`M6eIx~KgvzgPZ!9_lp~ zOC3H-r5E3iR4k}dg5UK1%=FuZ`u|@jmFP!l4&C_PdW6!1`(L(XQJ2O@Yx{6zCfH2B zmca8fmFe}S|90Xz;|zu7spd<T>21<)>Pu;z{BM4$@g6E2C<MnZnj7j<6otlI4y*sI z{?Xi0+w{2-h4xtZ3(73qBfN%BSRL?B;TU{QFC#Ntn_j2%n{ZaCl<&{ExL#fcioeZe z*GqjJ#<iJBf7$g2zW+a2XdbCdf2RW1Xf047+;n=e99{e**wHmw11A)}6Yk9Y6NTn; z5T&#DN#Cazn(O=#N@)z!Kc_!8y)CM5n4vV_n(a%Lnf5HflHTPjmN9%%8AhS{1fyjr zZ78%}XkAfX`%q{t5zJ{#lwf%VzMqA%6=fR=?SZpV&P3UZa&GbS|E+p7HhHmV@Japn zqCz;4#`^%uoyE^v@u@_4uvlKer?dFIG>;eH_bVt=m#)!zqc*6$ub{ky(u_j1!tP=r zxSfPTxNQi9&d$<z`b~x4PIGf*vCw*?d02pQ0EOO1ML0~M_qU?ZC(RqpUk?h+zX`>P zLVeYtL{Mm4>Edtds}_auMFxfLQ@tPxwRaf`U8l8Lj%yd=H}$=AjqcMsN`<b|eJZqG zXw3AUSttYpFABYz-cyC*K%r-AQ0P9ro1Se)q5jZ5p-=ivb8IhuQoD4`k3!!M_et$h zp|Mk;JyyC#eW$+3QK&C^lu8tWH;t1%D^O}tsK0p>f-m)l3c>hHlwTGLofXtS`lP)@ z?^{*;PVb~KQa@>)MHCwfy@Te7=InWti&55~+>Oj|+fm*?*@yC5l#5V)iLw>t6qKDP 
z_n~|R<pPvjQI1EU>-R%?d@VkoMWJ?{KzRt|yC`&@>Ys<Q5an#tmE-eylt)ph{-)w5 zObZKlqMU#_*W+^s`bK-@eiZ8K*~Q<~_h(Uli1G-^>nOiQc@t#=%K0d_qijbZT=G+t z>rtpZw83cZX{}S;yHTjlMwIWM{3pueD0GcJH=rLhhEh2P-%mw39fe>)eWw0T6g;CB zCjZsJ<D^$u25Y1p!qvhZ!U5q$;Vt1)nCE&}E9+<T*ir06wv+9J<zQIcBVHl?TzpFw zm7OBnDZ8X1UGYK1rxgWxT7H^*m;566HS(L~FUfx;|3Ln!LR9D!K}C~dM6p(Jn&Jw@ z4T{?o4=SEgyrTH6;;%}BvQF8e>{VW&JfKpmY%0GhscKUVs+OrPQaz!1LG`NY-AYxZ zrP5m&uN<sgP`Rn{l**lzmsH+Vxxez6%7R*_9#bz<U!=Z9{g%3*S*_Wuc|!Ao=2gwR zRU=hPsxGN|tm?U{Ppb-AwbrKfYm?gZwR^QUX@8~tK>Mk-ptI<Lx;EWB-A3I`-6gu4 zb&u$t)%{%ezP?vKN54Y9QGbShkNyh%&H4xRFX=xtqzzXXZZO<tc+&8y;S=MN#y^|d zO#4h9m_9WX%xZJcoHF;C=a~1KpD@2-e%t(K%Uza7EYDhgZh6b{vGo}13D!%jPg;Ls zeck%L^%LvgZAn|3ZP2#Bc7p8;+aB8$wwrCAApnlSUIQUtn|;u}%)Z&a$Nq}_ZTm+K z!Qpkxa~$Q^<T%B#({YF6CC9HEA2>dB6r5_O%^7vJIR~9*IQKZOaNgj&&G}OG3Dsv* z@2S4N`i|<Ss$Z}ExcW1f&gF46xkg-TUE5sST^G8pc75A*m+O(5J8BNpyjb(5dzbq% zPmO1r=P}Q7o>yv5^M<`?xOd#+)A_FTSNp^Mw10>HBL6l1n*(PB&JWxWxGV5X;LX4% zfkIFnv<3aaWUwtb7+etC6x<QKDEM*kvrvC%QRuGlp70gn8^X7R9}7PhekD>BSrOS7 zIU}+saz*69$WxIQBX34NjQlyOj@qK(Xgb;z9f>Z9-Vr?zeI{Cnsbd|n;n-=h>tlDt z9*MzzxUQ=1hPvD8-l|_wzp?&>`d91Ut^c_Gvv@`PviSA!eesXte@%!9T_TuhN{l4d zCQeJ7pV*tYE%9XHCyCb+?<eOak4kPzo|4>|yd-&T@|NWO<a5cl8sZI?G+f(oOT%Lg zKX3RTbyn(EjdhI|Hh$VzNUPJfbQp0LFH2va-j_*cF3DV*xhL~j=I2d&nyzTNq3O1! 
z2b-R4ZfoAv{Ce|;EmySM&~i`9V=d2R)7h@<NOnnfWA>!%j_gI*YqB?I@6SGyeI@&T z_S0;kRo!Z9jkmto`ey5ITmPO5=9cA-$!*JR&s~_?pZlz>qRr4&(-v)OYRk8cwJmEq zrtO-xo7?Vbd#vrb_H_H|_V+thciK8n>)g?KTjyW9Y+XlpJ=^tJ_q^^$y5G+C=kLgW zkpHZwzvrx;i+cWVZC3&xS5@xMIg?EGw6vwx2H}QjOKm2@ELqzC&D@zu(}Zko+Dtcc zGjo$nJDEvmNt%jSS!>)8d0G(>5fKq@3IbNdiW}mFh&w8xA}S&xq9U~Kf0mg{LwV2V z@AsNt=ljk%-}$!lo#mc;@3}|XZ*PCuzsG;b|Ag+>H|qoXh@R3f*Jt%>^&|St`tABr z{h0os{;>Y2{!{%){SW%{`b&YKz~zA_f<wU@LM(J3bX&)Yj$1nJ*>v`%OlNuL%FcD2 z-p)gv$2uSEe0cMw&8f{-ZoYl<BVCnU-mc5Kj&?oUy|Vk(?uRa3bMfJeue<o3i(l%g z>{-|2?b+2c-gCI;vEH+L`+BeJy|wpiy-)O>=v&dJ^{wyo_wDMtwC{#KtM4oQMg86V z*Y_Xqe|n($l)uS=r?*_a<>{@_!S#bTZ(Fi$^R|1oy}0ef_RZT5Z~yVo^Wo9(o#DrK zbnm!x$MZXv>~!wDaOcR*y*sbndB@I&cP-l$+O>Drp<UPSx_j57yI1aZ?>@Nu_T4Y+ zY1=ch=ej+2?s;g>Q+rNa(sRiNhG&f)<67e;<CyV7<GGRQk+ma%kv$`qj@&r%z{pdP z@`yLGEs}^_895TMB9BDv(G^i|v@d#}shKJB4)fVjZM0={>*(a@fzg{szcu!bu@mF# z#_x}XVh_ch+uO2t$KKh!5AJ<=-==*h;!W{w@xAd|;>Y7JOstyNHgWaD&51RMp2Xh7 zRN~6Sb&1CkCz6Yk=O){dJCX;JN0JXsmQVIgCMNHkJU03C{w4c2?N9B$eE*&MpG{S! 
z)~B|mGO24*-$}iYwx?I5z3HCx-t=|pM>0z?8#C|99LqeFc|Kd6)v{}|JF<tfw`RYV zeJcBpskKv^rVdZtHuco>;^}jz+op%6Z<>B``kCny2PzM&Juq_M@PQ)-?mqC`jCUq5 zvu$Q{W@_f}%#oQpW*(S%Wai147Y^DF@^i3=?Hn?U9j{$@DTR?$q!Hi#BjlQ@<+vlr zcg<GP3-fzkE~I%L|E<9JN)l(N-KuarbxSrXya@N>_9(m<_jdOy+>U(l$qeu*1^!-@ zUk03?*uekcP9wQj;g!IDqwp&79x5Ir01dwONk7W9EGk247v;~Aa-5RojNilHI3v}p zUg7x80o$PPBAi1vt?**963-GydOPy(Rd^}zpDVnKtie-@lBR;}u)R^?mB6o1coo%b zIr?hSQ1ptPoSaF;#>O+6*X?f7wvL-xXCjhJq?2(YV`zP;<X$t9(b}__@nkApP&8nU zW#dN5>vFq28}#r%a6>_1y(%rp(p1(qGnI}d6Pl=4P@bzYU`&o{-R4xxOgJ@tCS@A& z(`1cjGL!4iKYx09+7%bHu1Io1yHqR4kBGXiWD3UpABeY($I@CTnaF5clB1bvBV}sH zh{qykB5g*sY$9r=v<%#|rL$Y>oir1YxLXo9wVZt(m&XNn&e!E^j42})H%8*7HXX~1 zYlaqT@7IjX`kdbBNGdj&NxRaqI9L4q-cYwv{xl^xG9pPP@sax!iQ!1;agre#ju?02 zuL;;z<d}e+xMvsvWrC#f8z%<n4CMErUJ~2#CQ5nDc9I1gM_vk)r>`{t>M_u9E-A=z z327c27NnCf8Nf>d8(yPkJxUAfo}Rz9fNC4+aBX5je{Nk);_2%zlrjK4Ct;I@gWXLs z1wJPH;6z#%elt-Xhs=M6iu*SMZtL+n#Q9`e{JBtjp07*zc>>%o#sAaOM+)S+gs)}G z{r_<0F%X0QG`w8IBY?N_7GR^mrbQd_2$2+Vz=-fq8oyEGXHg>xJO!Lb2B+PE_U#7j zg(tW#3psb6!hw5vVf%T|e*C|T&eO@wX=9+Z4DjQTHX^*NLF1UvLqi^y+fMocc|5NN zJk{T6*uwk8Budke$YUrD$fJ7<?bnO?-TzUY+<ljm{Rqe0TmvNc4|5fNFo?eky^4H; z;_C%C(ZEhiaOA+(X&Fupt)P{(idK^mx(HWXi|HA330+DebQ$iFy@#GjR+Ba46><{0 zm&@__$`#~1@_W2Px{}t=v+&-aN#B60rE|zbRHL=@T<V~!=xXvX&P+HD*LFCtnXaRC z)Jf~9i=K}+#@3O4P&f5ZFZIy|Qb!wU6K$p~=mg)Qt>oKuJ$6wxke6s1`7XTxM`v9` zH_~?OGWdy$>NJ4s_8<+>4!VhU(#^DscH<758!xGNXb<hh6<a?Yz&VXu5mH|A2puHf zqucN{+z<`Z9dsud!xi0bx`%w9UP6bdK}To=CoY?Gl#bDH8l!vZKJ3E8$!F*UuKbcX zZ`DWsgg2xc=_K8cH+<7HgLhk|uyZj@50Kx`8S*cDzx_@0Qu=0k8GQ>qggxT7(#z=; z^h$aay_&R;$LKZqO7&r!3i>u&3(wNG<BRX_r0=5drthKe#g*mz=ymk{^m=^t^aEHC ztjAY1kI)a%8!&LSkw4N8<2v_7`Vo2){V2VeehfPXw~(jk$LX#3x@iC>dEG`oNpGj0 zqIb|wlYKY``7_j_bMz>^3wxq><K(T+(tGH=^gfcHpCc3GCVC9}eV<3DouCA}dH3Tv zs0Z-K%^&C&@!ZvM`XxNUK8a_(zKpm3QuM2MD(~w!i})M#VUnibq~F3G*SG0+=y&NO z^n3JC`hEHs{Q<r=-AOj%%KS%oEAJ;bk?d!ri$0E%(0)OGiGcnQ$<Qar6#W(UYM-RP zroSOs`dj)G{T+Rp{+?WnQ?&kwyI#-GKViS?S^8)C9Q_M@p8gfr>Axm@IAN=wzDWO0 
z|3UvrU&8Y-FXM~t1K4SK1v>>N=t%@MWq8gF&k_}}VrC}?SqUp;WvrZ4uu4|Ns@Wo( z7jgz$!j|HBiDl%?IEkLGbLKE?Jc|iwkgZ^^XDe9^JBz)6oz2c+7$R6LJD2>HImpXw z6<dw#{`1&c+;`s2-pJOmI_6~c%*D>fuEAx@O%CByw6`)Zc?<Kg2G+=$STk#3t!zEp zz}naa>_T=C+sN868SP+xrn3MGlATyLKEp!f80%o0SSQ=ex>z^a%`PUtV?C^w^|5|( zoP3FVnB2$)*cPntmf_8}<#=*)1)eTgiFX&zVq4iD+s3xDAr@vk*iN>K?Phz}C2W`( zY=lKvl$mUljj?eSV|&>?7H1PI!IEr}?Pn>LW*L@cQ*4?YU^DC>dlS2qy_wukULb!X zFS5(nTi7A?R(3hN0(%};VHJ82`7*f%r`s$dpTawVAH!2^c05ygF1Z=&-*T*8mk>X@ zh8<>aW3yPj4&$7KjaZFVlDo)J_IAAAy_mfdZ}EH*J1h^7FOsj4yUAy<*K#lU3VS#9 zRNl+3W$$Cx;T@$b+4bZLq=fw!`vAF>{GEM}9bq40H?R-08`($LP3)uWW^xtIUA>;& z!amM!WuIWTVMY6A@*KGj1Ma&Cz6?j+kDZ+NkxyV4zJ*+icfD_5x8sSOBjkg4g6IzN z0j!?C#y-vNWS?Ocn`1}WUF>f5S#}S*m)*xc$Bwbj<DIoHum{)|*>UzI_8|K*`wICS z`zrey`#O7weS<yBzRAADzRkYFzRMoLd#`2WFrET<JDwGI2cEUKggi|yXWt_avPba@ zMmu|q{eb<D{fPaT{e=CL{fs@%e$IZue#xF-zhY0aU$ft^-?FFJ@7UAq_v{bskL(%t zC-yA+GkcExg+0&y%3ff9V=uD5vwyIEvX|Ju*vsq{c7mP6vKns++3;m*oYY{qmDoyc zWwvr#g{{(7WvjL=vMt7^NSEL_h@X*PkzbG><7D~AvBdZ#`6+pV{G9y6w$!%FcBXB) z?RB;lcz=JTt;Tj1K5&1w?HrqCtF@hrgN0VvR@>It&a<s8>KW|rwzrR@%qg=Zn}~UW z?Li6sO$xdd^eWgOVS5hhDy`_-TU5H8LpK)sDxHIZ5AwYw?Gr{Ml}wbhC&!Wrb6<IT zDwY^CBH4^t(jJshsN`;`&?B)_Bs(!0HxHES(PYMmV2PZu2O<U-0Hl&eM#^z3ecVBN zP?%*83L;tRQKf!mrLMHrm3F$aQWw@i%ZBn`UYum%R|fiX1~!!hl_5e~k7DU*C<#i# zR6QZu?eVwRJA_6R9rH$2cHoj?!jNW_cPtR;br)?KF;bF`x4mN1d_|SlVDHSgA+%O1 zdUI9^RXssn<fFX3q*JNfDOI$0W@7QES+RMZb7fb7N?eTF(<<wG^k#dvXdipGu%xoP zfV8L^+Do;3!WC|hl<NullpYOUdylN&BkT7R)R$EoRCqNs*@B5NmDghL&0FaS22`5` zl(hlXb^%p2Af<a{M7X_P)nL9{TdP>mgT=kL-b_d#0i|Ow$4yZMlrMwss^0M|;xv_= zh#Og9DLfi1=`9R`Mpe5}$a14wRrIFgMtZy;tQ!>P2EiHn>GlEXl>zCMfdyW1<D+y^ z$wp<0CnqLo-%`N2NqL~DsbWh(gKk;C&l7A?{F)S>CgsXjsgkE9RJ0X=XWuFm;<DWy zNef++qneZ%O+ou$zI9NnbO`2PQ}N)umxD@BNGTA^d0jCJ2CD{N?PYYW!9p)Lx0ejc z_$?mfV~2fEM6vWybBleu@JYq?c^{N*kH)aKm5!zD+s9JIlvxp;FO>CKR6~av>|sG7 zvqLJHe9Bd!7W)ptzI?}m-rDN54Z31>Bkv5fen>L$H!J8-(5GOdgzY&PP-#Wq-m21m z4iRpuetQlIKFIf#7|KD08dQz>LDewxG2-^hzU&FQTPq^-E+~)Wjj>1bjSecMkUL~I 
zg<W>Dpwy#E{mM*TsjVyZbY-S4%!8VZ<z`-vw9Kz8^ye&WE-?$6+M`%{8cWRiQ59yW z!0`(e+}=jP2cvAOeN1RrF*a{q<yb)vC?8v(g4a_tK0nI(E5_$5N*%q8_E^4AVTDrJ z8!C&r;>k#S>p+(z)P)fkZ>2I`x4$H&jESl7HkOZ6x7XKPv3Fip<-P(XEBB41OfwNT z646-19vAIrj|;;p;{|j@aTp_o<6v3QPo;1Sx=NV_pFJV#CuDu>yyd4HS+zlhUqiFa z%n!P)_GI2@47#dG1IqY-YTAIR8j#YxGGH*MsrK~cLP3R!9x6`G4+jCIV=%`}Q3X^$ z1wB>CR}Tm7P)V{dMjBP^Mj;D>uD>GrszFzAZV;TIU%;M{UP(!>q!xGuBbidYQJK=L zpeH9zv1$m|)A<gDu~m7bsktIu(7Ndb{6VPrHz|Hitwk9==w^gMvN2^DbWx6KRo$UU znS-x{X7jBCG9^2hgU!X+d2a_*+l7?iV9xuBSuj+UeYLlF4=nUHcY_*yvtsbg=0n;O zY;Ltr3tv=B&)Z%$ogaLs<={IrUnnE6MKyS+(LPf+_^PPF*BTMqo?xieeo(M4Ke(V1 zxB6^ZSF9xEN}H+JD5v(in*~#^$J<bL&`c#=>9pG`F`kq(En*_V%1QVHPz>{cLL#7; z#sGy!0mXCxC^Q2U6B=Nv<Qz>TCrYuZbqORhofJrUW*psFpk<@UY)WCVDMgo#9guWs ztj7})HTj0QBuvB<Ln$MXok$580^u?^qGWJX%8(eBA?UaaK__JhRLT&9LI%f_?}T66 zZaF>!3cmm<zceeq;H#C=*UB%=$}i2zFU_r$qP)c|++Qskg8QZ*$)$><ByE<Y`Fgxc zLB08;sK=A}dORWY33|NCClLwBD8F97%pVmawcft9%19#}OYh?nJyrX&$&48tiSv|i zu}Gz{qKKO!qkJ?rmPKVO;G~(dgJB+4?r9fY2NyfaY^<<_xpF1$cDJkWl5+;=M3=+F z&{!2o#*>K&gi6Lp%@jqGiLs(!HW!xC-B_>(isG@9A(_f64^$S5B@{N<C(ShKt6~+x za!Lf5=<XnEt<2dxVPvEsUXLu~BZe6_C*UN>q}g9;PE2NIz*b~<ys}}vZn+ME=H<Ne z#?8riHoeHe5GSWJ;m#5xl}b)$CuP;v=5mo1w|-PsOPR=uCa1-^8+x?LoVYovB(^rm zoRMT^Tw>YDsG<%^KYQdl((92!8lcqDFSYdgTV+`KRlDFgULn6x580*mDk-Zq%7u%! 
z$=6zzo`^veBVty@5>xp^aeC5-nnjU#cBFXRG*X$8Xv{$4MFpQ`IVE_T<>D7miffju z7o=rx@F+Tur>V?LXRtOv><JDHUfHjFE&k&1WHxPzTr403#Cuc<%J9MITPiK(_!NJi z9<)!y61b4Ir%l{ni!K)JBL`dAK!{GaPj*_jPerNE-CQOHT})9UWnz5g7#9H=f0dYz zc#g}6i#9^891eWy8q24m!{=@<6SF}<{f4UXWOAP|lAMA{d9HHq^vOBa?UNTYfc{Dh z>?3A8IXzG8ZIG%+tKG^QfWoVQ@~Xur*BWk|5+&(<O?+VBX1XFq+Ej^Vk*LlaER%Vw z3rYQj32$LSFHAHQCfY@!I`4&oGLPg}F<%xFih5;-c6)P+;M^8NZXGGtl_(dT7EpT6 zD|a*8UR{I<pcohcTgrt!LUy@WA4pWRq(G~3Q?f`^=Vwz%C!8bDGYS_3BBzW``jSi- zBG3xy=q5PZQ-He*@YVv{r*K@j<mow{qvv>zp5wm7lE0uA0t+(S3o}|5X81$~_jM=~ z3gEstUN1W3nX%k$l~Z9i_PX0}uY3}(%v^_)xyZ?|?U@tbVN*EI<vRM<$rn!!@HD6d z9mSJsg>>}FlmAk5qF#;;{9Y-fV~4OSD*0{Y{4C<sEHXG03~O4~eWbd-%d&6TF=RE= zSZl+>quT7?ki}LTpTnoLNF?GIsj02CNZ2B}BXAV9>BIhdi#jcBc(mSPPA#fg$NQ|J zH9L;3rR91s6142Wp<2tfIy|s*sMb+iGdrYNeSOH<7_QN*7LK)q!`hsrHlo&AWT}*9 zxp}#pQy%Xd(xA<(p;_g9L&M0?cv(5enmE=pTr-Sg8*40D7Y;it(l=y=!}XTUsRcEw zXte>&iuJxBtJvYU><&Nd2wQZx-YRlBpqCb%D<1J{yo769BQ@Z^mTfo~v25#VQK)OP z+AIXjxr<lB$o`?>z8Yg7Jmd)1hBa&BmLZhYaMP6hdaKxJmFRUxaqmoc%8sPN?|{c0 ze#2rTqZW-o3#)ity;b7WxT@HNyswCifCS2J91e4m;eb%J)Ooa|g6Kj2y4t+&%bcgS zMY-fc>!5-TyN9*lti#~#BK$>axKAyu21@3%MC&+=fD}>j8r7{;sD;XT(+Vq9@(^<P zk5-i1(AYJO+VHyCdaKGg$5_yc8i9JN+6e<R&8pNlbGv{z{9&t#rv{L!LaN?c1U8F> zOEh>O0=2DbeOQ|v)~sqct=?Mf?AkgsR}>9|S6Nl2<3PQ2hO?`GsB4SNtf@u*Qjx#J zIY$=hgF|zR7U>o>{MMp69<zvK|6C>islty%m!l<YtNVuLcp$@B{#mpy7rU<3foeG% z(ZI`j6?6q&8ipku(7yw@r#97VG$5X>UFv{~b&Fhh6nDx*G%R)EJ`xLV9kLcV{94ee zK=7425O98N7~=8iLjD0FzkhakZi&6ldRtx1xp31m*tWE;-a6AcM>$*$KXdpx=bVkh z70$UL4qxw_E9P(|PSoPC#yMBQ;aSePQV!qXoGat-jZSz$`rWb*!@~~EWzpTdgVbB= z3JRC!3-`;yx`M(r`NEVeJljdEs=EKczH?yT+0aXaeH_-pJ`T@?eH=PqABU@8ABU@9 zABSsTABX3`J`UHyJ`SButxa^FdMDg^#;~TNZHIX*7=V_O$EwR|)z?||=!NH_19Tu# zPumS0MvH@wi2ox6!m!?Q=bM%;x6WTTS4__g4q+hU4d^NK%&UvNPOVYs>jNDPzIrsk zAah#vcs^Nvx0qG}7dl$zy!1?NTmu{cjZa%=7`?)1ska)PuGh8ITTTBPDIz!m@@BLN zS-x6xX&t=h!<U`2vmK5Oj5I@-A~3RIIB25u%w=$C3x=rW)?$zqVbokL$gDEG&YX2Q zG_7qG{8~>X(Oi<3RfI~Q)U07XXl(2sx{GNxt>!Ma#<nu-=L2ae#$?EaWJd^H7|nWW 
zf5hNH>*Tz_^x-Iamu^HcmNDI^0X`hY0I;x*0ktr^J3>ZFjRO)wun#~;0l(7-fg*gw zEy5^)Mkq#PVS-qo2{;<4yjm!MUrbtjsGOID&{&`Ig9Z(X*C;<Y+TfxM`68<nqlKo0 z935OJZ@so$nJ`9qm~0(#X>FKtx%NzEg6Vt{*jFRfiK6`ME*tOF5jX5mEd_n+tqXF} zg!ffb4f7?yLX&gNdm#o27x!YwTC5NC)nLlh+QP0m7hQ%9deJGR12ui8lx{qwG*`Ef z-@+R0POGJ^kRVqnC(!S-TI*&JdOW^n(WPHYH#8`oc7}?&FqwyMu1$DYI?%6N=x5T5 z0LC3mAw1Saod5el+;O^)`cLAPdzcSVZH|_j+JXqF4J#dk7+YKFa()OQ)mm4}TZwx@ zS&}!p14f@IJ2;jb=*vr8Rs;I;rq{^t1UtHHsnrOY%}%Qspo{x02nTB+%&R$1bvt<o zTU~JM#ZJ6k5&+TzgmR?Ud6bHrJ|H5epVI_UG{9*%vW3%dWGkoP$e{BsjC<|CwgIC8 z+wR2iumT$bCiB9aj&iykoK9dnIi0|EaXNwR<}!7V?cp*xatW8okzp>ABL=4l0U6;m z9Eormjzl>PM@*rYAK0kSi(_L#FOH21y*L&VdU0&8(2HaHgkBtr3%xit0Uxi=H()}f ztP4?)l;}l3C%Mx(nY9tA{g_69a+;JxIZay7@B&VgLFEm3u33>1HKruWYfMX&lOF(| z4U*-IL^;7hiE@HBfwnEr?^2Nx{N5~4PJWq0Ir&>qvrY0lBvDTAR*7<g%RzfVp5GNB zCHP$_QBHo9L^=7@sCj|pca20j!C{GVg10&ED#N$g)ne6OXO)_kZB^fa+%(O<yMT+K zhX$}R#0Q2fDldN~=SYeF?hfBMHnNVR_pUEpQ@W(oRw`2gyW4)2U8KtW_g5b;J6?1g zkIa>!w90=!*(m-*+C~C%tLWi=EbtEx&Do-Xxivg>tn?5bpWb*lf>ka^_%~YbZ7W?{ hx~$Yzx$ZuC@+zz7otSq6cNIr*@=X9rBGU2izX8nf`eFb8 diff --git a/game_of_life/lib/github.com/diku-dk/lys/common.mk b/game_of_life/lib/github.com/diku-dk/lys/common.mk deleted file mode 100644 index b6756eb..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/common.mk +++ /dev/null @@ -1,36 +0,0 @@ -.PHONY: all run clean - -PROGNAME?=lys - -all: $(PROGNAME) - -LYS_TTF=1 - -ifeq ($(shell test futhark.pkg -nt lib; echo $$?),0) -$(PROGNAME): - futhark pkg sync - @make # The sync might have resulted in a new Makefile. -else -include lib/github.com/diku-dk/lys/setup_flags.mk -$(PROGNAME): $(PROGNAME)_wrapper.o $(PROGNAME)_printf.h lib/github.com/diku-dk/lys/liblys.c lib/github.com/diku-dk/lys/liblys.h lib/github.com/diku-dk/lys/context_setup.c lib/github.com/diku-dk/lys/context_setup.h lib/github.com/diku-dk/lys/main.c - gcc lib/github.com/diku-dk/lys/liblys.c lib/github.com/diku-dk/lys/context_setup.c lib/github.com/diku-dk/lys/main.c -I. 
-DPROGHEADER='"$(PROGNAME)_wrapper.h"' -DPRINTFHEADER='"$(PROGNAME)_printf.h"' $(PROGNAME)_wrapper.o -o $@ $(CFLAGS) $(LDFLAGS) -endif - -$(PROGNAME)_printf.h: $(PROGNAME)_wrapper.c - python3 lib/github.com/diku-dk/lys/gen_printf.py $@ $< - -# We do not want warnings and such for the generated code. -$(PROGNAME)_wrapper.o: $(PROGNAME)_wrapper.c - gcc -o $@ -c $< $(NOWARN_CFLAGS) - -%.c: %.fut - futhark $(LYS_BACKEND) --library $< - -%_wrapper.fut: lib/github.com/diku-dk/lys/genlys.fut $(PROG_FUT_DEPS) - cat $< | sed 's/"lys"/"$(PROGNAME)"/' > $@ - -run: $(PROGNAME) - ./$(PROGNAME) - -clean: - rm -f $(PROGNAME) $(PROGNAME).c $(PROGNAME).h $(PROGNAME)_wrapper.* $(PROGNAME)_printf.h *.o diff --git a/game_of_life/lib/github.com/diku-dk/lys/context_setup.c b/game_of_life/lib/github.com/diku-dk/lys/context_setup.c deleted file mode 100644 index 96a387b..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/context_setup.c +++ /dev/null @@ -1,50 +0,0 @@ -#include "context_setup.h" - -void lys_setup_futhark_context(const char *deviceopt, bool device_interactive, - struct futhark_context_config* *futcfg, - struct futhark_context* *futctx, - char* *opencl_device_name) { - *futcfg = futhark_context_config_new(); - assert(*futcfg != NULL); - -#if defined(FUTHARK_BACKEND_opencl) || defined(FUTHARK_BACKEND_cuda) - if (deviceopt != NULL) { - futhark_context_config_set_device(*futcfg, deviceopt); - } -#else - (void)deviceopt; -#endif - -#ifdef FUTHARK_BACKEND_opencl - if (device_interactive) { - futhark_context_config_select_device_interactively(*futcfg); - } -#else - (void)device_interactive; -#endif - - *futctx = futhark_context_new(*futcfg); - assert(*futctx != NULL); - -#ifdef FUTHARK_BACKEND_opencl - cl_device_id device; - assert(clGetCommandQueueInfo(futhark_context_get_command_queue(*futctx), - CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL) - == CL_SUCCESS); - - size_t dev_name_size; - assert(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &dev_name_size) - == 
CL_SUCCESS); - *opencl_device_name = malloc(dev_name_size); - assert(clGetDeviceInfo(device, CL_DEVICE_NAME, dev_name_size, *opencl_device_name, NULL) - == CL_SUCCESS); -#else - *opencl_device_name = NULL; -#endif -} - -int64_t lys_wall_time() { - struct timeval time; - assert(gettimeofday(&time,NULL) == 0); - return time.tv_sec * 1000000 + time.tv_usec; -} diff --git a/game_of_life/lib/github.com/diku-dk/lys/context_setup.h b/game_of_life/lib/github.com/diku-dk/lys/context_setup.h deleted file mode 100644 index d613bd7..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/context_setup.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef LIBLYS_CONTEXT_SETUP -#define LIBLYS_CONTEXT_SETUP - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <time.h> -#include <sys/time.h> - -#include PROGHEADER - -void lys_setup_futhark_context(const char *deviceopt, bool device_interactive, - struct futhark_context_config* *futcfg, - struct futhark_context* *futctx, - char* *opencl_device_name); - -int64_t lys_wall_time(); - -#define FUT_CHECK(ctx, x) _fut_check(ctx, x, __FILE__, __LINE__) -static inline void _fut_check(struct futhark_context *ctx, int res, - const char *file, int line) { - if (res != 0) { - fprintf(stderr, "%s:%d: Futhark error %d: %s\n", - file, line, res, futhark_context_get_error(ctx)); - exit(EXIT_FAILURE); - } -} - -#endif diff --git a/game_of_life/lib/github.com/diku-dk/lys/default.nix b/game_of_life/lib/github.com/diku-dk/lys/default.nix deleted file mode 100644 index f45fb1f..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/default.nix +++ /dev/null @@ -1,5 +0,0 @@ -with import <nixpkgs> {}; -stdenv.mkDerivation { - name = "lys"; - buildInputs = [ pkgconfig SDL2 SDL2_ttf ocl-icd opencl-headers ]; -} diff --git a/game_of_life/lib/github.com/diku-dk/lys/gen_printf.py b/game_of_life/lib/github.com/diku-dk/lys/gen_printf.py deleted file mode 100644 index f1f448c..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/gen_printf.py +++ /dev/null 
@@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import re - -out_file, in_file = sys.argv[1:] - -with open(in_file) as f: - contents = f.read() - -start = contents.find('futhark_entry_text_content') -end = contents.find(')', start) -types = re.findall('([^ ]+) \*out\d+,', contents[start:end]) -out_vars = ['out{}'.format(i) for i in range(len(types))] - -with open(out_file, 'w') as f: - print('#include <stdio.h>', file=f) - print('#include "lib/github.com/diku-dk/lys/liblys.h"', file=f) - print('', file=f) - if len(types) == 0: - print('#define UNUSED(x) (void)(x)', file=f) - print('void build_text(const struct lys_context *ctx, char* dest, size_t dest_len, const char* format, float render_milliseconds, char* **sum_names) {', file=f) - if len(types) > 0: - for v, t in zip(out_vars, types): - print(' union {{ {} val; char* sum_name; }} {};'.format(t, v), file=f) - print(' FUT_CHECK(ctx->fut, futhark_entry_text_content(ctx->fut, {}, render_milliseconds, ctx->state));'.format(', '.join('&{}.val'.format(v) for v in out_vars)), file=f) - for v, i in zip(out_vars, range(len(out_vars))): - print(' if (sum_names[{}] != NULL) {{'.format(i), file=f) - print(' {v}.sum_name = sum_names[{i}][(int32_t) {v}.val];'.format(v=v, i=i), file=f) - print(' }', file=f) - print(' snprintf(dest, dest_len, format, {});'.format(', '.join((s + ('.sum_name' if t == 'int32_t' else '.val')) for s, t in zip(out_vars, types))), file=f) - else: - for x in ['ctx', 'render_milliseconds', 'sum_names']: - print('UNUSED({});'.format(x), file=f) - print(' snprintf(dest, dest_len, "%s", format);', file=f) - print('}', file=f) - print('', file=f) - print('size_t n_printf_arguments() {', file=f) - print(' return {};'.format(len(types)), file=f) - print('}', file=f) diff --git a/game_of_life/lib/github.com/diku-dk/lys/genlys.fut b/game_of_life/lib/github.com/diku-dk/lys/genlys.fut deleted file mode 100644 index e9264ec..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/genlys.fut +++ /dev/null @@ 
-1,41 +0,0 @@ --- | ignore - --- This file exists as a wrapper that defines entry points in the --- specific form that liblys.c requires. It is copied into place and --- modified by the rules in common.mk. - -module m = import "lys" - -type^ state = m.lys.state - -entry init (seed: u32) (h: i32) (w: i32): state = - m.lys.init seed (i64.i32 h) (i64.i32 w) - -entry grab_mouse: bool = - m.lys.grab_mouse - -entry resize (h: i32) (w: i32) (s: state): state = - m.lys.resize (i64.i32 h) (i64.i32 w) s - -entry key (e: i32) (key: i32) (s: state): state = - let e' = if e == 0 then #keydown {key} else #keyup {key} - in m.lys.event e' s - -entry mouse (buttons: i32) (x: i32) (y: i32) (s: state): state = - m.lys.event (#mouse {buttons, x, y}) s - -entry wheel (dx: i32) (dy: i32) (s: state): state = - m.lys.event (#wheel {dx, dy}) s - -entry step (td: f32) (s: state): state = - m.lys.event (#step td) s - -entry render (s: state) = m.lys.render s - -entry text_colour (s: state): u32 = - m.lys.text_colour s - -entry text_format: []u8 = m.lys.text_format () - -entry text_content (render_duration: f32) (s: state) = - m.lys.text_content render_duration s diff --git a/game_of_life/lib/github.com/diku-dk/lys/liblys.c b/game_of_life/lib/github.com/diku-dk/lys/liblys.c deleted file mode 100644 index e7d4252..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/liblys.c +++ /dev/null @@ -1,269 +0,0 @@ -// Convenience framework for writing visualisations with Futhark and -// C/SDL. -// -// Based on initial SDL wrapper code by Jakob Stokholm Bertelsen. 
- -#include "liblys.h" - - -static void trigger_event(struct lys_context *ctx, enum lys_event event) { - ctx->event_handler(ctx, event); -} - -static void window_size_updated(struct lys_context *ctx, int newx, int newy) { - // https://stackoverflow.com/a/40122002 - ctx->wnd_surface = SDL_GetWindowSurface(ctx->wnd); - SDL_ASSERT(ctx->wnd_surface != NULL); - - ctx->width = newx; - ctx->height = newy; - - struct futhark_opaque_state *new_state; - FUT_CHECK(ctx->fut, futhark_entry_resize(ctx->fut, &new_state, ctx->height, ctx->width, ctx->state)); - futhark_free_opaque_state(ctx->fut, ctx->state); - ctx->state = new_state; - - ctx->wnd_surface = SDL_GetWindowSurface(ctx->wnd); - SDL_ASSERT(ctx->wnd_surface != NULL); - - if (ctx->data != NULL) { - free(ctx->data); - } - ctx->data = malloc(ctx->width * ctx->height * sizeof(uint32_t)); - assert(ctx->data != NULL); - - if (ctx->surface != NULL) { - SDL_FreeSurface(ctx->surface); - } - ctx->surface = SDL_CreateRGBSurfaceFrom(ctx->data, ctx->width, ctx->height, - 32, ctx->width * sizeof(uint32_t), 0xFF0000, 0xFF00, 0xFF, 0x00000000); - SDL_ASSERT(ctx->surface != NULL); - - trigger_event(ctx, LYS_WINDOW_SIZE_UPDATED); -} - -static void mouse_event(struct lys_context *ctx, Uint32 state, int x, int y) { - // We ignore mouse events if we are running a program that would - // like mouse grab, but where we have temporarily taken the mouse - // back from it (to e.g. resize the window). 
- if (ctx->grab_mouse != ctx->mouse_grabbed) { - return; - } - - struct futhark_opaque_state *new_state; - FUT_CHECK(ctx->fut, futhark_entry_mouse(ctx->fut, &new_state, state, x, y, ctx->state)); - futhark_free_opaque_state(ctx->fut, ctx->state); - ctx->state = new_state; -} - -static void wheel_event(struct lys_context *ctx, int x, int y) { - struct futhark_opaque_state *new_state; - FUT_CHECK(ctx->fut, futhark_entry_wheel(ctx->fut, &new_state, x, y, ctx->state)); - futhark_free_opaque_state(ctx->fut, ctx->state); - ctx->state = new_state; -} - -static void handle_sdl_events(struct lys_context *ctx) { - SDL_Event event; - - while (SDL_PollEvent(&event) == 1) { - switch (event.type) { - case SDL_WINDOWEVENT: - switch (event.window.event) { - case SDL_WINDOWEVENT_RESIZED: - { - int newx = (int)event.window.data1; - int newy = (int)event.window.data2; - window_size_updated(ctx, newx, newy); - break; - } - } - break; - case SDL_QUIT: - ctx->running = 0; - break; - case SDL_MOUSEMOTION: - if (ctx->grab_mouse) { - mouse_event(ctx, event.motion.state, event.motion.xrel, event.motion.yrel); - } else { - mouse_event(ctx, event.motion.state, event.motion.x, event.motion.y); - } - break; - case SDL_MOUSEBUTTONDOWN: - case SDL_MOUSEBUTTONUP: - if (ctx->grab_mouse && !ctx->mouse_grabbed) { - assert(SDL_SetRelativeMouseMode(1) == 0); - ctx->mouse_grabbed = 1; - } - - if (ctx->grab_mouse) { - mouse_event(ctx, 1<<(event.button.button-1), event.motion.xrel, event.motion.yrel); - } else { - mouse_event(ctx, 1<<(event.button.button-1), event.motion.x, event.motion.y); - } - break; - case SDL_MOUSEWHEEL: - wheel_event(ctx, event.wheel.x, event.wheel.y); - break; - case SDL_KEYDOWN: - case SDL_KEYUP: - switch (event.key.keysym.sym) { - case SDLK_ESCAPE: - if (ctx->grab_mouse && ctx->mouse_grabbed) { - assert(SDL_SetRelativeMouseMode(0) == 0); - ctx->mouse_grabbed = 0; - } else if (event.key.type == SDL_KEYDOWN) { - ctx->running = 0; - } - break; - case SDLK_F1: - if (event.key.type == 
SDL_KEYDOWN) { - trigger_event(ctx, LYS_F1); - } - break; - default: - { - struct futhark_opaque_state *new_state; - int e = event.key.type == SDL_KEYDOWN ? 0 : 1; - FUT_CHECK(ctx->fut, futhark_entry_key(ctx->fut, &new_state, - e, event.key.keysym.sym, ctx->state)); - futhark_free_opaque_state(ctx->fut, ctx->state); - ctx->state = new_state; - } - } - } - } -} - -static void sdl_loop(struct lys_context *ctx) { - struct futhark_u32_2d *out_arr; - - while (ctx->running) { - int64_t now = lys_wall_time(); - float delta = ((float)(now - ctx->last_time))/1000000.0; - ctx->fps = (ctx->fps*0.9 + (1/delta)*0.1); - ctx->last_time = now; - struct futhark_opaque_state *new_state; - FUT_CHECK(ctx->fut, futhark_entry_step(ctx->fut, &new_state, delta, ctx->state)); - futhark_free_opaque_state(ctx->fut, ctx->state); - ctx->state = new_state; - - FUT_CHECK(ctx->fut, futhark_entry_render(ctx->fut, &out_arr, ctx->state)); - FUT_CHECK(ctx->fut, futhark_values_u32_2d(ctx->fut, out_arr, ctx->data)); - FUT_CHECK(ctx->fut, futhark_free_u32_2d(ctx->fut, out_arr)); - - SDL_ASSERT(SDL_BlitSurface(ctx->surface, NULL, ctx->wnd_surface, NULL)==0); - - trigger_event(ctx, LYS_LOOP_ITERATION); - - SDL_ASSERT(SDL_UpdateWindowSurface(ctx->wnd) == 0); - - int delay = 1000.0/ctx->max_fps - delta*1000.0; - if (delay > 0) { - SDL_Delay(delay); - } - - handle_sdl_events(ctx); - } -} - -void lys_run_sdl(struct lys_context *ctx) { - struct futhark_context *fut = ctx->fut; - - ctx->last_time = lys_wall_time(); - - ctx->wnd = - SDL_CreateWindow("Lys", - SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, - ctx->width, ctx->height, - ctx->sdl_flags | - SDL_RENDERER_ACCELERATED | - SDL_RENDERER_PRESENTVSYNC); - SDL_ASSERT(ctx->wnd != NULL); - - window_size_updated(ctx, ctx->width, ctx->height); - - ctx->running = 1; - ctx->mouse_grabbed = 0; - - if (ctx->grab_mouse) { - assert(SDL_SetRelativeMouseMode(1) == 0); - ctx->mouse_grabbed = 1; - } - - trigger_event(ctx, LYS_LOOP_START); - - sdl_loop(ctx); - - 
FUT_CHECK(fut, futhark_free_opaque_state(fut, ctx->state)); - - trigger_event(ctx, LYS_LOOP_END); - - SDL_FreeSurface(ctx->surface); - // do not free wnd_surface (see SDL_GetWindowSurface) - SDL_DestroyWindow(ctx->wnd); - SDL_Quit(); -} - -void lys_setup(struct lys_context *ctx, int width, int height, int max_fps, int sdl_flags) { - memset(ctx, 0, sizeof(struct lys_context)); - ctx->width = width; - ctx->height = height; - ctx->fps = 0; - ctx->max_fps = max_fps; - ctx->sdl_flags = sdl_flags; - - SDL_ASSERT(SDL_Init(SDL_INIT_EVERYTHING) == 0); -} - -#ifdef LYS_TTF -void draw_text(struct lys_context *ctx, - TTF_Font *font, int font_size, - char* buffer, int32_t colour, - int y_start, int x_start) { - SDL_Surface *text_surface; - SDL_Rect offset_rect; - - SDL_Color sdl_colour = - { .a = (colour >> 24) & 0xff, - .r = (colour >> 16) & 0xff, - .g = (colour >> 8) & 0xff, - .b = colour & 0xff }; - - offset_rect.x = x_start; - int y = y_start; - while (true) { - char* buffer_start = buffer; - - bool no_more_text = false; - while (true) { - if (*buffer == '\n') { - *buffer = '\0'; - break; - } else if (*buffer == '\0') { - no_more_text = true; - break; - } - buffer++; - } - - if (*buffer_start != '\0') { - text_surface = TTF_RenderUTF8_Blended(font, buffer_start, sdl_colour); - SDL_ASSERT(text_surface != NULL); - offset_rect.y = y; - offset_rect.w = text_surface->w; - offset_rect.h = text_surface->h; - SDL_ASSERT(SDL_BlitSurface(text_surface, NULL, - ctx->wnd_surface, &offset_rect) == 0); - SDL_FreeSurface(text_surface); - } - - if (no_more_text) { - break; - } else { - buffer++; - y += font_size; - } - } -} -#endif diff --git a/game_of_life/lib/github.com/diku-dk/lys/liblys.h b/game_of_life/lib/github.com/diku-dk/lys/liblys.h deleted file mode 100644 index 4c0e775..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/liblys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef LIBLYS_HEADER -#define LIBLYS_HEADER - -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> 
-#include <assert.h> -#include <SDL2/SDL.h> -#include <SDL2/SDL_ttf.h> - -#include PROGHEADER - -#include "context_setup.h" - -enum lys_event { - LYS_LOOP_START, - LYS_LOOP_ITERATION, - LYS_LOOP_END, - LYS_WINDOW_SIZE_UPDATED, - LYS_F1 -}; - -struct lys_context { - struct futhark_context *fut; - struct futhark_opaque_state *state; - SDL_Window *wnd; - SDL_Surface *wnd_surface; - SDL_Surface *surface; - int width; - int height; - uint32_t *data; - int64_t last_time; - bool running; - bool grab_mouse; - bool mouse_grabbed; - float fps; - int max_fps; - int sdl_flags; - void* event_handler_data; - void (*event_handler)(struct lys_context*, enum lys_event); -}; - -#define SDL_ASSERT(x) _sdl_assert(x, __FILE__, __LINE__) -static inline void _sdl_assert(int res, const char *file, int line) { - if (res == 0) { - fprintf(stderr, "%s:%d: SDL error %d: %s\n", - file, line, res, SDL_GetError()); - exit(EXIT_FAILURE); - } -} - -void lys_setup(struct lys_context *ctx, int width, int height, int max_fps, int sdl_flags); - -void lys_run_sdl(struct lys_context *ctx); - -#ifdef LYS_TTF -void draw_text(struct lys_context *ctx, TTF_Font *font, int font_size, char* buffer, int32_t colour, - int x_start, int y_start); -#endif - -#endif diff --git a/game_of_life/lib/github.com/diku-dk/lys/lys.fut b/game_of_life/lib/github.com/diku-dk/lys/lys.fut deleted file mode 100644 index e1039d6..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/lys.fut +++ /dev/null @@ -1,366 +0,0 @@ --- | Lights, camera, action! --- --- Making use of Lys requires hooking into (or duplicating) its custom --- Makefile rules, so you should also read the [usage --- section](https://github.com/diku-dk/lys/blob/master/README.md#general-usage) --- of the README. --- --- On the Futhark side, you need to define a module called `lys` that --- implements the module type `lys`@mtype. You can do this directly, --- or use some of the various conveniences defined in this file. 
For --- example, if you do not care about showing any text, you can use --- `lys_no_text`@mtype. - --- | For convenience, re-export the colour module. -open import "../../athas/matte/colour" - --- | UTF-8 encoded string. This is what is produced by string --- literals in Futhark code. -type string [n] = [n]u8 - --- | An event is sent when something has happened that might cause the --- state of the program to change, or just when some time has passed. --- It is permissible to ignore all of these events. Things that must --- not be ignored are separate functions in `lys`@mtype. --- --- * `#step x`: `x` seconds have passed since `init` or the last time --- this event was received. --- --- * `#keydown {key}`: `key` has pressed. --- --- * `#keyup {key}`: `key` has been released. --- --- * `#mouse {buttons, x, y}`: The mouse has been moved or clicked. --- `buttons` is a bit mask indicating which button(s) are held down, --- and the `x`/`y` the new position of the mouse. --- --- * `#wheel {dx, dy}`: The mouse wheel has been used. Note that there can --- be multiple wheels; this is why the `dy` direction also makes --- sense. In most cases, however, only the `dy` will be non-zero. -type event = #step f32 - | #keydown {key:i32} - | #keyup {key:i32} - | #mouse {buttons:i32, x:i32, y:i32} - | #wheel {dx:i32, dy:i32} - --- | The core subset of the module type of Lys applications. This is useful if --- you need a Lys application with custom initialisation or without text --- rendering. -module type lys_core = { - -- | The state maintained by this Lys application. Most functions - -- will take the current state and return a new state. - type~ state - - -- | An event occured. It is permissible to ignore any of these - -- events by returning the same state unchanged. - val event : event -> state -> state - - -- | The window was resized. 
- val resize : (h: i64) -> (w: i64) -> state -> state - - -- | The function for rendering a screen image in row-major order - -- (height by width). The size of the array returned must match the - -- last dimensions provided to the state (via `init`@term or - -- `resize`@term). - val render : state -> [][]argb.colour -} - --- | The module type of Lys applications. If you define a module --- called `lys` that has this module type, then the autogenerated Lys --- wrapper application can automatically define the entry point --- functions that allows Lys to communicate with the C program that --- actually implements the user interaction. -module type lys = { - include lys_core - - -- | Initial state for a given window size. A random seed is passed - -- in. Don't treat this as a true random number (it's currently - -- just a timestamp), but use it for initialising a proper RNG. - val init : (seed: u32) -> (h: i64) -> (w: i64) -> state - - -- | If true, the program will grab the mouse, and all positions - -- reported via the `mouse`@term function will be relative to the - -- last time `mouse`@term was called. If in doubt, leave this - -- `false`. - val grab_mouse : bool - - -- | Show helpful text in the upper-left corner. Specify in printf format - -- with extensions: '%[string1|string2|...]' prints a string but takes an - -- index into the given list of strings, separated by '|'. For example, - -- '%[circle|square]' prints 'circle' if passed the i32 value 0, and 'square' - -- if passed 1. - - val text_format : () -> string [] - -- | The content must be a scalar or a tuple of scalars. - type text_content - val text_content : (fps: f32) -> state -> text_content - -- | The colour can vary based on the state. - val text_colour : state -> argb.colour -} - --- | A module type for the simple case where we don't want any text. --- You can define the `lys` module to have this module type instead of --- `lys`@mtype. 
For maximal convenience, you can `open` --- `lys_no_text`@module inside your module definition. -module type lys_no_text = lys with text_content = () - --- | A convenience module that can be `open`ed to give dummy --- definitions for the text-related functionality. -module lys_no_text = { - let text_format () = "" - type text_content = () - let text_content _ _ = () - let text_colour _ = argb.black -} - --- | A dummy lys module that just produces a black rectangle and does --- nothing in response to events. -module lys: lys_no_text = { - type state = {h: i64, w: i64} - let init _ h w = {h,w} - let event _ s = s - let resize h w _ = {h,w} - let grab_mouse = false - let render {h,w} = replicate w argb.black |> replicate h - open lys_no_text -} - --- The following values are taken from --- https://wiki.libsdl.org/SDLKeycodeLookup - -let SDLK_UNKNOWN: i32 = 0x00 -let SDLK_BACKSPACE: i32 = 0x08 -let SDLK_TAB: i32 = 0x09 -let SDLK_RETURN: i32 = 0x0D -let SDLK_ESCAPE: i32 = 0x1B -let SDLK_SPACE: i32 = 0x20 -let SDLK_EXCLAIM: i32 = 0x21 -let SDLK_QUOTEDBL: i32 = 0x22 -let SDLK_HASH: i32 = 0x23 -let SDLK_DOLLAR: i32 = 0x24 -let SDLK_PERCENT: i32 = 0x25 -let SDLK_AMPERSAND: i32 = 0x26 -let SDLK_QUOTE: i32 = 0x27 -let SDLK_LEFTPAREN: i32 = 0x28 -let SDLK_RIGHTPAREN: i32 = 0x29 -let SDLK_ASTERISK: i32 = 0x2A -let SDLK_PLUS: i32 = 0x2B -let SDLK_COMMA: i32 = 0x2C -let SDLK_MINUS: i32 = 0x2D -let SDLK_PERIOD: i32 = 0x2E -let SDLK_SLASH: i32 = 0x2F -let SDLK_0: i32 = 0x30 -let SDLK_1: i32 = 0x31 -let SDLK_2: i32 = 0x32 -let SDLK_3: i32 = 0x33 -let SDLK_4: i32 = 0x34 -let SDLK_5: i32 = 0x35 -let SDLK_6: i32 = 0x36 -let SDLK_7: i32 = 0x37 -let SDLK_8: i32 = 0x38 -let SDLK_9: i32 = 0x39 -let SDLK_COLON: i32 = 0x3A -let SDLK_SEMICOLON: i32 = 0x3B -let SDLK_LESS: i32 = 0x3C -let SDLK_EQUALS: i32 = 0x3D -let SDLK_GREATER: i32 = 0x3E -let SDLK_QUESTION: i32 = 0x3F -let SDLK_AT: i32 = 0x40 -let SDLK_LEFTBRACKET: i32 = 0x5B -let SDLK_BACKSLASH: i32 = 0x5C -let SDLK_RIGHTBRACKET: i32 = 
0x5D -let SDLK_CARET: i32 = 0x5E -let SDLK_UNDERSCORE: i32 = 0x5F -let SDLK_BACKQUOTE: i32 = 0x60 -let SDLK_a: i32 = 0x61 -let SDLK_b: i32 = 0x62 -let SDLK_c: i32 = 0x63 -let SDLK_d: i32 = 0x64 -let SDLK_e: i32 = 0x65 -let SDLK_f: i32 = 0x66 -let SDLK_g: i32 = 0x67 -let SDLK_h: i32 = 0x68 -let SDLK_i: i32 = 0x69 -let SDLK_j: i32 = 0x6A -let SDLK_k: i32 = 0x6B -let SDLK_l: i32 = 0x6C -let SDLK_m: i32 = 0x6D -let SDLK_n: i32 = 0x6E -let SDLK_o: i32 = 0x6F -let SDLK_p: i32 = 0x70 -let SDLK_q: i32 = 0x71 -let SDLK_r: i32 = 0x72 -let SDLK_s: i32 = 0x73 -let SDLK_t: i32 = 0x74 -let SDLK_u: i32 = 0x75 -let SDLK_v: i32 = 0x76 -let SDLK_w: i32 = 0x77 -let SDLK_x: i32 = 0x78 -let SDLK_y: i32 = 0x79 -let SDLK_z: i32 = 0x7A -let SDLK_DELETE: i32 = 0x7F -let SDLK_CAPSLOCK: i32 = 0x40000039 -let SDLK_F1: i32 = 0x4000003A -let SDLK_F2: i32 = 0x4000003B -let SDLK_F3: i32 = 0x4000003C -let SDLK_F4: i32 = 0x4000003D -let SDLK_F5: i32 = 0x4000003E -let SDLK_F6: i32 = 0x4000003F -let SDLK_F7: i32 = 0x40000040 -let SDLK_F8: i32 = 0x40000041 -let SDLK_F9: i32 = 0x40000042 -let SDLK_F10: i32 = 0x40000043 -let SDLK_F11: i32 = 0x40000044 -let SDLK_F12: i32 = 0x40000045 -let SDLK_PRINTSCREEN: i32 = 0x40000046 -let SDLK_SCROLLLOCK: i32 = 0x40000047 -let SDLK_PAUSE: i32 = 0x40000048 -let SDLK_INSERT: i32 = 0x40000049 -let SDLK_HOME: i32 = 0x4000004A -let SDLK_PAGEUP: i32 = 0x4000004B -let SDLK_END: i32 = 0x4000004D -let SDLK_PAGEDOWN: i32 = 0x4000004E -let SDLK_RIGHT: i32 = 0x4000004F -let SDLK_LEFT: i32 = 0x40000050 -let SDLK_DOWN: i32 = 0x40000051 -let SDLK_UP: i32 = 0x40000052 -let SDLK_NUMLOCKCLEAR: i32 = 0x40000053 -let SDLK_KP_DIVIDE: i32 = 0x40000054 -let SDLK_KP_MULTIPLY: i32 = 0x40000055 -let SDLK_KP_MINUS: i32 = 0x40000056 -let SDLK_KP_PLUS: i32 = 0x40000057 -let SDLK_KP_ENTER: i32 = 0x40000058 -let SDLK_KP_1: i32 = 0x40000059 -let SDLK_KP_2: i32 = 0x4000005A -let SDLK_KP_3: i32 = 0x4000005B -let SDLK_KP_4: i32 = 0x4000005C -let SDLK_KP_5: i32 = 0x4000005D -let SDLK_KP_6: i32 = 
0x4000005E -let SDLK_KP_7: i32 = 0x4000005F -let SDLK_KP_8: i32 = 0x40000060 -let SDLK_KP_9: i32 = 0x40000061 -let SDLK_KP_0: i32 = 0x40000062 -let SDLK_KP_PERIOD: i32 = 0x40000063 -let SDLK_APPLICATION: i32 = 0x40000065 -let SDLK_POWER: i32 = 0x40000066 -let SDLK_KP_EQUALS: i32 = 0x40000067 -let SDLK_F13: i32 = 0x40000068 -let SDLK_F14: i32 = 0x40000069 -let SDLK_F15: i32 = 0x4000006A -let SDLK_F16: i32 = 0x4000006B -let SDLK_F17: i32 = 0x4000006C -let SDLK_F18: i32 = 0x4000006D -let SDLK_F19: i32 = 0x4000006E -let SDLK_F20: i32 = 0x4000006F -let SDLK_F21: i32 = 0x40000070 -let SDLK_F22: i32 = 0x40000071 -let SDLK_F23: i32 = 0x40000072 -let SDLK_F24: i32 = 0x40000073 -let SDLK_EXECUTE: i32 = 0x40000074 -let SDLK_HELP: i32 = 0x40000075 -let SDLK_MENU: i32 = 0x40000076 -let SDLK_SELECT: i32 = 0x40000077 -let SDLK_STOP: i32 = 0x40000078 -let SDLK_AGAIN: i32 = 0x40000079 -let SDLK_UNDO: i32 = 0x4000007A -let SDLK_CUT: i32 = 0x4000007B -let SDLK_COPY: i32 = 0x4000007C -let SDLK_PASTE: i32 = 0x4000007D -let SDLK_FIND: i32 = 0x4000007E -let SDLK_MUTE: i32 = 0x4000007F -let SDLK_VOLUMEUP: i32 = 0x40000080 -let SDLK_VOLUMEDOWN: i32 = 0x40000081 -let SDLK_KP_COMMA: i32 = 0x40000085 -let SDLK_KP_EQUALSAS400: i32 = 0x40000086 -let SDLK_ALTERASE: i32 = 0x40000099 -let SDLK_SYSREQ: i32 = 0x4000009A -let SDLK_CANCEL: i32 = 0x4000009B -let SDLK_CLEAR: i32 = 0x4000009C -let SDLK_PRIOR: i32 = 0x4000009D -let SDLK_RETURN2: i32 = 0x4000009E -let SDLK_SEPARATOR: i32 = 0x4000009F -let SDLK_OUT: i32 = 0x400000A0 -let SDLK_OPER: i32 = 0x400000A1 -let SDLK_CLEARAGAIN: i32 = 0x400000A2 -let SDLK_CRSEL: i32 = 0x400000A3 -let SDLK_EXSEL: i32 = 0x400000A4 -let SDLK_KP_00: i32 = 0x400000B0 -let SDLK_KP_000: i32 = 0x400000B1 -let SDLK_THOUSANDSSEPARATOR: i32 = 0x400000B2 -let SDLK_DECIMALSEPARATOR: i32 = 0x400000B3 -let SDLK_CURRENCYUNIT: i32 = 0x400000B4 -let SDLK_CURRENCYSUBUNIT: i32 = 0x400000B5 -let SDLK_KP_LEFTPAREN: i32 = 0x400000B6 -let SDLK_KP_RIGHTPAREN: i32 = 0x400000B7 -let 
SDLK_KP_LEFTBRACE: i32 = 0x400000B8 -let SDLK_KP_RIGHTBRACE: i32 = 0x400000B9 -let SDLK_KP_TAB: i32 = 0x400000BA -let SDLK_KP_BACKSPACE: i32 = 0x400000BB -let SDLK_KP_A: i32 = 0x400000BC -let SDLK_KP_B: i32 = 0x400000BD -let SDLK_KP_C: i32 = 0x400000BE -let SDLK_KP_D: i32 = 0x400000BF -let SDLK_KP_E: i32 = 0x400000C0 -let SDLK_KP_F: i32 = 0x400000C1 -let SDLK_KP_XOR: i32 = 0x400000C2 -let SDLK_KP_POWER: i32 = 0x400000C3 -let SDLK_KP_PERCENT: i32 = 0x400000C4 -let SDLK_KP_LESS: i32 = 0x400000C5 -let SDLK_KP_GREATER: i32 = 0x400000C6 -let SDLK_KP_AMPERSAND: i32 = 0x400000C7 -let SDLK_KP_DBLAMPERSAND: i32 = 0x400000C8 -let SDLK_KP_VERTICALBAR: i32 = 0x400000C9 -let SDLK_KP_DBLVERTICALBAR: i32 = 0x400000CA -let SDLK_KP_COLON: i32 = 0x400000CB -let SDLK_KP_HASH: i32 = 0x400000CC -let SDLK_KP_SPACE: i32 = 0x400000CD -let SDLK_KP_AT: i32 = 0x400000CE -let SDLK_KP_EXCLAM: i32 = 0x400000CF -let SDLK_KP_MEMSTORE: i32 = 0x400000D0 -let SDLK_KP_MEMRECALL: i32 = 0x400000D1 -let SDLK_KP_MEMCLEAR: i32 = 0x400000D2 -let SDLK_KP_MEMADD: i32 = 0x400000D3 -let SDLK_KP_MEMSUBTRACT: i32 = 0x400000D4 -let SDLK_KP_MEMMULTIPLY: i32 = 0x400000D5 -let SDLK_KP_MEMDIVIDE: i32 = 0x400000D6 -let SDLK_KP_PLUSMINUS: i32 = 0x400000D7 -let SDLK_KP_CLEAR: i32 = 0x400000D8 -let SDLK_KP_CLEARENTRY: i32 = 0x400000D9 -let SDLK_KP_BINARY: i32 = 0x400000DA -let SDLK_KP_OCTAL: i32 = 0x400000DB -let SDLK_KP_DECIMAL: i32 = 0x400000DC -let SDLK_KP_HEXADECIMAL: i32 = 0x400000DD -let SDLK_LCTRL: i32 = 0x400000E0 -let SDLK_LSHIFT: i32 = 0x400000E1 -let SDLK_LALT: i32 = 0x400000E2 -let SDLK_LGUI: i32 = 0x400000E3 -let SDLK_RCTRL: i32 = 0x400000E4 -let SDLK_RSHIFT: i32 = 0x400000E5 -let SDLK_RALT: i32 = 0x400000E6 -let SDLK_RGUI: i32 = 0x400000E7 -let SDLK_MODE: i32 = 0x40000101 -let SDLK_AUDIONEXT: i32 = 0x40000102 -let SDLK_AUDIOPREV: i32 = 0x40000103 -let SDLK_AUDIOSTOP: i32 = 0x40000104 -let SDLK_AUDIOPLAY: i32 = 0x40000105 -let SDLK_AUDIOMUTE: i32 = 0x40000106 -let SDLK_MEDIASELECT: i32 = 0x40000107 -let 
SDLK_WWW: i32 = 0x40000108 -let SDLK_MAIL: i32 = 0x40000109 -let SDLK_CALCULATOR: i32 = 0x4000010A -let SDLK_COMPUTER: i32 = 0x4000010B -let SDLK_AC_SEARCH: i32 = 0x4000010C -let SDLK_AC_HOME: i32 = 0x4000010D -let SDLK_AC_BACK: i32 = 0x4000010E -let SDLK_AC_FORWARD: i32 = 0x4000010F -let SDLK_AC_STOP: i32 = 0x40000110 -let SDLK_AC_REFRESH: i32 = 0x40000111 -let SDLK_AC_BOOKMARKS: i32 = 0x40000112 -let SDLK_BRIGHTNESSDOWN: i32 = 0x40000113 -let SDLK_BRIGHTNESSUP: i32 = 0x40000114 -let SDLK_DISPLAYSWITCH: i32 = 0x40000115 -let SDLK_KBDILLUMTOGGLE: i32 = 0x40000116 -let SDLK_KBDILLUMDOWN: i32 = 0x40000117 -let SDLK_KBDILLUMUP: i32 = 0x40000118 -let SDLK_EJECT: i32 = 0x40000119 -let SDLK_SLEEP: i32 = 0x4000011A diff --git a/game_of_life/lib/github.com/diku-dk/lys/main.c b/game_of_life/lib/github.com/diku-dk/lys/main.c deleted file mode 100644 index 2c24d1f..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/main.c +++ /dev/null @@ -1,355 +0,0 @@ -#include "liblys.h" -#include PRINTFHEADER - -#define _XOPEN_SOURCE -#include <unistd.h> -#include <getopt.h> - -#define INITIAL_WIDTH 800 -#define INITIAL_HEIGHT 600 - -struct lys_text { - TTF_Font *font; - char* font_path; - int font_size; - char* text_format; - char* text_buffer; - size_t text_buffer_len; - bool show_text; - char* **sum_names; -}; - -void loop_start(struct lys_context *ctx, struct lys_text *text) { - struct futhark_u8_1d *text_format_array; - FUT_CHECK(ctx->fut, futhark_entry_text_format(ctx->fut, &text_format_array)); - size_t text_format_len = futhark_shape_u8_1d(ctx->fut, text_format_array)[0]; - text->text_format = malloc(sizeof(char) * (text_format_len + 1)); - assert(text->text_format != NULL); - FUT_CHECK(ctx->fut, futhark_values_u8_1d(ctx->fut, text_format_array, (unsigned char*) text->text_format)); - FUT_CHECK(ctx->fut, futhark_context_sync(ctx->fut)); - text->text_format[text_format_len] = '\0'; - FUT_CHECK(ctx->fut, futhark_free_u8_1d(ctx->fut, text_format_array)); - - text->sum_names = 
(char* **) malloc(sizeof(char* *) * n_printf_arguments()); - assert(text->sum_names != NULL); - - text->text_buffer_len = text_format_len; - size_t i_arg = -1; - for (size_t i = 0; i < text_format_len; i++) { - if (text->text_format[i] == '%' && - i + 1 < text_format_len && text->text_format[i + 1] != '%') { - i_arg++; - if (text->text_format[i + 1] == '[') { - text->text_format[i + 1] = 's'; - size_t end_pos; - size_t n_choices = 1; - bool found_end = false; - for (end_pos = i + 2; end_pos < text_format_len; end_pos++) { - if (text->text_format[end_pos] == '|') { - n_choices++; - } else if (text->text_format[end_pos] == ']') { - found_end = true; - break; - } - } - assert(found_end); - text->sum_names[i_arg] = (char* *) malloc(sizeof(char*) * (n_choices + 1)); - assert(text->sum_names[i_arg] != NULL); - text->sum_names[i_arg][n_choices] = NULL; - char* temp_choice = (char*) malloc(sizeof(char) * (end_pos - i - n_choices)); - assert(temp_choice != NULL); - size_t choice_cur = 0; - size_t i_choice = 0; - for (size_t j = i + 2; j < end_pos + 1; j++) { - if (text->text_format[j] == '|' || text->text_format[j] == ']') { - temp_choice[choice_cur] = '\0'; - text->sum_names[i_arg][i_choice] = (char*) malloc(sizeof(char) * (choice_cur + 1)); - assert(text->sum_names[i_arg][i_choice] != NULL); - strncpy(text->sum_names[i_arg][i_choice], temp_choice, choice_cur + 1); - choice_cur = 0; - i_choice++; - } else { - temp_choice[choice_cur] = text->text_format[j]; - choice_cur++; - } - } - free(temp_choice); - size_t shift_left = end_pos - i - 1; - for (size_t j = end_pos + 1; j < text_format_len; j++) { - text->text_format[j - shift_left] = text->text_format[j]; - } - text_format_len -= shift_left; - text->text_format[text_format_len] = '\0'; - i++; - } else { - text->sum_names[i_arg] = NULL; - text->text_buffer_len += 20; // estimate - } - } - } - - text->text_buffer = malloc(sizeof(char) * text->text_buffer_len); - assert(text->text_buffer != NULL); - text->text_buffer[0] = 
'\0'; - - text->show_text = true; -} - -void loop_iteration(struct lys_context *ctx, struct lys_text *text) { - if (!text->show_text) { - return; - } - - build_text(ctx, text->text_buffer, text->text_buffer_len, text->text_format, - ctx->fps, text->sum_names); - if (*(text->text_buffer) != '\0') { - int32_t text_colour; - FUT_CHECK(ctx->fut, - futhark_entry_text_colour(ctx->fut, (uint32_t*) &text_colour, - ctx->state)); - draw_text(ctx, text->font, text->font_size, text->text_buffer, text_colour, 10, 10); - } -} - -void loop_end(struct lys_text *text) { - free(text->text_format); - free(text->text_buffer); - - for (size_t i = 0; i < n_printf_arguments(); i++) { - if (text->sum_names[i] != NULL) { - size_t j = 0; - while (text->sum_names[i][j] != NULL) { - free(text->sum_names[i][j]); - j++; - } - free(text->sum_names[i]); - } - } - free(text->sum_names); -} - -int font_size_from_dimensions(int width, int height) { - int size, font_size; - if (height < width) { - size = height; - } else { - size = width; - } - font_size = size / 45; - if (font_size < 14) { - font_size = 14; - } else if (font_size > 32) { - font_size = 32; - } - return font_size; -} - -void window_size_updated(struct lys_context *ctx, struct lys_text *text) { - text->font_size = font_size_from_dimensions(ctx->width, ctx->height); - TTF_CloseFont(text->font); - text->font = TTF_OpenFont(text->font_path, text->font_size); - SDL_ASSERT(text->font != NULL); -} - -void f1(struct lys_text *text) { - text->show_text = !text->show_text; -} - -void handle_event(struct lys_context *ctx, enum lys_event event) { - struct lys_text *text = (struct lys_text *) ctx->event_handler_data; - switch (event) { - case LYS_LOOP_START: - loop_start(ctx, text); - break; - case LYS_LOOP_ITERATION: - loop_iteration(ctx, text); - break; - case LYS_LOOP_END: - loop_end(text); - break; - case LYS_WINDOW_SIZE_UPDATED: - window_size_updated(ctx, text); - break; - case LYS_F1: - f1(text); - } -} - -void do_bench(struct 
futhark_context *fut, int height, int width, int n, const char *operation) { - struct futhark_opaque_state *state; - int64_t start, end; - FUT_CHECK(fut, futhark_entry_init(fut, &state, (int32_t) lys_wall_time(), height, width)); - futhark_context_sync(fut); - bool do_step = false, do_render = false; - - if (strstr(operation, "step") != NULL) { - do_step = true; - } - - if (strstr(operation, "render") != NULL) { - do_render = true; - } - - start = lys_wall_time(); - for (int i = 0; i < n; i++) { - if (do_step) { - struct futhark_opaque_state *new_state; - FUT_CHECK(fut, futhark_entry_step(fut, &new_state, 1.0/n, state)); - futhark_free_opaque_state(fut, state); - state = new_state; - } - if (do_render) { - struct futhark_u32_2d *out_arr; - FUT_CHECK(fut, futhark_entry_render(fut, &out_arr, state)); - FUT_CHECK(fut, futhark_free_u32_2d(fut, out_arr)); - } - } - futhark_context_sync(fut); - end = lys_wall_time(); - - printf("Rendered %d frames in %fs (%f FPS)\n", - n, ((double)end-start)/1000000, - n / (((double)end-start)/1000000)); - - FUT_CHECK(fut, futhark_free_opaque_state(fut, state)); -} - -void usage(char **argv) { - printf("Usage: %s options...\n", argv[0]); - puts("Options:"); - puts(" -? 
Print this help and exit."); - puts(" -w INT Set the initial width of the window."); - puts(" -h INT Set the initial height of the window."); - puts(" -R Disallow resizing the window."); - puts(" -d DEV Set the computation device."); - puts(" -r INT Maximum frames per second."); - puts(" -i Select execution device interactively."); - puts(" -b <render|step> Benchmark program."); -} - -int main(int argc, char** argv) { - int width = INITIAL_WIDTH, height = INITIAL_HEIGHT, max_fps = 60; - bool allow_resize = true; - char *deviceopt = NULL; - bool device_interactive = false; - char *benchopt = NULL; - - int c; - while ( (c = getopt(argc, argv, "w:h:r:Rd:b:i")) != -1) { - switch (c) { - case 'w': - width = atoi(optarg); - if (width <= 0) { - fprintf(stderr, "'%s' is not a valid width.\n", optarg); - exit(EXIT_FAILURE); - } - break; - case 'h': - height = atoi(optarg); - if (height <= 0) { - fprintf(stderr, "'%s' is not a valid width.\n", optarg); - exit(EXIT_FAILURE); - } - break; - case 'r': - max_fps = atoi(optarg); - if (max_fps <= 0) { - fprintf(stderr, "'%s' is not a valid framerate.\n", optarg); - exit(EXIT_FAILURE); - } - break; - case 'R': - allow_resize = false; - break; - case 'd': - deviceopt = optarg; - break; - case 'i': - device_interactive = true; - break; - case 'b': - if (strcmp(optarg, "render") == 0 || - strcmp(optarg, "step") == 0) { - benchopt = optarg; - } else { - fprintf(stderr, "Use -b <render|step>\n"); - return EXIT_FAILURE; - } - break; - case '?': - usage(argv); - return EXIT_SUCCESS; - default: - fprintf(stderr, "unknown option: %c\n", c); - usage(argv); - return EXIT_FAILURE; - } - } - - if (optind < argc) { - fprintf(stderr, "Excess non-options: "); - while (optind < argc) - fprintf(stderr, "%s ", argv[optind++]); - fprintf(stderr, "\n"); - exit(EXIT_FAILURE); - } - - char font_path_rel[] = "/lib/github.com/diku-dk/lys/Inconsolata-Regular.ttf"; - char* font_path = malloc(sizeof(char) * strlen(argv[0]) + sizeof(font_path_rel)); - 
assert(font_path != NULL); - strcpy(font_path, argv[0]); - char *last_dash = strrchr(font_path, '/'); - if (last_dash != NULL) { - *last_dash = '\0'; - } - strcat(font_path, font_path_rel); - - int sdl_flags = 0; - if (allow_resize) { - sdl_flags |= SDL_WINDOW_RESIZABLE; - } - - struct lys_context ctx; - struct futhark_context_config *futcfg; - lys_setup(&ctx, width, height, max_fps, sdl_flags); - - char* opencl_device_name = NULL; - lys_setup_futhark_context(deviceopt, device_interactive, - &futcfg, &ctx.fut, &opencl_device_name); - if (opencl_device_name != NULL) { - printf("Using OpenCL device: %s\n", opencl_device_name); - printf("Use -d or -i to change this.\n"); - free(opencl_device_name); - } - - FUT_CHECK(ctx.fut, futhark_entry_grab_mouse(ctx.fut, &ctx.grab_mouse)); - - struct lys_text text; - ctx.event_handler_data = (void*) &text; - ctx.event_handler = handle_event; - - SDL_ASSERT(TTF_Init() == 0); - - text.font_path = font_path; - text.font_size = font_size_from_dimensions(ctx.width, ctx.height); - text.font = TTF_OpenFont(text.font_path, text.font_size); - SDL_ASSERT(text.font != NULL); - - if (benchopt != NULL) { - do_bench(ctx.fut, height, width, max_fps, benchopt); - } else { - int32_t seed = (int32_t) lys_wall_time(); - futhark_entry_init(ctx.fut, &ctx.state, - seed, ctx.height, ctx.width); - lys_run_sdl(&ctx); - free(ctx.data); - } - - TTF_CloseFont(text.font); - free(font_path); - - futhark_context_free(ctx.fut); - futhark_context_config_free(futcfg); - - return EXIT_SUCCESS; -} diff --git a/game_of_life/lib/github.com/diku-dk/lys/setup_flags.mk b/game_of_life/lib/github.com/diku-dk/lys/setup_flags.mk deleted file mode 100644 index 872f590..0000000 --- a/game_of_life/lib/github.com/diku-dk/lys/setup_flags.mk +++ /dev/null @@ -1,43 +0,0 @@ -LYS_BACKEND?=opencl -LYS_TTF?=0 - -ifeq ($(origin PROG_FUT_DEPS), undefined) -PROG_FUT_DEPS:=$(shell ls *.fut; find lib -name \*.fut) -endif - -PKG_CFLAGS_PKGS=sdl2 -ifeq ($(LYS_TTF),1) -PKG_CFLAGS_PKGS+= 
SDL2_ttf -endif - -PKG_CFLAGS=$(shell pkg-config --cflags $(PKG_CFLAGS_PKGS)) - -BASE_LDFLAGS=-lm -lSDL2 -ifeq ($(LYS_TTF),1) -BASE_LDFLAGS+= -lSDL2_ttf -endif - -NOWARN_CFLAGS=-std=c11 -O - -CFLAGS?=$(NOWARN_CFLAGS) $(PKG_CFLAGS) -Wall -Wextra -pedantic -ifeq ($(LYS_TTF),1) -CFLAGS+= -DLYS_TTF -endif - -ifeq ($(LYS_BACKEND),opencl) -OS=$(shell uname -s) -ifeq ($(OS),Darwin) -DEVICE_LDFLAGS=-framework OpenCL -else -DEVICE_LDFLAGS=-lOpenCL -endif -else ifeq ($(LYS_BACKEND),cuda) -DEVICE_LDFLAGS=-lcuda -lnvrtc -else ifeq ($(LYS_BACKEND),c) -DEVICE_LDFLAGS= -else ifeq ($(LYS_BACKEND),multicore) -DEVICE_LDFLAGS=-lpthread -else -$(error Unknown LYS_BACKEND: $(LYS_BACKEND). Must be 'opencl', 'cuda', 'multicore', or 'c') -endif -LDFLAGS?=$(BASE_LDFLAGS) $(DEVICE_LDFLAGS) diff --git a/game_of_life/libfpmpi.a b/game_of_life/libfpmpi.a deleted file mode 100644 index d1781ab3a3db4e7f94fc91be679cf5dafb3ae95e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24728 zcmd6P3v?CNb@q%TCyK%M%8eS!4XtlhZ)&-<sjeeCQcMz!m<x092oxjP0s}$}k_8q> z7FQ0oF_0z6WqN&Wnz$`)ocye&{Y%oUK-Qn%3wx#h#oPFaA2|4p-v}9ljcvd@^nd$2 z?%f*hG^UUB|7WeMnfYd)efHUB@AJ|=^R-EBy<X>c&i~8U*i^+H<vsr!i+$tD8bvYx ze8Vs<G7R(XbNkix7{>4W-Ef~_+;HDb?djI<clUNJY+bhK_I86Fm)zFc+qU#(V@X?& z(Xps2-QH_-bc3L)y(4Y(wlD1S+9lAt=%&szNR}=%mUZ{0jb&cjQe#nfy3M$KQIFBp z?X`8a5)*l+SgD5`L-s8BX=-qlw}Q92r4r<Az6ep`!bQt^+R|QU>!PKL(njuSVf(Ul zZ})A6(b2P{XVFA&Vz+9Hur|25x?1QyadlO-q?v#Is-!!m$TZOuGMUDAjQo>E4Wsj# z{C%Ccturn##v4bYfAX2r|9-_nCsXqEO1yA<J6@U?a<b`o$pIqF*zpo4(-bdV==?>w zAn7~L>EB+lO}zcD$Zy?XU2ip8{*zXd<v-u-`cKaEk3y(U7qV2i*%k3B$6paIbNw@} z|MVD1TM|QB$SS;c{KJ;NQy@0vu1Y3$+y1cazv;i``lAAOGjrlqDR1W^q$S%=$H%9# z<^7Id5_i0Bx&EAZrQ<inV}cekl)BkJBrQOyXHx!9%HO%zu&yJ0)Db7g$v`?gb3P$0 zuK$|MmvQ4U=qTp;#~hNw#h;2y$r=cAzjf>s2_&UV`P=bo{NPafR-&mxE5xbP#BNot z>%W&w97?=wWvW+0hG(gC7wkKgfT9dpG5<Yvl@;}0qM}>SYO-PbZihrr>46*PT63(q z);z0qTgO^Ex^YCPBITWy*@YNXasR!Pzk_&a9k~85BoLB(eldpAqoY!w=q0ux?2Xzx 
zMQypuYEDtBH~8C|XZp{f;>*z>Ff?Y1on(v9Ov>NoD5VKIc+Z7NRAjp1bwX^LEELV{ zzi6XHphvv)U$XrdlKyd3uQ_NkZ?CAVOHF)jw){u3!M$%$o0keY(!P_uirO5lXEizg zhiLc3#`V;~m7>j6b3>P}k+)PS{xAvB)_ka`K2x%GmmO~+F2WArP1Ph*+5HXvkz*(Q zmsC|6{5O(`p~P<D1B-dTREc*?)Y*}?Ckkm4rC2T2tieOk!6mTX#AtGGG<wfgQ3jce zA_vdK1{F3RtOdVh;!{~S63@!68qHMt2csLG^UJ7Gox6%i&e^lb$x5yMQ8T@?BYMdI zfsp?FQY$;%6nq5d>DfvDTfC9Np^S!XcgeKuq{@654q0f*++k&!4RbF_vixSFK2yG_ zzPS9?ss5d2z28(@GOyLW2F=~~W5+w~ct=(C74gc%r|=8$7~MDriWAoOhLb&y%57BF z#RjQ=Yoh}<gfF`B<>Wy<j{ka&Q5ET4J~6aHApSvWtj?T!L<iT1>TX}|`umxSL@!xQ zrIJ1&S2FQ-;*iLnlZ^EKUn$t|g1p1jDSvUj6QYY${p0DGP>JLEZoI<DJP;=V-E0@S z8DT}#RYD8!q|06ZgE7ujqq<Arps+J(WxUV?b)Wx-`b6-MEtwqrl+;?`!bW0bgQYf9 zeR*{7IH-g%qaSPV_t^dkTXlT>UGYj2ZI`ZeVE$F069$y>U$izC<H2fvh>95b(=qmy zdks}0TMcEinw2&CAD|-=8lNS^n+G|?BUuG<`X4MdAZ3j#z(Fq|?GpWkAZxW+5}&&M zVez0@k@V8t6VIZW(fhWNG1M?)*aI10Jy7y-W(?>rF+x;O(YAjEelfb(aKWo0R|Qpi zeza+dS2d7=-X!H!Qo%8QSZGa-6><cqv1VDb`wy5_?auV1F}fq?Bks+rg0gl(E6IfH zSX*WfUksZO{j1W7Bp##B5$RDK9TT;0M=z1~g*;{GI<3Z(|ET|rQZ5E~{a%gFP0bjX zK`RU|=4O5qUkxP*!$a>`E(UASg`nSr@nN_QH9k;48?xi1{<_NXc8JzR7f+TZ4bbX8 zY?1}WtlHPfs4k<*HTWM|n@gy5{9TKU?6qdjPGobVkFr|icnhKYVc0l&Io;f_iouto zgO5;tZK2?ZapMDGt~l#%7vq!E2AI?c`Q)Q~PnnPxa;R*(O5)%vW>B(^X_-yFQ!4C_ zh8$^fIonWnop^TeWkGkA8TQSPCb);0pze<#ZT8wxAwf`T=0rl?S;@q+^M-HL-3qgS zj<qy@L;GT+8#^R7`+H}){x;_a?{7wxSSK<N-vA&n<YpJfdtCorQ37?mxTjtR-GqeP zc#oKVo$(LC?cD6QNjK=S##^wmVL?NqX^-ncF=BYp19D8v#G2q|&{>(Fq{e~Kxc-Tm zVl=jHppgsnD)jwgIKD5w4(!hwk!!^0>kpHA_lQ1=hUWW^o%Ro^5x66I7!!;Y@#%0f zXAl#-9nlL1;%mXo+Dv^_^l$C>bSgfvTL|f72SicI?1Z(dvH3=8-fA#1`g&>dS(+Ri ziVpl3EMRD*xEPqoMIQu#19OTcp6%ad%DTZiWi560L`LIeq^4P)GzfW|A8e-v89R-O zUXt%-X)LE9X&}B{(0xW-#bG5OI+>hkgY7T~$6FB}?|7d$aM&8m$%H^V?f48~#g)`I z-t#3Y7E`cB*I!6e>O(326;!DL*;l!k*vwFE>-gJ7###M4O#7Oy`1tg4Qkqa18V+7L zQ|j{}WE*w;&qPV{TA{ERMtV2Aypib1o-xpiDYvzm>f%2g-H6%2s8ft?VzD8zLA z&w>^!YIjDb@1!zecNL=CzBRB3j9C!%2Pa9ywCj@lVxB8J*!9(1$os_gKXQuGG~wTc zW*3~Psi@V-RAeVKoH{c)n(s4F{gF2XDTc88eAa&&<Fo$)dg-Q=e+sLVW_Zuz@SY8# 
z5omIU0lEiTZgIwRi_(}d$Mw77vXSr}e&k@Bsj)G=?U6Nxqo5Wo!lTgT9Jc+BQr;oU z-zQ#E_t7JCjm69_L{|eJa{ThRcukshvKK%U7|wg6X_`%AO8Y(XA#~)J*N*X_7s#mR z%bc)6@&%0XYJGxfCHYjQWCK<*XCQ{Z(^8YQ<lv#cRSn*m2Jd|_MVDhP%?06lD3k_g z>f9Z#{O688Udq;g(v04_5=D^8<y@Zbq%I&@3poP$HgklZ!7#|9E8z_r@QQVh!qI8a zz=VKiWpD-Jt4u0Wy+KwB?Ochs#)z-`)6Dp|P+-O?x!>`Qqi&c-b&^?$9N9ZYX|XsV zNxhtnA;rTRP|O)b(?MI&|6#|wG2?a7j@18F6L;m8!sK&CO;XXU!HttrmXmmDj*1%9 z&?DB7Sjp;=>8@t>Ea8ejg>AxPx{c_-k8n#8Rl#%<FN=BU$YmXC-AqXw{%-YeHyg~; zuIl9MYv2xd&a3T;S48jmIRrpa6R7pe`nQ>A!05*9(T$(D-jG`i2Ou4y55}yCc<pkt z)mOr6M)$yL2Zs870`_=>&!GUJ8^L|WKZ@z3DB|7?C_?%N)kh8p{ZNWYkMqI4+A&we z9UI<PQESKNL<gUTm(j~I!EyNTX*Hh_Op~=A-#MK`_9w*sEmGRYZT~ITJKz>~L11sU zQ|!hop>Hv>eHxOYVuBTfbo?!@dBUze+V@6gLfkfw+Lyg0RWT181o0(DtB7vg7u~qS z@m_X{F;jEB{ciEXc$u;fIl2+w9g?qWo@_8rS+$4y-X6XPY!sbpis2L>Q9c`O`FyIu ze~Pv|KCzmx@_if*+9{?^PUe^K^-vy-u61%&^{$#&9cF_t*7doWJnZqXSo4bFP@6iK zpO|{P-gACQoEC`#eXy2f_WPqW+PVH4k9+}T*w~zqJuP(@mC*}n{{eX$@lN)$PL{@4 z|2?%H76s6VQ6;+?|CB_~yV(ARw*T&8BjpcUn~6F3x}HWs96?AuO%0~9(T%UVo+~#{ zs%(r;5V{iY;1HNfH(d5igZZM!;J-xUI}P$xkPwsX?RNI^FT4KhDesd!M+Xy#3dcL? 
zn7eFvANsbP*Ji8B1pzphsj0PHdptUD2zsD8)=(X(TePw>FufjjQPVmpIrdj*+M%Q; z@%&?g7qX$lHw>&NFt00zYy3_ML0dF$t6NE$7#eOyNw7yN+ibf23y$|0m`XoKP0{I6 z_wl0e4$L7@3TCFXB8S3>5Hzv^RJj;tjF=efG0M(Vi!Mgg!Rr6Oj1EkpYN0kc^^0Ye ztdy*d=zn!<OUu;tnMFo1Pu56}z1l#8QII}L&$f)}0+vxNEW@9aA9o-UjAOAu%b&8K zZBUD<r6U<f1<jPTQuT^W3JZPNSjydxy%VukWBXcN7>_!~F52_s^%$+l^-vu(BG=7u z89E<54sb2fE*#l~RN?SukcnnyKCR}@w13&u?D(I|^iRScQ7RUbY!|CCftlsodb;d- zlVCmMxPqOHKKmhB()h0>{Syz<ot_DTM|_%By_ve?(rMnL&CZq&ikvNni$q3P1iU+a zGg*^1*u?I{%T#GNAhvxbXR0?hFE+%MVU5TmC&XI0a%C;I3M+FXY+nbcYFz&a8(6h| zypxZRfP$AS7lmO+$a(t~_}|&Zia|p=i7a(wjbOr*34f9LOkzz1m8N`*U{A|kVKtwk zELcR8yV>8Ui3yElel_&@Dbx5sd>;t0N}&-_6r(0GAd{;UIjmz8E*EpEcOCDSf+4L1 z8vVzpH?Fk&10pYBw3xxO*zEWWeH{@<jmj4fm7&0g=?`A&E`vM^>k4Ui4&GrR{oD8^ zx>6HKe~ex>5Z^3bq3^Xee-XEj(9D+x{z^FyYMDKFXyvUPsp!&IvFN9|4}l>1l?ib- zdj;-{=w%b)a{UieXn~L@)%6cDZV+jjSF*Xoi7p)?Yg$fEDXc*Vd9)+iwWTBad`X;y zA>N|iZtdy5Ur`?w(;J8$#!1TENQ$GS!~{$P9c%@N3S?y6?mu?I-zpfuK(Sjna4#6T zssW%2VVO(;=smHiykVxW)4K&Lsdkj?kiF)flnDb@BZ~GQ|4o`EQk&<DI663pN6Zi@ zchHuNly~Y8YAnH8%SJK<P5D15*>Ibl2pCf*PySY{vUx$@(sW-eQ8n?(iB(r5zR@S1 z6PGCjB8fykM)J{Yaq9GD0|qff8114_Dwh|-y!|@ETwZ2=b^I4g*J2c?06syztD~zE ziS?czDYeGQcQqmJ27rt0YFtOa$Cwf+ySJ!5QgKgleI%AGNk%H~KF^L+-Sq`KQq%v% zDUtPN(LiK_=|nb}*G4v*^^qa76X0#Yt_PM>*mPh^&yUm~s}-pPE3mAOl+{N{Cy$#A z+PR>uXS8HTui!`Na;j`z4%CFpVM#eQnMH%h@1lGs@~CM=_LwWcb1|^lz@`e`o6H+< zosQ>axLSDMqMnOLcmgQHwIE1=$rLb|5A2qLf;$j=vpFq53sKkOw~lm&^gZ%mA$u~X z!}_s0v?Hs{qF<aJS%Y%dnvLUZ++B~m>u{ZdcP_3|@jMsTdOWWvD9z+?DZEdCuE<e@ z%?h{?R|^@ZQyIj$ew?t6NhBMRuoEXzk&MKqM@kK21+W(N?zsXsAmq2m1}2m4h5UDd z^>ih45@jvN^L2P$smd<03rg*X+Ik?I0z%@1ve)CcPnG|zT-yu%<=Z}A{<O#uYWw3* z((0mQWDTr!ZPCpHP60Rv;8a}e3Fs^+gbg7Agi8Uq$k~Leg`6{RZNu|4^=$M&#uds5 zzeWz3Z)2!2TOxEcm2@QRb6R9S$#?)dIs%^{rmKtYATSG&P?vyHSmTQB19cOqdvLWt zX&C>6VveXXcjWXUd{dMud{gK}`2>9P-;{3_{T*)V;e@krZGuhD$8|D!@89F589*DZ zNdj7ML+)H;<3?mDsI_ayp-DHGH-hqe^7cC1PDB6F1v%G|PLS&VUHcJzNGj4(v?{Xv zS~PyWu$HyZQcp+0LY;BfN0v7;%Em~Ku?FRoQU{6jQUQN+a{i{;VtrB3qmd0o?U79| 
zkqt#w1i)V0%m9E+2*6O$^#s-z6v7?{B^E(vV~Az>VSh;tCB}!Nh29qz{7*gg@v5-{ znTo-wP%1WP#dUTnHfSz!b}BaD>;+P>7;VPB_!1uffXP{>Ueh>3<5MtA1W*W{6oNMZ zC;oM~QmPYw4PzFrh4`!oPV}pBr6k5GTyMg!5Pe?=PJLY=`fLdP4<R`DW+6VCLU8)m zX(9S2fK$DCRR1T&K43dT=${M0_lMvIL-3;^_^A;5D==u1bG`CAG0qV!@Iv*9DSB;k z6F^Jp3Wdj%DT?t1_@yCyzN7h2)De;xA8@S+p`RRr&kVs^L+~!(!al|BsbPpdALVw0 z(BBe*)4V{m?}a42VTgVl*V{wr2SV^4hTuO5!T%MwkY8-6t9}{f{&NWZvx<K0S4c|L zuj0BXgnnxX{zM3_gPlY)7Ypuco$Ai0x7^klQ>M&JHnlcc^=`7&XlP8ePVQc^L<WGh zwk}Jzc{jIuoj133v@Ply6A_jp74c!YAS)4a<+XJoFw5vhm{lwN`W7us*R-Yu`<Y%_ z8ewF&vY0SpHMK3>NoYrJd;8fTTQbZ`MUCayf~#PeTGoyLG2(JvyLYR&xuFe_UpYdL zsw66}<IlPSF>I;;g3pRP%d=@+(B0jY<Bv)jOWK!sJ-6}TG!kQ`mZ!0|dqKO%Fcrac zUN@E^n$|<~T5r2llq_AuuH_0Non2k6l$PqHib*mGb~B7WA!9+J5eicQ;}n)Xl&IgQ zMEH38G(L|&B*G*3X`K3HN<sQ<5<ZK5FT;7chZs)%lIDLg?2;1kCs)*XT;bIH{sF_k z$LPPz@H-ezYcb8|fe=0qGkS7GO}~@j<Z>E+Dg=Ll;RwFW(O;m(jbObh6&|dY#pt;_ z4Gib<G>71G8P4UoiQ&B6H!+;I`*w!&c6^87yd5u4Q>0*hD-=%IcsnK;{g`%S_{B`0 z>lvRvV)(x@`mZwl=@9y1Mo(j(Zr^W{;7BAV=ii`kx^Vit8P4tY#|-~t<k9?prEua; zzNzsC8P4^ym+|5D@D{_lJ&c6lpD>);!&Pd&K(D!+QxzVRa|Xk?oJ$!WF6Rn{b2$e> z@Ov1}<y^;bZnqyXoZD@YntD^cxZPgFaNb_i7#?Ho@)Lyz?e<?7J-4?98P56d4dL@D zqvv*Ykm0;tK4Li6=cy2UrdnzQ<-bwkq#s`HoebxC_+KG>h8R89!y$(A_WH7#S5mo` zFg<rOoVVjjhVyn@$8fIa9SRTD_X$SN>pQ~mKV|Y<L<=mWAfK-(oaE>AtztNr=bIsX zW-)p$PdCGPyMLeIyxmuY;7=+%DF1#&&*eYNaNb_^YF<zE<?YhUaNaIAD?C{4ZH%6e z7e8Y-*W2$H&c}t*4F5W0(*5To5(0_J{WJVDK2_nt_FBd8OBub-aIWXSVL0!Xh8fQF zSw>1kqH=k?rYbxr=k1K1+r!^8dfwmuI)wfchVydAsdW;`Q;DC}&mS|K*Y|3M6H85h z55u`V{EXqG5>3CA;k@17U^tiaeT4_@=c`m063Nf)!D0BH<DD*d9>Y2RI~mUPu$JLm z5C6b$-tGq#9+dMVM*kN~o_G`wNI`q(Qh1QhUom=a4?kr%uh*Ll=k=;1rK@r|-obEQ z-yeqHuQHtTxr&4+z<(~ox&8c>;iM|vUQaNb+mA^`k3{<6^QDUwPWrq8Kh39-(Q|*f zg3(W8^xtJTiJ<w+3*pnr=y|)`&+xxwd>&?a6~kX<IM?R{GH#@xJzT1AsxR+nzRPev zUQA;+Z{O=e@NR~4Jv_j0UayS|=kgy`cu@XhjGoK?W%UoUVEcYU;lcK;XY`~(-Ck1} zJ#XJx4CnQ_J%rC7qv!2=AH%txA7D7w^FM{)37Q-uQN6fd+`@3~7poc0{o=<AC)d<^ zc!=RwGW<z}2ixTkqo-TV=LExfy-a#QBKdiLSj%v(XIJ6Dau+gsUT!zT=~m10eTMUL 
zS2CRI^BsnBeV$}E*XP%WDH7F->+`!I`0NmTmBOjM)%a=o|B=yO#qez*^cT}cJ`%~p z$B{p0IJd*u48IzAbh)b)9@PIjM$h&CKN(J<X+E1m@P|Y2tqdoXYd$6|=8>pg+)gf1 zcu>wO89kS?j^SL+X(4!12tJ45T+R-LbGwR>@JJ*l{dAh7ZZuw}o?8O=YW2K7fb0J+ z@O>)crT6peRAHLG-nX9<!1ccUD*@b4^XAoTKZHo?)O_pX0Nz85gY*D36p}8tjv4`J zyjpi_T<=R86PMk#B;B?E*L1JEcIvy{_O7;x>GtJmW8%U#ocNu%VA(QbB5k_1W509a z((ZKoM8s5FA$F71^G!?pCie7p_q6w>Z!;ze4(*+-9ldQ!+NB`>$-gn=r>#4+e9m#J zQ+JD%9q1Qo8l)e^SWGIa%OUb+4J|^D8r8T*>Zfu_2}DxvA=WB{EA@+a^ye3Wa`g+f z&8Q!Z%W+3M|D*X6$V)tFZo-o;pVme^>GJjUTX?DOIo&g#Q~qhlL|X-9M?CFQ`C}Z! zz1E+G7vP4<59U9v@~>5Ps_BFI=i)`7_S;u0bHr5TsQoCf&ad0AP}!7o4Q*~9RjTr7 ztf3`#Fh60zwgY5riql`i(EG~r>1<t-n-<D{oEiqHOI~t+gKgE(!w$IP6#MTx<9=1m z)$*`=>>SRxD<2>s>Vzb9*h{#;$emxK(`z_?frxZE??QpL<;mJDtD=Z8Yhj^|w*O?( z+>-RS`_Co)rx1jgLZmc;4lAuKA6$|&pR%4glk`74@Xz@7hW`=;HhLq7X@14ZPC<M- z$Pm77`9qdJi~zpaft4Ubpj^zx=^5Mc9!H3B9I@*u^J(JgWE)HzS;t)+9cZqpofD6x z?~t+c6kk^^B0q6Tds~AyG%b6%Vb|g$MRec@>W7$K@frtM4nu%X5wcI`Sw!V3M)%P1 zf(iry4&WHl&q0oZB?u(eaq$*T2vFdDJbLdLylgNJQ_+1HVru(Gkq^Y;<P2itA@D*9 zkUwhUTn#EA%Q-+grqJ_>>`c=_D4|_@`p#Yo*>;ep)In&zLox3n<`QQH4pNySlWQJo zFh8(s-|BlSpnFL_YT=Bqpd4<3`f@S&2glZ7_?vjyv6j?C(bHR@1e|EvNAdeO6CjTa z(HRUn{g1+TnXWk^xFQ76H4j_0hgZHe?1G1=r>NWTw~1vwewfGXQ^5XS3iI`bXF7=U z^>9i9aoeY0RcLbr=qla_?M@E9z0ys2Pa;|~<-dxPQPkG?vjm4ltCB=$COYhM&@o34 zH=94<F5}4QbVFIP_Eg^!lmX{Xw^uSd)#o^6%xnrl;@%k<Hj6X$kPCtygrIb?5q40b zVzNn463;?;iI+z%=vWJrNRc*PNne3Bw?_9445dfJ2_BgJL2WP;ikvb(0A;^{bE>cO zJtrywnJUJ}q|Yo{*Sp?_R&#^Dg93Ozh7rgwJJ2zIoa}=W=1)}8;r_ux>FY)FeuUOh zhpesI_g7xw?{mGQbjrJwj%TcMOdL#IO{RyV7@}!MUGpfU265l}Bu?<mE;$11;W_aA zH|m!}GmZQ#cLrIXf&3j88NuAAg@E`L1sSl=p{v@MPVXR6lq5w6yX!wCSoV*Y%P&p( zXW*?Ttz)k>c%L?SknJ>ZcqjtCUGo#j*?)3W%gGO#=WT9GOgZ&grGuZYoQA_9DgP<g zdlROILuh}Pr-vR~^DP`jNq;>>#`L;1`U0Bp0CWK%$DCglF~Ts)k++zQQ+fR-%=8zx z$$p1o+3AxE&;Wgi!~2L1SD^mrFUO0DB2<4oYGmU8Dh`w23jpeL!jC?~As*2Hhowmj zQ!}YErm)ZKO4GU?j;Rj6z?Wbc%ALqcnHPyZ1MV(PjN#<RPEjz~?bMWq15Xqjo%bhv zOG12TqZ9^89)-T^7+E#GU!iiMn-&~g93w2Fj_0vL9wqB9^iRR(IJF@i2?w8T_^>cs 
z*it@nIOT7FOwoJ(2zF&<?f7W_aWi@sJ#|Evj^OtWPJX*S;;R?D^UOgC;NII2?WP44 z2)f>Ow!C>>^eZ@B0r1-r?Ecs_A4A;up^oUX7h<Bl@bd8vJKDXYBYNg_r++900MXjZ zp25w>+wps<Bib_r9zDD9XTcWGb_{{$(GDxRfc`AT)AL~Z{MP7&&i?3ybw`}qt$iD8 ze}Dh}GuEfu&GcNHhkY@+aZhyPqo}mweMnK^yXemu$2-zLWJ20q&X#veh=~=wuq0vC zZeMw+dC+_j9!WWNJ6rab<Z_)LdeH6xZMi9J;!$Z6+k_SpL!#azf2Csnn@QVe#L)uL zx5DT}d*Pts;L9uT%ua@8i*8-zzXYAWin@vAf$I-#Iaq{xZ!@48F$01|OI&lS*4v{{ zR~g<>#Zl=!eIx$9{{5%S9ktu=jRJ3v7=15an{6~}U%K-m$9uMasHlHOk>!tHTf1%L z7o2sLfB9<ZM?*Kx-If>{d5oG#ChAM*M{`2{qqiETITOwM`0w*Zf>Rz^2T;Y%B?f#U z=xI-hQi23Sz%`HM&d2eJxJVmOxHp7EqZZ#{7JQSWd8Uou0DhF#(Vu**c0OiY#HEUO z&&J67qD6&*UWv~WlppDC`Xl%tFu8E-`P4{b(cD6O9Aqsg#UuR-{mILdkCPR#xhjSo zd<q77P97)fHed0&E5}R3BIkLDx}_)<It7^;3q`q;OdAxR_8gylT>V*XQFnVmk?8+R zHOT+5;?+vLu8Wjq5sO-Jcd;Fb-Bn^oD*MlKBXx@+mG-ts%-V@i`zH`<|0IIDNyk-W z3rN49KY1PJW2^J={Zr9aeT8&PGAvho=p!`5CyU5)EyHAzVQQqVxE6*}39&34?{AH} z9OS=Id4Gj_BN-{X3jz2Q{l!xwF^Y(;up?!bJgh+PLlPDH-x*U&YxE-F!BPO>LrRuH zF|zdP++%HNMK(6ctq`@4W3S{^;(2GqSJM7A^$x)`bxh&%Djsrw3dIV49dD_h38rfR z6vFF)i<m;ymcsXexgqp3L+}>GXTRbjd><J3iHMa{3r^w7!1@#(E5Qwt@Jn?i`b`QK zeg^z+@FP5?R4RN1*aHe*t#IKFz^ELeuOb6L684Sjukj;%J#8Q$3EQO`TGS%suJ@+) zR`DUYiSFbo1Ah|q=Nt6rN~3UK<Cr)-`VO56Q54~ZGOkWV7pjL`fFOH5D>6{N`&{3o zBk?a(eF>?J+F9?N&I-Zr3Bkp8GLS#WXKx67M74L2elf%O|6G2>a4zSA45y{Pmj6r2 zZiqkUQ_pbD=cf$k^baze(_cu8k%ID1XE^7xkl~z9gR(z*&FMFV;5!-4`RGl>pgg6@ zjtS@bxq;!l+#f2O)JpqMTAw-wiEtYCH7<@S;yEb)Ugal5Kc3MaU^s0DYd&|ddT}{p zprJ&3=%>>fb))e*^>R-D*ZoR`vO`VZquO<00N4G>F9W#lSB?j8-LF)u@-=_mudE2* zx?d6BHG!;}UiT~KzNg*WO&i=3y=Cdd)&H+|xD6wQI+2Q$8R1T*I(401s&3SUY?vn+ z&&ZbQIEece6?3p&>62`b%F*m96#btmTs5yk4dWh7m_y`^YR()=^AjTdkLFJxFYzRw z;%Tiazm9{r*Y(%e?*gOzRROv^D*vScT+6Soj>;b(GmfeJL0sq4IE}~TqDsvgHjb<O zu^b|Af^2&L6VG7&3N_W)A9xYWe>3tHYX9SMVq_eV#fm<NSnB+`{R@>%S&oyzAZh!d zK8Z{!n4fqC+a8dyj{dQW^jmq!{SE4m#4MEmfHH{Nb2;TrFuy*upm)lD-yZpuS60`E z55k_q9{C#8*OCNvLX7-(VErT34GrFLu?QERGr|{sj*AZ)&?XdinX%K04MOZ>Vuu*p znKt%d@HMZ!`ujg3BM6e_JJ|6&K-@Lt=<UZmAV0W7d)(u3COlIfx3Q^KM&I!)$CtC{ 
zqaPJ3o!lOsRf{jVzGp@4)6U@T^evfm=2UU|uTS~af1OEZak~b$Yq0Mg-S`=8h5IYA zhF_g=`nQ;F?TNlEv@x-b{yav1@FkmlbmASG6UFq5jg>Ci)tFp{r9R(b&F`^E*78f4 zRKPBwJ5`MQ+D7e(=wLH;k4Ju}_B6ysMrM`T7S3EFG~#6Lke?pFP1%jZg3URc3@87f z{(_qET=oPU(Ca;c1~NQ{_5`NU8atIcx0*jgYGeI(py0~?T6u=J6D%lEpGoVu{-OnR zgg7)&k^h}*HY&K#+D77P;z18Aoy$2$>N98!pp(!c9w>I7+9Q~%02IQ%37q&eDpo>& zz^Hu+(dW({VWpw_2l*Z8!V3$vj>wVfk4%e&1<I(yj}qAtpUcr2jZzR_DB-j4bO=6I z)r<J>IbM4RzBB}19)ibIxipmU;iM~oYyF=4oa+DYIDr*PFRHp{*-Y3Rs*)N6X-NE) z^Lt{YeO8a5VRNY82Fg`zm`6y2?3s85$HC8=U(cb)e|XaQf8QJ^EKcC9&nf>j6k2Ew zbU@{gaiMUp^{3$lxFJ$*oAj<!0jueOrC|QBIDrGI{j^?aeMWh8e%*eB%BJGhsPeU6 jknb0oLxs(O_NfMKAZ9ED)fzSjs!;O|?WdYWF#rDrlmepS diff --git a/game_of_life/main.c b/game_of_life/main.c deleted file mode 100644 index 5fc1844..0000000 --- a/game_of_life/main.c +++ /dev/null @@ -1,153 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <mpi.h> -#include "../lib/fpmpi.h" -#include "../lib/fp.h" -#include "gol.h" -#include "lib/github.com/diku-dk/lys/liblys.h" - -#define NB_ROWS (800) -#define NB_COLUMNS (800) -#define BOARD_SIZE (NB_ROWS * NB_COLUMNS) -#define NB_NEIGHBOURS 8 - -#define MAX_FPS (60) - -int8_t board[NB_ROWS][NB_COLUMNS] = {0}; -int my_rank; - -typedef struct tuple2 { - int8_t cell; - int8_t neighbours[NB_NEIGHBOURS]; -} tuple2_t; - -void init_board() { - for (int y = 0; y < NB_ROWS; ++y) { - for (int x = 0; x < NB_COLUMNS; ++x) { - board[y][x] = rand() % 2; - } - } -} - -void *get_neighbours(void *index) { - int cell_x = *(int *) index % NB_ROWS; - int cell_y = *(int *) index / NB_COLUMNS; - int8_t *neighbours = calloc(8, sizeof(int8_t)); - int i = 0; - for (int y = -1; y <= 1; ++y) { - for (int x = -1; x <= 1; ++x) { - if (y == 0 && x == 0) continue; - int neigh_y = cell_y + y; - if (neigh_y < 0) { - neigh_y = NB_ROWS - 1; - } else if (neigh_y >= NB_ROWS) { - neigh_y = 0; - } - int neigh_x = cell_x + x; - if (neigh_x < 0) { - neigh_x = NB_COLUMNS - 1; - } else if (neigh_x >= NB_COLUMNS) { - neigh_x = 0; - } - neighbours[i++] = 
board[neigh_y][neigh_x]; - } - } - return neighbours; -} - -void fold_sum(void *acc, void *neighbour) { - int8_t *acc8 = acc; - int8_t *neighbour8 = neighbour; - *acc8 += *neighbour8; -} - -void *next_state(void *element) { - tuple2_t *tuple2 = (tuple2_t *) element; - int8_t initial_value = 0; - int8_t *nb_cells_alive = local_fold_left(tuple2->neighbours, 8, FPMPI_INT8, FPMPI_INT8, fold_sum, &initial_value); - int8_t *next_state = calloc(1, sizeof(int8_t)); - *next_state = (tuple2->cell == 1 && (*nb_cells_alive == 2 || *nb_cells_alive == 3)) || - (tuple2->cell == 0 && *nb_cells_alive == 3); - return next_state; -} - -void *zip_cell_neigh(void *cell, void *neighs) { - tuple2_t *tuple2 = calloc(1, sizeof(tuple2_t)); - tuple2->cell = *(int8_t *) cell; - memcpy(tuple2->neighbours, neighs, NB_NEIGHBOURS * sizeof(int8_t)); - return tuple2; -} - -void handle_event(struct lys_context *ctx, enum lys_event event) { - MPI_Bcast(&board[0][0], BOARD_SIZE, MPI_INT8_T, FPMPI_ROOT_RANK, MPI_COMM_WORLD); - - fpmpi_result_t indexes = iota(BOARD_SIZE, MPI_COMM_WORLD); -// printf("Indexes OK: %d\n", indexes.count); - - fpmpi_result_t neighbours = map(indexes.content, BOARD_SIZE, FPMPI_INT32, NB_NEIGHBOURS * FPMPI_INT8, - get_neighbours, - MPI_COMM_WORLD); -// printf("Neighbours OK: %d\n", neighbours.count); - fpmpi_result_t board_with_neighbours = zip(&board[0][0], neighbours.content, BOARD_SIZE, FPMPI_INT8, 8 * FPMPI_INT8, - sizeof(tuple2_t), zip_cell_neigh, MPI_COMM_WORLD); -// printf("Board with Neigh OK\n"); - fpmpi_result_t new_board = map(board_with_neighbours.content, BOARD_SIZE, sizeof(tuple2_t), FPMPI_INT8, next_state, - MPI_COMM_WORLD); -// printf("New Board OK\n"); - - if (my_rank == FPMPI_ROOT_RANK) { - memcpy(&board[0][0], new_board.content, BOARD_SIZE); - struct futhark_i8_1d *fut_new_board = futhark_new_i8_1d(ctx->fut, &board[0][0], BOARD_SIZE); - futhark_entry_init(ctx->fut, &ctx->state, fut_new_board, NB_ROWS, NB_COLUMNS, BOARD_SIZE); - free(indexes.content); - 
free(neighbours.content); - free(board_with_neighbours.content); - free(new_board.content); - futhark_free_i8_1d(ctx->fut, fut_new_board); - } -} - -uint32_t *run_interactive(struct futhark_context *fut_ctx, int width, int height, struct futhark_i8_1d *fut_board) { - struct lys_context ctx = {0}; - lys_setup(&ctx, width, height, MAX_FPS, 0); - - ctx.fut = fut_ctx; - ctx.event_handler_data = NULL; - ctx.event_handler = handle_event; - - futhark_entry_init(ctx.fut, &ctx.state, fut_board, NB_ROWS, NB_COLUMNS, BOARD_SIZE); - lys_run_sdl(&ctx); - return ctx.data; -} - -int main(int argc, char *argv[]) { - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - if (my_rank == FPMPI_ROOT_RANK) { - struct futhark_context_config *fut_cfg; - struct futhark_context *fut_ctx; - char *deviceopt = NULL; - bool device_interactive = true; - char *opencl_device_name = NULL; - - lys_setup_futhark_context(deviceopt, device_interactive, &fut_cfg, &fut_ctx, &opencl_device_name); - if (opencl_device_name != NULL) { - fprintf(stdout, "Using OpenCL device: %s\n", opencl_device_name); - } - init_board(); - struct futhark_i8_1d *fut_board = futhark_new_i8_1d(fut_ctx, &board[0][0], BOARD_SIZE); - run_interactive(fut_ctx, NB_COLUMNS, NB_ROWS, fut_board); - - free(opencl_device_name); - futhark_free_i8_1d(fut_ctx, fut_board); - futhark_context_config_free(fut_cfg); - futhark_context_free(fut_ctx); - } else { - for (;;) { - handle_event(NULL, LYS_LOOP_ITERATION); - } - } - - MPI_Finalize(); - return 0; -} diff --git a/lib/.gitignore b/lib/.gitignore deleted file mode 100644 index ef86935..0000000 --- a/lib/.gitignore +++ /dev/null @@ -1,383 +0,0 @@ -### macOS template -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes -.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories 
potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -### Windows template -# Windows thumbnail cache files -Thumbs.db -Thumbs.db:encryptable -ehthumbs.db -ehthumbs_vista.db - -# Dump file -*.stackdump - -# Folder config file -[Dd]esktop.ini - -# Recycle Bin used on file shares -$RECYCLE.BIN/ - -# Windows Installer files -*.cab -*.msi -*.msix -*.msm -*.msp - -# Windows shortcuts -*.lnk - -### C template -# Prerequisites -*.d - -# Object files -*.o -*.ko -*.obj -*.elf - -# Linker output -*.ilk -*.map -*.exp - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb - -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf - -### macOS template -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes -.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -### Linux template -*~ - -# temporary files which can be created if a process still has a handle open of a deleted file -.fuse_hidden* - -# KDE directory preferences -.directory - -# Linux trash folder which might appear on any partition or disk -.Trash-* - -# .nfs files are created when an open file is removed but is still being accessed -.nfs* - -### C template -# Prerequisites -*.d - -# Object files -*.o -*.ko -*.obj -*.elf - -# Linker output -*.ilk -*.map -*.exp - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. 
Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb - -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf - -### C template -# Prerequisites -*.d - -# Object files -*.o -*.ko -*.obj -*.elf - -# Linker output -*.ilk -*.map -*.exp - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb - -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf - -### macOS template -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes -.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -### Windows template -# Windows thumbnail cache files -Thumbs.db -Thumbs.db:encryptable -ehthumbs.db -ehthumbs_vista.db - -# Dump file -*.stackdump - -# Folder config file -[Dd]esktop.ini - -# Recycle Bin used on file shares -$RECYCLE.BIN/ - -# Windows Installer files -*.cab -*.msi -*.msix -*.msm -*.msp - -# Windows shortcuts -*.lnk - -# User-specific stuff -.idea/**/workspace.xml -.idea/**/tasks.xml -.idea/**/usage.statistics.xml -.idea/**/dictionaries -.idea/**/shelf - -# Generated files -.idea/**/contentModel.xml - -# Sensitive or high-churn files -.idea/**/dataSources/ -.idea/**/dataSources.ids -.idea/**/dataSources.local.xml -.idea/**/sqlDataSources.xml -.idea/**/dynamic.xml -.idea/**/uiDesigner.xml -.idea/**/dbnavigator.xml - -# Gradle -.idea/**/gradle.xml 
-.idea/**/libraries - -# Gradle and Maven with auto-import -# When using Gradle or Maven with auto-import, you should exclude module files, -# since they will be recreated, and may cause churn. Uncomment if using -# auto-import. -# .idea/artifacts -# .idea/compiler.xml -# .idea/jarRepositories.xml -# .idea/modules.xml -# .idea/*.iml -# .idea/modules -# *.iml -# *.ipr - -# CMake -cmake-build-*/ - -# Mongo Explorer plugin -.idea/**/mongoSettings.xml - -# File-based project format -*.iws - -# IntelliJ -out/ - -# mpeltonen/sbt-idea plugin -.idea_modules/ - -# JIRA plugin -atlassian-ide-plugin.xml - -# Cursive Clojure plugin -.idea/replstate.xml - -# Crashlytics plugin (for Android Studio and IntelliJ) -com_crashlytics_export_strings.xml -crashlytics.properties -crashlytics-build.properties -fabric.properties - -# Editor-based Rest Client -.idea/httpRequests - -# Android studio 3.1+ serialized cache file -.idea/caches/build_file_checksums.ser - -.idea diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt deleted file mode 100644 index fe86691..0000000 --- a/lib/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -cmake_minimum_required(VERSION 3.17) -project(fpmpi C) - -set(CMAKE_C_STANDARD 11) - -if (CMAKE_BUILD_TYPE MATCHES Debug) - set(GCC_COMPILE_FLAGS "-Wall -Wextra -pedantic -fsanitize=undefined -fsanitize=address") - if (CMAKE_SYSTEM_NAME MATCHES "Linux") - set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -fsanitize=leak") - endif () -elseif (CMAKE_BUILD_TYPE MATCHES Release) - set(GCC_COMPILE_FLAGS "-O2") -elseif (CMAKE_BUILD_TYPE MATCHES Benchmark) - set(GCC_COMPILE_FLAGS "-DBENCHMARK -O2") -endif () - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}") - -find_package(MPI REQUIRED) -include_directories(${MPI_C_INCLUDE_PATH}) - -add_library(fpmpi fpmpi.c fpmpi.h fp.c fp.h dispatch.c dispatch.h) -target_link_libraries(fpmpi ${MPI_C_LIBRARIES}) - -add_executable(fpmpi_benchmark benchmark/benchmark.c) -target_link_libraries(fpmpi_benchmark fpmpi) - 
-add_executable(fpmpi_tests tests/tests.c) -target_link_libraries(fpmpi_tests fpmpi) diff --git a/lib/Makefile b/lib/Makefile deleted file mode 100644 index be8c2a3..0000000 --- a/lib/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -all: release debug benchmark - -release: - mkdir -p "cmake-build-release" - cmake -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release - $(MAKE) -C cmake-build-release all - -debug: - mkdir -p "cmake-build-debug" - cmake -DCMAKE_BUILD_TYPE=Debug -Bcmake-build-debug - $(MAKE) -C cmake-build-release all - -benchmark: - mkdir -p "cmake-build-benchmark" - cmake -DCMAKE_BUILD_TYPE=Benchmark -Bcmake-build-benchmark - $(MAKE) -C cmake-build-benchmark all - -.PHONY: all release benchmark diff --git a/lib/benchmark/benchmark.c b/lib/benchmark/benchmark.c deleted file mode 100644 index 60e0163..0000000 --- a/lib/benchmark/benchmark.c +++ /dev/null @@ -1,149 +0,0 @@ -#include <mpi.h> -#include <stdlib.h> -#include <stdio.h> -#include "../fpmpi.h" - -#define BENCHMARK_MAP 1 -#define BENCHMARK_FILTER 2 -#define BENCHMARK_REDUCE 3 -#define BENCHMARK_FIND 4 -#define BENCHMARK_FOLD_LEFT 5 -#define BENCHMARK_FOLD_RIGHT 6 -#define BENCHMARK_SORT 7 -#define BENCHMARK_SCAN 8 -#define BENCHMARK_IOTA 9 -#define BENCHMARK_ZIP 10 - -void *map_mul_int(void *element) { - int *result = calloc(1, sizeof(int)); - *result = (*(int *) (element)) * 2; - return (void *) result; -} - -bool filter_only_even(void *element) { - int element32 = *(int *) element; - return element32 % 2 == 0; -} - -bool find_divide_by_three(void *element) { - int element32 = *(int *) element; - return element32 % 3 == 0; -} - -void reduce_sum(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = (*accumulator32 + current_value32); -} - -void fold_left_sub(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = 
(*accumulator32 - current_value32); -} - -void fold_right_sub(void *current_value, void *accumulator) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = (current_value32 - *accumulator32); -} - -bool sort_asc(void *left, void *right) { - int left32 = *(int *) left; - int right32 = *(int *) right; - return left32 < right32; -} - -int main(int argc, char *argv[]) { - if(argc < 4) { - printf("Missing argv parameters.\n"); - exit(0); - } - - MPI_Init(&argc, &argv); - - int my_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - - int benchmark = atoi(argv[1]); - int times = atoi(argv[2]); - int N = atoi(argv[3]); - - int *array1 = NULL; - int *array2 = NULL; - if (benchmark == BENCHMARK_REDUCE || benchmark == BENCHMARK_FOLD_LEFT || benchmark == BENCHMARK_FOLD_RIGHT || - my_rank == FPMPI_ROOT_RANK) { - array1 = calloc(N, sizeof(int)); - array2 = calloc(N, sizeof(int)); - for (int i = 0; i < N; ++i) { - if (benchmark == BENCHMARK_SORT) { - array1[i] = rand(); - array2[i] = rand(); - } else { - array1[i] = i; - array2[i] = i; - } - } - } - - for (int i = 0; i < times; ++i) { - fpmpi_result_t result; - switch (benchmark) { - case BENCHMARK_MAP: { - result = map(array1, N, FPMPI_INT32, FPMPI_INT32, map_mul_int, MPI_COMM_WORLD); - } - break; - case BENCHMARK_FILTER: { - result = filter(array1, N, FPMPI_INT32, filter_only_even, MPI_COMM_WORLD); - } - break; - case BENCHMARK_REDUCE: { - result = reduce(array1, N, FPMPI_INT32, reduce_sum, MPI_COMM_WORLD); - } - break; - case BENCHMARK_FIND: { - result = find(array1, N, FPMPI_INT32, find_divide_by_three, MPI_COMM_WORLD); - } - break; - case BENCHMARK_FOLD_LEFT: { - int initial_value = 0; - result = fold_left(array1, N, FPMPI_INT32, FPMPI_INT32, fold_left_sub, &initial_value, MPI_COMM_WORLD); - } - break; - case BENCHMARK_FOLD_RIGHT: { - int initial_value = 0; - result = fold_right(array1, N, FPMPI_INT32, FPMPI_INT32, fold_right_sub, &initial_value, MPI_COMM_WORLD); - } 
- break; - case BENCHMARK_SORT: { - result = sort(array1, N, FPMPI_INT32, FPMPI_MERGE_SORT, sort_asc, MPI_COMM_WORLD); - } - break; - case BENCHMARK_SCAN: { -// result = sort(array1, N, FPMPI_INT32, FPMPI_MERGE_SORT, sort_asc, MPI_COMM_WORLD); - } - break; - case BENCHMARK_IOTA: { - result = iota(N, MPI_COMM_WORLD); - } - break; - case BENCHMARK_ZIP: { -// result = zip(array1, array2, FPMPI_INT32, FPMPI_MERGE_SORT, sort_asc, MPI_COMM_WORLD); - } - break; - default: - MPI_Finalize(); - exit(0); - } - - if (my_rank == FPMPI_ROOT_RANK) { - free(result.content); - } - } - if (benchmark == BENCHMARK_REDUCE || benchmark == BENCHMARK_FOLD_LEFT || benchmark == BENCHMARK_FOLD_RIGHT || - my_rank == FPMPI_ROOT_RANK) { - free(array1); - } - MPI_Finalize(); - return 0; -} diff --git a/lib/dispatch.c b/lib/dispatch.c deleted file mode 100644 index 7554058..0000000 --- a/lib/dispatch.c +++ /dev/null @@ -1,36 +0,0 @@ -#include <stdlib.h> -#include "dispatch.h" - -dispatch_t dispatch_init(int count, int type, int out_type, int world_size, int root) { - int nb_columns_per_process = count / world_size; - int remaining_columns = count % world_size; - - dispatch_t dispatch = { - .in_counts8 = calloc(world_size, sizeof(int)), - .in_displacements8 = calloc(world_size, sizeof(int)), - .out_counts8 = calloc(world_size, sizeof(int)), - .out_displacements8 = calloc(world_size, sizeof(int)), - }; - - for (int i = 0; i < world_size; ++i) { - int root_nb_columns = nb_columns_per_process + remaining_columns; - int nb_columns = (i == root) ? 
root_nb_columns : nb_columns_per_process; - if (i == 0) { - dispatch.in_displacements8[i] = 0; - dispatch.out_displacements8[i] = 0; - } else { - dispatch.in_displacements8[i] = dispatch.in_displacements8[i - 1] + dispatch.in_counts8[i - 1]; - dispatch.out_displacements8[i] = dispatch.out_displacements8[i - 1] + dispatch.out_counts8[i - 1]; - } - dispatch.in_counts8[i] = nb_columns * type; - dispatch.out_counts8[i] = nb_columns * out_type; - } - return dispatch; -} - -void dispatch_destroy(dispatch_t *dispatch) { - free(dispatch->in_displacements8); - free(dispatch->in_counts8); - free(dispatch->out_displacements8); - free(dispatch->out_counts8); -} diff --git a/lib/dispatch.h b/lib/dispatch.h deleted file mode 100644 index bdb08d8..0000000 --- a/lib/dispatch.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _DISPATCH_H_ -#define _DISPATCH_H_ - -typedef struct dispatch { - int *in_counts8; - int *in_displacements8; - int *out_counts8; - int *out_displacements8; -} dispatch_t; - -dispatch_t dispatch_init(int count, int type, int out_type, int world_size, int root); - -void dispatch_destroy(dispatch_t *dispatch); - -#endif //_DISPATCH_H_ diff --git a/lib/fp.c b/lib/fp.c deleted file mode 100644 index 3ca8106..0000000 --- a/lib/fp.c +++ /dev/null @@ -1,164 +0,0 @@ -#include "fp.h" -#include "fpmpi.h" -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -void *local_map(void *array, int count, int type, int map_type, void *f(void *)) { - uint8_t *array8 = (uint8_t *) array; - void *output = calloc(count, map_type); - uint8_t *output8 = (uint8_t *) output; - - for (int i = 0; i < count; ++i) { - void *result = f(array8 + i * type); - memcpy(output8, result, (size_t) map_type); - output8 += map_type; - free(result); - } - return output; -} - -void *local_filter(void *array, int count, int type, bool f(void *), int *output_count) { - uint8_t *array8 = (uint8_t *) array; - void *output = calloc(count, type); - uint8_t *output8 = (uint8_t *) output; - - for (int i = 0; 
i < count; ++i) { - uint8_t *element8 = array8 + i * type; - if (f(element8)) { - memcpy(output8, element8, type); - output8 += type; - ++(*output_count); - } - } - return output; -} - -void *local_fold_left(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value) { - uint8_t *array8 = (uint8_t *) array; - void *accumulator = calloc(1, fold_type); - int i = 0; - /* initial_value is NULL for reduce */ - if (initial_value == NULL) { - memcpy(accumulator, array8, type); - ++i; - } else { - memcpy(accumulator, initial_value, fold_type); - } - - for (; i < count; ++i) { - f(accumulator, array8 + i * type); - } - return accumulator; -} - -void *local_fold_right(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value) { - uint8_t *array8 = (uint8_t *) array; - void *accumulator = calloc(1, fold_type); - memcpy(accumulator, initial_value, fold_type); - for (int i = count - 1; i >= 0; --i) { - f(array8 + i * type, accumulator); - } - return accumulator; -} - -void *local_find(void *array, int count, int type, bool f(void *)) { - uint8_t *array8 = (uint8_t *) array; - for (int i = 0; i < count; ++i) { - if (f(array8 + i * type)) { - return array8 + i * type; - } - } - return NULL; -} - -// https://gist.github.com/hackrio1/a11c8499ed68f5df6c30e53d1c3fe076 -static void merge_sort(void *array, void *work_array, int type, bool f(void *, void *), int i, int j) { - uint8_t *array8 = (uint8_t *) array; - uint8_t *work_array8 = (uint8_t *) work_array; - if (j <= i) { - return; // the subsection is empty or a single element - } - int mid = (i + j) / 2; - - // left sub-array is a[i .. mid] - // right sub-array is a[mid + 1 .. 
j] - - merge_sort(array, work_array, type, f, i, mid); // sort the left sub-array recursively - merge_sort(array, work_array, type, f, mid + 1, j); // sort the right sub-array recursively - - int pointer_left = i; // pointer_left points to the beginning of the left sub-array - int pointer_right = mid + 1; // pointer_right points to the beginning of the right sub-array - int k; // k is the loop counter - - // we loop from i to j to fill each element of the final merged array - for (k = i; k <= j; k++) { - if (pointer_left == mid + 1) { // left pointer has reached the limit - memcpy(work_array8 + k * type, array8 + pointer_right * type, type); - pointer_right++; - } else if (pointer_right == j + 1) { // right pointer has reached the limit - memcpy(work_array8 + k * type, array8 + pointer_left * type, type); - pointer_left++; - } else if (f(array8 + pointer_left * type, - array8 + pointer_right * type)) { // pointer left points to smaller element - memcpy(work_array8 + k * type, array8 + pointer_left * type, type); - pointer_left++; - } else { // pointer right points to smaller element - memcpy(work_array8 + k * type, array8 + pointer_right * type, type); - pointer_right++; - } - } - - for (k = i; k <= j; k++) { // copy the elements from work_array[] to array[] - memcpy(array8 + k * type, work_array8 + k * type, type); - } -} - -void local_sort(void *array, int count, int type, int sort_method, bool f(void *, void *)) { - switch (sort_method) { - case FPMPI_MERGE_SORT: { - void *work_array = calloc(count, type); - merge_sort(array, work_array, type, f, 0, count - 1); - free(work_array); - } - default: - break; - } -} - -void *local_scan(void *array, int count, int type, int scan_type, void *f(void *, void *), void *initial_value) { - uint8_t *array8 = (uint8_t *) array; - void *accumulators = calloc(count + 1, scan_type); - uint8_t *accumulators8 = (uint8_t *) accumulators; - memcpy(accumulators, initial_value, scan_type); - - for (int i = 0; i < count; ++i) { - void 
*accumulator = f(accumulators8 + i * scan_type, array8 + i * type); - memcpy(accumulators8 + (i + 1) * scan_type, accumulator, scan_type); - free(accumulator); - } - return accumulators; -} - -void *local_zip(void *array1, void *array2, int count, int type1, int type2, int tuple_type, void *f(void *, void *)) { - uint8_t *array1_8 = (uint8_t *) array1; - uint8_t *array2_8 = (uint8_t *) array2; - - void *output = calloc(count, tuple_type); - uint8_t *output8 = (uint8_t *) output; - - for (int i = 0; i < count; ++i) { - void *tuple = f(array1_8 + i * type1, array2_8 + i * type2); - memcpy(output8 + i * tuple_type, tuple, tuple_type); - free(tuple); - } - return output; -} - -int *local_iota(int start, int count) { - int *output = calloc(count, sizeof(int)); - for (int i = 0; i < count; ++i) { - output[i] = start++; - } - return output; -} diff --git a/lib/fp.h b/lib/fp.h deleted file mode 100644 index 0d7f6c2..0000000 --- a/lib/fp.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _FP_H_ -#define _FP_H_ - -#include <stdbool.h> -#include "fpmpi.h" - -void *local_map(void *array, int count, int type, int map_type, void *f(void *)); - -void *local_filter(void *array, int count, int type, bool f(void *), int *output_count); - -void *local_fold_left(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value); - -void *local_fold_right(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value); - -void *local_find(void *array, int count, int type, bool f(void *)); - -void local_sort(void *array, int count, int type, int sort_method, bool f(void *, void *)); - -void *local_scan(void *array, int count, int type, int scan_type, void *f(void *, void *), void *initial_value); - -void *local_zip(void *array1, void *array2, int count, int type1, int type2, int tuple_type, void *f(void *, void *)); - -int *local_iota(int start, int count); - -#endif //_FP_H_ diff --git a/lib/fpmpi.c b/lib/fpmpi.c deleted file mode 100644 
index b92e965..0000000 --- a/lib/fpmpi.c +++ /dev/null @@ -1,546 +0,0 @@ -#include <stdlib.h> - -#ifdef BENCHMARK -#include <stdio.h> -#endif - -#include <string.h> -#include <stdio.h> -#include "fpmpi.h" -#include "fp.h" -#include "dispatch.h" - -#define TAG_FILTER_LOCAL_OUTPUT_COUNT 0 -#define TAG_FIND_HAS_RESULT 1 -#define TAG_FIND_LOCAL_OUTPUT 2 -#define TAG_FOLD_LEFT_ACCUMULATOR 3 -#define TAG_FOLD_RIGHT_ACCUMULATOR 4 -#define TAG_SORT_LOCAL_OUTPUT 5 -#define TAG_SCAN_ACCUMULATORS 6 -#define TAG_ZIP_START_ARRAYS 7 - -#define min(a, b) (((a) <= (b)) ? (a) : (b)) -#define set_count(my_rank, count) my_rank == FPMPI_ROOT_RANK ? count : 0 - -int get_world_size(MPI_Comm comm) { - int world_size; - MPI_Comm_size(comm, &world_size); - return world_size; -} - -int get_my_rank(MPI_Comm comm) { - int my_rank; - MPI_Comm_rank(comm, &my_rank); - return my_rank; -} - -fpmpi_result_t map(void *array, int count, int type, int map_type, void *f(void *), MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - dispatch_t dispatch = dispatch_init(count, type, map_type, world_size, FPMPI_ROOT_RANK); - - void *local_array = calloc(dispatch.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array, dispatch.in_counts8, dispatch.in_displacements8, MPI_UINT8_T, local_array, - dispatch.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - - int local_count = dispatch.in_counts8[my_rank] / type; - void *local_output = local_map(local_array, local_count, type, map_type, f); - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(count, map_type); - } - MPI_Gatherv(local_output, dispatch.out_counts8[my_rank], MPI_UINT8_T, result, dispatch.out_counts8, - dispatch.out_displacements8, MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - - free(local_array); - free(local_output); - dispatch_destroy(&dispatch); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == 
FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, count), - }; -} - -fpmpi_result_t filter(void *array, int count, int type, bool f(void *), MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - dispatch_t dispatch = dispatch_init(count, type, type, world_size, FPMPI_ROOT_RANK); - - void *local_array = calloc(dispatch.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array, dispatch.in_counts8, dispatch.in_displacements8, MPI_UINT8_T, local_array, - dispatch.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - int local_array_count = dispatch.in_counts8[my_rank] / type; - int local_output_count = 0; - void *local_output = local_filter(local_array, local_array_count, type, f, &local_output_count); - - MPI_Request request; - MPI_Isend(&local_output_count, 1, MPI_INT, FPMPI_ROOT_RANK, TAG_FILTER_LOCAL_OUTPUT_COUNT, comm, &request); - - void *result = NULL; - int result_count = 0; - if (my_rank == FPMPI_ROOT_RANK) { - for (int i = 0; i < world_size; ++i) { - MPI_Recv(&dispatch.out_counts8[i], 1, MPI_INT, i, TAG_FILTER_LOCAL_OUTPUT_COUNT, comm, MPI_STATUS_IGNORE); - result_count += dispatch.out_counts8[i]; - dispatch.out_counts8[i] *= type; - dispatch.out_displacements8[i] = - i == 0 ? 
0 : dispatch.out_displacements8[i - 1] + dispatch.out_counts8[i - 1]; - } - result = calloc(result_count, type); - } - - MPI_Wait(&request, MPI_STATUS_IGNORE); - - MPI_Gatherv(local_output, local_output_count * type, MPI_UINT8_T, result, dispatch.out_counts8, - dispatch.out_displacements8, MPI_UINT8_T, FPMPI_ROOT_RANK, MPI_COMM_WORLD); - - free(local_array); - free(local_output); - dispatch_destroy(&dispatch); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, result_count), - }; -} - -fpmpi_result_t reduce(void *array, int count, int type, void f(void *, void *), MPI_Comm comm) { - return fold_left(array, count, type, type, f, NULL, comm); -} - -fpmpi_result_t -fold_left(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value, MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - void *accumulator = calloc(1, fold_type); - if (my_rank != 0) { - MPI_Recv(accumulator, 1 * fold_type, MPI_UINT8_T, my_rank - 1, TAG_FOLD_LEFT_ACCUMULATOR, comm, - MPI_STATUS_IGNORE); - initial_value = accumulator; - } - - void *local_result = local_fold_left(array, count, type, fold_type, f, initial_value); - - int dest = my_rank == world_size - 1 ? 
FPMPI_ROOT_RANK : my_rank + 1; - - /* Isend because if dest == my_rank, a deadlock will occur, MPI_Recv is after */ - MPI_Request request = {0}; - MPI_Isend(local_result, 1 * fold_type, MPI_UINT8_T, dest, TAG_FOLD_LEFT_ACCUMULATOR, comm, &request); - - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(1, fold_type); - MPI_Recv(result, 1 * fold_type, MPI_UINT8_T, world_size - 1, TAG_FOLD_LEFT_ACCUMULATOR, comm, - MPI_STATUS_IGNORE); - } - - MPI_Wait(&request, MPI_STATUS_IGNORE); - - free(local_result); - free(accumulator); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, 1), - }; -} - -fpmpi_result_t -fold_right(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value, - MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - void *accumulator = calloc(1, fold_type); - if (my_rank != world_size - 1) { - MPI_Recv(accumulator, 1 * fold_type, MPI_UINT8_T, my_rank + 1, TAG_FOLD_RIGHT_ACCUMULATOR, comm, - MPI_STATUS_IGNORE); - initial_value = accumulator; - } - - void *local_result = local_fold_right(array, count, type, fold_type, f, initial_value); - - int dest = my_rank == 0 ? 
FPMPI_ROOT_RANK : my_rank - 1; - /* Isend because if dest == my_rank, a deadlock will occur, MPI_Recv will be after */ - MPI_Request request = {0}; - MPI_Isend(local_result, 1 * fold_type, MPI_UINT8_T, dest, TAG_FOLD_RIGHT_ACCUMULATOR, comm, &request); - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(1, fold_type); - MPI_Recv(result, 1 * fold_type, MPI_UINT8_T, 0, TAG_FOLD_RIGHT_ACCUMULATOR, comm, - MPI_STATUS_IGNORE); - } - - MPI_Wait(&request, MPI_STATUS_IGNORE); - - free(local_result); - free(accumulator); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, 1), - }; -} - -fpmpi_result_t find(void *array, int count, int type, bool f(void *), MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - dispatch_t dispatch = dispatch_init(count, type, type, world_size, FPMPI_ROOT_RANK); - - void *local_array = calloc(dispatch.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array, dispatch.in_counts8, dispatch.in_displacements8, MPI_UINT8_T, local_array, - dispatch.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - int local_count = dispatch.in_counts8[my_rank] / type; - void *local_output = local_find(local_array, local_count, type, f); - bool local_has_result = local_output != NULL; - - MPI_Request requests[2] = {0}; - MPI_Isend(&local_has_result, 1, MPI_C_BOOL, FPMPI_ROOT_RANK, TAG_FIND_HAS_RESULT, comm, &requests[0]); - - if (local_has_result) { - MPI_Isend(local_output, 1 * type, MPI_UINT8_T, FPMPI_ROOT_RANK, TAG_FIND_LOCAL_OUTPUT, comm, &requests[1]); - } - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - uint8_t *results = calloc(world_size, type); - int result_index = INT32_MAX; - MPI_Status status; - bool has_result = false; - for (int i 
= 0; i < world_size; ++i) { - MPI_Recv(&has_result, 1, MPI_C_BOOL, MPI_ANY_SOURCE, TAG_FIND_HAS_RESULT, comm, &status); - if (has_result) { - MPI_Recv(results + status.MPI_SOURCE * type, 1 * type, MPI_UINT8_T, status.MPI_SOURCE, - TAG_FIND_LOCAL_OUTPUT, comm, MPI_STATUS_IGNORE); - result_index = min(status.MPI_SOURCE, result_index); - } - } - if (result_index != INT32_MAX) { - result = calloc(1, type); - memcpy(result, results + result_index * type, type); - } - free(results); - } - - MPI_Waitall(local_has_result + 1, requests, MPI_STATUSES_IGNORE); - - free(local_array); - dispatch_destroy(&dispatch); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, result != NULL), - }; -} - -// https://www.geeksforgeeks.org/merge-k-sorted-arrays/ -static void -marge_arrays(void *array1, void *array2, int count1, int count2, int type, bool f(void *, void *), void *result_array) { - int i = 0, j = 0, k = 0; - uint8_t *array1_8 = (uint8_t *) array1; - uint8_t *array2_8 = (uint8_t *) array2; - uint8_t *result_array8 = (uint8_t *) result_array; - - // Traverse both array - while (i < count1 && j < count2) { - // Check if current element of first - // array is smaller than current element - // of second array. If yes, store first - // array element and increment first array - // index. 
Otherwise do same with second array - if (f(array1_8 + i * type, array2_8 + j * type)) { - memcpy(result_array8 + k * type, array1_8 + i * type, type); - ++k, ++i; - } else { - memcpy(result_array8 + k * type, array2_8 + j * type, type); - ++k, ++j; - } - } - - // Store remaining elements of first array - while (i < count1) { - memcpy(result_array8 + k * type, array1_8 + i * type, type); - ++k, ++i; - } - - // Store remaining elements of second array - while (j < count2) { - memcpy(result_array8 + k * type, array2_8 + j * type, type); - ++k, ++j; - } -} - -fpmpi_result_t sort(void *array, int count, int type, int sort_method, bool f(void *, void *), MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - dispatch_t dispatch = dispatch_init(count, type, type, world_size, FPMPI_ROOT_RANK); - - void *local_array = calloc(dispatch.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array, dispatch.in_counts8, dispatch.in_displacements8, MPI_UINT8_T, local_array, - dispatch.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - int local_count = dispatch.in_counts8[my_rank] / type; - local_sort(local_array, local_count, type, sort_method, f); - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(local_count, type); - memcpy(result, local_array, dispatch.in_counts8[my_rank]); - - if (world_size > 1) { - int current_count = local_count; - int recv_count8 = dispatch.in_counts8[(my_rank + 1) % world_size]; - int recv_count = dispatch.in_counts8[(my_rank + 1) % world_size] / type; - void *recv_buffer = calloc(recv_count8, sizeof(uint8_t)); - - for (int i = 0; i < world_size - 1; ++i) { - MPI_Recv(recv_buffer, recv_count8, MPI_UINT8_T, MPI_ANY_SOURCE, TAG_SORT_LOCAL_OUTPUT, comm, - MPI_STATUS_IGNORE); - void *tmp_result = calloc(current_count + recv_count, type); - marge_arrays(result, recv_buffer, current_count, recv_count, type, f, tmp_result); 
- free(result); - result = tmp_result; - current_count += recv_count; - } - free(recv_buffer); - } - } else { - MPI_Send(local_array, dispatch.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, TAG_SORT_LOCAL_OUTPUT, comm); - } - - free(local_array); - dispatch_destroy(&dispatch); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = count, - }; -} - - -fpmpi_result_t -scan(void *array, int count, int type, int scan_type, void *f(void *, void *), void *initial_value, MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - void *current_accumulators = NULL; - int current_accumulators_count8 = 0; - - if (my_rank != 0) { - MPI_Status status = {0}; - MPI_Probe(my_rank - 1, TAG_SCAN_ACCUMULATORS, MPI_COMM_WORLD, &status); - MPI_Get_count(&status, MPI_UINT8_T, ¤t_accumulators_count8); - current_accumulators = calloc(current_accumulators_count8, sizeof(uint8_t)); - - MPI_Recv(current_accumulators, current_accumulators_count8, MPI_UINT8_T, my_rank - 1, TAG_SCAN_ACCUMULATORS, - comm, MPI_STATUS_IGNORE); - /* Initial value is the last accumulator value */ - initial_value = ((uint8_t *) current_accumulators) + current_accumulators_count8 - scan_type; - } else { - current_accumulators = calloc(count + 1, scan_type); - current_accumulators_count8 = (count + 1) * scan_type; - } - - void *local_accumulators = local_scan(array, count, type, scan_type, f, initial_value); - int local_accumulators_count8 = (count + 1) * scan_type; - - if (my_rank != 0) { - /* First accumulators is ignored because it will be duplicated */ - uint8_t *local_accumulators8 = (uint8_t *) local_accumulators + scan_type; - local_accumulators_count8 = (count) * scan_type; - current_accumulators = realloc(current_accumulators, current_accumulators_count8 + 
local_accumulators_count8); - uint8_t *current_accumulators8 = (uint8_t *) current_accumulators + current_accumulators_count8; - current_accumulators_count8 += local_accumulators_count8; - memcpy(current_accumulators8, local_accumulators8, local_accumulators_count8); - } else { - memcpy(current_accumulators, local_accumulators, local_accumulators_count8); - } - - int dest = my_rank == world_size - 1 ? FPMPI_ROOT_RANK : my_rank + 1; - - /* Isend because if dest == my_rank, a deadlock will occur, MPI_Recv is after */ - MPI_Request request = {0}; - MPI_Isend(current_accumulators, current_accumulators_count8, MPI_UINT8_T, dest, TAG_SCAN_ACCUMULATORS, comm, - &request); - - void *result = NULL; - int recv_count = 0; - if (my_rank == FPMPI_ROOT_RANK) { - MPI_Status status = {0}; - MPI_Probe(world_size - 1, TAG_SCAN_ACCUMULATORS, MPI_COMM_WORLD, &status); - MPI_Get_count(&status, MPI_UINT8_T, &recv_count); - result = calloc(recv_count, sizeof(uint8_t)); - MPI_Recv(result, recv_count, MPI_UINT8_T, world_size - 1, TAG_SCAN_ACCUMULATORS, comm, - MPI_STATUS_IGNORE); - } - - MPI_Wait(&request, MPI_STATUS_IGNORE); - - free(local_accumulators); - free(current_accumulators); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, recv_count / scan_type), - }; -} - -fpmpi_result_t iota(int n, MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - dispatch_t dispatch = dispatch_init(n, FPMPI_INT32, FPMPI_INT32, world_size, my_rank); - - int local_start = dispatch.in_displacements8[my_rank] / FPMPI_INT32; - int local_n = dispatch.in_counts8[my_rank] / FPMPI_INT32; - void *local_output = local_iota(local_start, local_n); - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(n, FPMPI_INT32); - } - 
MPI_Gatherv(local_output, dispatch.out_counts8[my_rank], MPI_UINT8_T, result, dispatch.out_counts8, - dispatch.out_displacements8, MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - free(local_output); - dispatch_destroy(&dispatch); -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, n) - }; -} - -fpmpi_result_t zip(void *array1, void *array2, int count, int type1, int type2, int tuple_type, void *f(void *, void *), - MPI_Comm comm) { -#ifdef BENCHMARK - double start = MPI_Wtime(); -#endif - int my_rank = get_my_rank(comm); - int world_size = get_world_size(comm); - - dispatch_t dispatch1 = dispatch_init(count, type1, type1, world_size, FPMPI_ROOT_RANK); - dispatch_t dispatch2 = dispatch_init(count, type2, type2, world_size, FPMPI_ROOT_RANK); - dispatch_t dispatch3 = dispatch_init(count, tuple_type, tuple_type, world_size, FPMPI_ROOT_RANK); - - void *local_array1 = calloc(dispatch1.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array1, dispatch1.in_counts8, dispatch1.in_displacements8, MPI_UINT8_T, local_array1, - dispatch1.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - void *local_array2 = calloc(dispatch2.in_counts8[my_rank], sizeof(uint8_t)); - MPI_Scatterv(array2, dispatch2.in_counts8, dispatch2.in_displacements8, MPI_UINT8_T, local_array2, - dispatch2.in_counts8[my_rank], MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - int local_count = dispatch1.in_counts8[my_rank] / type1; - void *local_output = local_zip(local_array1, local_array2, local_count, type1, type2, tuple_type, f); - - void *result = NULL; - if (my_rank == FPMPI_ROOT_RANK) { - result = calloc(count, tuple_type); - } - MPI_Gatherv(local_output, dispatch3.out_counts8[my_rank], MPI_UINT8_T, result, dispatch3.out_counts8, - dispatch3.out_displacements8, MPI_UINT8_T, FPMPI_ROOT_RANK, comm); - - - free(local_array1); - 
free(local_array2); - free(local_output); - - dispatch_destroy(&dispatch1); - dispatch_destroy(&dispatch2); - dispatch_destroy(&dispatch3); - -#ifdef BENCHMARK - double finish = MPI_Wtime(); - if (my_rank == FPMPI_ROOT_RANK) { - printf("%d;%f\n", world_size, finish - start); - } -#endif - return (fpmpi_result_t) { - .content = result, - .count = set_count(my_rank, count) - }; -} diff --git a/lib/fpmpi.h b/lib/fpmpi.h deleted file mode 100644 index 02ae54b..0000000 --- a/lib/fpmpi.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef FPMPI_LIBRARY_H -#define FPMPI_LIBRARY_H - -#include <mpi.h> -#include <stdint.h> -#include <stdbool.h> - -#define FPMPI_INT8 sizeof(int8_t) -#define FPMPI_UINT8 sizeof(uint8_t) - -#define FPMPI_INT16 sizeof(int16_t) -#define FPMPI_UINT16 sizeof(uint16_t) - -#define FPMPI_INT32 sizeof(int32_t) -#define FPMPI_UINT32 sizeof(uint32_t) - -#define FPMPI_INT64 sizeof(int64_t) -#define FPMPI_UINT64 sizeof(int64_t) - -#define FPMPI_DOUBLE sizeof(double) - -#define FPMPI_ROOT_RANK 0 - -#define FPMPI_MERGE_SORT 1 - -typedef struct fpmpi_result { - void *content; - int count; -} fpmpi_result_t; - -fpmpi_result_t map(void *array, int count, int type, int map_type, void *f(void *), MPI_Comm comm); - -fpmpi_result_t filter(void *array, int count, int type, bool f(void *), MPI_Comm comm); - -fpmpi_result_t reduce(void *array, int count, int type, void f(void *, void *), MPI_Comm comm); - -fpmpi_result_t -fold_left(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value, MPI_Comm comm); - -fpmpi_result_t -fold_right(void *array, int count, int type, int fold_type, void f(void *, void *), void *initial_value, MPI_Comm comm); - -fpmpi_result_t -scan(void *array, int count, int type, int scan_type, void *f(void *, void *), void *initial_value, MPI_Comm comm); - -fpmpi_result_t sort(void *array, int count, int type, int sort_method, bool f(void *, void *), MPI_Comm comm); - -fpmpi_result_t find(void *array, int count, int type, 
bool f(void *), MPI_Comm comm); - -fpmpi_result_t iota(int n, MPI_Comm comm); - -fpmpi_result_t zip(void *array1, void *array2, int count, int type1, int type2, int tuple_type, void *f(void *, void *), - MPI_Comm comm); - -//fpmpi_result_t unzip(tuple2_t *array, int count, int type1, int type2, MPI_Comm comm); - -//fpmpi_result_t flat_map(void *array, int dimensions[2], int type, MPI_Comm comm); -// map2, map (zip xs ys) -// rotate - -#endif //FPMPI_LIBRARY_H diff --git a/lib/tests/tests.c b/lib/tests/tests.c deleted file mode 100644 index 6edb8af..0000000 --- a/lib/tests/tests.c +++ /dev/null @@ -1,453 +0,0 @@ -#include <mpi.h> -#include <stdlib.h> -#include <stdio.h> -#include <assert.h> -#include "../fpmpi.h" -#include "../fp.h" - -#define TEST_MAP 1 -#define TEST_FILTER 2 -#define TEST_REDUCE 3 -#define TEST_FIND 4 -#define TEST_FOLD_LEFT 5 -#define TEST_FOLD_RIGHT 6 -#define TEST_SORT 7 -#define TEST_SCAN 8 -#define TEST_IOTA 9 -#define TEST_ZIP 10 - -#define N 12 - -void *map_mul_int(void *element) { - int *result = calloc(1, sizeof(int)); - *result = (*(int *) (element)) * 2; - return (void *) result; -} - -void *map_mul_int_double(void *element) { - double *result = calloc(1, sizeof(double)); - *result = (*(int *) (element)) * 2.0; - return (void *) result; -} - -bool filter_only_even(void *element) { - int element32 = *(int *) element; - return element32 % 2 == 0; -} - -bool find_divide_by_three(void *element) { - int element32 = *(int *) element; - return element32 % 3 == 0; -} - -bool find_fifty(void *element) { - int element32 = *(int *) element; - return element32 == 50; -} - -void reduce_sum(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = (*accumulator32 + current_value32); -} - -void fold_left_sub(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = 
(*accumulator32 - current_value32); -} - -void fold_left_sub_int8_int(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = (int) (*(char *) current_value); - *accumulator32 = (*accumulator32 - current_value32); -} - -void fold_right_sub(void *current_value, void *accumulator) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - *accumulator32 = (current_value32 - *accumulator32); -} - -void fold_right_sub_int8_int(void *current_value, void *accumulator) { - int *accumulator32 = (int *) accumulator; - int current_value32 = (int) (*(char *) current_value); - *accumulator32 = (current_value32 - *accumulator32); -} - -void *scan_add(void *accumulator, void *current_value) { - int *accumulator32 = (int *) accumulator; - int current_value32 = *(int *) current_value; - int *new_accumulator = calloc(1, sizeof(int)); - *new_accumulator = (*accumulator32 + current_value32); - return new_accumulator; -} - -void tests_map(int my_rank) { - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - { - printf("Test map 1...\n"); - fpmpi_result_t result = map(array, N, FPMPI_INT32, FPMPI_INT32, map_mul_int, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content[N] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24}; - int *content = result.content; - - assert(result.content != NULL); - assert(result.count == N); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } - - { - printf("Test map 2...\n"); - fpmpi_result_t result = map(array, N, FPMPI_INT32, FPMPI_DOUBLE, map_mul_int_double, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - double expected_content[N] = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0}; - double *content = result.content; - - assert(result.content != NULL); - 
assert(result.count == N); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_filter(int my_rank) { - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - { - printf("Test filter 1...\n"); - fpmpi_result_t result = filter(array, N, FPMPI_INT32, filter_only_even, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content[N / 2] = {2, 4, 6, 8, 10, 12}; - int *content = result.content; - - assert(result.count == 6); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_reduce(int my_rank) { - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - { - printf("Test reduce 1...\n"); - fpmpi_result_t result = reduce(array, N, FPMPI_INT32, reduce_sum, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = 468; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_find(int my_rank) { - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - { - printf("Test find 1...\n"); - fpmpi_result_t result = find(array, N, FPMPI_INT32, find_divide_by_three, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = 3; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } - - { - printf("Test find 2...\n"); - fpmpi_result_t result = find(array, N, FPMPI_INT32, find_fifty, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int *content = result.content; - - 
assert(result.count == 0); - assert(content == NULL); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_fold_left(int my_rank) { - { - printf("Test fold_left 1...\n"); - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - int initial_value = 0; - fpmpi_result_t result = fold_left(array, N, FPMPI_INT32, FPMPI_INT32, fold_left_sub, &initial_value, - MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = -468; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } - { - printf("Test fold_left 2...\n"); - char array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - double initial_value = 0; - fpmpi_result_t result = fold_left(array, N, FPMPI_INT8, FPMPI_INT32, fold_left_sub_int8_int, &initial_value, - MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - double expected_content = -468.0; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_fold_right(int my_rank) { - { - printf("Test fold_left 1...\n"); - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - int initial_value = 0; - fpmpi_result_t result = fold_right(array, N, FPMPI_INT32, FPMPI_INT32, fold_right_sub, &initial_value, - MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = -36; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } - - { - printf("Test fold_left 2...\n"); - char array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - int initial_value = 0; - fpmpi_result_t result = fold_right(array, N, 
FPMPI_INT8, FPMPI_INT32, fold_right_sub_int8_int, &initial_value, - MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = -36; - int *content = result.content; - - assert(result.count == 1); - assert(*content == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -bool sort_asc(void *left, void *right) { - int left32 = *(int *) left; - int right32 = *(int *) right; - return left32 < right32; -} - -bool sort_dsc(void *left, void *right) { - int left32 = *(int *) left; - int right32 = *(int *) right; - return left32 > right32; -} - -void tests_sort(int my_rank) { - int array[N] = {18, 15, 83, 56, 41, 100, 71, 7, 69, 23, 36, 77}; - { - printf("Test sort 1...\n"); - fpmpi_result_t result = sort(array, N, FPMPI_INT32, FPMPI_MERGE_SORT, sort_asc, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content[N] = {7, 15, 18, 23, 36, 41, 56, 69, 71, 77, 83, 100}; - int *content = result.content; - - assert(result.count == N); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == N); - } - } - { - printf("Test sort 2...\n"); - fpmpi_result_t result = sort(array, N, FPMPI_INT32, FPMPI_MERGE_SORT, sort_dsc, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content[N] = {100, 83, 77, 71, 69, 56, 41, 36, 23, 18, 15, 7}; - int *content = result.content; - - assert(result.count == N); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == N); - } - } -} - -void tests_scan(int my_rank) { - { - printf("Test scan 1...\n"); - int array[N] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - int initial_value = 0; - fpmpi_result_t result = scan(array, N, FPMPI_INT32, FPMPI_INT32, scan_add, 
&initial_value, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content = 468; - int *content = result.content; - - assert(result.count == 73); - assert(content[result.count - 1] == expected_content); - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -void tests_iota(int my_rank) { - { - printf("Test iota 1...\n"); - fpmpi_result_t result = iota(N, MPI_COMM_WORLD); - if (my_rank == FPMPI_ROOT_RANK) { - int expected_content[N] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int *content = result.content; - - assert(result.count == N); - for (int i = 0; i < result.count; ++i) { - assert(content[i] == expected_content[i]); - } - free(result.content); - } else { - assert(result.content == NULL); - assert(result.count == 0); - } - } -} - -//void tests_zip(int my_rank) { -// int array1[N] = {18, 15, 83, 56, 41, 100, 71, 7, 69, 23, 36, 77}; -// char array2[N] = {5, 91, 70, 96, 9, 98, 37, 1, 13, 3, 42, 7}; -// { -// printf("Test zip 1...\n"); -// fpmpi_result_t result = zip(array1, array2, N, FPMPI_INT32, FPMPI_INT8, MPI_COMM_WORLD); -// if (my_rank == FPMPI_ROOT_RANK) { -// tuple2_t expected_content[N] = { -// {.first = &array1[0], .second = &array2[0]}, -// {.first = &array1[1], .second = &array2[1]}, -// {.first = &array1[2], .second = &array2[2]}, -// {.first = &array1[3], .second = &array2[3]}, -// {.first = &array1[4], .second = &array2[4]}, -// {.first = &array1[5], .second = &array2[5]}, -// {.first = &array1[6], .second = &array2[6]}, -// {.first = &array1[7], .second = &array2[7]}, -// {.first = &array1[8], .second = &array2[8]}, -// {.first = &array1[9], .second = &array2[9]}, -// {.first = &array1[10], .second = &array2[10]}, -// {.first = &array1[11], .second = &array2[11]}, -// }; -// tuple2_t *content = result.content; -// assert(result.count == N); -// for (int i = 0; i < result.count; ++i) { -// assert(content[i].first == expected_content[i].first); -// assert(content[i].second == 
expected_content[i].second); -// } -// free(result.content); -// } else { -// assert(result.content == NULL); -// assert(result.count == 0); -// } -// } -//} - -int main(int argc, char *argv[]) { - MPI_Init(&argc, &argv); - - int my_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - - for (int i = TEST_MAP; i <= TEST_ZIP; ++i) { - switch (i) { - case TEST_MAP: - tests_map(my_rank); - break; - case TEST_FILTER: - tests_filter(my_rank); - break; - case TEST_REDUCE: - tests_reduce(my_rank); - break; - case TEST_FIND: - tests_find(my_rank); - break; - case TEST_FOLD_LEFT: - tests_fold_left(my_rank); - break; - case TEST_FOLD_RIGHT: - tests_fold_right(my_rank); - break; - case TEST_SORT: - tests_sort(my_rank); - break; - case TEST_SCAN: - tests_scan(my_rank); - break; - case TEST_IOTA: - tests_iota(my_rank); - break; - case TEST_ZIP: -// tests_zip(my_rank); - break; - default: - MPI_Finalize(); - exit(0); - } - MPI_Barrier(MPI_COMM_WORLD); - } - - MPI_Finalize(); - return 0; -} -- GitLab