diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..e955634b7224617f43b80693fd2eeb8e533d1660 --- /dev/null +++ b/.clang-format @@ -0,0 +1,183 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: true +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortLambdasOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: MultiLine + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: AfterColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: AfterColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<ext/.*\.h>' + Priority: 2 + SortPriority: 0 + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + - Regex: '.*' + Priority: 3 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: false +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentWidth: 4 +IndentWrappedFunctionNames: true +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: '' + BasedOnStyle: google +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE +... + diff --git a/meson.build b/meson.build index ffa47711acf593deaf71140f7190cbe7e256e303..be1ed52d0df4322d6dc487e7d86dc3e3925625e9 100644 --- a/meson.build +++ b/meson.build @@ -14,6 +14,8 @@ build_args = [ liblbm = '' if precision == 'FLT' liblbm = 'liblbm_f32.fut' +elif precision == SHORT_FLT + liblbm = 'liblbm_f16.fut' else liblbm = 'liblbm_f64.fut' endif diff --git a/meson_options.txt b/meson_options.txt index 69d03b9bb7051c605640f3a404088a1b4d5c0983..971c4f2b6509181ccac49cfad360cb552dcfe010 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,6 +1,6 @@ option('futhark-backend', type: 'combo', choices: ['c', 'multicore', 'opencl', 'cuda'], value: 'c', description: 'Select the backend that Futhark code compiles to') option('lattice', type: 'combo', choices: ['D3Q27', 'D3Q19'], value: 'D3Q19', description: 'Select the lattice that the model uses') -option('precision', type: 'combo', choices: ['DBL', 'FLT'], value: 'FLT', description: 'Select the numerical precision offloating point numbers that the model uses') +option('precision', type: 'combo', choices: ['DBL', 'FLT', 'SHORT'], value: 'FLT', description: 'Select the numerical precision of floating point numbers that the model uses') option('cavity3d', type: 'feature', value: 'enabled', description: 'Enable the feature to compile the cavity3d case') option('kida', type: 'feature', value: 'enabled', description: 'Enable the feature to compile the Kida case') diff --git a/src/c_interface/lbm_utils.c b/src/c_interface/lbm_utils.c index 6c6f7030d8d02fec1c685e1db03cb10ddbb5d397..9ccaa26c4948c85ec27933079be69ae033ccc215 100644 --- a/src/c_interface/lbm_utils.c +++ b/src/c_interface/lbm_utils.c @@ -11,9 +11,9 @@ tensor_field create_distances(fint nx, fint ny, fint nz, fint q, triangle *t, fi } tensor_field distances = tensor_field_allocate_and_init(nx, ny, nz, q, ini); // TODO should go from 0 to nx/ny/nz but for debugging purposes we keep them - for (fint ix = 1; ix < nx-1; ++ix) { - for (fint iy = 1; iy < ny-1; ++iy) { - for (fint iz = 1; iz < nz-1; ++iz) { + for (fint ix = 1; ix < nx - 1; ++ix) { + for (fint iy = 1; iy < ny - 1; ++iy) { + for (fint iz = 1; iz < nz - 1; ++iz) { for (fint iq = 0; iq < q; ++iq) { T distance = (T)1; point loc_point = point_create(ix, iy, iz); @@ -26,7 +26,8 @@ tensor_field create_distances(fint nx, fint ny, fint nz, fint q, triangle *t, fi if (point_dot(next_vec, triangle_compute_normal(t[it])) > 0.0) { T dist_tmp = (T)0; - bool intersection_tmp = triangle_compute_intersection_with_segment(t[it], loc_point, next_vec, &dist_tmp); + bool intersection_tmp = triangle_compute_intersection_with_segment( + t[it], loc_point, next_vec, &dist_tmp); if (dist_tmp < distance && dist_tmp >= 0.0 && intersection_tmp) { distance = dist_tmp; intersection = intersection_tmp; @@ -52,13 +53,14 @@ static T compute_feq(T w, const int c[], T rho, T u[]) { T c_u = c[0] * u[0]; T u_sqr = u[0] * u[0]; for (int id = 1; id < D3; ++id) { - c_u += c[id] * u[id]; + c_u += c[id] * u[id]; u_sqr += u[id] * u[id]; } return w * rho * (1.0 + 3.0 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr); } -void lbm_allocate_and_ini_at_equilibrium_inplace(T rho, T u[], int nx, int ny, int nz, int q, tensor_field field) { +void lbm_allocate_and_ini_at_equilibrium_inplace( + T rho, T u[], int nx, int ny, int nz, int q, tensor_field field) { assert(nx == field.nx && "Inconsistent nx size."); assert(ny == field.ny && "Inconsistent ny size."); assert(nz == field.nz && "Inconsistent nz size."); @@ -83,27 +85,45 @@ void lbm_allocate_and_ini_at_equilibrium_kida_inplace(T rho0, T u_max, tensor_fi for (int iy = 0; iy < ny; ++iy) { for (int iz = 0; iz < nz; ++iz) { T u[3]; - - T x = (T)2*pi*(T)ix / (T)(nx); - T y = (T)2*pi*(T)iy / (T)(ny); - T z = (T)2*pi*(T)iz / (T)(nz); - - u[0] = sin(x)*(cos((T)3*y) * cos(z) - cos(y)*cos((T)3*z)); - u[1] = sin(y)*(cos((T)3*z) * cos(x) - cos(z)*cos((T)3*x)); - u[2] = sin(z)*(cos((T)3*x) * cos(y) - cos(x)*cos((T)3*y)); + + T x = (T)2 * pi * (T)ix / (T)(nx); + T y = (T)2 * pi * (T)iy / (T)(ny); + T z = (T)2 * pi * (T)iz / (T)(nz); + + u[0] = sin(x) * (cos((T)3 * y) * cos(z) - cos(y) * cos((T)3 * z)); + u[1] = sin(y) * (cos((T)3 * z) * cos(x) - cos(z) * cos((T)3 * x)); + u[2] = sin(z) * (cos((T)3 * x) * cos(y) - cos(x) * cos((T)3 * y)); for (fint id = 0; id < 3; ++id) { u[id] *= u_max; } - T rho = -(1.0/6.0)*cos(4*z)*cos(2*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(4*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(2*x)*cos(4*y); - rho += +cos(2*z)*cos(2*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(4*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(2*x)*cos(4*y); - rho += +(1.0/36.0)*cos(4*x)*cos(4*y)*cos(2*z)-(1.0/80.0)*cos(6*y)*cos(2*z)-(1.0/10.0)*cos(4*z)*cos(2*y)+(1.0/8.0)*cos(4*z)*cos(4*y); - rho += -(1.0/10.0)*cos(4*z)*cos(2*x)+(1.0/8.0)*cos(4*z)*cos(4*x)+(1.0/8.0)*cos(2*z)*cos(2*y)-(1.0/10.0)*cos(2*z)*cos(4*y); - rho += +(1.0/8.0)*cos(2*z)*cos(2*x)-(1.0/10.0)*cos(2*z)*cos(4*x)-(1.0/80.0)*cos(6*x)*cos(2*z)-(1.0/4.0)*cos(2*x); - rho += -(1.0/80.0)*cos(6*z)*cos(2*x)-(1.0/80.0)*cos(6*z)*cos(2*y)-(1.0/80.0)*cos(6*x)*cos(2*y)-(1.0/10.0)*cos(2*x)*cos(4*y); - rho += +(1.0/8.0)*cos(2*x)*cos(2*y)+(1.0/8.0)*cos(4*x)*cos(4*y)-(1.0/80.0)*cos(2*x)*cos(6*y)-(1.0/10.0)*cos(4*x)*cos(2*y); - rho += -(1.0/4.0)*cos(2*z)-(1.0/4.0)*cos(2*y); + T rho = -(1.0 / 6.0) * cos(4 * z) * cos(2 * x) * cos(2 * y) + + (1.0 / 36.0) * cos(4 * z) * cos(4 * x) * cos(2 * y) + + (1.0 / 36.0) * cos(4 * z) * cos(2 * x) * cos(4 * y); + rho += +cos(2 * z) * cos(2 * x) * cos(2 * y) + - (1.0 / 6.0) * cos(2 * z) * cos(4 * x) * cos(2 * y) + - (1.0 / 6.0) * cos(2 * z) * cos(2 * x) * cos(4 * y); + rho += +(1.0 / 36.0) * cos(4 * x) * cos(4 * y) * cos(2 * z) + - (1.0 / 80.0) * cos(6 * y) * cos(2 * z) + - (1.0 / 10.0) * cos(4 * z) * cos(2 * y) + + (1.0 / 8.0) * cos(4 * z) * cos(4 * y); + rho += -(1.0 / 10.0) * cos(4 * z) * cos(2 * x) + + (1.0 / 8.0) * cos(4 * z) * cos(4 * x) + + (1.0 / 8.0) * cos(2 * z) * cos(2 * y) + - (1.0 / 10.0) * cos(2 * z) * cos(4 * y); + rho += +(1.0 / 8.0) * cos(2 * z) * cos(2 * x) + - (1.0 / 10.0) * cos(2 * z) * cos(4 * x) + - (1.0 / 80.0) * cos(6 * x) * cos(2 * z) - (1.0 / 4.0) * cos(2 * x); + rho += -(1.0 / 80.0) * cos(6 * z) * cos(2 * x) + - (1.0 / 80.0) * cos(6 * z) * cos(2 * y) + - (1.0 / 80.0) * cos(6 * x) * cos(2 * y) + - (1.0 / 10.0) * cos(2 * x) * cos(4 * y); + rho += +(1.0 / 8.0) * cos(2 * x) * cos(2 * y) + + (1.0 / 8.0) * cos(4 * x) * cos(4 * y) + - (1.0 / 80.0) * cos(2 * x) * cos(6 * y) + - (1.0 / 10.0) * cos(4 * x) * cos(2 * y); + rho += -(1.0 / 4.0) * cos(2 * z) - (1.0 / 4.0) * cos(2 * y); rho *= -u_max * u_max; rho *= (T)3.0; @@ -131,12 +151,11 @@ void lbm_allocate_and_ini_at_equilibrium_cavity_inplace(T rho0, T u_max, tensor_ for (int iz = 0; iz < nz; ++iz) { T u[3] = {0.0, 0.0, 0.0}; if (iz == nz - 1) { - T x_vel = 1-pow(x/h, 18); - T y_vel = 1-pow(y/h, 18); + T x_vel = 1 - pow(x / h, 18); + T y_vel = 1 - pow(y / h, 18); u[0] = u_max * x_vel * x_vel * y_vel * y_vel; } - for (int ipop = 0; ipop < field.q; ++ipop) { field.grid[ix][iy][iz][ipop] = compute_feq(w[ipop], c_t[ipop], rho0, u); } @@ -145,28 +164,31 @@ void lbm_allocate_and_ini_at_equilibrium_cavity_inplace(T rho0, T u_max, tensor_ } } -futhark_4d *lbm_collide_and_stream(struct futhark_context *ctx, futhark_4d **f, futhark_4d *vel, futhark_4d *d, T omega, fint iter) { +futhark_4d *lbm_collide_and_stream( + struct futhark_context *ctx, futhark_4d **f, futhark_4d *vel, futhark_4d *d, T omega, + fint iter) { futhark_4d *fout = NULL; // futhark_entry_main(ctx, &fout, *f, vel, d, omega, iter); #ifdef D3Q19 - futhark_entry_main_d3q19_reg(ctx, &fout, *f, vel, d, omega, iter); + futhark_entry_main_d3q19_bgk(ctx, &fout, *f, vel, d, omega, iter); #endif #ifdef D3Q27 - futhark_entry_main_d3q27_reg(ctx, &fout, *f, vel, d, omega, iter); + futhark_entry_main_d3q27_bgk(ctx, &fout, *f, vel, d, omega, iter); #endif - assert(NULL != fout); futhark_free_4d(ctx, *f); *f = NULL; - + return fout; } // TODO: add possibility to have rho/u form futhark directly -// futhark_4d *lbm_collide_and_stream_rho_u(struct futhark_context *ctx, futhark_4d **f, struct futhark_f32_3d **rho, futhark_4d **vel, T omega, fint iter) { +// futhark_4d *lbm_collide_and_stream_rho_u(struct futhark_context *ctx, futhark_4d **f, struct +// futhark_f32_3d **rho, futhark_4d **vel, T omega, fint iter) { // futhark_4d *fout = NULL; -// int ret = futhark_entry_rho_u_main(ctx, &fout, rho, vel, *f, omega, iter); // this produces a waring (should ask what to do with that) +// int ret = futhark_entry_rho_u_main(ctx, &fout, rho, vel, *f, omega, iter); // this produces a +// waring (should ask what to do with that) // futhark_free_4d(ctx, *f); // *f = NULL; @@ -174,10 +196,11 @@ futhark_4d *lbm_collide_and_stream(struct futhark_context *ctx, futhark_4d **f, // return fout; // } - void lbm_compute_density_inplace(tensor_field lattice, scalar_field density) { - assert(tensor_field_consistant_size_scalar_field(lattice, density) && "Sizes of rho and lattice are inconsistent."); - + assert( + tensor_field_consistant_size_scalar_field(lattice, density) + && "Sizes of rho and lattice are inconsistent."); + for (int ix = 0; ix < lattice.nx; ++ix) { for (int iy = 0; iy < lattice.ny; ++iy) { for (int iz = 0; iz < lattice.nz; ++iz) { @@ -192,7 +215,9 @@ void lbm_compute_density_inplace(tensor_field lattice, scalar_field density) { } void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity) { - assert(tensor_field_consistant_spatial_size_tensor_field(lattice, velocity) && "Sizes of velocity and lattice are inconsistent."); + assert( + tensor_field_consistant_spatial_size_tensor_field(lattice, velocity) + && "Sizes of velocity and lattice are inconsistent."); for (int ix = 0; ix < lattice.nx; ++ix) { for (int iy = 0; iy < lattice.ny; ++iy) { @@ -204,7 +229,8 @@ void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity) { for (int id = 0; id < lattice.dim; ++id) { velocity.grid[ix][iy][iz][id] = (T)0; for (int ipop = 0; ipop < lattice.q; ++ipop) { - velocity.grid[ix][iy][iz][id] += lattice.grid[ix][iy][iz][ipop] * c_t[ipop][id]; + velocity.grid[ix][iy][iz][id] += + lattice.grid[ix][iy][iz][ipop] * c_t[ipop][id]; } velocity.grid[ix][iy][iz][id] /= rho; } @@ -221,19 +247,21 @@ tensor_field lbm_allocate_and_ini_at_equilibrium(T rho, T u[], int nx, int ny, i return lattice; } -tensor_field lbm_allocate_and_ini_at_equilibrium_kida(T rho0, T u_max, int nx, int ny, int nz, int q) { +tensor_field lbm_allocate_and_ini_at_equilibrium_kida( + T rho0, T u_max, int nx, int ny, int nz, int q) { tensor_field lattice = tensor_field_allocate(nx, ny, nz, q); lbm_allocate_and_ini_at_equilibrium_kida_inplace(rho0, u_max, lattice); - + return lattice; } -tensor_field lbm_allocate_and_ini_at_equilibrium_cavity(T rho0, T u_max, int nx, int ny, int nz, int q) { +tensor_field lbm_allocate_and_ini_at_equilibrium_cavity( + T rho0, T u_max, int nx, int ny, int nz, int q) { tensor_field lattice = tensor_field_allocate(nx, ny, nz, q); lbm_allocate_and_ini_at_equilibrium_cavity_inplace(rho0, u_max, lattice); - + return lattice; } @@ -242,7 +270,7 @@ scalar_field lbm_compute_density(tensor_field lattice) { lbm_compute_density_inplace(lattice, density); - return density; + return density; } tensor_field lbm_compute_velocity(tensor_field lattice) { @@ -250,5 +278,5 @@ tensor_field lbm_compute_velocity(tensor_field lattice) { lbm_compute_velocity_inplace(lattice, vel); - return vel; + return vel; } diff --git a/src/globals.fut b/src/globals.fut index 72e6fadf9f8355eb33999c3739a2b7d2475c46fa..7e9bc82a165ac94480a15d4dd125e8116a3bb8ec 100644 --- a/src/globals.fut +++ b/src/globals.fut @@ -1,3 +1,4 @@ +import "globals_f16" import "globals_f32" import "globals_f64" diff --git a/src/globals_f16.fut b/src/globals_f16.fut new file mode 100644 index 0000000000000000000000000000000000000000..152cfe75ed75d977088cd086433c5ac95a2e96ca --- /dev/null +++ b/src/globals_f16.fut @@ -0,0 +1,62 @@ +import "lib/github.com/athas/vector/vector" + +import "vectors" +import "lattice" +import "descriptor" +import "dynamics" +import "bc" +import "lbm" + +-- d3q19 + +module d3q19_f16 = mk_desc vec3' vec19' d3q19_lattice f16 + +-- dynamics + +module bgk_d3q19_f16 = mk_bgk f16 d3q19_f16 + +module reg_d3q19_f16 = mk_reg f16 d3q19_f16 + +-- boundary conditions + +module bfl_d3q19_f16 = mk_bfl_bc f16 d3q19_f16 --bfl et al. + +module periodic_d3q19_f16 = mk_no_bc f16 d3q19_f16 + +-- model + +module d3q19_bgk_bfl_f16 = mk_lbm f16 d3q19_f16 bgk_d3q19_f16 bfl_d3q19_f16 + +module d3q19_bgk_periodic_f16 = mk_lbm f16 d3q19_f16 bgk_d3q19_f16 periodic_d3q19_f16 + +module d3q19_reg_bfl_f16 = mk_lbm f16 d3q19_f16 reg_d3q19_f16 bfl_d3q19_f16 + +module d3q19_reg_periodic_f16 = mk_lbm f16 d3q19_f16 reg_d3q19_f16 periodic_d3q19_f16 + + +-- d3q27 + +module d3q27_f16 = mk_desc vec3' vec27' d3q27_lattice f16 + +-- dynamics + +module bgk_d3q27_f16 = mk_bgk f16 d3q27_f16 + +module reg_d3q27_f16 = mk_reg f16 d3q27_f16 + +-- boundary conditions + +module bfl_d3q27_f16 = mk_bfl_bc f16 d3q27_f16 --bfl et al. + +module periodic_d3q27_f16 = mk_no_bc f16 d3q27_f16 + +-- model + +module d3q27_bgk_bfl_f16 = mk_lbm f16 d3q27_f16 bgk_d3q27_f16 bfl_d3q27_f16 + +module d3q27_bgk_periodic_f16 = mk_lbm f16 d3q27_f16 bgk_d3q27_f16 periodic_d3q27_f16 + +module d3q27_reg_bfl_f16 = mk_lbm f16 d3q27_f16 reg_d3q27_f16 bfl_d3q27_f16 + +module d3q27_reg_periodic_f16 = mk_lbm f16 d3q27_f16 reg_d3q27_f16 periodic_d3q27_f16 + diff --git a/src/lbm.fut b/src/lbm.fut index 43c54271b34b0f47fd04d6ad851bb6d0a9a8b37d..684a38058496b58b263bb4fbb456c5d205a8028e 100644 --- a/src/lbm.fut +++ b/src/lbm.fut @@ -10,6 +10,9 @@ module type lbm = { type vec_q 'a type vec_d 'a + val q: i64 + val dim: i64 + val get_q: i64 -> vec_q real -> real val get_d: i64 -> vec_d real -> real val to_ary_q [q] : vec_q real -> [q]real @@ -18,10 +21,10 @@ module type lbm = { val stream [nx][ny][nz]: [nx][ny][nz](vec_q real) -> [nx][ny][nz](vec_q real) val collide [nx][ny][nz]: [nx][ny][nz](vec_q real) -> real -> [nx][ny][nz](vec_q real) val stream_and_collide [nx][ny][nz]: [nx][ny][nz](vec_d real) -> [nx][ny][nz](vec_q real) -> [nx][ny][nz](vec_q real) -> real -> [nx][ny][nz](vec_q real) - val from_array_q [nx][ny][nz][q]: [nx][ny][nz][q]real -> [nx][ny][nz](vec_q real) - val to_array_q [nx][ny][nz][q]: [nx][ny][nz](vec_q real) -> [nx][ny][nz][q]real - val from_array_d [nx][ny][nz][d]: [nx][ny][nz][d]real -> [nx][ny][nz](vec_d real) - val to_array_d [nx][ny][nz][d]: [nx][ny][nz](vec_d real) -> [nx][ny][nz][d]real + val from_array_q [nx][ny][nz]: [nx][ny][nz][q]real -> [nx][ny][nz](vec_q real) + val to_array_q [nx][ny][nz]: [nx][ny][nz](vec_q real) -> [nx][ny][nz][q]real + val from_array_d [nx][ny][nz]: [nx][ny][nz][dim]real -> [nx][ny][nz](vec_d real) + val to_array_d [nx][ny][nz]: [nx][ny][nz](vec_d real) -> [nx][ny][nz][dim]real } @@ -39,6 +42,9 @@ module mk_lbm (real: real) (d: descriptor with real = real.t) (dyn: dynamics wit type vec_q 'a = d.vec_q a type vec_d 'a = d.vec_d a + let q = d.q + let dim = d.d + def get_q (i: i64) (v: d.vec_q real) = d.get_q i v def get_d (i: i64) (v: d.vec_d real) = d.get_d i v @@ -76,23 +82,20 @@ module mk_lbm (real: real) (d: descriptor with real = real.t) (dyn: dynamics wit def stream [nx][ny][nz] (f: [nx][ny][nz](vec_q real)) : [nx][ny][nz](vec_q real) = -- f - let n = nx * ny * nz - let op (i: i64) : [n]real = + let op (i: i64) : []real = let f' = (map_3d (d.get_q i) f) :> [nx][ny][nz]real in ((flatten_3d ( if i == 0 then f' else stream_ci f' (d.get_q i d.c) - )) :> [n]real) + )) :> [nx*ny*nz]real) in ( - ( - unflatten_3d nx ny nz ( - d.vzip_q - ( - d.map_q (\i -> op i) (d.iota_q :> (d.vec_q i64)) - ) - ) - ) :> [nx][ny][nz](vec_q real) + unflatten_3d ( + d.vzip_q + ( + d.map_q (\i -> op i) (d.iota_q :> (d.vec_q i64)) + ) + ) ) def collide [nx][ny][nz] (f: [nx][ny][nz](d.vec_q real)) (omega: real): [nx][ny][nz](vec_q real) = @@ -102,16 +105,16 @@ module mk_lbm (real: real) (d: descriptor with real = real.t) (dyn: dynamics wit -- stream (bc.apply distances (collide f omega)) collide (bc.apply velocity distances (stream f)) omega - def from_array_q [nx][ny][nz][q] (f: [nx][ny][nz][q]real): [nx][ny][nz](vec_q real) = + def from_array_q [nx][ny][nz] (f: [nx][ny][nz][q]real): [nx][ny][nz](vec_q real) = map_3d (\fi -> d.from_array_q (fi :> [q]real)) f - def to_array_q [nx][ny][nz][q] (f: [nx][ny][nz](d.vec_q real)): [nx][ny][nz][q]real = + def to_array_q [nx][ny][nz] (f: [nx][ny][nz](d.vec_q real)): [nx][ny][nz][q]real = map_3d (\fi -> ((to_ary_q (fi) :[q]real ) :> [q]real)) f - def from_array_d [nx][ny][nz][dim] (f: [nx][ny][nz][dim]real): [nx][ny][nz](vec_d real) = + def from_array_d [nx][ny][nz] (f: [nx][ny][nz][dim]real): [nx][ny][nz](vec_d real) = map_3d (\fi -> d.from_array_d (fi : [dim]real)) f - def to_array_d [nx][ny][nz][dim] (f: [nx][ny][nz](vec_d real)): [nx][ny][nz][dim]real = + def to_array_d [nx][ny][nz] (f: [nx][ny][nz](vec_d real)): [nx][ny][nz][dim]real = map_3d (\fi -> ((d.to_array_d fi) : [dim]real)) f } diff --git a/src/liblbm_f16.fut b/src/liblbm_f16.fut new file mode 100644 index 0000000000000000000000000000000000000000..102a98e60eddaccae013b9f202e5c093ae347938 --- /dev/null +++ b/src/liblbm_f16.fut @@ -0,0 +1,29 @@ +-- Program for Lattice-Boltzmann in Futhark +import "globals_f16" +import "sim" +import "polygonise" + +module sim_d3q27_reg_bfl_f16 = mk_sim d3q27_reg_bfl_f16 + +entry main_d3q27_reg = sim_d3q27_reg_bfl_f16.run + +module sim_d3q19_reg_bfl_f16 = mk_sim d3q19_reg_bfl_f16 + +entry main_d3q19_reg = sim_d3q19_reg_bfl_f16.run + +entry main_mcubes_polygonise = mcubes_polygonise_f16 + +entry main_mtetras_polygonise = mtetras_polygonise_f16 + +-- Test the sub1 function. +-- == +-- input { 21i64 21i64 21i64 27i64 3i64 1.5f16 1000i64 } +-- auto output +let main (nx: i64) (ny: i64) (nz: i64) (q: i64) (d: i64) (omega: f16) (max_t: i64): [nx][ny][nz][q]f16 = + let fin = replicate nx (replicate ny (replicate nz (replicate q 0f16))) + let dist = replicate nx (replicate ny (replicate nz (replicate q (-1.0f16)))) + let u = replicate nx (replicate ny (replicate nz (replicate d 0f16))) + + in sim_d3q27_reg_bfl_f16.run fin u dist omega max_t + + diff --git a/src/liblbm_f32.fut b/src/liblbm_f32.fut index fbefef4ac283aa8a577ac458ff1a3d8afffc2348..54e46606c694d9e5e907afb90ba2c91773e9a0e9 100644 --- a/src/liblbm_f32.fut +++ b/src/liblbm_f32.fut @@ -3,59 +3,51 @@ import "globals_f32" import "sim" import "polygonise" +-- D3Q27 - REG + module sim_d3q27_reg_bfl_f32 = mk_sim d3q27_reg_bfl_f32 entry main_d3q27_reg = sim_d3q27_reg_bfl_f32.run -module sim_d3q19_reg_periodic_f32 = mk_sim d3q19_reg_periodic_f32 +module sim_d3q27_reg_periodic_f32 = mk_sim d3q27_reg_periodic_f32 + +entry main_d3q27_reg_periodic = sim_d3q27_reg_periodic_f32.run + +-- D3Q19 - REG + +module sim_d3q19_reg_bfl_f32 = mk_sim d3q19_reg_bfl_f32 + +entry main_d3q19_reg = sim_d3q19_reg_bfl_f32.run + +module sim_d3q19_reg_periodic_f32 = mk_sim d3q19_reg_periodic_f32 + +entry main_d3q19_reg_periodic_f32 = sim_d3q19_reg_periodic_f32.run + +-- D3Q27 - BGK + +module sim_d3q27_bgk_bfl_f32 = mk_sim d3q27_bgk_bfl_f32 + +entry main_d3q27_bgk = sim_d3q27_bgk_bfl_f32.run + +module sim_d3q27_bgk_periodic_f32 = mk_sim d3q27_bgk_periodic_f32 + +entry main_d3q27_bgk_periodic = sim_d3q27_bgk_periodic_f32.run + +-- D3Q19 - BGK + +module sim_d3q19_bgk_bfl_f32 = mk_sim d3q19_bgk_bfl_f32 + +entry main_d3q19_bgk = sim_d3q19_bgk_bfl_f32.run + +module sim_d3q19_bgk_periodic_f32 = mk_sim d3q19_bgk_periodic_f32 + +entry main_d3q19_bgk_periodic_f32 = sim_d3q19_bgk_periodic_f32.run -entry main_d3q19_reg = sim_d3q19_reg_periodic_f32.run +-- Polygonise entry main_mcubes_polygonise = mcubes_polygonise_f32 entry main_mtetras_polygonise = mtetras_polygonise_f32 --- Test the sub1 function. --- == --- input { 21i64 21i64 21i64 27i64 3i64 1.5f32 1000i64 } --- auto output -let main (nx: i64) (ny: i64) (nz: i64) (q: i64) (d: i64) (omega: f32) (max_t: i64): [nx][ny][nz][q]f32 = - let fin = replicate nx (replicate ny (replicate nz (replicate q 0f32))) - let dist = replicate nx (replicate ny (replicate nz (replicate q (-1.0f32)))) - let u = replicate nx (replicate ny (replicate nz (replicate d 0f32))) - - in sim_d3q27_reg_bfl_f32.run fin u dist omega max_t - - - - --- -- executes Lattice-Boltzmann it times and --- let lattice_boltzmann [nx][ny][nz] (fin: [nx][ny][nz](lbm_model.vec_q real)) (vel: [nx][ny][nz](lbm_model.vec_d real)) (dists: [nx][ny][nz](lbm_model.vec_q real)) (omega: real) (max_t: i64): [nx][ny][nz](lbm_model.vec_q real) = --- let fini = lbm_model.collide fin omega --- let f = loop f = fini for _i < (max_t-1) do --- lbm_model.stream_and_collide vel dists f omega --- let fout = lbm_model.bc_apply vel dists (lbm_model.stream f) --- in fout - --- entry main [nx][ny][nz][d][q] (fin: [nx][ny][nz][q]real) (vel: [nx][ny][nz][d]real) (dists: [nx][ny][nz][q]real) (omega: real) (max_t: i64): [nx][ny][nz][q]real = --- let fin_f = lbm_model.from_array_q fin --- let vel = lbm_model.from_array_d vel --- let dist = lbm_model.from_array_q dists --- let f = lattice_boltzmann fin_f vel dist omega max_t --- in lbm_model.to_array_q f - --- TODO add version where rho/u a outputs --- entry rho_main [nx][ny][nz] (fin: [nx][ny][nz][19]real) (omega: real) (max_t: i64): ([nx][ny][nz][19]f32, [nx][ny][nz]f32) = --- let fin_f = lbm_model.from_array_q fin --- let f = lattice_boltzmann fin_f omega max_t --- let rho = compute_moment f bgk_d3q19.compute_rho --- in (lbm_model.to_array_q f, rho) - --- entry rho_u_main [nx][ny][nz] (fin: [nx][ny][nz][19]f32) (omega: real) (max_t: i64): ([nx][ny][nz][19]f32, [nx][ny][nz]f32, [nx][ny][nz][3]f32) = --- let fin_f = lbm_model.from_array_q fin --- let f = lattice_boltzmann fin_f omega max_t --- let rho = compute_moment f bgk_d3q19.compute_rho --- let u = compute_moment f bgk_d3q19.compute_vel --- in (lbm_model.to_array_q f, rho, (lbm_model.to_array_d u)) diff --git a/src/liblbm_f64.fut b/src/liblbm_f64.fut index 44067505766feee6dd270a116a4acdd5b135f618..3ddf182cb347a53940291e57077533d0c8b462d0 100644 --- a/src/liblbm_f64.fut +++ b/src/liblbm_f64.fut @@ -16,43 +16,3 @@ entry main_mcubes_polygonise = mcubes_polygonise_f64 entry main_mtetras_polygonise = mtetras_polygonise_f64 --- -- executes Lattice-Boltzmann it times and --- let lattice_boltzmann [nx][ny][nz] (fin: [nx][ny][nz](lbm_model.vec_q real)) (vel: [nx][ny][nz](lbm_model.vec_d real)) (dists: [nx][ny][nz](lbm_model.vec_q real)) (omega: real) (max_t: i64): [nx][ny][nz](lbm_model.vec_q real) = --- let fini = lbm_model.collide fin omega --- let f = loop f = fini for _i < (max_t-1) do --- lbm_model.stream_and_collide vel dists f omega --- let fout = lbm_model.bc_apply vel dists (lbm_model.stream f) --- in fout - --- entry main [nx][ny][nz][d][q] (fin: [nx][ny][nz][q]real) (vel: [nx][ny][nz][d]real) (dists: [nx][ny][nz][q]real) (omega: real) (max_t: i64): [nx][ny][nz][q]real = --- let fin_f = lbm_model.from_array_q fin --- let vel = lbm_model.from_array_d vel --- let dist = lbm_model.from_array_q dists --- let f = lattice_boltzmann fin_f vel dist omega max_t --- in lbm_model.to_array_q f - --- TODO add version where rho/u a outputs --- entry rho_main [nx][ny][nz] (fin: [nx][ny][nz][19]real) (omega: real) (max_t: i64): ([nx][ny][nz][19]f32, [nx][ny][nz]f32) = --- let fin_f = lbm_model.from_array_q fin --- let f = lattice_boltzmann fin_f omega max_t --- let rho = compute_moment f bgk_d3q19.compute_rho --- in (lbm_model.to_array_q f, rho) - --- entry rho_u_main [nx][ny][nz] (fin: [nx][ny][nz][19]f32) (omega: real) (max_t: i64): ([nx][ny][nz][19]f32, [nx][ny][nz]f32, [nx][ny][nz][3]f32) = --- let fin_f = lbm_model.from_array_q fin --- let f = lattice_boltzmann fin_f omega max_t --- let rho = compute_moment f bgk_d3q19.compute_rho --- let u = compute_moment f bgk_d3q19.compute_vel --- in (lbm_model.to_array_q f, rho, (lbm_model.to_array_d u)) - - --- Testing main --- == --- entry: test_main --- compiled input { 213 123 0.8 1 } --- auto output - --- entry test_main (nx: i64)(ny: i64)(tau: real)(it: i64): [9][]real = --- let f_ini = init_f_in(nx)(ny) --- let f = lattice_boltzmann(nx)(ny)(f_ini)(tau)(it) --- in from_tuple_to_array(f) diff --git a/src/polygonise.fut b/src/polygonise.fut index f3416aae4bbbc8eafe3ba314adb2bfdc433ca4f1..cc9237881c0657a7d7f443f58f9ebd39fdf58705 100644 --- a/src/polygonise.fut +++ b/src/polygonise.fut @@ -20,6 +20,23 @@ let mcubes_polygonise_f32 [nx][ny][nz] (rho: [nx][ny][nz]f32) (isolevel: f32) = map(\txi -> f32.f64 txi) tx ) tri ) + +let mcubes_polygonise_f16 [nx][ny][nz] (rho: [nx][ny][nz]f16) (isolevel: f16) = + let rho' = + map (\rx -> + map (\rxy -> + map (\rxyz -> + f64.f16 rxyz + ) rxy + ) rx + ) rho + let tri = map (mcubes.triangle_to_array) (mcubes.polygonise_field rho' (f64.f16 isolevel)) + let si = length tri + in + (si , map (\tx -> + map(\txi -> f16.f64 txi) tx + ) tri ) + let mtetras_polygonise_f64 [nx][ny][nz] (rho: [nx][ny][nz]f64) (isolevel: f64) = map (mtetras.triangle_to_array) (mtetras.polygonise_field rho isolevel) @@ -37,4 +54,20 @@ let mtetras_polygonise_f32 [nx][ny][nz] (rho: [nx][ny][nz]f32) (isolevel: f32) = in (si , map (\tx -> map(\txi -> f32.f64 txi) tx + ) tri ) + +let mtetras_polygonise_f16 [nx][ny][nz] (rho: [nx][ny][nz]f16) (isolevel: f16) = + let rho' = + map (\rx -> + map (\rxy -> + map (\rxyz -> + f64.f16 rxyz + ) rxy + ) rx + ) rho + let tri = map (mtetras.triangle_to_array) (mtetras.polygonise_field rho' (f64.f16 isolevel)) + let si = length tri + in + (si , map (\tx -> + map(\txi -> f16.f64 txi) tx ) tri ) \ No newline at end of file diff --git a/src/sim.fut b/src/sim.fut index 400970191d5b9bfa12f83e7d703ec259833978e0..0e20f509a18adf05869b1d78522e1e8c74cf6946 100644 --- a/src/sim.fut +++ b/src/sim.fut @@ -7,24 +7,32 @@ module type sim = { type vec_q 'a type vec_d 'a - val from_array_q [nx][ny][nz][q]: [nx][ny][nz][q]real -> [nx][ny][nz](vec_q real) - val to_array_q [nx][ny][nz][q]: [nx][ny][nz](vec_q real) -> [nx][ny][nz][q]real - val from_array_d [nx][ny][nz][d]: [nx][ny][nz][d]real -> [nx][ny][nz](vec_d real) - val to_array_d [nx][ny][nz][d]: [nx][ny][nz](vec_d real) -> [nx][ny][nz][d]real + type ary_q 'a + type ary_d 'a + + val from_array_q [nx][ny][nz]: [nx][ny][nz](ary_q real) -> [nx][ny][nz](vec_q real) + val to_array_q [nx][ny][nz]: [nx][ny][nz](vec_q real) -> [nx][ny][nz](ary_q real) + val from_array_d [nx][ny][nz]: [nx][ny][nz](ary_d real) -> [nx][ny][nz](vec_d real) + val to_array_d [nx][ny][nz]: [nx][ny][nz](vec_d real) -> [nx][ny][nz](ary_d real) val timesteps [nx][ny][nz]: [nx][ny][nz](vec_q real) -> ([nx][ny][nz](vec_d real)) -> ([nx][ny][nz](vec_q real)) -> real -> i64 -> ([nx][ny][nz](vec_q real)) - val run [nx][ny][nz][d][q]: [nx][ny][nz][q]real -> [nx][ny][nz][d]real -> [nx][ny][nz][q]real -> real -> i64 -> [nx][ny][nz][q]real + val run [nx][ny][nz]: [nx][ny][nz](ary_q real) -> [nx][ny][nz](ary_d real) -> [nx][ny][nz](ary_q real) -> real -> i64 -> [nx][ny][nz](ary_q real) } module mk_sim (l: lbm) : (sim with real = l.real with vec_q 'a = l.vec_q a - with vec_d 'a = l.vec_d a) = + with vec_d 'a = l.vec_d a + with ary_q 'a = [l.q]a + with ary_d 'a = [l.dim]a) = { type real = l.real type vec_q 'a = l.vec_q a type vec_d 'a = l.vec_d a + type ary_q 'a = [l.q]a + type ary_d 'a = [l.dim]a + let to_array_q = l.to_array_q let from_array_q = l.from_array_q @@ -38,11 +46,11 @@ module mk_sim (l: lbm) : (sim with real = l.real let fout = l.bc_apply vel dists (l.stream f) in fout - let run [nx][ny][nz][d][q] (fin: [nx][ny][nz][q]real) (vel: [nx][ny][nz][d]real) (dists: [nx][ny][nz][q]real) (omega: real) (max_t: i64): [nx][ny][nz][q]real = - let fin_f = l.from_array_q fin - let vel = l.from_array_d vel - let dist = l.from_array_q dists + let run [nx][ny][nz] (fin: [nx][ny][nz](ary_q real)) (vel: [nx][ny][nz][l.dim]real) (dists: [nx][ny][nz](ary_q real)) (omega: real) (max_t: i64): [nx][ny][nz](ary_q real) = + let fin_f = (l.from_array_q (fin :> [nx][ny][nz](ary_q real)) ) :> [nx][ny][nz](vec_q real) + let vel = l.from_array_d (vel :> [nx][ny][nz][l.dim]real) + let dist = l.from_array_q (dists :> [nx][ny][nz](ary_q real)) let f = timesteps fin_f vel dist omega max_t - in l.to_array_q f + in (l.to_array_q f) :> [nx][ny][nz][l.q]real } \ No newline at end of file diff --git a/src/tests/run_test.sh b/src/tests/run_test.sh index fc9d745a49a5a1a917b43a7dade6015bf453dc79..4818ec3131bf816720141257f13f7d4f70073ba0 100755 --- a/src/tests/run_test.sh +++ b/src/tests/run_test.sh @@ -1,5 +1,6 @@ #!/bin/bash +echo "running tests in ${MESON_BUILD_ROOT}src/tests" cd "${MESON_BUILD_ROOT}src/tests" pwd echo "" | ./liblbm_tests diff --git a/src/tests/tests.fut b/src/tests/tests.fut index b66713d391e2d94b1ebd00e47ac888c3cf2e1721..23b8e1339cf4f33ea1bf3b049cc8fb40fd99466b 100644 --- a/src/tests/tests.fut +++ b/src/tests/tests.fut @@ -1,3 +1,4 @@ +import "../globals_f16" import "../globals_f32" import "../globals_f64" @@ -21,9 +22,11 @@ local module testing (real: real)= { local let all_true (a: []bool) : bool = all (\ai -> ai) a +local let e16 = 1.0e-3f16 local let e32 = 1.0e-5f32 local let e64 = 1.0e-10f64 +local module t16 = testing f16 local module t32 = testing f32 local module t64 = testing f64 @@ -36,18 +39,31 @@ local module t64 = testing f64 -- entry: test_d3q19_w -- compiled nobench input { } -- output { true true } - let test_d3q19_w : []bool = - [(d3q19_f32.reduce_q (+) 0f32 d3q19_f32.w) == 1f32, - (d3q19_f64.reduce_q (+) 0f64 d3q19_f64.w) == 1f64] + [ + t16.approx_eq [d3q19_f16.reduce_q (+) 0f16 d3q19_f16.w] [1f16] e16, + (d3q19_f32.reduce_q (+) 0f32 d3q19_f32.w) == 1f32, + (d3q19_f64.reduce_q (+) 0f64 d3q19_f64.w) == 1f64 + ] -- Test the descriptors ci's. -- == -- entry: test_d3q19_c -- compiled nobench input { } --- output { true true } - +-- output { true true true } let test_d3q19_c : []bool = + let zero_f16 = replicate (d3q19_f16.d) 0f16 + let sum_c_f16 = + d3q19_f16.to_array_d ( + d3q19_f16.map_d (\ca -> + d3q19_f16.reduce_q (+) 0f16 ( + d3q19_f16.map2_q(\wi cai -> + wi * cai + ) d3q19_f16.w ca + ) + ) d3q19_f16.c_t_num + ) + let zero_f32 = replicate (d3q19_f32.d) 0f32 let sum_c_f32 = d3q19_f32.to_array_d ( @@ -60,6 +76,7 @@ let test_d3q19_c : []bool = ) d3q19_f32.c_t_num ) + let zero_f64 = replicate (d3q19_f64.d) 0f64 let sum_c_f64 = d3q19_f64.to_array_d ( @@ -71,7 +88,7 @@ let test_d3q19_c : []bool = ) ) d3q19_f64.c_t_num ) - in [t32.approx_eq sum_c_f32 zero_f32 e32, t64.approx_eq sum_c_f64 zero_f64 e64] + in [t16.approx_eq sum_c_f16 zero_f16 e16, t32.approx_eq sum_c_f32 zero_f32 e32, t64.approx_eq sum_c_f64 zero_f64 e64] -- Test the descriptors equilibrium's.