Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ option( T8CODE_ENABLE_MPI "Enable t8code's features which rely on MPI" ON )
option( T8CODE_ENABLE_VTK "Enable t8code's features which rely on VTK" OFF )
option( T8CODE_ENABLE_OCC "Enable t8code's features which rely on OpenCASCADE" OFF )
option( T8CODE_ENABLE_NETCDF "Enable t8code's features which rely on netCDF" OFF )
# Opt-in (default OFF). When ON, src/CMakeLists.txt adds the compile definition
# T8_ENABLE_STANDALONE_LOOP_UNROLL=1 to the T8 target; the standalone scheme
# implementation checks that macro to emit `#pragma GCC unroll 4` in front of
# its loops over element dimensions and type equations.
option( T8CODE_ENABLE_STANDALONE_LOOP_UNROLL "Enables t8code's loop unroll in standalone scheme" OFF )

option( T8CODE_USE_SYSTEM_SC "Use system-installed sc library" OFF )
option( T8CODE_USE_SYSTEM_P4EST "Use system-installed p4est library" OFF )
Expand Down
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ if( T8CODE_EXPORT_COMPILE_COMMANDS )
set_target_properties( T8 PROPERTIES EXPORT_COMPILE_COMMANDS ON )
endif( T8CODE_EXPORT_COMPILE_COMMANDS )

# Forward the T8CODE_ENABLE_STANDALONE_LOOP_UNROLL build option to the compiler
# as the preprocessor macro T8_ENABLE_STANDALONE_LOOP_UNROLL=1.
# The definition is PUBLIC, so it is applied both when building T8 and when
# building targets that link against T8. When the option is OFF the macro is
# left undefined, so `#if T8_ENABLE_STANDALONE_LOOP_UNROLL` evaluates to false.
if( T8CODE_ENABLE_STANDALONE_LOOP_UNROLL )
target_compile_definitions(T8 PUBLIC T8_ENABLE_STANDALONE_LOOP_UNROLL=1 )
endif()


if( T8CODE_ENABLE_NETCDF )
target_link_libraries( T8 PUBLIC NetCDF::NetCDF )
target_compile_definitions(T8 PUBLIC
Expand Down
99 changes: 99 additions & 0 deletions src/t8_schemes/t8_standalone/t8_standalone_implementation.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,9 @@ struct t8_standalone_scheme
const t8_standalone_element<TEclass> *el2 = (const t8_standalone_element<TEclass> *) elem2;
if (el1->level != el2->level)
return 0;
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
if (el1->coords[idim] != el2->coords[idim])
return 0;
Expand Down Expand Up @@ -411,6 +414,9 @@ struct t8_standalone_scheme
{
t8_standalone_element<TEclass> *el = (t8_standalone_element<TEclass> *) elem;
el->level = 0;
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
el->coords[idim] = 0;
}
Expand Down Expand Up @@ -652,6 +658,9 @@ struct t8_standalone_scheme
T8_ASSERT (0 <= el->level && el->level <= T8_ELEMENT_MAXLEVEL[TEclass]);

int cube_id = 0;
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
cube_id |= (el->coords[idim] & length) ? (1 << idim) : 0;
}
Expand Down Expand Up @@ -772,6 +781,9 @@ struct t8_standalone_scheme
/* Shift the coords to the eighth cube. The type of the last descendant
* is the type of the input element */
t8_element_coord coord_offset = element_get_len (el->level) - element_get_len (level);
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
d->coords[idim] |= coord_offset;
}
Expand Down Expand Up @@ -981,6 +993,9 @@ struct t8_standalone_scheme
last_descendant->type = el->type; /**TODO: Check if this is always true! */
}

#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
int multiplier = 1;
if constexpr (!T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
Expand Down Expand Up @@ -1019,6 +1034,9 @@ struct t8_standalone_scheme
}
// all edges containing dim must be fulfilled with x_d-a_d >= x_j-a_j or x_j-a_j <= x_d-a_d
if constexpr (T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ++ieq) {
if ((t8_type_edge_equations<TEclass>[ieq][0] == dim && get_typebit (el->type, ieq))
|| (t8_type_edge_equations<TEclass>[ieq][1] == dim && !get_typebit (el->type, ieq))) {
Expand All @@ -1035,6 +1053,9 @@ struct t8_standalone_scheme
}
// all edges containing dimid must be fulfilled with x_d-a_d <= x_j-a_j or x_j-a_j >= x_d-a_d
if constexpr (T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ieq++) {
if ((t8_type_edge_equations<TEclass>[ieq][0] == dim && !get_typebit (el->type, ieq))
|| (t8_type_edge_equations<TEclass>[ieq][1] == dim && get_typebit (el->type, ieq))) {
Expand Down Expand Up @@ -1146,6 +1167,9 @@ struct t8_standalone_scheme

/**Adapt typebits*/
if constexpr (T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ieq++) {
/**For all neighboring typebits, change typebit*/
if (t8_type_edge_equations<TEclass>[ieq][0] == facenormal_dim
Expand Down Expand Up @@ -1517,6 +1541,9 @@ struct t8_standalone_scheme
int coords_int[T8_ELEMENT_DIM[TEclass]];
T8_ASSERT (0 <= vertex && vertex < T8_ELEMENT_NUM_CORNERS[TEclass]);
element_compute_coords (el, vertex, coords_int);
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
coords[idim] = coords_int[idim] / (double) get_root_len ();
}
Expand All @@ -1542,7 +1569,13 @@ struct t8_standalone_scheme

if constexpr (T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
double tmp_coords[T8_ELEMENT_DIM[TEclass]] = { 0.0 };
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; ++idim) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int jdim = 0; jdim < T8_ELEMENT_DIM[TEclass]; ++jdim) {
tmp_coords[idim]
+= t8_standalone_lut_transform_coords<TEclass>[el->type][idim][jdim] * current_ref_coords[jdim];
Expand All @@ -1554,17 +1587,29 @@ struct t8_standalone_scheme

for (size_t coord = 0; coord < num_coords; ++coord) {
double tmp_out_coords[T8_ELEMENT_DIM[TEclass]] = {};
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int dim = 0; dim < T8_ELEMENT_DIM[TEclass]; ++dim) {
current_out_coords[dim] = el->coords[dim] + tmp_coords[dim] * length;

current_out_coords[dim] /= (double) get_root_len ();
}
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int dim = 0; dim < T8_ELEMENT_DIM[TEclass]; ++dim) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int jdim = 0; jdim < T8_ELEMENT_DIM[TEclass]; ++jdim) {
tmp_out_coords[dim]
+= t8_standalone_lut_backtransform_coords<TEclass>[0][dim][jdim] * current_out_coords[jdim];
}
}
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int dim = 0; dim < T8_ELEMENT_DIM[TEclass]; ++dim) {
current_out_coords[dim] = tmp_out_coords[dim];
}
Expand All @@ -1576,6 +1621,9 @@ struct t8_standalone_scheme

else {
for (size_t coord = 0; coord < num_coords; ++coord) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int dim = 0; dim < T8_ELEMENT_DIM[TEclass]; ++dim) {
current_out_coords[dim] = el->coords[dim] + current_ref_coords[dim] * length;

Expand Down Expand Up @@ -1784,6 +1832,9 @@ struct t8_standalone_scheme
t8_standalone_element<TEclass> **els = (t8_standalone_element<TEclass> **) elements;

for (unsigned int ielem = 0; ielem < count; ielem++) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
mpiret = sc_MPI_Pack (&(els[ielem]->coords[idim]), 1, sc_MPI_INT, send_buffer, buffer_size, position, comm);
SC_CHECK_MPI (mpiret);
Expand Down Expand Up @@ -1838,6 +1889,9 @@ struct t8_standalone_scheme
t8_standalone_element<TEclass> **els = (t8_standalone_element<TEclass> **) elements;

for (unsigned int ielem = 0; ielem < count; ielem++) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
mpiret = sc_MPI_Unpack (recvbuf, buffer_size, position, &(els[ielem]->coords[idim]), 1, sc_MPI_INT, comm);
SC_CHECK_MPI (mpiret);
Expand Down Expand Up @@ -1878,6 +1932,9 @@ struct t8_standalone_scheme

/* The cube id of the root element is 0.*/
if (level != 0) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int i = 0; i < T8_ELEMENT_DIM[TEclass]; i++) {
cube_id |= ((elem->coords[i] & h) ? 1 << i : 0);
}
Expand Down Expand Up @@ -1930,6 +1987,9 @@ struct t8_standalone_scheme
Therefore this is the level needed so that all coordinates equal.*/
t8_element_coord maxexclor = 0;

#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
maxexclor |= (elem1->coords[idim] ^ elem2->coords[idim]);
}
Expand Down Expand Up @@ -1962,6 +2022,9 @@ struct t8_standalone_scheme
element_cut_coordinates (t8_standalone_element<TEclass> *elem, const int shift) noexcept
{
T8_ASSERT (0 <= shift && shift <= T8_ELEMENT_MAXLEVEL[TEclass]);
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
elem->coords[idim] = (elem->coords[idim] >> shift) << shift;
}
Expand All @@ -1979,6 +2042,9 @@ struct t8_standalone_scheme
set_coords_at_level_to_zero (const t8_standalone_element<TEclass> *elem, t8_standalone_element<TEclass> *parent_elem,
const t8_element_coord length) noexcept
{
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
parent_elem->coords[idim] = elem->coords[idim] & ~length;
}
Expand All @@ -1997,6 +2063,9 @@ struct t8_standalone_scheme
put_cube_id_at_level (const t8_standalone_element<TEclass> *parent, t8_standalone_element<TEclass> *child,
const t8_element_coord length, const t8_cube_id cube_id) noexcept
{
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
child->coords[idim] = parent->coords[idim] + ((cube_id & (1 << idim)) ? length : 0);
}
Expand Down Expand Up @@ -2055,22 +2124,34 @@ struct t8_standalone_scheme
t8_standalone_element<TEclass> *el = (t8_standalone_element<TEclass> *) elem;
const int8_t type = el->type;
int tmp_out_coords[T8_ELEMENT_DIM[TEclass]] = {};
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
coords[idim]
= el->coords[idim] + t8_type_vertex_dim_to_binary<TEclass>[type][vertex][idim] * element_get_len (el->level);
}
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; ++idim) {
for (int jdim = 0; jdim < T8_ELEMENT_DIM[TEclass]; ++jdim) {
tmp_out_coords[idim] += t8_standalone_lut_backtransform_coords<TEclass>[0][idim][jdim] * coords[jdim];
}
}
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; ++idim) {
coords[idim] = tmp_out_coords[idim];
}
//coords = tmp_out_coords;
}
else {
//Hypercubes
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
coords[idim] = elem->coords[idim] + ((vertex & (1 << idim)) >> idim) * element_get_len (elem->level);
}
Expand Down Expand Up @@ -2221,6 +2302,9 @@ struct t8_standalone_scheme
boundary->type = 0;
boundary->level = el->level;
/* Delete the coordinate orthogonal to the given face and combine the remaining coordinates*/
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
const int ifacedim = get_facedim (idim, root_face);

Expand All @@ -2240,6 +2324,9 @@ struct t8_standalone_scheme
}
}
if constexpr (T8_ELEMENT_NUM_EQUATIONS[TEclass]) {
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ieq++) {
const int ifaceeq = t8_standalone_lut_rootface_eq_to_faceeq<TEclass>[root_face][ieq];
if (ifaceeq != -1) {
Expand Down Expand Up @@ -2279,6 +2366,9 @@ struct t8_standalone_scheme

else {
el->level = face->level;
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int idim = 0; idim < T8_ELEMENT_DIM[TEclass]; idim++) {
const int ifacedim = get_facedim (idim, root_face);

Expand Down Expand Up @@ -2313,13 +2403,19 @@ struct t8_standalone_scheme
else {
u_int8_t root_type = 0;
el->type = root_type;
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ieq++) {
const int ifaceeq = t8_standalone_lut_rootface_eq_to_faceeq<TEclass>[root_face][ieq];
if (ifaceeq != -1) {
set_typebit (&el->type, ieq, get_typebit (face->type, ifaceeq));
}
}
/** Set those typebits, that are connected to the face_normaldim of root_face*/
#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int ieq = 0; ieq < T8_ELEMENT_NUM_EQUATIONS[TEclass]; ieq++) {
const int facenormal_dim = t8_standalone_lut_type_face_to_facenormal_dim<TEclass>[root_type][root_face];
if (t8_type_edge_equations<TEclass>[ieq][0] == facenormal_dim) {
Expand Down Expand Up @@ -2385,6 +2481,9 @@ struct t8_standalone_scheme
u_int8_t type = 0;
T8_ASSERT (0 <= el->level && el->level <= T8_ELEMENT_MAXLEVEL[TEclass]);

#if T8_ENABLE_STANDALONE_LOOP_UNROLL
#pragma GCC unroll 4
#endif
for (int e = 0; e < T8_ELEMENT_NUM_EQUATIONS[TEclass]; e++) {
t8_element_coord coord_v0 = el->coords[t8_type_edge_equations<TEclass>[e][0]];
t8_element_coord coord_v1 = el->coords[t8_type_edge_equations<TEclass>[e][1]];
Expand Down