27 #ifdef DEAL_II_WITH_TBB
28 # include <tbb/blocked_range.h>
29 # include <tbb/parallel_for.h>
30 # include <tbb/task.h>
31 # include <tbb/task_scheduler_init.h>
44 namespace MatrixFreeFunctions
46 #ifdef DEAL_II_WITH_TBB
114 tbb::empty_task::spawn(*
dummy);
131 const unsigned int partition_in,
133 const bool is_blocked_in =
false)
144 tbb::empty_task *root =
145 new (tbb::task::allocate_root()) tbb::empty_task;
148 const unsigned int n_blocked_workers =
150 const unsigned int n_workers =
152 std::vector<CellWork *> worker(n_workers);
153 std::vector<CellWork *> blocked_worker(n_blocked_workers);
155 root->set_ref_count(evens + 1);
156 for (
unsigned int j = 0; j < evens; j++)
158 worker[j] =
new (root->allocate_child())
165 worker[j]->set_ref_count(2);
166 blocked_worker[j - 1]->
dummy =
167 new (worker[j]->allocate_child()) tbb::empty_task;
168 tbb::task::spawn(*blocked_worker[j - 1]);
171 worker[j]->set_ref_count(1);
174 blocked_worker[j] =
new (worker[j]->allocate_child())
185 worker[evens] =
new (worker[j]->allocate_child())
191 tbb::task::spawn(*worker[evens]);
195 tbb::empty_task *child =
196 new (worker[j]->allocate_child()) tbb::empty_task();
197 tbb::task::spawn(*child);
202 root->wait_for_all();
203 root->destroy(*root);
205 tbb::empty_task::spawn(*
dummy);
229 const unsigned int partition_in)
238 const unsigned int start_index =
241 const unsigned int end_index =
244 worker.
cell(std::make_pair(start_index, end_index));
264 const unsigned int partition_in,
266 const bool is_blocked_in)
277 const unsigned int n_chunks =
282 parallel_for(tbb::blocked_range<unsigned int>(0, n_chunks, 1),
285 tbb::empty_task::spawn(*
dummy);
344 #ifdef DEAL_II_WITH_TBB
351 tbb::empty_task *root =
352 new (tbb::task::allocate_root()) tbb::empty_task;
353 root->set_ref_count(
evens + 1);
354 std::vector<partition::PartitionWork *> worker(
n_workers);
355 std::vector<partition::PartitionWork *> blocked_worker(
359 worker_compr->set_ref_count(1);
360 for (
unsigned int j = 0; j <
evens; j++)
364 worker[j] =
new (root->allocate_child())
366 worker[j]->set_ref_count(2);
367 blocked_worker[j - 1]->
dummy =
368 new (worker[j]->allocate_child()) tbb::empty_task;
369 tbb::task::spawn(*blocked_worker[j - 1]);
373 worker[j] =
new (worker_compr->allocate_child())
375 worker[j]->set_ref_count(2);
377 new (worker[j]->allocate_child())
379 tbb::task::spawn(*worker_dist);
383 blocked_worker[j] =
new (worker[j]->allocate_child())
390 worker[
evens] =
new (worker[j]->allocate_child())
395 tbb::task::spawn(*worker[
evens]);
399 tbb::empty_task *child =
400 new (worker[j]->allocate_child()) tbb::empty_task();
401 tbb::task::spawn(*child);
406 root->wait_for_all();
407 root->destroy(*root);
423 tbb::empty_task *root =
424 new (tbb::task::allocate_root()) tbb::empty_task;
425 root->set_ref_count(
evens + 1);
430 std::vector<color::PartitionWork *> worker(
n_workers);
431 std::vector<color::PartitionWork *> blocked_worker(
433 unsigned int worker_index = 0, slice_index = 0;
434 int spawn_index_child = -2;
437 worker_compr->set_ref_count(1);
438 for (
unsigned int part = 0;
443 worker[worker_index] =
444 new (worker_compr->allocate_child())
450 worker[worker_index] =
new (root->allocate_child())
459 worker[worker_index]->set_ref_count(1);
461 worker[worker_index] =
462 new (worker[worker_index - 1]->allocate_child())
468 worker[worker_index]->set_ref_count(2);
471 blocked_worker[(part - 1) / 2]->dummy =
472 new (worker[worker_index]->allocate_child())
475 if (spawn_index_child == -1)
476 tbb::task::spawn(*blocked_worker[(part - 1) / 2]);
479 Assert(spawn_index_child >= 0,
481 tbb::task::spawn(*worker[spawn_index_child]);
483 spawn_index_child = -2;
488 new (worker[worker_index]->allocate_child())
490 tbb::task::spawn(*worker_dist);
498 blocked_worker[part / 2] =
499 new (worker[worker_index - 1]->allocate_child())
507 blocked_worker[part / 2]->set_ref_count(1);
508 worker[worker_index] =
new (
509 blocked_worker[part / 2]->allocate_child())
518 spawn_index_child = -1;
527 worker[worker_index]->set_ref_count(1);
530 worker[worker_index] =
531 new (worker[worker_index - 1]->allocate_child())
537 spawn_index_child = worker_index;
542 tbb::empty_task *
final =
543 new (worker[worker_index - 1]->allocate_child())
545 tbb::task::spawn(*
final);
546 spawn_index_child = worker_index - 1;
552 tbb::task::spawn(*worker[spawn_index_child]);
554 root->wait_for_all();
555 root->destroy(*root);
568 tbb::empty_task *root =
569 new (tbb::task::allocate_root()) tbb::empty_task;
570 root->set_ref_count(2);
572 new (root->allocate_child())
574 tbb::empty_task::spawn(*worker);
575 root->wait_for_all();
576 root->destroy(*root);
668 template <
typename StreamType>
671 const std::size_t data_length)
const
678 out << memory_c.
min <<
"/" << memory_c.
avg <<
"/" << memory_c.
max;
679 out <<
" MB" << std::endl;
703 std::vector<unsigned int> &boundary_cells)
707 unsigned int fillup_needed =
715 std::vector<unsigned int> new_boundary_cells;
716 new_boundary_cells.reserve(boundary_cells.size());
718 unsigned int next_free_slot = 0, bound_index = 0;
719 while (fillup_needed > 0 && bound_index < boundary_cells.size())
721 if (next_free_slot < boundary_cells[bound_index])
725 if (next_free_slot + fillup_needed <=
726 boundary_cells[bound_index])
728 for (
unsigned int j =
729 boundary_cells[bound_index] - fillup_needed;
730 j < boundary_cells[bound_index];
732 new_boundary_cells.push_back(j);
739 for (
unsigned int j = next_free_slot;
740 j < boundary_cells[bound_index];
742 new_boundary_cells.push_back(j);
744 boundary_cells[bound_index] - next_free_slot;
747 new_boundary_cells.push_back(boundary_cells[bound_index]);
748 next_free_slot = boundary_cells[bound_index] + 1;
751 while (fillup_needed > 0 &&
752 (new_boundary_cells.size() == 0 ||
754 new_boundary_cells.push_back(new_boundary_cells.back() + 1);
755 while (bound_index < boundary_cells.size())
756 new_boundary_cells.push_back(boundary_cells[bound_index++]);
758 boundary_cells.swap(new_boundary_cells);
762 std::sort(boundary_cells.begin(), boundary_cells.end());
775 const std::vector<unsigned int> &cells_with_comm,
776 const unsigned int dofs_per_cell,
777 const bool categories_are_hp,
778 const std::vector<unsigned int> &cell_vectorization_categories,
779 const bool cell_vectorization_categories_strict,
780 const std::vector<unsigned int> &parent_relation,
781 std::vector<unsigned int> & renumbering,
782 std::vector<unsigned char> & incompletely_filled_vectorization)
808 unsigned int vectorization_length_bits = 0;
810 while (my_length >>= 1)
811 ++vectorization_length_bits;
812 const unsigned int n_lanes = 1 << vectorization_length_bits;
817 unsigned int n_categories = 1;
819 if (cell_vectorization_categories.empty() ==
false)
824 std::set<unsigned int> used_categories;
826 used_categories.insert(cell_vectorization_categories[i]);
827 std::vector<unsigned int> used_categories_vector(
828 used_categories.size());
830 for (
const auto &it : used_categories)
831 used_categories_vector[n_categories++] = it;
834 const unsigned int index =
836 used_categories_vector.end(),
837 cell_vectorization_categories[i]) -
838 used_categories_vector.begin();
840 tight_category_map[i] = index;
847 std::vector<std::vector<unsigned int>> renumbering_category(n_categories);
849 renumbering_category[tight_category_map[i]].push_back(i);
851 if (cell_vectorization_categories_strict ==
false && n_categories > 1)
852 for (
unsigned int j = n_categories - 1; j > 0; --j)
854 unsigned int lower_index = j - 1;
855 while (renumbering_category[j].size() % n_lanes)
857 while (renumbering_category[j].size() % n_lanes &&
858 !renumbering_category[lower_index].empty())
860 renumbering_category[j].push_back(
861 renumbering_category[lower_index].back());
862 renumbering_category[lower_index].pop_back();
864 if (lower_index == 0)
875 std::vector<unsigned int> temporary_numbering;
877 (n_lanes - 1) * n_categories);
878 const unsigned int n_cells_per_parent =
879 std::count(parent_relation.begin(), parent_relation.end(), 0);
880 std::vector<unsigned int> category_size;
881 for (
unsigned int j = 0; j < n_categories; ++j)
883 std::vector<std::pair<unsigned int, unsigned int>> grouped_cells;
884 std::vector<unsigned int> other_cells;
885 for (
const unsigned int cell : renumbering_category[j])
886 if (parent_relation.empty() ||
888 other_cells.push_back(cell);
890 grouped_cells.emplace_back(parent_relation[cell], cell);
893 std::sort(grouped_cells.begin(), grouped_cells.end());
894 std::vector<unsigned int> n_cells_per_group;
895 unsigned int length = 0;
896 for (
unsigned int i = 0; i < grouped_cells.size(); ++i, ++length)
897 if (i > 0 && grouped_cells[i].
first != grouped_cells[i - 1].
first)
899 n_cells_per_group.push_back(length);
903 n_cells_per_group.push_back(length);
908 auto group_it = grouped_cells.begin();
909 for (
unsigned int length : n_cells_per_group)
910 if (length < n_cells_per_parent)
911 for (
unsigned int j = 0; j < length; ++j)
912 other_cells.push_back((group_it++)->second);
918 for (
unsigned int j = 0; j < length; ++j)
919 temporary_numbering.push_back((group_it++)->second);
923 std::sort(other_cells.begin(), other_cells.end());
924 temporary_numbering.insert(temporary_numbering.end(),
928 while (temporary_numbering.size() % n_lanes != 0)
931 category_size.push_back(temporary_numbering.size());
935 std::vector<bool> batch_with_comm(temporary_numbering.size() / n_lanes,
937 std::vector<unsigned int> temporary_numbering_inverse(
n_active_cells);
938 for (
unsigned int i = 0; i < temporary_numbering.size(); ++i)
940 temporary_numbering_inverse[temporary_numbering[i]] = i;
941 for (
const unsigned int cell : cells_with_comm)
942 batch_with_comm[temporary_numbering_inverse[cell] / n_lanes] =
true;
948 std::vector<std::array<unsigned int, 3>> batch_order;
949 std::vector<std::array<unsigned int, 3>> batch_order_comm;
950 for (
unsigned int i = 0; i < temporary_numbering.size(); i += n_lanes)
952 unsigned int max_index = 0;
953 for (
unsigned int j = 0; j < n_lanes; ++j)
955 max_index =
std::max(temporary_numbering[i + j], max_index);
956 const unsigned int category_hp =
958 std::upper_bound(category_size.begin(), category_size.end(), i) -
959 category_size.begin() :
961 const std::array<unsigned int, 3> next{{category_hp, max_index, i}};
962 if (batch_with_comm[i / n_lanes])
963 batch_order_comm.emplace_back(next);
965 batch_order.emplace_back(next);
968 std::sort(batch_order.begin(), batch_order.end());
969 std::sort(batch_order_comm.begin(), batch_order_comm.end());
976 std::vector<unsigned int> blocks;
979 if (batch_order.empty())
980 std::swap(batch_order_comm, batch_order);
983 blocks = {0,
static_cast<unsigned int>(batch_order.size())};
988 const unsigned int comm_begin = batch_order.size() / 2;
989 batch_order.insert(batch_order.begin() + comm_begin,
990 batch_order_comm.begin(),
991 batch_order_comm.end());
992 const unsigned int comm_end = comm_begin + batch_order_comm.size();
993 const unsigned int end = batch_order.size();
994 blocks = {0, comm_begin, comm_end,
end};
998 const unsigned int n_cell_batches = batch_order.size();
999 const unsigned int n_ghost_batches =
1001 incompletely_filled_vectorization.resize(n_cell_batches +
1007 renumbering.clear();
1011 unsigned int counter = 0;
1012 for (
unsigned int block = 0; block < blocks.size() - 1; ++block)
1014 const unsigned int grain_size =
1015 std::max((2048U / dofs_per_cell) / 8 * 4, 2U);
1016 for (
unsigned int k = blocks[block]; k < blocks[block + 1];
1019 std::min(k + grain_size, blocks[block + 1]));
1023 for (
unsigned int k = blocks[block]; k < blocks[block + 1]; ++k)
1025 const unsigned int pos = batch_order[k][2];
1027 for (; j < n_lanes && temporary_numbering[pos + j] !=
1030 renumbering[counter++] = temporary_numbering[pos + j];
1032 incompletely_filled_vectorization[k] = j;
1042 if (!cell_vectorization_categories.empty())
1045 renumbering[cell] = cell;
1048 incompletely_filled_vectorization.back() =
n_ghost_cells % n_lanes;
1053 std::vector<unsigned int> renumber_cpy(renumbering);
1054 std::sort(renumber_cpy.begin(), renumber_cpy.end());
1055 for (
unsigned int i = 0; i < renumber_cpy.size(); ++i)
1064 const std::vector<unsigned int> &boundary_cells,
1065 std::vector<unsigned int> & renumbering,
1066 std::vector<unsigned char> & incompletely_filled_vectorization)
1068 const unsigned int n_cell_batches =
1070 const unsigned int n_ghost_slots =
1072 incompletely_filled_vectorization.resize(n_cell_batches + n_ghost_slots);
1074 incompletely_filled_vectorization[n_cell_batches - 1] =
1078 incompletely_filled_vectorization[n_cell_batches + n_ghost_slots - 1] =
1082 std::vector<unsigned int> reverse_numbering(
1084 for (
unsigned int j = 0; j < boundary_cells.size(); ++j)
1085 reverse_numbering[boundary_cells[j]] = j;
1086 unsigned int counter = boundary_cells.size();
1089 reverse_numbering[j] = counter++;
1096 renumbering.push_back(j);
1104 const unsigned int n_macro_boundary_cells =
1108 (n_cell_batches - n_macro_boundary_cells) / 2);
1110 n_macro_boundary_cells);
1148 1 <<
static_cast<unsigned int>(std::log2(
block_size + 1));
1159 std::vector<unsigned int> & renumbering,
1160 std::vector<unsigned char> &irregular_cells,
1164 if (n_cell_batches == 0)
1169 unsigned int partition = 0, counter = 0;
1183 std::vector<unsigned int> cell_partition(
n_blocks,
1188 std::vector<unsigned int> partition_list(
n_blocks, 0);
1189 std::vector<unsigned int> partition_color_list(
n_blocks, 0);
1192 std::vector<unsigned int> partition_size(2, 0);
1198 unsigned int cluster_size = 1;
1214 partition_color_list);
1216 partition_list = renumbering;
1221 std::vector<unsigned int> sorted_pc_list(partition_color_list);
1222 std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1223 for (
unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1230 std::vector<unsigned int> block_start(n_cell_batches + 1);
1231 std::vector<unsigned char> irregular(n_cell_batches);
1233 unsigned int mcell_start = 0;
1235 for (
unsigned int block = 0; block <
n_blocks; block++)
1237 block_start[block + 1] = block_start[block];
1238 for (
unsigned int mcell = mcell_start;
1242 unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1243 irregular_cells[mcell] :
1245 block_start[block + 1] += n_comp;
1251 unsigned int counter_macro = 0;
1252 unsigned int block_size_last =
1254 if (block_size_last == 0)
1257 unsigned int tick = 0;
1258 for (
unsigned int block = 0; block <
n_blocks; block++)
1260 unsigned int present_block = partition_color_list[block];
1261 for (
unsigned int cell = block_start[present_block];
1262 cell < block_start[present_block + 1];
1264 renumbering[counter++] = partition_list[cell];
1265 unsigned int this_block_size =
1273 for (
unsigned int j = 0; j < this_block_size; j++)
1274 irregular[counter_macro++] =
1275 irregular_cells[present_block *
block_size + j];
1280 irregular_cells.swap(irregular);
1287 std::vector<unsigned int> sorted_renumbering(renumbering);
1288 std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1289 for (
unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1305 const std::vector<unsigned int> &cell_active_fe_index,
1307 std::vector<unsigned int> & renumbering,
1308 std::vector<unsigned char> & irregular_cells,
1312 if (n_cell_batches == 0)
1322 connectivity_blocks);
1334 std::vector<unsigned int> cell_partition(
n_blocks,
1340 std::vector<unsigned int> partition_list(
n_blocks, 0);
1341 std::vector<unsigned int> partition_2layers_list(
n_blocks, 0);
1344 std::vector<unsigned int> partition_size(2, 0);
1352 unsigned int cluster_size = 1;
1379 cell_active_fe_index,
1386 partition_2layers_list,
1396 partition_2layers_list);
1402 std::vector<unsigned int> sorted_pc_list(partition_2layers_list);
1403 std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1404 for (
unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1411 renumbering_in.swap(renumbering);
1417 for (
unsigned int j = 0; j < renumbering.size(); j++)
1418 renumbering[j] = renumbering_in[partition_2layers_list[j]];
1427 std::vector<unsigned int> block_start(n_cell_batches + 1);
1428 std::vector<unsigned char> irregular(n_cell_batches);
1430 unsigned int counter = 0;
1431 unsigned int mcell_start = 0;
1433 for (
unsigned int block = 0; block <
n_blocks; block++)
1435 block_start[block + 1] = block_start[block];
1436 for (
unsigned int mcell = mcell_start;
1440 unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1441 irregular_cells[mcell] :
1443 block_start[block + 1] += n_comp;
1449 unsigned int counter_macro = 0;
1450 unsigned int block_size_last =
1452 if (block_size_last == 0)
1455 unsigned int tick = 0;
1456 for (
unsigned int block = 0; block <
n_blocks; block++)
1458 unsigned int present_block = partition_2layers_list[block];
1459 for (
unsigned int cell = block_start[present_block];
1460 cell < block_start[present_block + 1];
1462 renumbering[counter++] = renumbering_in[cell];
1463 unsigned int this_block_size =
1471 for (
unsigned int j = 0; j < this_block_size; j++)
1472 irregular[counter_macro++] =
1473 irregular_cells[present_block *
block_size + j];
1478 irregular_cells.swap(irregular);
1484 std::vector<unsigned int> sorted_renumbering(renumbering);
1485 std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1486 for (
unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1500 const std::vector<unsigned int> &cell_active_fe_index,
1502 std::vector<unsigned int> & renumbering,
1503 std::vector<unsigned char> & irregular_cells,
1507 if (n_cell_batches == 0)
1524 std::vector<unsigned int> partition_partition_list(
n_active_cells, 0);
1527 std::vector<unsigned int> partition_size(2, 0);
1543 cell_active_fe_index,
1550 partition_partition_list,
1553 partition_list.swap(renumbering);
1555 for (
unsigned int j = 0; j < renumbering.size(); j++)
1556 renumbering[j] = partition_list[partition_partition_list[j]];
1568 const std::vector<unsigned char> &irregular_cells,
1572 std::vector<std::vector<unsigned int>> cell_blocks(
n_blocks);
1574 unsigned int cell = 0;
1575 for (
unsigned int i = 0, mcell = 0; i <
n_blocks; ++i)
1577 for (
unsigned int c = 0;
1581 unsigned int ncomp = (irregular_cells[mcell] > 0) ?
1582 irregular_cells[mcell] :
1584 for (
unsigned int c = 0; c < ncomp; ++c, ++cell)
1586 cell_blocks[i].push_back(cell);
1587 touched_cells[cell] = i;
1592 for (
unsigned int i = 0; i < cell_blocks.size(); ++i)
1593 for (
unsigned int col = 0; col < cell_blocks[i].size(); ++col)
1596 connectivity_cells.
begin(cell_blocks[i][col]);
1597 it != connectivity_cells.
end(cell_blocks[i][col]);
1600 if (touched_cells[it->column()] != i)
1601 connectivity_blocks.
add(i, touched_cells[it->column()]);
1613 const std::vector<unsigned int> &cell_active_fe_index,
1615 const unsigned int cluster_size,
1617 const std::vector<unsigned int> &cell_partition,
1618 const std::vector<unsigned int> &partition_list,
1619 const std::vector<unsigned int> &partition_size,
1620 std::vector<unsigned int> & partition_partition_list,
1621 std::vector<unsigned char> & irregular_cells)
1624 const unsigned int n_ghost_slots =
1628 std::vector<unsigned int> neighbor_list;
1631 std::vector<unsigned int> neighbor_neighbor_list;
1635 irregular_cells.back() = 0;
1638 unsigned int max_fe_index = 0;
1639 for (
const unsigned int fe_index : cell_active_fe_index)
1640 max_fe_index =
std::max(fe_index, max_fe_index);
1646 unsigned int n_cell_batches_before = 0;
1652 std::vector<unsigned int> cell_partition_l2(
1658 unsigned int counter = 0;
1659 unsigned int missing_macros;
1660 for (
unsigned int part = 0; part <
partition; ++part)
1662 neighbor_neighbor_list.resize(0);
1663 neighbor_list.resize(0);
1665 unsigned int partition_l2 = 0;
1666 unsigned int start_up = partition_size[part];
1667 unsigned int partition_counter = 0;
1670 if (neighbor_list.size() == 0)
1673 partition_counter = 0;
1674 for (
unsigned int j = start_up;
1675 j < partition_size[part + 1];
1677 if (cell_partition[partition_list[j]] == part &&
1678 cell_partition_l2[partition_list[j]] ==
1683 partition_counter = 1;
1687 cell_partition_l2[partition_list[start_up]] =
1689 neighbor_neighbor_list.push_back(
1690 partition_list[start_up]);
1691 partition_partition_list[counter++] =
1692 partition_list[start_up];
1699 partition_counter = 0;
1700 for (
const unsigned int neighbor : neighbor_list)
1702 Assert(cell_partition[neighbor] == part,
1704 Assert(cell_partition_l2[neighbor] == partition_l2 - 1,
1706 auto neighbor_it = connectivity.
begin(neighbor);
1707 const auto end_it = connectivity.
end(neighbor);
1708 for (; neighbor_it != end_it; ++neighbor_it)
1710 if (cell_partition[neighbor_it->column()] == part &&
1711 cell_partition_l2[neighbor_it->column()] ==
1714 cell_partition_l2[neighbor_it->column()] =
1716 neighbor_neighbor_list.push_back(
1717 neighbor_it->column());
1718 partition_partition_list[counter++] =
1720 partition_counter++;
1725 if (partition_counter > 0)
1727 int index_before = neighbor_neighbor_list.size(),
1728 index = index_before;
1733 std::vector<unsigned int> remaining_per_cell_batch(
1735 std::vector<std::vector<unsigned int>>
1736 renumbering_fe_index;
1739 if (hp_bool ==
true)
1741 renumbering_fe_index.resize(max_fe_index + 1);
1742 for (cell = counter - partition_counter;
1746 renumbering_fe_index
1747 [cell_active_fe_index.empty() ?
1749 cell_active_fe_index
1750 [partition_partition_list[cell]]]
1751 .push_back(partition_partition_list[cell]);
1754 for (
unsigned int j = 0; j < max_fe_index + 1; j++)
1756 remaining_per_cell_batch[j] =
1757 renumbering_fe_index[j].size() %
1759 if (remaining_per_cell_batch[j] != 0)
1762 ((renumbering_fe_index[j].size() +
1769 remaining_per_cell_batch.resize(1);
1770 remaining_per_cell_batch[0] =
1774 if (remaining_per_cell_batch[0] != 0)
1781 cluster_size - (missing_macros % cluster_size);
1784 while (missing_macros > 0 || filled ==
false)
1788 index = neighbor_neighbor_list.size();
1789 if (index == index_before)
1791 if (missing_macros != 0)
1793 neighbor_neighbor_list.resize(0);
1798 index_before = index;
1801 unsigned int additional =
1802 neighbor_neighbor_list[index];
1813 for (; neighbor !=
end; ++neighbor)
1815 if (cell_partition[neighbor->
column()] == part &&
1816 cell_partition_l2[neighbor->
column()] ==
1819 unsigned int this_index = 0;
1820 if (hp_bool ==
true)
1822 cell_active_fe_index.empty() ?
1824 cell_active_fe_index[neighbor
1831 if (missing_macros > 0 ||
1832 remaining_per_cell_batch[this_index] > 0)
1834 cell_partition_l2[neighbor->
column()] =
1836 neighbor_neighbor_list.push_back(
1838 if (hp_bool ==
true)
1839 renumbering_fe_index[this_index]
1840 .push_back(neighbor->
column());
1841 partition_partition_list[counter] =
1844 partition_counter++;
1845 if (remaining_per_cell_batch
1846 [this_index] == 0 &&
1849 remaining_per_cell_batch[this_index]++;
1850 if (remaining_per_cell_batch
1854 remaining_per_cell_batch[this_index] =
1857 if (missing_macros == 0)
1860 for (
unsigned int fe_ind = 0;
1861 fe_ind < max_fe_index + 1;
1863 if (remaining_per_cell_batch
1873 if (hp_bool ==
true)
1878 cell = counter - partition_counter;
1879 for (
unsigned int j = 0; j < max_fe_index + 1; j++)
1881 for (
const unsigned int jj :
1882 renumbering_fe_index[j])
1883 renumbering[cell++] = jj;
1884 if (renumbering_fe_index[j].size() %
1887 irregular_cells[renumbering_fe_index[j].size() /
1889 n_cell_batches_before] =
1890 renumbering_fe_index[j].size() %
1892 n_cell_batches_before +=
1893 (renumbering_fe_index[j].size() +
1896 renumbering_fe_index[j].resize(0);
1901 n_cell_batches_before +=
1905 irregular_cells[n_cell_batches_before] =
1907 n_cell_batches_before++;
1914 neighbor_list = neighbor_neighbor_list;
1915 neighbor_neighbor_list.resize(0);
1921 if (hp_bool ==
true)
1923 partition_partition_list.swap(renumbering);
1935 const std::vector<unsigned int> &cell_partition,
1936 const std::vector<unsigned int> &partition_list,
1937 const std::vector<unsigned int> &partition_size,
1938 std::vector<unsigned int> & partition_color_list)
1941 std::vector<unsigned int> cell_color(
n_blocks, n_cell_batches);
1942 std::vector<bool> color_finder;
1946 unsigned int color_counter = 0, index_counter = 0;
1947 for (
unsigned int part = 0; part <
partition; part++)
1950 unsigned int max_color = 0;
1951 for (
unsigned int k = partition_size[part];
1952 k < partition_size[part + 1];
1955 unsigned int cell = partition_list[k];
1956 unsigned int n_neighbors = connectivity.
row_length(cell);
1960 color_finder.resize(n_neighbors + 1);
1961 for (
unsigned int j = 0; j <= n_neighbors; ++j)
1962 color_finder[j] =
true;
1964 connectivity.
begin(cell),
1965 end = connectivity.
end(cell);
1966 for (; neighbor !=
end; ++neighbor)
1970 if (cell_partition[neighbor->
column()] == part &&
1971 cell_color[neighbor->
column()] <= n_neighbors)
1972 color_finder[cell_color[neighbor->
column()]] =
false;
1975 cell_color[cell] = 0;
1976 while (color_finder[cell_color[cell]] ==
false)
1978 if (cell_color[cell] > max_color)
1979 max_color = cell_color[cell];
1988 for (
unsigned int k = partition_size[part];
1989 k < partition_size[part + 1];
1992 unsigned int cell = partition_list[k];
1993 if (cell_color[cell] ==
color)
1995 partition_color_list[color_counter++] = cell;
2009 const unsigned int cluster_size,
2010 std::vector<unsigned int> & cell_partition,
2011 std::vector<unsigned int> & partition_list,
2012 std::vector<unsigned int> & partition_size,
2022 std::vector<unsigned int> neighbor_list;
2025 std::vector<unsigned int> neighbor_neighbor_list;
2035 unsigned int counter = 0;
2036 unsigned int start_nonboundary =
2043 if (n_cell_batches == 0)
2046 start_nonboundary = n_cell_batches;
2061 unsigned int start_up = 0;
2063 unsigned int remainder = cluster_size;
2071 if (start_nonboundary > 0)
2073 for (
unsigned int cell = 0; cell < start_nonboundary; ++cell)
2075 const unsigned int cell_nn = cell;
2077 neighbor_list.push_back(cell_nn);
2078 partition_list[counter++] = cell_nn;
2079 partition_size.back()++;
2081 start_nonboundary = 0;
2082 remainder -= (start_nonboundary % cluster_size);
2083 if (remainder == cluster_size)
2091 neighbor_list.push_back(start_up);
2092 partition_list[counter++] = start_up;
2093 partition_size.back()++;
2096 if (remainder == cluster_size)
2099 int index_before = neighbor_list.size(), index = index_before,
2101 while (remainder > 0)
2103 if (index == index_stop)
2105 index = neighbor_list.size();
2106 if (index == index_before)
2108 neighbor_list.resize(0);
2111 index_stop = index_before;
2112 index_before = index;
2115 unsigned int additional = neighbor_list[index];
2117 connectivity.
begin(additional),
2119 connectivity.
end(additional);
2120 for (; neighbor !=
end; ++neighbor)
2122 if (cell_partition[neighbor->
column()] ==
2125 partition_size.back()++;
2127 neighbor_list.push_back(neighbor->
column());
2128 partition_list[counter++] = neighbor->
column();
2136 while (neighbor_list.size() > 0)
2141 unsigned int partition_counter = 0;
2144 partition_size.push_back(partition_size.back());
2148 for (
const unsigned int cell : neighbor_list)
2152 auto neighbor = connectivity.
begin(cell);
2153 const auto end = connectivity.
end(cell);
2154 for (; neighbor !=
end; ++neighbor)
2156 if (cell_partition[neighbor->column()] ==
2159 partition_size.back()++;
2160 cell_partition[neighbor->column()] =
partition;
2164 neighbor_neighbor_list.push_back(neighbor->column());
2165 partition_list[counter++] = neighbor->column();
2166 partition_counter++;
2170 remainder = cluster_size - (partition_counter % cluster_size);
2171 if (remainder == cluster_size)
2174 int index_before = neighbor_neighbor_list.size(),
2175 index = index_before;
2176 while (remainder > 0)
2178 if (index == index_stop)
2180 index = neighbor_neighbor_list.size();
2181 if (index == index_before)
2183 neighbor_neighbor_list.resize(0);
2186 index_stop = index_before;
2187 index_before = index;
2190 unsigned int additional = neighbor_neighbor_list[index];
2196 for (; neighbor !=
end; ++neighbor)
2198 if (cell_partition[neighbor->
column()] ==
2201 partition_size.back()++;
2203 neighbor_neighbor_list.push_back(neighbor->
column());
2204 partition_list[counter++] = neighbor->
column();
2212 neighbor_list = neighbor_neighbor_list;
2213 neighbor_neighbor_list.resize(0);
2219 for (
unsigned int j = start_up; j <
n_blocks; ++j)
2225 remainder = cluster_size;
2248 for (
unsigned int part = 0; part <
partition; part++)
2269 internal::MatrixFreeFunctions::TaskInfo::print_memory_statistics<std::ostream>(
2271 const std::size_t)
const;
static unsigned int n_threads()
tbb::task * execute() override
MPICommunication(MFWorkerInterface &worker_in, const bool do_compress)
MFWorkerInterface & worker
CellWork(MFWorkerInterface &worker_in, const TaskInfo &task_info_in, const unsigned int partition_in)
const unsigned int partition
MFWorkerInterface & worker
void operator()(const tbb::blocked_range< unsigned int > &r) const
const TaskInfo & task_info
MFWorkerInterface & worker
PartitionWork(MFWorkerInterface &worker_in, const unsigned int partition_in, const TaskInfo &task_info_in, const bool is_blocked_in)
tbb::task * execute() override
const unsigned int partition
const TaskInfo & task_info
const TaskInfo & task_info
MFWorkerInterface ** worker_pointer
const unsigned int partition
MFWorkerInterface * worker
ActualCellWork(MFWorkerInterface **worker_pointer, const unsigned int partition, const TaskInfo &task_info)
ActualCellWork(MFWorkerInterface &worker, const unsigned int partition, const TaskInfo &task_info)
tbb::task * execute() override
CellWork(MFWorkerInterface &worker, const unsigned int partition, const TaskInfo &task_info, const bool is_blocked)
const TaskInfo & task_info
PartitionWork(MFWorkerInterface &function_in, const unsigned int partition_in, const TaskInfo &task_info_in, const bool is_blocked_in=false)
MFWorkerInterface & function
const unsigned int partition
tbb::task * execute() override
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
static ::ExceptionBase & ExcInternalError()
#define Assert(cond, exc)
static ::ExceptionBase & ExcNotImplemented()
#define AssertDimension(dim1, dim2)
#define AssertIndexRange(index, range)
#define AssertThrow(cond, exc)
size_type row_length(const size_type row) const
void add(const size_type i, const size_type j)
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
VectorType::value_type * end(VectorType &V)
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
Iterator lower_bound(Iterator first, Iterator last, const T &val)
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
unsigned int minimum_parallel_grain_size
static const unsigned int invalid_unsigned_int
void parallel_for(Iterator x_begin, Iterator x_end, const Functor &functor, const unsigned int grainsize)
virtual void face(const unsigned int range_index)=0
virtual void zero_dst_vector_range(const unsigned int range_index)=0
virtual void cell(const std::pair< unsigned int, unsigned int > &cell_range)=0
virtual void boundary(const unsigned int range_index)=0
virtual void vector_compress_start()=0
Starts the communication for the vector compress operation.
virtual void cell_loop_post_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_start()=0
Starts the communication for the update ghost values operation.
virtual void cell_loop_pre_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_finish()=0
Finishes the communication for the update ghost values operation.
virtual void vector_compress_finish()=0
Finishes the communication for the vector compress operation.
unsigned int n_ghost_cells
std::size_t memory_consumption() const
std::vector< unsigned int > boundary_partition_data
void loop(MFWorkerInterface &worker) const
void make_thread_graph_partition_color(DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
std::vector< unsigned int > partition_n_workers
unsigned int vectorization_length
void create_blocks_serial(const std::vector< unsigned int > &cells_with_comm, const unsigned int dofs_per_cell, const bool categories_are_hp, const std::vector< unsigned int > &cell_vectorization_categories, const bool cell_vectorization_categories_strict, const std::vector< unsigned int > &parent_relation, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
void make_connectivity_cells_to_blocks(const std::vector< unsigned char > &irregular_cells, const DynamicSparsityPattern &connectivity_cells, DynamicSparsityPattern &connectivity_blocks) const
void make_partitioning_within_partitions_post_blocked(const DynamicSparsityPattern &connectivity, const std::vector< unsigned int > &cell_active_fe_index, const unsigned int partition, const unsigned int cluster_size, const bool hp_bool, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_partition_list, std::vector< unsigned char > &irregular_cells)
unsigned int n_active_cells
void initial_setup_blocks_tasks(const std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
void print_memory_statistics(StreamType &out, std::size_t data_length) const
void make_coloring_within_partitions_pre_blocked(const DynamicSparsityPattern &connectivity, const unsigned int partition, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_color_list)
std::vector< unsigned int > partition_row_index
void make_thread_graph_partition_partition(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
std::vector< unsigned int > partition_evens
void make_thread_graph(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
unsigned int n_blocked_workers
std::vector< unsigned int > partition_n_blocked_workers
std::vector< unsigned int > cell_partition_data
void make_partitioning(const DynamicSparsityPattern &connectivity, const unsigned int cluster_size, std::vector< unsigned int > &cell_partition, std::vector< unsigned int > &partition_list, std::vector< unsigned int > &partition_size, unsigned int &partition) const
void update_task_info(const unsigned int partition)
void make_boundary_cells_divisible(std::vector< unsigned int > &boundary_cells)
TasksParallelScheme scheme
void guess_block_size(const unsigned int dofs_per_cell)
std::vector< unsigned int > partition_odds
std::vector< unsigned int > face_partition_data