1 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 2 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 8 #include "Teuchos_ArrayViewDecl.hpp" 11 #include "Teuchos_ReductionOp.hpp" 14 #include "Teuchos_ConfigDefs.hpp" 15 #include "Teuchos_Comm.hpp" 17 # include "Teuchos_DefaultMpiComm.hpp" 19 # include "Teuchos_DefaultSerialComm.hpp" 28 template <
typename Ordinal,
typename T>
41 void reduce(
const Ordinal count,
const T inBuffer[], T inoutBuffer[])
const 44 for (Ordinal i=0; i < count; i++){
45 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
46 inoutBuffer[0] = inBuffer[0];
47 inoutBuffer[1] = inBuffer[1];
48 }
else if(inBuffer[0] - inoutBuffer[0] < _EPSILON &&
49 inBuffer[1] - inoutBuffer[1] < _EPSILON){
50 inoutBuffer[0] = inBuffer[0];
51 inoutBuffer[1] = inBuffer[1];
61 template <
typename it>
63 return (x == 1 ? x : x * z2Fact<it>(x - 1));
66 template <
typename gno_t,
typename part_t>
74 template <
typename IT>
84 fact[k] = fact[k - 1] * k;
87 for (k = 0; k < n; ++k)
89 perm[k] = i / fact[n - 1 - k];
90 i = i % fact[n - 1 - k];
95 for (k = n - 1; k > 0; --k)
96 for (j = k - 1; j >= 0; --j)
97 if (perm[j] <= perm[k])
103 template <
typename part_t>
105 int dim = grid_dims.size();
106 int neighborCount = 2 * dim;
107 task_comm_xadj = allocMemory<part_t>(taskCount+1);
108 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
110 part_t neighBorIndex = 0;
111 task_comm_xadj[0] = 0;
112 for (part_t i = 0; i < taskCount; ++i){
113 part_t prevDimMul = 1;
114 for (
int j = 0; j < dim; ++j){
115 part_t lNeighbor = i - prevDimMul;
116 part_t rNeighbor = i + prevDimMul;
117 prevDimMul *= grid_dims[j];
118 if (lNeighbor >= 0 && lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){
119 task_comm_adj[neighBorIndex++] = lNeighbor;
121 if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){
122 task_comm_adj[neighBorIndex++] = rNeighbor;
125 task_comm_xadj[i+1] = neighBorIndex;
130 template <
typename Adapter,
typename scalar_t,
typename part_t>
133 const Teuchos::Comm<int> *comm,
138 scalar_t **partCenters){
140 typedef typename Adapter::lno_t lno_t;
141 typedef typename Adapter::gno_t gno_t;
144 ArrayView<const gno_t> gnos;
145 ArrayView<input_t> xyz;
146 ArrayView<input_t> wgts;
156 gno_t *point_counts = allocMemory<gno_t>(ntasks);
157 memset(point_counts, 0,
sizeof(gno_t) * ntasks);
160 gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
163 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
165 for (
int dim=0; dim < coordDim; dim++){
166 ArrayRCP<const scalar_t> ar;
167 xyz[dim].getInputArray(ar);
169 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
170 memset(partCenters[dim], 0,
sizeof(scalar_t) * ntasks);
184 std::vector< std::vector <GNO_LNO_PAIR<gno_t, part_t> > > hash(numLocalCoords);
187 for (lno_t i=0; i < numLocalCoords; i++){
193 ++point_counts[tmp.
part];
194 lno_t hash_index = tmp.
gno % numLocalCoords;
195 hash[hash_index].push_back(tmp);
200 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
201 ntasks, point_counts, global_point_counts
208 for (lno_t i=0; i < numLocalCoords; i++){
210 lno_t hash_index = g % numLocalCoords;
211 lno_t hash_size = hash[hash_index].size();
213 for (lno_t j =0; j < hash_size; ++j){
214 if (hash[hash_index][j].gno == g){
215 p = hash[hash_index][j].part;
220 std::cerr <<
"ERROR AT HASHING FOR GNO:"<< g <<
" LNO:" << i << std::endl;
223 for(
int j = 0; j < coordDim; ++j){
224 scalar_t c = multiJagged_coordinates[j][i];
225 partCenters[j][p] += c;
230 for(
int j = 0; j < coordDim; ++j){
231 for (part_t i=0; i < ntasks; ++i){
232 partCenters[j][i] /= global_point_counts[i];
236 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
237 for(
int j = 0; j < coordDim; ++j){
238 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
239 ntasks, partCenters[j], tmpCoords
242 scalar_t *tmp = partCenters[j];
243 partCenters[j] = tmpCoords;
247 freeArray<gno_t> (point_counts);
248 freeArray<gno_t> (global_point_counts);
250 freeArray<scalar_t> (tmpCoords);
251 freeArray<scalar_t *>(multiJagged_coordinates);
257 template <
class IT,
class WT>
267 this->heapSize = heapsize_;
268 this->indices = allocMemory<IT>(heapsize_ );
269 this->values = allocMemory<WT>(heapsize_ );
274 freeArray<IT>(this->indices);
275 freeArray<WT>(this->values);
280 WT maxVal = this->values[0];
283 if (distance >= maxVal)
return;
285 this->values[0] = distance;
286 this->indices[0] = index;
293 IT child_index1 = 2 * index_on_heap + 1;
294 IT child_index2 = 2 * index_on_heap + 2;
297 if(child_index1 < this->heapSize && child_index2 < this->heapSize){
299 if (this->values[child_index1] < this->values[child_index2]){
300 biggerIndex = child_index2;
303 biggerIndex = child_index1;
306 else if(child_index1 < this->heapSize){
307 biggerIndex = child_index1;
310 else if(child_index2 < this->heapSize){
311 biggerIndex = child_index2;
313 if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
314 WT tmpVal = this->values[biggerIndex];
315 this->values[biggerIndex] = this->values[index_on_heap];
316 this->values[index_on_heap] = tmpVal;
318 IT tmpIndex = this->indices[biggerIndex];
319 this->indices[biggerIndex] = this->indices[index_on_heap];
320 this->indices[index_on_heap] = tmpIndex;
321 this->push_down(biggerIndex);
326 WT MAXVAL = std::numeric_limits<WT>::max();
327 for(IT i = 0; i < this->heapSize; ++i){
328 this->values[i] = MAXVAL;
329 this->indices[i] = -1;
337 for(IT j = 0; j < this->heapSize; ++j){
338 nc += this->values[j];
348 for(
int i = 0; i < dimension; ++i){
350 for(IT j = 0; j < this->heapSize; ++j){
351 IT k = this->indices[j];
355 nc /= this->heapSize;
356 moved = (
ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved );
364 for(IT i = 0; i < this->heapSize; ++i){
365 permutation[i] = this->indices[i];
372 template <
class IT,
class WT>
381 freeArray<WT>(center);
385 this->dimension = dimension_;
386 this->center = allocMemory<WT>(dimension_);
395 return this->closestPoints.
getNewCenters(center, coords, dimension);
402 for (
int i = 0; i < this->dimension; ++i){
403 WT d = (center[i] - elementCoords[i][index]);
406 distance = pow(distance, WT(1.0 / this->dimension));
407 closestPoints.
addPoint(index, distance);
423 template <
class IT,
class WT>
430 IT required_elements;
436 freeArray<KMeansCluster <IT,WT> >(clusters);
437 freeArray<WT>(maxCoordinates);
438 freeArray<WT>(minCoordinates);
447 IT required_elements_):
449 numElements(numElements_),
450 elementCoords(elementCoords_),
451 numClusters ((1 << dim_) + 1),
452 required_elements(required_elements_)
454 this->clusters = allocMemory<KMeansCluster <IT,WT> >(this->numClusters);
456 for (
int i = 0; i < numClusters; ++i){
457 this->clusters[i].
setParams(this->dim, this->required_elements);
460 this->maxCoordinates = allocMemory <WT> (this->dim);
461 this->minCoordinates = allocMemory <WT> (this->dim);
464 for (
int j = 0; j < dim; ++j){
465 this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
466 for(IT i = 1; i < numElements; ++i){
467 WT t = this->elementCoords[j][i];
468 if(t > this->maxCoordinates[j]){
469 this->maxCoordinates[j] = t;
471 if (t < minCoordinates[j]){
472 this->minCoordinates[j] = t;
479 for (
int j = 0; j < dim; ++j){
480 int mod = (1 << (j+1));
481 for (
int i = 0; i < numClusters - 1; ++i){
483 if ( (i % mod) < mod / 2){
484 c = this->maxCoordinates[j];
488 c = this->minCoordinates[j];
490 this->clusters[i].
center[j] = c;
495 for (
int j = 0; j < dim; ++j){
496 this->clusters[numClusters - 1].
center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
512 for(
int it = 0; it < 10; ++it){
514 for (IT j = 0; j < this->numClusters; ++j){
517 for (IT i = 0; i < this->numElements; ++i){
519 for (IT j = 0; j < this->numClusters; ++j){
521 this->clusters[j].
getDistance(i,this->elementCoords);
525 for (IT j = 0; j < this->numClusters; ++j){
526 moved =(this->clusters[j].
getNewCenters(this->elementCoords) || moved );
542 for (IT j = 1; j < this->numClusters; ++j){
545 if(minTmpDistance < minDistance){
546 minDistance = minTmpDistance;
558 #define MINOF(a,b) (((a)<(b))?(a):(b)) 566 template <
typename T>
570 #ifdef HAVE_ZOLTAN2_OMP 571 #pragma omp parallel for 573 for(
size_t i = 0; i < arrSize; ++i){
580 #ifdef HAVE_ZOLTAN2_OMP 581 #pragma omp parallel for 583 for(
size_t i = 0; i < arrSize; ++i){
592 template <
typename part_t,
typename pcoord_t>
604 no_tasks(no_tasks_){}
607 return this->no_procs;
610 return this->no_tasks;
615 part_t *task_to_proc,
616 part_t *task_communication_xadj,
617 part_t *task_communication_adj,
618 pcoord_t *task_communication_edge_weight){
620 double totalCost = 0;
622 part_t commCount = 0;
623 for (part_t task = 0; task < this->no_tasks; ++task){
624 int assigned_proc = task_to_proc[task];
626 part_t task_adj_begin = task_communication_xadj[task];
627 part_t task_adj_end = task_communication_xadj[task+1];
629 commCount += task_adj_end - task_adj_begin;
631 for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
634 part_t neighborTask = task_communication_adj[task2];
636 int neighborProc = task_to_proc[neighborTask];
637 double distance = getProcDistance(assigned_proc, neighborProc);
641 if (task_communication_edge_weight == NULL){
642 totalCost += distance ;
645 totalCost += distance * task_communication_edge_weight[task2];
650 this->commCost = totalCost;
654 return this->commCost;
657 virtual double getProcDistance(
int procId1,
int procId2)
const = 0;
665 virtual void getMapping(
667 const RCP<const Environment> &env,
668 ArrayRCP <part_t> &proc_to_task_xadj,
669 ArrayRCP <part_t> &proc_to_task_adj,
670 ArrayRCP <part_t> &task_to_proc
676 template <
typename pcoord_t,
typename tcoord_t,
typename part_t>
716 proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
717 task_coord_dim(tcoord_dim_), task_coords(tcoords_),
718 partArraySize(
std::min(tcoord_dim_, pcoord_dim_)),
724 this->partArraySize = psize;
727 this->partNoArray = pNo;
739 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
742 this->proc_coords, ntasks);
747 for(
int i = ntasks; i < nprocs; ++i){
748 proc_permutation[i] = -1;
772 lo = _u_umpa_seed % q;
773 hi = _u_umpa_seed / q;
774 test = (a*lo)-(r*hi);
778 _u_umpa_seed = test + m;
779 d = (double) ((
double) _u_umpa_seed / (double) m);
780 return (part_t) (d*(double)l);
785 for (
int i = 0 ; i < this->proc_coord_dim; ++i){
786 distance +=
ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
794 part_t *a = visitOrder;
806 for (i=0; i<n; i+=16)
808 u = umpa_uRandom(n-4, _u_umpa_seed);
809 v = umpa_uRandom(n-4, _u_umpa_seed);
820 part_t i, end = n / 4;
822 for (i=1; i<end; i++)
824 part_t j=umpa_uRandom(n-i, _u_umpa_seed);
843 const RCP<const Environment> &env,
844 ArrayRCP <part_t> &rcp_proc_to_task_xadj,
845 ArrayRCP <part_t> &rcp_proc_to_task_adj,
846 ArrayRCP <part_t> &rcp_task_to_proc
849 rcp_proc_to_task_xadj = ArrayRCP <part_t> (this->no_procs+1);
850 rcp_proc_to_task_adj = ArrayRCP <part_t> (this->no_tasks);
851 rcp_task_to_proc = ArrayRCP <part_t> (this->no_tasks);
853 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
854 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
855 part_t *task_to_proc = rcp_task_to_proc.getRawPtr();
859 fillContinousArray<part_t> (proc_to_task_xadj, this->no_procs+1, &invalid);
862 part_t num_parts =
MINOF(this->no_procs, this->no_tasks);
864 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
866 int recursion_depth = partArraySize;
867 if(partArraySize < minCoordDim) recursion_depth = minCoordDim;
869 int taskPerm = z2Fact<int>(this->task_coord_dim);
870 int procPerm = z2Fact<int>(this->proc_coord_dim);
871 int permutations = taskPerm * procPerm;
874 part_t *proc_xadj = allocMemory<part_t> (num_parts+1);
877 part_t *proc_adjList = allocMemory<part_t>(this->no_procs);
880 part_t used_num_procs = this->no_procs;
881 if(this->no_procs > this->no_tasks){
883 this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
884 used_num_procs = this->no_tasks;
887 fillContinousArray<part_t>(proc_adjList,this->no_procs, NULL);
890 int myPermutation = myRank % permutations;
892 int myProcPerm= myPermutation % procPerm;
893 int myTaskPerm = myPermutation / procPerm;
895 int *permutation = allocMemory<int> ((this->proc_coord_dim > this->task_coord_dim)
896 ? this->proc_coord_dim : this->task_coord_dim);
899 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
901 pcoord_t **pcoords = allocMemory<pcoord_t *> (this->proc_coord_dim);
902 for(
int i = 0; i < this->proc_coord_dim; ++i){
903 pcoords[i] = this->proc_coords[permutation[i]];
909 env->timerStart(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
925 env->timerStop(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
926 freeArray<pcoord_t *> (pcoords);
929 part_t *task_xadj = allocMemory<part_t> (num_parts+1);
930 part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
932 fillContinousArray<part_t>(task_adjList,this->no_tasks, NULL);
935 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
938 tcoord_t **tcoords = allocMemory<tcoord_t *> (this->task_coord_dim);
939 for(
int i = 0; i < this->task_coord_dim; ++i){
940 tcoords[i] = this->task_coords[permutation[i]];
943 env->timerStart(
MACRO_TIMERS,
"Mapping - Task Partitioning");
958 env->timerStop(
MACRO_TIMERS,
"Mapping - Task Partitioning");
959 freeArray<pcoord_t *> (tcoords);
960 freeArray<int> (permutation);
964 for(part_t i = 0; i < num_parts; ++i){
966 part_t proc_index_begin = proc_xadj[i];
967 part_t task_begin_index = task_xadj[i];
968 part_t proc_index_end = proc_xadj[i+1];
969 part_t task_end_index = task_xadj[i+1];
972 if(proc_index_end - proc_index_begin != 1){
973 std::cerr <<
"Error at partitioning of processors" << std::endl;
974 std::cerr <<
"PART:" << i <<
" is assigned to " << proc_index_end - proc_index_begin <<
" processors." << std::endl;
977 part_t assigned_proc = proc_adjList[proc_index_begin];
978 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
984 part_t *proc_to_task_xadj_work = allocMemory<part_t> (this->no_procs);
986 for(part_t i = 0; i < this->no_procs; ++i){
987 part_t tmp = proc_to_task_xadj[i];
988 proc_to_task_xadj[i] = sum;
990 proc_to_task_xadj_work[i] = sum;
992 proc_to_task_xadj[this->no_procs] = sum;
994 for(part_t i = 0; i < num_parts; ++i){
996 part_t proc_index_begin = proc_xadj[i];
997 part_t task_begin_index = task_xadj[i];
998 part_t task_end_index = task_xadj[i+1];
1000 part_t assigned_proc = proc_adjList[proc_index_begin];
1002 for (part_t j = task_begin_index; j < task_end_index; ++j){
1003 part_t taskId = task_adjList[j];
1005 task_to_proc[taskId] = assigned_proc;
1007 proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
1011 freeArray<part_t>(proc_to_task_xadj_work);
1012 freeArray<part_t>(task_xadj);
1013 freeArray<part_t>(task_adjList);
1014 freeArray<part_t>(proc_xadj);
1015 freeArray<part_t>(proc_adjList);
1020 template <
typename Adapter,
typename part_t>
1024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 1026 typedef typename Adapter::scalar_t pcoord_t;
1027 typedef typename Adapter::scalar_t tcoord_t;
1047 if(this->proc_task_comm){
1050 Teuchos::RCP<const Environment>(this->env,
false),
1051 this->proc_to_task_xadj,
1052 this->proc_to_task_adj,
1057 std::cerr <<
"communicationModel is not specified in the Mapper" << std::endl;
1069 int taskPerm = z2Fact<int>(procDim);
1070 int procPerm = z2Fact<int>(taskDim);
1071 int idealGroupSize = taskPerm * procPerm;
1073 int myRank = this->comm->getRank();
1074 int commSize = this->comm->getSize();
1076 int myGroupIndex = myRank / idealGroupSize;
1078 int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
1079 if (prevGroupBegin < 0) prevGroupBegin = 0;
1080 int myGroupBegin = myGroupIndex * idealGroupSize;
1081 int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
1082 int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
1084 if (myGroupEnd > commSize){
1085 myGroupBegin = prevGroupBegin;
1086 myGroupEnd = commSize;
1088 if (nextGroupEnd > commSize){
1089 myGroupEnd = commSize;
1091 int myGroupSize = myGroupEnd - myGroupBegin;
1093 part_t *myGroup = allocMemory<part_t>(myGroupSize);
1094 for (
int i = 0; i < myGroupSize; ++i){
1095 myGroup[i] = myGroupBegin + i;
1099 ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
1101 RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
1102 freeArray<part_t>(myGroup);
1111 RCP<Comm<int> > subComm = this->create_subCommunicator();
1115 double localCost[2], globalCost[2];
1117 localCost[0] = myCost;
1118 localCost[1] = double(subComm->getRank());
1120 globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
1122 reduceAll<int, double>(*subComm, reduceBest,
1123 2, localCost, globalCost);
1125 int sender = int(globalCost[1]);
1129 broadcast (*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
1130 broadcast (*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
1131 broadcast (*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
1138 std::ofstream gnuPlotCode (
"gnuPlot.plot", std::ofstream::out);
1141 std::string ss =
"";
1142 for(part_t i = 0; i < this->nprocs; ++i){
1144 std::string procFile = toString<int>(i) +
"_mapping.txt";
1146 gnuPlotCode <<
"plot \"" << procFile <<
"\"\n";
1149 gnuPlotCode <<
"replot \"" << procFile <<
"\"\n";
1152 std::ofstream inpFile (procFile.c_str(), std::ofstream::out);
1154 std::string gnuPlotArrow =
"set arrow from ";
1155 for(
int j = 0; j < mindim; ++j){
1156 if (j == mindim - 1){
1158 gnuPlotArrow += toString<float>(proc_task_comm->
proc_coords[j][i]);
1162 inpFile << proc_task_comm->
proc_coords[j][i] <<
" ";
1163 gnuPlotArrow += toString<float>(proc_task_comm->
proc_coords[j][i]) +
",";
1166 gnuPlotArrow +=
" to ";
1169 inpFile << std::endl;
1170 ArrayView<part_t> a = this->getAssignedTaksForProc(i);
1171 for(
int k = 0; k < a.size(); ++k){
1174 std::string gnuPlotArrow2 = gnuPlotArrow;
1175 for(
int z = 0; z < mindim; ++z){
1176 if(z == mindim - 1){
1180 gnuPlotArrow2 += toString<float>(proc_task_comm->
task_coords[z][j]);
1183 inpFile << proc_task_comm->
task_coords[z][j] <<
" ";
1184 gnuPlotArrow2 += toString<float>(proc_task_comm->
task_coords[z][j]) +
",";
1187 ss += gnuPlotArrow2 +
"\n";
1188 inpFile << std::endl;
1194 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1195 gnuPlotCode.close();
1203 std::string rankStr = toString<int>(myRank);
1204 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
1205 std::string outF = gnuPlots + rankStr+ extentionS;
1206 std::ofstream gnuPlotCode ( outF.c_str(), std::ofstream::out);
1210 int mindim =
MINOF(tmpproc_task_comm->proc_coord_dim, tmpproc_task_comm->task_coord_dim);
1211 std::string ss =
"";
1212 std::string procs =
"", parts =
"";
1213 for(part_t i = 0; i < this->nprocs; ++i){
1216 ArrayView<part_t> a = this->getAssignedTaksForProc(i);
1223 std::string gnuPlotArrow =
"set arrow from ";
1224 for(
int j = 0; j < mindim; ++j){
1225 if (j == mindim - 1){
1227 gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
1228 procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
1233 gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]) +
",";
1234 procs += toString<float>(tmpproc_task_comm->proc_coords[j][i])+
" ";
1239 gnuPlotArrow +=
" to ";
1242 for(
int k = 0; k < a.size(); ++k){
1245 std::string gnuPlotArrow2 = gnuPlotArrow;
1246 for(
int z = 0; z < mindim; ++z){
1247 if(z == mindim - 1){
1251 gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]);
1252 parts += toString<float>(tmpproc_task_comm->task_coords[z][j]);
1256 gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]) +
",";
1257 parts += toString<float>(tmpproc_task_comm->task_coords[z][j]) +
" ";
1261 ss += gnuPlotArrow2 +
" nohead\n";
1269 std::ofstream procFile (
"procPlot.plot", std::ofstream::out);
1270 procFile << procs <<
"\n";
1273 std::ofstream partFile (
"partPlot.plot", std::ofstream::out);
1274 partFile << parts<<
"\n";
1277 std::ofstream extraProcFile (
"allProc.plot", std::ofstream::out);
1279 for(part_t j = 0; j < this->nprocs; ++j){
1280 for(
int i = 0; i < mindim; ++i){
1281 extraProcFile << tmpproc_task_comm->proc_coords[i][j] <<
" ";
1283 extraProcFile << std::endl;
1286 extraProcFile.close();
1290 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
1292 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
1294 gnuPlotCode <<
"replot \"partPlot.plot\" with points pointsize 3\n";
1295 gnuPlotCode <<
"replot \"allProc.plot\" with points pointsize 0.65\n";
1296 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1297 gnuPlotCode.close();
1305 const Teuchos::Comm<int> *comm_,
1308 tcoord_t **partCenters
1310 std::string file =
"gggnuPlot";
1311 std::string exten =
".plot";
1312 std::ofstream mm(
"2d.txt");
1313 file += toString<int>(comm_->getRank()) + exten;
1314 std::ofstream ff(file.c_str());
1318 for (part_t i = 0; i < this->ntasks;++i){
1319 outPartBoxes[i].writeGnuPlot(ff, mm);
1322 ff <<
"plot \"2d.txt\"" << std::endl;
1326 ff <<
"splot \"2d.txt\"" << std::endl;
1331 ff <<
"set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
1332 for (part_t i = 0; i < this->ntasks;++i){
1333 part_t pb = task_communication_xadj[i];
1334 part_t pe = task_communication_xadj[i+1];
1335 for (part_t p = pb; p < pe; ++p){
1336 part_t n = task_communication_adj[p];
1339 std::string arrowline =
"set arrow from ";
1340 for (
int j = 0; j < coordDim - 1; ++j){
1341 arrowline += toString<tcoord_t>(partCenters[j][n]) +
",";
1343 arrowline += toString<tcoord_t>(partCenters[coordDim -1][n]) +
" to ";
1346 for (
int j = 0; j < coordDim - 1; ++j){
1347 arrowline += toString<tcoord_t>(partCenters[j][i]) +
",";
1349 arrowline += toString<tcoord_t>(partCenters[coordDim -1][i]) +
" as 5\n";
1356 ff <<
"replot\n pause -1" << std::endl;
1363 void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
1364 proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
1365 proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
1373 if(this->isOwnerofModel){
1374 delete this->proc_task_comm;
1390 const Teuchos::Comm<int> *comm_,
1396 proc_to_task_xadj(0),
1397 proc_to_task_adj(0),
1399 isOwnerofModel(true),
1401 task_communication_xadj(0),
1402 task_communication_adj(0){
1404 pcoord_t *task_communication_edge_weight_ = NULL;
1419 tcoord_t **partCenters = NULL;
1420 partCenters = allocMemory<tcoord_t *>(coordDim);
1421 for (
int i = 0; i < coordDim; ++i){
1422 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
1428 getSolutionCenterCoordinates<Adapter, typename Adapter::scalar_t,part_t>(
1437 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
1441 this->proc_task_comm =
1451 int myRank = comm_->getRank();
1454 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
1455 this->doMapping(myRank);
1456 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
1459 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
1461 task_communication_adj);
1463 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
1465 if (comm_->getRank() == 0){
1467 part_t taskCommCount = task_communication_xadj.size();
1468 std::cout <<
" TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
1469 part_t maxN = task_communication_xadj[0];
1470 for (part_t i = 1; i <= taskCommCount; ++i){
1471 part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
1472 if (maxN < nc) maxN = nc;
1474 std::cout <<
" maxNeighbor:" << maxN << std::endl;
1477 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
1480 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
1482 task_to_proc.getRawPtr(),
1483 task_communication_xadj.getRawPtr(),
1484 task_communication_adj.getRawPtr(),
1485 task_communication_edge_weight_
1489 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
1494 this->getBestMapping();
1496 this->writeMapping2(comm_->getRank());
1499 for (
int i = 0; i < coordDim; ++i){
1500 freeArray<tcoord_t>(partCenters[i]);
1502 freeArray<tcoord_t *>(partCenters);
1555 const Teuchos::Comm<int> *problemComm,
1558 pcoord_t **machine_coords,
1562 tcoord_t **task_coords,
1563 ArrayRCP<part_t>task_comm_xadj,
1564 ArrayRCP<part_t>task_comm_adj,
1565 pcoord_t *task_communication_edge_weight_,
1566 int recursion_depth,
1567 part_t *part_no_array,
1568 const part_t *machine_dimensions
1570 proc_to_task_xadj(0),
1571 proc_to_task_adj(0),
1573 isOwnerofModel(true),
1575 task_communication_xadj(task_comm_xadj),
1576 task_communication_adj(task_comm_adj){
1579 pcoord_t ** virtual_machine_coordinates = machine_coords;
1580 if (machine_dimensions){
1581 virtual_machine_coordinates =
1582 this->shiftMachineCoordinates (
1589 this->nprocs = num_processors;
1591 int coordDim = task_dim;
1592 this->ntasks = num_tasks;
1595 tcoord_t **partCenters = task_coords;
1598 this->proc_task_comm =
1601 virtual_machine_coordinates,
1610 int myRank = problemComm->getRank();
1612 this->doMapping(myRank);
1614 this->writeMapping2(myRank);
1617 if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
1619 task_to_proc.getRawPtr(),
1620 task_communication_xadj.getRawPtr(),
1621 task_communication_adj.getRawPtr(),
1622 task_communication_edge_weight_
1626 this->getBestMapping();
1642 if (machine_dimensions){
1643 for (
int i = 0; i < proc_dim; ++i){
1644 delete [] virtual_machine_coordinates[i];
1646 delete [] virtual_machine_coordinates;
1649 if(comm_->getRank() == 0)
1650 this->writeMapping2(-1);
1656 return this->proc_task_comm->getCommCost();
1675 const part_t *machine_dimensions,
1677 pcoord_t **mCoords){
1678 pcoord_t **result_machine_coords = NULL;
1679 result_machine_coords =
new pcoord_t*[machine_dim];
1680 for (
int i = 0; i < machine_dim; ++i){
1681 result_machine_coords[i] =
new pcoord_t [numProcs];
1684 for (
int i = 0; i < machine_dim; ++i){
1685 part_t numMachinesAlongDim = machine_dimensions[i];
1686 part_t *machineCounts=
new part_t[numMachinesAlongDim];
1687 memset(machineCounts, 0,
sizeof(part_t) *numMachinesAlongDim);
1689 int *filledCoordinates=
new int[numMachinesAlongDim];
1691 pcoord_t *coords = mCoords[i];
1692 for(part_t j = 0; j < numProcs; ++j){
1693 part_t mc = (part_t) coords[j];
1694 ++machineCounts[mc];
1697 part_t filledCoordinateCount = 0;
1698 for(part_t j = 0; j < numMachinesAlongDim; ++j){
1699 if (machineCounts[j] > 0){
1700 filledCoordinates[filledCoordinateCount++] = j;
1704 part_t firstProcCoord = filledCoordinates[0];
1705 part_t firstProcCount = machineCounts[firstProcCoord];
1707 part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
1708 part_t lastProcCount = machineCounts[lastProcCoord];
1710 part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
1711 part_t firstLastGapProc = lastProcCount + firstProcCount;
1713 part_t leftSideProcCoord = firstProcCoord;
1714 part_t leftSideProcCount = firstProcCount;
1715 part_t biggestGap = 0;
1716 part_t biggestGapProc = numProcs;
1718 part_t shiftBorderCoordinate = -1;
1719 for(part_t j = 1; j < filledCoordinateCount; ++j){
1720 part_t rightSideProcCoord= filledCoordinates[j];
1721 part_t rightSideProcCount = machineCounts[rightSideProcCoord];
1723 part_t gap = rightSideProcCoord - leftSideProcCoord;
1724 part_t gapProc = rightSideProcCount + leftSideProcCount;
1729 if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
1730 shiftBorderCoordinate = rightSideProcCoord;
1731 biggestGapProc = gapProc;
1734 leftSideProcCoord = rightSideProcCoord;
1735 leftSideProcCount = rightSideProcCount;
1739 if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
1740 shiftBorderCoordinate = -1;
1743 for(part_t j = 0; j < numProcs; ++j){
1745 if (coords[j] < shiftBorderCoordinate){
1746 result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
1750 result_machine_coords[i][j] = coords[j];
1754 delete [] machineCounts;
1755 delete [] filledCoordinates;
1758 return result_machine_coords;
1770 procs = this->task_to_proc.getRawPtr() + taskId;
1776 return this->task_to_proc[taskId];
1786 part_t task_begin = this->proc_to_task_xadj[procId];
1787 part_t taskend = this->proc_to_task_xadj[procId+1];
1788 parts = this->proc_to_task_adj.getRawPtr() + task_begin;
1789 numParts = taskend - task_begin;
1793 part_t task_begin = this->proc_to_task_xadj[procId];
1794 part_t taskend = this->proc_to_task_xadj[procId+1];
1802 if (taskend - task_begin > 0){
1803 ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
1804 return assignedParts;
1807 ArrayView <part_t> assignedParts;
1808 return assignedParts;
1884 template <
typename part_t,
typename pcoord_t,
typename tcoord_t>
1886 RCP<
const Teuchos::Comm<int> > problemComm,
1889 pcoord_t **machine_coords,
1892 tcoord_t **task_coords,
1893 part_t *task_comm_xadj,
1894 part_t *task_comm_adj,
1895 pcoord_t *task_communication_edge_weight_,
1896 part_t *proc_to_task_xadj,
1897 part_t *proc_to_task_adj,
1898 int recursion_depth,
1899 part_t *part_no_array,
1900 const part_t *machine_dimensions
1909 typedef Tpetra::MultiVector<tcoord_t, part_t, part_t>
tMVector_t;
1911 Teuchos::ArrayRCP<part_t> task_communication_xadj (task_comm_xadj, 0, num_tasks+1,
false);
1913 Teuchos::ArrayRCP<part_t> task_communication_adj;
1914 if (task_comm_xadj){
1915 Teuchos::ArrayRCP<part_t> tmp_task_communication_adj (task_comm_adj, 0, task_comm_xadj[num_tasks],
false);
1916 task_communication_adj = tmp_task_communication_adj;
1923 problemComm.getRawPtr(),
1932 task_communication_xadj,
1933 task_communication_adj,
1934 task_communication_edge_weight_,
1941 part_t* proc_to_task_xadj_;
1942 part_t* proc_to_task_adj_;
1944 ctm->
getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
1946 for (part_t i = 0; i <= num_processors; ++i){
1947 proc_to_task_xadj[i] = proc_to_task_xadj_[i];
1949 for (part_t i = 0; i < num_tasks; ++i){
1950 proc_to_task_adj[i] = proc_to_task_adj_[i];
void setParams(int dimension_, int heapsize)
CommunicationModel Base Class that performs mapping between the coordinate partitioning result...
void getBestMapping()
finds the lowest cost mapping and broadcasts solution to everyone.
void ithPermutation(const IT n, IT i, IT *perm)
CommunicationModel(part_t no_procs_, part_t no_tasks_)
virtual ~CommunicationModel()
Time an algorithm (or other entity) as a whole.
pcoord_t ** getProcCoords() const
getProcDim function returns the coordinates of processors in two dimensional array.
void setPartArraySize(int psize)
WT getDistance(IT index, WT **elementCoords)
virtual size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
bool getNewCenters(WT **coords)
virtual void getPartsForProc(int procId, part_t &numParts, part_t *parts) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
virtual ~CoordinateTaskMapper()
void calculateCommunicationCost(part_t *task_to_proc, part_t *task_communication_xadj, part_t *task_communication_adj, pcoord_t *task_communication_edge_weight)
void getSolutionCenterCoordinates(const Environment *envConst, const Teuchos::Comm< int > *comm, const Zoltan2::CoordinateModel< typename Adapter::base_adapter_t > *coords, const Zoltan2::PartitioningSolution< Adapter > *soln_, int coordDim, part_t ntasks, scalar_t **partCenters)
pcoord_t ** shiftMachineCoordinates(int machine_dim, const part_t *machine_dimensions, part_t numProcs, pcoord_t **mCoords)
Using the machine dimensions provided, create virtual machine coordinates by assigning the largest ga...
virtual void getMapping(int myRank, const RCP< const Environment > &env, ArrayRCP< part_t > &rcp_proc_to_task_xadj, ArrayRCP< part_t > &rcp_proc_to_task_adj, ArrayRCP< part_t > &rcp_task_to_proc) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
part_t getAssignedProcForTask(part_t taskId)
getAssignedProcForTask function, returns the assigned processor id for the given task ...
void getMinDistanceCluster(IT *procPermutation)
void coordinateTaskMapperInterface(RCP< const Teuchos::Comm< int > > problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, part_t *task_comm_xadj, part_t *task_comm_adj, pcoord_t *task_communication_edge_weight_, part_t *proc_to_task_xadj, part_t *proc_to_task_adj, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions)
Constructor The interface function that calls CoordinateTaskMapper which will also perform the mappin...
Defines the XpetraMultiVectorAdapter.
Multi Jagged coordinate partitioning algorithm.
void timerStop(TimerType tt, const char *timerName) const
Stop a named timer.
This class provides geometric coordinates with optional weights to the Zoltan2 algorithm.
Contains the Multi-jagged algorthm.
size_t getCoordinates(ArrayView< const gno_t > &Ids, ArrayView< input_t > &xyz, ArrayView< input_t > &wgts) const
Returns the coordinate ids, values and optional weights.
PartitionMapping maps a solution or an input distribution to ranks.
void getProcTask(part_t *&proc_to_task_xadj_, part_t *&proc_to_task_adj_)
void sequential_task_partitioning(const RCP< const Environment > &env, mj_lno_t num_total_coords, mj_lno_t num_selected_coords, size_t num_target_part, int coord_dim, mj_scalar_t **mj_coordinates, mj_lno_t *initial_selected_coords_output_permutation, mj_lno_t *output_xadj, int recursion_depth, const mj_part_t *part_no_array)
Special function for partitioning for task mapping. Runs sequential, and performs deterministic parti...
A PartitioningSolution is a solution to a partitioning problem.
void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
void copyCoordinates(IT *permutation)
void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, std::vector< int > grid_dims)
CoordinateCommunicationModel(int pcoord_dim_, pcoord_t **pcoords_, int tcoord_dim_, tcoord_t **tcoords_, part_t no_procs_, part_t no_tasks_)
Class Constructor:
virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *procs) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
Zoltan2_ReduceBestMapping Class, reduces the minimum cost mapping, ties breaks with minimum proc id...
ArrayRCP< part_t > task_communication_adj
ArrayView< part_t > getAssignedTaksForProc(part_t procId)
void reduce(const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
Implement Teuchos::ValueTypeReductionOp interface.
The StridedData class manages lists of weights or coordinates.
void setPartArray(part_t *pNo)
ArrayRCP< part_t > proc_to_task_adj
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
void addPoint(IT index, WT distance)
bool getNewCenters(WT *center, WT **coords, int dimension)
MachineRepresentation Class Finds the coordinate of the processor. Used to find the processor coordin...
size_t getLocalNumCoordinates() const
Returns the number of coordinates on this process.
CoordinateModelInput Class that performs mapping between the coordinate partitioning result and mpi r...
double getCommunicationCostMetric()
CoordinateTaskMapper(const Environment *env_const_, const Teuchos::Comm< int > *problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, ArrayRCP< part_t >task_comm_xadj, ArrayRCP< part_t >task_comm_adj, pcoord_t *task_communication_edge_weight_, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions)
Constructor The mapping constructor which will also perform the mapping operation. The result mapping can be obtained by –getAssignedProcForTask function: which returns the assigned processor id for the given task –getPartsForProc: which returns the assigned tasks with the number of tasks.
void doMapping(int myRank)
doMapping function, calls getMapping function of communicationModel object.
ArrayRCP< part_t > task_communication_xadj
void update_visit_order(part_t *visitOrder, part_t n, int &_u_umpa_seed, part_t rndm)
virtual double getProcDistance(int procId1, int procId2) const
Zoltan2_ReduceBestMapping()
Default Constructor.
CoordinateTaskMapper(const Teuchos::Comm< int > *comm_, const MachineRepresentation< pcoord_t > *machine_, const Zoltan2::Model< typename Adapter::base_adapter_t > *model_, const Zoltan2::PartitioningSolution< Adapter > *soln_, const Environment *envConst)
Constructor. When this constructor is called, in order to calculate the communication metric...
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
int getProcDim() const
getProcDim function returns the dimension of the physical processor layout.
KmeansHeap Class, max heap, but holds the minimum values.
void fillContinousArray(T *arr, size_t arrSize, T *val)
fillContinousArray function
#define ZOLTAN2_ALGMULTIJAGGED_SWAP(a, b, temp)
void push_down(IT index_on_heap)
virtual ~CoordinateCommunicationModel()
RCP< Comm< int > > create_subCommunicator()
creates and returns the subcommunicator for the processor group.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
KMeansAlgorithm Class that performs clustering of the coordinates, and returns the closest set of coo...
void timerStart(TimerType tt, const char *timerName) const
Start a named timer.
ArrayRCP< part_t > task_to_proc
KMeansAlgorithm(int dim_, IT numElements_, WT **elementCoords_, IT required_elements_)
KMeansAlgorithm Constructor.
void writeMapping2(int myRank)
int getNumProcs() const
getNumProcs function returns the number of processors.
void copyCoordinates(IT *permutation)
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
double getCommunicationCostMetric()
CoordinateCommunicationModel()
ArrayRCP< part_t > proc_to_task_xadj
void setHeapsize(IT heapsize_)
CoordinateCommunicationModel< pcoord_t, tcoord_t, part_t > * proc_task_comm