COMBINATORIAL_BLAS 1.6

dist_graph.cpp
/*
//@HEADER
// *****************************************************************************
//
//  HPCGraph: Graph Computation on High Performance Computing Systems
//  Copyright (2016) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact  George M. Slota    (gmslota@sandia.gov)
//                     Siva Rajamanickam  (srajama@sandia.gov)
//                     Kamesh Madduri     (madduri@cse.psu.edu)
//
// *****************************************************************************
//@HEADER
*/

#include <mpi.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h> // for assert() used in repart_graph()
#include <fstream>

#include "fast_map.h"
#include "dist_graph.h"
#include "util.h"
#include "comms.h"

extern int procid, nprocs;
extern bool verbose, debug, verify, output;

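// The distributed graph stores a compressed sparse row (CSR) layout per task:
// out_degree_list[v] .. out_degree_list[v+1] bound vertex v's slice of
// out_edges, so out_degree(g, v) == out_degree_list[v+1] - out_degree_list[v].
// Example (single task): edges (0,1), (0,2), (1,2) give
//   out_degree_list = {0, 2, 3}
//   out_edges       = {1, 2, 2}
// The in_* arrays mirror this layout for incoming edges.
//
// create_graph() builds that layout from ggi->gen_edges, a flat array of
// m_local_out (src, dst) pairs, via a two-pass counting sort: count each
// vertex's degree, prefix-sum the counts into offsets, then scatter edges.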
int create_graph(graph_gen_data_t* ggi, dist_graph_t* g)
{
  if (debug) { printf("Task %d create_graph() start\n", procid); }

  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  g->n = ggi->n;
  g->n_local = ggi->n_local;
  g->n_offset = ggi->n_offset;
  g->m = ggi->m;
  g->m_local_out = ggi->m_local_out;
  g->m_local_in = ggi->m_local_in;
  init_map(&g->map);

  uint64_t* out_edges = (uint64_t*)malloc(g->m_local_out*sizeof(uint64_t));
  uint64_t* out_degree_list = (uint64_t*)malloc((g->n_local+1)*sizeof(uint64_t));
  uint64_t* temp_counts = (uint64_t*)malloc(g->n_local*sizeof(uint64_t));
  if (out_edges == NULL || out_degree_list == NULL || temp_counts == NULL)
    throw_err("create_graph(), unable to allocate out edge storage", procid);

#pragma omp parallel
{
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local+1; ++i)
    out_degree_list[i] = 0;
#pragma omp for
  for (uint64_t i = 0; i < g->n_local; ++i)
    temp_counts[i] = 0;
}

  // Counting sort, pass 1: per-source degree counts over (src, dst) pairs.
  for (uint64_t i = 0; i < g->m_local_out*2; i+=2)
    ++temp_counts[ggi->gen_edges[i] - g->n_offset];
  // Prefix-sum the counts into CSR offsets.
  for (uint64_t i = 0; i < g->n_local; ++i)
    out_degree_list[i+1] = out_degree_list[i] + temp_counts[i];
  memcpy(temp_counts, out_degree_list, g->n_local*sizeof(uint64_t));
  // Pass 2: scatter each destination into its source's slice.
  for (uint64_t i = 0; i < g->m_local_out*2; i+=2)
    out_edges[temp_counts[ggi->gen_edges[i] - g->n_offset]++] = ggi->gen_edges[i+1];

  free(ggi->gen_edges);
  g->out_edges = out_edges;
  g->out_degree_list = out_degree_list;

  uint64_t* in_edges = (uint64_t*)malloc(g->m_local_in*sizeof(uint64_t));
  uint64_t* in_degree_list = (uint64_t*)malloc((g->n_local+1)*sizeof(uint64_t));
  if (in_edges == NULL || in_degree_list == NULL)
    throw_err("create_graph(), unable to allocate in edge storage\n", procid);

#pragma omp parallel
{
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local+1; ++i)
    in_degree_list[i] = 0;
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local; ++i)
    temp_counts[i] = 0;
}

  // Same counting sort, keyed on destinations, for the in-edge CSR.
  for (uint64_t i = 0; i < g->m_local_in*2; i+=2)
    ++temp_counts[ggi->gen_edges_rev[i+1] - g->n_offset];
  for (uint64_t i = 0; i < g->n_local; ++i)
    in_degree_list[i+1] = in_degree_list[i] + temp_counts[i];
  memcpy(temp_counts, in_degree_list, g->n_local*sizeof(uint64_t));
  for (uint64_t i = 0; i < g->m_local_in*2; i+=2)
    in_edges[temp_counts[ggi->gen_edges_rev[i+1] - g->n_offset]++] = ggi->gen_edges_rev[i];

  free(temp_counts);
  free(ggi->gen_edges_rev);
  g->in_edges = in_edges;
  g->in_degree_list = in_degree_list;

  g->local_unmap = (uint64_t*)malloc(g->n_local*sizeof(uint64_t));
  if (g->local_unmap == NULL)
    throw_err("create_graph(), unable to allocate unmap", procid);

  // local_unmap translates a local vertex index back to its global id.
#pragma omp parallel for
  for (uint64_t i = 0; i < g->n_local; ++i)
    g->local_unmap[i] = i + g->n_offset;

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf("Task %d create_graph() %9.6f (s)\n", procid, elt);
  }

  if (debug) { printf("Task %d create_graph() success\n", procid); }
  return 0;
}

int create_graph_serial(graph_gen_data_t* ggi, dist_graph_t* g)
{
  if (debug) { printf("Task %d create_graph_serial() start\n", procid); }
  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  // Single-task build: every edge is local, so there are no ghost vertices
  // and the global-to-local map is the identity.
  g->n = ggi->n;
  g->n_local = ggi->n_local;
  g->n_offset = ggi->n_offset;
  g->m = ggi->m;
  g->m_local_out = ggi->m;
  g->m_local_in = ggi->m;
  g->n_ghost = 0;
  g->n_total = g->n_local;
  init_map(&g->map);

  uint64_t* out_edges = (uint64_t*)malloc(g->m_local_out*sizeof(uint64_t));
  uint64_t* out_degree_list = (uint64_t*)malloc((g->n_local+1)*sizeof(uint64_t));
  uint64_t* temp_counts = (uint64_t*)malloc(g->n_local*sizeof(uint64_t));
  if (out_edges == NULL || out_degree_list == NULL || temp_counts == NULL)
    throw_err("create_graph_serial(), unable to allocate out edge storage\n", procid);

#pragma omp parallel
{
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local+1; ++i)
    out_degree_list[i] = 0;
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local; ++i)
    temp_counts[i] = 0;
}

  // Counting sort into the out-edge CSR, as in create_graph().
  for (uint64_t i = 0; i < g->m_local_out*2; i+=2)
    ++temp_counts[ggi->gen_edges[i] - g->n_offset];
  for (uint64_t i = 0; i < g->n_local; ++i)
    out_degree_list[i+1] = out_degree_list[i] + temp_counts[i];
  memcpy(temp_counts, out_degree_list, g->n_local*sizeof(uint64_t));
  for (uint64_t i = 0; i < g->m_local_out*2; i+=2)
    out_edges[temp_counts[ggi->gen_edges[i] - (uint64_t)g->n_offset]++] = ggi->gen_edges[i+1];

  g->out_edges = out_edges;
  g->out_degree_list = out_degree_list;

  uint64_t* in_edges = (uint64_t*)malloc(g->m_local_in*sizeof(uint64_t));
  uint64_t* in_degree_list = (uint64_t*)malloc((g->n_local+1)*sizeof(uint64_t));
  if (in_edges == NULL || in_degree_list == NULL)
    throw_err("create_graph_serial(), unable to allocate in edge storage\n", procid);

#pragma omp parallel
{
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local+1; ++i)
    in_degree_list[i] = 0;
#pragma omp for nowait
  for (uint64_t i = 0; i < g->n_local; ++i)
    temp_counts[i] = 0;
}

  // The same edge array, keyed on destinations, yields the in-edge CSR.
  for (uint64_t i = 0; i < g->m_local_in*2; i+=2)
    ++temp_counts[ggi->gen_edges[i+1] - g->n_offset];
  for (uint64_t i = 0; i < g->n_local; ++i)
    in_degree_list[i+1] = in_degree_list[i] + temp_counts[i];
  memcpy(temp_counts, in_degree_list, g->n_local*sizeof(uint64_t));
  for (uint64_t i = 0; i < g->m_local_in*2; i+=2)
    in_edges[temp_counts[ggi->gen_edges[i+1] - (uint64_t)g->n_offset]++] = ggi->gen_edges[i];

  free(temp_counts);
  free(ggi->gen_edges);
  g->in_edges = in_edges;
  g->in_degree_list = in_degree_list;

  g->local_unmap = (uint64_t*)malloc(g->n_local*sizeof(uint64_t));
  if (g->local_unmap == NULL)
    throw_err("create_graph_serial(), unable to allocate unmap\n", procid);

  for (uint64_t i = 0; i < g->n_local; ++i)
    g->local_unmap[i] = i + g->n_offset;

  init_map_nohash(&g->map, g->n);
  for (uint64_t i = 0; i < g->n_local; ++i)
    set_value_uq(&g->map, i, i);

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf("Task %d create_graph_serial() %9.6f (s)\n", procid, elt);
  }
  if (debug) { printf("Task %d create_graph_serial() success\n", procid); }
  return 0;
}


int clear_graph(dist_graph_t* g)
{
  if (debug) { printf("Task %d clear_graph() start\n", procid); }

  free(g->out_edges);
  free(g->in_edges);
  free(g->out_degree_list);
  free(g->in_degree_list);
  free(g->local_unmap);
  clear_map(&g->map);
  // Ghost arrays are only allocated in the distributed (multi-task) case.
  if (nprocs > 1) {
    free(g->ghost_unmap);
    free(g->ghost_tasks);
  }

  if (debug) { printf("Task %d clear_graph() success\n", procid); }
  return 0;
}

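// relabel_edges() rewrites every endpoint in out_edges/in_edges from its
// global id to a task-local index. Owned vertices take indices [0, n_local);
// each endpoint owned by another task (a "ghost") gets the next free index
// at or above n_local on first sight, recorded through the fast_map.
// Afterwards ghost_unmap[i - n_local] recovers a ghost's global id and
// ghost_tasks[i - n_local] the rank that owns it.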
int relabel_edges(dist_graph_t* g)
{
  if (debug) { printf("Task %d relabel_edges() start\n", procid); }
  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  uint64_t cur_label = g->n_local;
  uint64_t total_edges = g->m_local_in + g->m_local_out;

  if (is_init(&g->map))
    clear_map(&g->map);
  init_map(&g->map, total_edges*2);

  // Owned vertices map to their existing local indices.
  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    uint64_t vert = g->local_unmap[i];
    set_value(&g->map, vert, i);
  }

  // First sight of a non-owned endpoint assigns it the next ghost label.
  for (uint64_t i = 0; i < g->m_local_out; ++i)
  {
    uint64_t out = g->out_edges[i];
    uint64_t val = get_value(&g->map, out);
    if (val == NULL_KEY)
    {
      set_value_uq(&g->map, out, cur_label);
      g->out_edges[i] = cur_label++;
    }
    else
      g->out_edges[i] = val;
  }
  for (uint64_t i = 0; i < g->m_local_in; ++i)
  {
    uint64_t in = g->in_edges[i];
    uint64_t val = get_value(&g->map, in);
    if (val == NULL_KEY)
    {
      set_value_uq(&g->map, in, cur_label);
      g->in_edges[i] = cur_label++;
    }
    else
      g->in_edges[i] = val;
  }

  g->n_ghost = g->map.num_unique;
  g->n_total = g->n_ghost + g->n_local;

  if (debug)
    printf("Task %d, n_ghost %lu\n", procid, g->n_ghost);

  g->ghost_unmap = (uint64_t*)malloc(g->n_ghost*sizeof(uint64_t));
  g->ghost_tasks = (uint64_t*)malloc(g->n_ghost*sizeof(uint64_t));
  if (g->ghost_unmap == NULL || g->ghost_tasks == NULL)
    throw_err("relabel_edges(), unable to allocate ghost unmaps", procid);

  // Under the default block distribution, a ghost's owner follows directly
  // from its global id.
  uint64_t n_per_rank = g->n / (uint64_t)nprocs + 1;

#pragma omp parallel for
  for (uint64_t i = 0; i < g->n_ghost; ++i)
  {
    uint64_t cur_index = get_value(&g->map, g->map.unique_keys[i]);

    cur_index -= g->n_local;
    g->ghost_unmap[cur_index] = g->map.unique_keys[i];
    g->ghost_tasks[cur_index] = g->map.unique_keys[i] / n_per_rank;
  }

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf(" Task %d relabel_edges() %9.6f (s)\n", procid, elt);
  }

  if (debug) { printf("Task %d relabel_edges() success\n", procid); }
  return 0;
}

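// This overload is used after repartitioning: ghost owners can no longer be
// derived from the block distribution, so they are looked up in the
// global_parts array produced by the partitioner instead.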
int relabel_edges(dist_graph_t* g, int32_t* global_parts)
{
  if (debug) { printf("Task %d relabel_edges() start\n", procid); }
  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  uint64_t cur_label = g->n_local;
  uint64_t total_edges = g->m_local_in + g->m_local_out;

  clear_map(&g->map);
  init_map(&g->map, total_edges*2);

  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    uint64_t vert = g->local_unmap[i];
    set_value(&g->map, vert, i);
  }

  for (uint64_t i = 0; i < g->m_local_out; ++i)
  {
    uint64_t out = g->out_edges[i];
    uint64_t val = get_value(&g->map, out);
    if (val == NULL_KEY)
    {
      set_value_uq(&g->map, out, cur_label);
      g->out_edges[i] = cur_label++;
    }
    else
      g->out_edges[i] = val;
  }
  for (uint64_t i = 0; i < g->m_local_in; ++i)
  {
    uint64_t in = g->in_edges[i];
    uint64_t val = get_value(&g->map, in);
    if (val == NULL_KEY)
    {
      set_value_uq(&g->map, in, cur_label);
      g->in_edges[i] = cur_label++;
    }
    else
      g->in_edges[i] = val;
  }

  g->n_ghost = g->map.num_unique;
  g->n_total = g->n_ghost + g->n_local;

  if (debug)
    printf("Task %d, n_ghost %lu\n", procid, g->n_ghost);

  free(g->ghost_unmap);
  free(g->ghost_tasks);
  g->ghost_unmap = (uint64_t*)malloc(g->n_ghost*sizeof(uint64_t));
  g->ghost_tasks = (uint64_t*)malloc(g->n_ghost*sizeof(uint64_t));
  if (g->ghost_unmap == NULL || g->ghost_tasks == NULL)
    throw_err("relabel_edges(), unable to allocate ghost unmaps", procid);

#pragma omp parallel for
  for (uint64_t i = 0; i < g->n_ghost; ++i)
  {
    uint64_t cur_index = get_value(&g->map, g->map.unique_keys[i]);

    cur_index -= g->n_local;
    g->ghost_unmap[cur_index] = g->map.unique_keys[i];
    g->ghost_tasks[cur_index] = global_parts[g->map.unique_keys[i]];
  }

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf(" Task %d relabel_edges() %9.6f (s)\n", procid, elt);
  }

  if (debug) { printf("Task %d relabel_edges() success\n", procid); }
  return 0;
}

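// repart_graph() (file variant) reads a full n-vertex partition assignment
// on task 0, broadcasts it, then migrates vertices and relabels edges so
// each task ends up owning exactly the vertices the file assigns to it.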
int repart_graph(dist_graph_t* g, mpi_data_t* comm, char* part_file)
{
  if (debug) { printf("Task %d repart_graph() start\n", procid); }
  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  int32_t* global_parts = (int32_t*)malloc(g->n*sizeof(int32_t));
  int32_t* local_parts = (int32_t*)malloc(g->n_local*sizeof(int32_t));
  if (global_parts == NULL || local_parts == NULL)
    throw_err("repart_graph(), unable to allocate part arrays\n", procid);

#pragma omp parallel for
  for (uint64_t i = 0; i < g->n; ++i)
    global_parts[i] = -1;

  // Task 0 reads one part id per vertex from the partition file.
  if (procid == 0)
  {
    std::ifstream outfile;
    outfile.open(part_file);

    for (uint64_t i = 0; i < g->n; ++i)
      outfile >> global_parts[i];

    outfile.close();

    if (debug)
      for (uint64_t i = 0; i < g->n; ++i)
        if (global_parts[i] == -1)
        {
          printf("Part Error: %lu not assigned\n", i);
          global_parts[i] = 0;
        }
  }

  MPI_Bcast(global_parts, (int32_t)g->n, MPI_INT32_T, 0, MPI_COMM_WORLD);

#pragma omp parallel for
  for (uint64_t i = 0; i < g->n_local; ++i)
    local_parts[i] = global_parts[g->local_unmap[i]];

  repart_graph(g, comm, local_parts);
  relabel_edges(g, global_parts);
  if (debug) {
    for (uint64_t i = 0; i < g->n_local; ++i)
      if (global_parts[g->local_unmap[i]] != procid)
        printf("Part Error: task %d received %lu which was assigned to %d\n",
          procid, g->local_unmap[i], global_parts[g->local_unmap[i]]);
  }
  free(local_parts);
  free(global_parts);

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf("Task %d repart_graph() %9.6f (s)\n", procid, elt);
  }

  if (debug) { printf("Task %d repart_graph() success\n", procid); }
  return 0;
}

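// repart_graph() (in-place variant) migrates the graph to match local_parts.
// The exchange runs as three rounds of MPI_Alltoallv -- (1) vertex ids with
// their out/in degrees, (2) out-edge lists, (3) in-edge lists -- and each
// round is split into num_comms batches so no single message exceeds
// MAX_SEND_SIZE, with batch c covering local vertices
// [n_local*c/num_comms, n_local*(c+1)/num_comms).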
int repart_graph(dist_graph_t* g, mpi_data_t* comm, int32_t* local_parts)
{
  for (int i = 0; i < nprocs; ++i)
  {
    comm->sendcounts_temp[i] = 0;
    comm->recvcounts_temp[i] = 0;
  }

  // Count how many vertices this task sends to each new owner.
  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    int32_t rank = local_parts[i];
    ++comm->sendcounts_temp[rank];
  }

  MPI_Alltoall(comm->sendcounts_temp, 1, MPI_UINT64_T,
               comm->recvcounts_temp, 1, MPI_UINT64_T, MPI_COMM_WORLD);

  uint64_t total_recv = 0;
  uint64_t total_send = 0;
  for (int32_t i = 0; i < nprocs; ++i)
  {
    total_recv += comm->recvcounts_temp[i];
    total_send += comm->sendcounts_temp[i];
  }

  uint64_t* recvbuf_vids = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  uint64_t* recvbuf_deg_out = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  uint64_t* recvbuf_deg_in = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  if (recvbuf_vids == NULL ||
      recvbuf_deg_out == NULL || recvbuf_deg_in == NULL)
    throw_err("repart_graph(), unable to allocate buffers\n", procid);

  // Batch the exchange so each message stays under MAX_SEND_SIZE; the
  // average degree (m/n) scales the per-batch vertex budget. All tasks
  // must agree on the batch count, hence the MAX reduction.
  uint64_t max_transfer = total_send > total_recv ? total_send : total_recv;
  uint64_t num_comms = max_transfer / (uint64_t)(MAX_SEND_SIZE/(g->m/g->n)) + 1;
  MPI_Allreduce(MPI_IN_PLACE, &num_comms, 1,
                MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);

  if (debug)
    printf("Task %d repart_graph() num_comms %lu total_send %lu total_recv %lu\n",
      procid, num_comms, total_send, total_recv);

  // Round 1: exchange vertex ids along with their out/in degrees.
  uint64_t sum_recv_deg = 0;
  for (uint64_t c = 0; c < num_comms; ++c)
  {
    uint64_t send_begin = (g->n_local * c) / num_comms;
    uint64_t send_end = (g->n_local * (c + 1)) / num_comms;
    if (c == (num_comms-1))
      send_end = g->n_local;

    for (int32_t i = 0; i < nprocs; ++i)
    {
      comm->sendcounts[i] = 0;
      comm->recvcounts[i] = 0;
    }

    if (debug)
      printf("Task %d send_begin %lu send_end %lu\n", procid, send_begin, send_end);
    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      int32_t rank = local_parts[i];
      ++comm->sendcounts[rank];
    }

    MPI_Alltoall(comm->sendcounts, 1, MPI_INT32_T,
                 comm->recvcounts, 1, MPI_INT32_T, MPI_COMM_WORLD);

    comm->sdispls[0] = 0;
    comm->sdispls_cpy[0] = 0;
    comm->rdispls[0] = 0;
    for (int32_t i = 1; i < nprocs; ++i)
    {
      comm->sdispls[i] = comm->sdispls[i-1] + comm->sendcounts[i-1];
      comm->rdispls[i] = comm->rdispls[i-1] + comm->recvcounts[i-1];
      comm->sdispls_cpy[i] = comm->sdispls[i];
    }

    int32_t cur_send = comm->sdispls[nprocs-1] + comm->sendcounts[nprocs-1];
    int32_t cur_recv = comm->rdispls[nprocs-1] + comm->recvcounts[nprocs-1];
    uint64_t* sendbuf_vids = (uint64_t*)malloc((uint64_t)cur_send*sizeof(uint64_t));
    uint64_t* sendbuf_deg_out = (uint64_t*)malloc((uint64_t)cur_send*sizeof(uint64_t));
    uint64_t* sendbuf_deg_in = (uint64_t*)malloc((uint64_t)cur_send*sizeof(uint64_t));
    if (sendbuf_vids == NULL ||
        sendbuf_deg_out == NULL || sendbuf_deg_in == NULL)
      throw_err("repart_graph(), unable to allocate buffers\n", procid);

    // Pack each vertex's global id and degrees by destination rank.
    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      int32_t rank = local_parts[i];
      int32_t snd_index = comm->sdispls_cpy[rank]++;
      sendbuf_vids[snd_index] = g->local_unmap[i];
      sendbuf_deg_out[snd_index] = (uint64_t)out_degree(g, i);
      sendbuf_deg_in[snd_index] = (uint64_t)in_degree(g, i);
    }

    MPI_Alltoallv(
      sendbuf_vids, comm->sendcounts, comm->sdispls, MPI_UINT64_T,
      recvbuf_vids+sum_recv_deg, comm->recvcounts, comm->rdispls,
      MPI_UINT64_T, MPI_COMM_WORLD);
    MPI_Alltoallv(
      sendbuf_deg_out, comm->sendcounts, comm->sdispls, MPI_UINT64_T,
      recvbuf_deg_out+sum_recv_deg, comm->recvcounts, comm->rdispls,
      MPI_UINT64_T, MPI_COMM_WORLD);
    MPI_Alltoallv(
      sendbuf_deg_in, comm->sendcounts, comm->sdispls, MPI_UINT64_T,
      recvbuf_deg_in+sum_recv_deg, comm->recvcounts, comm->rdispls,
      MPI_UINT64_T, MPI_COMM_WORLD);
    sum_recv_deg += (uint64_t)cur_recv;
    free(sendbuf_vids);
    free(sendbuf_deg_out);
    free(sendbuf_deg_in);
  }

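  // Round 2: exchange out-edge lists. Counts are now per-edge rather than
  // per-vertex; the batch count from round 1 is reused so degree and edge
  // batches line up.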
  for (int i = 0; i < nprocs; ++i)
  {
    comm->sendcounts_temp[i] = 0;
    comm->recvcounts_temp[i] = 0;
  }

  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    int32_t rank = local_parts[i];
    comm->sendcounts_temp[rank] += (uint64_t)out_degree(g, i);
  }

  MPI_Alltoall(comm->sendcounts_temp, 1, MPI_UINT64_T,
               comm->recvcounts_temp, 1, MPI_UINT64_T, MPI_COMM_WORLD);

  total_recv = 0;
  total_send = 0;
  for (int32_t i = 0; i < nprocs; ++i)
  {
    total_recv += comm->recvcounts_temp[i];
    total_send += comm->sendcounts_temp[i];
  }

  uint64_t* recvbuf_e_out = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  if (recvbuf_e_out == NULL)
    throw_err("repart_graph(), unable to allocate buffer\n", procid);

  if (debug)
    printf("Task %d repart_graph() num_comms %lu total_send %lu total_recv %lu\n",
      procid, num_comms, total_send, total_recv);

  uint64_t sum_recv_e_out = 0;
  for (uint64_t c = 0; c < num_comms; ++c)
  {
    uint64_t send_begin = (g->n_local * c) / num_comms;
    uint64_t send_end = (g->n_local * (c + 1)) / num_comms;
    if (c == (num_comms-1))
      send_end = g->n_local;

    for (int32_t i = 0; i < nprocs; ++i)
    {
      comm->sendcounts[i] = 0;
      comm->recvcounts[i] = 0;
    }

    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      int32_t rank = local_parts[i];
      comm->sendcounts[rank] += (int32_t)out_degree(g, i);
    }

    MPI_Alltoall(comm->sendcounts, 1, MPI_INT32_T,
                 comm->recvcounts, 1, MPI_INT32_T, MPI_COMM_WORLD);

    comm->sdispls[0] = 0;
    comm->sdispls_cpy[0] = 0;
    comm->rdispls[0] = 0;
    for (int32_t i = 1; i < nprocs; ++i)
    {
      comm->sdispls[i] = comm->sdispls[i-1] + comm->sendcounts[i-1];
      comm->rdispls[i] = comm->rdispls[i-1] + comm->recvcounts[i-1];
      comm->sdispls_cpy[i] = comm->sdispls[i];
    }

    int32_t cur_send = comm->sdispls[nprocs-1] + comm->sendcounts[nprocs-1];
    int32_t cur_recv = comm->rdispls[nprocs-1] + comm->recvcounts[nprocs-1];
    uint64_t* sendbuf_e_out = (uint64_t*)malloc((uint64_t)cur_send*sizeof(uint64_t));
    if (sendbuf_e_out == NULL)
      throw_err("repart_graph(), unable to allocate buffer\n", procid);

    // Pack out-edges, translating local endpoint indices back to global ids.
    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      uint64_t out_degree = out_degree(g, i);
      uint64_t* outs = out_vertices(g, i);
      int32_t rank = local_parts[i];
      int32_t snd_index = comm->sdispls_cpy[rank];
      comm->sdispls_cpy[rank] += out_degree;
      for (uint64_t j = 0; j < out_degree; ++j)
      {
        uint64_t out;
        if (outs[j] < g->n_local)
          out = g->local_unmap[outs[j]];
        else
          out = g->ghost_unmap[outs[j]-g->n_local];
        sendbuf_e_out[snd_index++] = out;
      }
    }

    MPI_Alltoallv(sendbuf_e_out, comm->sendcounts, comm->sdispls, MPI_UINT64_T,
                  recvbuf_e_out+sum_recv_e_out, comm->recvcounts, comm->rdispls,
                  MPI_UINT64_T, MPI_COMM_WORLD);
    sum_recv_e_out += (uint64_t)cur_recv;
    free(sendbuf_e_out);
  }

  // Rebuild the out CSR: the received degrees prefix-sum into the new offsets.
  free(g->out_edges);
  free(g->out_degree_list);
  g->out_edges = recvbuf_e_out;
  g->m_local_out = (uint64_t)sum_recv_e_out;
  g->out_degree_list = (uint64_t*)malloc((sum_recv_deg+1)*sizeof(uint64_t));
  g->out_degree_list[0] = 0;
  for (uint64_t i = 0; i < sum_recv_deg; ++i)
    g->out_degree_list[i+1] = g->out_degree_list[i] + recvbuf_deg_out[i];
  assert(g->out_degree_list[sum_recv_deg] == g->m_local_out);
  free(recvbuf_deg_out);

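  // Round 3: the in-edge exchange mirrors round 2, keyed on in_degree and
  // in_vertices.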
  for (int i = 0; i < nprocs; ++i)
  {
    comm->sendcounts_temp[i] = 0;
    comm->recvcounts_temp[i] = 0;
  }

  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    int32_t rank = local_parts[i];
    comm->sendcounts_temp[rank] += (uint64_t)in_degree(g, i);
  }

  MPI_Alltoall(comm->sendcounts_temp, 1, MPI_UINT64_T,
               comm->recvcounts_temp, 1, MPI_UINT64_T, MPI_COMM_WORLD);

  total_recv = 0;
  total_send = 0;
  for (int32_t i = 0; i < nprocs; ++i)
  {
    total_recv += comm->recvcounts_temp[i];
    total_send += comm->sendcounts_temp[i];
  }

  uint64_t* recvbuf_e_in = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  if (recvbuf_e_in == NULL)
    throw_err("repart_graph(), unable to allocate buffer\n", procid);

  if (debug)
    printf("Task %d repart_graph() num_comms %lu total_send %lu total_recv %lu\n",
      procid, num_comms, total_send, total_recv);

  uint64_t sum_recv_e_in = 0;
  for (uint64_t c = 0; c < num_comms; ++c)
  {
    uint64_t send_begin = (g->n_local * c) / num_comms;
    uint64_t send_end = (g->n_local * (c + 1)) / num_comms;
    if (c == (num_comms-1))
      send_end = g->n_local;

    for (int32_t i = 0; i < nprocs; ++i)
    {
      comm->sendcounts[i] = 0;
      comm->recvcounts[i] = 0;
    }

    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      int32_t rank = local_parts[i];
      comm->sendcounts[rank] += (int32_t)in_degree(g, i);
    }

    MPI_Alltoall(comm->sendcounts, 1, MPI_INT32_T,
                 comm->recvcounts, 1, MPI_INT32_T, MPI_COMM_WORLD);

    comm->sdispls[0] = 0;
    comm->sdispls_cpy[0] = 0;
    comm->rdispls[0] = 0;
    for (int32_t i = 1; i < nprocs; ++i)
    {
      comm->sdispls[i] = comm->sdispls[i-1] + comm->sendcounts[i-1];
      comm->rdispls[i] = comm->rdispls[i-1] + comm->recvcounts[i-1];
      comm->sdispls_cpy[i] = comm->sdispls[i];
    }

    int32_t cur_send = comm->sdispls[nprocs-1] + comm->sendcounts[nprocs-1];
    int32_t cur_recv = comm->rdispls[nprocs-1] + comm->recvcounts[nprocs-1];
    uint64_t* sendbuf_e_in = (uint64_t*)malloc((uint64_t)cur_send*sizeof(uint64_t));
    if (sendbuf_e_in == NULL)
      throw_err("repart_graph(), unable to allocate buffer\n", procid);

    for (uint64_t i = send_begin; i < send_end; ++i)
    {
      uint64_t in_degree = in_degree(g, i);
      uint64_t* ins = in_vertices(g, i);
      int32_t rank = local_parts[i];
      int32_t snd_index = comm->sdispls_cpy[rank];
      comm->sdispls_cpy[rank] += in_degree;
      for (uint64_t j = 0; j < in_degree; ++j)
      {
        uint64_t in;
        if (ins[j] < g->n_local)
          in = g->local_unmap[ins[j]];
        else
          in = g->ghost_unmap[ins[j]-g->n_local];
        sendbuf_e_in[snd_index++] = in;
      }
    }

    MPI_Alltoallv(sendbuf_e_in, comm->sendcounts, comm->sdispls, MPI_UINT64_T,
                  recvbuf_e_in+sum_recv_e_in, comm->recvcounts, comm->rdispls,
                  MPI_UINT64_T, MPI_COMM_WORLD);
    sum_recv_e_in += (uint64_t)cur_recv;
    free(sendbuf_e_in);
  }

  free(g->in_edges);
  free(g->in_degree_list);
  g->in_edges = recvbuf_e_in;
  g->m_local_in = (uint64_t)sum_recv_e_in;
  g->in_degree_list = (uint64_t*)malloc((sum_recv_deg+1)*sizeof(uint64_t));
  g->in_degree_list[0] = 0;
  for (uint64_t i = 0; i < sum_recv_deg; ++i)
    g->in_degree_list[i+1] = g->in_degree_list[i] + recvbuf_deg_in[i];
  assert(g->in_degree_list[sum_recv_deg] == g->m_local_in);
  free(recvbuf_deg_in);

  // The received vertex ids become the new local-to-global unmap; one id was
  // received per migrated vertex, so sum_recv_deg is also the new n_local.
  free(g->local_unmap);
  g->local_unmap = (uint64_t*)malloc(sum_recv_deg*sizeof(uint64_t));
  for (uint64_t i = 0; i < sum_recv_deg; ++i)
    g->local_unmap[i] = recvbuf_vids[i];
  free(recvbuf_vids);

  g->n_local = sum_recv_deg;

  return 0;
}

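// get_max_degree_vert() finds the globally maximum degree and its vertex.
// Each task reduces its local maxima with MPI_MAX; any task whose local max
// matches the global max proposes its vertex id, the rest propose NULL_KEY,
// and a final MPI_MIN reduction keeps the smallest proposed id.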
int get_max_degree_vert(dist_graph_t* g)
{
  if (debug) { printf("Task %d get_max_degree_vert() start\n", procid); }
  double elt = 0.0;
  if (verbose) {
    MPI_Barrier(MPI_COMM_WORLD);
    elt = omp_get_wtime();
  }

  uint64_t my_max_degree = 0;
  uint64_t my_max_out_degree = 0;
  uint64_t my_max_in_degree = 0;
  uint64_t my_max_vert = -1;
  for (uint64_t i = 0; i < g->n_local; ++i)
  {
    uint64_t out_degree = out_degree(g, i);
    uint64_t in_degree = in_degree(g, i);
    // Total (out + in) degree determines the max-degree vertex.
    uint64_t this_degree = (uint64_t)out_degree + (uint64_t)in_degree;
    if (this_degree > my_max_degree)
    {
      my_max_degree = this_degree;
      my_max_vert = g->local_unmap[i];
    }
    if (out_degree > my_max_out_degree)
      my_max_out_degree = out_degree;
    if (in_degree > my_max_in_degree)
      my_max_in_degree = in_degree;
  }

  uint64_t max_in_degree;
  uint64_t max_out_degree;
  uint64_t max_degree;
  uint64_t max_vert;

  MPI_Allreduce(&my_max_out_degree, &max_out_degree, 1, MPI_UINT64_T,
                MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(&my_max_in_degree, &max_in_degree, 1, MPI_UINT64_T,
                MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(&my_max_degree, &max_degree, 1, MPI_UINT64_T,
                MPI_MAX, MPI_COMM_WORLD);
  if (my_max_degree == max_degree)
    max_vert = my_max_vert;
  else
    max_vert = NULL_KEY;
  MPI_Allreduce(MPI_IN_PLACE, &max_vert, 1, MPI_UINT64_T,
                MPI_MIN, MPI_COMM_WORLD);

  g->max_degree_vert = max_vert;
  g->max_out_degree = max_out_degree;
  g->max_in_degree = max_in_degree;

  if (verbose) {
    elt = omp_get_wtime() - elt;
    printf("Task %d, max_degree %lu, max_vert %lu, max_in_degree %lu, max_out_degree %lu, %f (s)\n",
      procid, max_degree, max_vert, max_in_degree, max_out_degree, elt);
  }

  if (debug) { printf("Task %d get_max_degree_vert() success\n", procid); }
  return 0;
}