// Parses the buffered text lines in `lines` (two string tokens and a double
// per line, see the sscanf format below) and then empties `lines`.
// NOTE(review): the numeric prefixes (63, 68, 71, 83) look like line numbers
// carried over from the listing this text was extracted from; the statements
// between them (braces, the declarations of from/to/vv, and whatever is done
// with rows/cols/vals, hashdyn and shuffler) are not visible here.
// Presumably rows/cols/vals receive the parsed entries - confirm against the
// full source.
63void ProcessLines(vector<IT1> & rows, vector<IT1> & cols, vector<NT1> & vals, vector<string> & lines,
tommy_hashdyn & hashdyn,
const vector<uint32_t> & shuffler)
// Visit every buffered line in order.
68 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
// Split the line into two whitespace-delimited tokens and one double.
// NOTE(review): "%s" carries no field width, so a token longer than the
// from/to buffers would overrun them - confirm the buffer sizes or add widths.
// The sscanf return value is not used on this line.
71 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
// Swapping with an empty temporary leaves `lines` empty and releases its old
// storage when the temporary is destroyed at the end of the statement.
83 vector<string>().swap(lines);
// Reads one batch of up to BATCH bytes from f_local starting at offset curpos
// and appends newline-delimited pieces of it to `lines`.
// Visible return path: returns true once curpos >= end_fpos. Other return
// paths, and where curpos is advanced, are not visible in this excerpt.
// NOTE(review): the numeric prefixes look like line numbers from the listing
// this text was extracted from; braces and several statements between them
// are missing here.
101bool FetchBatch(FILE * f_local,
long int & curpos,
long int end_fpos,
bool firstcall, vector<string> & lines)
// Size of the read buffer, taken from the BATCH constant defined elsewhere.
103 size_t bytes2fetch =
BATCH;
// True when the read starts at offset 0 of the file.
104 bool begfile = (curpos == 0);
// First call that does not start at the beginning of the file; the body of
// this branch is not visible. Presumably it adjusts the read so a partial
// leading line can be detected - TODO confirm.
105 if(firstcall && (!begfile))
// Heap buffer for the raw bytes.
110 char * buf =
new char[bytes2fetch];
// Second pointer to the allocation; every delete [] below goes through it,
// presumably because buf itself is advanced while scanning (not visible here).
111 char * originalbuf = buf;
// Position the stream at the requested offset; a non-zero result means failure.
113 int seekfail = fseek(f_local, curpos, SEEK_SET);
115 cout <<
"fseek failed to move to " << curpos << endl;
// Number of bytes actually read (element size is 1).
// NOTE(review): fread returns size_t; it is narrowed to int here.
117 int bytes_read = fread(buf,
sizeof(
char), bytes2fetch, f_local);
// Buffer release on a path whose condition is not visible (presumably an
// early exit when nothing was read - TODO confirm).
120 delete [] originalbuf;
// Same condition as above: first call, not at the start of the file. The
// handling is not visible; the message below suggests it looks for the first
// line break in the batch.
124 if(firstcall && (!begfile))
136 cout <<
"Unexpected line without a break" << endl;
// Keep extracting while there are unread bytes and the range end has not
// been reached.
144 while(bytes_read > 0 && curpos < end_fpos)
// Locate the next newline within the remaining bytes of the buffer.
146 char *c = (
char*)memchr(buf,
'\n', bytes_read);
// Buffer release on a path whose condition is not visible (presumably when
// no newline was found - TODO confirm).
148 delete [] originalbuf;
// Append n-1 characters starting at buf as one line; n is defined on a line
// not shown (presumably the length up to and including the newline).
154 lines.push_back(
string(buf, n-1));
// Release the buffer on the visible final path.
159 delete [] originalbuf;
// Report completion once the current position has reached the range end.
160 if (curpos >= end_fpos)
return true;
// Converts the text file `filename` into renamed graph/subgraph output files.
// Visible steps: (1) read the file in batches and parse every line, tracking
// a vertex count (vertexid) and an entry count (entriesread); (2) build a
// shuffled index vector and pair it with dictout; (3) per thread, re-read a
// byte range of the file, run ProcessLines on it, and write the resulting
// (row, col, value) triples to per-thread output files.
// NOTE(review): the numeric prefixes look like line numbers from the listing
// this text was extracted from; braces, loop headers, pragmas and several
// declarations between them are missing, and the function continues past the
// last visible line.
164void MMConverter(
const string & filename, ofstream & dictout,
const string & outprefix)
// Open the input; a NULL result triggers the message below.
167 if ((f = fopen(filename.c_str(),
"r")) == NULL)
169 printf(
"file can not be found\n");
// stat() supplies the file size; -1 signals failure (handling not visible).
175 if (stat(filename.c_str(), &st) == -1)
179 int64_t file_size = st.st_size;
180 cout <<
"File is " << file_size <<
" bytes" << endl;
// Current stream position, used as the start offset of the first pass.
181 long int ffirst = ftell(f);
182 long int fpos = ffirst;
// The first pass covers the file up to its full size.
183 long int end_fpos = file_size;
185 vector<string> lines;
// First batch of the first pass (firstcall = true).
186 bool finished =
FetchBatch(f, fpos, end_fpos,
true, lines);
// Running count of lines fetched so far.
187 int64_t entriesread = lines.size();
// Parse each fetched line into two tokens and a double.
// NOTE(review): unbounded "%s" - confirm the from/to buffer sizes.
196 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
199 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
// Empty `lines` and release its storage before the next batch.
219 vector<string>().swap(lines);
// Subsequent batch (firstcall = false); presumably inside a loop that runs
// until `finished` is true - the loop header is not visible.
223 finished =
FetchBatch(f, fpos, end_fpos,
false, lines);
224 entriesread += lines.size();
225 cout <<
"entriesread: " << entriesread <<
", current vertex id: " << vertexid << endl;
// Same per-line parsing as for the first batch.
231 for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
234 sscanf(itr->c_str(),
"%s %s %lg", from, to, &vv);
255 vector<string>().swap(lines);
257 cout <<
"There are " << vertexid <<
" vertices and " << entriesread <<
" edges" << endl;
// Vertex-count limits for the subgraph outputs: the full count, then halved
// five times. Six initializers are given, so NSUBGRAPHS must be at least 6;
// any extra elements would be zero-initialized.
261 uint32_t ranges[NSUBGRAPHS] = {vertexid, vertexid/2, vertexid/4, vertexid/8, vertexid/16, vertexid/32};
262 cout <<
"Printing submatrices with the following numbers of vertices: ";
263 copy(ranges, ranges+NSUBGRAPHS, ostream_iterator<uint32_t>(cout,
" ")); cout << endl;
// Index vector 0..nvertices-1 in random order (nvertices is defined on a line
// not shown).
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; std::shuffle with an explicit engine is the replacement.
267 vector< uint32_t > shuffler(nvertices);
268 iota(shuffler.begin(), shuffler.end(),
static_cast<uint32_t>(0));
269 random_shuffle ( shuffler.begin(), shuffler.end() );
// Bundle pointers to the shuffle vector and the dictionary stream; the
// consumer of mypair is not visible here.
271 pair< vector<uint32_t>*, ofstream*>
mypair(&shuffler, &dictout);
274 cout <<
"Shuffled and wrote dictionary " << endl;
// New fpos/end_fpos declarations: these shadow the ones above, so this must
// be a nested scope (presumably an OpenMP parallel region whose pragma is not
// visible).
280 long int fpos, end_fpos;
281 int this_thread = omp_get_thread_num();
282 int num_threads = omp_get_num_threads();
// Start offset of this thread's byte range: thread 0 starts at ffirst, the
// others at their proportional share of the file size.
284 if(this_thread == 0) fpos = ffirst;
285 else fpos = this_thread * file_size / num_threads;
// One output name and stream per subgraph.
289 string names[NSUBGRAPHS];
290 ofstream outfiles[NSUBGRAPHS];
291 for(
int i= 0; i<NSUBGRAPHS; i++)
// Name is built as "Renamed_subgraph" + i + ... + outprefix + thread id
// (listing line 295 is not visible), printed, then opened.
293 names[i] =
"Renamed_subgraph";
294 names[i] += std::to_string(i);
296 names[i] += outprefix;
297 names[i] += std::to_string(this_thread);
298 cout << names[i] << endl;
299 outfiles[i].open(names[i]);
// Name for the full-graph output; its declaration and listing line 305 are
// not visible.
304 name =
"Renamed_graph_";
306 name += std::to_string(this_thread);
// End offset of this thread's byte range; the last thread reads to the end of
// the file.
311 if(this_thread != (num_threads-1)) end_fpos = (this_thread + 1) * file_size / num_threads;
312 else end_fpos = file_size;
// Separate FILE handle opened here for the per-thread read.
// NOTE(review): no NULL check on this fopen is visible in the excerpt.
314 FILE * f_perthread = fopen(filename.c_str(),
"rb");
315 vector<string> lines;
// First batch of this thread's range (firstcall = true).
316 bool finished =
FetchBatch(f_perthread, fpos, end_fpos,
true, lines);
// Number of lines in this batch; bounds the write loop below.
317 size_t nnz = lines.size();
318 vector<uint32_t> rows;
319 vector<uint32_t> cols;
// vals is declared on a line not shown.
321 ProcessLines(rows, cols, vals, lines, hashdyn, shuffler);
325 cout <<
"there are " << num_threads <<
" threads" << endl;
// Header for each subgraph file: banner line, then dimensions and a count.
// NOTE(review): the count written is the total entriesread, while the write
// loops below only emit entries whose row and column are both below
// ranges[i] - confirm readers tolerate the mismatch. Whether these header
// writes are restricted to one thread is not visible here.
327 for(
int i= 0; i<NSUBGRAPHS; i++)
329 outfiles[i] <<
"%%MatrixMarket matrix coordinate real symmetric\n";
330 outfiles[i] << ranges[i] <<
"\t" << ranges[i] <<
"\t" << entriesread <<
"\n";
// Header for the full-graph file (outfile is declared on a line not shown).
333 outfile <<
"%%MatrixMarket matrix coordinate real symmetric\n";
334 outfile << nvertices <<
"\t" << nvertices <<
"\t" << entriesread <<
"\n";
// Write the entries of this batch.
337 for(
size_t k=0; k< nnz; ++k)
340 for(
int i= 0; i<NSUBGRAPHS; i++)
// An entry goes to subgraph i only when both indices are below its limit.
342 if(rows[k] < ranges[i] && cols[k] < ranges[i])
343 outfiles[i] << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
// Every entry goes to the full-graph file.
346 outfile << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
// Subsequent batch for this thread (firstcall = false); presumably inside a
// loop that runs until `finished` is true - the loop header is not visible.
356 finished =
FetchBatch(f_perthread, fpos, end_fpos,
false, lines);
// NOTE(review): no update of nnz is visible before the loop below; presumably
// listing line 357 refreshes it from lines.size() - TODO confirm, since
// ProcessLines empties `lines`.
358 ProcessLines(rows, cols, vals, lines, hashdyn, shuffler);
// Same write-out as for the first batch.
360 for(
size_t k=0; k< nnz; ++k)
363 for(
int i= 0; i<NSUBGRAPHS; i++)
365 if(rows[k] < ranges[i] && cols[k] < ranges[i])
366 outfiles[i] << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
369 outfile << rows[k] <<
"\t" << cols[k] <<
"\t" << vals[k] <<
"\n";
// Final pass over the subgraph streams; the loop body lies beyond the visible
// excerpt.
377 for(
int i= 0; i<NSUBGRAPHS; i++)