28double Fscore(
string mclFile,
string hipmclFile,
int base)
30 ifstream mclStream (mclFile);
31 ifstream hipmclStream (hipmclFile);
34 int64_t numMCLClusters, numHipMCLClusters;
38 vector<vector<int64_t>> mclClusters;
40 if (mclStream.is_open())
42 while ( getline (mclStream,line) )
44 istringstream iss(line);
45 if(clustID >= mclClusters.size())
47 mclClusters.resize(clustID * 2 + 1);
52 mclClusters[clustID].push_back(item);
55 nproteins += mclClusters[clustID].size();
59 numMCLClusters = clustID;
63 cout <<
"Unable to open " << mclFile << endl;
67 cout <<
"Number of clusters from MCL: " << numMCLClusters << endl;
68 vector<int64_t> clust2(nproteins, -1);
70 if (hipmclStream.is_open())
72 while ( getline (hipmclStream,line) )
74 istringstream iss(line);
78 clust2[item] = clustID;
81 cout <<
"The number of vertices in MCL and HipMCL outputs does not match. \n Please check if there are isolated vertices in HipMCL.\n Exiting.............." << endl;
88 numHipMCLClusters = clustID;
92 cout <<
"Unable to open " << hipmclFile << endl;
97 for(
int i=0; i<nproteins; i++)
100 clust2[i]=numHipMCLClusters++;
102 cout <<
"Number of clusters from HipMCL: " << numHipMCLClusters << endl;
104 vector<int64_t> clusterSizes2(numHipMCLClusters,0);
105 for(
int i=0; i<nproteins; i++)
107 clusterSizes2[clust2[i]]++;
111 vector<double> F(numMCLClusters);
112 vector<int64_t> nisect(numHipMCLClusters);
113 vector<int64_t> isect(numHipMCLClusters);
116 for(
int i=0; i<numMCLClusters; i++)
119 fill(nisect.begin(), nisect.end(), 0);
120 for(
int j=0; j<mclClusters[i].size(); j++)
122 int64_t item1 = mclClusters[i][j];
124 if(nisect[c2]==0) isect[isectCount++] = c2;
127 auto maxoverlap = max_element(nisect.begin(), nisect.end());
128 int64_t c2_max = distance(nisect.begin(), maxoverlap);
129 if(*maxoverlap!=mclClusters[i].
size() || *maxoverlap!=clusterSizes2[c2_max])
131 cout <<
"Mismatch# " << mismatch++ <<
":: MCL Cluster: "<< i <<
" Size: " << mclClusters[i].size() <<
" HipMCL Cluster: "<< c2_max <<
" Size: " << clusterSizes2[c2_max] <<
" last vertex in MCL output: " << mclClusters[i].back()<< endl;
135 for(
int j=0; j<isectCount; j++)
138 double precision = (double) nisect[c2] / clusterSizes2[c2];
139 double recall = (double) nisect[c2] / mclClusters[i].
size();
140 double Fij = 2 * precision * recall / (precision + recall);
144 Fi = Fi * mclClusters[i].size()/ nproteins;
147 return accumulate(F.begin(), F.end(), 0.0);