1580
void Plink::groupGenome()
1583
// Read from a (non-verbose) genome file
1585
checkFileExists(par::ibd_file);
1587
if ( par::ibd_read_minimal )
1588
printLOG("Reading IBS estimates (minimal format) from [ "
1589
+par::ibd_file+" ] \n");
1591
printLOG("Reading genome-wide IBS estimates from [ "
1592
+par::ibd_file+" ] \n");
1595
INC.open(par::ibd_file.c_str());
1598
map<string,int> mperson;
1599
for (int i=0; i<n; i++)
1600
mperson.insert(make_pair( sample[i]->fid+"_"+sample[i]->iid , i ));
1602
map<Individual*,int> mcode;
1603
for (int i=0; i<n; i++)
1604
mcode.insert(make_pair( sample[i] , i ));
1606
vector<Individual*> peeps;
1608
// We wish to read in an NxN matrix, and convert it to a KxK one
1611
sizeMatrix(dk,nk,0);
1612
for (int j=0; j<nk; j++)
1616
sizeTable(dkn,nk,0);
1617
for (int j=0; j<nk; j++)
1621
// Read in .genome file in verbose mode
1623
// We only want FID1,IID1,FID2,IID2 (always first four)
1626
// Get field codes from header
1632
vector<string> tokens = tokenizeLine(INC);
1633
col_length = tokens.size();
1635
if ( tokens.size() < 4 ||
1636
tokens[0] != "FID1" ||
1637
tokens[1] != "IID1" ||
1638
tokens[2] != "FID2" ||
1639
tokens[3] != "IID2" )
1640
error("Problem with header row of .genome file");
1643
for ( int i = 4; i<tokens.size(); i++)
1645
if ( tokens[i] == "DST" )
1649
if ( dst_code == -1 )
1650
error("Could not find DST fields in .genome file");
1652
// Read each pair at a time
1653
while ( ! INC.eof() )
1656
vector<string> tokens = tokenizeLine(INC);
1658
if ( tokens.size() == 0 )
1661
if ( col_length != tokens.size() )
1664
for (int i=0;i<tokens.size();i++)
1665
strmsg += tokens[i] + " ";
1666
error("Problem reading line in .genome file:\n"+strmsg+"\n");
1669
string fid1 = tokens[0];
1670
string iid1 = tokens[1];
1671
string fid2 = tokens[2];
1672
string iid2 = tokens[3];
1673
string idst = tokens[dst_code];
1675
if (fid1=="") continue;
1677
if ( ! from_string<double>( mydst , idst , std::dec) )
1680
map<string,int>::iterator person1 = mperson.find(fid1+"_"+iid1);
1681
map<string,int>::iterator person2 = mperson.find(fid2+"_"+iid2);
1683
if ( person1 == mperson.end() ||
1684
person2 == mperson.end() ||
1685
person1 == person2 )
1688
int k1 = sample[ person1->second ]->sol;
1689
int k2 = sample[ person2->second ]->sol;
1691
if ( k1 < 0 || k2 < 0 || k1 == k2 )
1701
// Record IBS distance
1706
} // Read next line in .genome
1710
for (int i=0; i<nk; i++)
1711
for (int j=0; j<i; j++)
1713
if ( dkn[i][j] > 0 )
1714
dk[i][j] /= (double)dkn[i][j];
1717
// Output a dummy .genome file
1720
GOUT0.open( (par::output_file_name + ".plst").c_str(), ios::out);
1721
printLOG("Writing person include list to [ " + par::output_file_name + ".plst ]\n");
1724
GOUT1.open( (par::output_file_name + ".clst").c_str(), ios::out);
1725
printLOG("Writing cluster list to [ " + par::output_file_name + ".clst ]\n");
1729
for (int i=0;i<n; i++)
1731
int j = sample[i]->sol;
1734
if ( k2i.find(j) == k2i.end() )
1736
k2i.insert(make_pair(j,i));
1737
GOUT0 << sample[i]->fid << " "
1738
<< sample[i]->iid << "\n";
1739
GOUT1 << sample[i]->fid << " "
1740
<< sample[i]->iid << " "
1741
<< kname[j] << "\n";
1748
GOUT.open( (par::output_file_name + ".genome").c_str(), ios::out);
1749
printLOG("Writing grouped .genome file to [ " + par::output_file_name + ".genome ]\n");
1750
GOUT << setw(par::pp_maxfid) << "FID1" << " "
1751
<< setw(par::pp_maxiid) << "IID1" << " "
1752
<< setw(par::pp_maxfid) << "FID2" << " "
1753
<< setw(par::pp_maxiid) << "IID2" << " "
1754
<< setw(8) << "DST" << " "
1755
<< setw(8) << "PPC" << "\n";
1757
for (int i=0; i<nk; i++)
1758
for (int j=0; j<i; j++)
1760
Individual * s1 = sample[k2i.find(i)->second];
1761
Individual * s2 = sample[k2i.find(j)->second];
1763
GOUT << s1->fid << " " << s1->iid << " "
1764
<< s2->fid << " " << s2->iid << " ";
1766
// cout << i << " " << j << " ";
1767
// cout << dk.size() << " " << dk[i].size() << "\n";
1769
// cout << dkn.size() << " " << dkn[i].size() << "\n";
1770
// cout << dk[i][j] << " of " << dkn[i][j] << "\n";
1772
GOUT << dk[i][j] << " 1\n";