7
using namespace vcflib;
9
// Linear membership test: scans `l` from front to back and reports true as
// soon as an element compares equal to `v`.
// NOTE(review): the tail of this function (closing braces and the
// fall-through return) is not visible in this excerpt; presumably it returns
// false when no element matches -- confirm against the full file.
bool listContains(list<string>& l, string& v) {
10
for (list<string>::iterator i = l.begin(); i != l.end(); ++i) {
11
// First match wins; no need to look at the rest of the list.
if (*i == v) return true;
16
// Writes the usage/help text to stderr.
// argv: the program's argument vector; only argv[0] is read, as the program
// name shown in the usage line.
// NOTE(review): several continuation lines of this statement and the end of
// the function are not visible in this excerpt.
void printSummary(char** argv) {
17
cerr << "usage: " << argv[0] << " [options] [vcf file]" << endl
19
<< "Sorts the input (either stdin or file) using a streaming sort algorithm."
23
<< " -h, --help this dialog" << endl
24
// The documented default of 10000 matches the initial value given to
// sortSitesWindow in main.
<< " -w, --window N number of sites to sort (default 10000)" << endl
25
<< " -a, --all load all sites and then sort in memory" << endl;
28
// Program entry point.
// Parses the command-line options with getopt_long, opens the VCF input
// (a named file when exactly one positional argument remains, std::cin
// otherwise), echoes the header to stdout, and then prints each record's
// original line so that, within a sequence, records come out in ascending
// position order. Sequences are emitted in the order they were first seen.
// NOTE(review): this excerpt omits many lines of the function (the option
// loop and switch, closing braces, and the declarations of c, option_index,
// sortAll and numrecords); comments below describe only what is visible.
int main(int argc, char** argv) {
30
VariantCallFile variantFile;
31
// Window size in sites; this is the variable the --window/-w parsing below
// writes into.
int sortSitesWindow = 10000;
37
// Long-option table passed to getopt_long below.
static struct option long_options[] =
39
/* These options set a flag. */
40
//{"verbose", no_argument, &verbose_flag, 1},
41
{"help", no_argument, 0, 'h'},
42
{"window", required_argument, 0, 'w'},
43
// NOTE(review): "all" is declared required_argument here, but the short
// option string "haw:" gives 'a' no argument and the usage text shows
// -a/--all without one. This looks like it should be no_argument -- confirm.
{"all", required_argument, 0, 'a'},
46
/* getopt_long stores the option index here. */
49
// Short options: -h and -a take no argument, -w takes one.
c = getopt_long (argc, argv, "haw:",
50
long_options, &option_index);
61
// convert() is defined outside this excerpt; a false result is treated as a
// failure to parse the option argument into sortSitesWindow.
if (!convert(optarg, sortSitesWindow)) {
62
cerr << "could not parse --window, -w" << endl;
81
// Exactly one argument left after the options: treat it as the input file.
if (optind == argc - 1) {
82
string inputFilename = argv[optind];
83
variantFile.open(inputFilename);
85
// Presumably the else branch of the check above (the `else` line itself is
// not visible): read the VCF from standard input.
variantFile.open(std::cin);
88
// Guard against an input that failed to open; the handling code is not
// visible in this excerpt.
if (!variantFile.is_open()) {
92
// The header is written to stdout before any records.
cout << variantFile.header << endl;
94
// Buffered records: sequence name -> position -> vrepr() string -> variants.
// std::map keeps each level ordered by key, which is what yields
// position-sorted output within a sequence.
map<string, map<long int, map<string, vector<Variant> > > > records;
97
// Sequence names in order of first appearance; this fixes the order in
// which sequences are emitted.
list<string> sequenceNames;
99
// Presumably disables per-sample parsing, since only originalLine is ever
// printed below -- confirm against VariantCallFile.
variantFile.parseSamples = false;
100
Variant var(variantFile);
101
while (variantFile.getNextVariant(var)) {
102
//cerr << "at position " << var.sequenceName << ":" << var.position << endl;
103
// Register a sequence the first time one of its records is seen.
if (!listContains(sequenceNames, var.sequenceName)) {
104
//cerr << "adding new sequence name " << var.sequenceName << endl;
105
sequenceNames.push_back(var.sequenceName);
107
// File the record (by copy) under its sequence, position and vrepr() key.
records[var.sequenceName][var.position][var.vrepr()].push_back(var);
108
// Counts a site when its position holds exactly one distinct vrepr() key
// after the insertion above.
// NOTE(review): a second variant with the same vrepr() at the same position
// leaves size() at 1 and so increments numrecords again -- confirm whether
// that over-count is intended.
if (records[var.sequenceName][var.position].size() == 1) ++numrecords;
109
// Windowed mode only: once more than sortSitesWindow sites are counted,
// emit the lowest buffered position of the oldest sequence.
if (!sortAll && numrecords > sortSitesWindow) {
110
//cerr << "outputting a position" << endl;
111
// The oldest sequence has nothing left buffered: move on to the next one.
if (records[sequenceNames.front()].empty()) {
112
//cerr << "end of reference sequence " << sequenceNames.front() << endl;
113
sequenceNames.pop_front();
115
map<long int, map<string, vector<Variant> > >& frecords = records[sequenceNames.front()];
116
// begin() of the position map is the smallest buffered position.
map<string, vector<Variant> >& vars = frecords.begin()->second;
117
for (map<string, vector<Variant> >::iterator v = vars.begin(); v != vars.end(); ++v) {
118
for (vector<Variant>::iterator s = v->second.begin(); s != v->second.end(); ++s) {
119
// Records are echoed verbatim from the input line.
cout << s->originalLine << endl;
122
// Drop the position that was just emitted from the buffer.
frecords.erase(frecords.begin());
126
//cerr << "done processing input, cleaning up" << endl;
127
// Input exhausted: flush whatever is still buffered, sequence by sequence in
// first-seen order, positions ascending, then by vrepr() key.
for (list<string>::iterator s = sequenceNames.begin(); s != sequenceNames.end(); ++s) {
128
map<long int, map<string, vector<Variant> > >& q = records[*s];
129
for (map<long int, map<string, vector<Variant> > >::iterator r = q.begin(); r != q.end(); ++r) {
130
for (map<string, vector<Variant> >::iterator v = r->second.begin(); v != r->second.end(); ++v) {
131
// NOTE(review): this inner iterator `s` shadows the outer loop's `s`
// (the sequence-name iterator); legal, but easy to misread.
for (vector<Variant>::iterator s = v->second.begin(); s != v->second.end(); ++s) {
132
cout << s->originalLine << endl;
138
//cerr << numrecords << " remain" << endl;