8
using namespace vcflib;
11
int main(int argc, char** argv) {
14
cerr << "usage: " << argv[0] << " <vcf file>" << endl
15
<< "outputs a VCF stream in which 'long' non-complex"
16
<< "alleles have their position corrected." << endl
17
<< "assumes that VCF records can't overlap 5'->3'" << endl;
21
string filename = argv[1];
23
VariantCallFile variantFile;
24
if (filename == "-") {
25
variantFile.open(std::cin);
27
variantFile.open(filename);
30
if (!variantFile.is_open()) {
31
cerr << "could not open " << filename << endl;
35
Variant var(variantFile);
37
// write the new header
38
cout << variantFile.header << endl;
40
// print the records, filtering is done via the setting of varA's output sample names
41
while (variantFile.getNextVariant(var)) {
42
// if we just have one parsed alternate (non-complex case)
43
map<string, vector<VariantAllele> > parsedAlts = var.parsedAlternates(true, true); // use mnps, and previous for indels
44
// but the alt string is long
45
//cerr << var.alt.size() << " " << parsedAlts.size() << endl;
46
if (var.alt.size() == 1 && parsedAlts.size() > 1) {
47
string& alternate = var.alt.front();
48
vector<VariantAllele>& vs = parsedAlts[alternate];
49
vector<VariantAllele> valleles;
50
for (vector<VariantAllele>::iterator a = vs.begin(); a != vs.end(); ++a) {
51
if (a->ref != a->alt) {
52
valleles.push_back(*a);
55
if (valleles.size() == 1) {
56
// do we have extra sequence hanging around?
57
VariantAllele& varallele = valleles.front();
58
if (vs.front().ref == vs.front().alt) {
59
var.position = varallele.position;
60
var.ref = var.ref.substr(vs.front().ref.size(), varallele.ref.size());
61
var.alt.front() = varallele.alt;