9
using namespace vcflib;
11
bool samplesDiffer(vector<string>& samples, Variant& var) {
15
for (vector<string>::iterator s = samples.begin(); s != samples.end(); ++s) {
16
string& sampleName = *s;
17
map<string, map<string, vector<string> > >::iterator f = var.samples.find(sampleName);
18
if (f != var.samples.end()) {
19
map<string, vector<string> >& sample = f->second;
20
map<string, vector<string> >::iterator gt = sample.find("GT");
21
if (gt != sample.end()) {
22
string& thisGenotype = gt->second.front();
23
if (genotype.empty()) {
24
genotype = thisGenotype;
26
if (genotype != thisGenotype) {
39
void printSummary(char** argv) {
40
cerr << "usage: " << argv[0] << " [options] <tag> <sample> <sample> [ <sample> ... ] <vcf file>" << endl
41
<< "Tags each record where the listed sample genotypes differ with <tag>." << endl
42
<< "The first sample is assumed to be germline, the second somatic." << endl
43
<< "Each record is tagged with <tag>={germline,somatic,loh} to specify the type of" << endl
44
<< "variant given the genotype difference between the two samples." << endl
47
<< " -s --strict Require that no observations in the germline support the somatic alternate." << endl
52
int main(int argc, char** argv) {
58
static struct option long_options[] =
60
/* These options set a flag. */
61
//{"verbose", no_argument, &verbose_flag, 1},
62
{"help", no_argument, 0, 'h'},
63
{"strict", no_argument, 0, 's'},
64
//{"length", no_argument, &printLength, true},
67
/* getopt_long stores the option index here. */
70
c = getopt_long (argc, argv, "hs",
71
long_options, &option_index);
73
/* Detect the end of the options. */
80
/* If this option set a flag, do nothing else now. */
81
if (long_options[option_index].flag != 0)
83
printf ("option %s", long_options[option_index].name);
85
printf (" with arg %s", optarg);
99
/* getopt_long already printed an error message. */
109
if(argc - optind < 4) {
114
string tag = argv[optind];
116
vector<string> samples;
117
for (int i = optind+1; i < argc - 1; ++i) {
118
samples.push_back(argv[i]);
121
string filename = argv[argc-1];
123
VariantCallFile variantFile;
124
if (filename == "-") {
125
variantFile.open(std::cin);
127
variantFile.open(filename);
130
if (!variantFile.is_open()) {
131
cerr << "could not open " << filename << endl;
135
assert(samples.size() == 2);
137
Variant var(variantFile);
139
// TODO check if AC is present
140
// ensure that AC is listed as an info field
141
string line = "##INFO=<ID=" + tag + ",Number=1,Type=String,Description=\"Samples";
142
for (vector<string>::iterator s = samples.begin(); s != samples.end(); ++s) {
145
line += " have different genotypes\">";
146
variantFile.addHeaderLine(line);
148
variantFile.addHeaderLine("##INFO=<ID=SSC,Number=1,Type=Float,Description=\"Somatic variant score (phred-scaled probability that the somatic variant call is correct).\">");
150
// write the new header
151
cout << variantFile.header << endl;
153
// print the records, filtering is done via the setting of varA's output sample names
154
while (variantFile.getNextVariant(var)) {
155
if (var.samples.find(samples.front()) != var.samples.end()
156
&& var.samples.find(samples.back()) != var.samples.end()) {
157
map<string, vector<string> >& germline = var.samples[samples.front()];
158
map<string, vector<string> >& somatic = var.samples[samples.back()];
159
map<int, int> gtGermline = decomposeGenotype(germline["GT"].front());
160
map<int, int> gtSomatic = decomposeGenotype(somatic["GT"].front());
161
int germlineAltCount = 0;
162
if (germline.find("AO") != germline.end()) {
163
convert(germline["AO"].front(), germlineAltCount);
165
var.info[tag].clear(); // remove previous
166
if (gtGermline == gtSomatic) {
167
var.info[tag].push_back("germline");
169
//if (isHet(gtGermline) && isHom(gtSomatic)) {
170
// var.info[tag].push_back("loh");
171
if (isHet(gtGermline) && isHomNonRef(gtSomatic) ||
172
isHomRef(gtGermline) && (isHet(gtSomatic) || isHomNonRef(gtSomatic))) {
173
if (!strict || strict && germlineAltCount == 0) {
174
var.info[tag].push_back("somatic");
176
} else if (isHom(gtGermline) && isHet(gtSomatic)) {
177
if (var.alt.size() == 1) {
178
var.info[tag].push_back("reversion");
180
var.info[tag].push_back("somatic");
184
if (germline.find("GQ") != germline.end() && somatic.find("GQ") != somatic.end()) {
186
convert(germline["GQ"].front(), germlineGQ);
188
convert(somatic["GQ"].front(), somaticGQ);
189
double somaticScore = min(var.quality, min(germlineGQ, somaticGQ));
190
var.info["SSC"].clear();
191
var.info["SSC"].push_back(convert(somaticScore));