2
#include "ContigGraph.h"
3
#include "ContigGraphAlgorithms.h"
4
#include "ContigNode.h"
5
#include "ContigPath.h"
6
#include "ContigProperties.h"
7
#include "DirectedGraph.h"
10
#include "GraphAlgorithms.h"
11
#include "GraphUtil.h"
25
#define PROGRAM "abyss-scaffold"
27
static const char VERSION_MESSAGE[] =
28
PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
29
"Written by Shaun Jackman.\n"
31
"Copyright 2011 Canada's Michael Smith Genome Science Centre\n";
33
static const char USAGE_MESSAGE[] =
34
"Usage: " PROGRAM " [OPTION]... [DIST]\n"
35
"Scaffold contigs using the distance estimate graph.\n"
36
" DIST estimates of the distance between contigs\n"
38
" -n, --npairs=N minimum number of pairs [0]\n"
39
" -s, --seed-length=N minimum contig length [0]\n"
40
" -k, --kmer=N length of a k-mer\n"
41
" --min-gap minimum scaffold gap length to output\n"
42
" -o, --out=FILE write the paths to FILE\n"
43
" -g, --graph=FILE write the graph to FILE\n"
44
" -v, --verbose display verbose output\n"
45
" --help display this help and exit\n"
46
" --version output version information and exit\n"
48
"Report bugs to <" PACKAGE_BUGREPORT ">.\n";
51
unsigned k; // used by ContigProperties
52
int dot = true; // used by Estimate
54
/** Minimum number of pairs. */
55
static unsigned minNumPairs;
57
/** Minimum contig length. */
58
static unsigned minContigLength;
60
/** Minimum scaffold gap length to output. */
61
static int minGap = INT_MIN;
63
/** Write the paths to this file. */
66
/** Write the graph to this file. */
67
static string graphPath;
69
/** Verbose output. */
73
static const char shortopts[] = "g:k:n:o:s:v";
75
enum { OPT_HELP = 1, OPT_VERSION, OPT_MIN_GAP };
77
static const struct option longopts[] = {
78
{ "graph", no_argument, NULL, 'g' },
79
{ "kmer", required_argument, NULL, 'k' },
80
{ "min-gap", required_argument, NULL, OPT_MIN_GAP },
81
{ "npairs", required_argument, NULL, 'n' },
82
{ "out", required_argument, NULL, 'o' },
83
{ "seed-length", required_argument, NULL, 's' },
84
{ "verbose", no_argument, NULL, 'v' },
85
{ "help", no_argument, NULL, OPT_HELP },
86
{ "version", no_argument, NULL, OPT_VERSION },
90
/** Contig length property. */
94
Length& operator+=(const Length& o)
100
template <typename T>
101
Length& operator+=(const T& o)
103
assert((int)length + (int)o.distance > 0);
104
length += o.distance;
108
friend ostream& operator<<(ostream& out, const Length& o)
110
return out << "l=" << o.length;
113
friend istream& operator>>(istream& in, Length& o)
115
return in >> expect("l =") >> o.length;
119
/** A distance estimate graph. */
120
typedef DirectedGraph<Length, DistanceEst> DG;
121
typedef ContigGraph<DG> Graph;
123
/** Add missing complementary edges. */
124
static void addComplementaryEdges(DG& g)
126
typedef graph_traits<Graph> GTraits;
127
typedef GTraits::edge_descriptor E;
128
typedef GTraits::edge_iterator Eit;
129
typedef GTraits::vertex_descriptor V;
131
std::pair<Eit, Eit> erange = edges(g);
133
for (Eit eit = erange.first; eit != erange.second; ++eit) {
135
V u = source(e, g), v = target(e, g);
136
if (!edge(~v, ~u, g).second) {
137
add_edge(~v, ~u, g[e], g);
141
if (opt::verbose > 0)
142
cerr << "Added " << numAdded << " complementary edges.\n";
145
/** Return whether the specified edges has sufficient support. */
147
PoorSupport(Graph& g) : m_g(g) { }
148
bool operator()(graph_traits<Graph>::edge_descriptor e) const
150
return m_g[e].numPairs < opt::minNumPairs;
155
/** Remove short vertices and unsupported edges from the graph. */
156
static void filterGraph(Graph& g)
158
typedef graph_traits<Graph> GTraits;
159
typedef GTraits::edge_descriptor E;
160
typedef GTraits::edge_iterator Eit;
161
typedef GTraits::vertex_descriptor V;
162
typedef GTraits::vertex_iterator Vit;
164
// Remove short contigs.
165
unsigned numRemovedV = 0;
166
std::pair<Vit, Vit> urange = vertices(g);
167
for (Vit uit = urange.first; uit != urange.second; ++uit) {
169
if (g[u].length < opt::minContigLength)
171
if (out_degree(u, g) == 0 && in_degree(u, g) == 0) {
176
if (opt::verbose > 0)
177
cerr << "Removed " << numRemovedV << " vertices.\n";
179
// Remove poorly-supported edges.
180
unsigned numBefore = num_edges(g);
181
remove_edge_if(PoorSupport(g), static_cast<DG&>(g));
182
unsigned numRemovedE = numBefore - num_edges(g);
183
if (opt::verbose > 0)
184
cerr << "Removed " << numRemovedE << " edges.\n";
187
/** Add distance estimates to a path. */
188
static ContigPath addDistEst(const Graph& g, const ContigPath& path)
190
typedef graph_traits<Graph>::edge_descriptor E;
192
out.reserve(2 * path.size());
193
ContigNode u = path.front();
195
for (ContigPath::const_iterator it = path.begin() + 1;
196
it != path.end(); ++it) {
198
assert(!v.ambiguous());
199
pair<E, bool> e = edge(u, v, g);
201
int distance = max(g[e.first].distance, (int)opt::minGap);
202
int numN = distance + opt::k - 1; // by convention
205
out.push_back(ContigNode(numN, 'N'));
212
int main(int argc, char** argv)
215
for (int c; (c = getopt_long(argc, argv,
216
shortopts, longopts, NULL)) != -1;) {
217
istringstream arg(optarg != NULL ? optarg : "");
219
case '?': die = true; break;
220
case 'k': arg >> opt::k; break;
221
case 'g': arg >> opt::graphPath; break;
222
case 'n': arg >> opt::minNumPairs; break;
223
case 'o': arg >> opt::out; break;
224
case 's': arg >> opt::minContigLength; break;
225
case 'v': opt::verbose++; break;
226
case OPT_MIN_GAP: arg >> opt::minGap; break;
228
cout << USAGE_MESSAGE;
231
cout << VERSION_MESSAGE;
237
cerr << PROGRAM ": " << "missing -k,--kmer option\n";
241
if (argc - optind < 0) {
242
cerr << PROGRAM ": missing arguments\n";
244
} else if (argc - optind > 1) {
245
cerr << PROGRAM ": too many arguments\n";
250
cerr << "Try `" << PROGRAM
251
<< " --help' for more information.\n";
255
string distFilePath(optind < argc ? argv[optind++] : "-");
257
// Read the distance estimate graph.
258
ifstream fin(distFilePath.c_str());
259
istream& in = distFilePath == "-" ? cin : fin;
260
assert_good(in, distFilePath);
264
if (opt::verbose > 0)
265
printGraphStats(cerr, g);
268
addComplementaryEdges(g);
270
remove_transitive_edges(g);
273
if (!opt::graphPath.empty()) {
274
ofstream out(opt::graphPath.c_str());
275
assert_good(out, opt::graphPath);
277
assert_good(out, opt::graphPath);
280
// Assemble the paths.
281
Graph gorig(g); // the preassembly graph
282
typedef vector<ContigPath> ContigPaths;
284
assemble(g, back_inserter(paths));
285
sort(paths.begin(), paths.end());
288
ofstream fout(opt::out.c_str());
289
ostream& out = opt::out.empty() || opt::out == "-" ? cout : fout;
290
assert_good(out, opt::out);
291
for (vector<ContigPath>::const_iterator it = paths.begin();
292
it != paths.end(); ++it) {
293
out << ContigID::create() << '\t'
294
<< addDistEst(gorig, *it) << '\n';
296
assert_good(out, opt::out);