2
Copyright (c) 2007 Stefan Kurtz <kurtz@zbh.uni-hamburg.de>
3
Copyright (c) 2007 Center for Bioinformatics, University of Hamburg
5
Permission to use, copy, modify, and distribute this software for any
6
purpose with or without fee is hereby granted, provided that the above
7
copyright notice and this permission notice appear in all copies.
9
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
#include "core/encseq.h"
20
#include "core/error.h"
21
#include "core/option_api.h"
23
#include "core/unused_api.h"
24
#include "core/versionfunc.h"
25
#include "match/cutendpfx.h"
26
#include "match/enum-patt-def.h"
27
#include "match/esa-map.h"
28
#include "match/esa-mmsearch.h"
29
#include "match/qgram2code.h"
30
#include "match/sarr-def.h"
31
#include "match/spacedef.h"
32
#include "match/stamp.h"
34
#include "tools/gt_patternmatch.h"
38
unsigned long minpatternlen, maxpatternlen, numofsamples;
39
bool showpatt, usebcktab, immediate;
43
static void comparemmsis(const MMsearchiterator *mmsi1,
44
const MMsearchiterator *mmsi2)
46
if (gt_isemptymmsearchiterator(mmsi1))
48
if (!gt_isemptymmsearchiterator(mmsi2))
50
fprintf(stderr,"mmsi1 is empty but mmsi2 not\n");
51
exit(GT_EXIT_PROGRAMMING_ERROR);
55
if (gt_isemptymmsearchiterator(mmsi2))
57
fprintf(stderr,"mmsi2 is empty but mmsi1 not\n");
58
exit(GT_EXIT_PROGRAMMING_ERROR);
60
if (!gt_identicalmmsearchiterators(mmsi1,mmsi2))
62
fprintf(stderr,"mmsi1 and mmsi2 are different\n");
63
exit(GT_EXIT_PROGRAMMING_ERROR);
68
#define UNDEFREFSTART totallength
70
static int callpatternmatcher(const Pmatchoptions *pmopt, GtError *err)
72
Suffixarray suffixarray;
73
unsigned long totallength = 0;
76
unsigned long patternlen;
77
unsigned int demand = SARR_SUFTAB | SARR_ESQTAB;
81
demand |= SARR_BCKTAB;
83
if (gt_mapsuffixarray(&suffixarray,
85
gt_str_get(pmopt->indexname),
92
totallength = gt_encseq_total_length(suffixarray.encseq);
97
unsigned long dbstart;
98
Enumpatterniterator *epi;
99
GT_UNUSED unsigned int firstspecial;
100
MMsearchiterator *mmsibck, *mmsiimm;
101
GtBucketspecification bucketspec;
102
Bucketenumerator *bucketenumerator;
104
unsigned long refstart;
105
GtEncseqReader *esr1, *esr2;
106
GT_UNUSED int retval;
107
unsigned long idx, maxlcp;
109
const GtCodetype **multimappower;
110
const GtAlphabet *alpha;
112
if (pmopt->usebcktab)
114
multimappower = gt_bcktab_multimappower(suffixarray.bcktab);
117
multimappower = NULL;
119
epi = gt_newenumpatterniterator(pmopt->minpatternlen,
120
pmopt->maxpatternlen,
123
esr1 = gt_encseq_create_reader_with_readmode(suffixarray.encseq,
124
suffixarray.readmode, 0);
125
esr2 = gt_encseq_create_reader_with_readmode(suffixarray.encseq,
126
suffixarray.readmode, 0);
127
alpha = gt_encseq_alphabet(suffixarray.encseq);
128
for (trial = 0; trial < pmopt->numofsamples; trial++)
130
pptr = gt_nextEnumpatterniterator(&patternlen,epi);
133
gt_alphabet_decode_seq_to_fp(alpha,stdout,pptr,patternlen);
136
if (pmopt->usebcktab)
138
if (patternlen < (unsigned long) suffixarray.prefixlength)
142
= gt_newbucketenumerator(suffixarray.bcktab,
143
suffixarray.prefixlength,
145
(unsigned int) patternlen);
146
refstart = UNDEFREFSTART;
147
while (gt_nextbucketenumerator(&itv,bucketenumerator))
149
if (refstart == UNDEFREFSTART)
151
refstart = ESASUFFIXPTRGET(suffixarray.suftab,itv.left);
154
for (idx=itv.left; idx<=itv.right; idx++)
156
retval = gt_encseq_check_comparetwosuffixes(
158
suffixarray.readmode,
164
ESASUFFIXPTRGET(suffixarray.suftab,idx),
167
gt_assert(retval == 0 && maxlcp == patternlen);
171
gt_freebucketenumerator(&bucketenumerator);
174
firstspecial = qgram2code(&code,
176
suffixarray.prefixlength,
178
gt_assert(firstspecial == suffixarray.prefixlength);
179
gt_bcktab_calcboundaries(&bucketspec,
182
if (bucketspec.nonspecialsinbucket == 0)
188
= gt_newmmsearchiteratorcomplete_plain(
193
bucketspec.nonspecialsinbucket-1,
194
(unsigned long) suffixarray.prefixlength,
195
suffixarray.readmode,
201
if (pmopt->immediate)
203
mmsiimm = gt_newmmsearchiteratorcomplete_plain(
207
totallength, /* rightbound */
209
suffixarray.readmode,
213
if (pmopt->immediate && pmopt->usebcktab)
215
comparemmsis(mmsibck,mmsiimm);
217
if (pmopt->usebcktab && mmsibck != NULL)
219
while (gt_nextmmsearchiterator(&dbstart,mmsibck))
223
gt_freemmsearchiterator(&mmsibck);
225
if (pmopt->immediate)
227
while (gt_nextmmsearchiterator(&dbstart,mmsiimm))
231
gt_freemmsearchiterator(&mmsiimm);
234
gt_encseq_reader_delete(esr1);
235
gt_encseq_reader_delete(esr2);
238
gt_showPatterndistribution(epi);
240
gt_freeEnumpatterniterator(&epi);
242
gt_freesuffixarray(&suffixarray);
243
return haserr ? -1 : 0;
246
static GtOPrval parse_options(Pmatchoptions *pmopt,
248
int argc, const char **argv, GtError *err)
251
GtOption *option, *optionimm, *optionbck;
255
op = gt_option_parser_new("[options] -ii indexname",
256
"Perform pattern matches.");
257
gt_option_parser_set_mail_address(op, "<kurtz@zbh.uni-hamburg.de>");
259
option = gt_option_new_ulong("minpl","Specify minimum length of pattern",
260
&pmopt->minpatternlen,
262
gt_option_parser_add_option(op, option);
263
option = gt_option_new_ulong("maxpl","Specify maximum length of pattern",
264
&pmopt->maxpatternlen,
266
gt_option_parser_add_option(op, option);
268
option = gt_option_new_ulong("samples","Specify number of samples",
269
&pmopt->numofsamples,
270
(unsigned long) 100000);
271
gt_option_parser_add_option(op, option);
273
option = gt_option_new_bool("s","Show generated pattern",
276
gt_option_parser_add_option(op, option);
278
optionbck = gt_option_new_bool("bck","Use the bucket boundaries",
281
gt_option_parser_add_option(op, optionbck);
283
optionimm = gt_option_new_bool("imm","Start with offset 0",
286
gt_option_parser_add_option(op, optionimm);
288
option = gt_option_new_string("ii",
289
"Specify input index",
290
pmopt->indexname, NULL);
291
gt_option_parser_add_option(op, option);
292
gt_option_is_mandatory(option);
294
oprval = gt_option_parser_parse(op, parsed_args, argc, argv,
295
gt_versionfunc, err);
296
gt_option_parser_delete(op);
301
int gt_patternmatch(int argc, const char **argv, GtError *err)
310
pmopt.indexname = gt_str_new();
311
oprval = parse_options(&pmopt,&parsed_args, argc, argv, err);
312
if (oprval == GT_OPTION_PARSER_OK)
314
gt_assert(parsed_args == argc);
315
if (callpatternmatcher(&pmopt,err) != 0)
320
gt_str_delete(pmopt.indexname);
321
if (oprval == GT_OPTION_PARSER_REQUESTS_EXIT)
325
if (oprval == GT_OPTION_PARSER_ERROR)
329
return haserr ? -1 : 0;