84
84
assert(first < last);
85
85
assert(!m_alphabet.empty());
86
86
std::transform(first, last, first, Translate(*this));
87
std::replace(first, last, SENTINEL(), T(0));
90
89
/** Decode the alphabet of [first, last). */
97
96
*it = m_alphabet[*it];
100
/** Build the BWT of [first, last). */
99
/** Build the BWT of [first, last).
100
* The BWT including the sentinel is stored in [first, last].
101
* @return the position of the sentinel
101
103
template<typename It>
102
104
size_type buildBWT(It first, It last) const
104
106
assert(first < last);
105
107
assert(size_t(last - first)
106
108
< std::numeric_limits<size_type>::max());
110
std::replace(first, last, SENTINEL(), T(0));
107
112
std::cerr << "Building the Burrows-Wheeler transform...\n";
108
113
size_t n = last - first;
109
114
std::vector<sais_size_type> sa(n);
115
120
assert(sentinel >= 0);
116
121
if (sentinel < 0)
123
// Insert the sentinel.
124
std::copy_backward(first + sentinel, last, last + 1);
125
first[sentinel] = SENTINEL();
121
/** Build the BWT of [first, last) and write the result to out. */
129
/** Set the specified element of the sampled suffix array. */
130
void setSA(size_t sai, size_t pos)
132
if (sai % m_sampleSA == 0) {
133
size_t i = sai / m_sampleSA;
134
assert(i < m_sa.size());
139
/** Construct the suffix array from the FM index. */
140
void constructSuffixArray()
142
// The length of the original string.
143
size_t n = m_occ.size() - 1;
145
assert(m_sampleSA > 0);
146
m_sa.resize(n / m_sampleSA + 1);
148
for (size_t i = n; i > 0; i--) {
151
assert(c != SENTINEL());
152
sai = m_cf[c] + m_occ.rank(c, sai);
158
/** Build an FM-index of the specified BWT. */
122
159
template<typename It>
123
std::ostream& buildBWT(It first, It last, std::ostream& out) const
160
void assignBWT(It first, It last)
125
assert(first < last);
128
// Construct the BWT.
130
sais_size_type sentinel = buildBWT(first, last);
131
assert(sentinel <= last - first);
135
out.write(reinterpret_cast<char*>(&first[0]),
138
out.write(reinterpret_cast<char*>(&first[sentinel]),
139
last - first - sentinel);
162
assert(last - first > 1);
163
assert(size_t(last - first)
164
< std::numeric_limits<size_type>::max());
166
std::cerr << "Building the character occurrence table...\n";
167
m_occ.assign(first, last);
170
// Construct the suffix array from the FM index.
171
std::cerr << "Building the suffix array...\n";
172
constructSuffixArray();
144
175
/** Build an FM-index of the specified data. */
150
181
< std::numeric_limits<size_type>::max());
152
183
encode(first, last);
184
std::replace(first, last, SENTINEL(), T(0));
154
186
// Construct the suffix array.
155
187
std::cerr << "Building the suffix array...\n";
431
463
return findSubstring(s.begin(), s.end(), k);
434
/** Set the alphabet to [first, last). */
466
/** Set the alphabet to [first, last).
467
* The character '\0' is treated specially and not included in the
435
470
template <typename It>
436
471
void setAlphabet(It first, It last)
540
in >> n >> expect("\n");
508
542
assert(n < std::numeric_limits<size_type>::max());
509
543
o.m_alphabet.resize(n);
510
544
in.read(reinterpret_cast<char*>(&o.m_alphabet[0]),
511
545
n * sizeof o.m_alphabet[0]);
512
546
o.setAlphabet(o.m_alphabet.begin(), o.m_alphabet.end());
548
in >> n >> expect("\n");
518
550
assert(n < std::numeric_limits<size_type>::max());
519
551
o.m_sa.resize(n);
520
552
in.read(reinterpret_cast<char*>(&o.m_sa[0]),