// Copyright (c) 1998-2002 by The VoxBo Development Team
//
// This file is part of VoxBo
//
// VoxBo is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// VoxBo is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with VoxBo. If not, see <http://www.gnu.org/licenses/>.
//
// For general information on VoxBo, including the latest complete
// source code and binary distributions, manual, and associated files,
// see the VoxBo home page at: http://www.voxbo.org/
//
// original version written by Kosh Banerjee

/*********************************************************************
* This class is used to retrieve tokens from a string. The first
* token in the string is the zeroth token.
*********************************************************************/

#include "stringtokenizer.h"
33
StringTokenizer::StringTokenizer()
36
} // StringTokenizer::StringTokenizer()
39
/*********************************************************************
40
* This constructor takes a C-style string as its single argument. *
41
* The default field delimiter for this constructor is a space. *
42
*********************************************************************/
43
StringTokenizer::StringTokenizer(const char *myString)
46
/*********************************************************************
47
* Calling this->init() to initialize the data members. *
48
*********************************************************************/
49
this->init(string(myString), string(" "));
51
} // StringTokenizer::StringTokenizer(const char *myString)
53
/*********************************************************************
54
* This constructor takes a string object as its single argument. *
55
* The default field delimiter for this constructor is a space. *
56
*********************************************************************/
57
StringTokenizer::StringTokenizer(const string& myString)
60
/*********************************************************************
61
* Calling this->init() to initialize the data members. *
62
*********************************************************************/
63
this->init(myString, string(" "));
65
} // StringTokenizer::StringTokenizer(const string& myString)
67
/*********************************************************************
68
* This constructor takes a C-style string as the argument to *
69
* tokenize. The field delimiters are also passed in as a C-style *
71
*********************************************************************/
72
StringTokenizer::StringTokenizer(const char *myString, const char *myDelims)
75
/*********************************************************************
76
* Calling this->init() to initialize the data members. *
77
*********************************************************************/
78
this->init(string(myString), string(myDelims));
80
} // StringTokenizer::StringTokenizer(const char *myString, const char *myDelims)
82
/*********************************************************************
83
* This constructor takes a string object as the argument to *
84
* tokenize. The field delimiters are also passed in as a string *
86
*********************************************************************/
87
StringTokenizer::StringTokenizer(const string& myString, const string& myDelims)
90
/*********************************************************************
91
* Calling this->init() to initialize the data members. *
92
*********************************************************************/
93
this->init(myString, myDelims);
95
} // StringTokenizer::StringTokenizer(const string& myString, const string& myDelims)
97
/*********************************************************************
98
* This constructor takes a string object as the argument to *
99
* tokenize. The field delimiters are passed in as a C-style string. *
100
*********************************************************************/
101
StringTokenizer::StringTokenizer(const string& myString, const char *myDelims)
104
/*********************************************************************
105
* Calling this->init() to initialize the data members. *
106
*********************************************************************/
107
this->init(myString, string(myDelims));
109
} // StringTokenizer::StringTokenizer(const string& myString, const char *myDelims)
111
/*********************************************************************
112
* This constructor takes a C-style string as the argument to *
113
* tokenize. The field delimiters are passed in as a string object. *
114
*********************************************************************/
115
StringTokenizer::StringTokenizer(const char *myString, const string& myDelims)
118
/*********************************************************************
119
* Calling this->init() to initialize the data members. *
120
*********************************************************************/
121
this->init(string(myString), myDelims);
123
} // StringTokenizer::StringTokenizer(const char *myString, const string& myDelims)
125
/*********************************************************************
126
* This is the copy constructor. *
127
*********************************************************************/
128
StringTokenizer::StringTokenizer(const StringTokenizer& theStringTokens)
131
/*********************************************************************
132
* Assigning the data members from theStringTokens to this instance *
133
* of StringTokenizer. *
134
*********************************************************************/
135
this->theString = theStringTokens.theString;
136
this->theDelimiters = theStringTokens.theDelimiters;
137
this->currentToken = theStringTokens.currentToken;
138
this->theTokens = theStringTokens.theTokens;
139
this->theTokenStarts = theStringTokens.theTokenStarts;
140
this->empty = theStringTokens.empty;
141
this->success = theStringTokens.success;
143
} // StringTokenizer::StringTokenizer(const StringTokenizer& theStringTokens)
145
/*********************************************************************
146
* This method initializes the data members. *
147
*********************************************************************/
148
void StringTokenizer::init(const string& myString, const string& myDelims)
151
/*********************************************************************
152
* Assigning this->theString and this->theDelimiters. *
153
*********************************************************************/
154
this->theString = myString;
155
this->theDelimiters = myDelims;
157
/*********************************************************************
158
* Initializing this->currentToken to 0. *
159
*********************************************************************/
160
this->currentToken = 0;
162
/*********************************************************************
163
* Now extracting the tokens from this->theString. *
164
*********************************************************************/
167
/*********************************************************************
168
* If the number of elements in this->theTokens is positive, then *
169
* this->empty is set to false and this->success is set to true. *
170
*********************************************************************/
172
if (this->theTokens.size() > 0)
175
this->success = true;
178
/*********************************************************************
179
* If program flow ends up in this "else" block, then one of two *
182
* 1. this->theString is the empty string. *
183
* 2. All the characters in this->theString are delimiter characters. *
185
* In this case, this->empty is set to true since there are no tokens *
186
* and this->success is set to false. *
187
*********************************************************************/
191
this->success = false;
194
} // void StringTokenizer::init(const string& myString, const string& myDelims)
196
/*********************************************************************
197
* This is the destructor. It does nothing. *
198
*********************************************************************/
199
StringTokenizer::~StringTokenizer()
202
} // StringTokenizer::~StringTokenizer()
204
/*********************************************************************
205
* This method sets this->currentToken to 0 and this->success to true.*
206
*********************************************************************/
207
void StringTokenizer::reset()
210
this->currentToken = 0;
211
this->success = true;
213
} // void StringTokenizer::reset()
215
/*********************************************************************
216
* This method returns the string this->theTokens[this->currentToken].*
217
* It is anticipated that this method will be used in a manner *
220
* while (myTokenizer.getCurrentToken.size()) *
225
*********************************************************************/
226
string StringTokenizer::getCurrentToken()
229
/*********************************************************************
230
* If this->currentToken is a valid index in this->theTokens, then *
231
* this->success is set to true, this->currentToken is incremented, *
232
* and this->theTokens[this->currentToken - 1] is returned. *
233
*********************************************************************/
234
if (this->currentToken < this->theTokens.size())
236
this->success = true;
237
this->currentToken++;
238
return this->theTokens[this->currentToken - 1];
241
/*********************************************************************
242
* If program flow ends up here, then this->currentToken is >= *
243
* this->theTokens.size(). In this case, this->success is set to *
244
* false and the empty string is returned. *
245
*********************************************************************/
246
this->success = false;
249
} // string StringTokenizer::getCurrentToken()
251
/*********************************************************************
252
* This method returns the length of the string *
253
* this->theTokens[this->currentToken]. *
254
*********************************************************************/
255
unsigned long StringTokenizer::getCurrentTokenLength()
258
/*********************************************************************
259
* If this->currentToken is a valid index in this->theTokens, then *
260
* this->success is set to true and the length of the string *
261
* this->theTokens[this->currentToken] is returned. *
262
*********************************************************************/
263
if (this->currentToken < this->theTokens.size())
265
this->success = true;
266
return this->theTokens[this->currentToken].size();
269
/*********************************************************************
270
* If program flow ends up here, then this->currentToken is >= *
271
* this->theTokens.size(). In this case, this->success is set to *
272
* false and 0 is returned. *
273
*********************************************************************/
274
this->success = false;
277
} // unsigned long StringTokenizer::getCurrentTokenLength()
279
/*********************************************************************
280
* This methods returns the token *
281
* this->theTokens[this->currentTokens], but does not increment *
282
* this->currentToken. *
283
*********************************************************************/
284
string StringTokenizer::getSameToken()
287
/*********************************************************************
288
* If this->currentToken is a valid index in this->theTokens, then *
289
* this->success is set to true and *
290
* this->theTokens[this->currentToken] is returned. *
291
*********************************************************************/
292
if (this->currentToken < this->theTokens.size())
294
this->success = true;
295
return this->theTokens[this->currentToken];
298
/*********************************************************************
299
* If program flow ends up here, then this->currentToken is >= *
300
* this->theTokens.size(). In this case, this->success is set to *
301
* false and the empty string is returned. *
302
*********************************************************************/
303
this->success = false;
306
} // string StringTokenizer::getSameToken()
308
/*********************************************************************
309
* This method returns the specified token. *
310
*********************************************************************/
311
string StringTokenizer::getToken(const unsigned int i)
314
/*********************************************************************
315
* If i is < this->theTokens.size(), then this->success is set to *
316
* true and this->theTokens[i] is returned. *
317
*********************************************************************/
318
if (i < this->theTokens.size())
320
this->success = true;
321
return this->theTokens[i];
324
/*********************************************************************
325
* If program flow ends up here, then i exceeds the number of elements*
326
* in this->theTokens. Therefore, this->success is set to false and *
327
* the empty string is returned. *
328
*********************************************************************/
329
this->success = false;
332
} // string StringTokenizer::getToken(const unsigned int i)
334
/*********************************************************************
335
* This method returns the length of the the specified token. *
336
*********************************************************************/
337
unsigned long StringTokenizer::getTokenLength(const unsigned long i)
340
/*********************************************************************
341
* If i is < this->theTokens.size(), then this->success is set to *
342
* true and this->theTokens[i].size() is returned. *
343
*********************************************************************/
344
if (i < this->theTokens.size())
346
this->success = true;
347
return this->theTokens[i].size();
350
/*********************************************************************
351
* If program flow ends up here, then i exceeds the number of elements*
352
* in this->theTokens. Therefore, this->success is set to false and *
354
*********************************************************************/
355
this->success = false;
358
} // unsigned long StringTokenizer::getTokenLength(const unsigned long i)
360
/*********************************************************************
361
* This method will return true if this->currentToken is pointing to *
362
* a field delimiter character. Otherwise, false is returned. *
363
*********************************************************************/
364
bool StringTokenizer::isDelimiter(const unsigned long i) const
367
/*********************************************************************
368
* The following for loop is used to compare each of the field *
369
* delimiter characters to the character this->theString.at(i). *
370
*********************************************************************/
371
for ( unsigned long j = 0; j < this->theDelimiters.size(); j++)
373
if (this->theString.at(i) == this->theDelimiters.at(j))
380
/*********************************************************************
381
* If program flow ends up here, then this->theString.at(i) is not a *
382
* delimiter character. Therefore, false is returned. *
383
*********************************************************************/
386
} // bool StringTokenizer::isDelimiter(const unsigned long i) const
388
/*********************************************************************
389
* This method tokenizes this->theString. *
390
*********************************************************************/
391
void StringTokenizer::tokenize()
394
/*********************************************************************
395
* If we have tokenized previously, then this->theTokens and *
396
* this->theTokenStarts are emptied. *
397
*********************************************************************/
398
if (this->theTokens.size() > 0)
400
this->theTokens.clear();
401
this->theTokenStarts.clear();
404
/*********************************************************************
405
* The following for loop is used to traverse the characters in *
407
*********************************************************************/
408
for (unsigned long i = 0; i < this->theString.size(); i++)
411
/*********************************************************************
412
* While i is less than this->theString.size() and *
413
* this->theString.at(i) is a delimiter, i is incremented. After *
414
* breaking out of the following while loop, i will be an index for a *
415
* non-delimiter character in this->theString; specifically the *
416
* beginning of a string token. *
417
*********************************************************************/
418
while ( (i < this->theString.size()) && (this->isDelimiter(i)))
423
/*********************************************************************
424
* The current value of i is saved to beginToken. *
425
*********************************************************************/
426
unsigned long beginToken = i;
428
/*********************************************************************
429
* While i is less than this->theString.size() and *
430
* this->theString.at(i) is not a delimiter, i is incremented. After *
431
* breaking out of the following while loop, i will be an index for *
432
* the first delimiter after the end of a string token. *
433
*********************************************************************/
434
while ( (i < this->theString.size()) && (!this->isDelimiter(i)))
439
/*********************************************************************
440
* If beginToken does not equal i, then we extract the appropriate *
441
* string token, found in the range [beginToken, i), from *
442
* this->theString and add it to this->theTokens. Also, beginToken is *
443
* added to this->theTokenStarts. NOTE: When i equals beginToken, we *
444
* are at the end of this->theString and there are no more tokens *
445
* left to add to this->theTokens. *
446
*********************************************************************/
449
this->theTokens.push_back(this->theString.substr(beginToken, i - beginToken));
450
this->theTokenStarts.push_back(beginToken);
455
} // void StringTokenizer::tokenize()
457
/*********************************************************************
458
* This method prints out the current values of the data members. It *
459
* is meant to be used as a debugging aid. *
460
*********************************************************************/
461
void StringTokenizer::toString() const
464
cout << "this->theString = [" << this->theString << "]" << endl;
465
cout << "this->theString.size() = [" << this->theString.size() << "]" << endl;
466
cout << "this->currentToken = [" << this->currentToken << "]" << endl;
467
cout << "this->empty = [" << this->empty << "]" << endl;
468
cout << "this->success = [" << this->success << "]" << endl;
469
cout << "this->theDelimiters = [" << this->theDelimiters << "]" << endl;
470
cout << "this->theTokens.size() = [" << this->theTokens.size() << "]" << endl;
471
cout << "THE TOKENS BEGIN:" << endl;
472
copy(this->theTokens.begin(), this->theTokens.end(), ostream_iterator<string>(cout, "\n"));
473
cout << "THE TOKENS END:" << endl;
474
cout << "THE TOKEN INDICES BEGIN:" << endl;
475
copy(this->theTokenStarts.begin(), this->theTokenStarts.end(), ostream_iterator<unsigned long>(cout, "\n"));
476
cout << "THE TOKEN INDICES END:" << endl;
478
} // void StringTokenizer::toString() const
480
/*********************************************************************
481
* This method takes the input string object and tokenizes it. *
482
*********************************************************************/
483
void StringTokenizer::setString(const string& s)
486
/*********************************************************************
487
* Calling this->init() to set the data members and tokenize s. *
488
*********************************************************************/
489
this->init(s, this->theDelimiters);
491
} // void StringTokenizer::setString(const string& s)
493
/*********************************************************************
494
* This method takes the input C-style string and tokenizes it. *
495
*********************************************************************/
496
void StringTokenizer::setString(const char *s)
499
/*********************************************************************
500
* Calling this->init() to set the data members and tokenize s. *
501
*********************************************************************/
502
this->init(string(s), this->theDelimiters);
504
} // void StringTokenizer::setString(const char *s)
506
/*********************************************************************
507
* This method returns the length of the current token. *
508
*********************************************************************/
509
unsigned long StringTokenizer::getCurrentTokenLength() const
512
/*********************************************************************
513
* If this->currentToken is less than the number of available tokens, *
514
* then we return the length of the next token. *
515
*********************************************************************/
516
if (this->currentToken < this->theTokens.size())
518
return this->theTokens[this->currentToken].size();
521
/*********************************************************************
522
* If program flow ends up here, then this->currentToken exceeds *
523
* the number of available tokens. Therefore, 0 is returned. *
524
*********************************************************************/
527
} // unsigned long StringTokenizer::getCurrentTokenLength() const
529
/*********************************************************************
530
* This method returns the index of the beginning of *
531
* this->currentToken in this->theString. *
532
*********************************************************************/
533
int StringTokenizer::getCurrentTokenStart() const
536
/*********************************************************************
537
* If we have greater than zero tokens and this->currentToken is < *
538
* this->tokens.size(), then we return *
539
* this->theTokenStarts[this->currentToken]. Otherwise, -1 is *
540
* returned to indicate an error. *
541
*********************************************************************/
542
if ( (!this->empty) && (this->currentToken < this->theTokens.size()) )
544
return this->theTokenStarts[this->currentToken];
548
} // int StringTokenizer::getCurrentTokenStart() const
550
/*********************************************************************
551
* This method returns the index of the beginning of token number *
552
* i in this->theString. *
553
*********************************************************************/
554
int StringTokenizer::getTokenStart(const unsigned int i) const
557
/*********************************************************************
558
* If we have greater than zero tokens and i is < this->tokens.size(),*
559
* then we return this->theTokenStarts[this->currentToken]. Otherwise,*
560
* -1 is returned to indicate an error. *
561
*********************************************************************/
562
if ( (!this->empty) && (i < this->theTokens.size()) )
564
return this->theTokenStarts[i];
568
} // int StringTokenizer::getTokenStart(const unsigned int i) const
570
/*********************************************************************
571
* This method assembles the desired range of tokens into a single *
572
* string object and returns it. Each token in the assembled string *
573
* object is separated by the input set of delimiter characters. *
574
* NOTE: The range [begin, end] is inclusive. *
576
* INPUT VARIABLES: TYPE: DESCRIPTION: *
577
* ---------------- ----- ------------ *
578
* begin const unsigned long The index of the starting *
579
* token. NOTE: The toekns are *
580
* indexed beginning with zero. *
581
* end unsigned long The index of the last token. *
582
* delims const string& The delimiters used to *
583
* separate the tokens in the *
584
* assembled string. *
586
* OUTPUT VARIABLES: TYPE: DESCRIPTION: *
587
* ----------------- ----- ------------ *
588
* N/A string The string of assembled tokens. *
590
* EXCEPTIONS THROWN: *
591
* ------------------ *
593
*********************************************************************/
594
string StringTokenizer::getTokenRange(const unsigned long begin,
595
unsigned long end, const string& delims)
598
/*********************************************************************
599
* If end exceeds the number of tokens, then it is set to the last *
601
*********************************************************************/
602
end = (end >= this->theTokens.size()) ? (this->theTokens.size() - 1) : end;
604
/*********************************************************************
605
* Setting this->success to true. *
606
*********************************************************************/
607
this->success = true;
609
/*********************************************************************
610
* If the ending index is less than the beginning index, then *
611
* this->success is set to false and the emoty string is returned. *
612
*********************************************************************/
615
this->success = false;
619
/*********************************************************************
620
* If end and begin are the same, then we simply return a single *
622
*********************************************************************/
625
return this->theTokens[begin];
628
/*********************************************************************
629
* tokens will hold the assembled tokens. *
630
*********************************************************************/
633
/*********************************************************************
634
* The following for loop is used to assemble the tokens. *
635
*********************************************************************/
636
for (unsigned long i = 0; i <= end; i++)
639
/*********************************************************************
641
*********************************************************************/
642
tokens += this->theTokens[i];
644
/*********************************************************************
645
* If we are not at the final token, the the set of delimiter *
646
* characters is added to tokens. *
647
*********************************************************************/
655
/*********************************************************************
656
* Now returning tokens. *
657
*********************************************************************/
660
} // string StringTokenizer::getTokenRange(const unsigned long begin,
661
// const unsigned long end, const string& delims)
663
/*********************************************************************
664
* This method assembles the desired range of tokens into a single *
665
* string object and returns it. Each token in the assembled string *
666
* object is separated by this->theDelimiters. NOTE: The range *
667
* [begin, end] is inclusive. *
669
* INPUT VARIABLES: TYPE: DESCRIPTION: *
670
* ---------------- ----- ------------ *
671
* begin const unsigned long The index of the starting *
672
* token. NOTE: The toekns are *
673
* indexed beginning with zero. *
674
* end unsigned long The index of the last token. *
676
* OUTPUT VARIABLES: TYPE: DESCRIPTION: *
677
* ----------------- ----- ------------ *
678
* N/A string The string of assembled tokens. *
680
* EXCEPTIONS THROWN: *
681
* ------------------ *
683
*********************************************************************/
684
string StringTokenizer::getTokenRange(const unsigned long begin,
688
/*********************************************************************
689
* Now returning the assembled tokens. *
690
*********************************************************************/
691
return this->getTokenRange(begin, end, this->theDelimiters);
693
} // string StringTokenizer::getTokenRange(const unsigned long begin,
694
// unsigned long end)