3
* NUL byte safe string implementation
5
* Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27
#include "texception.h"
31
// - make Split,Unquote,ReadLine,extractFilename,extractPath 0 byte safe
32
// - separate functions using tvector<> for better modularity
36
// 01:45 11 Jun split(): backslash behavior fixed (601 lines)
37
// 23:50 11 Jun strings may contain 0 bytes
38
// 12:00 19 Jun some filename extracting added
39
// 17:00 19 Jun more sophisticated search: ignore_case and whole_words
40
// 02:00 08 Jul substring extraction via operator() (start,end)
41
// 02:00 31 Jul new ContainsNulChar, new ReadFile, fixed \ \\ in ExpUnPrint
42
// 12:00 08 Aug new Upper Lower Capitalize
43
// 23:30 19 Aug improved collapseSpace()
44
// 00:00 27 Aug cropSpace() bug fixed (1 byte out of bound zero write)
45
// 20:00 30 Aug now cons accept 0 pointer as empty string
46
// 21:00 30 Aug addDirSlash() added (809 lines)
47
// 13:00 02 Sep isLower ... added, preserve_case for SearchReplace added (867)
48
// 23:45 16 Dec normalizePath() added
49
// 15:00 24 Dec started conversion to Rep reference model
50
// 18:00 27 Dec finished. debugging starts ... :)
53
// 00:30 09 Jan scanTools started (cc=817) (h=462)
54
// 00:05 12 Jan compare operators fixed (0 byte ...)
55
// 19:00 09 Oct zeroRep and fast string(int i) for i=0
56
// 14:30 10 Oct xc16emu emuwid.s problem solved: memset()
57
// 14:36 10 Oct string(0) 80 times faster than string(1)! (zero_rep)
58
// 01:53 17 Oct createNulRep and createZeroRep non inline
61
// 14:55 31 Jan +=string speedup for empty string (cc=919, h=532)
62
// 15:08 31 Jan searchReplace: pre/post_padstring added
63
// 00:36 03 Feb getFitWordsBlock added (954)
64
// 23:02 04 Feb search/searchReplace match_pos added (954)
65
// 23:49 15 Feb class string renamed to class tstring, tappframe simplified (1003)
66
// 00:46 16 Feb toLong/toDouble/toInt/toBool added (from old str2value.cc) (1016)
67
// 23:51 03 Mar cropSpaceEnd added, getFitWords space semantics change
68
// 23:46 13 Apr trelops.h replaces != and > operator (1034)
69
// 00:31 16 Apr started: replace fatalErrors by exceptions
70
// 23:48 20 Aug remove html tags added
71
// 22:17 09 Dec added operator != and > because trelops will not instantiate them for two different types
74
// 23:30 30 Jun loop changed from while(1) to for(;;) ;-)
75
// 22:50 01 Jul toInt/Long pointer p initialized to 0, quotes feature added to expandUnprintable
76
// 22:00 06 Jul progressBar() added
79
// 00:15 08 Feb extractPath now removes trailing slash (1090 lines)
80
// 00:45 15 Mar searchReplace max_num parameter added
81
// 22:00 18 Sep palmos fixes
84
// 22:25 08 Apr expandUnprintable: allow high ISO graphical characters (ASCII 161-255), better nul_mem and zero_mem sizes for 64 bit systems
87
// 22:20 27 Jan length of nul_mem and zero_mem fixed
90
// 27 Jul: palmos support removed
93
// global static null and zero rep members
94
// Pointer to the shared null string representation. Starts out as 0;
// createNulRep() points it at nul_mem.
tstring::Rep* tstring::Rep::nul = 0;
95
// Static backing storage for the null representation: room for one Rep
// header plus one extra byte (presumably the terminating 0 byte - TODO confirm).
// NOTE(review): this is a plain char array that is later cast to Rep*; its
// alignment for Rep is not guaranteed by the language - confirm on all targets.
char tstring::Rep::nul_mem[sizeof(Rep) + 1];
96
// Pointer to the shared zero string representation (used by tstring(int) for
// i == 0). Starts out as 0; createZeroRep() points it at zero_mem.
tstring::Rep* tstring::Rep::zero = 0;
97
// Static backing storage for the zero representation: room for one Rep
// header plus two extra bytes (presumably one character and the terminating
// 0 byte - TODO confirm).
char tstring::Rep::zero_mem[sizeof(Rep) + 2];
100
// non inline Rep implementations
102
// copy this representation
103
tstring::Rep *tstring::Rep::clone(size_t minmem) {
104
Rep *p = create(minmem >= len ? minmem : len);
106
memcpy(p->data(), data(), len+1);
110
// create a new representation
111
tstring::Rep *tstring::Rep::create(size_t tmem) {
112
size_t m = sizeof(Rep) << 1;
113
while((m - 1 - sizeof(Rep)) < tmem) m <<= 1;
114
Rep *p = new (m - 1 - sizeof(Rep)) Rep;
115
p->mem = m - 1 - sizeof(Rep); p->ref = 1; p->vulnerable = false;
119
// create null string representation
120
void tstring::Rep::createNulRep() {
121
nul = (Rep *)nul_mem;
124
nul->ref = 1; // never modify/delete static object
125
nul->vulnerable = false;
129
// create zero string representation
130
void tstring::Rep::createZeroRep() {
131
zero = (Rep *)zero_mem;
134
zero->ref = 1; // never modify/delete static object
135
zero->vulnerable = false;
141
// non inline string implementation
143
tstring::tstring(const char *s):rep(0) {
146
rep = Rep::create(l);
148
strcpy(rep->data(), s);
149
} else rep = Rep::nulRep()->grab();
153
tstring::tstring(const char *s, size_t l):rep(0) {
155
rep = Rep::create(l);
157
memcpy(rep->data(), s, l);
159
} else rep = Rep::nulRep()->grab();
163
tstring::tstring(char c, size_t n):rep(0) {
165
rep = Rep::create(n);
167
if(n) memset(rep->data(), c, n);
169
} else rep = Rep::nulRep()->grab();
173
tstring::tstring(char c):rep(0) {
174
rep = Rep::create(1);
181
tstring::tstring(int i):rep((i==0)?(Rep::zeroRep()->grab()):(Rep::nulRep()->grab())) {
182
if(i) sprintf("%d", i);
186
tstring::tstring(int i, const char *format):rep(Rep::nulRep()->grab()) {
191
tstring::tstring(double d, const char *format):rep(Rep::nulRep()->grab()) {
197
/// concatenation: returns a new string holding s1 followed by s2
tstring operator + (const tstring& s1, const tstring& s2) {
   tstring joined(s1);   // start with a copy of the left operand
   joined += s2;         // then append the right operand
   return joined;
}
199
/// concatenation: returns a new string holding the C string s1 followed by s2
tstring operator + (const char *s1, const tstring& s2) {
   tstring joined(s1);   // build a tstring from the C string
   joined += s2;         // then append the right operand
   return joined;
}
201
/// concatenation: returns a new string holding s1 followed by the C string s2
tstring operator + (const tstring& s1, const char *s2) {
   tstring joined(s1);   // start with a copy of the left operand
   joined += s2;         // then append the C string
   return joined;
}
203
/// concatenation: returns a new string holding the character s1 followed by s2
tstring operator + (char s1, const tstring& s2) {
   tstring joined(s1);   // build a tstring from the single character
   joined += s2;         // then append the right operand
   return joined;
}
205
/// concatenation: returns a new string holding s1 followed by the character s2
tstring operator + (const tstring& s1, char s2) {
   tstring joined(s1);      // start with a copy of the left operand
   joined += tstring(s2);   // append the character via a temporary string
   return joined;
}
208
/// equality of two strings, delegated to tstring::_string_equ
bool operator == (const tstring& s1, const tstring& s2) {
   const bool same = tstring::_string_equ(s1, s2);
   return same;
}
209
/// NUL byte safe equality of a tstring and a C string.
/// The full length of s1 takes part in the comparison, so a string with an
/// embedded 0 byte never equals its C string prefix (this matches the
/// tstring/tstring operator). A null pointer s2 compares like the empty
/// string, in line with the constructor and operator += which accept 0.
bool operator == (const tstring& s1, const char *s2) {
   const size_t l2 = s2 ? strlen(s2) : 0;
   if(size_t(s1.len()) != l2) return false;
   // l2 == 0 is checked first so memcmp() never sees a null pointer
   return (l2 == 0) || (memcmp(s1.c_str(), s2, l2) == 0);
}
210
/// NUL byte safe equality of a C string and a tstring.
/// The full length of s2 takes part in the comparison, so a string with an
/// embedded 0 byte never equals its C string prefix (this matches the
/// tstring/tstring operator). A null pointer s1 compares like the empty
/// string, in line with the constructor and operator += which accept 0.
bool operator == (const char *s1, const tstring& s2) {
   const size_t l1 = s1 ? strlen(s1) : 0;
   if(l1 != size_t(s2.len())) return false;
   // l1 == 0 is checked first so memcmp() never sees a null pointer
   return (l1 == 0) || (memcmp(s1, s2.c_str(), l1) == 0);
}
211
/// inequality of two strings: the negation of tstring::_string_equ
bool operator != (const tstring& s1, const tstring& s2) {
   if(tstring::_string_equ(s1, s2)) return false;
   return true;
}
212
/// NUL byte safe inequality of a tstring and a C string.
/// The full length of s1 takes part in the comparison, so a string with an
/// embedded 0 byte always differs from its C string prefix. A null pointer
/// s2 compares like the empty string.
bool operator != (const tstring& s1, const char *s2) {
   const size_t l2 = s2 ? strlen(s2) : 0;
   if(size_t(s1.len()) != l2) return true;
   // l2 != 0 is checked first so memcmp() never sees a null pointer
   return (l2 != 0) && (memcmp(s1.c_str(), s2, l2) != 0);
}
213
/// NUL byte safe inequality of a C string and a tstring.
/// The full length of s2 takes part in the comparison, so a string with an
/// embedded 0 byte always differs from its C string prefix. A null pointer
/// s1 compares like the empty string.
bool operator != (const char *s1, const tstring& s2) {
   const size_t l1 = s1 ? strlen(s1) : 0;
   if(l1 != size_t(s2.len())) return true;
   // l1 != 0 is checked first so memcmp() never sees a null pointer
   return (l1 != 0) && (memcmp(s1, s2.c_str(), l1) != 0);
}
214
/// ordering of two strings: true if tstring::_string_cmp reports s1 before s2
bool operator < (const tstring& s1, const tstring& s2) {
   const int order = tstring::_string_cmp(s1, s2);
   return order < 0;
}
215
/// NUL byte safe "less than" of a tstring and a C string.
/// Compares the common prefix bytewise and lets the shorter operand sort
/// first on a tie, using the full length of s1 (embedded 0 bytes included).
/// A null pointer s2 compares like the empty string.
bool operator < (const tstring& s1, const char *s2) {
   const char *rhs = s2 ? s2 : "";
   const size_t l1 = s1.len();
   const size_t l2 = strlen(rhs);
   const int r = memcmp(s1.c_str(), rhs, (l1 < l2) ? l1 : l2);
   if(r != 0) return r < 0;
   return l1 < l2;   // equal prefix: the shorter string is the smaller one
}
216
/// NUL byte safe "less than" of a C string and a tstring.
/// Compares the common prefix bytewise and lets the shorter operand sort
/// first on a tie, using the full length of s2 (embedded 0 bytes included).
/// A null pointer s1 compares like the empty string.
bool operator < (const char *s1, const tstring& s2) {
   const char *lhs = s1 ? s1 : "";
   const size_t l1 = strlen(lhs);
   const size_t l2 = s2.len();
   const int r = memcmp(lhs, s2.c_str(), (l1 < l2) ? l1 : l2);
   if(r != 0) return r < 0;
   return l1 < l2;   // equal prefix: the shorter string is the smaller one
}
217
/// NUL byte safe "greater than" of a tstring and a C string.
/// Compares the common prefix bytewise and lets the longer operand sort
/// last on a tie, using the full length of s1 (embedded 0 bytes included).
/// A null pointer s2 compares like the empty string.
bool operator > (const tstring& s1, const char *s2) {
   const char *rhs = s2 ? s2 : "";
   const size_t l1 = s1.len();
   const size_t l2 = strlen(rhs);
   const int r = memcmp(s1.c_str(), rhs, (l1 < l2) ? l1 : l2);
   if(r != 0) return r > 0;
   return l1 > l2;   // equal prefix: the longer string is the greater one
}
218
/// NUL byte safe "greater than" of a C string and a tstring.
/// Compares the common prefix bytewise and lets the longer operand sort
/// last on a tie, using the full length of s2 (embedded 0 bytes included).
/// A null pointer s1 compares like the empty string.
bool operator > (const char *s1, const tstring& s2) {
   const char *lhs = s1 ? s1 : "";
   const size_t l1 = strlen(lhs);
   const size_t l2 = s2.len();
   const int r = memcmp(lhs, s2.c_str(), (l1 < l2) ? l1 : l2);
   if(r != 0) return r > 0;
   return l1 > l2;   // equal prefix: the longer string is the greater one
}
219
/// ordering of two strings: true if tstring::_string_cmp reports s1 after s2
bool operator > (const tstring& s1, const tstring& s2) {
   const int order = tstring::_string_cmp(s1, s2);
   return order > 0;
}
222
/// append string a to this string; appending a string to itself is supported
tstring& tstring::operator += (const tstring& a) {
   if(a.empty()) return *this;
   if(&a == this) {
      // append() calls detachResize(), which may replace our representation
      // while the source pointer still refers to it. Holding a second
      // reference keeps the source bytes alive until the copy is done.
      const tstring keep(a);
      append(keep.rep->data(), keep.rep->len);
   } else {
      append(a.rep->data(), a.rep->len);
   }
   return *this;
}
224
/// append the C string a to this string; a null pointer appends nothing
tstring& tstring::operator += (const char *a) {
   if(a == 0) return *this;
   append(a, strlen(a));
   return *this;
}
226
/// append the single character c to this string
tstring& tstring::operator += (char c) {
   detachResize(rep->len + 1);   // make room for one more character
   (*rep)[rep->len] = c;         // store it behind the current end
   ++rep->len;
   (*rep)[rep->len] = 0;         // write the terminating 0 byte again
   return *this;
}
227
/// append byte array a of length len
228
tstring& tstring::append(const char *a, int alen) {
230
detachResize(rep->len + alen);
231
memcpy(rep->data() + rep->len, a, alen);
237
/// assign string a to this
238
tstring& tstring::operator = (const tstring& a)
{
   // self assignment: nothing to do
   if(this == &a) return *this;
   // drop our reference, then share the representation of a
   rep->release();
   rep = a.rep->grab();
   return *this;
}
240
/// direct character access: const/readonly
241
char tstring::operator [] (size_t i) const /* throw(IndexOutOfRange) */ {
242
if(i <= rep->len) return (*rep)[i];
245
/// direct character access: read/write
246
char& tstring::operator[](size_t i) {
247
if(i < rep->len) {detach(); return (*rep)[i];}
249
for(; rep->len <= i; rep->len++) (*rep)[rep->len] = 0;
253
/// substring extraction (len=end-start)
254
tstring tstring::substr(size_t start, size_t end) const /* throw(InvalidRange) */ {
255
if((end == npos) || (end > rep->len)) end = rep->len;
256
if(start > rep->len) start = rep->len;
257
if(start > end) start = end;
258
return tstring(rep->data()+start, end-start);
262
int tstring::_string_cmp(const tstring& s1, const tstring& s2) {
263
int r = memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len <= s2.rep->len ? s1.rep->len : s2.rep->len);
265
if(s1.rep->len > s2.rep->len) return +1;
266
if(s1.rep->len < s2.rep->len) return -1;
270
bool tstring::_string_equ(const tstring& s1, const tstring& s2) {
271
if(s1.rep->len != s2.rep->len) return false;
272
return memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len)==0;
275
/// detach from string pool, you should never need to call this
276
void tstring::detach() {
   // a representation with a single reference is already ours alone
   if(rep->ref <= 1) return;
   // otherwise continue with a private clone
   replaceRep(rep->clone());
}
277
// no, there is *not* a dangling pointer here (ref > 1)
278
/** detach from string pool and make sure at least minsize bytes of mem are available
279
(use this before the dirty version sprintf to make it clean)
280
(use this before the clean version sprintf to make it fast)
282
void tstring::detachResize(size_t minsize) {
283
if((rep->ref==1) && (minsize <= rep->mem)) return;
284
replaceRep(rep->clone(minsize));
286
/// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
287
void tstring::invulnerableDetach() {
   // get a private representation first ...
   detach();
   // ... then flag it as vulnerable
   rep->vulnerable = true;
}
289
/// check for 0 in string (then it's not a real cstring anymore)
290
bool tstring::containsNulChar() const {
292
if(strlen(rep->data()) != rep->len)
299
/// get a pointer to the at most max last chars (useful for printf)
300
const char *tstring::pSuf(size_t max) const {
301
return rep->data()+((max>=rep->len)?0:(rep->len-max));
305
/// sprintf into this string
306
void tstring::sprintf(const char *format, ...) {
309
va_start(ap, format);
310
#if defined(__STRICT_ANSI__)
311
// this is the unsecure and dirty but ansi compatible version
313
ret = vsprintf(rep->data(), format, ap); // not secure! may write out of bounds!
315
// this is the clean version (never overflows)
319
s <<= 2; // fast increase, printf may be slow
321
s = ret + 8; // C99 standard, after first iteration this should be large enough
323
ret = vsnprintf(rep->data(), s, format, ap);
324
} while((ret == -1) || (ret >= s));
331
// returns true on success! returns value in bool_out!
332
bool tstring::toBool(bool& bool_out) const {
336
buf[i] = tolower((*rep)[i]);
337
if((buf[i]==0) || isspace(buf[i])) break;
342
if((buf[0]=='1')||(buf[0]=='t')) { bool_out = true; return true; }
343
if((buf[0]=='0')||(buf[0]=='f')) { bool_out = false; return true; }
346
if(strcmp(buf,"on")==0) { bool_out = true; return true; }
347
if(strcmp(buf,"no")==0) { bool_out = false; return true; }
350
if(strcmp(buf,"yes")==0) { bool_out = true; return true; }
351
if(strcmp(buf,"off")==0) { bool_out = false; return true; }
354
if(strcmp(buf,"true")==0) { bool_out = true; return true; }
357
if(strcmp(buf,"false")==0) { bool_out = false; return true; }
364
// returns true on success
365
bool tstring::toLong(long& long_out, int base) const {
367
long r = strtoul(rep->data(), &p, base);
368
if(p == rep->data()) return false;
369
if(*p) if(!isspace(*p)) return false;
375
// returns true on success
376
bool tstring::toInt(int& int_out, int base) const {
378
int r = strtoul(rep->data(), &p, base);
379
if(p == rep->data()) return false;
380
if(*p) if(!isspace(*p)) return false;
386
// returns true on success
387
bool tstring::toDouble(double& double_out) const {
389
double r = strtod(rep->data(), &p);
390
if(p == rep->data()) return false;
391
if(*p) if(!isspace(*p)) return false;
397
tstring tstring::scanToken(size_t& scanner, int flags,
398
const char *allow, const char *forbid,
399
bool allow_quoted) const
401
if(allow_quoted && (scanner < rep->len)) {
402
char q = (*rep)[scanner];
403
if((q=='\'')||(q=='\"')) {
405
while((scanner < rep->len) && ((*rep)[scanner]!=q))
407
tstring out = substr(st, scanner);
408
if(scanner < rep->len) ++scanner;
412
size_t start(scanner);
413
for(; (scanner < rep->len); ++scanner) {
414
char c = (*rep)[scanner];
415
if(forbid && strchr(forbid, c)) break;
416
if((flags&ALL )) continue;
417
if(allow && strchr(allow , c)) continue;
418
if((flags&ALPHA) && isalpha(c)) continue;
419
if((flags&DIGIT) && isdigit(c)) continue;
420
if((flags&LOWER) && islower(c)) continue;
421
if((flags&UPPER) && isupper(c)) continue;
422
if((flags&PRINT) && isprint(c)) continue;
423
if((flags&GRAPH) && isgraph(c)) continue;
424
if((flags&CNTRL) && iscntrl(c)) continue;
425
if((flags&SPACE) && isspace(c)) continue;
426
if((flags&XDIGIT)&&isxdigit(c)) continue;
427
if((flags&PUNCT) && ispunct(c)) continue;
430
return substr(start, scanner);
434
tstring tstring::shortFilename(size_t maxchar) const {
435
if(rep->len <= maxchar) return *this;
436
if(maxchar < 3) return "";
437
return "..." + substr(rep->len - maxchar + 3);
441
void tstring::normalizePath() {
443
tvector<tstring> a = split(*this, "/", false, false);
446
for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
447
if(i->empty() || (*i == ".")) i = a.erase(i);
451
// check for absolute
452
if((*rep)[0]=='/') clear();
456
for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
457
if((*i == "..") && (i != a.begin())) {
470
if((a.size() > 0) || (len() == 0))
471
operator+=("/" + join(a, "/"));
473
void tstring::extractFilename() {
474
const char *p = strrchr(rep->data(), '/');
475
if(p) operator=(p+1);
479
void tstring::extractPath() {
480
const char *p = strrchr(rep->data(), '/');
482
truncate((p - rep->data() + 1));
489
void tstring::removeDirSlash() {
490
if(*this == "/") return;
491
while(lastChar() == '/') truncate(rep->len-1);
495
void tstring::addDirSlash() {
496
if(lastChar() != '/') operator += ("/");
500
void tstring::extractFilenameExtension() {
501
extractFilename(); // get file name
502
const char *p = strrchr(rep->data(), '.');
503
if(p) { // contains dot
504
if(p > rep->data()) { // last dot not first char
505
operator=(p+1); // get extension
509
clear(); // no extension
513
double tstring::binaryPercentage() const {
516
for(size_t i = 0; i < rep->len; i++)
517
if((!isprint((*rep)[i])) && (!isspace((*rep)[i]))) bin+=1.0;
518
return (bin * 100.0) / double(rep->len);
522
bool tstring::isLower() const {
523
if(rep->len == 0) return false;
524
for(size_t i = 0; i < rep->len; i++)
525
if(isalpha((*rep)[i]))
526
if(isupper((*rep)[i]))
532
bool tstring::isUpper() const {
533
if(rep->len == 0) return false;
534
for(size_t i = 0; i < rep->len; i++)
535
if(isalpha((*rep)[i]))
536
if(islower((*rep)[i]))
542
bool tstring::isCapitalized() const {
543
if(rep->len == 0) return false;
544
if(isalpha((*rep)[0])) if(islower((*rep)[0])) return false;
545
for(size_t i = 1; i < rep->len; i++)
546
if(isalpha((*rep)[i]))
547
if(isupper((*rep)[i]))
553
void tstring::lower() {
555
for(size_t i = 0; i < rep->len; i++) (*rep)[i] = tolower((*rep)[i]);
559
void tstring::upper() {
561
for(size_t i = 0; i < rep->len; i++) (*rep)[i] = toupper((*rep)[i]);
565
void tstring::capitalize() {
567
if(rep->len) (*rep)[0] = toupper((*rep)[0]);
571
static const char *bytesearch(const char *mem, int mlen,
572
const char *pat, int plen,
573
bool ignore_case, bool whole_words) {
575
for(i=0; i <= mlen-plen; i++) {
577
for(j=0; j<plen; j++)
578
if(tolower(mem[i+j]) != tolower(pat[j])) break;
580
for(j=0; j<plen; j++)
581
if(mem[i+j] != pat[j]) break;
583
if(j==plen) { // found
584
if(!whole_words) return mem + i;
587
bool right_ok = true;
588
if(i > 0) if(isalnum(mem[i-1]) || (mem[i-1]=='_'))
590
if(i < mlen-plen) if(isalnum(mem[i+plen]) || (mem[i+plen]=='_'))
592
if(left_ok && right_ok) return mem + i;
596
return 0; // not found
600
int tstring::searchReplace(const tstring& tsearch, const tstring& replace_,
601
bool ignore_case, bool whole_words,
602
bool preserve_case, int progress,
603
const tstring& pre_padstring, const tstring& post_padstring, tvector<int> *match_pos, int max_num) {
604
// get new length and positions
605
if(progress) { putc('S', stderr);fflush(stderr); }
606
int num = search(tsearch, ignore_case, whole_words, progress);
607
if(progress) { putc('R', stderr);fflush(stderr); }
611
if(num >= max_num) num = max_num;
612
int newlen = rep->len + num*(replace_.rep->len-tsearch.rep->len +
613
pre_padstring.len()+post_padstring.len());
616
Rep *newrep = Rep::create(newlen);
617
const char *p = rep->data(); // read
618
char *q = newrep->data(); // write
619
const char *r; // found substring
620
int mlen = rep->len; // rest of read mem
621
for(int i=0; i < num; i++) {
622
if(progress>0) if((i%progress)==0) {putc('.', stderr);fflush(stderr);}
623
r = bytesearch(p, mlen, tsearch.rep->data(), tsearch.rep->len, ignore_case, whole_words);
624
memcpy(q, p, r-p); // add skipped part
626
if(match_pos) (*match_pos) += int(q-newrep->data()); // enter start
627
memcpy(q, pre_padstring.rep->data(), pre_padstring.rep->len); // add pre pad
628
q += pre_padstring.len();
629
if(!preserve_case) { // add replaced part
630
memcpy(q, replace_.rep->data(), replace_.rep->len);
632
tstring rr(preserveCase(tstring(r, tsearch.rep->len), replace_.rep->data()));
633
memcpy(q, rr.rep->data(), rr.rep->len);
635
q += replace_.rep->len;
636
memcpy(q, post_padstring.rep->data(), post_padstring.rep->len); // add post pad
637
q += post_padstring.len();
638
if(match_pos) (*match_pos) += int(q-newrep->data()); // enter end
640
mlen -= tsearch.rep->len;
641
p = r + tsearch.rep->len;
643
memcpy(q, p, mlen); // add rest
651
int tstring::search(const tstring& pat, bool ignore_case, bool whole_words, int progress, tvector<int> *match_pos) const {
652
if(pat.empty()) return -1;
656
for(const char *p = rep->data(); (q=bytesearch(p, mlen, pat.rep->data(), pat.rep->len,
657
ignore_case, whole_words)); num++) {
658
if(match_pos) (*match_pos) += int(q-rep->data());
660
mlen -= pat.rep->len;
661
p = q + pat.rep->len;
662
if(match_pos) (*match_pos) += int(p-rep->data());
663
if(progress>0) if((num%progress)==0) {putc('.', stderr);fflush(stderr);}
669
/// replace substring
670
void tstring::replace(size_t start, size_t len_, const tstring &str) {
671
if(start > length()) return;
672
if(start + len_ > length()) return;
673
if(str.length() > len_)
674
detachResize(length() + str.length() - len_);
677
if(str.length() != len_)
678
memmove(rep->data() + start + str.length(), rep->data() + start + len_, length() - start - len_);
680
memcpy(rep->data() + start, str.data(), str.length());
682
rep->len += str.length() - len_;
687
bool tstring::hasPrefix(const tstring& pref) const {
688
if(pref.rep->len > rep->len) return false;
689
return memcmp(rep->data(), pref.rep->data(), pref.rep->len)==0;
693
bool tstring::hasSuffix(const tstring& suf) const {
694
if(suf.rep->len > rep->len) return false;
695
return memcmp(rep->data() + (rep->len - suf.rep->len),
696
suf.rep->data(), suf.rep->len)==0;
700
bool tstring::consistsOfSpace() const {
701
for(size_t i = 0; i < rep->len; i++) {
702
if(!isspace((*rep)[i])) return false;
708
void tstring::truncate(size_t max) {
717
void tstring::replaceUnprintable(bool only_ascii) {
718
for(size_t i = 0; i < rep->len; i++) {
719
unsigned char& c = (unsigned char &)(*rep)[i];
723
} else if(only_ascii || (c < 0xa0)) {
731
void tstring::unquote(bool allow_bslash, bool crop_space) {
737
char *nonspace=rep->data();
739
if(crop_space) while(isspace(*p)) p++;
741
if(allow_bslash && *p=='\\') {
753
if((*p == '\'') || (*p == '\"')) {
759
if(quote || (!isspace(*p))) nonspace = q;
763
if(crop_space) if(*nonspace) nonspace[1] = 0;
764
rep->len = strlen(rep->data());
768
tstring tstring::getFitWordsBlock(size_t max) {
769
tstring r = getFitWords(max);
771
size_t fill = max - r.len();
772
if(fill > 8) return r;
775
for(i = 0; i < r.len(); i++)
776
if(r[i] != ' ') break;
777
for(spaces = 0; i < r.len(); i++)
778
if(r[i] == ' ') spaces++;
779
if(fill > spaces) return r;
782
for(i = 0, j = 0; i < r.len(); i++) {
783
if(r[i] != ' ') break;
784
(*(t.rep))[j++] = r[i];
786
for(; i < r.len(); i++) {
787
if((fill > 0)&&(r[i] == ' ')) {
788
(*(t.rep))[j++] = ' ';
789
(*(t.rep))[j++] = ' ';
791
} else (*(t.rep))[j++] = r[i];
799
void tstring::cropSpaceEnd() {
804
while((e >= 0) && isspace((*rep)[e])) e--;
809
tstring tstring::getFitWords(size_t max) {
810
if(max < 1) return tstring();
812
tstring r(*this); // return value
815
size_t lf = firstOccurence('\n');
816
if((lf != npos) && (lf <= max)) {
817
operator=(substr(lf + 1));
824
if(rep->len <= max) {
831
size_t last_space = npos;
832
for(size_t i = 0; i <= max; i++) {
833
if((*rep)[i] == ' ') last_space = i;
835
if(last_space == npos) last_space = max;
838
r.truncate(last_space);
839
while(isspace((*rep)[last_space])) last_space++;
840
operator=(substr(last_space));
846
void tstring::expandUnprintable(char quotes) {
847
Rep *newrep = Rep::create(rep->len*4);
848
char *q = newrep->data(); // write
849
char *p = rep->data(); // read
853
for(size_t j = 0; j < rep->len; ++j, ++p) {
854
if(isprint(*p) || (((unsigned char)*p) > 160)) { // printable --> print
855
if((*p=='\\') || (quotes && (*p==quotes))) { // backslashify backslash and quotes
861
} else { // unprintable --> expand
862
*(q++) = '\\'; // leading backslash
893
default: // no single char control
894
unsigned int i = (unsigned char)*p;
896
if(i < 32) { // print lower control octal
898
q += ::sprintf(q, "%03o", i);
900
q += ::sprintf(q, "%o", i);
904
} else { // print octal or hex
906
q += ::sprintf(q, "%03o", i);
908
q += ::sprintf(q, "x%02x", i);
922
void tstring::backslashify() {
923
Rep *newrep = Rep::create(rep->len*2);
924
char *p = rep->data();
925
char *q = newrep->data();
928
// backslashify each char
929
for(size_t i = 0; i < rep->len; i++, p++) {
960
void tstring::compileCString() {
963
char *p = rep->data(); // read
964
char *q = rep->data(); // write
966
size_t l = 0; // write
967
size_t i = 0; // read
969
while(i < rep->len) {
970
c = *(p++); // read char
972
if(c == '\\') { // compile char
973
if(i>=rep->len) break;
1000
c = strtol(p, &qq, 16);
1015
buf[2] = (i < rep->len) ? p[1] : 0;
1018
c = strtol(buf, &t, 8);
1024
*(q++) = c; // write char
1032
void tstring::removeHTMLTags(int& level) {
1035
char *p = rep->data(); // read
1036
char *q = rep->data(); // write
1037
size_t l = 0; // write
1038
size_t i = 0; // read
1040
while(i < rep->len) {
1047
if(level > 0) level--;
1065
void tstring::cropSpace(void) {
1066
size_t first = rep->len;
1070
// get first nonspace
1071
for(i = 0; i < rep->len; ++i)
1072
if(!isspace((*rep)[i])) {
1078
if(first == rep->len) {
1083
// get last nonspace
1084
for(i = rep->len - 1; i >= first; --i)
1085
if(!isspace((*rep)[i])) {
1097
// extract substring
1098
operator=(substr(first, last));
1102
void tstring::collapseSpace(void) {
1105
char *p = rep->data(); // read
1106
char *q = rep->data(); // write
1107
char last_char = ' ';
1108
size_t l = 0; // length
1111
for(size_t i = 0; i < rep->len; ++i, ++p) {
1112
if((!isspace(*p)) || (!isspace(last_char))) {
1114
if(isspace(c)) c=' ';
1120
if(isspace(last_char)&&(l>0)) --l;
1126
void tstring::translateChar(char from, char to) {
1128
char *p = rep->data();
1129
for(size_t i = 0; i < rep->len; ++i, ++p)
1130
if(*p == from) *p = to;
1134
size_t tstring::firstOccurence(char c) const {
1137
for(i = 0; (i < rep->len) && ((*rep)[i] != c); ++i) /* empty body */;
1138
if(i < rep->len) return i;
1144
// non member implementation
1147
tvector<tstring> split(const tstring &s, const char *sep, bool allow_quoting, bool crop_space) {
1150
const char *p = s.c_str();
1157
// collect chars to buf
1159
if(strchr(sep, *p)) {
1161
} else if(!allow_quoting) {
1163
} else if(*p=='\\') {
1165
if(strchr(sep, *p)==0) buf += '\\';
1166
if(*p) buf += *(p++);
1167
} else if(*p=='\'') {
1169
for(p++; *p && *p!='\''; p++) {
1179
} else if(*p=='\"') {
1181
for(p++; *p && *p!='\"'; p++) {
1197
if(crop_space) buf.cropSpace();
1208
tstring join(const tvector<tstring>& a, const tstring& sep) {
1211
if(a.empty()) return r;
1213
for(size_t i = 1; i < a.size(); i++) {
1221
tstring preserveCase(const tstring& from, const tstring& to) {
1224
if(from.len() == to.len()) {
1226
for(size_t i = 0; i < r.len(); i++) {
1227
if(islower(from[i])) r[i] = tolower(r[i]);
1228
else if(isupper(from[i])) r[i] = toupper(r[i]);
1232
if(from.isLower()) r.lower();
1233
if(from.isUpper()) r.upper();
1234
if(from.isCapitalized()) r.capitalize();
1241
const char *progressBar(const char *message, unsigned int n, unsigned int max, int width) {
1242
// max size of a buffer
1244
// number of static buffers (must be power of two)
1246
static char tbuf[size * numbuf];
1247
static int tphase = 0;
1248
static int phase = 0;
1249
static char phasechar[] = "/-~-_-\\|";
1252
tphase &= numbuf - 1;
1253
char *buf = tbuf + size * tphase;
1256
if(width >= size) width = size - 1;
1259
sprintf(buf, "%*s", width, "");
1263
// open end progress
1264
if(phasechar[phase] == 0) phase = 0;
1265
sprintf(buf, "%.*s %11d %c", width - (11 - 3), message, n, phasechar[phase++]);
1269
// proportional progress
1271
// get num chars for number and max
1273
for(i = max; i; i /= 10, nlen++) /* empty body */;
1275
int l = sprintf(buf, "%.*s %*d/%*d (%5.1f%%) ", width - (12 + 2 * nlen), message, nlen, n, nlen, max, double(n)/double(max)*100.0);
1276
int rest = width - l;
1277
if(rest <= 0) return buf;
1278
int done = int(double(n)/double(max)*double(rest));
1279
if(done > rest) done = rest;
1281
for(i = 0; i < done; i++) *(p++) = '*';
1282
for(; i < rest; i++) *(p++) = '.';
1289
bool tstring::readLine(FILE *file) {
1294
buf[sizeof(buf)-2] = '\n';
1295
if(!fgets(buf, sizeof(buf), file)) break;
1297
if(buf[sizeof(buf)-2] == '\n') break;
1299
if(rep->len) return true;
1304
size_t tstring::write(FILE *file) const {
1305
return fwrite(rep->data(), 1, rep->len, file);
1309
size_t tstring::read(FILE *file, size_t l) {
1311
rep = Rep::create(l);
1312
int r = fread(rep->data(), 1, l, file);
1319
int tstring::readFile(const char *filename) {
1322
if(stat(filename, &buf)) return -1; // does not exist
1323
FILE *f=fopen(filename, "rb");
1324
if(f == 0) return -2; // no permission?
1325
int r = read(f, buf.st_size);
1327
if(r != buf.st_size) return -3; // read error
1332
int tstring::writeFile(const char *filename) {
1333
FILE *f = fopen(filename, "wb");
1334
if(f == 0) return -2; // no permission?
1337
if(r != int(length())) return -3; // write error
1342
tvector<tstring> loadTextFile(const char *fname) {
1343
FILE *f = fopen(fname, "r");
1344
if(f==0) throw TFileOperationErrnoException(fname, "fopen(mode='r')", errno);
1346
for(size_t i = 0; r[i].readLine(f); i++) /* empty body */;
1353
tvector<tstring> loadTextFile(FILE *file) {
1355
for(size_t i = 0; r[i].readLine(file); i++) /* empty body */;