2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* File Name: ncbistr.c
28
* Author: Gish, Kans, Ostell, Schuler, Brylawski, Vakatov, Lavrentiev
30
* Version Creation Date: 3/4/91
35
* portable string routines
38
* --------------------------------------------------------------------------
40
* Revision 6.10 2001/01/05 22:43:58 shavirin
41
* Added functions, that transfer Uint8 values to platform-independent
44
* Revision 6.9 2000/12/04 23:48:02 kans
45
* trim spaces around string now handles trailing tabs, newlines, etc.
47
* Revision 6.8 2000/11/30 22:46:07 lavr
48
* Added the following functions for conversions of Int8 and Uint8
49
* to strings and back; test suite attached at the end of the file.
50
* Nlm_Int8ToString, Nlm_Uint8ToString, Nlm_StringToInt8, Nlm_StringToUint8
52
* Revision 6.7 2000/08/28 18:36:25 vakatov
53
* un-const casts in some functions to pass C++ compilation
55
* Revision 6.6 1999/04/15 20:24:06 vakatov
56
* Don't use the name "list", as it can clash with the standard "list<>" template
57
* on some raw C++ compilers
59
* Revision 6.5 1999/03/11 16:10:00 kans
60
* StringHasNoText and TrimSpacesAroundString moved from vibforms
62
* Revision 6.4 1998/11/23 00:09:47 kans
63
* fixed bug in StringTokMT (found by Hugues)
65
* Revision 6.3 1998/10/07 19:09:00 kans
66
* added Nlm_StringTokMT, multithread-safe version
68
* Revision 6.2 1997/11/26 21:26:25 vakatov
69
* Fixed errors and warnings issued by C and C++ (GNU and Sun) compilers
71
* Revision 6.1 1997/10/29 02:44:52 vakatov
72
* Type castings to pass through the C++ compiler
74
* Revision 5.5 1997/07/16 19:49:18 vakatov
75
* Added Nlm_StringPrintable() function
77
* Revision 5.4 1997/04/11 17:57:25 brandon
78
* added StrIPCmp, StrNIPCmp
80
* Revision 5.3 1997/03/04 22:01:12 vakatov
81
* Added a set of functions to format(stream2text), unformat(text2stream)
82
* and adjust(rule_line) text and test/demo code #TEST_TEXT_FMT for these
84
* Revision 5.2 1997/01/03 15:56:28 vakatov
85
* Added auxiliary function Nlm_StringNCpy_0() -- that guarantees the
86
* resulting string be '\0'-terminated
88
* Revision 5.1 1996/12/03 21:48:33 vakatov
89
* Adopted for 32-bit MS-Windows DLLs
91
* Revision 4.12 1996/05/22 18:04:14 kans
92
* changed nulls to '\0' in new string functions
94
* Revision 4.11 1996/05/22 14:46:19 brandon
95
* Fixed SkipToString, SkipPastString to work with short strings
97
* Revision 4.10 1996/05/07 13:22:37 kans
98
* more protection for stringsearch
100
* Revision 4.9 1996/05/06 15:07:58 kans
101
* fixed StringISearch to set d [] based on TO_UPPER, not to crash if nonASCII
103
* Revision 4.8 1996/03/14 03:42:44 epstein
104
* change String variables to theString to work around SGI4 problem
106
* Revision 4.7 1996/01/05 02:29:37 ostell
107
* provided return value for TruncateStringCopy()
109
* Revision 4.6 1996/01/03 21:04:46 epstein
110
* modify StringSubString() API and add other new functions, per Brandon
112
* Revision 4.5 1996/01/02 14:17:32 ostell
113
* added a number of Brandons functions
115
* Revision 4.4 1995/12/28 15:41:56 epstein
116
* added Brylawskin to revision history and author list
118
* Revision 4.3 1995/12/27 20:53:48 epstein
119
* add Brandon's string-management functions
121
* Revision 4.2 1995/10/28 15:03:20 ostell
122
* added casts to quiet DOS compile warnings
124
* Revision 4.1 1995/10/16 13:43:29 epstein
125
* fix brain-damaged string-compare logic to handle null strings correctly
127
* Revision 2.12 1995/07/18 19:56:10 tatiana
128
* add Nlm_LabelCopyNext()
130
* Revision 2.11 1995/05/30 13:19:37 kans
131
* fixed StringSearch algorithm - check until i <= strLen, not just < strLen
133
* 3/4/91 Kans Stricter typecasting for GNU C and C++.
134
* 09-19-91 Schuler Changed all types expressing sizes to size_t.
135
* 09-19-91 Schuler Changed return type for compare functions to int.
136
* 09-19-91 Schuler Changed all functions to _cdecl calling convention.
137
* 09-19-91 Schuler Where possible, NCBI functions call the actual ANSI
138
* functions after checking for NULL pointers.
139
* 09-19-91 Schuler Debug-class error posted on any NULL argument.
140
* 09-19-91 Schuler StringSave() calls MemGet() instead of MemNew().
141
* 09-19-91 Schuler StringSave(NULL) returns NULL.
142
* 10-17-91 Schuler Removed ErrPost() calls on NULL arguments.
143
* 10-17-91 Schuler Added Nlm_StringCnt(),Nlm_StringStr(),Nlm_StringTok()
144
* 11-18-91 Schuler Added more ANSI-style functions
145
* 04-15-93 Schuler Changed _cdecl to LIBCALL
146
* 05-27-93 Schuler Added const qualifiers to match ANSI cognates
147
* 06-14-94 Schuler Added StrUpper() and StrLower() functions
148
* 03-08-95 Kans Added StringSearch and StringISearch
149
* 12-27-95 Brylawski Added a variety of functions, including search-and-
152
* ==========================================================================
158
/* ClearDestString clears the destination string if the source is NULL. */
159
static Nlm_CharPtr NEAR Nlm_ClearDestString (Nlm_CharPtr to, size_t max)
161
if (to != NULL && max > 0) {
162
Nlm_MemSet (to, 0, max);
168
/* NULL-tolerant length: a NULL string counts as length 0, anything else
 * is measured with StrLen. */
NLM_EXTERN size_t LIBCALL Nlm_StringLen (const char *str)
{
  if (str == NULL) {
    return 0;
  }
  return StrLen (str);
}
173
/* NULL-tolerant copy.  When both pointers are valid the work is handed to
 * StrCpy; otherwise the first byte of "to" (if any) is cleared so the
 * destination reads as an empty string. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringCpy (char FAR *to, const char FAR *from)
{
  if (to != NULL && from != NULL) {
    return StrCpy (to, from);
  }
  return Nlm_ClearDestString (to, 1);
}
178
/* NULL-tolerant bounded copy.  When both pointers are valid the work is
 * handed to StrNCpy; otherwise the first "max" bytes of "to" (if any) are
 * zeroed. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringNCpy (char FAR *to, const char FAR *from, size_t max)
{
  if (to != NULL && from != NULL) {
    return StrNCpy (to, from, max);
  }
  return Nlm_ClearDestString (to, max);
}
183
/* Bounded copy that always leaves "to" '\0'-terminated.
 *
 *   to    destination buffer of at least "max" bytes (may be NULL)
 *   from  source string (may be NULL, treated as empty)
 *   max   total size of the destination, terminator included
 *
 * At most max-1 characters of "from" are stored, followed by '\0'.
 * Returns "to".  A NULL destination or a zero-sized buffer is returned
 * untouched: there is nowhere to put even the terminator, and "max - 1"
 * would otherwise wrap around to SIZE_MAX and lift the bound entirely. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringNCpy_0 (char FAR *to, const char FAR *from, size_t max)
{
  if (to == NULL || max == 0) {
    return to;
  }

  /* start from an empty string so that concatenation acts as a copy */
  to[0] = '\0';

  if (from != NULL) {
    StrNCat(to, from, max - 1);
  }

  return to;
}
194
/* NULL-tolerant concatenation: if either argument is NULL nothing is
 * appended and "to" is returned as given. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringCat (char FAR *to, const char FAR *from)
{
  if (to == NULL || from == NULL) {
    return to;
  }
  return StrCat (to, from);
}
199
/* NULL-tolerant bounded concatenation: if either argument is NULL nothing
 * is appended and "to" is returned as given. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringNCat (char FAR *to, const char FAR *from, size_t max)
{
  if (to == NULL || from == NULL) {
    return to;
  }
  return StrNCat (to, from, max);
}
204
/* NULL-tolerant comparison.  Two valid strings are compared with StrCmp.
 * A NULL string sorts before any non-NULL string, and two NULLs compare
 * equal. */
NLM_EXTERN int LIBCALL Nlm_StringCmp (const char FAR *a, const char FAR *b)
{
  if (a != NULL && b != NULL) {
    return StrCmp(a, b);
  }
  if (a != NULL) {
    return 1;
  }
  if (b != NULL) {
    return -1;
  }
  return 0;
}
209
/* NULL-tolerant bounded comparison.  Two valid strings are compared with
 * StrNCmp over at most "max" characters.  A NULL string sorts before any
 * non-NULL string, and two NULLs compare equal. */
NLM_EXTERN int LIBCALL Nlm_StringNCmp (const char FAR *a, const char FAR *b, size_t max)
{
  if (a != NULL && b != NULL) {
    return StrNCmp(a, b, max);
  }
  if (a != NULL) {
    return 1;
  }
  if (b != NULL) {
    return -1;
  }
  return 0;
}
214
/* NULL-tolerant case-insensitive comparison.  Two valid strings are
 * compared with Nlm_StrICmp.  A NULL string sorts before any non-NULL
 * string, and two NULLs compare equal. */
NLM_EXTERN int LIBCALL Nlm_StringICmp (const char FAR *a, const char FAR *b)
{
  if (a != NULL && b != NULL) {
    return Nlm_StrICmp(a, b);
  }
  if (a != NULL) {
    return 1;
  }
  if (b != NULL) {
    return -1;
  }
  return 0;
}
219
/* NULL-tolerant, bounded, case-insensitive comparison.  Two valid strings
 * are compared with Nlm_StrNICmp over at most "max" characters.  A NULL
 * string sorts before any non-NULL string, and two NULLs compare equal. */
NLM_EXTERN int LIBCALL Nlm_StringNICmp (const char FAR *a, const char FAR *b, size_t max)
{
  if (a != NULL && b != NULL) {
    return Nlm_StrNICmp(a, b, max);
  }
  if (a != NULL) {
    return 1;
  }
  if (b != NULL) {
    return -1;
  }
  return 0;
}
224
/* NULL-tolerant character search: returns NULL for a NULL string,
 * otherwise whatever Nlm_StrChr reports for "chr" in "str". */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringChr (const char FAR *str, int chr)
{
  if (str == NULL) {
    return NULL;
  }
  return (Nlm_CharPtr) Nlm_StrChr(str,chr);
}
229
/* NULL-tolerant reverse character search: returns NULL for a NULL string,
 * otherwise whatever Nlm_StrRChr reports for "chr" in "str". */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringRChr (const char FAR *str, int chr)
{
  if (str == NULL) {
    return NULL;
  }
  return (Nlm_CharPtr) Nlm_StrRChr(str,chr);
}
234
/* NULL-tolerant span: returns 0 if either argument is NULL, otherwise the
 * result of Nlm_StrSpn (a, b). */
NLM_EXTERN size_t LIBCALL Nlm_StringSpn (const char FAR *a, const char FAR *b)
{
  if (a == NULL || b == NULL) {
    return 0;
  }
  return Nlm_StrSpn (a, b);
}
239
/* NULL-tolerant complementary span: returns 0 if either argument is NULL,
 * otherwise the result of Nlm_StrCSpn (a, b). */
NLM_EXTERN size_t LIBCALL Nlm_StringCSpn (const char FAR *a, const char FAR *b)
{
  if (a == NULL || b == NULL) {
    return 0;
  }
  return Nlm_StrCSpn (a, b);
}
244
/* NULL-tolerant search for any character of "b" in "a": returns NULL if
 * either argument is NULL, otherwise the result of Nlm_StrPBrk (a, b). */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringPBrk (const char FAR *a, const char FAR *b)
{
  if (a == NULL || b == NULL) {
    return NULL;
  }
  return (Nlm_CharPtr) Nlm_StrPBrk (a, b);
}
249
/* NULL-tolerant substring search: returns NULL if either argument is
 * NULL, otherwise the result of Nlm_StrStr (str1, str2). */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringStr (const char FAR *str1, const char FAR *str2)
{
  if (str1 == NULL || str2 == NULL) {
    return NULL;
  }
  return (Nlm_CharPtr) Nlm_StrStr(str1,str2);
}
254
/* Tokenizer wrapper: returns NULL when the delimiter set "str2" is NULL,
 * otherwise forwards to Nlm_StrTok.  "str1" is passed through unchecked,
 * NULL included. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringTok (char FAR *str1, const char FAR *str2)
{
  if (str2 == NULL) {
    return NULL;
  }
  return Nlm_StrTok(str1,str2);
}
259
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringTokMT (char FAR *str1, const char FAR *str2, char FAR **tmp)
263
char FAR *rsult = NULL;
265
if (str2 == NULL || tmp == NULL) return NULL;
270
if (ptr == NULL) return NULL;
272
while (ch != '\0' && strchr (str2, ch) != NULL) {
281
while (ch != '\0' && strchr (str2, ch) == NULL) {
295
/* NULL-tolerant front end to Nlm_StrMove: if either pointer is NULL the
 * destination pointer is returned as given and nothing is copied. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringMove (char FAR *to, const char FAR *from)
{
  if (to == NULL || from == NULL) {
    return to;
  }
  return Nlm_StrMove (to, from);
}
300
/* NULL-tolerant duplicate: StringSave(NULL) yields NULL; any other string
 * is handed to Nlm_StrSave. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringSave (const char FAR *from)
{
  if (from == NULL) {
    return NULL;
  }
  return Nlm_StrSave (from);
}
305
/* Like Nlm_StringSave, but an empty string is treated the same as NULL:
 * both yield NULL.  Only a string with at least one character is handed
 * to Nlm_StrSave. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringSaveNoNull (const char FAR *from)
{
  if (from == NULL || *from == '\0') {
    return NULL;
  }
  return Nlm_StrSave(from);
}
310
/* NULL-tolerant front end to Nlm_StrCnt: returns 0 if either the string
 * or the character list is NULL. */
NLM_EXTERN size_t LIBCALL Nlm_StringCnt (const char FAR *str, const char FAR *x_list)
{
  if (str == NULL || x_list == NULL) {
    return 0;
  }
  return Nlm_StrCnt(str,x_list);
}
315
/* NULL-tolerant front end to StrUpper: a NULL string yields NULL. */
NLM_EXTERN char * LIBCALL Nlm_StringUpper (char *string)
{
  if (string != NULL) {
    return StrUpper(string);
  }
  return NULL;
}
320
/* NULL-tolerant front end to StrLower: a NULL string yields NULL. */
NLM_EXTERN char * LIBCALL Nlm_StringLower (char *string)
{
  if (string != NULL) {
    return StrLower(string);
  }
  return NULL;
}
329
NLM_EXTERN int LIBCALL Nlm_StrICmp (const char FAR *a, const char FAR *b)
333
if (a == b) return 0;
338
diff = TO_UPPER(*a) - TO_UPPER(*b);
340
return (Nlm_Int2) diff;
351
NLM_EXTERN int LIBCALL Nlm_StrIPCmp (const char FAR *a, const char FAR *b)
355
if (a == b) return 0;
357
while (*a && !isalnum(*a))
360
while (*b && !isalnum(*b))
366
if (!isalnum(*a) && !isalnum(*b))
368
while (*a && !isalnum(*a))
371
while (*b && !isalnum(*b))
375
diff = TO_UPPER(*a) - TO_UPPER(*b);
377
return (Nlm_Int2) diff;
388
NLM_EXTERN int LIBCALL Nlm_StrNICmp (const char FAR *a, const char FAR *b, size_t max)
392
if (a == b) return 0;
397
diff = TO_UPPER(*a) - TO_UPPER(*b);
399
return (Nlm_Int2) diff;
412
NLM_EXTERN int LIBCALL Nlm_StrNIPCmp (const char FAR *a, const char FAR *b, size_t max)
416
if (a == b) return 0;
418
while (*a && !isalnum(*a))
421
while (*b && !isalnum(*b))
427
if (!isalnum(*a) && !isalnum(*b))
429
while (*a && !isalnum(*a))
439
while (*b && !isalnum(*b))
444
diff = TO_UPPER(*a) - TO_UPPER(*b);
446
return (Nlm_Int2) diff;
459
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StrSave (const char FAR *from)
464
len = Nlm_StringLen(from);
465
if ((to = (Nlm_CharPtr) Nlm_MemGet(len +1, FALSE)) != NULL) {
466
Nlm_MemCpy (to, from, len +1);
471
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StrMove (char FAR *to, const char FAR *from)
473
while (*from != '\0')
482
NLM_EXTERN Nlm_Boolean LIBCALL Nlm_StringHasNoText (Nlm_CharPtr str)
485
Nlm_Uchar ch; /* to use 8bit characters in multibyte languages */
500
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_TrimSpacesAroundString (Nlm_CharPtr str)
503
Nlm_Uchar ch; /* to use 8bit characters in multibyte languages */
507
if (str != NULL && str [0] != '\0') {
511
while (ch != '\0' && ch <= ' ') {
528
} else if (dst == NULL) {
541
NLM_EXTERN size_t LIBCALL Nlm_StrCnt(const char FAR *s, const char FAR *x_list)
543
size_t cmap[1<<CHAR_BIT];
546
const Nlm_Byte *bs = (const Nlm_Byte*)s;
547
const Nlm_Byte *blist = (const Nlm_Byte*)x_list;
549
if (s == NULL || x_list == NULL)
552
for (u = 0; u < DIM(cmap); ++u)
554
while (*blist != '\0')
557
blist = (Nlm_BytePtr)cmap;
560
while ((c = *bs++) != '\0')
567
NLM_EXTERN char * LIBCALL Nlm_StrUpper (char *string)
569
register char *p = string;
570
ASSERT(string != NULL);
574
*p = (char)toupper(*p);
582
NLM_EXTERN char * LIBCALL Nlm_StrLower (char *string)
584
register char *p = string;
585
ASSERT(string != NULL);
589
*p = (char)tolower(*p);
597
/* -------------------- MeshStringICmp() --------------------------------
598
* MeshStringICmp compares strings where / takes precedence to space.
601
NLM_EXTERN Nlm_Int2 LIBCALL Nlm_MeshStringICmp (const char FAR *str1, const char FAR *str2)
612
else if (str2 == NULL)
615
while ((*str1 >= ' ') && (*str2 >= ' ') && (TO_LOWER(*str1) == TO_LOWER(*str2)))
622
if ((ch1 < ' ') && (ch2 < ' '))
634
if (TO_LOWER (ch1) > TO_LOWER (ch2))
636
else if (TO_LOWER (ch1) < TO_LOWER (ch2))
637
return (Nlm_Int2)(-1);
642
/* StringSearch and StringISearch use the Boyer-Moore algorithm, as described
643
in Niklaus Wirth, Algorithms and Data Structures, Prentice- Hall, Inc.,
644
Englewood Cliffs, NJ., 1986, p. 69. The original had an error, where
645
UNTIL (j < 0) OR (p[j] # s[i]) should be UNTIL (j < 0) OR (p[j] # s[k]). */
647
static Nlm_CharPtr Nlm_FindSubString (const char FAR *str, const char FAR *sub,
648
Nlm_Boolean caseCounts)
659
if (sub != NULL && sub [0] != '\0' && str != NULL && str [0] != '\0') {
660
strLen = Nlm_StringLen (str);
661
subLen = Nlm_StringLen (sub);
662
if (subLen <= strLen) {
663
for (ch = 0; ch < 256; ch++) {
666
for (j = 0; j < (int)(subLen - 1); j++) {
667
ch = (int) (caseCounts ? sub [j] : TO_UPPER (sub [j]));
668
if (ch >= 0 && ch <= 255) {
669
d [ch] = subLen - j - 1;
680
(caseCounts ? sub [j] : TO_UPPER (sub [j])) ==
681
(caseCounts ? str [k] : TO_UPPER (str [k])));
683
ch = (int) (caseCounts ? str [i - 1] : TO_UPPER (str [i - 1]));
684
if (ch >= 0 && ch <= 255) {
690
} while (j >= 0 && i <= (int) strLen);
693
return (Nlm_CharPtr) (str + i);
700
/* Case-sensitive substring search: forwards to Nlm_FindSubString with
 * caseCounts set to TRUE. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringSearch (const char FAR *str, const char FAR *sub)
{
  Nlm_CharPtr hit;

  hit = Nlm_FindSubString (str, sub, TRUE);
  return hit;
}
706
/* Case-insensitive substring search: forwards to Nlm_FindSubString with
 * caseCounts set to FALSE. */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringISearch (const char FAR *str, const char FAR *sub)
{
  Nlm_CharPtr hit;

  hit = Nlm_FindSubString (str, sub, FALSE);
  return hit;
}
712
NLM_EXTERN Nlm_Uint1Ptr LIBCALL Uint8ToBytes(Nlm_Uint8 value)
714
Nlm_Uint1Ptr out_bytes;
715
Nlm_Int4 i, mask = 0xFF;
717
out_bytes = MemNew(8);
719
for(i = 0; i < 8; i++) {
720
out_bytes[i] = value & mask;
727
NLM_EXTERN Nlm_Uint8 LIBCALL BytesToUint8(Nlm_Uint1Ptr bytes)
733
for(i = 7; i >= 0; i--) {
740
static Nlm_Uint8 s_StringToUint8(const char *str, const char **endptr, int *sgn)
742
int sign = 0; /* actual sign */
743
Nlm_Uint8 limdiv, limoff, result;
744
const char *s, *save;
753
while (IS_WHITESP(*s))
755
/* empty string - error */
763
} else if (*s == '+') {
769
limdiv = UINT8_MAX / 10;
770
limoff = UINT8_MAX % 10;
773
for (c = *s; c; c = *++s) {
778
if (result > limdiv || (result == limdiv && c > limoff)) {
786
/* there was no conversion - error */
796
/* Unsigned 8-byte conversion: forwards to s_StringToUint8 with the sign
 * flag preset to 0, which the helper reads as "no sign allowed".
 * "endptr" is passed straight through to the helper. */
NLM_EXTERN Nlm_Uint8 LIBCALL Nlm_StringToUint8(const char* str, const char** endptr)
{
    int no_sign = 0; /* no sign allowed */
    Nlm_Uint8 value;

    value = s_StringToUint8(str, endptr, &no_sign);
    return value;
}
803
NLM_EXTERN Nlm_Int8 LIBCALL Nlm_StringToInt8(const char* str, const char** endptr)
805
int sign = 1; /* sign allowed */
806
Nlm_Uint8 result = s_StringToUint8(str, endptr, &sign);
808
/* Check for overflow */
810
? -((Nlm_Uint8)(INT8_MIN + 1)) + 1
811
: (Nlm_Uint8)(INT8_MAX)))
816
return sign ? -result : result;
820
static char *s_Uint8ToString(Nlm_Uint8 value, char *str, size_t str_size)
825
if (!str || str_size < 2)
828
for (i = sizeof(buf) - 1; i > 0; i--) {
829
buf[i] = (char)(value % 10 + '0');
834
if (!i || (j = sizeof(buf) - i) >= str_size)
836
memcpy(str, buf + i, j);
842
/* Public entry point for unsigned 8-byte formatting: all arguments are
 * forwarded unchanged to s_Uint8ToString and its result is returned. */
NLM_EXTERN char* LIBCALL Nlm_Uint8ToString(Nlm_Uint8 value, char* str, size_t str_size)
{
    char *text;

    text = s_Uint8ToString(value, str, str_size);
    return text;
}
848
NLM_EXTERN char* LIBCALL Nlm_Int8ToString (Nlm_Int8 value, char* str, size_t str_size)
854
if (!str || !str_size)
858
val = -(Nlm_Uint8)(value + 1) + 1;
862
return s_Uint8ToString(val, str, str_size) ? save : 0;
866
/*****************************************************************************
868
* LabelCopy (to, from, buflen)
869
* Copies the string "from" into "to" for up to "buflen" chars
870
* if "from" is longer than buflen, makes the last character '>'
871
* always puts one '\0' to terminate the string in to
872
* to MUST be one character longer than buflen to leave room for the
873
* last '\0' when "from" is at least buflen characters long.
874
* returns number of characters transferred to "to"
876
*****************************************************************************/
877
NLM_EXTERN Nlm_Int2 LIBCALL Nlm_LabelCopy (Nlm_CharPtr to, Nlm_CharPtr from, Nlm_Int2 buflen)
881
if ((to == NULL) || (from == NULL) || (buflen < 0)) return 0;
883
if (buflen == 0) /* this is a sign of multiple writes */
891
while ((*from != '\0') && (buflen))
894
from++; to++; buflen--;
902
*to = '\0'; /* buffer is bufferlen+1 */
903
return (Nlm_Int2)(len - buflen);
906
NLM_EXTERN void LIBCALL Nlm_LabelCopyNext(Nlm_CharPtr PNTR to, Nlm_CharPtr from, Nlm_Int2 PNTR buflen)
910
diff = Nlm_LabelCopy(*to, from, *buflen);
911
*buflen -= diff; *to += diff;
915
/*****************************************************************************
917
* LabelCopyExtra (to, from, buflen, prefix, suffix)
918
* Copies the string "from" into "to" for up to "buflen" chars
919
* if all together are longer than buflen, makes the last character '>'
920
* always puts one '\0' to terminate the string in to
921
* to MUST be one character longer than buflen to leave room for the
922
* last '\0' if from = buflen.
923
* returns number of characters transferred to "to"
925
* if not NULL, puts prefix before from and suffix after from
926
* both contained within buflen
929
*****************************************************************************/
930
NLM_EXTERN Nlm_Int2 LIBCALL Nlm_LabelCopyExtra (Nlm_CharPtr to, Nlm_CharPtr from, Nlm_Int2 buflen, Nlm_CharPtr prefix, Nlm_CharPtr suffix)
934
if ((to == NULL) || (buflen < 1) || (from == NULL)) return 0;
937
diff = Nlm_LabelCopy(to, prefix, buflen);
938
buflen -= diff; to += diff;
940
diff = Nlm_LabelCopy(to, from, buflen);
941
buflen -= diff; to += diff;
943
diff = Nlm_LabelCopy(to, suffix, buflen);
946
return (Nlm_Int2)(len-buflen);
952
NLM_EXTERN Nlm_CharPtr LIBCALL StrCpyPtr (char FAR *Dest, char FAR *Start, char FAR *Stop)
953
/* copies the string from Start (inclusive) to Stop (exclusive) to
954
string Dest. Start and Stop MUST point to the same string!
957
Nlm_CharPtr To = Dest;
958
while ( *Start && ( Start < Stop ) )
965
NLM_EXTERN Nlm_CharPtr LIBCALL StrDupPtr(char FAR *Start,char FAR *Stop)
966
/* copies the string from Start (inclusive) to Stop (exclusive) to
967
a new string and returns its pointer.
968
Start and Stop MUST point to the same string!
971
Nlm_CharPtr Dest, DestPtr;
973
DestPtr = Dest = (Nlm_CharPtr)Nlm_MemGet((Stop - Start + 1),MGET_ERRPOST);
975
while ( *Start && ( Start < Stop ) )
976
*DestPtr++ = *Start++;
981
NLM_EXTERN Nlm_CharPtr LIBCALL SkipSpaces(char FAR *Line)
982
/* returns the next non-whitespace position in the line. */
984
while( (*Line != NULLB) && isspace(*Line) )
990
NLM_EXTERN Nlm_CharPtr LIBCALL SkipToSpace(char FAR *theString)
991
/* returns a pointer to the leftmost whitespace character in theString,
992
or to the trailing NULL if no whitespace is found. */
994
while (*theString && ( ! isspace(*theString) ))
1000
NLM_EXTERN Nlm_CharPtr LIBCALL SkipChar(char FAR *theString,char Char)
1001
/* returns a pointer to the next non-Char character in theString. */
1003
while (*theString && ( *theString == Char ) )
1009
NLM_EXTERN Nlm_CharPtr LIBCALL SkipToChar(char FAR *theString,char Char)
1010
/* returns a pointer to leftmost instance of Char in theString, or to
1011
the trailing NULL if none found. */
1013
while (*theString && ( *theString != Char ) )
1019
NLM_EXTERN Nlm_CharPtr LIBCALL SkipPastChar(char FAR *theString,char Char)
1020
/* returns a pointer to the next character after the leftmost instance
1021
of Char in theString, or to the trailing NULL if none found. */
1023
while (*theString && ( *theString != Char ) )
1027
return(theString+1);
1032
NLM_EXTERN Nlm_CharPtr LIBCALL SkipToString(char FAR *theString,char FAR *Find)
1033
/* returns a pointer to leftmost instance of Find in theString, or to
1034
the trailing NULL if none found. */
1036
char *FindPtr,*theStringPtr;
1041
theStringPtr = theString;
1042
while (*FindPtr && ( *FindPtr == *theStringPtr))
1048
if (*FindPtr == '\0')
1058
NLM_EXTERN Nlm_CharPtr LIBCALL NoCaseSkipToString(char FAR *theString,char FAR *Find)
1059
/* returns a pointer to leftmost instance of Find in theString,
1060
ignoring case, or to the trailing NULL if none found. */
1062
char *FindPtr,*theStringPtr;
1067
theStringPtr = theString;
1068
while (*FindPtr && (toupper(*FindPtr) == toupper(*theStringPtr)))
1074
if (*FindPtr == '\0')
1084
NLM_EXTERN Nlm_CharPtr LIBCALL SkipPastString(char FAR *theString,char FAR *Find)
1085
/* returns a pointer to the next character after the leftmost
1086
instance of Find in theString, or to the trailing NULL if none found. */
1088
Nlm_CharPtr Ptr = SkipToString(theString,Find);
1093
return (Ptr + Nlm_StringLen(Find));
1096
NLM_EXTERN Nlm_CharPtr LIBCALL NoCaseSkipPastString(char FAR *theString,char FAR *Find)
    /* returns a pointer to the next character after the leftmost
       instance of Find in theString, ignoring case,
       or to the trailing NULL if none found. */
{
    /* The search itself must ignore case; the case-sensitive
       SkipToString would miss matches that differ only in case. */
    Nlm_CharPtr Ptr = NoCaseSkipToString(theString,Find);

    /* Not found: Ptr already sits on the terminator, and stepping over
       the length of Find would run past the end of theString. */
    if (*Ptr == '\0')
        return (Ptr);

    return (Ptr + Nlm_StringLen(Find));
}
1109
NLM_EXTERN Nlm_CharPtr LIBCALL SkipSet(char FAR *theString,char FAR *CharSet)
1110
/* returns a pointer to the next character in theString that is
1113
Nlm_CharPtr CharSetPtr;
1116
CharSetPtr = CharSet;
1117
while ( *CharSetPtr && *theString != *CharSetPtr )
1119
if ( ! *CharSetPtr )
1126
NLM_EXTERN Nlm_CharPtr LIBCALL SkipToSet(char FAR *theString,char FAR *CharSet)
1127
/* returns a pointer to leftmost instance of any char in string
1128
CharSet in theString, or to the trailing NULL if none found. */
1130
Nlm_CharPtr CharSetPtr;
1133
CharSetPtr = CharSet;
1134
while ( *CharSetPtr && (*theString != *CharSetPtr) )
1144
NLM_EXTERN Nlm_Boolean LIBCALL StringSub(char FAR *theString, char Find, char Replace)
1145
/* replaces all instances of the character Find in the given theString with
1146
the character Replace. It returns TRUE if any characters were
1147
replaced, else FALSE. */
1149
Nlm_Boolean Replaced = FALSE;
1150
while ( *theString != NULLB )
1152
if ( *theString == Find )
1154
*theString = Replace;
1162
NLM_EXTERN Nlm_Boolean LIBCALL StringSubSet(char FAR *theString,char FAR *FindSet, char Replace)
1163
/* replaces all instances of any character in string FindSet found in
1164
theString with the character Replace. It returns TRUE if any
1165
characters were replaced, else FALSE. */
1167
Nlm_CharPtr FindPtr;
1168
Nlm_Boolean Replaced = FALSE;
1170
while ( *theString != NULLB )
1173
while ( *FindPtr != NULLB )
1175
if (*theString == *FindPtr )
1177
*theString = Replace;
1187
NLM_EXTERN Nlm_Boolean LIBCALL StringSubString(char FAR *theString, char FAR *Find, char FAR *Replace, Nlm_Int4 MaxLength)
1188
/* replaces all non-overlapping instances of the string Find in the
1189
string theString with the string Replace. The strings do not have to be the
1190
same size. The new string is truncated at MaxLength characters
1191
(including the final NULL). If MaxLength is zero, the string's current
1192
length is presumed to be the maximum.
1193
It returns TRUE if any strings were replaced, else FALSE.
1196
Nlm_CharPtr FindPtr,ComparePtr,StringPtr,NewString, NewStringPtr;
1197
Nlm_Int4 SpaceNeeded,Len;
1198
Nlm_Boolean Replaced = FALSE;
1203
Len = Nlm_StringLen(theString);
1204
SpaceNeeded = MAX( (Nlm_Int4)((Len * Nlm_StringLen(Replace)
1205
* sizeof(Nlm_Char) )
1206
/ Nlm_StringLen(Find) + 1),Len) + 1;
1208
NewStringPtr = NewString = (Nlm_CharPtr)
1209
Nlm_MemGet((size_t)SpaceNeeded, MGET_ERRPOST);
1211
StringPtr = theString;
1212
while (*StringPtr != NULLB)
1215
ComparePtr = StringPtr;
1216
while ( (*FindPtr != NULLB) && (*FindPtr == *ComparePtr) )
1222
/* if we found the entire string, replace it. */
1223
if (*FindPtr == NULLB)
1225
NewStringPtr = StringMove(NewStringPtr,Replace);
1226
StringPtr = ComparePtr;
1230
/* otherwise, move on to the next character. */
1231
*NewStringPtr++ = *StringPtr++;
1233
*NewStringPtr = NULLB;
1236
MaxLength = strlen(theString) + 1;
1238
/* Truncate the string, if necessary.*/
1239
if ((Nlm_Int4)strlen(NewString) >= MaxLength - 1)
1241
NewString[MaxLength-1] = NULLB;
1244
Nlm_StringCpy(theString,NewString);
1245
Nlm_MemFree(NewString);
1251
NLM_EXTERN Nlm_CharPtr LIBCALL StringEnd(char FAR *theString)
1252
/* This returns a pointer to the terminating null of the given theString.
1255
while (*theString != NULLB)
1262
NLM_EXTERN Nlm_Int4 LIBCALL CountChar(char FAR *theString, char Char)
1263
/* returns the number of times a given character appears in the given
1266
Nlm_Int4 CharCount = 0;
1269
if (*theString++ == Char)
1275
NLM_EXTERN Nlm_Int4 LIBCALL CountStrings(char FAR *theString, char FAR *Find)
1276
/* This returns the number of non-overlapping instances of Find
1280
Nlm_Int4 Len = Nlm_StringLen(Find);
1281
Nlm_CharPtr Ptr = theString;
1285
Ptr = SkipToString(Ptr,Find);
1296
NLM_EXTERN Nlm_Int4 LIBCALL CountSet(char FAR *theString, char FAR *Set)
1297
/* returns the number of times any one of a given set of characters
1298
appears in the given theString. */
1300
Nlm_Int4 CharCount = 0;
1309
if (*theString == *SetPtr)
1324
NLM_EXTERN Nlm_CharPtr LIBCALL StripSpaces(char FAR *Line)
1325
/* returns a pointer to the next nonwhitespace character in the string
1326
and also removes trailing whitespaces. */
1330
Line = SkipSpaces(Line);
1333
Ptr = StringEnd(Line) - 1;
1334
while ( (Ptr > Line) && isspace(*Ptr) )
1342
NLM_EXTERN void LIBCALL CleanSpaces(char FAR *Line)
1343
/* This in-place deletes all leading and trailing whitespace and replaces
1344
all instances of one or more whitespace characters with one space,
1345
or one newline if the whitespace contained a newline.
1348
Nlm_Boolean HasNewLine;
1349
Nlm_CharPtr LinePtr = SkipSpaces(Line);
1353
while ( *LinePtr && ! isspace (*LinePtr) )
1354
*Line++ = *LinePtr++;
1357
while ( isspace(*LinePtr) )
1359
if (*LinePtr == NEWLINE)
1374
NLM_EXTERN Nlm_Int4 LIBCALL StringDiff(char FAR *This, char FAR *That)
1375
/* This returns the character offset where the strings differ, or -1 if
1376
the strings are the same. */
1380
while (*This && (*This == *That) )
1393
NLM_EXTERN Nlm_Int4 LIBCALL StringDiffNum(char FAR *This, char FAR *That, Nlm_Int4 NumChars)
1394
/* returns the character offset where the strings differ, examining only
1395
the first NumChars Characters. returns -1 if the two substrings
1396
examined are equivalent. */
1400
while ((NumChars > 0) && *This && (*This == *That) )
1408
if ( NumChars && (*This || *That) )
1414
NLM_EXTERN void LIBCALL TruncateString(char FAR *theString, Nlm_Int4 Length)
    /* truncates a string to fit into an array of Length characters,
       including the trailing NULL.  A NULL string or a Length of zero
       or less leaves the string untouched: no array position exists to
       hold the terminator, and indexing Length-1 would write before
       the start of the buffer. */
{
    if (theString == NULL || Length <= 0)
        return;

    /* Only write when the string really is too long.  Compare as
       size_t so that a very long string is not squeezed into a signed
       32-bit value first. */
    if (strlen(theString) >= (size_t)Length)
        theString [Length-1] = NULLB;
}
1422
NLM_EXTERN Nlm_CharPtr LIBCALL TruncateStringCopy(char FAR *theString, Nlm_Int4 Length)
    /* Returns a new string consisting of at most the first length-1
       characters of theString.  The caller owns the returned buffer.
       Returns NULL if theString is NULL, if Length is zero or less
       (there is no room even for the terminator), or if the allocation
       fails. */
{
    Nlm_CharPtr NewString;

    /* Length - 1 is used as an unsigned copy count below, so it must
       not be allowed to go negative. */
    if (theString == NULL || Length <= 0)
        return NULL;

    NewString = (Nlm_CharPtr)MemNew((size_t)Length);
    if (NewString == NULL)
        return NULL;

    StrNCpy(NewString, theString, (size_t)(Length - 1));
    NewString[Length-1] = NULLB;
    return NewString;
}
1434
NLM_EXTERN Nlm_Int4 LIBCALL BreakString(char FAR *theString, Nlm_CharPtr PNTR Words)
1435
/* Breaks up a string at each occurrence of one or more spaces, placing
1436
each substring obtained into the array Words and returning the
1437
number of substrings obtained.
1440
Nlm_CharPtr Start, Stop, *WordPtr;
1442
Start = SkipSpaces(theString);
1447
Stop = SkipToSpace(Start);
1448
StrCpyPtr(*WordPtr++,Start,Stop);
1449
Start = SkipSpaces(Stop);
1452
return((Nlm_Int4) (WordPtr - Words) );
1455
NLM_EXTERN void LIBCALL DeleteChar(char FAR *theString,char Delete)
1456
/* removes all instances of the character Delete from the theString. */
1458
Nlm_CharPtr StringPtr = theString;
1462
if (*StringPtr != Delete)
1463
*theString++ = *StringPtr++;
1473
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_StringPrintable(const Nlm_Char PNTR str,
1477
const Nlm_Char PNTR s;
1478
Nlm_CharPtr new_str, new_s;
1485
for (s = str; *s; s++)
1488
else if (*s == '\t' || IS_PRINT(*s))
1493
for (s = str; *s; s++)
1494
if (*s == '\n' || *s == '\t' || IS_PRINT(*s))
1498
new_str = (Nlm_CharPtr)Nlm_MemGet(str_len+1, MGET_ERRPOST);
1504
for (s = str, new_s = new_str; *s; s++)
1510
else if (*s == '\t' || IS_PRINT(*s))
1515
for (s = str, new_s = new_str; *s; s++)
1516
if (*s == '\n' || *s == '\t' || IS_PRINT(*s))
1525
/*****************************************************************************
1526
* Text Formatting Functions
1527
****************************************************************************/
1529
#define MAX_NO_DASH 2
1535
/* Act like a regular memcpy but replace all space symbols with #SPACE
1537
static void x_memcpy(Nlm_Char FAR PNTR targ, const Nlm_Char FAR PNTR src,
1543
if ( IS_WHITESP(*src) )
1551
/* Set of conditions when the decision on the line breaking can be
1552
* made having only 2 symbols("ch0" and "ch1" -- to the left and to the
1553
* right of the break, respectively)
1555
static int can_break(Nlm_Char ch0, Nlm_Char ch1)
1558
IS_WHITESP(ch1) || IS_WHITESP(ch0))
1594
if (ch1 != '.' && ch1 != '?' && ch1 != '!')
1603
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_text2stream(const Nlm_Char FAR PNTR str)
1606
Nlm_CharPtr line, s;
1611
while (*str && IS_WHITESP( *str ))
1616
s = line = (Nlm_CharPtr) Nlm_MemNew(Nlm_StringLen(str) + 1);
1618
for ( ; *str; str++)
1620
if ( IS_WHITESP(*str) )
1630
s - line > 1 && *(s-1) == '-' && IS_ALPHA(*(s-2)))
1633
s--; /* eat dash before end-of-line, merge the broken word */
1647
return (Nlm_CharPtr) realloc(line, Nlm_StringLen(line) + 1);
1651
NLM_EXTERN size_t Nlm_stream2text(const Nlm_Char FAR PNTR str, size_t max_col,
1654
const Nlm_Char FAR PNTR s;
1655
const Nlm_Char FAR PNTR sb; /* the nearest breakable position */
1659
size_t len = Nlm_StringLen( str );
1660
len = max_col < len ? max_col : len;
1663
if (len == 0 || can_break(str[len-1], str[len]))
1666
/* go to the beginning of the last completely fit word */
1667
for (sb = &str[len-1];
1668
sb != str && !IS_WHITESP(*sb) && !can_break(*sb, *(sb+1));
1670
while (sb != str && IS_WHITESP(*sb))
1674
{ /* the first word is longer than "max_col" */
1675
if (len > MAX_NO_DASH && IS_ALPHA(str[len-1]) && IS_ALPHA(str[len]))
1676
*dash = 1; /* recommend use dash in the place of last symbol */
1681
/* decide whether and how to break the last alphabetic word */
1683
/* count the lead and the tail of the last non-fit word */
1684
for (s = &str[len]; *s != '\0' && IS_ALPHA(*s); s++, n_tail++) continue;
1685
for (s = &str[len-1]; IS_ALPHA(*s); s--, n_lead++) continue;
1688
/* try to "move" symbols from the lead for the sake of the tail */
1689
while (n_lead > MIN_LEAD && n_tail < MIN_TAIL) {
1694
if (n_lead < MIN_LEAD || n_tail < MIN_TAIL)
1695
{ /* no luck this time -- move the whole non-fit word to the next line */
1696
return (sb - str + 1);
1701
return (s - str + n_lead + 1);
1706
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_rule_line(const Nlm_Char FAR PNTR str,
1708
enumRuleLine method)
1713
/* allocate and initialize the resulting string */
1714
Nlm_CharPtr s = (Nlm_CharPtr) Nlm_MemNew(len + 1);
1715
Nlm_MemSet(s, SPACE, len);
1718
/* skip leading and trailing spaces */
1719
while ( IS_WHITESP(*str) )
1723
for (str_len = Nlm_StringLen( str ); IS_WHITESP(str[str_len-1]); str_len--) continue;
1726
/* truncate the original string if doesn't fit */
1727
if (len <= str_len) {
1728
x_memcpy(s, str, len);
1732
n_space = len - str_len;
1737
x_memcpy(s, str, str_len);
1742
x_memcpy(s + n_space, str, str_len);
1750
const Nlm_Char FAR PNTR _str = str;
1752
for ( ; i--; _str++)
1755
if ( IS_WHITESP(*_str) )
1757
if ( !prev_space ) {
1766
ASSERT ( !prev_space );
1770
size_t n_div = n_space / n_gap;
1771
size_t n_mod = n_space % n_gap;
1774
for (_str = str; *_str; )
1776
if ( !IS_WHITESP( *_str ) )
1780
size_t n_add = n_div;
1789
for (_str++; IS_WHITESP(*_str); _str++) continue;
1794
ASSERT ( _s == s + len );
1796
} /* else -- use RL_Center */
1801
x_memcpy(s + n_space/2, str, str_len);
1815
#ifdef TEST_TEXT_FMT
1816
Nlm_Int2 Nlm_Main( void )
1819
Nlm_Int4 argc = Nlm_GetArgc();
1820
Nlm_CharPtr *argv = Nlm_GetArgv();
1822
Nlm_Char x_str[MAX_COL * 1024];
1826
FILE *logfile = Nlm_FileOpen("stdout", "w");
1830
fprintf(logfile, "Usage: %s <file_name>\n", argv[0]);
1834
fp = Nlm_FileOpen(argv[1], "rb");
1836
fprintf(logfile, "Cannot open file: \"%s\"\n", argv[1]);
1840
n_read = FileRead(x_str, 1, sizeof(x_str) - 1, fp);
1841
if (n_read < 2 * MAX_COL) {
1842
fprintf(logfile, "Too few bytes read from \"%s\": %d\n", argv[1], n_read);
1846
ASSERT ( n_read < sizeof(x_str) );
1847
x_str[n_read] = '\0';
1850
size_t max_col = MAX_COL - 1;
1852
enumRuleLine rule_method = RL_Center;
1854
Nlm_CharPtr str = text2stream( x_str );
1855
Nlm_CharPtr text_str = str;
1857
fprintf(logfile, "No non-space symbols in \"%s\"\n", argv[1]);
1861
while (*str != '\0')
1863
Nlm_Char s[MAX_COL + 1];
1867
while (*str && IS_WHITESP(*str))
1870
n_print = stream2text(str, max_col, &dash);
1871
ASSERT ( (max_col > 0 && str && *str) == (n_print > 0) );
1872
ASSERT ( n_print <= max_col );
1873
ASSERT ( dash != -12345 );
1875
Nlm_MemCpy(s, str, n_print);
1877
ASSERT ( dash == 0 || n_print > 1 );
1882
Nlm_CharPtr ruled_str = rule_line(s,
1883
(rule_method == RL_Right ||
1884
rule_method == RL_Center ) ?
1887
fprintf(logfile, "|%s|\n", ruled_str);
1888
Nlm_MemFree( ruled_str );
1893
if (max_col == 0 || max_col == MAX_COL)
1898
if (rule_method == RL_Spread)
1899
rule_method = RL_Left;
1904
Nlm_MemFree( text_str );
1907
Nlm_FileClose( logfile );
1908
Nlm_FileClose( fp );
1911
#endif /* TEST_TEXT_FMT */
1914
#ifdef TEST_INT8_CONVERSION
1915
Nlm_Int2 Nlm_Main( void )
1923
s = Nlm_Int8ToString(0, buffer, sizeof(buffer));
1925
printf("0 = %s\n", s);
1926
s = Nlm_Int8ToString(1, buffer, sizeof(buffer));
1928
printf("1 = %s\n", s);
1929
s = Nlm_Int8ToString(1222222, buffer, sizeof(buffer));
1931
printf("1222222 = %s\n", s);
1932
s = Nlm_Int8ToString(-15, buffer, sizeof(buffer));
1934
printf("-15 = %s\n", s);
1935
s = Nlm_Int8ToString(-15555555, buffer, sizeof(buffer));
1937
printf("-15555555 = %s\n", s);
1938
s = Nlm_Int8ToString(INT8_MAX, buffer, sizeof(buffer));
1940
printf("INT8_MAX = %s\n", s);
1941
s = Nlm_Int8ToString(INT8_MIN, buffer, sizeof(buffer));
1943
printf("INT8_MIN = %s\n", s);
1944
s = Nlm_Int8ToString(UINT8_MAX, buffer, sizeof(buffer));
1946
printf("UINT8_MAX = %s\n", s);
1947
s = Nlm_Uint8ToString(UINT8_MAX, buffer, sizeof(buffer));
1949
printf("UINT8_MAX = %s\n", s);
1951
strcpy(buffer, "9223372036854775807");
1952
i = Nlm_StringToInt8(buffer, &p);
1953
assert(p == buffer + strlen(buffer));
1954
s = Nlm_Int8ToString(i, buffer + strlen(buffer) + 1,
1955
sizeof(buffer) - strlen(buffer) - 1);
1957
assert(strcmp(buffer, s) == 0);
1958
printf("INT8_MAX input Ok\n");
1960
s = Nlm_Int8ToString(i, buffer, sizeof(buffer));
1962
printf("INT8_MAX+1 = %s\n", s);
1964
strcpy(buffer, "-9223372036854775808");
1965
i = Nlm_StringToInt8(buffer, &p);
1966
assert(p == buffer + strlen(buffer));
1967
s = Nlm_Int8ToString(i, buffer + strlen(buffer) + 1,
1968
sizeof(buffer) - strlen(buffer) - 1);
1970
assert(strcmp(buffer, s) == 0);
1971
printf("INT8_MIN input Ok\n");
1973
s = Nlm_Int8ToString(i, buffer, sizeof(buffer));
1975
printf("INT8_MIN-1 = %s\n", s);
1977
strcpy(buffer, "18446744073709551615");
1978
j = Nlm_StringToUint8(buffer, &p);
1979
assert(p == buffer + strlen(buffer));
1980
s = Nlm_Uint8ToString(j, buffer + strlen(buffer) + 1,
1981
sizeof(buffer) - strlen(buffer) - 1);
1983
assert(strcmp(buffer, s) == 0);
1984
printf("UINT8_MAX input Ok\n");
1986
s = Nlm_Uint8ToString(j, buffer, sizeof(buffer));
1988
printf("UINT8_MAX+1 = %s\n", s);
1990
strcpy(buffer, "1234567890abcdef0123546");
1991
i = Nlm_StringToInt8(buffer, &p);
1993
s = Nlm_Int8ToString(i, buffer + strlen(buffer) + 1,
1994
sizeof(buffer) - strlen(buffer) - 1);
1996
printf("Out of %s only %.*s was accepted as input for Int8 %s\n",
1997
buffer, (int)(p - buffer), buffer, s);
1999
strcpy(buffer, "-987654321234567890abcdef0123546");
2000
i = Nlm_StringToInt8(buffer, &p);
2002
s = Nlm_Int8ToString(i, buffer + strlen(buffer) + 1,
2003
sizeof(buffer) - strlen(buffer) - 1);
2005
printf("Out of %s only %.*s was accepted as input for Int8 %s\n",
2006
buffer, (int)(p - buffer), buffer, s);
2008
strcpy(buffer, "987654321234567890abcdef0123546");
2009
j = Nlm_StringToUint8(buffer, &p);
2011
s = Nlm_Uint8ToString(j, buffer + strlen(buffer) + 1,
2012
sizeof(buffer) - strlen(buffer) - 1);
2014
printf("Out of %s only %.*s was accepted as input for Uint8 %s\n",
2015
buffer, (int)(p - buffer), buffer, s);
2017
strcpy(buffer, "-987654321234567890abcdef0123546");
2018
j = Nlm_StringToUint8(buffer, &p);
2020
printf("Conversion of %s (negative) to Uint8 caused error\n", buffer);
2022
strcpy(buffer, "9223372036854775808");
2023
i = Nlm_StringToInt8(buffer, &p);
2025
printf("Conversion of %s (INT8_MAX + 1) to Int8 caused error\n", buffer);
2027
strcpy(buffer, "-9223372036854775809");
2028
i = Nlm_StringToInt8(buffer, &p);
2030
printf("Conversion of %s (INT8_MIN - 1) to Int8 caused error\n", buffer);
2032
strcpy(buffer, "18446744073709551616");
2033
j = Nlm_StringToUint8(buffer, &p);
2035
printf("Conversion of %s (UINT8_MAX + 1) to Uint8 caused error\n", buffer);
2037
printf("All tests succeeded\n");
2041
#endif /* TEST_INT8_CONVERSION */