2
* psql - the PostgreSQL interactive terminal
4
* Copyright (c) 2000-2005, PostgreSQL Global Development Group
6
* $PostgreSQL: pgsql/src/bin/psql/stringutils.c,v 1.40 2005-01-01 05:43:08 momjian Exp $
8
#include "postgres_fe.h"
15
#include "stringutils.h"
18
static void strip_quotes(char *source, char quote, char escape, int encoding);
22
* Replacement for strtok() (a.k.a. poor man's flex)
24
* Splits a string into tokens, returning one token per call, then NULL
25
* when no more tokens exist in the given string.
27
* The calling convention is similar to that of strtok, but with more
30
* s - string to parse, if NULL continue parsing the last string
31
* whitespace - set of whitespace characters that separate tokens
32
* delim - set of non-whitespace separator characters (or NULL)
33
* quote - set of characters that can quote a token (NULL if none)
34
* escape - character that can quote quotes (0 if none)
35
* del_quotes - if TRUE, strip quotes from the returned token, else return
36
* it exactly as found in the string
37
* encoding - the active character-set encoding
39
* Characters in 'delim', if any, will be returned as single-character
40
* tokens unless part of a quoted token.
42
* Double occurrences of the quoting character are always taken to represent
43
* a single quote character in the data. If escape isn't 0, then escape
44
* followed by anything (except \0) is a data character too.
46
* Note that the string s is _not_ overwritten in this implementation.
48
* NB: it's okay to vary delim, quote, and escape from one call to the
49
* next on a single source string, but changing whitespace is a bad idea
50
* since you might lose data.
53
strtokx(const char *s,
54
const char *whitespace,
61
static char *storage = NULL;/* store the local copy of the user's
63
static char *string = NULL; /* pointer into storage where to continue
66
/* variously abused variables: */
76
* We may need extra space to insert delimiter nulls for adjacent
77
* tokens. 2X the space is a gross overestimate, but it's
78
* unlikely that this code will be used on huge strings anyway.
80
storage = pg_malloc(2 * strlen(s) + 1);
88
/* skip leading whitespace */
89
offset = strspn(string, whitespace);
90
start = &string[offset];
92
/* end of string reached? */
95
/* technically we don't need to free here, but we're nice */
102
/* test if delimiter character */
103
if (delim && strchr(delim, *start))
106
* If not at end of string, we need to insert a null to terminate
107
* the returned token. We can just overwrite the next character
108
* if it happens to be in the whitespace set ... otherwise move
109
* over the rest of the string to make room. (This is why we
110
* allocated extra space above).
115
if (!strchr(whitespace, *p))
116
memmove(p + 1, p, strlen(p) + 1);
122
/* at end of string, so no extra work */
129
/* test if quoting character */
130
if (quote && strchr(quote, *start))
132
/* okay, we have a quoted token, now scan for the closer */
133
char thisquote = *start;
135
for (p = start + 1; *p; p += PQmblen(p, encoding))
137
if (*p == escape && p[1] != '\0')
138
p++; /* process escaped anything */
139
else if (*p == thisquote && p[1] == thisquote)
140
p++; /* process doubled quote */
141
else if (*p == thisquote)
143
p++; /* skip trailing quote */
149
* If not at end of string, we need to insert a null to terminate
150
* the returned token. See notes above.
154
if (!strchr(whitespace, *p))
155
memmove(p + 1, p, strlen(p) + 1);
161
/* at end of string, so no extra work */
165
/* Clean up the token if caller wants that */
167
strip_quotes(start, thisquote, escape, encoding);
173
* Otherwise no quoting character. Scan till next whitespace,
174
* delimiter or quote. NB: at this point, *start is known not to be
175
* '\0', whitespace, delim, or quote, so we will consume at least one
178
offset = strcspn(start, whitespace);
182
unsigned int offset2 = strcspn(start, delim);
184
if (offset > offset2)
190
unsigned int offset2 = strcspn(start, quote);
192
if (offset > offset2)
199
* If not at end of string, we need to insert a null to terminate the
200
* returned token. See notes above.
204
if (!strchr(whitespace, *p))
205
memmove(p + 1, p, strlen(p) + 1);
211
/* at end of string, so no extra work */
222
* Remove quotes from the string at *source. Leading and trailing occurrences
223
* of 'quote' are removed; embedded double occurrences of 'quote' are reduced
224
* to single occurrences; if 'escape' is not 0 then 'escape' removes special
225
* significance of next character.
227
* Note that the source string is overwritten in-place.
230
strip_quotes(char *source, char quote, char escape, int encoding)
240
if (*src && *src == quote)
241
src++; /* skip leading quote */
248
if (c == quote && src[1] == '\0')
249
break; /* skip trailing quote */
250
else if (c == quote && src[1] == quote)
251
src++; /* process doubled quote */
252
else if (c == escape && src[1] != '\0')
253
src++; /* process escaped character */
255
i = PQmblen(src, encoding);