1
/*-------------------------------------------------------------------------
4
* Functions for the built-in types char(n) and varchar(n).
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.108 2004-12-31 22:01:22 pgsql Exp $
13
*-------------------------------------------------------------------------
17
#include "access/hash.h"
18
#include "catalog/pg_type.h"
19
#include "miscadmin.h"
20
#include "utils/array.h"
21
#include "utils/builtins.h"
22
#include "utils/fmgroids.h"
24
#include "mb/pg_wchar.h"
28
* CHAR() and VARCHAR() types are part of the ANSI SQL standard. CHAR()
29
* is for blank-padded strings whose length is specified in CREATE TABLE.
30
* VARCHAR is for storing strings whose length is at most the length specified
31
* at CREATE TABLE time.
33
* It's hard to implement these types because we cannot figure out
34
* the length of the type from the type itself. I changed (hopefully all) the
35
* fmgr calls that invoke input functions of a data type to supply the
36
* length also. (e.g., in INSERTs, we have the tupleDescriptor which contains
37
* the length of the attributes and hence the exact length of the char() or
38
* varchar(). We pass this to bpcharin() or varcharin().) In the case where
39
* we cannot determine the length, we pass in -1 instead and the input string
40
* must be null-terminated.
42
* We actually implement this as a varlena so that we don't have to pass in
43
* the length for the comparison functions. (The difference between these
44
* types and "text" is that we truncate and possibly blank-pad the string
51
/*****************************************************************************
53
*****************************************************************************/
56
* Convert a C string to CHARACTER internal representation. atttypmod
57
* is the declared length of the type plus VARHDRSZ.
59
* If the C string is too long, raise an error, unless the extra
60
* characters are spaces, in which case they're truncated. (per SQL)
63
bpcharin(PG_FUNCTION_ARGS)
65
char *s = PG_GETARG_CSTRING(0);
68
Oid typelem = PG_GETARG_OID(1);
70
int32 atttypmod = PG_GETARG_INT32(2);
76
int charlen; /* number of characters in the input string */
80
pg_verifymbstr(s, len, false);
82
charlen = pg_mbstrlen(s);
84
/* If typmod is -1 (or invalid), use the actual string length */
85
if (atttypmod < (int32) VARHDRSZ)
88
maxlen = atttypmod - VARHDRSZ;
92
/* Verify that extra characters are spaces, and clip them off */
93
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
96
* at this point, len is the actual BYTE length of the input
97
* string, maxlen is the max number of CHARACTERS allowed for this
100
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
104
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
105
errmsg("value too long for type character(%d)",
109
* XXX: at this point, maxlen is the necessary byte length, not
110
* the number of CHARACTERS!
117
* XXX: at this point, maxlen is the necessary byte length, not
118
* the number of CHARACTERS!
120
maxlen = len + (maxlen - charlen);
123
result = palloc(maxlen + VARHDRSZ);
124
VARATT_SIZEP(result) = maxlen + VARHDRSZ;
126
for (i = 0; i < len; i++)
129
/* blank pad the string if necessary */
130
for (; i < maxlen; i++)
133
PG_RETURN_BPCHAR_P(result);
138
* Convert a CHARACTER value to a C string.
141
bpcharout(PG_FUNCTION_ARGS)
143
BpChar *s = PG_GETARG_BPCHAR_P(0);
147
/* copy and add null term */
148
len = VARSIZE(s) - VARHDRSZ;
149
result = (char *) palloc(len + 1);
150
memcpy(result, VARDATA(s), len);
153
PG_RETURN_CSTRING(result);
157
* bpcharrecv - converts external binary format to bpchar
160
bpcharrecv(PG_FUNCTION_ARGS)
162
/* Exactly the same as textrecv, so share code */
163
return textrecv(fcinfo);
167
* bpcharsend - converts bpchar to binary format
170
bpcharsend(PG_FUNCTION_ARGS)
172
/* Exactly the same as textsend, so share code */
173
return textsend(fcinfo);
178
* Converts a CHARACTER type to the specified size.
180
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
181
* isExplicit is true if this is for an explicit cast to char(N).
183
* Truncation rules: for an explicit cast, silently truncate to the given
184
* length; for an implicit cast, raise error unless extra characters are
185
* all spaces. (This is sort-of per SQL: the spec would actually have us
186
* raise a "completion condition" for the explicit cast case, but Postgres
187
* hasn't got such a concept.)
190
bpchar(PG_FUNCTION_ARGS)
192
BpChar *source = PG_GETARG_BPCHAR_P(0);
193
int32 maxlen = PG_GETARG_INT32(1);
194
bool isExplicit = PG_GETARG_BOOL(2);
200
int charlen; /* number of characters in the input string
203
len = VARSIZE(source);
205
charlen = pg_mbstrlen_with_len(VARDATA(source), len - VARHDRSZ) + VARHDRSZ;
207
/* No work if typmod is invalid or supplied data matches it already */
208
if (maxlen < (int32) VARHDRSZ || charlen == maxlen)
209
PG_RETURN_BPCHAR_P(source);
211
if (charlen > maxlen)
213
/* Verify that extra characters are spaces, and clip them off */
216
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
217
maxlen - VARHDRSZ) + VARHDRSZ;
221
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
222
if (*(VARDATA(source) + i) != ' ')
224
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
225
errmsg("value too long for type character(%d)",
226
maxlen - VARHDRSZ)));
232
* XXX: at this point, maxlen is the necessary byte
233
* length+VARHDRSZ, not the number of CHARACTERS!
240
* XXX: at this point, maxlen is the necessary byte
241
* length+VARHDRSZ, not the number of CHARACTERS!
243
maxlen = len + (maxlen - charlen);
248
result = palloc(maxlen);
249
VARATT_SIZEP(result) = maxlen;
252
for (i = 0; i < len - VARHDRSZ; i++)
255
/* blank pad the string if necessary */
256
for (; i < maxlen - VARHDRSZ; i++)
259
PG_RETURN_BPCHAR_P(result);
264
* Convert char to bpchar(1).
267
char_bpchar(PG_FUNCTION_ARGS)
269
char c = PG_GETARG_CHAR(0);
272
result = (BpChar *) palloc(VARHDRSZ + 1);
274
VARATT_SIZEP(result) = VARHDRSZ + 1;
275
*(VARDATA(result)) = c;
277
PG_RETURN_BPCHAR_P(result);
282
* Converts a bpchar() type to a NameData type.
285
bpchar_name(PG_FUNCTION_ARGS)
287
BpChar *s = PG_GETARG_BPCHAR_P(0);
291
len = VARSIZE(s) - VARHDRSZ;
293
/* Truncate to max length for a Name */
294
if (len >= NAMEDATALEN)
295
len = NAMEDATALEN - 1;
297
/* Remove trailing blanks */
300
if (*(VARDATA(s) + len - 1) != ' ')
305
result = (NameData *) palloc(NAMEDATALEN);
306
memcpy(NameStr(*result), VARDATA(s), len);
308
/* Now null pad to full length... */
309
while (len < NAMEDATALEN)
311
*(NameStr(*result) + len) = '\0';
315
PG_RETURN_NAME(result);
319
* Converts a NameData type to a bpchar type.
322
name_bpchar(PG_FUNCTION_ARGS)
324
Name s = PG_GETARG_NAME(0);
328
len = strlen(NameStr(*s));
329
result = (BpChar *) palloc(VARHDRSZ + len);
330
memcpy(VARDATA(result), NameStr(*s), len);
331
VARATT_SIZEP(result) = len + VARHDRSZ;
333
PG_RETURN_BPCHAR_P(result);
337
/*****************************************************************************
338
* varchar - varchar(n)
340
* Note: varchar piggybacks on type text for most operations, and so has no
341
* C-coded functions except for I/O and typmod checking.
342
*****************************************************************************/
345
* Convert a C string to VARCHAR internal representation. atttypmod
346
* is the declared length of the type plus VARHDRSZ.
348
* Note that atttypmod is regarded as the number of characters, which
349
* is not necessarily the same as the number of bytes.
351
* If the C string is too long, raise an error, unless the extra characters
352
* are spaces, in which case they're truncated. (per SQL)
355
varcharin(PG_FUNCTION_ARGS)
357
char *s = PG_GETARG_CSTRING(0);
360
Oid typelem = PG_GETARG_OID(1);
362
int32 atttypmod = PG_GETARG_INT32(2);
367
/* verify encoding */
369
pg_verifymbstr(s, len, false);
371
maxlen = atttypmod - VARHDRSZ;
373
if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
375
/* Verify that extra characters are spaces, and clip them off */
376
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
378
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
382
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
383
errmsg("value too long for type character varying(%d)",
387
result = palloc(len + VARHDRSZ);
388
VARATT_SIZEP(result) = len + VARHDRSZ;
389
memcpy(VARDATA(result), s, len);
391
PG_RETURN_VARCHAR_P(result);
396
* Convert a VARCHAR value to a C string.
399
varcharout(PG_FUNCTION_ARGS)
401
VarChar *s = PG_GETARG_VARCHAR_P(0);
405
/* copy and add null term */
406
len = VARSIZE(s) - VARHDRSZ;
407
result = palloc(len + 1);
408
memcpy(result, VARDATA(s), len);
411
PG_RETURN_CSTRING(result);
415
* varcharrecv - converts external binary format to varchar
418
varcharrecv(PG_FUNCTION_ARGS)
420
/* Exactly the same as textrecv, so share code */
421
return textrecv(fcinfo);
425
* varcharsend - converts varchar to binary format
428
varcharsend(PG_FUNCTION_ARGS)
430
/* Exactly the same as textsend, so share code */
431
return textsend(fcinfo);
436
* Converts a VARCHAR type to the specified size.
438
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
439
* isExplicit is true if this is for an explicit cast to varchar(N).
441
* Truncation rules: for an explicit cast, silently truncate to the given
442
* length; for an implicit cast, raise error unless extra characters are
443
* all spaces. (This is sort-of per SQL: the spec would actually have us
444
* raise a "completion condition" for the explicit cast case, but Postgres
445
* hasn't got such a concept.)
448
varchar(PG_FUNCTION_ARGS)
450
VarChar *source = PG_GETARG_VARCHAR_P(0);
451
int32 maxlen = PG_GETARG_INT32(1);
452
bool isExplicit = PG_GETARG_BOOL(2);
458
len = VARSIZE(source);
459
/* No work if typmod is invalid or supplied data fits it already */
460
if (maxlen < (int32) VARHDRSZ || len <= maxlen)
461
PG_RETURN_VARCHAR_P(source);
463
/* only reach here if string is too long... */
465
/* truncate multibyte string preserving multibyte boundary */
466
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
471
for (i = maxmblen; i < len - VARHDRSZ; i++)
472
if (*(VARDATA(source) + i) != ' ')
474
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
475
errmsg("value too long for type character varying(%d)",
476
maxlen - VARHDRSZ)));
479
len = maxmblen + VARHDRSZ;
480
result = palloc(len);
481
VARATT_SIZEP(result) = len;
482
memcpy(VARDATA(result), VARDATA(source), len - VARHDRSZ);
484
PG_RETURN_VARCHAR_P(result);
488
/*****************************************************************************
490
*****************************************************************************/
492
/* "True" length (not counting trailing blanks) of a BpChar */
494
bcTruelen(BpChar *arg)
496
char *s = VARDATA(arg);
500
len = VARSIZE(arg) - VARHDRSZ;
501
for (i = len - 1; i >= 0; i--)
510
bpcharlen(PG_FUNCTION_ARGS)
512
BpChar *arg = PG_GETARG_BPCHAR_P(0);
515
/* get number of bytes, ignoring trailing spaces */
516
len = bcTruelen(arg);
518
/* in multibyte encoding, convert to number of characters */
519
if (pg_database_encoding_max_length() != 1)
520
len = pg_mbstrlen_with_len(VARDATA(arg), len);
522
PG_RETURN_INT32(len);
526
bpcharoctetlen(PG_FUNCTION_ARGS)
528
BpChar *arg = PG_GETARG_BPCHAR_P(0);
530
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
534
/*****************************************************************************
535
* Comparison Functions used for bpchar
537
* Note: btree indexes need these routines not to leak memory; therefore,
538
* be careful to free working copies of toasted datums. Most places don't
539
* need to be so careful.
540
*****************************************************************************/
543
bpchareq(PG_FUNCTION_ARGS)
545
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
546
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
551
len1 = bcTruelen(arg1);
552
len2 = bcTruelen(arg2);
554
/* fast path for different-length inputs */
558
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
560
PG_FREE_IF_COPY(arg1, 0);
561
PG_FREE_IF_COPY(arg2, 1);
563
PG_RETURN_BOOL(result);
567
bpcharne(PG_FUNCTION_ARGS)
569
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
570
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
575
len1 = bcTruelen(arg1);
576
len2 = bcTruelen(arg2);
578
/* fast path for different-length inputs */
582
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
584
PG_FREE_IF_COPY(arg1, 0);
585
PG_FREE_IF_COPY(arg2, 1);
587
PG_RETURN_BOOL(result);
591
bpcharlt(PG_FUNCTION_ARGS)
593
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
594
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
599
len1 = bcTruelen(arg1);
600
len2 = bcTruelen(arg2);
602
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
604
PG_FREE_IF_COPY(arg1, 0);
605
PG_FREE_IF_COPY(arg2, 1);
607
PG_RETURN_BOOL(cmp < 0);
611
bpcharle(PG_FUNCTION_ARGS)
613
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
614
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
619
len1 = bcTruelen(arg1);
620
len2 = bcTruelen(arg2);
622
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
624
PG_FREE_IF_COPY(arg1, 0);
625
PG_FREE_IF_COPY(arg2, 1);
627
PG_RETURN_BOOL(cmp <= 0);
631
bpchargt(PG_FUNCTION_ARGS)
633
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
634
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
639
len1 = bcTruelen(arg1);
640
len2 = bcTruelen(arg2);
642
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
644
PG_FREE_IF_COPY(arg1, 0);
645
PG_FREE_IF_COPY(arg2, 1);
647
PG_RETURN_BOOL(cmp > 0);
651
bpcharge(PG_FUNCTION_ARGS)
653
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
654
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
659
len1 = bcTruelen(arg1);
660
len2 = bcTruelen(arg2);
662
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
664
PG_FREE_IF_COPY(arg1, 0);
665
PG_FREE_IF_COPY(arg2, 1);
667
PG_RETURN_BOOL(cmp >= 0);
671
bpcharcmp(PG_FUNCTION_ARGS)
673
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
674
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
679
len1 = bcTruelen(arg1);
680
len2 = bcTruelen(arg2);
682
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
684
PG_FREE_IF_COPY(arg1, 0);
685
PG_FREE_IF_COPY(arg2, 1);
687
PG_RETURN_INT32(cmp);
692
* bpchar needs a specialized hash function because we want to ignore
693
* trailing blanks in comparisons.
695
* XXX is there any need for locale-specific behavior here?
698
hashbpchar(PG_FUNCTION_ARGS)
700
BpChar *key = PG_GETARG_BPCHAR_P(0);
705
keydata = VARDATA(key);
706
keylen = bcTruelen(key);
708
result = hash_any((unsigned char *) keydata, keylen);
710
/* Avoid leaking memory for toasted inputs */
711
PG_FREE_IF_COPY(key, 0);