6
$Date: 2007-02-23 11:49:41 +0900 (金, 23 2月 2007) $
6
$Date: 2007-08-30 11:44:20 +0900 (木, 30 8月 2007) $
7
7
created at: Mon Aug 9 17:12:58 JST 1993
9
Copyright (C) 1993-2006 Yukihiro Matsumoto
9
Copyright (C) 1993-2007 Yukihiro Matsumoto
10
10
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
11
11
Copyright (C) 2000 Information-technology Promotion Agency, Japan
13
13
**********************************************************************/
15
#include "ruby/ruby.h"
17
#include "ruby/encoding.h"
18
19
/* Shorthand for element `no` of the beg[] array reached through a pointer
 * named `regs`; a variable called `regs` must be in scope at the use site. */
#define BEG(no) (regs->beg[(no)])
19
20
/* Shorthand for element `no` of the end[] array reached through a pointer
 * named `regs`; a variable called `regs` must be in scope at the use site. */
#define END(no) (regs->end[(no)])
404
str_strlen(VALUE str, rb_encoding *enc)
408
if (!enc) enc = rb_enc_get(str);
409
len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
411
rb_raise(rb_eArgError, "invalid mbstring sequence");
397
418
* str.length => integer
419
* str.size => integer
399
* Returns the length of <i>str</i>.
421
* Returns the character length of <i>str</i>.
403
425
rb_str_length(VALUE str)
405
long len = RSTRING_LEN(str);
406
return LONG2NUM(len);
429
len = str_strlen(str, rb_enc_get(str));
435
* str.bytesize => integer
437
* Returns the length of <i>str</i> in bytes.
444
return INT2NUM(RSTRING_LEN(str));
663
* String.try_convert(obj) -> string or nil
665
* Try to convert <i>obj</i> into a String, using to_str method.
666
* Returns converted string or nil if <i>obj</i> cannot be converted
669
* String.try_convert("str") # => str
670
* String.try_convert(/re/) # => nil
673
rb_str_s_try_convert(VALUE dummy, VALUE str)
675
return rb_check_string_type(str);
679
str_nth(const char *p, const char *e, int nth, rb_encoding *enc)
681
p = rb_enc_nth(p, e, nth, enc);
683
rb_raise(rb_eArgError, "invalid mbstring sequence");
686
rb_raise(rb_eIndexError, "index out of range");
692
str_offset(const char *p, const char *e, int nth, rb_encoding *enc)
694
const char *pp = str_nth(p, e, nth, enc);
700
str_sublen(VALUE str, long pos, rb_encoding *enc)
702
if (rb_enc_mbmaxlen(enc) == 1 || pos < 0) return pos;
704
char *p = RSTRING_PTR(str);
710
p += rb_enc_mbclen(p, enc);
718
rb_str_sublen(VALUE str, int len)
720
return str_sublen(str, len, rb_enc_get(str));
724
rb_str_subseq(VALUE str, long beg, long len)
726
VALUE str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
728
rb_enc_copy(str2, str);
729
OBJ_INFECT(str2, str);
619
735
rb_str_substr(VALUE str, long beg, long len)
737
rb_encoding *enc = rb_enc_get(str);
739
int slen = str_strlen(str, enc);
623
741
if (len < 0) return Qnil;
624
if (beg > RSTRING_LEN(str)) return Qnil;
742
if (beg > slen) return Qnil;
626
beg += RSTRING_LEN(str);
627
745
if (beg < 0) return Qnil;
629
if (beg + len > RSTRING_LEN(str)) {
630
len = RSTRING_LEN(str) - beg;
747
if (beg + len > slen) {
636
754
str2 = rb_str_new5(str,0,0);
638
else if (len > RSTRING_EMBED_LEN_MAX &&
639
beg + len == RSTRING_LEN(str) && !STR_ASSOC_P(str)) {
640
str2 = rb_str_new4(str);
641
str2 = str_new3(rb_obj_class(str2), str2);
642
RSTRING(str2)->as.heap.ptr += RSTRING_LEN(str2) - len;
643
RSTRING(str2)->as.heap.len = len;
646
str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
757
char *p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
758
str2 = rb_str_new5(str, p, str_offset(p, RSTRING_END(str), len, enc));
760
rb_enc_copy(str2, str);
648
761
OBJ_INFECT(str2, str);
870
987
rb_str_concat(VALUE str1, VALUE str2)
872
989
if (FIXNUM_P(str2)) {
873
int i = FIX2INT(str2);
874
if (0 <= i && i <= 0xff) { /* byte */
876
return rb_str_cat(str1, &c, 1);
990
rb_encoding *enc = rb_enc_get(str1);
991
int c = FIX2INT(str2);
992
int pos = RSTRING_LEN(str1);
993
int len = rb_enc_codelen(c, enc);
996
rb_raise(rb_eArgError, "invalid codepoint 0x%x", c);
998
rb_str_resize(str1, pos+len);
999
rb_enc_mbcput(c, RSTRING_PTR(str1)+pos, enc);
879
1002
return rb_str_append(str1, str2);
1177
1302
rb_str_index(VALUE str, VALUE sub, long offset)
1309
enc = rb_enc_check(str, sub);
1310
len = str_strlen(str, enc);
1311
slen = str_strlen(sub, enc);
1181
1312
if (offset < 0) {
1182
offset += RSTRING_LEN(str);
1183
1314
if (offset < 0) return -1;
1185
if (RSTRING_LEN(str) - offset < RSTRING_LEN(sub)) return -1;
1186
if (RSTRING_LEN(sub) == 0) return offset;
1316
if (len - offset < slen) return -1;
1317
if (slen == 0) return offset;
1318
s = offset ? str_nth(RSTRING_PTR(str), RSTRING_END(str), offset, enc) : RSTRING_PTR(str);
1319
/* need to proceed one character at a time */
1187
1320
pos = rb_memsearch(RSTRING_PTR(sub), RSTRING_LEN(sub),
1188
RSTRING_PTR(str)+offset, RSTRING_LEN(str)-offset);
1321
s, RSTRING_LEN(str)-(s - RSTRING_PTR(str)));
1189
1322
if (pos < 0) return pos;
1190
1323
return pos + offset;
1237
1370
pos = rb_reg_adjust_startpos(sub, str, pos, 0);
1238
1371
pos = rb_reg_search(sub, str, pos, 0);
1372
pos = rb_str_sublen(str, pos);
1243
int c = FIX2INT(sub);
1244
long len = RSTRING_LEN(str);
1245
char *p = RSTRING_PTR(str);
1247
for (;pos<len;pos++) {
1248
if ((unsigned char)p[pos] == c) return LONG2NUM(pos);
1256
tmp = rb_check_string_type(sub);
1258
rb_raise(rb_eTypeError, "type mismatch: %s given",
1259
rb_obj_classname(sub));
1378
tmp = rb_check_string_type(sub);
1380
rb_raise(rb_eTypeError, "type mismatch: %s given",
1381
rb_obj_classname(sub));
1263
1385
/* fall through */
1265
1387
pos = rb_str_index(str, sub, pos);
1388
pos = rb_str_sublen(str, pos);
1274
1397
rb_str_rindex(VALUE str, VALUE sub, long pos)
1276
long len = RSTRING_LEN(sub);
1400
char *s, *sbeg, *e, *t;
1403
enc = rb_enc_check(str, sub);
1404
len = str_strlen(str, enc);
1405
slen = str_strlen(sub, enc);
1279
1406
/* substring longer than string */
1280
if (RSTRING_LEN(str) < len) return -1;
1281
if (RSTRING_LEN(str) - pos < len) {
1282
pos = RSTRING_LEN(str) - len;
1407
if (len < slen) return -1;
1408
if (len - pos < slen) {
1284
1414
sbeg = RSTRING_PTR(str);
1285
s = RSTRING_PTR(str) + pos;
1415
e = RSTRING_END(str);
1286
1416
t = RSTRING_PTR(sub);
1289
if (rb_memcmp(s, t, len) == 0) {
1290
return s - RSTRING_PTR(str);
1418
s = str_nth(sbeg, e, pos, enc);
1419
if (rb_memcmp(s, t, slen) == 0) {
1422
if (pos == 0) break;
1322
1449
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
1453
rb_encoding *enc = rb_enc_get(str);
1454
long pos, len = str_strlen(str, enc);
1328
if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
1329
pos = NUM2LONG(position);
1456
if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
1457
pos = NUM2LONG(vpos);
1331
pos += RSTRING_LEN(str);
1333
1461
if (TYPE(sub) == T_REGEXP) {
1334
1462
rb_backref_set(Qnil);
1339
if (pos > RSTRING_LEN(str)) pos = RSTRING_LEN(str);
1467
if (pos > len) pos = len;
1342
pos = RSTRING_LEN(str);
1345
1473
switch (TYPE(sub)) {
1475
/* enc = rb_get_check(str, sub); */
1347
1476
if (RREGEXP(sub)->len) {
1348
1477
pos = rb_reg_adjust_startpos(sub, str, pos, 1);
1349
1478
pos = rb_reg_search(sub, str, pos, 1);
1479
pos = rb_str_sublen(str, pos);
1351
1481
if (pos >= 0) return LONG2NUM(pos);
1487
tmp = rb_check_string_type(sub);
1489
rb_raise(rb_eTypeError, "type mismatch: %s given",
1490
rb_obj_classname(sub));
1496
pos = str_sublen(str, pos, enc);
1355
1497
pos = rb_str_rindex(str, sub, pos);
1356
1498
if (pos >= 0) return LONG2NUM(pos);
1361
int c = FIX2INT(sub);
1362
char *p = RSTRING_PTR(str) + pos;
1363
char *pbeg = RSTRING_PTR(str);
1365
if (pos == RSTRING_LEN(str)) {
1366
if (pos == 0) return Qnil;
1370
if ((unsigned char)*p == c)
1371
return LONG2NUM((char*)p - RSTRING_PTR(str));
1378
rb_raise(rb_eTypeError, "type mismatch: %s given",
1379
rb_obj_classname(sub));
1553
rb_str_upto(VALUE beg, VALUE end, int excl)
1679
* str.upto(other_str, exclusive=false) {|s| block } => str
1681
* Iterates through successive values, starting at <i>str</i> and
1682
* ending at <i>other_str</i> inclusive, passing each value in turn to
1683
* the block. The <code>String#succ</code> method is used to generate
1684
* each value. If optional second argument exclusive is omitted or is <code>false</code>,
1685
* the last value will be included; otherwise it will be excluded.
1687
* "a8".upto("b6") {|s| print s, ' ' }
1688
* for s in "a8".."b6"
1692
* <em>produces:</em>
1694
* a8 a9 b0 b1 b2 b3 b4 b5 b6
1695
* a8 a9 b0 b1 b2 b3 b4 b5 b6
1699
rb_str_upto(int argc, VALUE *argv, VALUE beg)
1701
VALUE end, exclusive;
1555
1702
VALUE current, after_end;
1556
ID succ = rb_intern("succ");
1706
rb_scan_args(argc, argv, "11", &end, &exclusive);
1707
excl = RTEST(exclusive);
1708
succ = rb_intern("succ");
1559
1709
StringValue(end);
1560
1710
n = rb_str_cmp(beg, end);
1561
1711
if (n > 0 || (excl && n == 0)) return beg;
1581
* str.upto(other_str) {|s| block } => str
1583
* Iterates through successive values, starting at <i>str</i> and
1584
* ending at <i>other_str</i> inclusive, passing each value in turn to
1585
* the block. The <code>String#succ</code> method is used to generate
1588
* "a8".upto("b6") {|s| print s, ' ' }
1589
* for s in "a8".."b6"
1593
* <em>produces:</em>
1595
* a8 a9 b0 b1 b2 b3 b4 b5 b6
1596
* a8 a9 b0 b1 b2 b3 b4 b5 b6
1600
rb_str_upto_m(VALUE beg, VALUE end)
1602
return rb_str_upto(beg, end, Qfalse);
1606
1729
rb_str_subpat(VALUE str, VALUE re, int nth)
1727
rb_str_splice(VALUE str, long beg, long len, VALUE val)
1846
rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
1729
if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
1732
1848
rb_str_modify(str);
1734
if (RSTRING_LEN(str) < beg) {
1736
rb_raise(rb_eIndexError, "index %ld out of string", beg);
1739
if (-beg > RSTRING_LEN(str)) {
1742
beg += RSTRING_LEN(str);
1744
if (RSTRING_LEN(str) < beg + len) {
1745
len = RSTRING_LEN(str) - beg;
1748
1849
if (len < RSTRING_LEN(val)) {
1749
1850
/* expand string */
1750
1851
RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
1768
1869
OBJ_INFECT(str, val);
1873
rb_str_splice(VALUE str, long beg, long len, VALUE val)
1879
if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
1883
enc = rb_enc_check(str, val);
1884
slen = str_strlen(str, enc);
1888
rb_raise(rb_eIndexError, "index %ld out of string", beg);
1896
if (slen < beg + len) {
1899
p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
1900
e = str_nth(p, RSTRING_END(str), len, enc);
1902
beg = p - RSTRING_PTR(str); /* physical position */
1903
len = e - p; /* physical length */
1904
rb_str_splice_0(str, beg, len, val);
1772
1908
rb_str_update(VALUE str, long beg, long len, VALUE val)
1835
1963
rb_raise(rb_eIndexError, "string not matched");
1837
rb_str_splice(str, beg, RSTRING_LEN(indx), val);
1965
beg = rb_str_sublen(str, beg);
1966
rb_str_splice(str, beg, str_strlen(indx, 0), val);
1841
1970
/* check if indx is Range */
1844
if (rb_range_beg_len(indx, &beg, &len, RSTRING_LEN(str), 2)) {
1973
if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
1845
1974
rb_str_splice(str, beg, len, val);
2413
2519
rb_str_reverse(VALUE str)
2416
2523
char *s, *e, *p;
2418
2525
if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
2526
enc = rb_enc_get(str);
2420
2527
obj = rb_str_new5(str, 0, RSTRING_LEN(str));
2421
s = RSTRING_PTR(str); e = s + RSTRING_LEN(str) - 1;
2422
p = RSTRING_PTR(obj);
2528
s = RSTRING_PTR(str); e = RSTRING_END(str);
2529
p = RSTRING_END(obj);
2531
if (RSTRING_LEN(str) > 1) {
2532
if (rb_enc_mbmaxlen(enc) == 1) {
2539
int clen = rb_enc_mbclen(s, enc);
2542
rb_raise(rb_eArgError, "invalid mbstring sequence");
2550
STR_SET_LEN(obj, RSTRING_LEN(str));
2427
2551
OBJ_INFECT(obj, str);
2552
rb_enc_associate(obj, enc);
2543
/* True when p lies before e and the byte at p is '$', '@' or '{'.
 * The bounds test comes first, so *p is only read while p < e.
 * Note: p is expanded several times; pass an expression without side effects. */
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
2696
str_cat_char(VALUE str, int c, rb_encoding *enc)
2699
int n = rb_enc_codelen(c, enc);
2701
rb_enc_mbcput(c, s, enc);
2702
rb_str_buf_cat(str, s, n);
2706
prefix_escape(VALUE str, int c, rb_encoding *enc)
2708
str_cat_char(str, '\\', enc);
2709
str_cat_char(str, c, enc);
2558
2725
rb_str_inspect(VALUE str)
2727
rb_encoding *enc = rb_enc_get(str);
2560
2728
char *p, *pend;
2561
VALUE result = rb_str_buf_new2("\"");
2729
VALUE result = rb_str_buf_new2("");
2564
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2731
str_cat_char(result, '"', enc);
2732
p = RSTRING_PTR(str); pend = RSTRING_END(str);
2565
2733
while (p < pend) {
2567
if (ismbchar(c) && p < pend) {
2568
int len = mbclen(c);
2569
rb_str_buf_cat(result, p - 1, len);
2572
else if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
2573
s[0] = '\\'; s[1] = c;
2574
rb_str_buf_cat(result, s, 2);
2576
else if (ISPRINT(c)) {
2578
rb_str_buf_cat(result, s, 1);
2734
int c = rb_enc_codepoint(p, pend, enc);
2735
int n = rb_enc_codelen(c, enc);
2739
if (c == '"'|| c == '\\' ||
2740
(c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
2741
(cc == '$' || cc == '@' || cc == '{')))) {
2742
prefix_escape(result, c, enc);
2580
2744
else if (c == '\n') {
2581
s[0] = '\\'; s[1] = 'n';
2582
rb_str_buf_cat(result, s, 2);
2745
prefix_escape(result, 'n', enc);
2584
2747
else if (c == '\r') {
2585
s[0] = '\\'; s[1] = 'r';
2586
rb_str_buf_cat(result, s, 2);
2748
prefix_escape(result, 'r', enc);
2588
2750
else if (c == '\t') {
2589
s[0] = '\\'; s[1] = 't';
2590
rb_str_buf_cat(result, s, 2);
2751
prefix_escape(result, 't', enc);
2592
2753
else if (c == '\f') {
2593
s[0] = '\\'; s[1] = 'f';
2594
rb_str_buf_cat(result, s, 2);
2754
prefix_escape(result, 'f', enc);
2596
2756
else if (c == '\013') {
2597
s[0] = '\\'; s[1] = 'v';
2598
rb_str_buf_cat(result, s, 2);
2757
prefix_escape(result, 'v', enc);
2600
2759
else if (c == '\010') {
2601
s[0] = '\\'; s[1] = 'b';
2602
rb_str_buf_cat(result, s, 2);
2760
prefix_escape(result, 'b', enc);
2604
2762
else if (c == '\007') {
2605
s[0] = '\\'; s[1] = 'a';
2606
rb_str_buf_cat(result, s, 2);
2763
prefix_escape(result, 'a', enc);
2608
2765
else if (c == 033) {
2609
s[0] = '\\'; s[1] = 'e';
2610
rb_str_buf_cat(result, s, 2);
2766
prefix_escape(result, 'e', enc);
2768
else if (rb_enc_isprint(c, enc)) {
2771
rb_enc_mbcput(c, buf, enc);
2772
rb_str_buf_cat(result, buf, n);
2613
sprintf(s, "\\%03o", c & 0377);
2614
rb_str_buf_cat2(result, s);
2778
sprintf(buf, "\\%03o", c & 0377);
2780
str_cat_char(result, *s++, enc);
2617
rb_str_buf_cat2(result, "\"");
2784
str_cat_char(result, '"', enc);
2619
2786
OBJ_INFECT(result, str);
2787
rb_enc_associate(result, enc);
2791
/* True when p lies before e and the byte at p is '$', '@' or '{'.
 * The bounds test comes first, so *p is only read while p < e.
 * Note: p is expanded several times; pass an expression without side effects. */
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
2854
3030
rb_str_capitalize_bang(VALUE str)
2856
3033
char *s, *send;
2857
3034
int modify = 0;
2859
3037
rb_str_modify(str);
3038
enc = rb_enc_get(str);
2860
3039
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
2861
s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
3040
s = RSTRING_PTR(str); send = RSTRING_END(str);
3042
c = rb_enc_codepoint(s, send, enc);
3043
if (rb_enc_islower(c, enc)) {
3044
rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
2866
while (++s < send) {
2870
else if (ISUPPER(*s)) {
3047
s += rb_enc_codelen(c, enc);
3049
c = rb_enc_codepoint(s, send, enc);
3050
if (rb_enc_isupper(c, enc)) {
3051
rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
3054
s += rb_enc_codelen(c, enc);
2875
3056
if (modify) return str;
2912
3093
rb_str_swapcase_bang(VALUE str)
2914
3096
char *s, *send;
2915
3097
int modify = 0;
2917
3099
rb_str_modify(str);
2918
s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
3100
enc = rb_enc_get(str);
3101
s = RSTRING_PTR(str); send = RSTRING_END(str);
2919
3102
while (s < send) {
2923
else if (ISUPPER(*s)) {
2927
else if (ISLOWER(*s)) {
3103
int c = rb_enc_codepoint(s, send, enc);
3105
if (rb_enc_isupper(c, enc)) {
3106
/* assuming tolower returns codepoint with same size */
3107
rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
3110
else if (rb_enc_islower(c, enc)) {
3111
/* assuming toupper returns codepoint with same size */
3112
rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
3115
s += rb_enc_codelen(c, enc);
2934
3118
if (modify) return str;
2967
trnext(struct tr *t)
3151
trnext(struct tr *t, rb_encoding *enc)
2971
3155
if (t->p == t->pend) return -1;
2972
if (t->p < t->pend - 1 && *t->p == '\\') {
2975
t->now = *(USTR)t->p++;
3156
t->now = rb_enc_codepoint(t->p, t->pend, enc);
3157
t->p += rb_enc_codelen(t->now, enc);
2976
3158
if (t->p < t->pend - 1 && *t->p == '-') {
2978
3160
if (t->p < t->pend) {
2979
if (t->now > *(USTR)t->p) {
3161
int c = rb_enc_codepoint(t->p, t->pend, enc);
3162
t->p += rb_enc_codelen(c, enc);
3163
if (t->now > c) continue;
2984
t->max = *(USTR)t->p++;
3018
3200
if (RSTRING_LEN(repl) == 0) {
3019
3201
return rb_str_delete_bang(1, &src, str);
3203
enc = rb_enc_check(str, src);
3204
if (rb_enc_check(str, repl) != enc) {
3205
rb_raise(rb_eArgError, "character encodings differ");
3021
3207
trrepl.p = RSTRING_PTR(repl);
3022
3208
trrepl.pend = trrepl.p + RSTRING_LEN(repl);
3023
3209
trsrc.gen = trrepl.gen = 0;
3024
3210
trsrc.now = trrepl.now = 0;
3025
3211
trsrc.max = trrepl.max = 0;
3212
hash = rb_hash_new();
3028
for (i=0; i<256; i++) {
3031
while ((c = trnext(&trsrc)) >= 0) {
3032
trans[c & 0xff] = -1;
3034
while ((c = trnext(&trrepl)) >= 0)
3215
while ((c = trnext(&trsrc, enc)) >= 0) {
3216
rb_hash_aset(hash, INT2NUM(c), Qtrue);
3218
while ((c = trnext(&trrepl, enc)) >= 0)
3035
3219
/* retrieve last replacer */;
3036
for (i=0; i<256; i++) {
3037
if (trans[i] >= 0) {
3038
trans[i] = trrepl.now;
3045
for (i=0; i<256; i++) {
3048
while ((c = trnext(&trsrc)) >= 0) {
3049
r = trnext(&trrepl);
3225
while ((c = trnext(&trsrc, enc)) >= 0) {
3226
r = trnext(&trrepl, enc);
3050
3227
if (r == -1) r = trrepl.now;
3051
trans[c & 0xff] = r;
3228
rb_hash_aset(hash, INT2NUM(c), INT2NUM(r));
3055
3232
rb_str_modify(str);
3056
s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
3233
s = RSTRING_PTR(str); send = RSTRING_END(str);
3235
int clen, tlen, max = RSTRING_LEN(str);
3236
int offset, save = -1;
3237
char *buf = ALLOC_N(char, max), *t = buf;
3240
if (cflag) tlen = rb_enc_codelen(last, enc);
3061
3241
while (s < send) {
3063
if ((c = trans[c0 & 0xff]) >= 0) {
3064
if (last == c) continue;
3242
c = rb_enc_codepoint(s, send, enc);
3243
tlen = clen = rb_enc_codelen(c, enc);
3246
v = rb_hash_aref(hash, INT2NUM(c));
3250
if (save == c) continue;
3252
tlen = rb_enc_codelen(c, enc);
3074
if (RSTRING_LEN(str) > (t - RSTRING_PTR(str))) {
3075
STR_SET_LEN(str, (t - RSTRING_PTR(str)));
3263
while (t - buf + tlen >= max) {
3266
REALLOC_N(buf, char, max);
3269
rb_enc_mbcput(c, t, enc);
3273
RSTRING(str)->as.heap.ptr = buf;
3274
RSTRING(str)->as.heap.len = t - buf;
3275
STR_SET_NOEMBED(str);
3276
RSTRING(str)->as.heap.aux.capa = max;
3278
else if (rb_enc_mbmaxlen(enc) == 1) {
3081
3279
while (s < send) {
3082
if ((c = trans[*s & 0xff]) >= 0) {
3280
VALUE v = rb_hash_aref(hash, INT2FIX(*s));
3296
int clen, tlen, max = RSTRING_LEN(str) * 1.2;
3298
char *buf = ALLOC_N(char, max), *t = buf;
3301
if (cflag) tlen = rb_enc_codelen(last, enc);
3303
c = rb_enc_codepoint(s, send, enc);
3304
tlen = clen = rb_enc_codelen(c, enc);
3306
v = rb_hash_aref(hash, INT2NUM(c));
3310
tlen = rb_enc_codelen(c, enc);
3318
while (t - buf + tlen >= max) {
3321
REALLOC_N(buf, char, max);
3324
if (s != t) rb_enc_mbcput(c, t, enc);
3328
if (!STR_EMBED_P(str)) {
3329
free(RSTRING(str)->as.heap.ptr);
3332
RSTRING(str)->as.heap.ptr = buf;
3333
RSTRING(str)->as.heap.len = t - buf;
3334
STR_SET_NOEMBED(str);
3335
RSTRING(str)->as.heap.aux.capa = max;
3090
3338
if (modify) return str;
3137
tr_setup_table(VALUE str, char table[256], int init)
3385
tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
3389
VALUE table, ptable;
3144
3391
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
3145
3392
tr.gen = tr.now = tr.max = 0;
3393
table = rb_hash_new();
3146
3394
if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
3152
for (i=0; i<256; i++) {
3404
while ((c = trnext(&tr, enc)) >= 0) {
3405
VALUE key = INT2NUM(c);
3407
if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) {
3408
rb_hash_aset(table, key, Qtrue);
3156
for (i=0; i<256; i++) {
3159
while ((c = trnext(&tr)) >= 0) {
3160
buf[c & 0xff] = !cflag;
3162
for (i=0; i<256; i++) {
3163
table[i] = table[i] && buf[i];
3189
3435
VALUE s = argv[i];
3191
3437
StringValue(s);
3192
tr_setup_table(s, squeez, init);
3438
enc = rb_enc_check(str, s);
3439
tr_setup_table(s, &del, &nodel, enc);
3196
3442
rb_str_modify(str);
3197
3443
s = t = RSTRING_PTR(str);
3198
3444
if (!s || RSTRING_LEN(str) == 0) return Qnil;
3199
send = s + RSTRING_LEN(str);
3445
send = RSTRING_END(str);
3200
3446
while (s < send) {
3201
if (squeez[*s & 0xff])
3447
int c = rb_enc_codepoint(s, send, enc);
3448
int clen = rb_enc_codelen(c, enc);
3449
VALUE v = INT2NUM(c);
3451
if ((del && !NIL_P(rb_hash_aref(del, v))) &&
3452
(!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
3456
if (t != s) rb_enc_mbcput(c, t, enc);
3208
3462
STR_SET_LEN(str, t - RSTRING_PTR(str));
3247
3501
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
3503
rb_encoding *enc = 0;
3504
VALUE del = 0, nodel = 0;
3250
3505
char *s, *send, *t;
3251
int c, save, modify = 0;
3506
int save, modify = 0;
3255
3509
if (argc == 0) {
3256
for (i=0; i<256; i++) {
3510
enc = rb_enc_get(str);
3261
3513
for (i=0; i<argc; i++) {
3262
3514
VALUE s = argv[i];
3264
3516
StringValue(s);
3265
tr_setup_table(s, squeez, init);
3517
enc = rb_enc_check(str, s);
3518
tr_setup_table(s, &del, &nodel, enc);
3270
3522
rb_str_modify(str);
3271
3523
s = t = RSTRING_PTR(str);
3272
3524
if (!s || RSTRING_LEN(str) == 0) return Qnil;
3273
send = s + RSTRING_LEN(str);
3525
send = RSTRING_END(str);
3275
3527
while (s < send) {
3277
if (c != save || !squeez[c]) {
3528
int c = rb_enc_codepoint(s, send, enc);
3529
int clen = rb_enc_codelen(c, enc);
3530
VALUE v = INT2NUM(c);
3533
((del && NIL_P(rb_hash_aref(del, v))) &&
3534
(!nodel || NIL_P(rb_hash_aref(nodel, v))))) {
3535
if (t != s) rb_enc_mbcput(c, t, enc);
3282
3542
if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
3381
3641
VALUE s = argv[i];
3383
3643
StringValue(s);
3384
tr_setup_table(s, table, init);
3644
enc = rb_enc_check(str, s);
3645
tr_setup_table(s, &del, &nodel, enc);
3388
3648
s = RSTRING_PTR(str);
3389
3649
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
3390
send = s + RSTRING_LEN(str);
3650
send = RSTRING_END(str);
3392
3652
while (s < send) {
3393
if (table[*s++ & 0xff]) {
3653
int c = rb_enc_codepoint(s, send, enc);
3654
int clen = rb_enc_codelen(c, enc);
3655
VALUE v = INT2NUM(c);
3657
if ((del && !NIL_P(rb_hash_aref(del, v))) &&
3658
(!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
3397
3663
return INT2NUM(i);
3531
3803
else if (last_null == 1) {
3532
rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING_PTR(str)[beg],spat)));
3804
rb_ary_push(result, rb_str_subseq(str, beg,
3805
rb_enc_mbclen(RSTRING_PTR(str)+beg,enc)));
3536
start += mbclen2(RSTRING_PTR(str)[start],spat);
3809
start += rb_enc_mbclen(RSTRING_PTR(str)+start,enc);
3542
rb_ary_push(result, rb_str_substr(str, beg, end-beg));
3815
rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
3543
3816
beg = start = END(0);
3651
3924
StringValue(rs);
3925
enc = rb_enc_check(str, rs);
3652
3926
rslen = RSTRING_LEN(rs);
3653
3927
if (rslen == 0) {
3654
3928
newline = '\n';
3657
newline = RSTRING_PTR(rs)[rslen-1];
3931
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
3660
for (s = p, p += rslen; p < pend; p++) {
3661
if (rslen == 0 && *p == '\n') {
3662
if (*++p != '\n') continue;
3663
while (*p == '\n') p++;
3935
int c = rb_enc_codepoint(p, pend, enc);
3936
int n = rb_enc_codelen(c, enc);
3938
if (rslen == 0 && c == newline) {
3939
while (rb_enc_codepoint(p, pend, enc) == newline) {
3665
if (RSTRING_PTR(str) < p && p[-1] == newline &&
3667
rb_memcmp(RSTRING_PTR(rs), p-rslen, rslen) == 0)) {
3668
line = rb_str_new5(str, s, p - s);
3945
(rslen <= 1 || rb_memcmp(RSTRING_PTR(rs), p, rslen) == 0)) {
3946
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
3669
3947
OBJ_INFECT(line, str);
3670
3948
rb_yield(line);
3671
3949
str_mod_check(str, ptr, len);
3676
3955
if (s != pend) {
4006
* Document-method: chars
4008
* str.chars => anEnumerator
4009
* str.chars {|substr| block } => str
4011
* Returns an enumerator that gives each character in the string.
4012
* If a block is given, it iterates over each character in the string.
4014
* "foo".chars.to_a #=> ["f","o","o"]
4018
* Document-method: each_char
4020
* str.each_char {|cstr| block } => str
4022
* Passes each character in <i>str</i> to the given block.
4024
* "hello".each_char {|c| print c, ' ' }
4026
* <em>produces:</em>
4032
rb_str_each_char(VALUE str)
4034
int i, len = str_strlen(str, 0);
4036
RETURN_ENUMERATOR(str, 0, 0);
4037
for (i=0; i<len; i++) {
4038
rb_yield(rb_str_substr(str, i, 1));
3728
4045
* str.chop! => str or nil
3898
4215
rb_str_lstrip_bang(VALUE str)
3900
4218
char *s, *t, *e;
4221
enc = rb_enc_get(str);
3902
4222
s = RSTRING_PTR(str);
3903
4223
if (!s || RSTRING_LEN(str) == 0) return Qnil;
3904
e = t = s + RSTRING_LEN(str);
4224
e = t = RSTRING_END(str);
3905
4225
/* remove spaces at head */
3906
while (s < t && ISSPACE(*s)) s++;
4227
int cc = rb_enc_codepoint(s, e, enc);
4229
if (!rb_enc_isspace(cc, enc)) break;
4230
s += rb_enc_codelen(cc, enc);
3908
4233
if (s > RSTRING_PTR(str)) {
3909
4234
rb_str_modify(str);
3952
4277
rb_str_rstrip_bang(VALUE str)
3954
4280
char *s, *t, *e;
4281
int space_seen = Qfalse;
4284
enc = rb_enc_get(str);
3956
4285
s = RSTRING_PTR(str);
3957
4286
if (!s || RSTRING_LEN(str) == 0) return Qnil;
3958
e = t = s + RSTRING_LEN(str);
3960
/* remove trailing '\0's */
3961
while (s < t && t[-1] == '\0') t--;
3963
/* remove trailing spaces */
3964
while (s < t && ISSPACE(*(t-1))) t--;
4287
t = e = RSTRING_END(str);
4289
int cc = rb_enc_codepoint(s, e, enc);
4291
if (!cc || rb_enc_isspace(cc, enc)) {
4292
if (!space_seen) t = s;
4296
space_seen = Qfalse;
4298
s += rb_enc_codelen(cc, enc);
3967
4301
rb_str_modify(str);
3968
STR_SET_LEN(str, t-s);
4302
STR_SET_LEN(str, t-RSTRING_PTR(str));
3969
4303
RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
4315
4651
rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
4318
long width, flen = 0;
4655
long width, len, flen = 1, fclen = 1;
4320
char *p, *pend, *f = " ";
4324
4661
rb_scan_args(argc, argv, "11", &w, &pad);
4662
enc = rb_enc_get(str);
4325
4663
width = NUM2LONG(w);
4326
4664
if (argc == 2) {
4327
4665
StringValue(pad);
4666
rb_enc_check(str, pad);
4328
4667
f = RSTRING_PTR(pad);
4329
4668
flen = RSTRING_LEN(pad);
4669
fclen = str_strlen(pad, enc);
4330
4670
if (flen == 0) {
4331
4671
rb_raise(rb_eArgError, "zero width padding");
4334
if (width < 0 || RSTRING_LEN(str) >= width) return rb_str_dup(str);
4335
res = rb_str_new5(str, 0, width);
4675
else if (!m17n_asciicompat(enc)) {
4676
rb_raise(rb_eArgError, "character encodings differ");
4679
len = str_strlen(str, enc);
4680
if (width < 0 || len >= width) return rb_str_dup(str);
4682
llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
4684
res = rb_str_new5(str, 0, RSTRING_LEN(str)+n*flen/fclen+2);
4336
4685
p = RSTRING_PTR(res);
4338
n = width - RSTRING_LEN(str);
4339
pend = p + ((jflag == 'r') ? n : n/2);
4347
while (p + flen <= pend) {
4356
memcpy(p, RSTRING_PTR(str), RSTRING_LEN(str)+1);
4358
p += RSTRING_LEN(str); pend = RSTRING_PTR(res) + width;
4365
while (p + flen <= pend) {
4691
else if (llen > fclen) {
4697
char *fp = str_nth(f, f+flen, llen, enc);
4704
memcpy(p, RSTRING_PTR(str), RSTRING_LEN(str));
4705
p+=RSTRING_LEN(str);
4711
else if (rlen > fclen) {
4717
char *fp = str_nth(f, f+flen, rlen, enc);
4725
STR_SET_LEN(res, p-RSTRING_PTR(res));
4374
4726
OBJ_INFECT(res, str);
4375
if (flen > 0) OBJ_INFECT(res, pad);
4727
if (!NIL_P(pad)) OBJ_INFECT(res, pad);
4889
5242
rb_cString = rb_define_class("String", rb_cObject);
4890
5243
rb_include_module(rb_cString, rb_mComparable);
4891
5244
rb_define_alloc_func(rb_cString, str_alloc);
5245
rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
4892
5246
rb_define_method(rb_cString, "initialize", rb_str_init, -1);
4893
5247
rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
4894
5248
rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
4904
5258
rb_define_method(rb_cString, "insert", rb_str_insert, 2);
4905
5259
rb_define_method(rb_cString, "length", rb_str_length, 0);
4906
5260
rb_define_method(rb_cString, "size", rb_str_length, 0);
5261
rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
4907
5262
rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
4908
5263
rb_define_method(rb_cString, "=~", rb_str_match, 1);
4909
5264
rb_define_method(rb_cString, "match", rb_str_match_m, -1);
4911
5266
rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
4912
5267
rb_define_method(rb_cString, "next", rb_str_succ, 0);
4913
5268
rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
4914
rb_define_method(rb_cString, "upto", rb_str_upto_m, 1);
5269
rb_define_method(rb_cString, "upto", rb_str_upto, -1);
4915
5270
rb_define_method(rb_cString, "index", rb_str_index_m, -1);
4916
5271
rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
4917
5272
rb_define_method(rb_cString, "replace", rb_str_replace, 1);
5008
5366
rb_undef_alloc_func(rb_cSymbol);
5009
5367
rb_undef_method(CLASS_OF(rb_cSymbol), "new");
5010
5368
rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
5011
rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1);
5013
5370
rb_define_method(rb_cSymbol, "==", sym_equal, 1);
5014
5371
rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0);
5020
5377
rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
5021
5378
rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
5022
5379
rb_define_method(rb_cSymbol, "next", sym_succ, 0);
5023
rb_define_method(rb_cSymbol, "dump", rb_str_dump, 0);
5025
5381
rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
5026
5382
rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);