1328
1329
if (expand < 0) {
1329
1330
rb_raise(rb_eArgError, "negative expanding string size");
1331
if (!str_independent(str) ||
1333
(!STR_EMBED_P(str) ||
1334
RSTRING_LEN(str) + expand > RSTRING_EMBED_LEN_MAX))) {
1332
if (!str_independent(str)) {
1335
1333
str_make_independent_expand(str, expand);
1335
else if (expand > 0) {
1336
long len = RSTRING_LEN(str);
1337
long capa = len + expand;
1338
if (!STR_EMBED_P(str)) {
1339
REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1340
RSTRING(str)->as.heap.aux.capa = capa;
1342
else if (capa > RSTRING_EMBED_LEN_MAX) {
1343
str_make_independent_expand(str, expand);
1337
1346
ENC_CODERANGE_CLEAR(str);
2074
2083
rb_str_concat(VALUE str1, VALUE str2)
2086
rb_encoding *enc = STR_ENC_GET(str1);
2078
2088
if (FIXNUM_P(str2) || TYPE(str2) == T_BIGNUM) {
2079
if (rb_num_to_uint(str2, &lc) == 0) {
2089
if (rb_num_to_uint(str2, &code) == 0) {
2081
2091
else if (FIXNUM_P(str2)) {
2082
2092
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2089
2099
return rb_str_append(str1, str2);
2092
rb_encoding *enc = STR_ENC_GET(str1);
2102
if (enc == rb_usascii_encoding()) {
2103
/* US-ASCII automatically extended to ASCII-8BIT */
2104
char buf[1] = {(char)code};
2106
rb_raise(rb_eRangeError, "%u out of char range", code);
2108
rb_str_cat(str1, buf, 1);
2110
rb_enc_associate(str1, rb_ascii8bit_encoding());
2111
ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
2093
2115
long pos = RSTRING_LEN(str1);
2094
2116
int cr = ENC_CODERANGE(str1);
2097
if ((len = rb_enc_codelen(lc, enc)) <= 0) {
2098
rb_raise(rb_eRangeError, "%u invalid char", lc);
2120
switch (len = rb_enc_codelen(code, enc)) {
2121
case ONIGERR_INVALID_CODE_POINT_VALUE:
2122
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2124
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
2126
rb_raise(rb_eRangeError, "%u out of char range", code);
2129
buf = ALLOCA_N(char, len + 1);
2130
rb_enc_mbcput(code, buf, enc);
2131
if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2132
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2100
2134
rb_str_resize(str1, pos+len);
2101
rb_enc_mbcput(lc, RSTRING_PTR(str1)+pos, enc);
2102
if (cr == ENC_CODERANGE_7BIT && lc > 127)
2135
strncpy(RSTRING_PTR(str1) + pos, buf, len);
2136
if (cr == ENC_CODERANGE_7BIT && code > 127)
2103
2137
cr = ENC_CODERANGE_VALID;
2104
2138
ENC_CODERANGE_SET(str1, cr);
3163
3197
* Element Reference---If passed a single <code>Fixnum</code>, returns a
3164
3198
* substring of one character at that position. If passed two <code>Fixnum</code>
3165
3199
* objects, returns a substring starting at the offset given by the first, and
3166
* a length given by the second. If given a range, a substring containing
3167
* characters at offsets given by the range is returned. In all three cases, if
3168
* an offset is negative, it is counted from the end of <i>str</i>. Returns
3169
* <code>nil</code> if the initial offset falls outside the string, the length
3170
* is negative, or the beginning of the range is greater than the end of the
3200
* with a length given by the second. If passed a range, its beginning and end
3201
* are interpreted as offsets delimiting the substring to be returned. In all
3202
* three cases, if an offset is negative, it is counted from the end of <i>str</i>.
3203
* Returns <code>nil</code> if the initial offset falls outside the string or
3204
* the length is negative.
3173
3206
* If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
3174
3207
* returned. If a numeric or name parameter follows the regular expression, that
5234
5268
* str.tr(from_str, to_str) => new_str
5236
* Returns a copy of <i>str</i> with the characters in <i>from_str</i>
5237
* replaced by the corresponding characters in <i>to_str</i>. If
5270
* Returns a copy of <i>str</i> with the characters in <i>from_str</i>
5271
* replaced by the corresponding characters in <i>to_str</i>. If
5238
5272
* <i>to_str</i> is shorter than <i>from_str</i>, it is padded with its last
5239
5273
* character in order to maintain the correspondence.
5241
5275
* "hello".tr('el', 'ip') #=> "hippo"
5242
5276
* "hello".tr('aeiou', '*') #=> "h*ll*"
5244
5278
* Both strings may use the c1-c2 notation to denote ranges of characters,
5245
5279
* and <i>from_str</i> may start with a <code>^</code>, which denotes all
5246
5280
* characters except those listed.