1
/* $Id: chars.c,v 1.121 2006/11/10 02:49:07 dolorous Exp $ */
1
/* $Id: chars.c,v 1.122.2.13 2007/10/11 05:01:29 dolorous Exp $ */
2
2
/**************************************************************************
5
* Copyright (C) 2001, 2002, 2003, 2004 Chris Allegretta *
6
* Copyright (C) 2005, 2006 David Lawrence Ramsey *
5
* Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 *
6
* Free Software Foundation, Inc. *
7
7
* This program is free software; you can redistribute it and/or modify *
8
8
* it under the terms of the GNU General Public License as published by *
9
* the Free Software Foundation; either version 2, or (at your option) *
9
* the Free Software Foundation; either version 3, or (at your option) *
10
10
* any later version. *
12
12
* This program is distributed in the hope that it will be useful, but *
38
38
/* Whether we've enabled UTF-8 support. */
39
39
static const wchar_t bad_wchar = 0xFFFD;
40
40
/* If we get an invalid multibyte sequence, we treat it as
41
* Unicode FFFD (Replacement Character), unless we're
42
* determining if it's a control character or searching for a
41
* Unicode FFFD (Replacement Character), unless we're searching
42
* for a match to it. */
44
43
static const char *const bad_mbchar = "\xEF\xBF\xBD";
45
44
static const int bad_mbchar_len = 3;
507
506
/* This function is equivalent to strncasecmp(). */
508
507
int nstrncasecmp(const char *s1, const char *s2, size_t n)
510
512
assert(s1 != NULL && s2 != NULL);
512
for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) {
514
for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) {
513
515
if (tolower(*s1) != tolower(*s2))
527
529
char *s1_mb, *s2_mb;
528
530
wchar_t ws1, ws2;
530
535
assert(s1 != NULL && s2 != NULL);
532
537
s1_mb = charalloc(MB_CUR_MAX);
533
538
s2_mb = charalloc(MB_CUR_MAX);
535
while (n > 0 && *s1 != '\0' && *s2 != '\0') {
540
for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1 +=
541
move_mbright(s1, 0), s2 += move_mbright(s2, 0), n--) {
536
542
bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
537
543
int s1_mb_len, s2_mb_len;
552
558
bad_s2_mb = TRUE;
555
if (n == 0 || bad_s1_mb != bad_s2_mb ||
556
towlower(ws1) != towlower(ws2))
561
if (bad_s1_mb != bad_s2_mb || towlower(ws1) !=
567
return towlower(ws1) - towlower(ws2);
569
return (n > 0) ? towlower(ws1) - towlower(ws2) : 0;
570
572
return strncasecmp(s1, s2, n);
573
575
#ifndef HAVE_STRCASESTR
574
/* This function, nstrcasestr() (originally mutt_stristr()), was adapted
575
* from mutt 1.2.4i (lib.c). Here is the notice from that file, with
576
* the Free Software Foundation's address updated:
578
* Copyright (C) 1996, 1997, 1998, 1999, 2000 Michael R. Elkins
580
* Copyright (C) 1999, 2000 Thomas Roessler <roessler@guug.de>
582
* This program is free software; you can redistribute it
583
* and/or modify it under the terms of the GNU General Public
584
* License as published by the Free Software Foundation; either
585
* version 2 of the License, or (at your option) any later
588
* This program is distributed in the hope that it will be
589
* useful, but WITHOUT ANY WARRANTY; without even the implied
590
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
591
* PURPOSE. See the GNU General Public License for more
594
* You should have received a copy of the GNU General Public
595
* License along with this program; if not, write to the Free
596
* Software Foundation, Inc., 51 Franklin St, Fifth Floor,
597
* Boston, MA 02110-1301, USA. */
599
576
/* This function is equivalent to strcasestr(). */
600
const char *nstrcasestr(const char *haystack, const char *needle)
577
char *nstrcasestr(const char *haystack, const char *needle)
579
size_t haystack_len, needle_len;
602
581
assert(haystack != NULL && needle != NULL);
604
for (; *haystack != '\0'; haystack++) {
605
const char *r = haystack, *q = needle;
607
for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++)
584
return (char *)haystack;
586
haystack_len = strlen(haystack);
587
needle_len = strlen(needle);
589
for (; *haystack != '\0' && haystack_len >= needle_len; haystack++,
591
if (strncasecmp(haystack, needle, needle_len) == 0)
592
return (char *)haystack;
618
599
/* This function is equivalent to strcasestr() for multibyte strings. */
619
const char *mbstrcasestr(const char *haystack, const char *needle)
600
char *mbstrcasestr(const char *haystack, const char *needle)
621
602
#ifdef ENABLE_UTF8
625
bool found_needle = FALSE;
604
size_t haystack_len, needle_len;
627
606
assert(haystack != NULL && needle != NULL);
629
r_mb = charalloc(MB_CUR_MAX);
630
q_mb = charalloc(MB_CUR_MAX);
632
while (*haystack != '\0') {
633
const char *r = haystack, *q = needle;
634
int r_mb_len, q_mb_len;
637
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
639
r_mb_len = parse_mbchar(r, r_mb, NULL);
641
if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
642
mbtowc(NULL, NULL, 0);
643
wr = (unsigned char)*r;
647
q_mb_len = parse_mbchar(q, q_mb, NULL);
649
if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
650
mbtowc(NULL, NULL, 0);
651
wq = (unsigned char)*q;
655
if (bad_r_mb != bad_q_mb ||
656
towlower(wr) != towlower(wq))
668
haystack += move_mbright(haystack, 0);
609
return (char *)haystack;
611
haystack_len = mbstrlen(haystack);
612
needle_len = mbstrlen(needle);
614
for (; *haystack != '\0' && haystack_len >= needle_len;
615
haystack += move_mbright(haystack, 0), haystack_len--) {
616
if (mbstrncasecmp(haystack, needle, needle_len) == 0)
617
return (char *)haystack;
674
return found_needle ? haystack : NULL;
677
623
return strcasestr(haystack, needle);
680
626
#if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP)
681
627
/* This function is equivalent to strstr(), except in that it scans the
682
628
* string in reverse, starting at rev_start. */
683
const char *revstrstr(const char *haystack, const char *needle, const
629
char *revstrstr(const char *haystack, const char *needle, const char
632
size_t rev_start_len, needle_len;
686
634
assert(haystack != NULL && needle != NULL && rev_start != NULL);
688
for (; rev_start >= haystack; rev_start--) {
691
for (r = rev_start, q = needle; *r == *q && *q != '\0'; r++, q++)
637
return (char *)rev_start;
639
needle_len = strlen(needle);
641
if (strlen(haystack) < needle_len)
644
rev_start_len = strlen(rev_start);
646
for (; rev_start >= haystack; rev_start--, rev_start_len++) {
647
if (rev_start_len >= needle_len && strncmp(rev_start, needle,
649
return (char *)rev_start;
702
656
#ifndef NANO_TINY
703
657
/* This function is equivalent to strcasestr(), except in that it scans
704
658
* the string in reverse, starting at rev_start. */
705
const char *revstrcasestr(const char *haystack, const char *needle,
706
const char *rev_start)
659
char *revstrcasestr(const char *haystack, const char *needle, const char
662
size_t rev_start_len, needle_len;
708
664
assert(haystack != NULL && needle != NULL && rev_start != NULL);
710
for (; rev_start >= haystack; rev_start--) {
711
const char *r = rev_start, *q = needle;
713
for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++)
667
return (char *)rev_start;
669
needle_len = strlen(needle);
671
if (strlen(haystack) < needle_len)
674
rev_start_len = strlen(rev_start);
676
for (; rev_start >= haystack; rev_start--, rev_start_len++) {
677
if (rev_start_len >= needle_len && strncasecmp(rev_start,
678
needle, needle_len) == 0)
679
return (char *)rev_start;
723
685
/* This function is equivalent to strcasestr() for multibyte strings,
724
686
* except in that it scans the string in reverse, starting at
726
const char *mbrevstrcasestr(const char *haystack, const char *needle,
727
const char *rev_start)
688
char *mbrevstrcasestr(const char *haystack, const char *needle, const
729
691
#ifdef ENABLE_UTF8
733
bool begin_line = FALSE, found_needle = FALSE;
693
bool begin_line = FALSE;
694
size_t rev_start_len, needle_len;
735
696
assert(haystack != NULL && needle != NULL && rev_start != NULL);
737
r_mb = charalloc(MB_CUR_MAX);
738
q_mb = charalloc(MB_CUR_MAX);
699
return (char *)rev_start;
701
needle_len = mbstrlen(needle);
703
if (mbstrlen(haystack) < needle_len)
706
rev_start_len = mbstrlen(rev_start);
740
708
while (!begin_line) {
741
const char *r = rev_start, *q = needle;
742
int r_mb_len, q_mb_len;
745
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
747
r_mb_len = parse_mbchar(r, r_mb, NULL);
749
if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
750
mbtowc(NULL, NULL, 0);
751
wr = (unsigned char)*r;
755
q_mb_len = parse_mbchar(q, q_mb, NULL);
757
if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
758
mbtowc(NULL, NULL, 0);
759
wq = (unsigned char)*q;
763
if (bad_r_mb != bad_q_mb ||
764
towlower(wr) != towlower(wq))
709
if (rev_start_len >= needle_len && mbstrncasecmp(rev_start,
710
needle, needle_len) == 0)
711
return (char *)rev_start;
776
713
if (rev_start == haystack)
777
714
begin_line = TRUE;
779
716
rev_start = haystack + move_mbleft(haystack, rev_start -
786
return found_needle ? rev_start : NULL;
789
725
return revstrcasestr(haystack, needle, rev_start);
981
907
#ifdef ENABLE_UTF8
983
910
char *chr_mb = charalloc(MB_CUR_MAX);
989
chr_mb_len = parse_mbchar(s, chr_mb, NULL);
912
for (; *s != '\0'; s += move_mbright(s, 0)) {
913
parse_mbchar(s, chr_mb, NULL);
991
915
if (is_blank_mbchar(chr_mb)) {