21
21
#include "c-strcasestr.h"
23
23
#include <stdbool.h>
25
24
#include <string.h>
28
26
#include "c-ctype.h"
30
/* Knuth-Morris-Pratt algorithm.
31
See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
32
Return a boolean indicating success. */
34
knuth_morris_pratt (const char *haystack, const char *needle,
37
size_t m = strlen (needle);
39
/* Allocate the table. */
40
size_t *table = (size_t *) malloca (m * sizeof (size_t));
45
0 < table[i] <= i is defined such that
46
rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i]
48
forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1],
49
and table[i] is as large as possible with this property.
50
table[0] remains uninitialized. */
56
for (i = 2; i < m; i++)
58
unsigned char b = c_tolower ((unsigned char) needle[i - 1]);
62
if (b == c_tolower ((unsigned char) needle[j]))
77
/* Search, using the table to accelerate the processing. */
80
const char *rhaystack;
81
const char *phaystack;
87
/* Invariant: phaystack = rhaystack + j. */
88
while (*phaystack != '\0')
89
if (c_tolower ((unsigned char) needle[j])
90
== c_tolower ((unsigned char) *phaystack))
96
/* The entire needle has been found. */
103
/* Found a match of needle[0..j-1], mismatch at needle[j]. */
104
rhaystack += table[j];
109
/* Found a mismatch at needle[0] already. */
27
#include "c-strcase.h"
29
/* Two-Way algorithm. */
30
#define RETURN_TYPE char *
31
#define AVAILABLE(h, h_l, j, n_l) \
32
(!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
33
&& ((h_l) = (j) + (n_l)))
34
#define CANON_ELEMENT c_tolower
35
#define CMP_FUNC(p1, p2, l) \
36
c_strncasecmp ((const char *) (p1), (const char *) (p2), l)
37
#include "str-two-way.h"
119
39
/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
121
Note: This function may, in multibyte locales, return success even if
122
strlen (haystack) < strlen (needle) ! */
40
comparison from the C locale, regardless of the current locale. */
124
c_strcasestr (const char *haystack, const char *needle)
42
c_strcasestr (const char *haystack_start, const char *needle_start)
126
/* Be careful not to look at the entire extent of haystack or needle
127
until needed. This is useful because of these two cases:
128
- haystack may be very long, and a match of needle found early,
129
- needle may be very long, and not even a short initial segment of
130
needle may be found in haystack. */
133
/* Minimizing the worst-case complexity:
134
Let n = strlen(haystack), m = strlen(needle).
135
The naïve algorithm is O(n*m) worst-case.
136
The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
138
To achieve linear complexity and yet amortize the cost of the memory
139
allocation, we activate the Knuth-Morris-Pratt algorithm only once
140
the naïve algorithm has already run for some time; more precisely,
142
- the outer loop count is >= 10,
143
- the average number of comparisons per outer loop is >= 5,
144
- the total number of comparisons is >= m.
145
But we try it only once. If the memory allocation attempt failed,
146
we don't retry it. */
148
size_t outer_loop_count = 0;
149
size_t comparison_count = 0;
150
size_t last_ccount = 0; /* last comparison count */
151
const char *needle_last_ccount = needle; /* = needle + last_ccount */
153
/* Speed up the following searches of needle by caching its first
155
unsigned char b = c_tolower ((unsigned char) *needle);
160
if (*haystack == '\0')
164
/* See whether it's advisable to use an asymptotically faster
167
&& outer_loop_count >= 10
168
&& comparison_count >= 5 * outer_loop_count)
170
/* See if needle + comparison_count now reaches the end of
172
if (needle_last_ccount != NULL)
174
needle_last_ccount +=
175
strnlen (needle_last_ccount, comparison_count - last_ccount);
176
if (*needle_last_ccount == '\0')
177
needle_last_ccount = NULL;
178
last_ccount = comparison_count;
180
if (needle_last_ccount == NULL)
182
/* Try the Knuth-Morris-Pratt algorithm. */
185
knuth_morris_pratt (haystack, needle - 1, &result);
187
return (char *) result;
194
if (c_tolower ((unsigned char) *haystack) == b)
195
/* The first character matches. */
197
const char *rhaystack = haystack + 1;
198
const char *rneedle = needle;
200
for (;; rhaystack++, rneedle++)
202
if (*rneedle == '\0')
204
return (char *) haystack;
205
if (*rhaystack == '\0')
209
if (c_tolower ((unsigned char) *rhaystack)
210
!= c_tolower ((unsigned char) *rneedle))
211
/* Nothing in this round. */
218
return (char *) haystack;
44
const char *haystack = haystack_start;
45
const char *needle = needle_start;
46
size_t needle_len; /* Length of NEEDLE. */
47
size_t haystack_len; /* Known minimum length of HAYSTACK. */
48
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
50
/* Determine length of NEEDLE, and in the process, make sure
51
HAYSTACK is at least as long (no point processing all of a long
52
NEEDLE if HAYSTACK is too short). */
53
while (*haystack && *needle)
54
ok &= (c_tolower ((unsigned char) *haystack++)
55
== c_tolower ((unsigned char) *needle++));
59
return (char *) haystack_start;
60
needle_len = needle - needle_start;
61
haystack = haystack_start + 1;
62
haystack_len = needle_len - 1;
64
/* Perform the search. Abstract memory is considered to be an array
65
of 'unsigned char' values, not an array of 'char' values. See
66
ISO C 99 section 6.2.6.1. */
67
if (needle_len < LONG_NEEDLE_THRESHOLD)
68
return two_way_short_needle ((const unsigned char *) haystack,
70
(const unsigned char *) needle_start,
72
return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
73
(const unsigned char *) needle_start,
77
#undef LONG_NEEDLE_THRESHOLD