1
/* phonetic.c - routines to do phonetic matching */
2
/* $OpenLDAP: pkg/ldap/servers/slapd/phonetic.c,v 1.22.2.3 2008/02/11 23:26:44 kurt Exp $ */
3
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
5
* Copyright 1998-2008 The OpenLDAP Foundation.
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted only as authorized by the OpenLDAP
12
* A copy of this license is available in the file LICENSE in the
13
* top-level directory of the distribution or, alternatively, at
14
* <http://www.OpenLDAP.org/license.html>.
16
/* Portions Copyright (c) 1995 Regents of the University of Michigan.
17
* All rights reserved.
19
* Redistribution and use in source and binary forms are permitted
20
* provided that this notice is preserved and that due credit is given
21
* to the University of Michigan at Ann Arbor. The name of the University
22
* may not be used to endorse or promote products derived from this
23
* software without specific prior written permission. This software
24
* is provided ``as is'' without express or implied warranty.
32
#include <ac/string.h>
33
#include <ac/socket.h>
38
#if !defined(SLAPD_METAPHONE) && !defined(SLAPD_PHONETIC)
39
#define SLAPD_METAPHONE
42
/*
 * iswordbreak(x): true when x cannot be part of a word, i.e. when x is
 * not an ASCII character, is whitespace, punctuation or a digit, or is
 * the terminating NUL.
 *
 * The argument is expanded several times, so it must not have side
 * effects.  It is cast to unsigned char before being handed to
 * isspace/ispunct/isdigit; the isascii test comes first in the
 * expression, so non-ASCII values short-circuit before those calls.
 */
#define iswordbreak(x) (!isascii(x) || isspace((unsigned char) (x)) || \
43
ispunct((unsigned char) (x)) || \
44
isdigit((unsigned char) (x)) || (x) == '\0')
54
while ( iswordbreak( *s ) ) {
72
while ( ! iswordbreak( *s ) ) {
76
while ( iswordbreak( *s ) ) {
93
for ( s = w; !iswordbreak( *s ); s++ )
104
#ifndef MAXPHONEMELEN
105
#define MAXPHONEMELEN 4
108
#if defined(SLAPD_PHONETIC)
110
/* lifted from isode-8.0 */
114
char code, adjacent, ch;
117
char phoneme[MAXPHONEMELEN + 1];
120
if ( p == NULL || *p == '\0' ) {
125
phoneme[0] = TOUPPER((unsigned char)*p);
128
for ( i = 0; i < 99 && (! iswordbreak(*p)); p++ ) {
129
ch = TOUPPER ((unsigned char)*p);
138
code = (adjacent != '1') ? '1' : '0';
148
code = (adjacent != '2') ? '2' : '0';
152
code = (adjacent != '3') ? '3' : '0';
155
code = (adjacent != '4') ? '4' : '0';
159
code = (adjacent != '5') ? '5' : '0';
162
code = (adjacent != '6') ? '6' : '0';
171
} else if ( code != '0' ) {
172
if ( i == MAXPHONEMELEN )
174
adjacent = phoneme[i] = code;
182
return( ch_strdup( phoneme ) );
185
#elif defined(SLAPD_METAPHONE)
188
* Metaphone was originally developed by Lawrence Philips and
189
* published in the "Computer Language" magazine in 1990.
192
* Metaphone copied from C Gazette, June/July 1991, pp 56-57,
193
* author Gary A. Parker, with changes by Bernard Tiffany of the
194
* University of Michigan, and more changes by Tim Howes of the
195
* University of Michigan.
198
/*
 * Character coding array: one entry per upper-case letter, indexed by
 * (letter - 'A').  Each entry is a set of flag bits that the
 * vowel/same/varson/frontv/noghf macros test:
 *    1  vowel   A E I O U
 *    2  same    F J L M N R
 *    4  varson  C G P S T
 *    8  frontv  E I Y
 *   16  noghf   B D H
 * E and I carry 9 because they are both vowels (1) and front vowels (8).
 */
199
static const char vsvfn[26] = {
200
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2,
201
/* A B C D E F G H I J K L M */
202
2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0};
203
/* N O P Q R S T U V W X Y Z */
205
/* Macros to access character coding array */
206
#define vowel(x) ((x) != '\0' && (vsvfn[(x) - 'A'] & 0x01) != 0) /* flag 1: A E I O U */
207
#define same(x) ((x) != '\0' && (vsvfn[(x) - 'A'] & 0x02) != 0) /* flag 2: F J L M N R */
208
#define varson(x) ((x) != '\0' && (vsvfn[(x) - 'A'] & 0x04) != 0) /* flag 4: C G P S T */
209
#define frontv(x) ((x) != '\0' && (vsvfn[(x) - 'A'] & 0x08) != 0) /* flag 8: E I Y */
210
#define noghf(x) ((x) != '\0' && (vsvfn[(x) - 'A'] & 0x10) != 0) /* flag 16: B D H */
213
phonetic( char *Word )
215
char *n, *n_start, *n_end; /* pointers to string */
216
char *metaph_end; /* pointers to metaph */
217
char ntrans[40]; /* word with uppercase letters */
218
int KSflag; /* state flag for X -> KS */
219
char buf[MAXPHONEMELEN + 2];
223
* Copy Word to internal buffer, dropping non-alphabetic characters
224
* and converting to upper case
227
for (n = ntrans + 4, n_end = ntrans + 35; !iswordbreak( *Word ) &&
229
if (isalpha((unsigned char)*Word))
230
*n++ = TOUPPER((unsigned char)*Word);
234
if (n == ntrans + 4) {
235
return( ch_strdup( buf ) ); /* Return if null */
237
n_end = n; /* Set n_end to end of string */
239
/* ntrans[0] will always be == 0 */
247
*n = 0; /* Pad with nulls */
248
n = ntrans + 4; /* Assign pointer to start */
250
/* Check for PN, KN, GN, AE, WR, WH, and X at start */
255
/* 'PN', 'KN', 'GN' becomes 'N' */
260
/* 'AE' becomes 'E' */
265
/* 'WR' becomes 'R', and 'WH' to 'H' */
268
else if (*(n + 1) == 'H') {
274
/* 'X' becomes 'S' */
280
* Now step through the string, stopping at the end of the string or when
281
* the computed 'metaph' is MAXPHONEMELEN characters long
284
KSflag = 0; /* state flag for KS translation */
285
for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
286
n <= n_end && Metaph < metaph_end; n++) {
291
/* Drop duplicates except for CC */
292
if (*(n - 1) == *n && *n != 'C')
294
/* Check for F J L M N R or first letter vowel */
295
if (same(*n) || (n == n_start && vowel(*n)))
304
if (n == (n_end - 1) && *(n - 1) != 'M')
310
* X if in -CIA-, -CH- else S if in
311
* -CI-, -CE-, -CY- else dropped if
312
* in -SCI-, -SCE-, -SCY- else K
314
if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
315
if (*(n + 1) == 'I' && *(n + 2) == 'A')
317
else if (frontv(*(n + 1)))
319
else if (*(n + 1) == 'H')
320
*Metaph++ = ((n == n_start && !vowel(*(n + 2)))
322
? (char) 'K' : (char) 'X';
330
* J if in DGE or DGI or DGY else T
332
*Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
333
? (char) 'J' : (char) 'T';
338
* F if in -GH and not B--GH, D--GH,
339
* -H--GH, -H---GH else dropped if
340
* -GNED, -GN, -DGE-, -DGI-, -DGY-
341
* else J if in -GE-, -GI-, -GY- and
344
if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
345
(*(n + 1) != 'N' || ((n + 1) < n_end &&
346
(*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
347
(*(n - 1) != 'D' || !frontv(*(n + 1))))
348
*Metaph++ = (frontv(*(n + 1)) &&
349
*(n + 2) != 'G') ? (char) 'G' : (char) 'K';
350
else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
357
* H if before a vowel and not after
358
* C, G, P, S, T else dropped
360
if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
367
* dropped if after C else K
375
* F if before H, else P
377
*Metaph++ = *(n + 1) == 'H' ?
378
(char) 'F' : (char) 'P';
390
* X in -SH-, -SIO- or -SIA- else S
392
*Metaph++ = (*(n + 1) == 'H' ||
393
(*(n + 1) == 'I' && (*(n + 2) == 'O' ||
395
? (char) 'X' : (char) 'S';
400
* X in -TIA- or -TIO- else 0 (zero)
401
* before H else dropped if in -TCH-
404
if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
407
else if (*(n + 1) == 'H')
409
else if (*(n + 1) != 'C' || *(n + 2) != 'H')
422
* W after a vowel, else dropped
427
* Y unless followed by a vowel
440
*Metaph++ = 'K'; /* Insert K, then S */
455
*Metaph = 0; /* Null terminate */
456
return( ch_strdup( buf ) );
459
#endif /* SLAPD_METAPHONE */