2
* Simple library to detect and validate SSN and Credit Card numbers.
4
* Copyright (C) 2007-2008 Sourcefire, Inc.
6
* Authors: Martin Roesch <roesch@sourcefire.com>
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License version 2 as
10
* published by the Free Software Foundation.
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24
#include "clamav-config.h"
36
/* detection mode macros for the contains_* functions */
37
#define DETECT_MODE_DETECT 0
38
#define DETECT_MODE_COUNT 1
40
/* group number mapping is here */
41
/* http://www.socialsecurity.gov/employer/highgroup.txt */
42
/* here's a perl script to convert the raw data from the highgroup.txt
43
* file to the data set in ssn_max_group[]:
48
while ($i =~ s/(\d{3}) (\d{2})//) {
62
* run 'perl convert.pl < highgroup.txt' to generate the data
66
/* MAX_AREA is the maximum assigned area number. This can be derived from
67
* the data in the highgroup.txt file by looking at the last area->group
68
* mapping from that file.
72
/* array of max group numbers for a given area number */
73
// Lookup table indexed by SSN area number; it has MAX_AREA+1 entries so
// that index MAX_AREA is valid. Entry 0 is the literal 0 written on the
// first line of the initializer. The table is read as
// ssn_max_group[area_number] and compared against the parsed group number
// during SSN validation.
// NOTE(review): the end of the initializer is not visible in this excerpt.
static int ssn_max_group[MAX_AREA+1] = { 0,
74
6, 6, 4, 8, 8, 8, 6, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
75
90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 88, 88, 88, 88, 72, 72, 72, 72,
76
70, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 96, 96, 96, 96, 96, 96, 96,
77
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
78
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
79
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
80
96, 96, 96, 96, 96, 96, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
81
94, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 17, 17, 17, 17, 17, 17,
82
17, 17, 17, 17, 17, 17, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
83
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 82, 82, 82, 82, 82, 82, 82, 82,
84
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
85
82, 82, 79, 79, 79, 79, 79, 79, 79, 79, 77, 6, 4, 99, 99, 99, 99, 99, 99,
86
99, 99, 99, 53, 53, 53, 53, 53, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
87
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
88
99, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
89
13, 13, 13, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 33, 33,
90
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 6, 6, 6, 6, 6, 6,
91
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4,
93
35, 35, 35, 35, 35, 35, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
94
33, 33, 33, 33, 33, 33, 29, 29, 29, 29, 29, 29, 29, 29, 27, 27, 27, 27, 27,
95
67, 67, 67, 67, 67, 67, 67, 67, 99, 99, 99, 99, 99, 99, 99, 99, 63, 61, 61,
96
61, 61, 61, 61, 61, 61, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
97
99, 99, 23, 23, 23, 23, 23, 23, 23, 21, 21, 99, 99, 99, 99, 99, 99, 99, 99,
98
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 51, 51, 51, 51, 49, 49, 49, 49,
99
49, 49, 37, 37, 37, 37, 37, 37, 37, 37, 25, 25, 25, 25, 25, 25, 25, 25, 25,
100
25, 25, 25, 23, 23, 23, 33, 33, 41, 39, 53, 51, 51, 51, 27, 27, 27, 27, 27,
101
27, 27, 45, 43, 79, 77, 55, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 63, 63,
102
63, 61, 61, 61, 61, 61, 61, 75, 73, 73, 73, 73, 99, 99, 99, 99, 99, 99, 99,
103
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
104
99, 99, 99, 51, 99, 99, 45, 45, 43, 37, 99, 99, 99, 99, 99, 61, 99, 3, 99,
105
99, 99, 99, 99, 99, 99, 84, 84, 84, 84, 99, 99, 67, 67, 65, 65, 65, 65, 65,
106
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 11,
107
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96,
108
96, 44, 44, 46, 46, 46, 44, 28, 26, 26, 26, 26, 16, 16, 16, 14, 14, 14, 14,
109
36, 34, 34, 34, 34, 34, 34, 34, 34, 14, 14, 12, 12, 90, 14, 14, 14, 14, 12,
110
12, 12, 12, 12, 12, 9, 9, 7, 7, 7, 7, 7, 7, 7, 18, 18, 18, 18, 18,
111
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
112
28, 18, 18, 10, 14, 10, 10, 10, 10, 10, 9, 9, 3, 1, 5, 5, 5, 5, 5,
113
5, 3, 3, 82, 82, 66, 66, 64, 64, 64, 64, 64
118
// dlp_is_valid_cc: examine the bytes at the start of buffer for a credit
// card number.
//
//   buffer  bytes to examine
//   length  number of bytes available in buffer
//
// Visible steps, in order:
//   1. Guards test for a NULL buffer, fewer than 13 bytes, and a first
//      byte that is not a digit or is above the character 6.
//   2. A loop walks buffer, tests each byte with isdigit and isspace,
//      stores bytes into cc_digits and derives a per-digit value val.
//   3. The checksum is tested with sum % 10 and at least 13 digits are
//      required.
//   4. The digit count (13, 14, 15 or 16) and the leading digits are
//      matched against issuer prefixes; each match logs a debug message
//      naming the issuer.
//
// NOTE(review): this excerpt omits lines (local declarations, braces,
// return statements, the code that accumulates sum), so return values are
// not shown here. contains_cc treats a result equal to 1 as a match.
int dlp_is_valid_cc(const unsigned char *buffer, int length)
127
if(buffer == NULL || length < 13)
129
/* if the first digit is greater than 6 it isn't one of the major
131
* reference => http://www.beachnet.com/~hstiles/cardtype.html
133
if(!isdigit(buffer[0]) || buffer[0] > '6')
139
for(i = 0; i < length; i++)
141
if(isdigit(buffer[i]) == 0)
143
if(isspace(buffer[i]))
149
cc_digits[digits] = buffer[i];
151
val = buffer[i] - '0';
155
// Doubles val; a doubled value above 9 (that is 10..18) is replaced by
// the sum of its two decimal digits. Together with the sum % 10 test
// below this looks like the Luhn mod-10 check - TODO confirm against the
// lines not visible here.
if((val *= 2) > 9) val = (val - 10) + 1;
161
// Terminate cc_digits so it can be printed with %s in the debug messages.
cc_digits[digits] = 0;
162
// True when scanning stopped before the end of the input and the byte at
// the stop position is still a digit; the action taken is not visible in
// this excerpt.
if(i < length && isdigit(buffer[i]))
165
if((sum % 10 != 0) || (digits < 13))
168
if(digits == 13) /* VISA */
170
if(cc_digits[0] == '4') {
171
cli_dbgmsg("dlp_is_valid_cc: VISA [1] (%s)\n", cc_digits);
175
else if(digits == 14) /* Diners Club */
177
// 14 digits starting with 36 or 38
if(cc_digits[0] == '3' && (cc_digits[1] == '6' || cc_digits[1] == '8'))
179
cli_dbgmsg("dlp_is_valid_cc: Diners Club [1] (%s)\n", cc_digits);
182
// 14 digits starting with 300 through 305
else if(cc_digits[0] == '3' && cc_digits[1] == '0')
184
val = cc_digits[2] - '0';
185
if(val >= 0 && val <= 5) {
186
cli_dbgmsg("dlp_is_valid_cc: Diners Club [2] (%s)\n", cc_digits);
191
else if(digits == 15)
193
if(cc_digits[0] == '3' && (cc_digits[1] == '4' || cc_digits[1] == '7')) /*AMEX*/
195
cli_dbgmsg("dlp_is_valid_cc: AMEX (%s)\n", cc_digits);
198
// 15 digits starting with 2131 or 1800
else if(!strncmp(cc_digits, "2131", 4) || !strncmp(cc_digits, "1800", 4))
200
cli_dbgmsg("dlp_is_valid_cc: JCB [1] (%s)\n", cc_digits);
204
else if(digits == 16)
206
if(cc_digits[0] == '3') /* JCB */
208
cli_dbgmsg("dlp_is_valid_cc: JCB [2] (%s)\n", cc_digits);
211
else if(cc_digits[0] == '4') /* VISA */
213
cli_dbgmsg("dlp_is_valid_cc: VISA [2] (%s)\n", cc_digits);
216
else if(cc_digits[0] == '5') /* MASTERCARD */
218
// second digit must be 1 through 5, that is prefixes 51..55
val = cc_digits[1] - '0';
219
if(val >= 1 && val <= 5) {
220
cli_dbgmsg("dlp_is_valid_cc: MASTERCARD (%s)\n", cc_digits);
224
else if(!strncmp(cc_digits, "6011", 4)) /* Discover */
226
cli_dbgmsg("dlp_is_valid_cc: Discover (%s)\n", cc_digits);
234
// contains_cc: scan buffer for credit card numbers with dlp_is_valid_cc.
//
//   buffer   bytes to scan
//   length   number of bytes in buffer
//   detmode  compared against DETECT_MODE_DETECT after a match; callers in
//            this file pass DETECT_MODE_DETECT or DETECT_MODE_COUNT
//
// A position idx is only handed to dlp_is_valid_cc when it is the first
// byte of buffer or the byte before it is not a digit, so a candidate
// never starts in the middle of a longer digit run. The validator gets
// the number of bytes remaining from idx to the end of buffer.
//
// NOTE(review): the loop header, the match counter and the return
// statements are not visible in this excerpt.
static int contains_cc(const unsigned char *buffer, int length, int detmode)
236
const unsigned char *idx;
237
const unsigned char *end;
240
if(buffer == NULL || length < 13)
245
end = buffer + length;
251
if((idx == buffer || !isdigit(idx[-1])) && dlp_is_valid_cc(idx, length - (idx - buffer)) == 1)
253
if(detmode == DETECT_MODE_DETECT)
258
/* if we got a valid match we should increment the idx ptr
259
* to gain a little performance
261
// The skip is 15 bytes, or length-1 when the whole buffer is 15 bytes
// or shorter; note it is derived from the total length, not from the
// bytes remaining after idx.
idx += (length > 15?15:(length-1));
271
// dlp_get_cc_count: run contains_cc over buffer (length bytes) in
// DETECT_MODE_COUNT mode and return its result unchanged.
int dlp_get_cc_count(const unsigned char *buffer, int length)
273
return contains_cc(buffer, length, DETECT_MODE_COUNT);
276
// dlp_has_cc: run contains_cc over buffer (length bytes) in
// DETECT_MODE_DETECT mode and return its result unchanged.
int dlp_has_cc(const unsigned char *buffer, int length)
278
return contains_cc(buffer, length, DETECT_MODE_DETECT);
281
// dlp_is_valid_ssn: check whether the bytes at the start of buffer form a
// plausible US Social Security number.
//
//   buffer  bytes to examine
//   length  number of bytes available in buffer
//   format  SSN_FORMAT_HYPHENS expects 11 bytes with hyphens at offsets 3
//           and 6; SSN_FORMAT_STRIPPED expects 9 bytes accepted by
//           cli_isnumber
//
// Visible steps: the candidate is copied into numbuf, split by sscanf into
// three fields (the last one is serial_number), and then range-checked
// against MAX_AREA, the ssn_max_group table and a few fixed exclusions.
// A debug message with the candidate is logged at the end.
//
// NOTE(review): this excerpt omits lines (local declarations, the switch
// header, the sscanf format strings, return statements). No NULL check on
// buffer is visible here. contains_ssn treats a result equal to 1 as a
// match.
int dlp_is_valid_ssn(const unsigned char *buffer, int length, int format)
293
minlength = (format==SSN_FORMAT_HYPHENS?11:9);
295
if(length < minlength)
298
// A digit directly after the candidate means it is part of a longer
// digit run.
if((length > minlength) && isdigit(buffer[minlength]))
301
// NOTE(review): the declaration of numbuf is not visible; it needs room
// for minlength bytes plus the terminator written on the next statement.
strncpy(numbuf, buffer, minlength);
302
numbuf[minlength] = 0;
304
/* sscanf parses and (basically) validates the string for us */
307
case SSN_FORMAT_HYPHENS:
308
if(numbuf[3] != '-' || numbuf[6] != '-')
311
// all three fields must convert
if(sscanf((const char *) numbuf,
315
&serial_number) != 3)
320
case SSN_FORMAT_STRIPPED:
321
if(!cli_isnumber(numbuf))
324
// all three fields must convert
if(sscanf((const char *) numbuf,
328
&serial_number) != 3)
335
/* start validating */
336
/* validation data taken from
337
* http://en.wikipedia.org/wiki/Social_Security_number_%28United_States%29
339
// NOTE(review): some operands of this condition are on lines not visible
// in this excerpt. area_number is later used as an index into
// ssn_max_group, so it must also be non-negative - confirm a lower bound
// check exists in the missing lines.
if(area_number > MAX_AREA ||
340
area_number == 666 ||
344
serial_number <= 0 ||
345
serial_number > 9999)
348
// 987-65-4320 through 987-65-4329 are singled out; presumably a reserved
// range - verify against the reference cited above.
if(area_number == 987 && group_number == 65)
350
if(serial_number >= 4320 && serial_number <= 4329)
354
// group number above the per-area maximum from the table
if(group_number > ssn_max_group[area_number])
358
cli_dbgmsg("dlp_is_valid_ssn: SSN_%s: %s\n", format == SSN_FORMAT_HYPHENS ? "HYPHENS" : "STRIPPED", numbuf);
363
// contains_ssn: scan buffer for Social Security numbers with
// dlp_is_valid_ssn.
//
//   buffer   bytes to scan
//   length   number of bytes in buffer
//   format   passed through to dlp_is_valid_ssn; also selects how far idx
//            is advanced after a match (11 for SSN_FORMAT_HYPHENS, else 9)
//   detmode  compared against DETECT_MODE_COUNT after a match
//
// A position idx is only handed to dlp_is_valid_ssn when it is the first
// byte of buffer or the byte before it is not a digit. The validator gets
// the number of bytes remaining from idx to the end of buffer.
//
// NOTE(review): the guard below uses 11 bytes for both formats although a
// stripped candidate only needs 9 - confirm whether 9 and 10 byte inputs
// are meant to be excluded for the stripped format.
// NOTE(review): the loop header, the match counter and the return
// statements are not visible in this excerpt.
static int contains_ssn(const unsigned char *buffer, int length, int format, int detmode)
365
const unsigned char *idx;
366
const unsigned char *end;
369
if(buffer == NULL || length < 11)
372
end = buffer + length;
378
/* check for area number and the first hyphen */
379
if((idx == buffer || !isdigit(idx[-1])) && dlp_is_valid_ssn(idx, length - (idx - buffer), format) == 1)
381
if(detmode == DETECT_MODE_COUNT)
384
/* hop over the matched bytes if we found an SSN */
385
idx += ((format == SSN_FORMAT_HYPHENS)?11:9);
399
// dlp_get_stripped_ssn_count: delegate to contains_ssn for buffer and
// return its result.
// NOTE(review): the remaining call arguments are on lines not visible in
// this excerpt; going by the function name they presumably select the
// stripped format and count mode - confirm.
int dlp_get_stripped_ssn_count(const unsigned char *buffer, int length)
401
return contains_ssn(buffer,
407
// dlp_get_normal_ssn_count: delegate to contains_ssn for buffer and
// return its result.
// NOTE(review): the remaining call arguments are on lines not visible in
// this excerpt; going by the function name they presumably select the
// hyphenated format and count mode - confirm.
int dlp_get_normal_ssn_count(const unsigned char *buffer, int length)
409
return contains_ssn(buffer,
415
// dlp_get_ssn_count: return the sum of dlp_get_stripped_ssn_count and
// dlp_get_normal_ssn_count for the same buffer and length, so the buffer
// is processed once per helper.
int dlp_get_ssn_count(const unsigned char *buffer, int length)
417
/* this will suck for performance but will find SSNs in either
420
return (dlp_get_stripped_ssn_count(buffer, length) + dlp_get_normal_ssn_count(buffer, length));
423
// dlp_has_ssn: combine two contains_ssn calls over buffer with a bitwise
// OR and return the result. Because the operator is | rather than ||,
// both calls are always evaluated. The last visible argument is
// DETECT_MODE_DETECT.
// NOTE(review): the other call arguments are on lines not visible in this
// excerpt; presumably one call per SSN format - confirm.
int dlp_has_ssn(const unsigned char *buffer, int length)
425
return (contains_ssn(buffer,
429
| contains_ssn(buffer,
432
DETECT_MODE_DETECT));
435
// dlp_has_stripped_ssn: delegate to contains_ssn for buffer and return
// its result.
// NOTE(review): the remaining call arguments are on lines not visible in
// this excerpt; going by the function name they presumably select the
// stripped format and detect mode - confirm.
int dlp_has_stripped_ssn(const unsigned char *buffer, int length)
437
return contains_ssn(buffer,
443
// dlp_has_normal_ssn: delegate to contains_ssn for buffer and return its
// result.
// NOTE(review): the remaining call arguments are on lines not visible in
// this excerpt; going by the function name they presumably select the
// hyphenated format and detect mode - confirm.
int dlp_has_normal_ssn(const unsigned char *buffer, int length)
445
return contains_ssn(buffer,