5
/* IP address pattern matching
7
/* #include <ip_match.h>
9
/* char *ip_match_parse(byte_codes, pattern)
10
/* VSTRING *byte_codes;
13
/* char *ip_match_save(byte_codes)
14
/* const VSTRING *byte_codes;
16
/* int ip_match_execute(byte_codes, addr_bytes)
17
/* const char *byte_codes;
18
/* const char *addr_bytes;
20
/* char *ip_match_dump(printable, byte_codes)
21
/* VSTRING *printable;
22
/* const char *byte_codes;
24
/* This module supports IP address pattern matching. See below
25
/* for a description of the supported address pattern syntax.
27
/* This implementation aims to minimize the cost of encoding
28
/* the pattern in internal form, while still providing good
29
/* matching performance in the typical case. The first byte
30
/* of an encoded pattern specifies the expected address family
31
/* (for example, AF_INET); other details of the encoding are
32
/* private and are subject to change.
34
/* ip_match_parse() converts the user-specified pattern to
35
/* internal form. The result value is a null pointer in case
36
/* of success, or a pointer into the byte_codes buffer with a
37
/* detailed problem description.
39
/* ip_match_save() saves the result from ip_match_parse() for
40
/* longer-term usage. The result should be passed to myfree().
42
/* ip_match_execute() matches a binary network address in addr_bytes
43
/* against a byte-code array in byte_codes. It is an error to
44
/* use different address families for the byte_codes and addr_bytes
45
/* arguments (the first byte-code value contains the expected
46
/* address family). The result is non-zero in case of success.
48
/* ip_match_dump() produces an ASCII dump of a byte-code array.
49
/* The dump is supposed to be identical to the input pattern
50
/* (modulo upper/lower case or leading nulls with IPv6). This
51
/* function is primarily a debugging aid.
55
/* Binary network address in network-byte order.
57
/* Byte-code array produced by ip_match_parse().
59
/* Human-readable address pattern.
61
/* Storage for ASCII dump of a byte-code array.
62
/* IPV4 PATTERN SYNTAX
65
/* An IPv4 address pattern has four fields separated by ".".
66
/* Each field is either a decimal number, or a sequence inside
67
/* "[]" that contains one or more ";"-separated decimal
68
/* numbers or number..number ranges.
70
/* Examples of patterns are 1.2.3.4 (matches itself, as one
71
/* would expect) and 1.2.3.[2;4;6..8] (matches 1.2.3.2, 1.2.3.4,
72
/* 1.2.3.6, 1.2.3.7, 1.2.3.8).
74
/* Thus, any pattern field can be a sequence inside "[]", but
75
/* a "[]" sequence cannot span multiple address fields, and
76
/* a pattern field cannot contain both a number and a "[]"
77
/* sequence at the same time.
79
/* This means that the pattern 1.2.[3.4] is not valid (the
80
/* sequence [3.4] cannot span two address fields) and the
81
/* pattern 1.2.3.3[6..9] is also not valid (the last field
82
/* cannot be both number 3 and sequence [6..9] at the same
85
/* The syntax for IPv4 patterns is as follows:
88
/* v4pattern = v4field "." v4field "." v4field "." v4field
90
/* v4field = v4octet | "[" v4sequence "]"
92
/* v4octet = any decimal number in the range 0 through 255
94
/* v4sequence = v4seq_member | v4sequence ";" v4seq_member
96
/* v4seq_member = v4octet | v4octet ".." v4octet
101
/* The Secure Mailer license must be distributed with this
105
/* IBM T.J. Watson Research
107
/* Yorktown Heights, NY 10598, USA
110
/* System library. */
112
#include <sys_defs.h>
113
#include <sys/socket.h>
117
/* Utility library. */
120
#include <mymalloc.h>
122
#include <ip_match.h>
125
* Token values. The in-band values are also used as byte-code values.
127
#define IP_MATCH_CODE_OPEN '[' /* in-band */
128
#define IP_MATCH_CODE_CLOSE ']' /* in-band */
129
#define IP_MATCH_CODE_OVAL 'N' /* in-band */
130
#define IP_MATCH_CODE_RANGE 'R' /* in-band */
131
#define IP_MATCH_CODE_EOF '\0' /* in-band */
132
#define IP_MATCH_CODE_ERR 256 /* out-of-band */
137
#define STR vstring_str
138
#define LEN VSTRING_LEN
140
/* ip_match_save - make longer-term copy of byte code */
142
char *ip_match_save(const VSTRING *byte_codes)
146
dst = mymalloc(LEN(byte_codes));
147
return (memcpy(dst, STR(byte_codes), LEN(byte_codes)));
150
/* ip_match_dump - byte-code pretty printer */
152
char *ip_match_dump(VSTRING *printable, const char *byte_codes)
154
const char *myname = "ip_match_dump";
155
const unsigned char *bp;
160
* Sanity check. Use different dumping loops for AF_INET and AF_INET6.
162
if (*byte_codes != AF_INET)
163
msg_panic("%s: malformed byte-code header", myname);
166
* Pretty-print and sanity-check the byte codes. Note: the loops in this
167
* code have no auto-increment at the end of the iteration. Instead, each
168
* byte-code handler bumps the byte-code pointer appropriately.
170
VSTRING_RESET(printable);
171
bp = (const unsigned char *) byte_codes + 1;
175
* Simple numeric field.
177
if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
178
vstring_sprintf_append(printable, "%d", *bp);
183
* Wild-card numeric field.
185
else if (ch == IP_MATCH_CODE_OPEN) {
186
vstring_sprintf_append(printable, "[");
189
if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
190
vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]);
194
else if (ch == IP_MATCH_CODE_OVAL) {
195
vstring_sprintf_append(printable, "%d", *bp);
198
/* End-of-wildcard. */
199
else if (ch == IP_MATCH_CODE_CLOSE) {
204
msg_panic("%s: unexpected byte code (decimal %d) "
205
"after \"%s\"", myname, ch, STR(printable));
207
/* Output the wild-card field separator and repeat the loop. */
208
if (*bp != IP_MATCH_CODE_CLOSE)
209
vstring_sprintf_append(printable, ";");
211
vstring_sprintf_append(printable, "]");
218
msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
219
myname, ch, STR(printable));
223
* Require four octets, not one more, not one less.
225
if (++octet_count == 4) {
227
msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
228
myname, ch, STR(printable));
229
return (STR(printable));
232
msg_panic("%s: truncated byte code after \"%s\"",
233
myname, STR(printable));
236
* Output the address field separator and repeat the loop.
238
vstring_sprintf_append(printable, ".");
242
/* ip_match_print_code_prefix - printable byte-code prefix */
244
static char *ip_match_print_code_prefix(const char *byte_codes, size_t len)
246
static VSTRING *printable = 0;
251
* This is primarily for emergency debugging so we don't care about
255
printable = vstring_alloc(100);
257
VSTRING_RESET(printable);
260
* Use decimal for IPv4 and hexadecimal otherwise, so that address octet
261
* values are easy to recognize.
263
fmt = (*byte_codes == AF_INET ? "%d " : "%02x ");
264
for (bp = byte_codes; bp < byte_codes + len; bp++)
265
vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp);
267
return (STR(printable));
270
/* ip_match_execute - byte-code matching engine */
272
int ip_match_execute(const char *byte_codes, const char *addr_bytes)
274
const char *myname = "ip_match_execute";
275
const unsigned char *bp;
276
const unsigned char *ap;
282
* Sanity check. Use different execute loops for AF_INET and AF_INET6.
284
if (*byte_codes != AF_INET)
285
msg_panic("%s: malformed byte-code header (decimal %d)",
286
myname, *(const unsigned char *) byte_codes);
289
* Match the address bytes against the byte codes. Avoid problems with
290
* (char -> int) sign extension on architectures with signed characters.
292
bp = (const unsigned char *) byte_codes + 1;
293
ap = (const unsigned char *) addr_bytes;
295
for (octet_count = 0; octet_count < 4; octet_count++, ap++) {
298
* Simple numeric field.
300
if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
308
* Wild-card numeric field.
310
else if (ch == IP_MATCH_CODE_OPEN) {
314
if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
316
matched = (*ap >= bp[0] && *ap <= bp[1]);
320
else if (ch == IP_MATCH_CODE_OVAL) {
322
matched = (*ap == *bp);
325
/* End-of-wildcard. */
326
else if (ch == IP_MATCH_CODE_CLOSE) {
331
size_t len = (const char *) bp - byte_codes - 1;
333
msg_panic("%s: unexpected byte code (decimal %d) "
334
"after \"%s\"", myname, ch,
335
ip_match_print_code_prefix(byte_codes, len));
346
size_t len = (const char *) bp - byte_codes - 1;
348
msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
349
myname, ch, ip_match_print_code_prefix(byte_codes, len));
355
/* ip_match_next_token - carve out the next token from input pattern */
357
static int ip_match_next_token(char **pstart, char **psaved_start, int *poval)
360
int oval; /* octet value */
361
int type; /* token value */
364
* Return a literal, error, or EOF token. Update the read pointer to the
365
* start of the next token or leave it at the string terminator.
367
#define IP_MATCH_RETURN_TOK(next, type) \
368
do { *pstart = (char *) (next); return (type); } while (0)
371
* Return a token that contains an IPv4 address octet value.
373
#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \
374
*poval = (oval); IP_MATCH_RETURN_TOK((next), type); \
378
* Light-weight tokenizer. Each result is an IPv4 address octet value, a
379
* literal character value, error, or EOF.
381
*psaved_start = *pstart;
382
cp = (unsigned char *) *pstart;
385
type = IP_MATCH_CODE_OVAL;
386
for (cp += 1; ISDIGIT(*cp); cp++) {
390
type = IP_MATCH_CODE_ERR;
392
IP_MATCH_RETURN_TOK_VAL(cp, type, oval);
394
IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp);
398
/* ipmatch_print_parse_error - formatted parsing error, with context */
400
static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply,
411
* Format the error type.
414
vstring_vsprintf(reply, fmt, ap);
418
* Format the error context. The syntax is complex enough that it is
419
* worth the effort to precisely indicate what input is in error.
421
* XXX Workaround for %.*s to avoid output when a zero width is specified.
424
start_width = here - start;
425
here_width = next - here;
426
vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"",
427
start_width, start_width == 0 ? "" : start,
428
here_width, here_width == 0 ? "" : here, next);
432
/* ip_match_parse - parse an entire wild-card address pattern */
434
char *ip_match_parse(VSTRING *byte_codes, char *pattern)
445
* Simplify this if we change to {} for wildcard notation.
447
#define FIND_TERMINATOR(start, cp) do { \
449
for (cp = (start) ; *cp; cp++) { \
450
if (*cp == '[') _level++; \
451
if (*cp != ']') continue; \
452
if (--_level == 0) break; \
457
* Strip [] around the entire pattern.
459
if (*pattern == '[') {
460
FIND_TERMINATOR(pattern, cp);
462
vstring_sprintf(byte_codes, "missing \"]\" character");
463
return (STR(byte_codes));
472
* Sanity check. In this case we can't show any error context.
475
vstring_sprintf(byte_codes, "empty address pattern");
476
return (STR(byte_codes));
480
* Simple parser with on-the-fly encoding. For now, IPv4 support only.
481
* Use different parser loops for IPv4 and IPv6.
483
VSTRING_RESET(byte_codes);
484
VSTRING_ADDCH(byte_codes, AF_INET);
489
* Require four address fields separated by ".", each field containing a
490
* numeric octet value or a sequence inside []. The loop head has no test
491
* and does not step the loop variable. The tokenizer advances the loop
492
* variable, and the loop termination logic is inside the loop.
495
switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) {
498
* Numeric address field.
500
case IP_MATCH_CODE_OVAL:
501
VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
502
VSTRING_ADDCH(byte_codes, oval);
506
* Wild-card address field.
508
case IP_MATCH_CODE_OPEN:
509
VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN);
510
/* Require ";"-separated numbers or numeric ranges. */
512
token_type = ip_match_next_token(&cp, &saved_cp, &oval);
513
if (token_type == IP_MATCH_CODE_OVAL) {
515
look_ahead = ip_match_next_token(&cp, &saved_cp, &oval);
517
if (look_ahead == '.') {
518
/* Brute-force parsing. */
519
if (ip_match_next_token(&cp, &saved_cp, &oval) == '.'
520
&& ip_match_next_token(&cp, &saved_cp, &oval)
521
== IP_MATCH_CODE_OVAL
522
&& saved_oval <= oval) {
523
VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE);
524
VSTRING_ADDCH(byte_codes, saved_oval);
525
VSTRING_ADDCH(byte_codes, oval);
527
ip_match_next_token(&cp, &saved_cp, &oval);
529
ipmatch_print_parse_error(byte_codes, pattern,
531
"numeric range error");
532
return (STR(byte_codes));
537
VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
538
VSTRING_ADDCH(byte_codes, saved_oval);
540
/* Require ";" or end-of-wildcard. */
541
token_type = look_ahead;
542
if (token_type == ';') {
544
} else if (token_type == IP_MATCH_CODE_CLOSE) {
547
ipmatch_print_parse_error(byte_codes, pattern,
549
"need \";\" or \"%c\"",
550
IP_MATCH_CODE_CLOSE);
551
return (STR(byte_codes));
554
ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
555
"need decimal number 0..255");
556
return (STR(byte_codes));
559
VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE);
566
ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
567
"need decimal number 0..255 or \"%c\"",
569
return (STR(byte_codes));
574
* Require four address fields. Not one more, not one less.
576
if (octet_count == 4) {
578
(void) ip_match_next_token(&cp, &saved_cp, &oval);
579
ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
580
"garbage after pattern");
581
return (STR(byte_codes));
583
VSTRING_ADDCH(byte_codes, 0);
588
* Require "." before the next address field.
590
if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') {
591
ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
593
return (STR(byte_codes));
601
* Dummy main program for regression tests.
603
#include <sys/socket.h>
604
#include <netinet/in.h>
605
#include <arpa/inet.h>
609
#include <vstring_vstream.h>
610
#include <stringops.h>
612
int main(int argc, char **argv)
614
VSTRING *byte_codes = vstring_alloc(100);
615
VSTRING *line_buf = vstring_alloc(100);
620
int echo_input = !isatty(0);
623
* Iterate over the input stream. The input format is a pattern, followed
624
* by optional addresses to match against.
626
while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) {
627
bufp = STR(line_buf);
629
vstream_printf("> %s\n", bufp);
630
vstream_fflush(VSTREAM_OUT);
634
if ((user_pattern = mystrtok(&bufp, " \t")) == 0)
638
* Parse and dump the pattern.
640
if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) {
641
vstream_printf("Error: %s\n", err);
643
vstream_printf("Code: %s\n",
644
ip_match_dump(line_buf, STR(byte_codes)));
646
vstream_fflush(VSTREAM_OUT);
649
* Match the optional addresses.
651
while ((user_address = mystrtok(&bufp, " \t")) != 0) {
652
struct in_addr netw_addr;
654
switch (inet_pton(AF_INET, user_address, &netw_addr)) {
656
vstream_printf("Match %s: %s\n", user_address,
657
ip_match_execute(STR(byte_codes),
658
(char *) &netw_addr.s_addr) ?
662
vstream_printf("bad address syntax: %s\n", user_address);
665
vstream_printf("%s: %m\n", user_address);
668
vstream_fflush(VSTREAM_OUT);
671
vstring_free(line_buf);
672
vstring_free(byte_codes);