17
17
#include <config.h>
18
18
#include "classifier.h"
19
#include "classifier-private.h"
20
21
#include <netinet/in.h>
21
22
#include "byte-order.h"
22
23
#include "dynamic-string.h"
25
24
#include "odp-util.h"
26
25
#include "ofp-util.h"
27
#include "ovs-thread.h"
28
26
#include "packets.h"
28
#include "openvswitch/vlog.h"
31
30
VLOG_DEFINE_THIS_MODULE(classifier);
34
/* A collection of "struct cls_conjunction"s currently embedded into a
36
struct cls_conjunction_set {
37
/* Link back to the cls_match.
39
* cls_conjunction_set is mostly used during classifier lookup, and, in
40
* turn, during classifier lookup the most used member of
41
* cls_conjunction_set is the rule's priority, so we cache it here for fast
43
struct cls_match *match;
44
int priority; /* Cached copy of match->priority. */
46
/* Conjunction information.
48
* 'min_n_clauses' allows some optimization during classifier lookup. */
49
unsigned int n; /* Number of elements in 'conj'. */
50
unsigned int min_n_clauses; /* Smallest 'n' among elements of 'conj'. */
51
struct cls_conjunction conj[];
36
54
/* Ports trie depends on both ports sharing the same ovs_be32. */
37
55
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
38
56
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
40
/* Prefix trie for a 'field' */
42
const struct mf_field *field; /* Trie field, or NULL. */
43
struct trie_node *root; /* NULL if none. */
46
struct cls_subtable_entry {
47
struct cls_subtable *subtable;
49
unsigned int max_priority;
52
struct cls_subtable_cache {
53
struct cls_subtable_entry *subtables;
54
size_t alloc_size; /* Number of allocated elements. */
55
size_t size; /* One past last valid array element. */
59
CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */
62
struct cls_classifier {
63
int n_rules; /* Total number of rules. */
64
uint8_t n_flow_segments;
65
uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
66
* for staged lookup. */
67
struct hmap subtables; /* Contains "struct cls_subtable"s. */
68
struct cls_subtable_cache subtables_priority;
69
struct hmap partitions; /* Contains "struct cls_partition"s. */
70
struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
74
/* A set of rules that all have the same fields wildcarded. */
76
struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables'
78
struct hmap rules; /* Contains "struct cls_rule"s. */
79
int n_rules; /* Number of rules, including duplicates. */
80
unsigned int max_priority; /* Max priority of any rule in the subtable. */
81
unsigned int max_count; /* Count of max_priority rules. */
82
tag_type tag; /* Tag generated from mask for partitioning. */
83
uint8_t n_indices; /* How many indices to use. */
84
uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
85
struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
86
unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
88
struct trie_node *ports_trie; /* NULL if none. */
89
struct minimask mask; /* Wildcards for fields. */
90
/* 'mask' must be the last field. */
93
/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
94
* field) with tags for the "cls_subtable"s that contain rules that match that
96
struct cls_partition {
97
struct hmap_node hmap_node; /* In struct cls_classifier's 'partitions'
99
ovs_be64 metadata; /* metadata value for this partition. */
100
tag_type tags; /* OR of each flow's cls_subtable tag. */
101
struct tag_tracker tracker; /* Tracks the bits in 'tags'. */
104
/* Internal representation of a rule in a "struct cls_subtable". */
106
struct cls_rule *cls_rule;
107
struct hindex_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's
109
struct hmap_node hmap_node; /* Within struct cls_subtable 'rules'. */
110
unsigned int priority; /* Larger numbers are higher priorities. */
111
struct cls_partition *partition;
112
struct list list; /* List of identical, lower-priority rules. */
113
struct miniflow flow; /* Matching rule. Mask is in the subtable. */
114
/* 'flow' must be the last field. */
57
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
58
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)
61
cls_conjunction_set_size(size_t n)
63
return (sizeof(struct cls_conjunction_set)
64
+ n * sizeof(struct cls_conjunction));
67
static struct cls_conjunction_set *
68
cls_conjunction_set_alloc(struct cls_match *match,
69
const struct cls_conjunction conj[], size_t n)
72
size_t min_n_clauses = conj[0].n_clauses;
73
for (size_t i = 1; i < n; i++) {
74
min_n_clauses = MIN(min_n_clauses, conj[i].n_clauses);
77
struct cls_conjunction_set *set = xmalloc(cls_conjunction_set_size(n));
79
set->priority = match->priority;
81
set->min_n_clauses = min_n_clauses;
82
memcpy(set->conj, conj, n * sizeof *conj);
117
89
static struct cls_match *
118
cls_match_alloc(struct cls_rule *rule)
90
cls_match_alloc(const struct cls_rule *rule,
91
const struct cls_conjunction conj[], size_t n)
120
93
int count = count_1bits(rule->match.flow.map);
123
96
= xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
124
97
+ MINIFLOW_VALUES_SIZE(count));
126
cls_match->cls_rule = rule;
127
miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count);
128
cls_match->priority = rule->priority;
129
rule->cls_match = cls_match;
99
ovsrcu_init(&cls_match->next, NULL);
100
*CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
101
*CONST_CAST(int *, &cls_match->priority) = rule->priority;
102
*CONST_CAST(cls_version_t *, &cls_match->add_version) = rule->version;
103
atomic_init(&cls_match->remove_version, rule->version); /* Initially
105
miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow),
106
&rule->match.flow, count);
107
ovsrcu_set_hidden(&cls_match->conj_set,
108
cls_conjunction_set_alloc(cls_match, conj, n));
131
110
return cls_match;
134
static struct cls_subtable *find_subtable(const struct cls_classifier *,
113
static struct cls_subtable *find_subtable(const struct classifier *cls,
135
114
const struct minimask *);
136
static struct cls_subtable *insert_subtable(struct cls_classifier *,
115
static struct cls_subtable *insert_subtable(struct classifier *cls,
137
116
const struct minimask *);
139
static void destroy_subtable(struct cls_classifier *, struct cls_subtable *);
141
static void update_subtables_after_insertion(struct cls_classifier *,
142
struct cls_subtable *,
143
unsigned int new_priority);
144
static void update_subtables_after_removal(struct cls_classifier *,
145
struct cls_subtable *,
146
unsigned int del_priority);
148
static struct cls_match *find_match_wc(const struct cls_subtable *,
149
const struct flow *, struct trie_ctx *,
150
unsigned int n_tries,
151
struct flow_wildcards *);
152
static struct cls_match *find_equal(struct cls_subtable *,
117
static void destroy_subtable(struct classifier *cls, struct cls_subtable *);
119
static const struct cls_match *find_match_wc(const struct cls_subtable *,
120
cls_version_t version,
123
unsigned int n_tries,
124
struct flow_wildcards *);
125
static struct cls_match *find_equal(const struct cls_subtable *,
153
126
const struct miniflow *, uint32_t hash);
154
static struct cls_match *insert_rule(struct cls_classifier *,
155
struct cls_subtable *, struct cls_rule *);
157
/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */
158
#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \
159
for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
160
#define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \
161
for ((RULE) = (HEAD); \
162
(RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \
165
static struct cls_match *next_rule_in_list__(struct cls_match *);
166
static struct cls_match *next_rule_in_list(struct cls_match *);
128
/* Return the next visible (lower-priority) rule in the list. Multiple
129
* identical rules with the same priority may exist transitionally, but when
130
* versioning is used at most one of them is ever visible for lookups on any
131
* given 'version'. */
132
static inline const struct cls_match *
133
next_visible_rule_in_list(const struct cls_match *rule, cls_version_t version)
136
rule = cls_match_next(rule);
137
} while (rule && !cls_match_visible_in_version(rule, version));
168
142
static unsigned int minimask_get_prefix_len(const struct minimask *,
169
143
const struct mf_field *);
170
static void trie_init(struct cls_classifier *, int trie_idx,
144
static void trie_init(struct classifier *cls, int trie_idx,
171
145
const struct mf_field *);
172
146
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
173
unsigned int *checkbits);
174
static unsigned int trie_lookup_value(const struct trie_node *,
175
const ovs_be32 value[],
176
unsigned int value_bits,
177
unsigned int *checkbits);
178
static void trie_destroy(struct trie_node *);
147
union mf_value *plens);
148
static unsigned int trie_lookup_value(const rcu_trie_ptr *,
149
const ovs_be32 value[], ovs_be32 plens[],
150
unsigned int value_bits);
151
static void trie_destroy(rcu_trie_ptr *);
179
152
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
180
static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
153
static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
182
155
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
183
static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
156
static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
185
158
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
159
unsigned int n_bits);
187
160
static bool mask_prefix_bits_set(const struct flow_wildcards *,
188
uint8_t be32ofs, unsigned int nbits);
191
cls_subtable_cache_init(struct cls_subtable_cache *array)
193
memset(array, 0, sizeof *array);
197
cls_subtable_cache_destroy(struct cls_subtable_cache *array)
199
free(array->subtables);
200
memset(array, 0, sizeof *array);
203
/* Array insertion. */
205
cls_subtable_cache_push_back(struct cls_subtable_cache *array,
206
struct cls_subtable_entry a)
208
if (array->size == array->alloc_size) {
209
array->subtables = x2nrealloc(array->subtables, &array->alloc_size,
213
array->subtables[array->size++] = a;
216
/* Move subtable entry at 'from' to 'to', shifting the elements in between
217
* (including the one at 'to') accordingly. */
219
cls_subtable_cache_move(struct cls_subtable_entry *to,
220
struct cls_subtable_entry *from)
223
struct cls_subtable_entry temp = *from;
226
/* Shift entries (from,to] backwards to make space at 'to'. */
227
memmove(from, from + 1, (to - from) * sizeof *to);
229
/* Shift entries [to,from) forward to make space at 'to'. */
230
memmove(to + 1, to, (from - to) * sizeof *to);
239
cls_subtable_cache_remove(struct cls_subtable_cache *array,
240
struct cls_subtable_entry *elem)
242
ssize_t size = (&array->subtables[array->size]
243
- (elem + 1)) * sizeof *elem;
245
memmove(elem, elem + 1, size);
250
#define CLS_SUBTABLE_CACHE_FOR_EACH(SUBTABLE, ITER, ARRAY) \
251
for (ITER = (ARRAY)->subtables; \
252
ITER < &(ARRAY)->subtables[(ARRAY)->size] \
253
&& OVS_LIKELY(SUBTABLE = ITER->subtable); \
255
#define CLS_SUBTABLE_CACHE_FOR_EACH_CONTINUE(SUBTABLE, ITER, ARRAY) \
257
ITER < &(ARRAY)->subtables[(ARRAY)->size] \
258
&& OVS_LIKELY(SUBTABLE = ITER->subtable); \
260
#define CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE(SUBTABLE, ITER, ARRAY) \
261
for (ITER = &(ARRAY)->subtables[(ARRAY)->size]; \
262
ITER > (ARRAY)->subtables \
263
&& OVS_LIKELY(SUBTABLE = (--ITER)->subtable);)
266
cls_subtable_cache_verify(struct cls_subtable_cache *array)
268
struct cls_subtable *table;
269
struct cls_subtable_entry *iter;
270
unsigned int priority = 0;
272
CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter, array) {
273
if (iter->max_priority != table->max_priority) {
274
VLOG_WARN("Subtable %p has mismatching priority in cache (%u != %u)",
275
table, iter->max_priority, table->max_priority);
277
if (iter->max_priority < priority) {
278
VLOG_WARN("Subtable cache is out of order (%u < %u)",
279
iter->max_priority, priority);
281
priority = iter->max_priority;
286
cls_subtable_cache_reset(struct cls_classifier *cls)
288
struct cls_subtable_cache old = cls->subtables_priority;
289
struct cls_subtable *subtable;
291
VLOG_WARN("Resetting subtable cache.");
293
cls_subtable_cache_verify(&cls->subtables_priority);
295
cls_subtable_cache_init(&cls->subtables_priority);
297
HMAP_FOR_EACH (subtable, hmap_node, &cls->subtables) {
298
struct cls_match *head;
299
struct cls_subtable_entry elem;
300
struct cls_subtable *table;
301
struct cls_subtable_entry *iter, *from = NULL;
302
unsigned int new_max = 0;
303
unsigned int max_count = 0;
306
/* Verify max_priority. */
307
HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
308
if (head->priority > new_max) {
309
new_max = head->priority;
311
} else if (head->priority == new_max) {
315
if (new_max != subtable->max_priority ||
316
max_count != subtable->max_count) {
317
VLOG_WARN("subtable %p (%u rules) has mismatching max_priority "
318
"(%u) or max_count (%u). Highest priority found was %u, "
320
subtable, subtable->n_rules, subtable->max_priority,
321
subtable->max_count, new_max, max_count);
322
subtable->max_priority = new_max;
323
subtable->max_count = max_count;
326
/* Locate the subtable from the old cache. */
328
CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &old) {
329
if (table == subtable) {
330
if (iter->max_priority != new_max) {
331
VLOG_WARN("Subtable %p has wrong max priority (%u != %u) "
333
subtable, iter->max_priority, new_max);
336
VLOG_WARN("Subtable %p duplicated in the old cache.",
343
VLOG_WARN("Subtable %p not found from the old cache.", subtable);
346
elem.subtable = subtable;
347
elem.tag = subtable->tag;
348
elem.max_priority = subtable->max_priority;
349
cls_subtable_cache_push_back(&cls->subtables_priority, elem);
351
/* Possibly move 'subtable' earlier in the priority array. If
352
* we break out of the loop, then the subtable (at 'from')
353
* should be moved to the position right after the current
354
* element. If the loop terminates normally, then 'iter' will
355
* be at the first array element and we'll move the subtable
356
* to the front of the array. */
357
CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter,
358
&cls->subtables_priority) {
359
if (table == subtable) {
360
from = iter; /* Locate the subtable as we go. */
361
} else if (table->max_priority >= new_max) {
362
ovs_assert(from != NULL);
363
iter++; /* After this. */
368
/* Move subtable at 'from' to 'iter'. */
369
cls_subtable_cache_move(iter, from);
372
/* Verify that the old and the new have the same size. */
373
if (old.size != cls->subtables_priority.size) {
374
VLOG_WARN("subtables cache sizes differ: old (%"PRIuSIZE
375
") != new (%"PRIuSIZE").",
376
old.size, cls->subtables_priority.size);
379
cls_subtable_cache_destroy(&old);
381
cls_subtable_cache_verify(&cls->subtables_priority);
385
/* flow/miniflow/minimask/minimatch utilities.
386
* These are only used by the classifier, so place them here to allow
387
* for better optimization. */
389
static inline uint64_t
390
miniflow_get_map_in_range(const struct miniflow *miniflow,
391
uint8_t start, uint8_t end, unsigned int *offset)
393
uint64_t map = miniflow->map;
397
uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */
398
*offset = count_1bits(map & msk);
401
if (end < FLOW_U32S) {
402
uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
408
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
409
* 'mask', given 'basis'.
411
* The hash values returned by this function are the same as those returned by
412
* miniflow_hash_in_minimask(), only the form of the arguments differ. */
413
static inline uint32_t
414
flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
417
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
418
const uint32_t *flow_u32 = (const uint32_t *)flow;
419
const uint32_t *p = mask_values;
424
for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) {
425
hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++);
428
return mhash_finish(hash, (p - mask_values) * 4);
431
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
432
* 'mask', given 'basis'.
434
* The hash values returned by this function are the same as those returned by
435
* flow_hash_in_minimask(), only the form of the arguments differ. */
436
static inline uint32_t
437
miniflow_hash_in_minimask(const struct miniflow *flow,
438
const struct minimask *mask, uint32_t basis)
440
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
441
const uint32_t *p = mask_values;
442
uint32_t hash = basis;
445
MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
446
hash = mhash_add(hash, flow_u32 & *p++);
449
return mhash_finish(hash, (p - mask_values) * 4);
452
/* Returns a hash value for the bits of range [start, end) in 'flow',
453
* where there are 1-bits in 'mask', given 'hash'.
455
* The hash values returned by this function are the same as those returned by
456
* minimatch_hash_range(), only the form of the arguments differ. */
457
static inline uint32_t
458
flow_hash_in_minimask_range(const struct flow *flow,
459
const struct minimask *mask,
460
uint8_t start, uint8_t end, uint32_t *basis)
462
const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
463
const uint32_t *flow_u32 = (const uint32_t *)flow;
465
uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
467
const uint32_t *p = mask_values + offset;
468
uint32_t hash = *basis;
470
for (; map; map = zero_rightmost_1bit(map)) {
471
hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++);
474
*basis = hash; /* Allow continuation from the unfinished value. */
475
return mhash_finish(hash, (p - mask_values) * 4);
478
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
480
flow_wildcards_fold_minimask(struct flow_wildcards *wc,
481
const struct minimask *mask)
483
flow_union_with_miniflow(&wc->masks, &mask->masks);
486
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
487
* in range [start, end). */
489
flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
490
const struct minimask *mask,
491
uint8_t start, uint8_t end)
493
uint32_t *dst_u32 = (uint32_t *)&wc->masks;
495
uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
497
const uint32_t *p = miniflow_get_u32_values(&mask->masks) + offset;
499
for (; map; map = zero_rightmost_1bit(map)) {
500
dst_u32[raw_ctz(map)] |= *p++;
504
/* Returns a hash value for 'flow', given 'basis'. */
505
static inline uint32_t
506
miniflow_hash(const struct miniflow *flow, uint32_t basis)
508
const uint32_t *values = miniflow_get_u32_values(flow);
509
const uint32_t *p = values;
510
uint32_t hash = basis;
511
uint64_t hash_map = 0;
514
for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
516
hash = mhash_add(hash, *p);
517
hash_map |= rightmost_1bit(map);
521
hash = mhash_add(hash, hash_map);
522
hash = mhash_add(hash, hash_map >> 32);
524
return mhash_finish(hash, p - values);
527
/* Returns a hash value for 'mask', given 'basis'. */
528
static inline uint32_t
529
minimask_hash(const struct minimask *mask, uint32_t basis)
531
return miniflow_hash(&mask->masks, basis);
534
/* Returns a hash value for 'match', given 'basis'. */
535
static inline uint32_t
536
minimatch_hash(const struct minimatch *match, uint32_t basis)
538
return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
541
/* Returns a hash value for the bits of range [start, end) in 'minimatch',
544
* The hash values returned by this function are the same as those returned by
545
* flow_hash_in_minimask_range(), only the form of the arguments differ. */
546
static inline uint32_t
547
minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
551
const uint32_t *p, *q;
552
uint32_t hash = *basis;
555
n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
557
q = miniflow_get_u32_values(&match->mask.masks) + offset;
558
p = miniflow_get_u32_values(&match->flow) + offset;
560
for (i = 0; i < n; i++) {
561
hash = mhash_add(hash, p[i] & q[i]);
563
*basis = hash; /* Allow continuation from the unfinished value. */
564
return mhash_finish(hash, (offset + n) * 4);
161
uint8_t be32ofs, unsigned int n_bits);
166
cls_rule_init__(struct cls_rule *rule, unsigned int priority,
167
cls_version_t version)
169
rculist_init(&rule->node);
170
*CONST_CAST(int *, &rule->priority) = priority;
171
*CONST_CAST(cls_version_t *, &rule->version) = version;
172
rule->cls_match = NULL;
570
175
/* Initializes 'rule' to match packets specified by 'match' at the given
571
176
* 'priority'. 'match' must satisfy the invariant described in the comment at
572
177
* the definition of struct match.
574
179
* The caller must eventually destroy 'rule' with cls_rule_destroy().
576
* (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but
577
* internally Open vSwitch supports a wider range.) */
181
* Clients should not use priority INT_MIN. (OpenFlow uses priorities between
182
* 0 and UINT16_MAX, inclusive.) */
579
cls_rule_init(struct cls_rule *rule,
580
const struct match *match, unsigned int priority)
184
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority,
185
cls_version_t version)
582
minimatch_init(&rule->match, match);
583
rule->priority = priority;
584
rule->cls_match = NULL;
187
cls_rule_init__(rule, priority, version);
188
minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match);
587
191
/* Same as cls_rule_init() for initialization from a "struct minimatch". */
589
193
cls_rule_init_from_minimatch(struct cls_rule *rule,
590
const struct minimatch *match,
591
unsigned int priority)
593
minimatch_clone(&rule->match, match);
594
rule->priority = priority;
595
rule->cls_match = NULL;
194
const struct minimatch *match, int priority,
195
cls_version_t version)
197
cls_rule_init__(rule, priority, version);
198
minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match);
201
/* Initializes 'dst' as a copy of 'src', but with 'version'.
203
* The caller must eventually destroy 'dst' with cls_rule_destroy(). */
205
cls_rule_clone_in_version(struct cls_rule *dst, const struct cls_rule *src,
206
cls_version_t version)
208
cls_rule_init__(dst, src->priority, version);
209
minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match);
598
212
/* Initializes 'dst' as a copy of 'src'.
657
298
return minimask_is_catchall(&rule->match.mask);
301
/* Makes rule invisible after 'version'. Once that version is made invisible
302
* (by changing the version parameter used in lookups), the rule should be
303
* actually removed via ovsrcu_postpone().
305
* 'rule_' must be in a classifier. */
307
cls_rule_make_invisible_in_version(const struct cls_rule *rule,
308
cls_version_t remove_version)
310
ovs_assert(remove_version >= rule->cls_match->add_version);
312
cls_match_set_remove_version(rule->cls_match, remove_version);
315
/* This undoes the change made by cls_rule_make_invisible_after_version().
317
* 'rule' must be in a classifier. */
319
cls_rule_restore_visibility(const struct cls_rule *rule)
321
cls_match_set_remove_version(rule->cls_match, CLS_NOT_REMOVED_VERSION);
324
/* Return true if 'rule' is visible in 'version'.
326
* 'rule' must be in a classifier. */
328
cls_rule_visible_in_version(const struct cls_rule *rule, cls_version_t version)
330
return cls_match_visible_in_version(rule->cls_match, version);
660
333
/* Initializes 'cls' as a classifier that initially contains no classification
663
classifier_init(struct classifier *cls_, const uint8_t *flow_segments)
336
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
665
struct cls_classifier *cls = xmalloc(sizeof *cls);
667
fat_rwlock_init(&cls_->rwlock);
671
338
cls->n_rules = 0;
672
hmap_init(&cls->subtables);
673
cls_subtable_cache_init(&cls->subtables_priority);
674
hmap_init(&cls->partitions);
339
cmap_init(&cls->subtables_map);
340
pvector_init(&cls->subtables);
341
cmap_init(&cls->partitions);
675
342
cls->n_flow_segments = 0;
676
343
if (flow_segments) {
677
344
while (cls->n_flow_segments < CLS_MAX_INDICES
678
&& *flow_segments < FLOW_U32S) {
345
&& *flow_segments < FLOW_U64S) {
679
346
cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
682
349
cls->n_tries = 0;
350
for (int i = 0; i < CLS_MAX_TRIES; i++) {
351
trie_init(cls, i, NULL);
685
356
/* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the
686
* caller's responsibility. */
357
* caller's responsibility.
358
* May only be called after all the readers have been terminated. */
688
classifier_destroy(struct classifier *cls_)
360
classifier_destroy(struct classifier *cls)
691
struct cls_classifier *cls = cls_->cls;
692
struct cls_subtable *partition, *next_partition;
693
struct cls_subtable *subtable, *next_subtable;
363
struct cls_partition *partition;
364
struct cls_subtable *subtable;
696
fat_rwlock_destroy(&cls_->rwlock);
701
367
for (i = 0; i < cls->n_tries; i++) {
702
trie_destroy(cls->tries[i].root);
368
trie_destroy(&cls->tries[i].root);
705
HMAP_FOR_EACH_SAFE (subtable, next_subtable, hmap_node,
371
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
707
372
destroy_subtable(cls, subtable);
709
hmap_destroy(&cls->subtables);
374
cmap_destroy(&cls->subtables_map);
711
HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node,
713
hmap_remove(&cls->partitions, &partition->hmap_node);
376
CMAP_FOR_EACH (partition, cmap_node, &cls->partitions) {
377
ovsrcu_postpone(free, partition);
716
hmap_destroy(&cls->partitions);
379
cmap_destroy(&cls->partitions);
718
cls_subtable_cache_destroy(&cls->subtables_priority);
381
pvector_destroy(&cls->subtables);
723
/* We use uint64_t as a set for the fields below. */
724
BUILD_ASSERT_DECL(MFF_N_IDS <= 64);
726
385
/* Set the fields for which prefix lookup should be performed. */
728
classifier_set_prefix_fields(struct classifier *cls_,
387
classifier_set_prefix_fields(struct classifier *cls,
729
388
const enum mf_field_id *trie_fields,
730
389
unsigned int n_fields)
732
struct cls_classifier *cls = cls_->cls;
391
const struct mf_field * new_fields[CLS_MAX_TRIES];
392
struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
394
bool changed = false;
736
for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) {
396
for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
737
397
const struct mf_field *field = mf_from_id(trie_fields[i]);
738
398
if (field->flow_be32ofs < 0 || field->n_bits % 32) {
739
399
/* Incompatible field. This is the only place where we
746
if (fields & (UINT64_C(1) << trie_fields[i])) {
406
if (bitmap_is_set(fields.bm, trie_fields[i])) {
747
407
/* Duplicate field, there is no need to build more than
748
408
* one index for any one field. */
751
fields |= UINT64_C(1) << trie_fields[i];
753
if (trie >= cls->n_tries || field != cls->tries[trie].field) {
754
trie_init(cls, trie, field);
759
/* Destroy the rest. */
760
for (i = trie; i < cls->n_tries; i++) {
761
trie_init(cls, i, NULL);
411
bitmap_set1(fields.bm, trie_fields[i]);
413
new_fields[n_tries] = NULL;
414
if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
415
new_fields[n_tries] = field;
421
if (changed || n_tries < cls->n_tries) {
422
struct cls_subtable *subtable;
424
/* Trie configuration needs to change. Disable trie lookups
425
* for the tries that are changing and wait all the current readers
426
* with the old configuration to be done. */
428
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
429
for (i = 0; i < cls->n_tries; i++) {
430
if ((i < n_tries && new_fields[i]) || i >= n_tries) {
431
if (subtable->trie_plen[i]) {
432
subtable->trie_plen[i] = 0;
438
/* Synchronize if any readers were using tries. The readers may
439
* temporarily function without the trie lookup based optimizations. */
441
/* ovsrcu_synchronize() functions as a memory barrier, so it does
442
* not matter that subtable->trie_plen is not atomic. */
443
ovsrcu_synchronize();
446
/* Now set up the tries. */
447
for (i = 0; i < n_tries; i++) {
449
trie_init(cls, i, new_fields[i]);
452
/* Destroy the rest, if any. */
453
for (; i < cls->n_tries; i++) {
454
trie_init(cls, i, NULL);
457
cls->n_tries = n_tries;
461
return false; /* No change. */
767
trie_init(struct cls_classifier *cls, int trie_idx,
768
const struct mf_field *field)
465
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
770
467
struct cls_trie *trie = &cls->tries[trie_idx];
771
468
struct cls_subtable *subtable;
772
struct cls_subtable_entry *iter;
774
470
if (trie_idx < cls->n_tries) {
775
trie_destroy(trie->root);
471
trie_destroy(&trie->root);
473
ovsrcu_set_hidden(&trie->root, NULL);
778
475
trie->field = field;
780
/* Add existing rules to the trie. */
781
CLS_SUBTABLE_CACHE_FOR_EACH (subtable, iter, &cls->subtables_priority) {
477
/* Add existing rules to the new trie. */
478
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
782
479
unsigned int plen;
784
481
plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
785
/* Initialize subtable's prefix length on this field. */
483
struct cls_match *head;
485
CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
486
trie_insert(trie, head->cls_rule, plen);
489
/* Initialize subtable's prefix length on this field. This will
490
* allow readers to use the trie. */
491
atomic_thread_fence(memory_order_release);
786
492
subtable->trie_plen[trie_idx] = plen;
789
struct cls_match *head;
791
HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
792
struct cls_match *match;
794
FOR_EACH_RULE_IN_LIST (match, head) {
795
trie_insert(trie, match->cls_rule, plen);
802
/* Returns true if 'cls' contains no classification rules, false otherwise. */
496
/* Returns true if 'cls' contains no classification rules, false otherwise.
497
* Checking the cmap requires no locking. */
804
499
classifier_is_empty(const struct classifier *cls)
806
return cls->cls->n_rules == 0;
501
return cmap_is_empty(&cls->subtables_map);
809
504
/* Returns the number of rules in 'cls'. */
811
506
classifier_count(const struct classifier *cls)
813
return cls->cls->n_rules;
508
/* n_rules is an int, so in the presence of concurrent writers this will
509
* return either the old or a new value. */
817
hash_metadata(ovs_be64 metadata_)
514
hash_metadata(ovs_be64 metadata)
819
uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
820
return hash_uint64(metadata);
516
return hash_uint64((OVS_FORCE uint64_t) metadata);
823
519
static struct cls_partition *
824
find_partition(const struct cls_classifier *cls, ovs_be64 metadata,
520
find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
827
522
struct cls_partition *partition;
829
HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) {
524
CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
830
525
if (partition->metadata == metadata) {
831
526
return partition;
859
554
& MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
558
subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
559
struct cls_subtable *subtable,
560
struct cls_match *head, struct cls_match *new,
561
uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
563
/* Rule's data is already in the tries. */
565
new->partition = head->partition; /* Steal partition, if any. */
566
head->partition = NULL;
568
for (int i = 0; i < subtable->n_indices; i++) {
569
cmap_replace(&subtable->indices[i], &head->index_nodes[i],
570
&new->index_nodes[i], ihash[i]);
572
cmap_replace(&subtable->rules, &head->cmap_node, &new->cmap_node, hash);
862
575
/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
863
576
* must not modify or free it.
865
578
* If 'cls' already contains an identical rule (including wildcards, values of
866
579
* fixed fields, and priority), replaces the old rule by 'rule' and returns the
867
580
* rule that was replaced. The caller takes ownership of the returned rule and
868
* is thus responsible for destroying it with cls_rule_destroy(), freeing the
869
* memory block in which it resides, etc., as necessary.
581
* is thus responsible for destroying it with cls_rule_destroy(), after RCU
582
* grace period has passed (see ovsrcu_postpone()).
871
584
* Returns NULL if 'cls' does not contain a rule with an identical key, after
872
585
* inserting the new rule. In this case, no rules are displaced by the new
873
586
* rule, even rules that cannot have any effect because the new rule matches a
874
* superset of their flows and has higher priority. */
876
classifier_replace(struct classifier *cls_, struct cls_rule *rule)
587
* superset of their flows and has higher priority.
589
const struct cls_rule *
590
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
591
const struct cls_conjunction *conjs, size_t n_conjs)
878
struct cls_classifier *cls = cls_->cls;
879
struct cls_match *old_rule;
593
struct cls_match *new;
880
594
struct cls_subtable *subtable;
595
uint32_t ihash[CLS_MAX_INDICES];
596
uint8_t prev_be64ofs = 0;
597
struct cls_match *head;
603
/* 'new' is initially invisible to lookups. */
604
new = cls_match_alloc(rule, conjs, n_conjs);
606
CONST_CAST(struct cls_rule *, rule)->cls_match = new;
882
608
subtable = find_subtable(cls, &rule->match.mask);
884
610
subtable = insert_subtable(cls, &rule->match.mask);
887
old_rule = insert_rule(cls, subtable, rule);
891
rule->cls_match->partition = NULL;
892
if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
893
ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
894
rule->cls_match->partition = create_partition(cls, subtable,
613
/* Compute hashes in segments. */
615
for (i = 0; i < subtable->n_indices; i++) {
616
ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
617
subtable->index_ofs[i], &basis);
618
prev_be64ofs = subtable->index_ofs[i];
620
hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);
622
head = find_equal(subtable, &rule->match.flow, hash);
624
/* Add rule to tries.
626
* Concurrent readers might miss seeing the rule until this update,
627
* which might require being fixed up by revalidation later. */
901
628
for (i = 0; i < cls->n_tries; i++) {
902
629
if (subtable->trie_plen[i]) {
903
630
trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
634
/* Add rule to ports trie. */
908
635
if (subtable->ports_mask_len) {
909
636
/* We mask the value to be inserted to always have the wildcarded
910
637
* bits in known (zero) state, so we can include them in comparison
916
643
subtable->ports_mask_len);
921
struct cls_rule *old_cls_rule = old_rule->cls_rule;
923
rule->cls_match->partition = old_rule->partition;
924
old_cls_rule->cls_match = NULL;
646
/* Add rule to partitions.
648
* Concurrent readers might miss seeing the rule until this update,
649
* which might require being fixed up by revalidation later. */
650
new->partition = NULL;
651
if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
652
ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
654
new->partition = create_partition(cls, subtable, metadata);
657
/* Add new node to segment indices.
659
* Readers may find the rule in the indices before the rule is visible
660
* in the subtables 'rules' map. This may result in us losing the
661
* opportunity to quit lookups earlier, resulting in sub-optimal
662
* wildcarding. This will be fixed later by revalidation (always
663
* scheduled after flow table changes). */
664
for (i = 0; i < subtable->n_indices; i++) {
665
cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]);
667
n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
668
} else { /* Equal rules exist in the classifier already. */
669
struct cls_match *prev, *iter;
671
/* Scan the list for the insertion point that will keep the list in
672
* order of decreasing priority. Insert after rules marked invisible
673
* in any version of the same priority. */
674
FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
675
if (rule->priority > iter->priority
676
|| (rule->priority == iter->priority
677
&& !cls_match_is_eventually_invisible(iter))) {
682
/* Replace 'iter' with 'new' or insert 'new' between 'prev' and
685
struct cls_rule *old;
687
if (rule->priority == iter->priority) {
688
cls_match_replace(prev, iter, new);
689
old = CONST_CAST(struct cls_rule *, iter->cls_rule);
691
cls_match_insert(prev, iter, new);
695
/* Replace the existing head in data structures, if rule is the new
698
subtable_replace_head_rule(cls, subtable, head, new, hash,
703
struct cls_conjunction_set *conj_set;
705
conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
708
ovsrcu_postpone(free, conj_set);
711
ovsrcu_postpone(cls_match_free_cb, iter);
712
old->cls_match = NULL;
714
/* No change in subtable's max priority or max count. */
716
/* Make 'new' visible to lookups in the appropriate version. */
717
cls_match_set_remove_version(new, CLS_NOT_REMOVED_VERSION);
719
/* Make rule visible to iterators (immediately). */
720
rculist_replace(CONST_CAST(struct rculist *, &rule->node),
723
/* Return displaced rule. Caller is responsible for keeping it
724
* around until all threads quiesce. */
728
/* 'new' is new node after 'prev' */
729
cls_match_insert(prev, iter, new);
733
/* Make 'new' visible to lookups in the appropriate version. */
734
cls_match_set_remove_version(new, CLS_NOT_REMOVED_VERSION);
736
/* Make rule visible to iterators (immediately). */
737
rculist_push_back(&subtable->rules_list,
738
CONST_CAST(struct rculist *, &rule->node));
740
/* Rule was added, not replaced. Update 'subtable's 'max_priority' and
741
* 'max_count', if necessary.
743
* The rule was already inserted, but concurrent readers may not see the
744
* rule yet as the subtables vector is not updated yet. This will have to
745
* be fixed by revalidation later. */
747
subtable->max_priority = rule->priority;
748
subtable->max_count = 1;
749
pvector_insert(&cls->subtables, subtable, rule->priority);
750
} else if (rule->priority == subtable->max_priority) {
751
++subtable->max_count;
752
} else if (rule->priority > subtable->max_priority) {
753
subtable->max_priority = rule->priority;
754
subtable->max_count = 1;
755
pvector_change_priority(&cls->subtables, subtable, rule->priority);
758
/* Nothing was replaced. */
762
pvector_publish(&cls->subtables);
930
768
/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
934
772
* fixed fields, and priority). Use classifier_find_rule_exactly() to find
935
773
* such a rule. */
937
classifier_insert(struct classifier *cls, struct cls_rule *rule)
775
classifier_insert(struct classifier *cls, const struct cls_rule *rule,
776
const struct cls_conjunction conj[], size_t n_conj)
939
struct cls_rule *displaced_rule = classifier_replace(cls, rule);
778
const struct cls_rule *displaced_rule
779
= classifier_replace(cls, rule, conj, n_conj);
940
780
ovs_assert(!displaced_rule);
943
783
/* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy
944
784
* 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
945
* resides, etc., as necessary. */
947
classifier_remove(struct classifier *cls_, struct cls_rule *rule)
785
* resides, etc., as necessary.
787
* Does nothing if 'rule' has been already removed, or was never inserted.
789
* Returns the removed rule, or NULL, if it was already removed.
791
const struct cls_rule *
792
classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
949
struct cls_classifier *cls = cls_->cls;
794
struct cls_match *rule, *prev, *next, *head;
950
795
struct cls_partition *partition;
951
struct cls_match *cls_match = rule->cls_match;
952
struct cls_match *head;
796
struct cls_conjunction_set *conj_set;
953
797
struct cls_subtable *subtable;
956
ovs_assert(cls_match);
958
subtable = find_subtable(cls, &rule->match.mask);
799
uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
800
uint8_t prev_be64ofs = 0;
803
rule = cls_rule->cls_match;
807
/* Mark as removed. */
808
CONST_CAST(struct cls_rule *, cls_rule)->cls_match = NULL;
810
/* Remove 'cls_rule' from the subtable's rules list. */
811
rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));
813
subtable = find_subtable(cls, &cls_rule->match.mask);
959
814
ovs_assert(subtable);
816
for (i = 0; i < subtable->n_indices; i++) {
817
ihash[i] = minimatch_hash_range(&cls_rule->match, prev_be64ofs,
818
subtable->index_ofs[i], &basis);
819
prev_be64ofs = subtable->index_ofs[i];
821
hash = minimatch_hash_range(&cls_rule->match, prev_be64ofs, FLOW_U64S,
824
head = find_equal(subtable, &cls_rule->match.flow, hash);
826
/* Check if the rule is not the head rule. */
828
struct cls_match *iter;
830
/* Not the head rule, but potentially one with the same priority. */
831
/* Remove from the list of equal rules. */
832
FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
837
ovs_assert(iter == rule);
839
cls_match_remove(prev, rule);
844
/* 'rule' is the head rule. Check if there is another rule to
845
* replace 'rule' in the data structures. */
846
next = cls_match_next_protected(rule);
848
subtable_replace_head_rule(cls, subtable, rule, next, hash, ihash);
852
/* 'rule' is last of the kind in the classifier, must remove from all the
853
* data structures. */
961
855
if (subtable->ports_mask_len) {
962
ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
856
ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);
964
858
trie_remove_prefix(&subtable->ports_trie,
965
859
&masked_ports, subtable->ports_mask_len);
967
861
for (i = 0; i < cls->n_tries; i++) {
968
862
if (subtable->trie_plen[i]) {
969
trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
863
trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
973
867
/* Remove rule node from indices. */
974
868
for (i = 0; i < subtable->n_indices; i++) {
975
hindex_remove(&subtable->indices[i], &cls_match->index_nodes[i]);
978
head = find_equal(subtable, &rule->match.flow, cls_match->hmap_node.hash);
979
if (head != cls_match) {
980
list_remove(&cls_match->list);
981
} else if (list_is_empty(&cls_match->list)) {
982
hmap_remove(&subtable->rules, &cls_match->hmap_node);
984
struct cls_match *next = CONTAINER_OF(cls_match->list.next,
985
struct cls_match, list);
987
list_remove(&cls_match->list);
988
hmap_replace(&subtable->rules, &cls_match->hmap_node,
992
partition = cls_match->partition;
869
cmap_remove(&subtable->indices[i], &rule->index_nodes[i], ihash[i]);
871
n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);
873
partition = rule->partition;
994
875
tag_tracker_subtract(&partition->tracker, &partition->tags,
996
877
if (!partition->tags) {
997
hmap_remove(&cls->partitions, &partition->hmap_node);
878
cmap_remove(&cls->partitions, &partition->cmap_node,
879
hash_metadata(partition->metadata));
880
ovsrcu_postpone(free, partition);
1002
if (--subtable->n_rules == 0) {
1003
885
destroy_subtable(cls, subtable);
1005
update_subtables_after_removal(cls, subtable, cls_match->priority);
888
if (subtable->max_priority == rule->priority
889
&& --subtable->max_count == 0) {
890
/* Find the new 'max_priority' and 'max_count'. */
891
int max_priority = INT_MIN;
892
struct cls_match *head;
894
CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
895
if (head->priority > max_priority) {
896
max_priority = head->priority;
897
subtable->max_count = 1;
898
} else if (head->priority == max_priority) {
899
++subtable->max_count;
902
subtable->max_priority = max_priority;
903
pvector_change_priority(&cls->subtables, subtable, max_priority);
908
pvector_publish(&cls->subtables);
912
conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
915
ovsrcu_postpone(free, conj_set);
917
ovsrcu_postpone(cls_match_free_cb, rule);
1010
rule->cls_match = NULL;
1014
923
/* Prefix tree context. Valid when 'lookup_done' is true. Can skip all
1015
* subtables which have more than 'match_plen' bits in their corresponding
1016
* field at offset 'be32ofs'. If skipped, 'maskbits' prefix bits should be
1017
* unwildcarded to quarantee datapath flow matches only packets it should. */
924
* subtables which have a prefix match on the trie field, but whose prefix
925
* length is not indicated in 'match_plens'. For example, a subtable that
926
* has a 8-bit trie field prefix match can be skipped if
927
* !be_get_bit_at(&match_plens, 8 - 1). If skipped, 'maskbits' prefix bits
928
* must be unwildcarded to make datapath flow only match packets it should. */
1018
929
struct trie_ctx {
1019
930
const struct cls_trie *trie;
1020
931
bool lookup_done; /* Status of the lookup. */
1021
932
uint8_t be32ofs; /* U32 offset of the field in question. */
1022
unsigned int match_plen; /* Longest prefix than could possibly match. */
1023
933
unsigned int maskbits; /* Prefix length needed to avoid false matches. */
934
union mf_value match_plens; /* Bitmask of prefix lengths with possible
1031
943
ctx->lookup_done = false;
1035
lookahead_subtable(const struct cls_subtable_entry *subtables)
1037
ovs_prefetch_range(subtables->subtable, sizeof *subtables->subtable);
1040
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
1041
* Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules
1042
* of equal priority match 'flow', returns one arbitrarily.
946
struct conjunctive_match {
947
struct hmap_node hmap_node;
952
static struct conjunctive_match *
953
find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
955
struct conjunctive_match *m;
957
HMAP_FOR_EACH_IN_BUCKET (m, hmap_node, hash, matches) {
966
find_conjunctive_match(const struct cls_conjunction_set *set,
967
unsigned int max_n_clauses, struct hmap *matches,
968
struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
971
const struct cls_conjunction *c;
973
if (max_n_clauses < set->min_n_clauses) {
977
for (c = set->conj; c < &set->conj[set->n]; c++) {
978
struct conjunctive_match *cm;
981
if (c->n_clauses > max_n_clauses) {
985
hash = hash_int(c->id, 0);
986
cm = find_conjunctive_match__(matches, c->id, hash);
988
size_t n = hmap_count(matches);
990
cm = n < n_cm_stubs ? &cm_stubs[n] : xmalloc(sizeof *cm);
991
hmap_insert(matches, &cm->hmap_node, hash);
993
cm->clauses = UINT64_MAX << (c->n_clauses & 63);
995
cm->clauses |= UINT64_C(1) << c->clause;
996
if (cm->clauses == UINT64_MAX) {
1005
free_conjunctive_matches(struct hmap *matches,
1006
struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
1008
if (hmap_count(matches) > n_cm_stubs) {
1009
struct conjunctive_match *cm, *next;
1011
HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) {
1012
if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) {
1017
hmap_destroy(matches);
1020
/* Like classifier_lookup(), except that support for conjunctive matches can be
1021
* configured with 'allow_conjunctive_matches'. That feature is not exposed
1022
* externally because turning off conjunctive matches is only useful to avoid
1023
* recursion within this function itself.
1044
* If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
1045
* set of bits that were significant in the lookup. At some point
1046
* earlier, 'wc' should have been initialized (e.g., by
1047
* flow_wildcards_init_catchall()). */
1049
classifier_lookup(const struct classifier *cls_, const struct flow *flow,
1050
struct flow_wildcards *wc)
1025
* 'flow' is non-const to allow for temporary modifications during the lookup.
1026
* Any changes are restored before returning. */
1027
static const struct cls_rule *
1028
classifier_lookup__(const struct classifier *cls, cls_version_t version,
1029
struct flow *flow, struct flow_wildcards *wc,
1030
bool allow_conjunctive_matches)
1052
struct cls_classifier *cls = cls_->cls;
1053
1032
const struct cls_partition *partition;
1033
struct trie_ctx trie_ctx[CLS_MAX_TRIES];
1034
const struct cls_match *match;
1055
struct cls_match *best;
1056
struct trie_ctx trie_ctx[CLS_MAX_TRIES];
1058
struct cls_subtable_entry *subtables = cls->subtables_priority.subtables;
1059
int n_subtables = cls->subtables_priority.size;
1060
int64_t best_priority = -1;
1062
/* Prefetch the subtables array. */
1063
ovs_prefetch_range(subtables, n_subtables * sizeof *subtables);
1037
/* Highest-priority flow in 'cls' that certainly matches 'flow'. */
1038
const struct cls_match *hard = NULL;
1039
int hard_pri = INT_MIN; /* hard ? hard->priority : INT_MIN. */
1041
/* Highest-priority conjunctive flows in 'cls' matching 'flow'. Since
1042
* these are (components of) conjunctive flows, we can only know whether
1043
* the full conjunctive flow matches after seeing multiple of them. Thus,
1044
* we refer to these as "soft matches". */
1045
struct cls_conjunction_set *soft_stub[64];
1046
struct cls_conjunction_set **soft = soft_stub;
1047
size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
1048
int soft_pri = INT_MIN; /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */
1050
/* Synchronize for cls->n_tries and subtable->trie_plen. They can change
1051
* when table configuration changes, which happens typically only on
1053
atomic_thread_fence(memory_order_acquire);
1065
1055
/* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
1066
1056
* then 'flow' cannot possibly match in 'subtable':
1080
1070
* that 'tags' always intersects such a cls_subtable's 'tags', so we don't
1081
1071
* need a special case.
1083
partition = (hmap_is_empty(&cls->partitions)
1073
partition = (cmap_is_empty(&cls->partitions)
1085
1075
: find_partition(cls, flow->metadata,
1086
1076
hash_metadata(flow->metadata)));
1087
1077
tags = partition ? partition->tags : TAG_ARBITRARY;
1089
/* Initialize trie contexts for match_find_wc(). */
1090
for (i = 0; i < cls->n_tries; i++) {
1079
/* Initialize trie contexts for find_match_wc(). */
1080
for (int i = 0; i < cls->n_tries; i++) {
1091
1081
trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
1094
/* Prefetch the first subtables. */
1095
if (n_subtables > 1) {
1096
lookahead_subtable(subtables);
1097
lookahead_subtable(subtables + 1);
1101
for (i = 0; OVS_LIKELY(i < n_subtables); i++) {
1102
struct cls_match *rule;
1104
if ((int64_t)subtables[i].max_priority <= best_priority) {
1105
/* Subtables are in descending priority order,
1106
* can not find anything better. */
1085
struct cls_subtable *subtable;
1086
PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri, 2, sizeof *subtable,
1088
struct cls_conjunction_set *conj_set;
1090
/* Skip subtables not in our partition. */
1091
if (!tag_intersects(tags, subtable->tag)) {
1095
/* Skip subtables with no match, or where the match is lower-priority
1096
* than some certain match we've already found. */
1097
match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
1099
if (!match || match->priority <= hard_pri) {
1103
conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
1105
/* 'match' isn't part of a conjunctive match. It's the best
1106
* certain match we've got so far, since we know that it's
1107
* higher-priority than hard_pri.
1109
* (There might be a higher-priority conjunctive match. We can't
1112
hard_pri = hard->priority;
1113
} else if (allow_conjunctive_matches) {
1114
/* 'match' is part of a conjunctive match. Add it to the list. */
1115
if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
1116
struct cls_conjunction_set **old_soft = soft;
1118
allocated_soft *= 2;
1119
soft = xmalloc(allocated_soft * sizeof *soft);
1120
memcpy(soft, old_soft, n_soft * sizeof *soft);
1121
if (old_soft != soft_stub) {
1125
soft[n_soft++] = conj_set;
1127
/* Keep track of the highest-priority soft match. */
1128
if (soft_pri < match->priority) {
1129
soft_pri = match->priority;
1134
/* In the common case, at this point we have no soft matches and we can
1135
* return immediately. (We do the same thing if we have potential soft
1136
* matches but none of them are higher-priority than our hard match.) */
1137
if (hard_pri >= soft_pri) {
1138
if (soft != soft_stub) {
1141
return hard ? hard->cls_rule : NULL;
1144
/* At this point, we have some soft matches. We might also have a hard
1145
* match; if so, its priority is lower than the highest-priority soft
1150
* Check whether soft matches are real matches. */
1152
/* Delete soft matches that are null. This only happens in second and
1153
* subsequent iterations of the soft match loop, when we drop back from
1154
* a high-priority soft match to a lower-priority one.
1156
* Also, delete soft matches whose priority is less than or equal to
1157
* the hard match's priority. In the first iteration of the soft
1158
* match, these can be in 'soft' because the earlier main loop found
1159
* the soft match before the hard match. In second and later iteration
1160
* of the soft match loop, these can be in 'soft' because we dropped
1161
* back from a high-priority soft match to a lower-priority soft match.
1163
* It is tempting to delete soft matches that cannot be satisfied
1164
* because there are fewer soft matches than required to satisfy any of
1165
* their conjunctions, but we cannot do that because there might be
1166
* lower priority soft or hard matches with otherwise identical
1167
* matches. (We could special case those here, but there's no
1168
* need--we'll do so at the bottom of the soft match loop anyway and
1169
* this duplicates less code.)
1171
* It's also tempting to break out of the soft match loop if 'n_soft ==
1172
* 1' but that would also miss lower-priority hard matches. We could
1173
* special case that also but again there's no need. */
1174
for (int i = 0; i < n_soft; ) {
1175
if (!soft[i] || soft[i]->priority <= hard_pri) {
1176
soft[i] = soft[--n_soft];
1110
/* Prefetch a forthcoming subtable. */
1111
if (i + 2 < n_subtables) {
1112
lookahead_subtable(&subtables[i + 2]);
1115
if (!tag_intersects(tags, subtables[i].tag)) {
1119
rule = find_match_wc(subtables[i].subtable, flow, trie_ctx,
1121
if (rule && (int64_t)rule->priority > best_priority) {
1122
best_priority = (int64_t)rule->priority;
1127
return best ? best->cls_rule : NULL;
1130
/* Returns true if 'target' satisifies 'match', that is, if each bit for which
1131
* 'match' specifies a particular value has the correct value in 'target'.
1133
* 'flow' and 'mask' have the same mask! */
1135
miniflow_and_mask_matches_miniflow(const struct miniflow *flow,
1136
const struct minimask *mask,
1137
const struct miniflow *target)
1139
const uint32_t *flowp = miniflow_get_u32_values(flow);
1140
const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
1141
uint32_t target_u32;
1143
MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
1144
if ((*flowp++ ^ target_u32) & *maskp++) {
1152
static inline struct cls_match *
1153
find_match_miniflow(const struct cls_subtable *subtable,
1154
const struct miniflow *flow,
1157
struct cls_match *rule;
1159
HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
1160
if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask,
1169
/* Finds and returns the highest-priority rule in 'cls' that matches
1170
* 'miniflow'. Returns a null pointer if no rules in 'cls' match 'flow'.
1171
* If multiple rules of equal priority match 'flow', returns one arbitrarily.
1173
* This function is optimized for the userspace datapath, which only ever has
1174
* one priority value for it's flows!
1176
struct cls_rule *classifier_lookup_miniflow_first(const struct classifier *cls_,
1177
const struct miniflow *flow)
1179
struct cls_classifier *cls = cls_->cls;
1180
struct cls_subtable *subtable;
1181
struct cls_subtable_entry *iter;
1183
CLS_SUBTABLE_CACHE_FOR_EACH (subtable, iter, &cls->subtables_priority) {
1184
struct cls_match *rule;
1186
rule = find_match_miniflow(subtable, flow,
1187
miniflow_hash_in_minimask(flow,
1191
return rule->cls_rule;
1185
/* Find the highest priority among the soft matches. (We know this
1186
* must be higher than the hard match's priority; otherwise we would
1187
* have deleted all of the soft matches in the previous loop.) Count
1188
* the number of soft matches that have that priority. */
1191
for (int i = 0; i < n_soft; i++) {
1192
if (soft[i]->priority > soft_pri) {
1193
soft_pri = soft[i]->priority;
1195
} else if (soft[i]->priority == soft_pri) {
1199
ovs_assert(soft_pri > hard_pri);
1201
/* Look for a real match among the highest-priority soft matches.
1203
* It's unusual to have many conjunctive matches, so we use stubs to
1204
* avoid calling malloc() in the common case. An hmap has a built-in
1205
* stub for up to 2 hmap_nodes; possibly, we would benefit a variant
1206
* with a bigger stub. */
1207
struct conjunctive_match cm_stubs[16];
1208
struct hmap matches;
1210
hmap_init(&matches);
1211
for (int i = 0; i < n_soft; i++) {
1214
if (soft[i]->priority == soft_pri
1215
&& find_conjunctive_match(soft[i], n_soft_pri, &matches,
1216
cm_stubs, ARRAY_SIZE(cm_stubs),
1218
uint32_t saved_conj_id = flow->conj_id;
1219
const struct cls_rule *rule;
1222
rule = classifier_lookup__(cls, version, flow, wc, false);
1223
flow->conj_id = saved_conj_id;
1226
free_conjunctive_matches(&matches,
1227
cm_stubs, ARRAY_SIZE(cm_stubs));
1228
if (soft != soft_stub) {
1235
free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));
1237
/* There's no real match among the highest-priority soft matches.
1238
* However, if any of those soft matches has a lower-priority but
1239
* otherwise identical flow match, then we need to consider those for
1240
* soft or hard matches.
1242
* The next iteration of the soft match loop will delete any null
1243
* pointers we put into 'soft' (and some others too). */
1244
for (int i = 0; i < n_soft; i++) {
1245
if (soft[i]->priority != soft_pri) {
1249
/* Find next-lower-priority flow with identical flow match. */
1250
match = next_visible_rule_in_list(soft[i]->match, version);
1252
soft[i] = ovsrcu_get(struct cls_conjunction_set *,
1255
/* The flow is a hard match; don't treat as a soft
1257
if (match->priority > hard_pri) {
1259
hard_pri = hard->priority;
1263
/* No such lower-priority flow (probably the common case). */
1269
if (soft != soft_stub) {
1272
return hard ? hard->cls_rule : NULL;
1275
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
1276
* that is visible in 'version'. Returns a null pointer if no rules in 'cls'
1277
* match 'flow'. If multiple rules of equal priority match 'flow', returns one
1280
* If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
1281
* set of bits that were significant in the lookup. At some point
1282
* earlier, 'wc' should have been initialized (e.g., by
1283
* flow_wildcards_init_catchall()).
1285
* 'flow' is non-const to allow for temporary modifications during the lookup.
1286
* Any changes are restored before returning. */
1287
const struct cls_rule *
1288
classifier_lookup(const struct classifier *cls, cls_version_t version,
1289
struct flow *flow, struct flow_wildcards *wc)
1291
return classifier_lookup__(cls, version, flow, wc, true);
1198
1294
/* Finds and returns a rule in 'cls' with exactly the same priority and
1199
* matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1295
* matching criteria as 'target', and that is visible in 'target->version.
1296
* Only one such rule may ever exist. Returns a null pointer if 'cls' doesn't
1200
1297
* contain an exact match. */
1202
classifier_find_rule_exactly(const struct classifier *cls_,
1298
const struct cls_rule *
1299
classifier_find_rule_exactly(const struct classifier *cls,
1203
1300
const struct cls_rule *target)
1205
struct cls_classifier *cls = cls_->cls;
1206
struct cls_match *head, *rule;
1207
struct cls_subtable *subtable;
1302
const struct cls_match *head, *rule;
1303
const struct cls_subtable *subtable;
1209
1305
subtable = find_subtable(cls, &target->match.mask);
1210
1306
if (!subtable) {
1214
/* Skip if there is no hope. */
1215
if (target->priority > subtable->max_priority) {
1219
1310
head = find_equal(subtable, &target->match.flow,
1220
1311
miniflow_hash_in_minimask(&target->match.flow,
1221
1312
&target->match.mask, 0));
1222
FOR_EACH_RULE_IN_LIST (rule, head) {
1223
if (target->priority >= rule->priority) {
1224
return target->priority == rule->priority ? rule->cls_rule : NULL;
1316
CLS_MATCH_FOR_EACH (rule, head) {
1317
if (rule->priority < target->priority) {
1318
break; /* Not found. */
1320
if (rule->priority == target->priority
1321
&& cls_match_visible_in_version(rule, target->version)) {
1322
return rule->cls_rule;
1230
1328
/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
1231
* same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1232
* contain an exact match. */
1329
* same matching criteria as 'target', and that is visible in 'version'.
1330
* Returns a null pointer if 'cls' doesn't contain an exact match visible in
1332
const struct cls_rule *
1234
1333
classifier_find_match_exactly(const struct classifier *cls,
1235
const struct match *target,
1236
unsigned int priority)
1334
const struct match *target, int priority,
1335
cls_version_t version)
1238
struct cls_rule *retval;
1337
const struct cls_rule *retval;
1239
1338
struct cls_rule cr;
1241
cls_rule_init(&cr, target, priority);
1340
cls_rule_init(&cr, target, priority, version);
1242
1341
retval = classifier_find_rule_exactly(cls, &cr);
1243
1342
cls_rule_destroy(&cr);
1358
/* Initializes 'cursor' for iterating through rules in 'cls':
1457
/* Initializes 'cursor' for iterating through rules in 'cls', and returns the
1458
* first matching cls_rule via '*pnode', or NULL if there are no matches.
1360
* - If 'target' is null, the cursor will visit every rule in 'cls'.
1460
* - If 'target' is null, or if the 'target' is a catchall target and the
1461
* target's version is CLS_MAX_VERSION, the cursor will visit every rule
1462
* in 'cls' that is not invisible in any version.
1362
1464
* - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
1363
* such that cls_rule_is_loose_match(rule, target) returns true.
1465
* such that cls_rule_is_loose_match(rule, target) returns true and that
1466
* the rule is visible in 'target->version'.
1365
1468
* Ignores target->priority. */
1367
cls_cursor_init(struct cls_cursor *cursor, const struct classifier *cls,
1368
const struct cls_rule *target)
1370
cursor->cls = cls->cls;
1371
cursor->target = target && !cls_rule_is_catchall(target) ? target : NULL;
1374
/* Returns the first matching cls_rule in 'cursor''s iteration, or a null
1375
* pointer if there are no matches. */
1377
cls_cursor_first(struct cls_cursor *cursor)
1470
cls_cursor_start(const struct classifier *cls, const struct cls_rule *target)
1472
struct cls_cursor cursor;
1379
1473
struct cls_subtable *subtable;
1381
HMAP_FOR_EACH (subtable, hmap_node, &cursor->cls->subtables) {
1382
struct cls_match *rule = search_subtable(subtable, cursor->target);
1476
cursor.target = target && (!cls_rule_is_catchall(target)
1477
|| target->version != CLS_MAX_VERSION)
1481
/* Find first rule. */
1482
PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables,
1483
&cursor.cls->subtables) {
1484
const struct cls_rule *rule = search_subtable(subtable, &cursor);
1384
cursor->subtable = subtable;
1385
return rule->cls_rule;
1487
cursor.subtable = subtable;
1392
/* Returns the next matching cls_rule in 'cursor''s iteration, or a null
1393
* pointer if there are no more matches. */
1395
cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_)
1496
static const struct cls_rule *
1497
cls_cursor_next(struct cls_cursor *cursor)
1397
struct cls_match *rule = CONST_CAST(struct cls_match *, rule_->cls_match);
1499
const struct cls_rule *rule;
1398
1500
const struct cls_subtable *subtable;
1399
struct cls_match *next;
1401
next = next_rule_in_list__(rule);
1402
if (next->priority < rule->priority) {
1403
return next->cls_rule;
1406
/* 'next' is the head of the list, that is, the rule that is included in
1407
* the subtable's hmap. (This is important when the classifier contains
1408
* rules that differ only in priority.) */
1410
HMAP_FOR_EACH_CONTINUE (rule, hmap_node, &cursor->subtable->rules) {
1502
rule = cursor->rule;
1503
subtable = cursor->subtable;
1504
RCULIST_FOR_EACH_CONTINUE (rule, node, &subtable->rules_list) {
1411
1505
if (rule_matches(rule, cursor->target)) {
1412
return rule->cls_rule;
1416
subtable = cursor->subtable;
1417
HMAP_FOR_EACH_CONTINUE (subtable, hmap_node, &cursor->cls->subtables) {
1418
rule = search_subtable(subtable, cursor->target);
1510
PVECTOR_CURSOR_FOR_EACH_CONTINUE (subtable, &cursor->subtables) {
1511
rule = search_subtable(subtable, cursor);
1420
1513
cursor->subtable = subtable;
1421
return rule->cls_rule;
1521
/* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
1522
* or to null if all matching rules have been visited. */
1524
cls_cursor_advance(struct cls_cursor *cursor)
1526
cursor->rule = cls_cursor_next(cursor);
1428
1529
static struct cls_subtable *
1429
find_subtable(const struct cls_classifier *cls, const struct minimask *mask)
1530
find_subtable(const struct classifier *cls, const struct minimask *mask)
1431
1532
struct cls_subtable *subtable;
1433
HMAP_FOR_EACH_IN_BUCKET (subtable, hmap_node, minimask_hash(mask, 0),
1534
CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0),
1535
&cls->subtables_map) {
1435
1536
if (minimask_equal(mask, &subtable->mask)) {
1436
1537
return subtable;
1495
1599
/* Ports trie. */
1496
subtable->ports_trie = NULL;
1497
subtable->ports_mask_len
1600
ovsrcu_set_hidden(&subtable->ports_trie, NULL);
1601
*CONST_CAST(int *, &subtable->ports_mask_len)
1498
1602
= 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));
1500
hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
1501
elem.subtable = subtable;
1502
elem.tag = subtable->tag;
1503
elem.max_priority = subtable->max_priority;
1504
cls_subtable_cache_push_back(&cls->subtables_priority, elem);
1604
/* List of rules. */
1605
rculist_init(&subtable->rules_list);
1607
cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);
1506
1609
return subtable;
1612
/* RCU readers may still access the subtable before it is actually freed. */
1510
destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable)
1614
destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
1513
struct cls_subtable *table = NULL;
1514
struct cls_subtable_entry *iter;
1516
CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &cls->subtables_priority) {
1517
if (table == subtable) {
1518
cls_subtable_cache_remove(&cls->subtables_priority, iter);
1523
trie_destroy(subtable->ports_trie);
1618
pvector_remove(&cls->subtables, subtable);
1619
cmap_remove(&cls->subtables_map, &subtable->cmap_node,
1620
minimask_hash(&subtable->mask, 0));
1622
ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
1624
ovs_assert(cmap_is_empty(&subtable->rules));
1625
ovs_assert(rculist_is_empty(&subtable->rules_list));
1525
1627
for (i = 0; i < subtable->n_indices; i++) {
1526
hindex_destroy(&subtable->indices[i]);
1528
minimask_destroy(&subtable->mask);
1529
hmap_remove(&cls->subtables, &subtable->hmap_node);
1530
hmap_destroy(&subtable->rules);
1534
/* This function performs the following updates for 'subtable' in 'cls'
1535
* following the addition of a new rule with priority 'new_priority' to
1538
* - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
1540
* - Update 'subtable''s position in 'cls->subtables_priority' if necessary.
1542
* This function should only be called after adding a new rule, not after
1543
* replacing a rule by an identical one or modifying a rule in-place. */
1545
update_subtables_after_insertion(struct cls_classifier *cls,
1546
struct cls_subtable *subtable,
1547
unsigned int new_priority)
1549
if (new_priority == subtable->max_priority) {
1550
++subtable->max_count;
1551
} else if (new_priority > subtable->max_priority) {
1552
struct cls_subtable *table;
1553
struct cls_subtable_entry *iter, *from = NULL;
1555
subtable->max_priority = new_priority;
1556
subtable->max_count = 1;
1558
/* Possibly move 'subtable' earlier in the priority array. If
1559
* we break out of the loop, then the subtable (at 'from')
1560
* should be moved to the position right after the current
1561
* element. If the loop terminates normally, then 'iter' will
1562
* be at the first array element and we'll move the subtable
1563
* to the front of the array. */
1564
CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter,
1565
&cls->subtables_priority) {
1566
if (table == subtable) {
1567
from = iter; /* Locate the subtable as we go. */
1568
iter->max_priority = new_priority;
1569
} else if (table->max_priority >= new_priority) {
1571
/* Corrupted cache? */
1572
cls_subtable_cache_reset(cls);
1573
VLOG_ABORT("update_subtables_after_insertion(): Subtable priority list corrupted.");
1576
iter++; /* After this. */
1581
/* Move subtable at 'from' to 'iter'. */
1582
cls_subtable_cache_move(iter, from);
1586
/* This function performs the following updates for 'subtable' in 'cls'
1587
* following the deletion of a rule with priority 'del_priority' from
1590
* - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
1592
* - Update 'subtable''s position in 'cls->subtables_priority' if necessary.
1594
* This function should only be called after removing a rule, not after
1595
* replacing a rule by an identical one or modifying a rule in-place. */
1597
update_subtables_after_removal(struct cls_classifier *cls,
1598
struct cls_subtable *subtable,
1599
unsigned int del_priority)
1601
if (del_priority == subtable->max_priority && --subtable->max_count == 0) {
1602
struct cls_match *head;
1603
struct cls_subtable *table;
1604
struct cls_subtable_entry *iter, *from = NULL;
1606
subtable->max_priority = 0;
1607
HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
1608
if (head->priority > subtable->max_priority) {
1609
subtable->max_priority = head->priority;
1610
subtable->max_count = 1;
1611
} else if (head->priority == subtable->max_priority) {
1612
++subtable->max_count;
1616
/* Possibly move 'subtable' later in the priority array.
1617
* After the loop the 'iter' will point right after the position
1618
* at which the subtable should be moved (either at a subtable
1619
* with an equal or lower priority, or just past the array),
1620
* so it is decremented once. */
1621
CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &cls->subtables_priority) {
1622
if (table == subtable) {
1623
from = iter; /* Locate the subtable as we go. */
1624
iter->max_priority = subtable->max_priority;
1625
} else if (table->max_priority <= subtable->max_priority) {
1627
/* Corrupted cache? */
1628
cls_subtable_cache_reset(cls);
1629
VLOG_ABORT("update_subtables_after_removal(): Subtable priority list corrupted.");
1635
/* Now at one past the destination. */
1638
/* Move subtable at 'from' to 'iter'. */
1639
cls_subtable_cache_move(iter, from);
1628
cmap_destroy(&subtable->indices[i]);
1630
cmap_destroy(&subtable->rules);
1631
ovsrcu_postpone(free, subtable);
1734
static inline struct cls_match *
1735
find_match(const struct cls_subtable *subtable, const struct flow *flow,
1729
static inline const struct cls_match *
1730
find_match(const struct cls_subtable *subtable, cls_version_t version,
1731
const struct flow *flow, uint32_t hash)
1738
struct cls_match *rule;
1733
const struct cls_match *head, *rule;
1740
HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
1741
if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
1735
CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
1736
if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,
1739
/* Return highest priority rule that is visible. */
1740
CLS_MATCH_FOR_EACH (rule, head) {
1741
if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) {
1750
static struct cls_match *
1751
find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
1752
struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
1753
struct flow_wildcards *wc)
1751
/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1752
* for which 'flow', for which 'mask' has a bit set, specifies a particular
1753
* value has the correct value in 'target'.
1755
* This function is equivalent to miniflow_and_mask_matches_flow() but this
1756
* version fills in the mask bits in 'wc'. */
1758
miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
1759
const struct minimask *mask,
1760
const struct flow *target,
1761
struct flow_wildcards *wc)
1763
const uint64_t *flowp = miniflow_get_values(flow);
1764
const uint64_t *maskp = miniflow_get_values(&mask->masks);
1767
MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
1768
uint64_t mask = *maskp++;
1769
uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;
1772
/* Only unwildcard if none of the differing bits is already
1774
if (!(flow_u64_value(&wc->masks, idx) & diff)) {
1775
/* Keep one bit of the difference. The selected bit may be
1776
* different in big-endian v.s. little-endian systems. */
1777
*flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
1781
/* Fill in the bits that were looked at. */
1782
*flow_u64_lvalue(&wc->masks, idx) |= mask;
1788
/* Unwildcard the fields looked up so far, if any. */
1790
fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc,
1794
flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to);
1798
static const struct cls_match *
1799
find_match_wc(const struct cls_subtable *subtable, cls_version_t version,
1800
const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
1801
unsigned int n_tries, struct flow_wildcards *wc)
1755
1803
uint32_t basis = 0, hash;
1756
struct cls_match *rule = NULL;
1804
const struct cls_match *rule = NULL;
1758
1806
struct range ofs;
1760
1808
if (OVS_UNLIKELY(!wc)) {
1761
return find_match(subtable, flow,
1809
return find_match(subtable, version, flow,
1762
1810
flow_hash_in_minimask(flow, &subtable->mask, 0));
1766
1814
/* Try to finish early by checking fields in segments. */
1767
1815
for (i = 0; i < subtable->n_indices; i++) {
1768
struct hindex_node *inode;
1816
const struct cmap_node *inode;
1769
1818
ofs.end = subtable->index_ofs[i];
1771
1820
if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
1822
/* 'wc' bits for the trie field set, now unwildcard the preceding
1823
* bits used so far. */
1824
fill_range_wc(subtable, wc, ofs.start);
1775
1827
hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
1776
1828
ofs.end, &basis);
1777
ofs.start = ofs.end;
1778
inode = hindex_node_with_hash(&subtable->indices[i], hash);
1829
inode = cmap_find(&subtable->indices[i], hash);
1780
/* No match, can stop immediately, but must fold in the mask
1781
* covered so far. */
1831
/* No match, can stop immediately, but must fold in the bits
1832
* used in lookup so far. */
1833
fill_range_wc(subtable, wc, ofs.end);
1785
1837
/* If we have narrowed down to a single rule already, check whether
1786
* that rule matches. If it does match, then we're done. If it does
1787
* not match, then we know that we will never get a match, but we do
1788
* not yet know how many wildcards we need to fold into 'wc' so we
1789
* continue iterating through indices to find that out. (We won't
1790
* waste time calling miniflow_and_mask_matches_flow() again because
1791
* we've set 'rule' nonnull.)
1793
* This check shows a measurable benefit with non-trivial flow tables.
1838
* that rule matches. Either way, we're done.
1795
1840
* (Rare) hash collisions may cause us to miss the opportunity for this
1796
1841
* optimization. */
1797
if (!inode->s && !rule) {
1798
ASSIGN_CONTAINER(rule, inode - i, index_nodes);
1799
if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
1842
if (!cmap_node_next(inode)) {
1843
const struct cls_match *head;
1845
ASSIGN_CONTAINER(head, inode - i, index_nodes);
1846
if (miniflow_and_mask_matches_flow_wc(&head->flow, &subtable->mask,
1848
/* Return highest priority rule that is visible. */
1849
CLS_MATCH_FOR_EACH (rule, head) {
1850
if (OVS_LIKELY(cls_match_visible_in_version(rule,
1858
ofs.start = ofs.end;
1805
ofs.end = FLOW_U32S;
1860
ofs.end = FLOW_U64S;
1806
1861
/* Trie check for the final range. */
1807
1862
if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
1811
/* Multiple potential matches exist, look for one. */
1812
hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
1814
rule = find_match(subtable, flow, hash);
1816
/* We already narrowed the matching candidates down to just 'rule',
1817
* but it didn't match. */
1863
fill_range_wc(subtable, wc, ofs.start);
1866
hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
1868
rule = find_match(subtable, version, flow, hash);
1820
1869
if (!rule && subtable->ports_mask_len) {
1821
1870
/* Ports are always part of the final range, if any.
1822
1871
* No match was found for the ports. Use the ports trie to figure out
1823
1872
* which ports bits to unwildcard. */
1824
1873
unsigned int mbits;
1825
ovs_be32 value, mask;
1874
ovs_be32 value, plens, mask;
1827
1876
mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
1828
1877
value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
1829
trie_lookup_value(subtable->ports_trie, &value, 32, &mbits);
1878
mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);
1831
1880
((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
1832
1881
mask & be32_prefix_mask(mbits);
1834
ofs.start = TP_PORTS_OFS32;
1883
/* Unwildcard all bits in the mask upto the ports, as they were used
1884
* to determine there is no match. */
1885
fill_range_wc(subtable, wc, TP_PORTS_OFS64);
1838
1889
/* Must unwildcard all the fields, as they were looked at. */
1839
1890
flow_wildcards_fold_minimask(wc, &subtable->mask);
1843
/* Must unwildcard the fields looked up so far, if any. */
1845
flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
1850
1894
static struct cls_match *
1851
find_equal(struct cls_subtable *subtable, const struct miniflow *flow,
1895
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
1854
1898
struct cls_match *head;
1856
HMAP_FOR_EACH_WITH_HASH (head, hmap_node, hash, &subtable->rules) {
1900
CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
1857
1901
if (miniflow_equal(&head->flow, flow)) {
1864
static struct cls_match *
1865
insert_rule(struct cls_classifier *cls, struct cls_subtable *subtable,
1866
struct cls_rule *new)
1868
struct cls_match *cls_match = cls_match_alloc(new);
1869
struct cls_match *head;
1870
struct cls_match *old = NULL;
1872
uint32_t basis = 0, hash;
1873
uint8_t prev_be32ofs = 0;
1875
/* Add new node to segment indices. */
1876
for (i = 0; i < subtable->n_indices; i++) {
1877
hash = minimatch_hash_range(&new->match, prev_be32ofs,
1878
subtable->index_ofs[i], &basis);
1879
hindex_insert(&subtable->indices[i], &cls_match->index_nodes[i], hash);
1880
prev_be32ofs = subtable->index_ofs[i];
1882
hash = minimatch_hash_range(&new->match, prev_be32ofs, FLOW_U32S, &basis);
1883
head = find_equal(subtable, &new->match.flow, hash);
1885
hmap_insert(&subtable->rules, &cls_match->hmap_node, hash);
1886
list_init(&cls_match->list);
1889
/* Scan the list for the insertion point that will keep the list in
1890
* order of decreasing priority. */
1891
struct cls_match *rule;
1893
cls_match->hmap_node.hash = hash; /* Otherwise done by hmap_insert. */
1895
FOR_EACH_RULE_IN_LIST (rule, head) {
1896
if (cls_match->priority >= rule->priority) {
1898
/* 'new' is the new highest-priority flow in the list. */
1899
hmap_replace(&subtable->rules,
1900
&rule->hmap_node, &cls_match->hmap_node);
1903
if (cls_match->priority == rule->priority) {
1904
list_replace(&cls_match->list, &rule->list);
1908
list_insert(&rule->list, &cls_match->list);
1914
/* Insert 'new' at the end of the list. */
1915
list_push_back(&head->list, &cls_match->list);
1920
update_subtables_after_insertion(cls, subtable, cls_match->priority);
1922
/* Remove old node from indices. */
1923
for (i = 0; i < subtable->n_indices; i++) {
1924
hindex_remove(&subtable->indices[i], &old->index_nodes[i]);
1930
static struct cls_match *
1931
next_rule_in_list__(struct cls_match *rule)
1933
struct cls_match *next = OBJECT_CONTAINING(rule->list.next, next, list);
1937
static struct cls_match *
1938
next_rule_in_list(struct cls_match *rule)
1940
struct cls_match *next = next_rule_in_list__(rule);
1941
return next->priority < rule->priority ? next : NULL;
1944
1908
/* A longest-prefix match tree. */
1946
uint32_t prefix; /* Prefix bits for this node, MSB first. */
1947
uint8_t nbits; /* Never zero, except for the root node. */
1948
unsigned int n_rules; /* Number of rules that have this prefix. */
1949
struct trie_node *edges[2]; /* Both NULL if leaf. */
1952
/* Max bits per node. Must fit in struct trie_node's 'prefix'.
1953
* Also tested with 16, 8, and 5 to stress the implementation. */
1954
#define TRIE_PREFIX_BITS 32
1956
1910
/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
1957
1911
* Prefixes are in the network byte order, and the offset 0 corresponds to