1
#ifndef _JUDYPRIVATE_INCLUDED
2
#define _JUDYPRIVATE_INCLUDED
5
// Copyright (C) 2000 - 2002 Hewlett-Packard Company
7
// This program is free software; you can redistribute it and/or modify it
8
// under the term of the GNU Lesser General Public License as published by the
9
// Free Software Foundation; either version 2 of the License, or (at your
10
// option) any later version.
12
// This program is distributed in the hope that it will be useful, but WITHOUT
13
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
17
// You should have received a copy of the GNU Lesser General Public License
18
// along with this program; if not, write to the Free Software Foundation,
19
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
// @(#) $Revision: 4.77 $ $Source: /judy/src/JudyCommon/JudyPrivate.h $
24
// Header file for all Judy sources, for global but private (non-exported)
28
// INCLUDE EXPORTED HEADER FILE:
30
// Assume all Judy internal sources need the exported header file.
35
// Define some types missing on Windows (not available in a system header
39
typedef unsigned char uint8_t; // 8-bit unsigned.
40
typedef unsigned short uint16_t; // 16-bit unsigned.
41
typedef unsigned long uint32_t; // 32-bit unsigned.
43
typedef unsigned __int64 uint64_t; // appears __int64 is a native type.
48
// ****************************************************************************
49
// A VERY BRIEF EXPLANATION OF A JUDY ARRAY
51
// A Judy array is, effectively, a digital tree (or Trie) with 256 element
52
// branches (nodes), and with "compression tricks" applied to low-population
53
// branches or leaves to save a lot of memory at the cost of relatively little
54
// CPU time or cache fills.
56
// In the actual implementation, a Judy array is level-less, and traversing the
57
// "tree" actually means following the states in a state machine (SM) as
58
// directed by the Index. A Judy array is referred to here as an "SM", rather
59
// than as a "tree"; having "states", rather than "levels".
61
// Each branch or leaf in the SM decodes a portion ("digit") of the original
62
// Index; with 256-way branches there are 8 bits per digit. There are 3 kinds
63
// of branches, called: Linear, Bitmap and Uncompressed, of which the first 2
64
// are compressed to contain no NULL entries.
66
// An Uncompressed branch has a 1.0 cache line fill cost to decode 8 bits of
67
// (digit, part of an Index), but it might contain many NULL entries, and is
68
// therefore inefficient with memory if lightly populated.
70
// A Linear branch has a ~1.75 cache line fill cost when at maximum population.
71
// A Bitmap branch has ~2.0 cache line fills. Linear and Bitmap branches are
72
// converted to Uncompressed branches when the additional memory can be
73
// amortized with larger populations. Higher-state branches have higher
74
// priority to be converted.
76
// Linear branches can hold 28 elements (based on detailed analysis) -- thus 28
77
// expanses. A Linear branch is converted to a Bitmap branch when the 29th
78
// expanse is required.
80
// A Bitmap branch could hold 256 expanses, but is forced to convert to an
81
// Uncompressed branch when 185 expanses are required. Hopefully, it is
82
// converted before that because of population growth (again, based on detailed
83
// analysis and heuristics in the code).
85
// A path through the SM terminates to a leaf when the Index (or key)
86
// population in the expanse below a pointer will fit into 1 or 2 cache lines
87
// (~31..255 Indexes). A maximum-population Leaf has ~1.5 cache line fill
90
// Leaves are sorted arrays of Indexes, where the Index Sizes (IS) are: 0, 1,
91
// 8, 16, 24, 32, [40, 48, 56, 64] bits. The IS depends on the "density"
92
// (population/expanse) of the values in the Leaf. Zero bits are possible if
93
// population == expanse in the SM (that is, a full small expanse).
95
// Elements of a branches are called Judy Pointers (JPs). Each JP object
96
// points to the next object in the SM, plus, a JP can decode an additional
97
// 2[6] bytes of an Index, but at the cost of "narrowing" the expanse
98
// represented by the next object in the SM. A "narrow" JP (one which has
99
// decode bytes/digits) is a way of skipping states in the SM.
101
// Although counterintuitive, we think a Judy SM is optimal when the Leaves are
102
// stored at MINIMUM compression (narrowing, or use of Decode bytes). If more
103
// aggressive compression was used, decompression of a leaf be required to
104
// insert an index. Additional compression would save a little memory but not
105
// help performance significantly.
108
#ifdef A_PICTURE_IS_WORTH_1000_WORDS
109
*******************************************************************************
111
JUDY 32-BIT STATE MACHINE (SM) EXAMPLE, FOR INDEX = 0x02040103
113
The Index used in this example is purposely chosen to allow small, simple
114
examples below; each 1-byte "digit" from the Index has a small numeric value
115
that fits in one column. In the drawing below:
117
JAP == Judy Array Pointer; least 3 bits encode Branch or Leaf type; L or B
118
appears in drawing as described below.
120
C == 1 byte of a 1..3 byte Population (count of Indexes) below this
121
pointer. Since this is shared with the Decode field, the combined
122
sizes must be 3[7], that is, 1 word less 1 byte for the JP Type.
124
The 1-byte field jp_Type is represented as:
126
1..3 == Number of bytes in the population (Pop0) word of the Branch or Leaf
127
below the pointer (note: 1..7 on 64-bit); indicates:
128
- number of bytes in Decode field == 3 - this number;
129
- number of bytes remaining to decode.
130
Note: The maximum is 3, not 4, because the 1st byte of the Index is
131
always decoded digitally in the top branch.
132
-B- == JP points to a Branch (there are many kinds of Branches).
133
-L- == JP points to a Leaf (there are many kinds of Leaves).
135
(2) == Digit of Index decoded by position offset in branch (really
138
4* == Digit of Index necessary for decoding a "narrow" pointer, in a
139
Decode field; replaces 1 missing branch (really 0..0xff).
141
4+ == Digit of Index NOT necessary for decoding a "narrow" pointer, but
142
used for fast traversal of the SM by Judy1Test() and JudyLGet()
143
(see the code) (really 0..0xff).
145
0 == Byte in a JP''s Pop0 field that is always ignored, because a leaf
146
can never contain more than 256 Indexes (Pop0 <= 255).
148
+----- == A Branch or Leaf; drawn open-ended to remind you that it could
149
| have up to 256 columns.
153
| == Pointer to next Branch or Leaf.
157
O == A state is skipped by using a "narrow" pointer.
160
< 1 > == Digit (Index) shown as an example is not necessarily in the
161
position shown; is sorted in order with neighbor Indexes.
164
Note that this example shows every possibly topology to reach a leaf in a
165
32-bit Judy SM, although this is a very subtle point!
169
+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
170
|JAP| |JAP| |JAP| |JAP| |JAP| |JAP| |JAP| |JAP|
171
L---+ B---+ B---+ B---+ B---+ B---+ B---+ B---+
174
V V (2) V (2) V (2) V (2) V (2) V (2) V (2)
175
+------ +------ +------ +------ +------ +------ +------ +------
176
Four |< 2 > | 0 | 4* | C | 4* | 4* | C | C
177
byte |< 4 > | 0 | 0 | C | 1* | C | C | C 4
178
Index|< 1 > | C | C | C | C | C | C | C
179
Leaf |< 3 > | 3 | 2 | 3 | 1 | 2 | 3 | 3
180
+------ +--L--- +--L--- +--B--- +--L--- +--B--- +--B--- +--B---
185
V | V (4) | | V (4) V (4)
186
+------ | +------ | | +------ +------
187
Three |< 4 > | | 4+ | | | 4+ | 4+
188
byte Index|< 1 > O | 0 O O | 1* | C 3
189
Leaf |< 3 > | | C | | | C | C
190
+------ | | 2 | | | 1 | 2
191
/ +----L- | | +----L- +----B-
199
+------ +------ | +------ | +------
200
Two byte |< 1 > |< 1 > | | 4+ | | 4+
201
Index Leaf |< 3 > |< 3 > O | 1+ O | 1+ 2
202
+------ +------ / | C | | C
209
+------ +------ +------ +------
210
One byte Index Leaf |< 3 > |< 3 > |< 3 > |< 3 > 1
211
+------ +------ +------ +------
214
#endif // A_PICTURE_IS_WORTH_1000_WORDS
217
// ****************************************************************************
218
// MISCELLANEOUS GLOBALS:
220
// PLATFORM-SPECIFIC CONVENIENCE MACROS:
222
// These are derived from context (set by cc or in system header files) or
223
// based on JU_<PLATFORM> macros from make_includes/platform.*.mk. We decided
224
// on 011018 that any macro reliably derivable from context (cc or headers) for
225
// ALL platforms supported by Judy is based on that derivation, but ANY
226
// exception means to stop using the external macro completely and derive from
227
// JU_<PLATFORM> instead.
229
// Other miscellaneous stuff:
236
// Convert some (newer) external names (renamed from "JAP" to avoid negative
237
// connotations) to (older) internal names:
239
#define JAP_MASK JLAP_MASK
240
#define JAP_INVALID JLAP_INVALID
242
#define FUNCTION // null; easy to find functions.
252
#ifdef TRACE // turn on all other tracing in the code:
253
#define TRACEJP 1 // JP traversals in JudyIns.c and JudyDel.c.
254
#define TRACEJPR 1 // JP traversals in retrieval code, JudyGet.c.
255
#define TRACECF 1 // cache fills in JudyGet.c.
256
#define TRACEMI 1 // malloc calls in JudyMallocIF.c.
257
#define TRACEMF 1 // malloc calls at a lower level in JudyMalloc.c.
261
// SUPPORT FOR DEBUG-ONLY CODE:
263
// By convention, use -DDEBUG to enable both debug-only code AND assertions in
266
// Invert the sense of assertions, so they are off unless explicitly requested,
269
// Note: It is NOT appropriate to put this in Judy.h; it would mess up
273
#define NDEBUG 1 // must be 1 for "#if".
276
// Shorthand notations to avoid #ifdefs for single-line conditional statements:
278
// Warning: These cannot be used around compiler directives, such as
279
// "#include", nor in the case where Code contains a comma other than nested
280
// within parentheses or quotes.
283
#define DBGCODE(Code) // null.
285
#define DBGCODE(Code) Code
289
#define JUDY1CODE(Code) Code
290
#define JUDYLCODE(Code) // null.
294
#define JUDYLCODE(Code) Code
295
#define JUDY1CODE(Code) // null.
299
#pragma C-Cover off // exclude inline functions in public header files.
307
// ****************************************************************************
308
// FUNDAMENTAL CONSTANTS FOR MACHINE
309
// ****************************************************************************
311
// Machine (CPU) cache line size:
313
// NOTE: A leaf size of 2 cache lines maximum is the target (optimal) for
314
// Judy. It's hard to obtain a machine's cache line size at compile time, but
315
// if the machine has an unexpected cache line size, it's not devastating if
316
// the following constants end up causing leaves that are 1 cache line in size,
317
// or even 4 cache lines in size. The assumed 32-bit system has 16-word =
318
// 64-byte cache lines, and the assumed 64-bit system has 16-word = 128-byte
322
#define cJU_BYTESPERCL 128 // cache line size in bytes.
324
#define cJU_BYTESPERCL 64 // cache line size in bytes.
329
#define cJU_BITSPERBYTE 0x8
331
// Bytes Per Word and Bits Per Word, latter assuming sizeof(byte) is 8 bits:
333
// Expect 32 [64] bits per word.
335
#define cJU_BYTESPERWORD (sizeof(Word_t))
336
#define cJU_BITSPERWORD (sizeof(Word_t) * cJU_BITSPERBYTE)
338
#define JU_BYTESTOWORDS(Bytes) \
339
(((Bytes) + cJU_BYTESPERWORD - 1) / cJU_BYTESPERWORD)
341
// A word that is all-one's, normally equal to -1UL, but safer with ~0:
343
#define cJU_ALLONES (~0UL)
345
// Note, these are forward references, but that's OK:
347
#define cJU_FULLBITMAPB ((BITMAPB_t) cJU_ALLONES)
348
#define cJU_FULLBITMAPL ((BITMAPL_t) cJU_ALLONES)
351
// ****************************************************************************
352
// MISCELLANEOUS JUDY-SPECIFIC DECLARATIONS
353
// ****************************************************************************
357
// State at the start of the Judy SM, based on 1 byte decoded per state; equal
358
// to the number of bytes per Index to decode.
360
#define cJU_ROOTSTATE (sizeof(Word_t))
363
// SUBEXPANSES PER STATE:
365
// Number of subexpanses per state traversed, which is the number of JPs in a
366
// branch (actual or theoretical) and the number of bits in a bitmap.
368
#define cJU_SUBEXPPERSTATE 256
371
// LEAF AND VALUE POINTERS:
373
// Some other basic object types are in declared in JudyPrivateBranch.h
374
// (Pjbl_t, Pjbb_t, Pjbu_t, Pjp_t) or are Judy1/L-specific (Pjlb_t). The
375
// few remaining types are declared below.
377
// Note: Leaf pointers are cast to different-sized objects depending on the
378
// leaf's level, but are at least addresses (not just numbers), so use void *
379
// (Pvoid_t), not PWord_t or Word_t for them, except use Pjlw_t for whole-word
380
// (top-level, root-level) leaves. Value areas, however, are always whole
383
// Furthermore, use Pjll_t only for generic leaf pointers (for various size
384
// LeafL's). Use Pjlw_t for LeafW's. Use Pleaf (with type uint8_t *, uint16_t
385
// *, etc) when the leaf index size is known.
387
typedef PWord_t Pjlw_t; // pointer to root-level leaf (whole-word indexes).
388
typedef Pvoid_t Pjll_t; // pointer to lower-level linear leaf.
390
typedef PWord_t Pjv_t; // pointer to JudyL value area.
394
// POINTER PREPARATION MACROS:
396
// These macros are used to strip malloc-namespace-type bits from a pointer +
397
// malloc-type word (which references any Judy malloc'd object that might be
398
// obtained from other than a direct call of malloc()), prior to dereferencing
399
// the pointer as an address. The malloc-type bits allow Judy malloc'd objects
400
// to come from different "malloc() namespaces".
402
// Note: Non-null Judy array root pointers (JAP) point to root-level
403
// (word-sized) leaves (JLW) or to top-level Judy population/memory (JPM)
404
// nodes, both of which come directly from malloc() and which are not capable
405
// of alternate name-spacing. However, for consistency and for easier future
406
// changes, there are macros for accessing pointers to these objects too, which
407
// DO strip JAP_MASK bits.
409
// In other words, JLW and JPM pointers contain JAP type bits, and other
410
// pointers may contain malloc-namespace-type bits. These pointers (addresses)
411
// include these fields in Judy objects:
413
// (root pointer) (JAP, see above)
414
// jp.jp_Addr generic pointer to next-level node, except when used
415
// as a JudyL Immed01 value area
416
// JU_JBB_PJP macro hides jbbs_Pjp (pointer to JP subarray)
417
// JL_JLB_PVALUE macro hides jLlbs_PValue (pointer to value subarray)
419
// When setting one of these fields or passing an address to __JudyFree*(), the
420
// "raw" memory address is used; otherwise the memory address must be passed
421
// through one of the macros below before it's dereferenced.
423
// Note: After much study, the typecasts below appear in the macros rather
424
// than at the point of use, which is both simpler and allows the compiler to
427
#define __numbits 3 // number of malloc-type bits in pointer field.
428
#define __mask (-(1UL << __numbits))
430
#define JAPTYPE(Addr) (((Word_t) (Addr)) & JAP_MASK) // root type.
431
#define P_JLW( Addr) ((Pjlw_t) (((Word_t) (Addr)) & ~JAP_MASK)) // root leaf.
432
#define P_JPM( Addr) ((Pjpm_t) (((Word_t) (Addr)) & ~JAP_MASK)) // root JPM.
433
#define P_JBL( Addr) ((Pjbl_t) (((Word_t) (Addr)) & __mask)) // BranchL.
434
#define P_JBB( Addr) ((Pjbb_t) (((Word_t) (Addr)) & __mask)) // BranchB.
435
#define P_JBU( Addr) ((Pjbu_t) (((Word_t) (Addr)) & __mask)) // BranchU.
436
#define P_JLL( Addr) ((Pjll_t) (((Word_t) (Addr)) & __mask)) // LeafL.
437
#define P_JLB( Addr) ((Pjlb_t) (((Word_t) (Addr)) & __mask)) // LeafB1.
438
#define P_JP( Addr) ((Pjp_t) (((Word_t) (Addr)) & __mask)) // JP.
440
#define P_JV( Addr) ((Pjv_t) (((Word_t) (Addr)) & __mask)) // &value.
446
// Mask for least bytes of a word, and a macro to perform this mask on an
449
// Note: This macro has been problematic in the past to get right and to make
450
// portable. It's not OK on all systems to shift by the full word size. This
451
// macro should allow shifting by 1..N bytes, where N is the word size, but
452
// should produce a compiler warning if the macro is called with Bytes == 0.
454
// Warning: JU_LEASTBYTESMASK() is not a constant macro unless Bytes is a
455
// constant; otherwise it is a variable shift, which is expensive on some
458
#define JU_LEASTBYTESMASK(Bytes) \
459
((0x100UL << (cJU_BITSPERBYTE * ((Bytes) - 1))) - 1)
461
#define JU_LEASTBYTES(Index,Bytes) ((Index) & JU_LEASTBYTESMASK(Bytes))
464
// BITS IN EACH BITMAP SUBEXPANSE FOR BITMAP BRANCH AND LEAF:
466
// The bits per bitmap subexpanse times the number of subexpanses equals a
467
// constant (cJU_SUBEXPPERSTATE). You can also think of this as a compile-time
468
// choice of "aspect ratio" for bitmap branches and leaves (which can be set
469
// independently for each).
471
// A default aspect ratio is hardwired here if not overridden at compile time,
472
// such as by "EXTCCOPTS=-DBITMAP_BRANCH16x16 make".
474
#if (! (defined(BITMAP_BRANCH8x32) || defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8)))
475
#define BITMAP_BRANCH32x8 1 // 32 bits per subexpanse, 8 subexpanses.
478
#ifdef BITMAP_BRANCH8x32
479
#define BITMAPB_t uint8_t
482
#ifdef BITMAP_BRANCH16x16
483
#define BITMAPB_t uint16_t
486
#ifdef BITMAP_BRANCH32x8
487
#define BITMAPB_t uint32_t
490
// Note: For bitmap leaves, BITMAP_LEAF64x4 is only valid for 64 bit:
492
// Note: Choice of aspect ratio mostly matters for JudyL bitmap leaves. For
493
// Judy1 the choice doesn't matter much -- the code generated for different
494
// BITMAP_LEAF* values choices varies, but correctness and performance are the
499
#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8)))
500
#define BITMAP_LEAF32x8 // 32 bits per subexpanse, 8 subexpanses.
505
#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)))
506
#define BITMAP_LEAF64x4 // 64 bits per subexpanse, 4 subexpanses.
511
#ifdef BITMAP_LEAF8x32
512
#define BITMAPL_t uint8_t
515
#ifdef BITMAP_LEAF16x16
516
#define BITMAPL_t uint16_t
519
#ifdef BITMAP_LEAF32x8
520
#define BITMAPL_t uint32_t
523
#ifdef BITMAP_LEAF64x4
524
#define BITMAPL_t uint64_t
528
// EXPORTED DATA AND FUNCTIONS:
531
extern const uint8_t __j1_BranchBJPPopToWords[];
533
extern const uint8_t __jL_BranchBJPPopToWords[];
536
// Fast LeafL search routine used for inlined code:
538
// Note: MaxPop1 is historical and unused.
540
#define SEARCHLEAFEVEN(LeafType,Addr,Pop1,Index,MaxPop1) \
541
LeafType *_Pleaf = (LeafType *) (Addr); \
542
LeafType _Index = (Index); /* with masking */ \
543
Word_t _Pop1 = (Pop1); \
545
if (_Index > _Pleaf[_Pop1 - 1]) return(~(_Pop1)); /* past end */ \
546
for(;;) { if (_Index <= *_Pleaf) break; ++_Pleaf; } \
547
if (_Index == *_Pleaf) return(_Pleaf - (LeafType *) (Addr)); \
548
return(~(_Pleaf - (LeafType *) (Addr)))
550
// Fast way to count bits set in 8..32[64]-bit int:
552
// For performance, __JudyCountBits*() are written to take advantage of
553
// platform-specific features where available.
558
extern BITMAPB_t __JudyCountBitsB(BITMAPB_t word);
559
extern BITMAPL_t __JudyCountBitsL(BITMAPL_t word);
561
// Compiler supports inline
563
#elif defined(JU_HPUX_IPF)
565
#define __JudyCountBitsB(word) _Asm_popcnt(word)
566
#define __JudyCountBitsL(word) _Asm_popcnt(word)
568
#elif defined(JU_LINUX_IPF)
570
static inline BITMAPB_t __JudyCountBitsB(BITMAPB_t word)
573
__asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word));
577
static inline BITMAPL_t __JudyCountBitsL(BITMAPL_t word)
580
__asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word));
584
// No instructions available, use inline code
592
// ****************************************************************************
593
// __ J U D Y C O U N T B I T S B
595
// Return the number of bits set in "Word", for a bitmap branch.
597
// Note: Bitmap branches have maximum bitmap size = 32 bits.
599
static inline BITMAPB_t __JudyCountBitsB(BITMAPB_t word)
601
word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1);
602
word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2);
603
word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits.
604
#if defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8)
605
word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits.
607
#ifdef BITMAP_BRANCH32x8
608
word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits.
612
} // __JudyCountBitsB()
615
// ****************************************************************************
616
// __ J U D Y C O U N T B I T S L
618
// Return the number of bits set in "Word", for a bitmap leaf.
620
// Note: Bitmap branches have maximum bitmap size = 32 bits.
622
// Note: Need both 32-bit and 64-bit versions of __JudyCountBitsL() because
623
// bitmap leaves can have 64-bit bitmaps.
625
static inline BITMAPL_t __JudyCountBitsL(BITMAPL_t word)
629
word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1);
630
word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2);
631
word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits.
632
#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8)
633
word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits.
635
#ifdef BITMAP_LEAF32x8
636
word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits.
641
word = (word & 0x5555555555555555) + ((word & 0xAAAAAAAAAAAAAAAA) >> 1);
642
word = (word & 0x3333333333333333) + ((word & 0xCCCCCCCCCCCCCCCC) >> 2);
643
word = (word & 0x0F0F0F0F0F0F0F0F) + ((word & 0xF0F0F0F0F0F0F0F0) >> 4);
644
#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)
645
word = (word & 0x00FF00FF00FF00FF) + ((word & 0xFF00FF00FF00FF00) >> 8);
647
#if defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)
648
word = (word & 0x0000FFFF0000FFFF) + ((word & 0xFFFF0000FFFF0000) >>16);
650
#ifdef BITMAP_LEAF64x4
651
word = (word & 0x00000000FFFFFFFF) + ((word & 0xFFFFFFFF00000000) >>32);
657
} // __JudyCountBitsL()
663
#endif // Compiler supports inline
667
// Get from jp_DcdPop0 the Pop0 for various JP Types.
671
// - Different macros require different parameters...
673
// - There are no simple macros for cJU_JAPBRANCH* Types because their
674
// populations must be added up and don't reside in an already-calculated
675
// place. (TBD: This is no longer true, now it's in the JPM.)
677
// - cJU_JPIMM_POP0() is not defined because it would be redundant because the
678
// Pop1 is already encoded in each enum name.
680
// - A linear or bitmap leaf Pop0 cannot exceed cJU_SUBEXPPERSTATE - 1 (Pop0 =
681
// 0..255), so use a simpler, faster macro for it than for other JP Types.
683
// - Avoid any complex calculations that would slow down the compiled code.
684
// Assume these macros are only called for the appropriate JP Types.
685
// Unfortunately there's no way to trigger an assertion here if the JP type
686
// is incorrect for the macro, because these are merely expressions, not
689
#define JU_JAPLEAF_POP0(Addr) (*((PWord_t) Addr))
690
// JU_JAPBRANCH_POP0() // see above.
691
// JU_JPBRANCH_POP0(DcdPop0,cPopBytes) // see JudyPrivateBranch.h.
692
#define JU_JPLEAF_POP0(DcdPop0) ((DcdPop0) & 0xFF)
693
// cJU_JPIMM_POP0(cJPType,cJPTypeBase) ((cJPType) - (cJPTypeBase))
694
#define cJU_JPFULLPOPU1_POP0 (cJU_SUBEXPPERSTATE - 1)
697
// NUMBER OF BITS IN A BRANCH OR LEAF BITMAP AND SUBEXPANSE:
699
// Note: cJU_BITSPERBITMAP must be the same as the number of JPs in a branch.
701
#define cJU_BITSPERBITMAP cJU_SUBEXPPERSTATE
703
// Bitmaps are accessed in units of "subexpanses":
705
#define cJU_BITSPERSUBEXPB (sizeof(BITMAPB_t) * cJU_BITSPERBYTE)
706
#define cJU_NUMSUBEXPB (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPB)
708
#define cJU_BITSPERSUBEXPL (sizeof(BITMAPL_t) * cJU_BITSPERBYTE)
709
#define cJU_NUMSUBEXPL (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPL)
712
// MASK FOR A SPECIFIED BIT IN A BITMAP:
714
// Warning: If BitNum is a variable, this results in a variable shift that is
715
// expensive, at least on some processors. Use with caution.
717
// Warning: BitNum must be less than cJU_BITSPERWORD, that is, 0 ..
718
// cJU_BITSPERWORD - 1, to avoid a truncated shift on some machines.
720
// TBD: Perhaps use an array[32] of masks instead of calculating them.
722
#define JU_BITPOSMASKB(BitNum) (1L << ((BitNum) % cJU_BITSPERSUBEXPB))
723
#define JU_BITPOSMASKL(BitNum) (1L << ((BitNum) % cJU_BITSPERSUBEXPL))
726
// TEST/SET/CLEAR A BIT IN A BITMAP LEAF:
728
// Test if a byte-sized Digit (portion of Index) has a corresponding bit set in
729
// a bitmap, or set a byte-sized Digit's bit into a bitmap, by looking up the
730
// correct subexpanse and then checking/setting the correct bit.
732
// Note: Mask higher bits, if any, for the convenience of the user of this
733
// macro, in case they pass a full Index, not just a digit. If the caller has
734
// a true 8-bit digit, make it of type uint8_t and the compiler should skip the
735
// unnecessary mask step.
737
#define JU_SUBEXPL(DIGIT) (((DIGIT) / cJU_BITSPERSUBEXPL) & (cJU_NUMSUBEXPL-1))
739
#define JU_BITMAPTESTL(PJLB, INDEX) \
740
(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) & JU_BITPOSMASKL(INDEX))
742
#define JU_BITMAPSETL(PJLB, INDEX) \
743
(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) |= JU_BITPOSMASKL(INDEX))
745
#define JU_BITMAPCLEARL(PJLB, INDEX) \
746
(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) ^= JU_BITPOSMASKL(INDEX))
749
// MAP BITMAP BIT OFFSET TO DIGIT:
751
// Given a digit variable to set, a bitmap branch or leaf subexpanse (base 0),
752
// the bitmap (BITMAP*_t) for that subexpanse, and an offset (Nth set bit in
753
// the bitmap, base 0), compute the digit (also base 0) corresponding to the
754
// subexpanse and offset by counting all bits in the bitmap until offset+1 set
755
// bits are seen. Avoid expensive variable shifts. Offset should be less than
756
// the number of set bits in the bitmap; assert this.
758
// If there's a better way to do this, I don't know what it is.
760
#define JU_BITMAPDIGITB(Digit,Subexp,Bitmap,Offset) \
762
BITMAPB_t bitmap = (Bitmap); int remain = (Offset); \
763
(Digit) = (Subexp) * cJU_BITSPERSUBEXPB; \
765
while ((remain -= (bitmap & 1)) >= 0) \
767
bitmap >>= 1; ++(Digit); \
768
assert((Digit) < ((Subexp) + 1) * cJU_BITSPERSUBEXPB); \
772
#define JU_BITMAPDIGITL(Digit,Subexp,Bitmap,Offset) \
774
BITMAPL_t bitmap = (Bitmap); int remain = (Offset); \
775
(Digit) = (Subexp) * cJU_BITSPERSUBEXPL; \
777
while ((remain -= (bitmap & 1)) >= 0) \
779
bitmap >>= 1; ++(Digit); \
780
assert((Digit) < ((Subexp) + 1) * cJU_BITSPERSUBEXPL); \
785
// MASKS FOR PORTIONS OF 32-BIT WORDS:
787
// These are useful for bitmap subexpanses.
789
// "LOWER"/"HIGHER" means bits representing lower/higher-valued Indexes. The
790
// exact order of bits in the word is explicit here but is hidden from the
793
// "EXC" means exclusive of the specified bit; "INC" means inclusive.
795
// In each case, BitPos is either "JU_BITPOSMASK*(BitNum)", or a variable saved
796
// from an earlier call of that macro; either way, it must be a 32-bit word
797
// with a single bit set. In the first case, assume the compiler is smart
798
// enough to optimize out common subexpressions.
800
// The expressions depend on unsigned decimal math that should be universal.
802
#define JU_MASKLOWEREXC( BitPos) ((BitPos) - 1)
803
#define JU_MASKLOWERINC( BitPos) (JU_MASKLOWEREXC(BitPos) | (BitPos))
804
#define JU_MASKHIGHERINC(BitPos) (-(BitPos))
805
#define JU_MASKHIGHEREXC(BitPos) (JU_MASKHIGHERINC(BitPos) ^ (BitPos))
808
// ****************************************************************************
809
// SUPPORT FOR NATIVE INDEX SIZES
810
// ****************************************************************************
812
// Copy a series of generic objects (uint8_t, uint16_t, uint32_t, Word_t) from
813
// one place to another.
815
#define JU_COPYMEM(PDst,PSrc,Pop1) \
817
Word_t __index = 0; \
818
assert((Pop1) > 0); \
819
do { (PDst)[__index] = (PSrc)[__index]; } \
820
while (++__index < (Pop1)); \
824
// ****************************************************************************
825
// SUPPORT FOR ODD (NON-NATIVE) INDEX SIZES
826
// ****************************************************************************
828
// This is the best fake uint24_t I could design in C. -- dlb
830
// Note about endian issues: On a little-endian machine, the _t24_3byte field
831
// is apparently accessed through a register (always big-endian), while the
832
// _t24_3bytes field is in memory. Therefore, on a little-endian machine, the
833
// 3 index bytes are in little-endian order in memory. For example, if the
834
// index bytes are big-endian "cba", they are stored "abc" in memory.
836
typedef struct __FAKE_24BIT_UINT
840
Word_t _t24_3byte:24;
841
uint8_t _t24_3bytes[3];
845
// Copy a 3-byte Index pointed by a uint8_t * to a Word_t:
847
// See above about endian issues.
849
#define JU_COPY3_PINDEX_TO_LONG(DestLong,PIndex) \
852
temp._._t24_3bytes[0] = (PIndex)[0]; \
853
temp._._t24_3bytes[1] = (PIndex)[1]; \
854
temp._._t24_3bytes[2] = (PIndex)[2]; \
855
(DestLong) = temp._._t24_3byte; \
858
// Copy a Word_t to a 3-byte Index pointed at by a uint8_t *:
860
// See above about endian issues.
862
#define JU_COPY3_LONG_TO_PINDEX(PIndex,SourceLong) \
865
temp._._t24_3byte = (SourceLong); \
866
(PIndex)[0] = temp._._t24_3bytes[0]; \
867
(PIndex)[1] = temp._._t24_3bytes[1]; \
868
(PIndex)[2] = temp._._t24_3bytes[2]; \
874
// SIMILAR STRUCTS AND MACROS FOR ODD INDEX SIZES ON 64-BIT SYSTEMS:
876
typedef struct __FAKE_40BIT_UINT
880
Word_t _t40_5byte:40;
881
uint8_t _t40_5bytes[5];
885
#define JU_COPY5_PINDEX_TO_LONG(DestLong,PIndex) \
888
temp._._t40_5bytes[0] = (PIndex)[0]; \
889
temp._._t40_5bytes[1] = (PIndex)[1]; \
890
temp._._t40_5bytes[2] = (PIndex)[2]; \
891
temp._._t40_5bytes[3] = (PIndex)[3]; \
892
temp._._t40_5bytes[4] = (PIndex)[4]; \
893
(DestLong) = temp._._t40_5byte; \
896
#define JU_COPY5_LONG_TO_PINDEX(PIndex,SourceLong) \
899
temp._._t40_5byte = (SourceLong); \
900
(PIndex)[0] = temp._._t40_5bytes[0]; \
901
(PIndex)[1] = temp._._t40_5bytes[1]; \
902
(PIndex)[2] = temp._._t40_5bytes[2]; \
903
(PIndex)[3] = temp._._t40_5bytes[3]; \
904
(PIndex)[4] = temp._._t40_5bytes[4]; \
907
typedef struct __FAKE_48BIT_UINT
911
Word_t _t48_6byte:48;
912
uint8_t _t48_6bytes[6];
916
#define JU_COPY6_PINDEX_TO_LONG(DestLong,PIndex) \
919
temp._._t48_6bytes[0] = (PIndex)[0]; \
920
temp._._t48_6bytes[1] = (PIndex)[1]; \
921
temp._._t48_6bytes[2] = (PIndex)[2]; \
922
temp._._t48_6bytes[3] = (PIndex)[3]; \
923
temp._._t48_6bytes[4] = (PIndex)[4]; \
924
temp._._t48_6bytes[5] = (PIndex)[5]; \
925
(DestLong) = temp._._t48_6byte; \
928
#define JU_COPY6_LONG_TO_PINDEX(PIndex,SourceLong) \
931
temp._._t48_6byte = (SourceLong); \
932
(PIndex)[0] = temp._._t48_6bytes[0]; \
933
(PIndex)[1] = temp._._t48_6bytes[1]; \
934
(PIndex)[2] = temp._._t48_6bytes[2]; \
935
(PIndex)[3] = temp._._t48_6bytes[3]; \
936
(PIndex)[4] = temp._._t48_6bytes[4]; \
937
(PIndex)[5] = temp._._t48_6bytes[5]; \
940
typedef struct __FAKE_56BIT_UINT
944
Word_t _t56_7byte:56;
945
uint8_t _t56_7bytes[7];
949
#define JU_COPY7_PINDEX_TO_LONG(DestLong,PIndex) \
952
temp._._t56_7bytes[0] = (PIndex)[0]; \
953
temp._._t56_7bytes[1] = (PIndex)[1]; \
954
temp._._t56_7bytes[2] = (PIndex)[2]; \
955
temp._._t56_7bytes[3] = (PIndex)[3]; \
956
temp._._t56_7bytes[4] = (PIndex)[4]; \
957
temp._._t56_7bytes[5] = (PIndex)[5]; \
958
temp._._t56_7bytes[6] = (PIndex)[6]; \
959
(DestLong) = temp._._t56_7byte; \
962
#define JU_COPY7_LONG_TO_PINDEX(PIndex,SourceLong) \
965
temp._._t56_7byte = (SourceLong); \
966
(PIndex)[0] = temp._._t56_7bytes[0]; \
967
(PIndex)[1] = temp._._t56_7bytes[1]; \
968
(PIndex)[2] = temp._._t56_7bytes[2]; \
969
(PIndex)[3] = temp._._t56_7bytes[3]; \
970
(PIndex)[4] = temp._._t56_7bytes[4]; \
971
(PIndex)[5] = temp._._t56_7bytes[5]; \
972
(PIndex)[6] = temp._._t56_7bytes[6]; \
978
// ****************************************************************************
979
// COMMON CODE FRAGMENTS (MACROS)
980
// ****************************************************************************
982
// These code chunks are shared between various source files.
985
// SET (REPLACE) ONE DIGIT IN AN INDEX:
987
// To avoid endian issues, use masking and ORing, which operates in a
988
// big-endian register, rather than treating the Index as an array of bytes,
989
// though that would be simpler, but would operate in endian-specific memory.
991
// TBD: This contains two variable shifts, is that bad?
993
#define JU_SETDIGIT(Index,Digit,State) \
994
(Index) = ((Index) & (~cJU_MASKATSTATE(State))) \
995
| (((Word_t) (Digit)) \
996
<< (((State) - 1) * cJU_BITSPERBYTE))
998
// Fast version for single LSB:
1000
#define JU_SETDIGIT1(Index,Digit) (Index) = ((Index) & ~0xff) | (Digit)
1003
// SET (REPLACE) "N" LEAST DIGITS IN AN INDEX:
1005
#define JU_SETDIGITS(Index,Index2,cState) \
1006
(Index) = ((Index ) & (~JU_LEASTBYTESMASK(cState))) \
1007
| ((Index2) & ( JU_LEASTBYTESMASK(cState)))
1010
// COPY DECODE BYTES FROM JP TO INDEX:
1012
// Modify Index digit(s) to match the bytes in jp_DcdPop0 in case one or more
1013
// branches are skipped and the digits are significant. It's probably faster
1014
// to just do this unconditionally than to check if it's necessary.
1016
// To avoid endian issues, use masking and ORing, which operates in a
1017
// big-endian register, rather than treating the Index as an array of bytes,
1018
// though that would be simpler, but would operate in endian-specific memory.
1020
// WARNING: Must not call JU_LEASTBYTESMASK (via cJU_DCDMASK) with Bytes =
1021
// cJU_ROOTSTATE or a bad mask is generated, but there are no Dcd bytes to copy
1022
// in this case anyway. In fact there are no Dcd bytes unless State <
1023
// cJU_ROOTSTATE - 1, so don't call this macro except in those cases.
1025
// TBD: It would be nice to validate jp_DcdPop0 against known digits to ensure
1026
// no corruption, but this is non-trivial.
1028
#define JU_SETDCD(Index,Dcd,cState) \
1029
(Index) = ((Index) & ~cJU_DCDMASK(cState)) \
1030
| ((Dcd) & cJU_DCDMASK(cState))
1033
// INSERT/DELETE AN INDEX IN-PLACE IN MEMORY:
1035
// Given a pointer to an array of "even" (native), same-sized objects
1036
// (indexes), the current population of the array, an offset in the array, and
1037
// a new Index to insert, "shift up" the array elements (Indexes) above the
1038
// insertion point and insert the new Index. Assume there is sufficient memory
1041
// In these macros, "_ioffset" is an index offset, and "_boffset" is a byte
1042
// offset for odd Index sizes.
1044
// Note: Endian issues only arise fro insertion, not deletion, and even for
1045
// insertion, they are transparent when native (even) objects are used, and
1046
// handled explicitly for odd (non-native) Index sizes.
1048
// Note: The following macros are tricky enough that there is some test code
1049
// for them appended to this file.
1051
#define JU_INSERTINPLACE(PArray,Pop1,Offset,Index) \
1052
assert((long) (Pop1) > 0); \
1053
assert((Word_t) (Offset) <= (Word_t) (Pop1)); \
1055
Word_t _ioffset = (Pop1); \
1057
while (_ioffset-- > (Offset)) \
1058
(PArray)[_ioffset + 1] = (PArray)[_ioffset]; \
1060
(PArray)[Offset] = (Index); \
1063
// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size
1064
// and PByte must point to a uint8_t (byte); shift byte-by-byte:
1066
// Note: If cIS == 1, JU_INSERTINPLACE_ODD == JU_INSERTINPLACE, at least in
1069
// Note: JU_INSERTINPLACE_ODD() is hidden common code; use
1070
// JU_INSERTINPLACE3(), etc.
1072
// Note: To handle endian issues, these macros pass Indexes through odd-index
1073
// structs defined elsewhere.
1075
// TBD: Would it be more efficient for big-endian systems to use JU_BYTEORDER
1076
// to avoid going through a struct?
1078
#define JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \
1079
cIS, Struct_t, Field_bytes, Field_int) \
1080
assert((long) (Pop1) > 0); \
1081
assert((Word_t) (Offset) <= (Word_t) (Pop1)); \
1084
uint8_t * _Pindex = &(_temp._.Field_bytes[0]); \
1085
Word_t _boffset = (((Pop1) + 1) * (cIS)) - 1; \
1087
_temp._.Field_int = (Index); \
1089
while (_boffset-- > (((Offset) + 1) * (cIS)) - 1) \
1090
(PByte)[_boffset + 1] = (PByte)[_boffset + 1 - (cIS)]; \
1092
for (_boffset = (Offset) * (cIS); \
1093
_boffset < ((Offset) + 1) * (cIS); \
1096
(PByte)[_boffset] = *_Pindex++; \
1100
#define JU_INSERTINPLACE3( PByte,Pop1,Offset,Index) \
1101
JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \
1102
3, uint24_t, _t24_3bytes, _t24_3byte)
1105
#define JU_INSERTINPLACE5( PByte,Pop1,Offset,Index) \
1106
JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \
1107
5, uint40_t, _t40_5bytes, _t40_5byte)
1109
#define JU_INSERTINPLACE6( PByte,Pop1,Offset,Index) \
1110
JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \
1111
6, uint48_t, _t48_6bytes, _t48_6byte)
1113
#define JU_INSERTINPLACE7( PByte,Pop1,Offset,Index) \
1114
JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \
1115
7, uint56_t, _t56_7bytes, _t56_7byte)
1118
// Counterparts to the above for deleting an Index:
1120
// "Shift down" the array elements starting at the Index to be deleted.
1122
#define JU_DELETEINPLACE(PArray,Pop1,Offset,ignore) \
1123
assert((long) (Pop1) > 0); \
1124
assert((Word_t) (Offset) < (Word_t) (Pop1)); \
1126
Word_t _ioffset = (Offset); \
1128
while (++_ioffset < (Pop1)) \
1129
(PArray)[_ioffset - 1] = (PArray)[_ioffset]; \
1132
// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size
1133
// and PByte must point to a uint8_t (byte); copy byte-by-byte:
1135
// Note: If cIS == 1, JU_DELETEINPLACE_ODD == JU_DELETEINPLACE.
1137
// Note: There are no endian issues here because bytes are just shifted as-is,
1138
// not converted to/from an Index.
1140
#define JU_DELETEINPLACE_ODD(PByte,Pop1,Offset,cIS) \
1141
assert((long) (Pop1) > 0); \
1142
assert((Word_t) (Offset) < (Word_t) (Pop1)); \
1144
Word_t _boffset = (((Offset) + 1) * (cIS)) - 1; \
1146
while (++_boffset < ((Pop1) * (cIS))) \
1147
(PByte)[_boffset - (cIS)] = (PByte)[_boffset]; \
1151
// INSERT/DELETE AN INDEX WHILE COPYING OTHERS:
1153
// Copy PSource[] to PDest[], where PSource[] has Pop1 elements (Indexes),
1154
// inserting Index at PDest[Offset]. Unlike JU_*INPLACE*() above, these macros
1155
// are used when moving Indexes from one memory object to another.
1157
// Note: See above about endian issues.
1159
#define JU_INSERTCOPY(PDest,PSource,Pop1,Offset,Index) \
1160
assert((long) (Pop1) > 0); \
1161
assert((Word_t) (Offset) <= (Word_t) (Pop1)); \
1165
for (_ioffset = 0; _ioffset < (Offset); ++_ioffset) \
1166
(PDest)[_ioffset] = (PSource)[_ioffset]; \
1168
(PDest)[_ioffset] = (Index); \
1170
for (/* null */; _ioffset < (Pop1); ++_ioffset) \
1171
(PDest)[_ioffset + 1] = (PSource)[_ioffset]; \
1174
// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size;
1175
// copy byte-by-byte:
1177
// Note: If cIS == 1, JU_INSERTCOPY_ODD == JU_INSERTCOPY, at least in concept.
1179
// Note: JU_INSERTCOPY_ODD() is hidden common code; use JU_INSERTCOPY3(), etc.
1181
// Note: To handle endian issues, these macros pass Indexes through odd-index
1182
// structs defined elsewhere.
1184
// TBD: Would it be more efficient for big-endian systems to use JU_BYTEORDER
1185
// to avoid going through a struct?
1187
#define JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \
1188
cIS, Struct_t, Field_bytes, Field_int) \
1189
assert((long) (Pop1) > 0); \
1190
assert((Word_t) (Offset) <= (Word_t) (Pop1)); \
1193
uint8_t * _Pdest = (uint8_t *) (PDest); \
1194
uint8_t * _Psource = (uint8_t *) (PSource); \
1195
uint8_t * _Pindex = &(_temp._.Field_bytes[0]); \
1199
_temp._.Field_int = (Index); \
1201
for (_boffset = 0; _boffset < ((Offset) * (cIS)); ++_boffset) \
1202
*_Pdest++ = *_Psource++; \
1204
for (_boffset2 = 0; _boffset2 < (cIS); ++_boffset2) \
1205
*_Pdest++ = *_Pindex++; \
1207
for (/* null */; _boffset < ((Pop1) * (cIS)); ++_boffset) \
1208
*_Pdest++ = *_Psource++; \
1211
#define JU_INSERTCOPY3( PDest,PSource,Pop1,Offset,Index) \
1212
JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \
1213
3, uint24_t, _t24_3bytes, _t24_3byte)
1216
#define JU_INSERTCOPY5( PDest,PSource,Pop1,Offset,Index) \
1217
JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \
1218
5, uint40_t, _t40_5bytes, _t40_5byte)
1220
#define JU_INSERTCOPY6( PDest,PSource,Pop1,Offset,Index) \
1221
JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \
1222
6, uint48_t, _t48_6bytes, _t48_6byte)
1224
#define JU_INSERTCOPY7( PDest,PSource,Pop1,Offset,Index) \
1225
JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \
1226
7, uint56_t, _t56_7bytes, _t56_7byte)
1229
// Counterparts to the above for deleting an Index:
1231
#define JU_DELETECOPY(PDest,PSource,Pop1,Offset,ignore) \
1232
assert((long) (Pop1) > 0); \
1233
assert((Word_t) (Offset) < (Word_t) (Pop1)); \
1237
for (_ioffset = 0; _ioffset < (Offset); ++_ioffset) \
1238
(PDest)[_ioffset] = (PSource)[_ioffset]; \
1240
for (++_ioffset; _ioffset < (Pop1); ++_ioffset) \
1241
(PDest)[_ioffset - 1] = (PSource)[_ioffset]; \
1244
// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size;
1245
// copy byte-by-byte:
1247
// Note: There are no endian issues here because bytes are just shifted as-is,
1248
// not converted to/from an Index.
1250
// Note: If cIS == 1, JU_DELETECOPY_ODD == JU_DELETECOPY, at least in concept.
1252
#define JU_DELETECOPY_ODD(PDest,PSource,Pop1,Offset,cIS) \
1253
assert((long) (Pop1) > 0); \
1254
assert((Word_t) (Offset) < (Word_t) (Pop1)); \
1256
uint8_t *_Pdest = (uint8_t *) (PDest); \
1257
uint8_t *_Psource = (uint8_t *) (PSource); \
1260
for (_boffset = 0; _boffset < ((Offset) * (cIS)); ++_boffset) \
1261
*_Pdest++ = *_Psource++; \
1263
_Psource += (cIS); \
1265
for (_boffset += (cIS); _boffset < ((Pop1) * (cIS)); ++_boffset) \
1266
*_Pdest++ = *_Psource++; \
1270
// GENERIC RETURN CODE HANDLING FOR JUDY1 (NO VALUE AREAS) AND JUDYL (VALUE
1273
// This common code hides Judy1 versus JudyL details of how to return various
1274
// conditions, including a pointer to a value area for JudyL.
1276
// First, define an internal variation of JERR called JERRI (I = int) to make
1277
// lint happy. We accidentally shipped to 11.11 OEUR with all functions that
1278
// return int or Word_t using JERR, which is type Word_t, for errors. Lint
1279
// complains about this for functions that return int. So, internally use
1280
// JERRI for error returns from the int functions. Experiments show that
1281
// callers which compare int Foo() to (Word_t) JERR (~0UL) are OK, since JERRI
1282
// sign-extends to match JERR.
1284
#define JERRI ((int) ~0) // see above.
1288
#define JU_RET_FOUND return(1)
1289
#define JU_RET_NOTFOUND return(0)
1291
// For Judy1, these all "fall through" to simply JU_RET_FOUND, since there is no
1292
// value area pointer to return:
1294
#define JU_RET_FOUND_JAPLEAF(Pjlw,Pop1,Offset) JU_RET_FOUND
1296
#define JU_RET_FOUND_JPM(Pjpm) JU_RET_FOUND
1297
#define JU_RET_FOUND_PVALUE(Pjv,Offset) JU_RET_FOUND
1299
#define JU_RET_FOUND_LEAF1(Pjll,Pop1,Offset) JU_RET_FOUND
1301
#define JU_RET_FOUND_LEAF2(Pjll,Pop1,Offset) JU_RET_FOUND
1302
#define JU_RET_FOUND_LEAF3(Pjll,Pop1,Offset) JU_RET_FOUND
1304
#define JU_RET_FOUND_LEAF4(Pjll,Pop1,Offset) JU_RET_FOUND
1305
#define JU_RET_FOUND_LEAF5(Pjll,Pop1,Offset) JU_RET_FOUND
1306
#define JU_RET_FOUND_LEAF6(Pjll,Pop1,Offset) JU_RET_FOUND
1307
#define JU_RET_FOUND_LEAF7(Pjll,Pop1,Offset) JU_RET_FOUND
1309
#define JU_RET_FOUND_IMM_01(Pjp) JU_RET_FOUND
1310
#define JU_RET_FOUND_IMM(Pjp,Offset) JU_RET_FOUND
1312
// Note: No JudyL equivalent:
1314
#define JU_RET_FOUND_FULLPOPU1 JU_RET_FOUND
1315
#define JU_RET_FOUND_LEAF_B1(Pjlb,Subexp,Offset) JU_RET_FOUND
1319
// JU_RET_FOUND // see below; must NOT be defined for JudyL.
1320
#define JU_RET_NOTFOUND return((PPvoid_t) NULL)
1322
// For JudyL, the location of the value area depends on the JP type and other
1325
// TBD: The value areas should be accessed via data structures, here and in
1326
// Doug's code, not by hard-coded address calculations.
1328
// This is useful in insert/delete code when the value area is returned from
1329
// lower levels in the JPM:
1331
#define JU_RET_FOUND_JPM(Pjpm) return((PPvoid_t) ((Pjpm) -> jpm_PValue))
1333
// This is useful in insert/delete code when the value area location is already
1336
#define JU_RET_FOUND_PVALUE(Pjv,Offset) return((PPvoid_t) ((Pjv) + Offset))
1338
#define JU_RET_FOUND_JAPLEAF(Pjlw,Pop1,Offset) \
1339
return((PPvoid_t) (JL_LEAFWVALUEAREA(Pjlw, Pop1) + (Offset)))
1341
#define JU_RET_FOUND_LEAF1(Pjll,Pop1,Offset) \
1342
return((PPvoid_t) (JL_LEAF1VALUEAREA(Pjll, Pop1) + (Offset)))
1343
#define JU_RET_FOUND_LEAF2(Pjll,Pop1,Offset) \
1344
return((PPvoid_t) (JL_LEAF2VALUEAREA(Pjll, Pop1) + (Offset)))
1345
#define JU_RET_FOUND_LEAF3(Pjll,Pop1,Offset) \
1346
return((PPvoid_t) (JL_LEAF3VALUEAREA(Pjll, Pop1) + (Offset)))
1348
#define JU_RET_FOUND_LEAF4(Pjll,Pop1,Offset) \
1349
return((PPvoid_t) (JL_LEAF4VALUEAREA(Pjll, Pop1) + (Offset)))
1350
#define JU_RET_FOUND_LEAF5(Pjll,Pop1,Offset) \
1351
return((PPvoid_t) (JL_LEAF5VALUEAREA(Pjll, Pop1) + (Offset)))
1352
#define JU_RET_FOUND_LEAF6(Pjll,Pop1,Offset) \
1353
return((PPvoid_t) (JL_LEAF6VALUEAREA(Pjll, Pop1) + (Offset)))
1354
#define JU_RET_FOUND_LEAF7(Pjll,Pop1,Offset) \
1355
return((PPvoid_t) (JL_LEAF7VALUEAREA(Pjll, Pop1) + (Offset)))
1358
// Note: Here jp_Addr is a value area itself and not an address, so P_JV() is
1361
#define JU_RET_FOUND_IMM_01(Pjp) return((PPvoid_t) (&((Pjp) -> jp_Addr)))
1363
// Note: Here jp_Addr is a pointer to a separately-malloc'd value area, so
1364
// P_JV() is required; likewise for JL_JLB_PVALUE:
1366
#define JU_RET_FOUND_IMM(Pjp,Offset) \
1367
return((PPvoid_t) (P_JV((Pjp)->jp_Addr) + (Offset)))
1369
#define JU_RET_FOUND_LEAF_B1(Pjlb,Subexp,Offset) \
1370
return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, Subexp)) + (Offset)))
1375
// GENERIC ERROR HANDLING:
1377
// This is complicated by variations in the needs of the callers of these
1378
// macros. Only use JU_SET_ERRNO() for PJError, because it can be null; use
1379
// JU_SET_ERRNO_NONNULL() for Pjpm, which is never null, and also in other
1380
// cases where the pointer is known not to be null (to save dead branches).
1382
// Note: Most cases of JU_ERRNO_OVERRUN or JU_ERRNO_CORRUPT should result in
1383
// an assertion failure in debug code, so they are more likely to be caught, so
1384
// do that here in each macro.
1386
#define JU_SET_ERRNO(PJError, JErrno) \
1388
assert((JErrno) != JU_ERRNO_OVERRUN); \
1389
assert((JErrno) != JU_ERRNO_CORRUPT); \
1391
if (PJError != (PJError_t) NULL) \
1393
JU_ERRNO(PJError) = (JErrno); \
1394
JU_ERRID(PJError) = __LINE__; \
1398
// Variation for callers who know already that PJError is non-null; and, it can
1399
// also be Pjpm (both PJError_t and Pjpm_t have je_* fields), so only assert it
1400
// for null, not cast to any specific pointer type:
1402
#define JU_SET_ERRNO_NONNULL(PJError, JErrno) \
1404
assert((JErrno) != JU_ERRNO_OVERRUN); \
1405
assert((JErrno) != JU_ERRNO_CORRUPT); \
1408
JU_ERRNO(PJError) = (JErrno); \
1409
JU_ERRID(PJError) = __LINE__; \
1412
// Variation to copy error info from a (required) JPM to an (optional)
1415
// Note: The assertions above about JU_ERRNO_OVERRUN and JU_ERRNO_CORRUPT
1416
// should have already popped, so they are not needed here.
1418
#define JU_COPY_ERRNO(PJError, Pjpm) \
1422
JU_ERRNO(PJError) = (uint8_t)JU_ERRNO(Pjpm); \
1423
JU_ERRID(PJError) = JU_ERRID(Pjpm); \
1427
// For JErrno parameter to previous macros upon return from Judy*Alloc*():
1429
// The memory allocator returns an address of 0 for out of memory,
1430
// 1..sizeof(Word_t)-1 for corruption (an invalid pointer), otherwise a valid
1433
#define JU_ALLOC_ERRNO(Addr) \
1434
(((void *) (Addr) != (void *) NULL) ? JU_ERRNO_OVERRUN : JU_ERRNO_NOMEM)
1436
#define JU_CHECKALLOC(Type,Ptr,Retval) \
1437
if ((Ptr) < (Type) sizeof(Word_t)) \
1439
JU_SET_ERRNO(PJError, JU_ALLOC_ERRNO(Ptr)); \
1444
// ****************************************************************************
1445
// SUPPORT FOR LEAF SEARCHING:
1446
// ****************************************************************************
1448
// These macros are shared by JudySearchLeaf.c and JudyPrevNextEmpty.c only so
1449
// they lack noisy JU_* prefixes.
1451
// BYTES PER INDEX (BPI), just for clarity:
1462
#define BPIL (sizeof(Word_t))
1465
// SHORTHAND NAMES FOR MASKS:
1467
#define MASK1 JU_LEASTBYTESMASK(BPI1)
1468
#define MASK2 JU_LEASTBYTESMASK(BPI2)
1469
#define MASK3 JU_LEASTBYTESMASK(BPI3)
1471
#define MASK4 JU_LEASTBYTESMASK(BPI4)
1472
#define MASK5 JU_LEASTBYTESMASK(BPI5)
1473
#define MASK6 JU_LEASTBYTESMASK(BPI6)
1474
#define MASK7 JU_LEASTBYTESMASK(BPI7)
1476
// No need for MASKL.
1479
// SUPPORT FOR ODD-SIZED INDEXES:
1481
// An index size of 3 bytes, and for 64-bit only, 5, 6, and 7 bytes, are "odd
1482
// sized" because they do not have corresponding native C data types and do not
1483
// fit neatly into a single word. For efficiency, the leaf search code scans
1484
// these odd indexes in "index groups" that fit in whole words.
1486
// INDEXES PER INDEX GROUP (IPG) for odd-sized indexes: The simple way to
1487
// calculate the number of indexes per index group is BPI(N) * cJU_BYTESPERWORD
1488
// / BPI(N), or essentially just cJU_BYTESPERWORD. However, for BPI6, 6 * 8 =
1489
// 48 bytes (6 words, 8 indexes), but there is a common prime factor of 2, so
1490
// 24 bytes (3 words, 4 indexes) is better because it's smaller.
1493
#define IPG3 4 // indexes per group (fit in whole words).
1497
#define IPG6 4 // see above.
1501
// WORDS PER INDEX GROUP (WPG) for odd-sized indexes:
1503
#define WPG3 ((IPG3 * BPI3) / cJU_BYTESPERWORD) // 3 [3].
1505
#define WPG5 ((IPG5 * BPI5) / cJU_BYTESPERWORD) // [5].
1506
#define WPG6 ((IPG6 * BPI6) / cJU_BYTESPERWORD) // [3].
1507
#define WPG7 ((IPG7 * BPI7) / cJU_BYTESPERWORD) // [7].
1510
// INDEXES PER CACHE LINE (IPC) for odd-sized indexes, really the number of
1511
// indexes within the maximum number of whole (word-aligned) index groups per
1512
// "virtual" cache line, which is less than or equal in size to a real cache
1515
// Note: For odd-sized indexes these IPC's are less than the actual indexes
1516
// per cache line, sometimes less even the number of whole indexes per cache
1517
// line, due to word-aligned grouping.
1519
#define IPC1 (cJU_BYTESPERCL / 1) // 64 [128].
1520
#define IPC2 (cJU_BYTESPERCL / 2) // 32 [64].
1521
#define IPC3 ((cJU_BYTESPERCL / (IPG3 * BPI3)) * IPG3) // 20 [40].
1523
#define IPC4 (cJU_BYTESPERCL / 4) // [32].
1524
#define IPC5 ((cJU_BYTESPERCL / (IPG5 * BPI5)) * IPG5) // [24].
1525
#define IPC6 ((cJU_BYTESPERCL / (IPG6 * BPI6)) * IPG6) // [20].
1526
#define IPC7 ((cJU_BYTESPERCL / (IPG7 * BPI7)) * IPG7) // [16].
1528
#define IPCL (cJU_BYTESPERCL / sizeof(Word_t)) // 16 [16].
1531
// MACROS FOR EFFICIENTLY EXTRACTING ALIGNED ODD-SIZE INDEXES FROM A LEAF:
1533
// Doing shifts, ORs, and masks in registers is presumably faster than
1534
// rebuilding odd-sized indexes one byte at a time. However, it makes the code
1535
// unavoidably torturous as it works through all the 32/64-bit and
1536
// little/big-endian permutations efficiently. (Trading longer coding time and
1537
// code size for faster execution.)
1539
// The GET3_*() macros hide the details of how to extract bytes "cba"
1540
// (big-endian order) for index 0..3 [0..7] in an index group, given Pword
1541
// pointing to the first (unsigned long) word of a group.
1544
#ifndef JU_LITTLE_ENDIAN
1546
// Index bytes for indexes 0-3 look like this (big-endian):
1548
// 0001 1122 2333 // in memory or registers.
1551
#define GET3_0 (Pword[0] >> 8)
1552
#define GET3_1 (((Pword[0] << 16) | (Pword[1] >> 16)) & MASK3)
1553
#define GET3_2 (((Pword[1] << 8) | (Pword[2] >> 24)) & MASK3)
1554
#define GET3_3 (Pword[2] & MASK3)
1556
#endif // JU_BIG_ENDIAN
1558
#ifdef JU_LITTLE_ENDIAN
1560
// Index bytes for indexes 0-3 look like this (little-endian):
1562
// 0001 1122 2333 // in memory.
1565
// 1000 2211 3332 // in registers.
1566
// acba bacb cbac // big-endian Indexes.
1568
#define GET3_0 (Pword[0] & MASK3)
1569
#define GET3_1 (((Pword[0] >> 24) | (Pword[1] << 8)) & MASK3)
1570
#define GET3_2 (((Pword[1] >> 16) | (Pword[2] << 16)) & MASK3)
1571
#define GET3_3 (Pword[2] >> 8)
1573
#endif // JU_LITTLE_ENDIAN
1575
#define GET3_LIG GET3_3 // last index in index group.
1579
#ifndef JU_LITTLE_ENDIAN
1581
// Index bytes for indexes 0-7 look like this (big-endian):
1583
// 00011122 23334445 55666777 // in memory or registers.
1584
// cbacbacb acbacbac bacbacba
1586
#define GET3_0 (Pword[0] >> 40)
1587
#define GET3_1 ((Pword[0] >> 16) & MASK3)
1588
#define GET3_2 (((Pword[0] << 8) | (Pword[1] >> 56)) & MASK3)
1589
#define GET3_3 ((Pword[1] >> 32) & MASK3)
1590
#define GET3_4 ((Pword[1] >> 8) & MASK3)
1591
#define GET3_5 (((Pword[1] << 16) | (Pword[2] >> 48)) & MASK3)
1592
#define GET3_6 ((Pword[2] >> 24) & MASK3)
1593
#define GET3_7 (Pword[2] & MASK3)
1595
#endif // JU_BIG_ENDIAN
1597
#ifdef JU_LITTLE_ENDIAN
1599
// Index bytes for indexes 0-7 look like this (little-endian):
1601
// 00011122 23334445 55666777 // in memory.
1602
// abcabcab cabcabca bcabcabc
1604
// 22111000 54443332 77766655 // in registers.
1605
// bacbacba acbacbac cbacbacb // big-endian Indexes.
1607
#define GET3_0 (Pword[0] & MASK3)
1608
#define GET3_1 ((Pword[0] >> 24) & MASK3)
1609
#define GET3_2 (((Pword[0] >> 48) | (Pword[1] << 16)) & MASK3)
1610
#define GET3_3 ((Pword[1] >> 8) & MASK3)
1611
#define GET3_4 ((Pword[1] >> 32) & MASK3)
1612
#define GET3_5 (((Pword[1] >> 56) | (Pword[2] << 8)) & MASK3)
1613
#define GET3_6 ((Pword[2] >> 16) & MASK3)
1614
#define GET3_7 (Pword[2] >> 40)
1616
#endif // JU_LITTLE_ENDIAN
1618
#define GET3_LIG GET3_7 // last index in index group.
1625
// The GET5_* macros are similar to GET3_*() for larger index sizes:
1627
#ifndef JU_LITTLE_ENDIAN
1629
// Index bytes for indexes 0-7 look like this (big-endian):
1631
// 00000111 11222223 33334444 45555566 66677777 // in memory or registers.
1632
// edcbaedc baedcbae dcbaedcb aedcbaed cbaedcba
1634
#define GET5_0 (Pword[0] >> 24)
1635
#define GET5_1 (((Pword[0] << 16) | (Pword[1] >> 48)) & MASK5)
1636
#define GET5_2 ((Pword[1] >> 8) & MASK5)
1637
#define GET5_3 (((Pword[1] << 32) | (Pword[2] >> 32)) & MASK5)
1638
#define GET5_4 (((Pword[2] << 8) | (Pword[3] >> 56)) & MASK5)
1639
#define GET5_5 ((Pword[3] >> 16) & MASK5)
1640
#define GET5_6 (((Pword[3] << 24) | (Pword[4] >> 40)) & MASK5)
1641
#define GET5_7 (Pword[4] & MASK5)
1643
#endif // JU_BIG_ENDIAN
1645
#ifdef JU_LITTLE_ENDIAN
1647
// Index bytes for indexes 0-7 look like this (little-endian):
1649
// 00000111 11222223 33334444 45555566 66677777 // in memory.
1650
// abcdeabc deabcdea bcdeabcd eabcdeab cdeabcde
1652
// 11100000 32222211 44443333 66555554 77777666 // in registers.
1653
// cbaedcba aedcbaed dcbaedcb daedcbae edcbaedc // big-endian Indexes.
1655
#define GET5_0 (Pword[0] & MASK5)
1656
#define GET5_1 (((Pword[0] >> 40) | (Pword[1] << 24)) & MASK5)
1657
#define GET5_2 ((Pword[1] >> 16) & MASK5)
1658
#define GET5_3 (((Pword[1] >> 56) | (Pword[2] << 8)) & MASK5)
1659
#define GET5_4 (((Pword[2] >> 32) | (Pword[3] << 32)) & MASK5)
1660
#define GET5_5 ((Pword[3] >> 8) & MASK5)
1661
#define GET5_6 (((Pword[3] >> 48) | (Pword[4] << 16)) & MASK5)
1662
#define GET5_7 (Pword[4] >> 24)
1664
#endif // JU_LITTLE_ENDIAN
1666
#define GET5_LIG GET5_7 // last index in index group.
1669
// The GET6_* macros are similar to GET3_*() for larger index sizes:
1671
#ifndef JU_LITTLE_ENDIAN
1673
// Index bytes for indexes 0-7 look like this (big-endian):
1675
// 00000011 11112222 22333333 // in memory or registers.
1676
// fedcbafe dcbafedc bafedcba
1678
#define GET6_0 (Pword[0] >> 16)
1679
#define GET6_1 (((Pword[0] << 32) | (Pword[1] >> 32)) & MASK6)
1680
#define GET6_2 (((Pword[1] << 16) | (Pword[2] >> 48)) & MASK6)
1681
#define GET6_3 (Pword[2] & MASK6)
1683
#endif // JU_BIG_ENDIAN
1685
#ifdef JU_LITTLE_ENDIAN
1687
// Index bytes for indexes 0-7 look like this (little-endian):
1689
// 00000011 11112222 22333333 // in memory.
1690
// abcdefab cdefabcd efabcdef
1692
// 11000000 22221111 33333322 // in registers.
1693
// bafedcba dcbafedc fedcbafe // big-endian Indexes.
1695
#define GET6_0 (Pword[0] & MASK6)
1696
#define GET6_1 (((Pword[0] >> 48) | (Pword[1] << 16)) & MASK6)
1697
#define GET6_2 (((Pword[1] >> 32) | (Pword[2] << 32)) & MASK6)
1698
#define GET6_3 (Pword[2] >> 16)
1700
#endif // JU_LITTLE_ENDIAN
1702
#define GET6_LIG GET6_3 // last index in index group.
1705
// The GET7_* macros are similar to GET3_*() for larger index sizes:
1707
#ifndef JU_LITTLE_ENDIAN
1709
// Index bytes for indexes 0-7 look like this (big-endian) (in memory or
1712
// 00000001 11111122 22222333 33334444 44455555 55666666 67777777
1713
// gfedcbag fedcbagf edcbagfe dcbagfed cbagfedc bagfedcb agfedcba
1715
#define GET7_0 (Pword[0] >> 8)
1716
#define GET7_1 (((Pword[0] << 48) | (Pword[1] >> 16)) & MASK7)
1717
#define GET7_2 (((Pword[1] << 40) | (Pword[2] >> 24)) & MASK7)
1718
#define GET7_3 (((Pword[2] << 32) | (Pword[3] >> 32)) & MASK7)
1719
#define GET7_4 (((Pword[3] << 24) | (Pword[4] >> 40)) & MASK7)
1720
#define GET7_5 (((Pword[4] << 16) | (Pword[5] >> 48)) & MASK7)
1721
#define GET7_6 (((Pword[5] << 8) | (Pword[6] >> 56)) & MASK7)
1722
#define GET7_7 (Pword[6] & MASK7)
1724
#endif // JU_BIG_ENDIAN
1726
#ifdef JU_LITTLE_ENDIAN
1728
// Index bytes for indexes 0-7 look like this (little-endian) (in memory, then
1729
// in registers, with big-endian Indexes):
1731
// 00000001 11111122 22222333 33334444 44455555 55666666 67777777
1732
// abcdefga bcdefgab cdefgabc defgabcd efgabcde fgabcdef gabcdefg
1734
// 10000000 22111111 33322222 44443333 55555444 66666655 77777776
1735
// agfedcba bagfedcb cbagfedc dcbagfed edcbagfe fedcbagf gfedcbag
1737
#define GET7_0 (Pword[0] & MASK7)
1738
#define GET7_1 (((Pword[0] >> 56) | (Pword[1] << 8)) & MASK7)
1739
#define GET7_2 (((Pword[1] >> 48) | (Pword[2] << 16)) & MASK7)
1740
#define GET7_3 (((Pword[2] >> 40) | (Pword[3] << 24)) & MASK7)
1741
#define GET7_4 (((Pword[3] >> 32) | (Pword[4] << 32)) & MASK7)
1742
#define GET7_5 (((Pword[4] >> 24) | (Pword[5] << 40)) & MASK7)
1743
#define GET7_6 (((Pword[5] >> 16) | (Pword[6] << 48)) & MASK7)
1744
#define GET7_7 (Pword[6] >> 8)
1746
#endif // JU_LITTLE_ENDIAN
1748
#define GET7_LIG GET7_7 // last index in index group.
1752
#endif // ! _JUDYPRIVATE_INCLUDED