~ubuntu-branches/ubuntu/precise/judy/precise : revision 1

1

#ifndef _JUDYPRIVATE_INCLUDED

2

#define _JUDYPRIVATE_INCLUDED

3

// _________________

4

//

5

6

//

7

// This program is free software; you can redistribute it and/or modify it

8

// under the term of the GNU Lesser General Public License as published by the

9

// Free Software Foundation; either version 2 of the License, or (at your

10

// option) any later version.

11

//

12

// This program is distributed in the hope that it will be useful, but WITHOUT

13

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

14

// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License

15

// for more details.

16

//

17

// You should have received a copy of the GNU Lesser General Public License

18

// along with this program; if not, write to the Free Software Foundation,

19

// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

20

// _________________

21

22

// @(#) $Revision: 4.77 $ $Source: /judy/src/JudyCommon/JudyPrivate.h $

23

//

24

// Header file for all Judy sources, for global but private (non-exported)

25

// declarations.

26

27

28

// INCLUDE EXPORTED HEADER FILE:

29

//

30

// Assume all Judy internal sources need the exported header file.

31

32

#include "config.h"

33

#include "Judy.h"

34

35

// Define some types missing on Windows (not available in a system header

36

// file):

37

38

#ifdef JU_WIN

39

typedef unsigned char uint8_t; // 8-bit unsigned.

40

typedef unsigned short uint16_t; // 16-bit unsigned.

41

typedef unsigned long uint32_t; // 32-bit unsigned.

42

#ifdef JU_WIN_IPF

43

typedef unsigned __int64 uint64_t; // appears __int64 is a native type.

44

#endif

45

#endif

46

47

48

// ****************************************************************************

49

// A VERY BRIEF EXPLANATION OF A JUDY ARRAY

50

//

51

// A Judy array is, effectively, a digital tree (or Trie) with 256 element

52

// branches (nodes), and with "compression tricks" applied to low-population

53

// branches or leaves to save a lot of memory at the cost of relatively little

54

// CPU time or cache fills.

55

//

56

// In the actual implementation, a Judy array is level-less, and traversing the

57

// "tree" actually means following the states in a state machine (SM) as

58

// directed by the Index. A Judy array is referred to here as an "SM", rather

59

// than as a "tree"; having "states", rather than "levels".

60

//

61

// Each branch or leaf in the SM decodes a portion ("digit") of the original

62

// Index; with 256-way branches there are 8 bits per digit. There are 3 kinds

63

// of branches, called: Linear, Bitmap and Uncompressed, of which the first 2

64

// are compressed to contain no NULL entries.

65

//

66

// An Uncompressed branch has a 1.0 cache line fill cost to decode 8 bits of

67

// (digit, part of an Index), but it might contain many NULL entries, and is

68

// therefore inefficient with memory if lightly populated.

69

//

70

// A Linear branch has a ~1.75 cache line fill cost when at maximum population.

71

// A Bitmap branch has ~2.0 cache line fills. Linear and Bitmap branches are

72

// converted to Uncompressed branches when the additional memory can be

73

// amortized with larger populations. Higher-state branches have higher

74

// priority to be converted.

75

//

76

// Linear branches can hold 28 elements (based on detailed analysis) -- thus 28

77

// expanses. A Linear branch is converted to a Bitmap branch when the 29th

78

// expanse is required.

79

//

80

// A Bitmap branch could hold 256 expanses, but is forced to convert to an

81

// Uncompressed branch when 185 expanses are required. Hopefully, it is

82

// converted before that because of population growth (again, based on detailed

83

// analysis and heuristics in the code).

84

//

85

// A path through the SM terminates to a leaf when the Index (or key)

86

// population in the expanse below a pointer will fit into 1 or 2 cache lines

87

// (~31..255 Indexes). A maximum-population Leaf has ~1.5 cache line fill

88

// cost.

89

//

90

// Leaves are sorted arrays of Indexes, where the Index Sizes (IS) are: 0, 1,

91

// 8, 16, 24, 32, [40, 48, 56, 64] bits. The IS depends on the "density"

92

// (population/expanse) of the values in the Leaf. Zero bits are possible if

93

// population == expanse in the SM (that is, a full small expanse).

94

//

95

// Elements of a branches are called Judy Pointers (JPs). Each JP object

96

// points to the next object in the SM, plus, a JP can decode an additional

97

// 2[6] bytes of an Index, but at the cost of "narrowing" the expanse

98

// represented by the next object in the SM. A "narrow" JP (one which has

99

// decode bytes/digits) is a way of skipping states in the SM.

100

//

101

// Although counterintuitive, we think a Judy SM is optimal when the Leaves are

102

// stored at MINIMUM compression (narrowing, or use of Decode bytes). If more

103

// aggressive compression was used, decompression of a leaf be required to

104

// insert an index. Additional compression would save a little memory but not

105

// help performance significantly.

106

107

108

#ifdef A_PICTURE_IS_WORTH_1000_WORDS

109

*******************************************************************************

110

111

JUDY 32-BIT STATE MACHINE (SM) EXAMPLE, FOR INDEX = 0x02040103

112

113

The Index used in this example is purposely chosen to allow small, simple

114

examples below; each 1-byte "digit" from the Index has a small numeric value

115

that fits in one column. In the drawing below:

116

117

JAP == Judy Array Pointer; least 3 bits encode Branch or Leaf type; L or B

118

appears in drawing as described below.

119

120

C == 1 byte of a 1..3 byte Population (count of Indexes) below this

121

pointer. Since this is shared with the Decode field, the combined

122

sizes must be 3[7], that is, 1 word less 1 byte for the JP Type.

123

124

The 1-byte field jp_Type is represented as:

125

126

1..3 == Number of bytes in the population (Pop0) word of the Branch or Leaf

127

below the pointer (note: 1..7 on 64-bit); indicates:

128

- number of bytes in Decode field == 3 - this number;

129

- number of bytes remaining to decode.

130

Note: The maximum is 3, not 4, because the 1st byte of the Index is

131

always decoded digitally in the top branch.

132

-B- == JP points to a Branch (there are many kinds of Branches).

133

-L- == JP points to a Leaf (there are many kinds of Leaves).

134

135

(2) == Digit of Index decoded by position offset in branch (really

136

0..0xff).

137

138

4* == Digit of Index necessary for decoding a "narrow" pointer, in a

139

Decode field; replaces 1 missing branch (really 0..0xff).

140

141

4+ == Digit of Index NOT necessary for decoding a "narrow" pointer, but

142

used for fast traversal of the SM by Judy1Test() and JudyLGet()

143

(see the code) (really 0..0xff).

144

145

0 == Byte in a JP''s Pop0 field that is always ignored, because a leaf

146

can never contain more than 256 Indexes (Pop0 <= 255).

147

148

+----- == A Branch or Leaf; drawn open-ended to remind you that it could

149

| have up to 256 columns.

150

+-----

151

152

|

153

| == Pointer to next Branch or Leaf.

154

V

155

156

|

157

O == A state is skipped by using a "narrow" pointer.

158

|

159

160

< 1 > == Digit (Index) shown as an example is not necessarily in the

161

position shown; is sorted in order with neighbor Indexes.

162

(Really 0..0xff.)

163

164

Note that this example shows every possibly topology to reach a leaf in a

165

32-bit Judy SM, although this is a very subtle point!

166

167

STATE/

168

LEVEL

169

+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+

170

|JAP| |JAP| |JAP| |JAP| |JAP| |JAP| |JAP| |JAP|

171

L---+ B---+ B---+ B---+ B---+ B---+ B---+ B---+

172

| | | | | | | |

173

| | | | | | | |

174

V V (2) V (2) V (2) V (2) V (2) V (2) V (2)

175

+------ +------ +------ +------ +------ +------ +------ +------

176

Four |< 2 > | 0 | 4* | C | 4* | 4* | C | C

177

byte |< 4 > | 0 | 0 | C | 1* | C | C | C 4

178

Index|< 1 > | C | C | C | C | C | C | C

179

Leaf |< 3 > | 3 | 2 | 3 | 1 | 2 | 3 | 3

180

+------ +--L--- +--L--- +--B--- +--L--- +--B--- +--B--- +--B---

181

| | | | | | |

182

/ | / | | / /

183

/ | / | | / /

184

| | | | | | |

185

V | V (4) | | V (4) V (4)

186

+------ | +------ | | +------ +------

187

Three |< 4 > | | 4+ | | | 4+ | 4+

188

byte Index|< 1 > O | 0 O O | 1* | C 3

189

Leaf |< 3 > | | C | | | C | C

190

+------ | | 2 | | | 1 | 2

191

/ +----L- | | +----L- +----B-

192

/ | | | | |

193

| / | / / /

194

| / | / / /

195

| / | | / /

196

| / | | / /

197

| | | | | |

198

V V | V(1) | V(1)

199

+------ +------ | +------ | +------

200

Two byte |< 1 > |< 1 > | | 4+ | | 4+

201

Index Leaf |< 3 > |< 3 > O | 1+ O | 1+ 2

202

+------ +------ / | C | | C

203

/ | 1 | | 1

204

| +-L---- | +-L----

205

| | | |

206

| / | /

207

| | | |

208

V V V V

209

+------ +------ +------ +------

210

One byte Index Leaf |< 3 > |< 3 > |< 3 > |< 3 > 1

211

+------ +------ +------ +------

212

213

214

#endif // A_PICTURE_IS_WORTH_1000_WORDS

215

216

217

// ****************************************************************************

218

// MISCELLANEOUS GLOBALS:

219

//

220

// PLATFORM-SPECIFIC CONVENIENCE MACROS:

221

//

222

// These are derived from context (set by cc or in system header files) or

223

// based on JU_<PLATFORM> macros from make_includes/platform.*.mk. We decided

224

// on 011018 that any macro reliably derivable from context (cc or headers) for

225

// ALL platforms supported by Judy is based on that derivation, but ANY

226

// exception means to stop using the external macro completely and derive from

227

// JU_<PLATFORM> instead.

228

229

// Other miscellaneous stuff:

230

231

#ifndef _BOOL_T

232

#define _BOOL_T

233

typedef int bool_t;

234

#endif

235

236

// Convert some (newer) external names (renamed from "JAP" to avoid negative

237

// connotations) to (older) internal names:

238

239

#define JAP_MASK JLAP_MASK

240

#define JAP_INVALID JLAP_INVALID

241

242

#define FUNCTION // null; easy to find functions.

243

244

#ifndef TRUE

245

#define TRUE 1

246

#endif

247

248

#ifndef FALSE

249

#define FALSE 0

250

#endif

251

252

#ifdef TRACE // turn on all other tracing in the code:

253

#define TRACEJP 1 // JP traversals in JudyIns.c and JudyDel.c.

254

#define TRACEJPR 1 // JP traversals in retrieval code, JudyGet.c.

255

#define TRACECF 1 // cache fills in JudyGet.c.

256

#define TRACEMI 1 // malloc calls in JudyMallocIF.c.

257

#define TRACEMF 1 // malloc calls at a lower level in JudyMalloc.c.

258

#endif

259

260

261

// SUPPORT FOR DEBUG-ONLY CODE:

262

//

263

// By convention, use -DDEBUG to enable both debug-only code AND assertions in

264

// the Judy sources.

265

//

266

// Invert the sense of assertions, so they are off unless explicitly requested,

267

// in a uniform way.

268

//

269

// Note: It is NOT appropriate to put this in Judy.h; it would mess up

270

// application code.

271

272

#ifndef DEBUG

273

#define NDEBUG 1 // must be 1 for "#if".

274

#endif

275

276

// Shorthand notations to avoid #ifdefs for single-line conditional statements:

277

//

278

// Warning: These cannot be used around compiler directives, such as

279

// "#include", nor in the case where Code contains a comma other than nested

280

// within parentheses or quotes.

281

282

#ifndef DEBUG

283

#define DBGCODE(Code) // null.

284

#else

285

#define DBGCODE(Code) Code

286

#endif

287

288

#ifdef JUDY1

289

#define JUDY1CODE(Code) Code

290

#define JUDYLCODE(Code) // null.

291

#endif

292

293

#ifdef JUDYL

294

#define JUDYLCODE(Code) Code

295

#define JUDY1CODE(Code) // null.

296

#endif

297

298

#ifdef JU_FLAVOR_COV

299

#pragma C-Cover off // exclude inline functions in public header files.

300

#endif

301

#include <assert.h>

302

#ifdef JU_FLAVOR_COV

303

#pragma C-Cover on

304

#endif

305

306

307

// ****************************************************************************

308

// FUNDAMENTAL CONSTANTS FOR MACHINE

309

// ****************************************************************************

310

311

// Machine (CPU) cache line size:

312

//

313

// NOTE: A leaf size of 2 cache lines maximum is the target (optimal) for

314

// Judy. It's hard to obtain a machine's cache line size at compile time, but

315

// if the machine has an unexpected cache line size, it's not devastating if

316

// the following constants end up causing leaves that are 1 cache line in size,

317

// or even 4 cache lines in size. The assumed 32-bit system has 16-word =

318

// 64-byte cache lines, and the assumed 64-bit system has 16-word = 128-byte

319

// cache lines.

320

321

#ifdef JU_64BIT

322

#define cJU_BYTESPERCL 128 // cache line size in bytes.

323

#else

324

#define cJU_BYTESPERCL 64 // cache line size in bytes.

325

#endif

326

327

// Bits Per Byte:

328

329

#define cJU_BITSPERBYTE 0x8

330

331

// Bytes Per Word and Bits Per Word, latter assuming sizeof(byte) is 8 bits:

332

//

333

// Expect 32 [64] bits per word.

334

335

#define cJU_BYTESPERWORD (sizeof(Word_t))

336

#define cJU_BITSPERWORD (sizeof(Word_t) * cJU_BITSPERBYTE)

337

338

#define JU_BYTESTOWORDS(Bytes) \

339

(((Bytes) + cJU_BYTESPERWORD - 1) / cJU_BYTESPERWORD)

340

341

// A word that is all-one's, normally equal to -1UL, but safer with ~0:

342

343

#define cJU_ALLONES (~0UL)

344

345

// Note, these are forward references, but that's OK:

346

347

#define cJU_FULLBITMAPB ((BITMAPB_t) cJU_ALLONES)

348

#define cJU_FULLBITMAPL ((BITMAPL_t) cJU_ALLONES)

349

350

351

// ****************************************************************************

352

// MISCELLANEOUS JUDY-SPECIFIC DECLARATIONS

353

// ****************************************************************************

354

355

// ROOT STATE:

356

//

357

// State at the start of the Judy SM, based on 1 byte decoded per state; equal

358

// to the number of bytes per Index to decode.

359

360

#define cJU_ROOTSTATE (sizeof(Word_t))

361

362

363

// SUBEXPANSES PER STATE:

364

//

365

// Number of subexpanses per state traversed, which is the number of JPs in a

366

// branch (actual or theoretical) and the number of bits in a bitmap.

367

368

#define cJU_SUBEXPPERSTATE 256

369

370

371

// LEAF AND VALUE POINTERS:

372

//

373

// Some other basic object types are in declared in JudyPrivateBranch.h

374

// (Pjbl_t, Pjbb_t, Pjbu_t, Pjp_t) or are Judy1/L-specific (Pjlb_t). The

375

// few remaining types are declared below.

376

//

377

// Note: Leaf pointers are cast to different-sized objects depending on the

378

// leaf's level, but are at least addresses (not just numbers), so use void *

379

// (Pvoid_t), not PWord_t or Word_t for them, except use Pjlw_t for whole-word

380

// (top-level, root-level) leaves. Value areas, however, are always whole

381

// words.

382

//

383

// Furthermore, use Pjll_t only for generic leaf pointers (for various size

384

// LeafL's). Use Pjlw_t for LeafW's. Use Pleaf (with type uint8_t *, uint16_t

385

// *, etc) when the leaf index size is known.

386

387

typedef PWord_t Pjlw_t; // pointer to root-level leaf (whole-word indexes).

388

typedef Pvoid_t Pjll_t; // pointer to lower-level linear leaf.

389

#ifdef JUDYL

390

typedef PWord_t Pjv_t; // pointer to JudyL value area.

391

#endif

392

393

394

// POINTER PREPARATION MACROS:

395

//

396

// These macros are used to strip malloc-namespace-type bits from a pointer +

397

// malloc-type word (which references any Judy malloc'd object that might be

398

// obtained from other than a direct call of malloc()), prior to dereferencing

399

// the pointer as an address. The malloc-type bits allow Judy malloc'd objects

400

// to come from different "malloc() namespaces".

401

//

402

// Note: Non-null Judy array root pointers (JAP) point to root-level

403

// (word-sized) leaves (JLW) or to top-level Judy population/memory (JPM)

404

// nodes, both of which come directly from malloc() and which are not capable

405

// of alternate name-spacing. However, for consistency and for easier future

406

// changes, there are macros for accessing pointers to these objects too, which

407

// DO strip JAP_MASK bits.

408

//

409

// In other words, JLW and JPM pointers contain JAP type bits, and other

410

// pointers may contain malloc-namespace-type bits. These pointers (addresses)

411

// include these fields in Judy objects:

412

//

413

// (root pointer) (JAP, see above)

414

// jp.jp_Addr generic pointer to next-level node, except when used

415

// as a JudyL Immed01 value area

416

// JU_JBB_PJP macro hides jbbs_Pjp (pointer to JP subarray)

417

// JL_JLB_PVALUE macro hides jLlbs_PValue (pointer to value subarray)

418

//

419

// When setting one of these fields or passing an address to __JudyFree*(), the

420

// "raw" memory address is used; otherwise the memory address must be passed

421

// through one of the macros below before it's dereferenced.

422

//

423

// Note: After much study, the typecasts below appear in the macros rather

424

// than at the point of use, which is both simpler and allows the compiler to

425

// do type-checking.

426

427

#define __numbits 3 // number of malloc-type bits in pointer field.

428

#define __mask (-(1UL << __numbits))

429

430

#define JAPTYPE(Addr) (((Word_t) (Addr)) & JAP_MASK) // root type.

431

#define P_JLW( Addr) ((Pjlw_t) (((Word_t) (Addr)) & ~JAP_MASK)) // root leaf.

432

#define P_JPM( Addr) ((Pjpm_t) (((Word_t) (Addr)) & ~JAP_MASK)) // root JPM.

433

#define P_JBL( Addr) ((Pjbl_t) (((Word_t) (Addr)) & __mask)) // BranchL.

434

#define P_JBB( Addr) ((Pjbb_t) (((Word_t) (Addr)) & __mask)) // BranchB.

435

#define P_JBU( Addr) ((Pjbu_t) (((Word_t) (Addr)) & __mask)) // BranchU.

436

#define P_JLL( Addr) ((Pjll_t) (((Word_t) (Addr)) & __mask)) // LeafL.

437

#define P_JLB( Addr) ((Pjlb_t) (((Word_t) (Addr)) & __mask)) // LeafB1.

438

#define P_JP( Addr) ((Pjp_t) (((Word_t) (Addr)) & __mask)) // JP.

439

#ifdef JUDYL

440

#define P_JV( Addr) ((Pjv_t) (((Word_t) (Addr)) & __mask)) // &value.

441

#endif

442

443

444

// LEAST BYTES:

445

//

446

// Mask for least bytes of a word, and a macro to perform this mask on an

447

// Index.

448

//

449

// Note: This macro has been problematic in the past to get right and to make

450

// portable. It's not OK on all systems to shift by the full word size. This

451

// macro should allow shifting by 1..N bytes, where N is the word size, but

452

// should produce a compiler warning if the macro is called with Bytes == 0.

453

//

454

// Warning: JU_LEASTBYTESMASK() is not a constant macro unless Bytes is a

455

// constant; otherwise it is a variable shift, which is expensive on some

456

// processors.

457

458

#define JU_LEASTBYTESMASK(Bytes) \

459

((0x100UL << (cJU_BITSPERBYTE * ((Bytes) - 1))) - 1)

460

461

#define JU_LEASTBYTES(Index,Bytes) ((Index) & JU_LEASTBYTESMASK(Bytes))

462

463

464

// BITS IN EACH BITMAP SUBEXPANSE FOR BITMAP BRANCH AND LEAF:

465

//

466

// The bits per bitmap subexpanse times the number of subexpanses equals a

467

// constant (cJU_SUBEXPPERSTATE). You can also think of this as a compile-time

468

// choice of "aspect ratio" for bitmap branches and leaves (which can be set

469

// independently for each).

470

//

471

// A default aspect ratio is hardwired here if not overridden at compile time,

472

// such as by "EXTCCOPTS=-DBITMAP_BRANCH16x16 make".

473

474

#if (! (defined(BITMAP_BRANCH8x32) || defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8)))

475

#define BITMAP_BRANCH32x8 1 // 32 bits per subexpanse, 8 subexpanses.

476

#endif

477

478

#ifdef BITMAP_BRANCH8x32

479

#define BITMAPB_t uint8_t

480

#endif

481

482

#ifdef BITMAP_BRANCH16x16

483

#define BITMAPB_t uint16_t

484

#endif

485

486

#ifdef BITMAP_BRANCH32x8

487

#define BITMAPB_t uint32_t

488

#endif

489

490

// Note: For bitmap leaves, BITMAP_LEAF64x4 is only valid for 64 bit:

491

//

492

// Note: Choice of aspect ratio mostly matters for JudyL bitmap leaves. For

493

// Judy1 the choice doesn't matter much -- the code generated for different

494

// BITMAP_LEAF* values choices varies, but correctness and performance are the

495

// same.

496

497

#ifndef JU_64BIT

498

499

#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8)))

500

#define BITMAP_LEAF32x8 // 32 bits per subexpanse, 8 subexpanses.

501

#endif

502

503

#else // JU_64BIT

504

505

#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)))

506

#define BITMAP_LEAF64x4 // 64 bits per subexpanse, 4 subexpanses.

507

#endif

508

509

#endif // JU_64BIT

510

511

#ifdef BITMAP_LEAF8x32

512

#define BITMAPL_t uint8_t

513

#endif

514

515

#ifdef BITMAP_LEAF16x16

516

#define BITMAPL_t uint16_t

517

#endif

518

519

#ifdef BITMAP_LEAF32x8

520

#define BITMAPL_t uint32_t

521

#endif

522

523

#ifdef BITMAP_LEAF64x4

524

#define BITMAPL_t uint64_t

525

#endif

526

527

528

// EXPORTED DATA AND FUNCTIONS:

529

530

#ifdef JUDY1

531

extern const uint8_t __j1_BranchBJPPopToWords[];

532

#else

533

extern const uint8_t __jL_BranchBJPPopToWords[];

534

#endif

535

536

// Fast LeafL search routine used for inlined code:

537

//

538

// Note: MaxPop1 is historical and unused.

539

540

#define SEARCHLEAFEVEN(LeafType,Addr,Pop1,Index,MaxPop1) \

541

LeafType *_Pleaf = (LeafType *) (Addr); \

542

LeafType _Index = (Index); /* with masking */ \

543

Word_t _Pop1 = (Pop1); \

544

\

545

if (_Index > _Pleaf[_Pop1 - 1]) return(~(_Pop1)); /* past end */ \

546

for(;;) { if (_Index <= *_Pleaf) break; ++_Pleaf; } \

547

if (_Index == *_Pleaf) return(_Pleaf - (LeafType *) (Addr)); \

548

return(~(_Pleaf - (LeafType *) (Addr)))

549

550

// Fast way to count bits set in 8..32[64]-bit int:

551

//

552

// For performance, __JudyCountBits*() are written to take advantage of

553

// platform-specific features where available.

554

//

555

556

#ifdef JU_NOINLINE

557

558

extern BITMAPB_t __JudyCountBitsB(BITMAPB_t word);

559

extern BITMAPL_t __JudyCountBitsL(BITMAPL_t word);

560

561

// Compiler supports inline

562

563

#elif defined(JU_HPUX_IPF)

564

565

#define __JudyCountBitsB(word) _Asm_popcnt(word)

566

#define __JudyCountBitsL(word) _Asm_popcnt(word)

567

568

#elif defined(JU_LINUX_IPF)

569

570

static inline BITMAPB_t __JudyCountBitsB(BITMAPB_t word)

571

{

572

BITMAPB_t result;

573

__asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word));

574

return(result);

575

}

576

577

static inline BITMAPL_t __JudyCountBitsL(BITMAPL_t word)

578

{

579

BITMAPL_t result;

580

__asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word));

581

return(result);

582

}

583

584

// No instructions available, use inline code

585

586

#else

587

588

#ifdef JU_FLAVOR_COV

589

#pragma C-Cover on

590

#endif

591

592

// ****************************************************************************

593

// __ J U D Y C O U N T B I T S B

594

//

595

// Return the number of bits set in "Word", for a bitmap branch.

596

//

597

// Note: Bitmap branches have maximum bitmap size = 32 bits.

598

599

static inline BITMAPB_t __JudyCountBitsB(BITMAPB_t word)

600

{

601

word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1);

602

word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2);

603

word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits.

604

#if defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8)

605

word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits.

606

#endif

607

#ifdef BITMAP_BRANCH32x8

608

word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits.

609

#endif

610

return(word);

611

612

} // __JudyCountBitsB()

613

614

615

// ****************************************************************************

616

// __ J U D Y C O U N T B I T S L

617

//

618

// Return the number of bits set in "Word", for a bitmap leaf.

619

//

620

// Note: Bitmap branches have maximum bitmap size = 32 bits.

621

622

// Note: Need both 32-bit and 64-bit versions of __JudyCountBitsL() because

623

// bitmap leaves can have 64-bit bitmaps.

624

625

static inline BITMAPL_t __JudyCountBitsL(BITMAPL_t word)

626

{

627

#ifndef JU_64BIT

628

629

word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1);

630

word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2);

631

word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits.

632

#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8)

633

word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits.

634

#endif

635

#ifdef BITMAP_LEAF32x8

636

word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits.

637

#endif

638

639

#else // JU_64BIT

640

641

word = (word & 0x5555555555555555) + ((word & 0xAAAAAAAAAAAAAAAA) >> 1);

642

word = (word & 0x3333333333333333) + ((word & 0xCCCCCCCCCCCCCCCC) >> 2);

643

word = (word & 0x0F0F0F0F0F0F0F0F) + ((word & 0xF0F0F0F0F0F0F0F0) >> 4);

644

#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)

645

word = (word & 0x00FF00FF00FF00FF) + ((word & 0xFF00FF00FF00FF00) >> 8);

646

#endif

647

#if defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4)

648

word = (word & 0x0000FFFF0000FFFF) + ((word & 0xFFFF0000FFFF0000) >>16);

649

#endif

650

#ifdef BITMAP_LEAF64x4

651

word = (word & 0x00000000FFFFFFFF) + ((word & 0xFFFFFFFF00000000) >>32);

652

#endif

653

#endif // JU_64BIT

654

655

return(word);

656

657

} // __JudyCountBitsL()

658

659

#ifdef JU_FLAVOR_COV

660

#pragma C-Cover on

661

#endif

662

663

#endif // Compiler supports inline

664

665

// GET POP0:

666

//

667

// Get from jp_DcdPop0 the Pop0 for various JP Types.

668

//

669

// Notes:

670

//

671

// - Different macros require different parameters...

672

//

673

// - There are no simple macros for cJU_JAPBRANCH* Types because their

674

// populations must be added up and don't reside in an already-calculated

675

// place. (TBD: This is no longer true, now it's in the JPM.)

676

//

677

// - cJU_JPIMM_POP0() is not defined because it would be redundant because the

678

// Pop1 is already encoded in each enum name.

679

//

680

// - A linear or bitmap leaf Pop0 cannot exceed cJU_SUBEXPPERSTATE - 1 (Pop0 =

681

// 0..255), so use a simpler, faster macro for it than for other JP Types.

682

//

683

// - Avoid any complex calculations that would slow down the compiled code.

684

// Assume these macros are only called for the appropriate JP Types.

685

// Unfortunately there's no way to trigger an assertion here if the JP type

686

// is incorrect for the macro, because these are merely expressions, not

687

// statements.

688

689

#define JU_JAPLEAF_POP0(Addr) (*((PWord_t) Addr))

690

// JU_JAPBRANCH_POP0() // see above.

691

// JU_JPBRANCH_POP0(DcdPop0,cPopBytes) // see JudyPrivateBranch.h.

692

#define JU_JPLEAF_POP0(DcdPop0) ((DcdPop0) & 0xFF)

693

// cJU_JPIMM_POP0(cJPType,cJPTypeBase) ((cJPType) - (cJPTypeBase))

694

#define cJU_JPFULLPOPU1_POP0 (cJU_SUBEXPPERSTATE - 1)

695

696

697

// NUMBER OF BITS IN A BRANCH OR LEAF BITMAP AND SUBEXPANSE:

698

//

699

// Note: cJU_BITSPERBITMAP must be the same as the number of JPs in a branch.

700

701

#define cJU_BITSPERBITMAP cJU_SUBEXPPERSTATE

702

703

// Bitmaps are accessed in units of "subexpanses":

704

705

#define cJU_BITSPERSUBEXPB (sizeof(BITMAPB_t) * cJU_BITSPERBYTE)

706

#define cJU_NUMSUBEXPB (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPB)

707

708

#define cJU_BITSPERSUBEXPL (sizeof(BITMAPL_t) * cJU_BITSPERBYTE)

709

#define cJU_NUMSUBEXPL (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPL)

710

711

712

// MASK FOR A SPECIFIED BIT IN A BITMAP:

713

//

714

// Warning: If BitNum is a variable, this results in a variable shift that is

715

// expensive, at least on some processors. Use with caution.

716

//

717

// Warning: BitNum must be less than cJU_BITSPERWORD, that is, 0 ..

718

// cJU_BITSPERWORD - 1, to avoid a truncated shift on some machines.

719

//

720

// TBD: Perhaps use an array[32] of masks instead of calculating them.

721

722

#define JU_BITPOSMASKB(BitNum) (1L << ((BitNum) % cJU_BITSPERSUBEXPB))

723

#define JU_BITPOSMASKL(BitNum) (1L << ((BitNum) % cJU_BITSPERSUBEXPL))

724

725

726

// TEST/SET/CLEAR A BIT IN A BITMAP LEAF:

727

//

728

// Test if a byte-sized Digit (portion of Index) has a corresponding bit set in

729

// a bitmap, or set a byte-sized Digit's bit into a bitmap, by looking up the

730

// correct subexpanse and then checking/setting the correct bit.

731

//

732

// Note: Mask higher bits, if any, for the convenience of the user of this

733

// macro, in case they pass a full Index, not just a digit. If the caller has

734

// a true 8-bit digit, make it of type uint8_t and the compiler should skip the

735

// unnecessary mask step.

736

737

#define JU_SUBEXPL(DIGIT) (((DIGIT) / cJU_BITSPERSUBEXPL) & (cJU_NUMSUBEXPL-1))

738

739

#define JU_BITMAPTESTL(PJLB, INDEX) \

740

(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) & JU_BITPOSMASKL(INDEX))

741

742

#define JU_BITMAPSETL(PJLB, INDEX) \

743

(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) |= JU_BITPOSMASKL(INDEX))

744

745

#define JU_BITMAPCLEARL(PJLB, INDEX) \

746

(JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) ^= JU_BITPOSMASKL(INDEX))

747

748

749

// MAP BITMAP BIT OFFSET TO DIGIT:

750

//

751

// Given a digit variable to set, a bitmap branch or leaf subexpanse (base 0),

752

// the bitmap (BITMAP*_t) for that subexpanse, and an offset (Nth set bit in

753

// the bitmap, base 0), compute the digit (also base 0) corresponding to the

754

// subexpanse and offset by counting all bits in the bitmap until offset+1 set

755

// bits are seen. Avoid expensive variable shifts. Offset should be less than

756

// the number of set bits in the bitmap; assert this.

757

//

758

// If there's a better way to do this, I don't know what it is.

759

760

#define JU_BITMAPDIGITB(Digit,Subexp,Bitmap,Offset) \

761

{ \

762

BITMAPB_t bitmap = (Bitmap); int remain = (Offset); \

763

(Digit) = (Subexp) * cJU_BITSPERSUBEXPB; \

764

\

765

while ((remain -= (bitmap & 1)) >= 0) \

766

{ \

767

bitmap >>= 1; ++(Digit); \

768

assert((Digit) < ((Subexp) + 1) * cJU_BITSPERSUBEXPB); \

769

} \

770

}

771

772

#define JU_BITMAPDIGITL(Digit,Subexp,Bitmap,Offset) \

773

{ \

774

BITMAPL_t bitmap = (Bitmap); int remain = (Offset); \

775

(Digit) = (Subexp) * cJU_BITSPERSUBEXPL; \

776

\

777

while ((remain -= (bitmap & 1)) >= 0) \

778

{ \

779

bitmap >>= 1; ++(Digit); \

780

assert((Digit) < ((Subexp) + 1) * cJU_BITSPERSUBEXPL); \

781

} \

782

}

783

784

785

// MASKS FOR PORTIONS OF 32-BIT WORDS:

786

//

787

// These are useful for bitmap subexpanses.

788

//

789

// "LOWER"/"HIGHER" means bits representing lower/higher-valued Indexes. The

790

// exact order of bits in the word is explicit here but is hidden from the

791

// caller.

792

//

793

// "EXC" means exclusive of the specified bit; "INC" means inclusive.

794

//

795

// In each case, BitPos is either "JU_BITPOSMASK*(BitNum)", or a variable saved

796

// from an earlier call of that macro; either way, it must be a 32-bit word

797

// with a single bit set. In the first case, assume the compiler is smart

798

// enough to optimize out common subexpressions.

799

//

800

// The expressions depend on unsigned decimal math that should be universal.

801

802

#define JU_MASKLOWEREXC( BitPos) ((BitPos) - 1)

803

#define JU_MASKLOWERINC( BitPos) (JU_MASKLOWEREXC(BitPos) | (BitPos))

804

#define JU_MASKHIGHERINC(BitPos) (-(BitPos))

805

#define JU_MASKHIGHEREXC(BitPos) (JU_MASKHIGHERINC(BitPos) ^ (BitPos))

806

807

808

// ****************************************************************************

809

// SUPPORT FOR NATIVE INDEX SIZES

810

// ****************************************************************************

811

//

812

// Copy a series of generic objects (uint8_t, uint16_t, uint32_t, Word_t) from

813

// one place to another.

814

815

#define JU_COPYMEM(PDst,PSrc,Pop1) \

816

{ \

817

Word_t __index = 0; \

818

assert((Pop1) > 0); \

819

do { (PDst)[__index] = (PSrc)[__index]; } \

820

while (++__index < (Pop1)); \

821

}

822

823

824

// ****************************************************************************

825

// SUPPORT FOR ODD (NON-NATIVE) INDEX SIZES

826

// ****************************************************************************

827

//

828

// This is the best fake uint24_t I could design in C. -- dlb

829

//

830

// Note about endian issues: On a little-endian machine, the _t24_3byte field

831

// is apparently accessed through a register (always big-endian), while the

832

// _t24_3bytes field is in memory. Therefore, on a little-endian machine, the

833

// 3 index bytes are in little-endian order in memory. For example, if the

834

// index bytes are big-endian "cba", they are stored "abc" in memory.

835

836

typedef struct __FAKE_24BIT_UINT

837

{

838

union

839

{

840

Word_t _t24_3byte:24;

841

uint8_t _t24_3bytes[3];

842

} _;

843

} uint24_t;

844

845

// Copy a 3-byte Index pointed by a uint8_t * to a Word_t:

846

//

847

// See above about endian issues.

848

849

#define JU_COPY3_PINDEX_TO_LONG(DestLong,PIndex) \

850

{ \

851

uint24_t temp; \

852

temp._._t24_3bytes[0] = (PIndex)[0]; \

853

temp._._t24_3bytes[1] = (PIndex)[1]; \

854

temp._._t24_3bytes[2] = (PIndex)[2]; \

855

(DestLong) = temp._._t24_3byte; \

856

}

857

858

// Copy a Word_t to a 3-byte Index pointed at by a uint8_t *:

859

//

860

// See above about endian issues.

861

862

#define JU_COPY3_LONG_TO_PINDEX(PIndex,SourceLong) \

863

{ \

864

uint24_t temp; \

865

temp._._t24_3byte = (SourceLong); \

866

(PIndex)[0] = temp._._t24_3bytes[0]; \

867

(PIndex)[1] = temp._._t24_3bytes[1]; \

868

(PIndex)[2] = temp._._t24_3bytes[2]; \

869

}

870

871

872

#ifdef JU_64BIT

873

874

// SIMILAR STRUCTS AND MACROS FOR ODD INDEX SIZES ON 64-BIT SYSTEMS:

875

876

typedef struct __FAKE_40BIT_UINT

877

{

878

union

879

{

880

Word_t _t40_5byte:40;

881

uint8_t _t40_5bytes[5];

882

} _;

883

} uint40_t;

884

885

#define JU_COPY5_PINDEX_TO_LONG(DestLong,PIndex) \

886

{ \

887

uint40_t temp; \

888

temp._._t40_5bytes[0] = (PIndex)[0]; \

889

temp._._t40_5bytes[1] = (PIndex)[1]; \

890

temp._._t40_5bytes[2] = (PIndex)[2]; \

891

temp._._t40_5bytes[3] = (PIndex)[3]; \

892

temp._._t40_5bytes[4] = (PIndex)[4]; \

893

(DestLong) = temp._._t40_5byte; \

894

}

895

896

#define JU_COPY5_LONG_TO_PINDEX(PIndex,SourceLong) \

897

{ \

898

uint40_t temp; \

899

temp._._t40_5byte = (SourceLong); \

900

(PIndex)[0] = temp._._t40_5bytes[0]; \

901

(PIndex)[1] = temp._._t40_5bytes[1]; \

902

(PIndex)[2] = temp._._t40_5bytes[2]; \

903

(PIndex)[3] = temp._._t40_5bytes[3]; \

904

(PIndex)[4] = temp._._t40_5bytes[4]; \

905

}

906

907

typedef struct __FAKE_48BIT_UINT

908

{

909

union

910

{

911

Word_t _t48_6byte:48;

912

uint8_t _t48_6bytes[6];

913

} _;

914

} uint48_t;

915

916

#define JU_COPY6_PINDEX_TO_LONG(DestLong,PIndex) \

917

{ \

918

uint48_t temp; \

919

temp._._t48_6bytes[0] = (PIndex)[0]; \

920

temp._._t48_6bytes[1] = (PIndex)[1]; \

921

temp._._t48_6bytes[2] = (PIndex)[2]; \

922

temp._._t48_6bytes[3] = (PIndex)[3]; \

923

temp._._t48_6bytes[4] = (PIndex)[4]; \

924

temp._._t48_6bytes[5] = (PIndex)[5]; \

925

(DestLong) = temp._._t48_6byte; \

926

}

927

928

#define JU_COPY6_LONG_TO_PINDEX(PIndex,SourceLong) \

929

{ \

930

uint48_t temp; \

931

temp._._t48_6byte = (SourceLong); \

932

(PIndex)[0] = temp._._t48_6bytes[0]; \

933

(PIndex)[1] = temp._._t48_6bytes[1]; \

934

(PIndex)[2] = temp._._t48_6bytes[2]; \

935

(PIndex)[3] = temp._._t48_6bytes[3]; \

936

(PIndex)[4] = temp._._t48_6bytes[4]; \

937

(PIndex)[5] = temp._._t48_6bytes[5]; \

938

}

939

940

typedef struct __FAKE_56BIT_UINT

941

{

942

union

943

{

944

Word_t _t56_7byte:56;

945

uint8_t _t56_7bytes[7];

946

} _;

947

} uint56_t;

948

949

#define JU_COPY7_PINDEX_TO_LONG(DestLong,PIndex) \

950

{ \

951

uint56_t temp; \

952

temp._._t56_7bytes[0] = (PIndex)[0]; \

953

temp._._t56_7bytes[1] = (PIndex)[1]; \

954

temp._._t56_7bytes[2] = (PIndex)[2]; \

955

temp._._t56_7bytes[3] = (PIndex)[3]; \

956

temp._._t56_7bytes[4] = (PIndex)[4]; \

957

temp._._t56_7bytes[5] = (PIndex)[5]; \

958

temp._._t56_7bytes[6] = (PIndex)[6]; \

959

(DestLong) = temp._._t56_7byte; \

960

}

961

962

#define JU_COPY7_LONG_TO_PINDEX(PIndex,SourceLong) \

963

{ \

964

uint56_t temp; \

965

temp._._t56_7byte = (SourceLong); \

966

(PIndex)[0] = temp._._t56_7bytes[0]; \

967

(PIndex)[1] = temp._._t56_7bytes[1]; \

968

(PIndex)[2] = temp._._t56_7bytes[2]; \

969

(PIndex)[3] = temp._._t56_7bytes[3]; \

970

(PIndex)[4] = temp._._t56_7bytes[4]; \

971

(PIndex)[5] = temp._._t56_7bytes[5]; \

972

(PIndex)[6] = temp._._t56_7bytes[6]; \

973

}

974

975

#endif // JU_64BIT

976

977

978

// ****************************************************************************

979

// COMMON CODE FRAGMENTS (MACROS)

980

// ****************************************************************************

981

//

982

// These code chunks are shared between various source files.

983

984

985

// SET (REPLACE) ONE DIGIT IN AN INDEX:

986

//

987

// To avoid endian issues, use masking and ORing, which operates in a

988

// big-endian register, rather than treating the Index as an array of bytes,

989

// though that would be simpler, but would operate in endian-specific memory.

990

//

991

// TBD: This contains two variable shifts, is that bad?

992

993

#define JU_SETDIGIT(Index,Digit,State) \

994

(Index) = ((Index) & (~cJU_MASKATSTATE(State))) \

995

| (((Word_t) (Digit)) \

996

<< (((State) - 1) * cJU_BITSPERBYTE))

997

998

// Fast version for single LSB:

999

1000

#define JU_SETDIGIT1(Index,Digit) (Index) = ((Index) & ~0xff) | (Digit)

1001

1002

1003

// SET (REPLACE) "N" LEAST DIGITS IN AN INDEX:

1004

1005

#define JU_SETDIGITS(Index,Index2,cState) \

1006

(Index) = ((Index ) & (~JU_LEASTBYTESMASK(cState))) \

1007

| ((Index2) & ( JU_LEASTBYTESMASK(cState)))

1008

1009

1010

// COPY DECODE BYTES FROM JP TO INDEX:

1011

//

1012

// Modify Index digit(s) to match the bytes in jp_DcdPop0 in case one or more

1013

// branches are skipped and the digits are significant. It's probably faster

1014

// to just do this unconditionally than to check if it's necessary.

1015

//

1016

// To avoid endian issues, use masking and ORing, which operates in a

1017

// big-endian register, rather than treating the Index as an array of bytes,

1018

// though that would be simpler, but would operate in endian-specific memory.

1019

//

1020

// WARNING: Must not call JU_LEASTBYTESMASK (via cJU_DCDMASK) with Bytes =

1021

// cJU_ROOTSTATE or a bad mask is generated, but there are no Dcd bytes to copy

1022

// in this case anyway. In fact there are no Dcd bytes unless State <

1023

// cJU_ROOTSTATE - 1, so don't call this macro except in those cases.

1024

//

1025

// TBD: It would be nice to validate jp_DcdPop0 against known digits to ensure

1026

// no corruption, but this is non-trivial.

1027

1028

#define JU_SETDCD(Index,Dcd,cState) \

1029

(Index) = ((Index) & ~cJU_DCDMASK(cState)) \

1030

| ((Dcd) & cJU_DCDMASK(cState))

1031

1032

1033

// INSERT/DELETE AN INDEX IN-PLACE IN MEMORY:

1034

//

1035

// Given a pointer to an array of "even" (native), same-sized objects

1036

// (indexes), the current population of the array, an offset in the array, and

1037

// a new Index to insert, "shift up" the array elements (Indexes) above the

1038

// insertion point and insert the new Index. Assume there is sufficient memory

1039

// to do this.

1040

//

1041

// In these macros, "_ioffset" is an index offset, and "_boffset" is a byte

1042

// offset for odd Index sizes.

1043

//

1044

// Note: Endian issues only arise fro insertion, not deletion, and even for

1045

// insertion, they are transparent when native (even) objects are used, and

1046

// handled explicitly for odd (non-native) Index sizes.

1047

//

1048

// Note: The following macros are tricky enough that there is some test code

1049

// for them appended to this file.

1050

1051

#define JU_INSERTINPLACE(PArray,Pop1,Offset,Index) \

1052

assert((long) (Pop1) > 0); \

1053

assert((Word_t) (Offset) <= (Word_t) (Pop1)); \

1054

{ \

1055

Word_t _ioffset = (Pop1); \

1056

\

1057

while (_ioffset-- > (Offset)) \

1058

(PArray)[_ioffset + 1] = (PArray)[_ioffset]; \

1059

\

1060

(PArray)[Offset] = (Index); \

1061

}

1062

1063

// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size

1064

// and PByte must point to a uint8_t (byte); shift byte-by-byte:

1065

//

1066

// Note: If cIS == 1, JU_INSERTINPLACE_ODD == JU_INSERTINPLACE, at least in

1067

// concept.

1068

//

1069

// Note: JU_INSERTINPLACE_ODD() is hidden common code; use

1070

// JU_INSERTINPLACE3(), etc.

1071

//

1072

// Note: To handle endian issues, these macros pass Indexes through odd-index

1073

// structs defined elsewhere.

1074

//

1075

// TBD: Would it be more efficient for big-endian systems to use JU_BYTEORDER

1076

// to avoid going through a struct?

1077

1078

#define JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \

1079

cIS, Struct_t, Field_bytes, Field_int) \

1080

assert((long) (Pop1) > 0); \

1081

assert((Word_t) (Offset) <= (Word_t) (Pop1)); \

1082

{ \

1083

Struct_t _temp; \

1084

uint8_t * _Pindex = &(_temp._.Field_bytes[0]); \

1085

Word_t _boffset = (((Pop1) + 1) * (cIS)) - 1; \

1086

\

1087

_temp._.Field_int = (Index); \

1088

\

1089

while (_boffset-- > (((Offset) + 1) * (cIS)) - 1) \

1090

(PByte)[_boffset + 1] = (PByte)[_boffset + 1 - (cIS)]; \

1091

\

1092

for (_boffset = (Offset) * (cIS); \

1093

_boffset < ((Offset) + 1) * (cIS); \

1094

++_boffset) \

1095

{ \

1096

(PByte)[_boffset] = *_Pindex++; \

1097

} \

1098

}

1099

1100

#define JU_INSERTINPLACE3( PByte,Pop1,Offset,Index) \

1101

JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \

1102

3, uint24_t, _t24_3bytes, _t24_3byte)

1103

1104

#ifdef JU_64BIT

1105

#define JU_INSERTINPLACE5( PByte,Pop1,Offset,Index) \

1106

JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \

1107

5, uint40_t, _t40_5bytes, _t40_5byte)

1108

1109

#define JU_INSERTINPLACE6( PByte,Pop1,Offset,Index) \

1110

JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \

1111

6, uint48_t, _t48_6bytes, _t48_6byte)

1112

1113

#define JU_INSERTINPLACE7( PByte,Pop1,Offset,Index) \

1114

JU_INSERTINPLACE_ODD(PByte,Pop1,Offset,Index, \

1115

7, uint56_t, _t56_7bytes, _t56_7byte)

1116

#endif

1117

1118

// Counterparts to the above for deleting an Index:

1119

//

1120

// "Shift down" the array elements starting at the Index to be deleted.

1121

1122

#define JU_DELETEINPLACE(PArray,Pop1,Offset,ignore) \

1123

assert((long) (Pop1) > 0); \

1124

assert((Word_t) (Offset) < (Word_t) (Pop1)); \

1125

{ \

1126

Word_t _ioffset = (Offset); \

1127

\

1128

while (++_ioffset < (Pop1)) \

1129

(PArray)[_ioffset - 1] = (PArray)[_ioffset]; \

1130

}

1131

1132

// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size

1133

// and PByte must point to a uint8_t (byte); copy byte-by-byte:

1134

//

1135

// Note: If cIS == 1, JU_DELETEINPLACE_ODD == JU_DELETEINPLACE.

1136

//

1137

// Note: There are no endian issues here because bytes are just shifted as-is,

1138

// not converted to/from an Index.

1139

1140

#define JU_DELETEINPLACE_ODD(PByte,Pop1,Offset,cIS) \

1141

assert((long) (Pop1) > 0); \

1142

assert((Word_t) (Offset) < (Word_t) (Pop1)); \

1143

{ \

1144

Word_t _boffset = (((Offset) + 1) * (cIS)) - 1; \

1145

\

1146

while (++_boffset < ((Pop1) * (cIS))) \

1147

(PByte)[_boffset - (cIS)] = (PByte)[_boffset]; \

1148

}

1149

1150

1151

// INSERT/DELETE AN INDEX WHILE COPYING OTHERS:

1152

//

1153

// Copy PSource[] to PDest[], where PSource[] has Pop1 elements (Indexes),

1154

// inserting Index at PDest[Offset]. Unlike JU_*INPLACE*() above, these macros

1155

// are used when moving Indexes from one memory object to another.

1156

//

1157

// Note: See above about endian issues.

1158

1159

#define JU_INSERTCOPY(PDest,PSource,Pop1,Offset,Index) \

1160

assert((long) (Pop1) > 0); \

1161

assert((Word_t) (Offset) <= (Word_t) (Pop1)); \

1162

{ \

1163

Word_t _ioffset; \

1164

\

1165

for (_ioffset = 0; _ioffset < (Offset); ++_ioffset) \

1166

(PDest)[_ioffset] = (PSource)[_ioffset]; \

1167

\

1168

(PDest)[_ioffset] = (Index); \

1169

\

1170

for (/* null */; _ioffset < (Pop1); ++_ioffset) \

1171

(PDest)[_ioffset + 1] = (PSource)[_ioffset]; \

1172

}

1173

1174

// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size;

1175

// copy byte-by-byte:

1176

//

1177

// Note: If cIS == 1, JU_INSERTCOPY_ODD == JU_INSERTCOPY, at least in concept.

1178

//

1179

// Note: JU_INSERTCOPY_ODD() is hidden common code; use JU_INSERTCOPY3(), etc.

1180

//

1181

// Note: To handle endian issues, these macros pass Indexes through odd-index

1182

// structs defined elsewhere.

1183

//

1184

// TBD: Would it be more efficient for big-endian systems to use JU_BYTEORDER

1185

// to avoid going through a struct?

1186

1187

#define JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \

1188

cIS, Struct_t, Field_bytes, Field_int) \

1189

assert((long) (Pop1) > 0); \

1190

assert((Word_t) (Offset) <= (Word_t) (Pop1)); \

1191

{ \

1192

Struct_t _temp; \

1193

uint8_t * _Pdest = (uint8_t *) (PDest); \

1194

uint8_t * _Psource = (uint8_t *) (PSource); \

1195

uint8_t * _Pindex = &(_temp._.Field_bytes[0]); \

1196

Word_t _boffset; \

1197

Word_t _boffset2; \

1198

\

1199

_temp._.Field_int = (Index); \

1200

\

1201

for (_boffset = 0; _boffset < ((Offset) * (cIS)); ++_boffset) \

1202

*_Pdest++ = *_Psource++; \

1203

\

1204

for (_boffset2 = 0; _boffset2 < (cIS); ++_boffset2) \

1205

*_Pdest++ = *_Pindex++; \

1206

\

1207

for (/* null */; _boffset < ((Pop1) * (cIS)); ++_boffset) \

1208

*_Pdest++ = *_Psource++; \

1209

}

1210

1211

#define JU_INSERTCOPY3( PDest,PSource,Pop1,Offset,Index) \

1212

JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \

1213

3, uint24_t, _t24_3bytes, _t24_3byte)

1214

1215

#ifdef JU_64BIT

1216

#define JU_INSERTCOPY5( PDest,PSource,Pop1,Offset,Index) \

1217

JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \

1218

5, uint40_t, _t40_5bytes, _t40_5byte)

1219

1220

#define JU_INSERTCOPY6( PDest,PSource,Pop1,Offset,Index) \

1221

JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \

1222

6, uint48_t, _t48_6bytes, _t48_6byte)

1223

1224

#define JU_INSERTCOPY7( PDest,PSource,Pop1,Offset,Index) \

1225

JU_INSERTCOPY_ODD(PDest,PSource,Pop1,Offset,Index, \

1226

7, uint56_t, _t56_7bytes, _t56_7byte)

1227

#endif

1228

1229

// Counterparts to the above for deleting an Index:

1230

1231

#define JU_DELETECOPY(PDest,PSource,Pop1,Offset,ignore) \

1232

assert((long) (Pop1) > 0); \

1233

assert((Word_t) (Offset) < (Word_t) (Pop1)); \

1234

{ \

1235

Word_t _ioffset; \

1236

\

1237

for (_ioffset = 0; _ioffset < (Offset); ++_ioffset) \

1238

(PDest)[_ioffset] = (PSource)[_ioffset]; \

1239

\

1240

for (++_ioffset; _ioffset < (Pop1); ++_ioffset) \

1241

(PDest)[_ioffset - 1] = (PSource)[_ioffset]; \

1242

}

1243

1244

// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size;

1245

// copy byte-by-byte:

1246

//

1247

// Note: There are no endian issues here because bytes are just shifted as-is,

1248

// not converted to/from an Index.

1249

//

1250

// Note: If cIS == 1, JU_DELETECOPY_ODD == JU_DELETECOPY, at least in concept.

1251

1252

#define JU_DELETECOPY_ODD(PDest,PSource,Pop1,Offset,cIS) \

1253

assert((long) (Pop1) > 0); \

1254

assert((Word_t) (Offset) < (Word_t) (Pop1)); \

1255

{ \

1256

uint8_t *_Pdest = (uint8_t *) (PDest); \

1257

uint8_t *_Psource = (uint8_t *) (PSource); \

1258

Word_t _boffset; \

1259

\

1260

for (_boffset = 0; _boffset < ((Offset) * (cIS)); ++_boffset) \

1261

*_Pdest++ = *_Psource++; \

1262

\

1263

_Psource += (cIS); \

1264

\

1265

for (_boffset += (cIS); _boffset < ((Pop1) * (cIS)); ++_boffset) \

1266

*_Pdest++ = *_Psource++; \

1267

}

1268

1269

1270

// GENERIC RETURN CODE HANDLING FOR JUDY1 (NO VALUE AREAS) AND JUDYL (VALUE

1271

// AREAS):

1272

//

1273

// This common code hides Judy1 versus JudyL details of how to return various

1274

// conditions, including a pointer to a value area for JudyL.

1275

//

1276

// First, define an internal variation of JERR called JERRI (I = int) to make

1277

// lint happy. We accidentally shipped to 11.11 OEUR with all functions that

1278

// return int or Word_t using JERR, which is type Word_t, for errors. Lint

1279

// complains about this for functions that return int. So, internally use

1280

// JERRI for error returns from the int functions. Experiments show that

1281

// callers which compare int Foo() to (Word_t) JERR (~0UL) are OK, since JERRI

1282

// sign-extends to match JERR.

1283

1284

#define JERRI ((int) ~0) // see above.

1285

1286

#ifdef JUDY1

1287

1288

#define JU_RET_FOUND return(1)

1289

#define JU_RET_NOTFOUND return(0)

1290

1291

// For Judy1, these all "fall through" to simply JU_RET_FOUND, since there is no

1292

// value area pointer to return:

1293

1294

#define JU_RET_FOUND_JAPLEAF(Pjlw,Pop1,Offset) JU_RET_FOUND

1295

1296

#define JU_RET_FOUND_JPM(Pjpm) JU_RET_FOUND

1297

#define JU_RET_FOUND_PVALUE(Pjv,Offset) JU_RET_FOUND

1298

#ifndef JU_64BIT

1299

#define JU_RET_FOUND_LEAF1(Pjll,Pop1,Offset) JU_RET_FOUND

1300

#endif

1301

#define JU_RET_FOUND_LEAF2(Pjll,Pop1,Offset) JU_RET_FOUND

1302

#define JU_RET_FOUND_LEAF3(Pjll,Pop1,Offset) JU_RET_FOUND

1303

#ifdef JU_64BIT

1304

#define JU_RET_FOUND_LEAF4(Pjll,Pop1,Offset) JU_RET_FOUND

1305

#define JU_RET_FOUND_LEAF5(Pjll,Pop1,Offset) JU_RET_FOUND

1306

#define JU_RET_FOUND_LEAF6(Pjll,Pop1,Offset) JU_RET_FOUND

1307

#define JU_RET_FOUND_LEAF7(Pjll,Pop1,Offset) JU_RET_FOUND

1308

#endif

1309

#define JU_RET_FOUND_IMM_01(Pjp) JU_RET_FOUND

1310

#define JU_RET_FOUND_IMM(Pjp,Offset) JU_RET_FOUND

1311

1312

// Note: No JudyL equivalent:

1313

1314

#define JU_RET_FOUND_FULLPOPU1 JU_RET_FOUND

1315

#define JU_RET_FOUND_LEAF_B1(Pjlb,Subexp,Offset) JU_RET_FOUND

1316

1317

#else // JUDYL

1318

1319

// JU_RET_FOUND // see below; must NOT be defined for JudyL.

1320

#define JU_RET_NOTFOUND return((PPvoid_t) NULL)

1321

1322

// For JudyL, the location of the value area depends on the JP type and other

1323

// factors:

1324

//

1325

// TBD: The value areas should be accessed via data structures, here and in

1326

// Doug's code, not by hard-coded address calculations.

1327

//

1328

// This is useful in insert/delete code when the value area is returned from

1329

// lower levels in the JPM:

1330

1331

#define JU_RET_FOUND_JPM(Pjpm) return((PPvoid_t) ((Pjpm) -> jpm_PValue))

1332

1333

// This is useful in insert/delete code when the value area location is already

1334

// computed:

1335

1336

#define JU_RET_FOUND_PVALUE(Pjv,Offset) return((PPvoid_t) ((Pjv) + Offset))

1337

1338

#define JU_RET_FOUND_JAPLEAF(Pjlw,Pop1,Offset) \

1339

return((PPvoid_t) (JL_LEAFWVALUEAREA(Pjlw, Pop1) + (Offset)))

1340

1341

#define JU_RET_FOUND_LEAF1(Pjll,Pop1,Offset) \

1342

return((PPvoid_t) (JL_LEAF1VALUEAREA(Pjll, Pop1) + (Offset)))

1343

#define JU_RET_FOUND_LEAF2(Pjll,Pop1,Offset) \

1344

return((PPvoid_t) (JL_LEAF2VALUEAREA(Pjll, Pop1) + (Offset)))

1345

#define JU_RET_FOUND_LEAF3(Pjll,Pop1,Offset) \

1346

return((PPvoid_t) (JL_LEAF3VALUEAREA(Pjll, Pop1) + (Offset)))

1347

#ifdef JU_64BIT

1348

#define JU_RET_FOUND_LEAF4(Pjll,Pop1,Offset) \

1349

return((PPvoid_t) (JL_LEAF4VALUEAREA(Pjll, Pop1) + (Offset)))

1350

#define JU_RET_FOUND_LEAF5(Pjll,Pop1,Offset) \

1351

return((PPvoid_t) (JL_LEAF5VALUEAREA(Pjll, Pop1) + (Offset)))

1352

#define JU_RET_FOUND_LEAF6(Pjll,Pop1,Offset) \

1353

return((PPvoid_t) (JL_LEAF6VALUEAREA(Pjll, Pop1) + (Offset)))

1354

#define JU_RET_FOUND_LEAF7(Pjll,Pop1,Offset) \

1355

return((PPvoid_t) (JL_LEAF7VALUEAREA(Pjll, Pop1) + (Offset)))

1356

#endif

1357

1358

// Note: Here jp_Addr is a value area itself and not an address, so P_JV() is

1359

// not needed:

1360

1361

#define JU_RET_FOUND_IMM_01(Pjp) return((PPvoid_t) (&((Pjp) -> jp_Addr)))

1362

1363

// Note: Here jp_Addr is a pointer to a separately-malloc'd value area, so

1364

// P_JV() is required; likewise for JL_JLB_PVALUE:

1365

1366

#define JU_RET_FOUND_IMM(Pjp,Offset) \

1367

return((PPvoid_t) (P_JV((Pjp)->jp_Addr) + (Offset)))

1368

1369

#define JU_RET_FOUND_LEAF_B1(Pjlb,Subexp,Offset) \

1370

return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, Subexp)) + (Offset)))

1371

1372

#endif // JUDYL

1373

1374

1375

// GENERIC ERROR HANDLING:

1376

//

1377

// This is complicated by variations in the needs of the callers of these

1378

// macros. Only use JU_SET_ERRNO() for PJError, because it can be null; use

1379

// JU_SET_ERRNO_NONNULL() for Pjpm, which is never null, and also in other

1380

// cases where the pointer is known not to be null (to save dead branches).

1381

//

1382

// Note: Most cases of JU_ERRNO_OVERRUN or JU_ERRNO_CORRUPT should result in

1383

// an assertion failure in debug code, so they are more likely to be caught, so

1384

// do that here in each macro.

1385

1386

#define JU_SET_ERRNO(PJError, JErrno) \

1387

{ \

1388

assert((JErrno) != JU_ERRNO_OVERRUN); \

1389

assert((JErrno) != JU_ERRNO_CORRUPT); \

1390

\

1391

if (PJError != (PJError_t) NULL) \

1392

{ \

1393

JU_ERRNO(PJError) = (JErrno); \

1394

JU_ERRID(PJError) = __LINE__; \

1395

} \

1396

}

1397

1398

// Variation for callers who know already that PJError is non-null; and, it can

1399

// also be Pjpm (both PJError_t and Pjpm_t have je_* fields), so only assert it

1400

// for null, not cast to any specific pointer type:

1401

1402

#define JU_SET_ERRNO_NONNULL(PJError, JErrno) \

1403

{ \

1404

assert((JErrno) != JU_ERRNO_OVERRUN); \

1405

assert((JErrno) != JU_ERRNO_CORRUPT); \

1406

assert(PJError); \

1407

\

1408

JU_ERRNO(PJError) = (JErrno); \

1409

JU_ERRID(PJError) = __LINE__; \

1410

}

1411

1412

// Variation to copy error info from a (required) JPM to an (optional)

1413

// PJError_t:

1414

//

1415

// Note: The assertions above about JU_ERRNO_OVERRUN and JU_ERRNO_CORRUPT

1416

// should have already popped, so they are not needed here.

1417

1418

#define JU_COPY_ERRNO(PJError, Pjpm) \

1419

{ \

1420

if (PJError) \

1421

{ \

1422

JU_ERRNO(PJError) = (uint8_t)JU_ERRNO(Pjpm); \

1423

JU_ERRID(PJError) = JU_ERRID(Pjpm); \

1424

} \

1425

}

1426

1427

// For JErrno parameter to previous macros upon return from Judy*Alloc*():

1428

//

1429

// The memory allocator returns an address of 0 for out of memory,

1430

// 1..sizeof(Word_t)-1 for corruption (an invalid pointer), otherwise a valid

1431

// pointer.

1432

1433

#define JU_ALLOC_ERRNO(Addr) \

1434

(((void *) (Addr) != (void *) NULL) ? JU_ERRNO_OVERRUN : JU_ERRNO_NOMEM)

1435

1436

#define JU_CHECKALLOC(Type,Ptr,Retval) \

1437

if ((Ptr) < (Type) sizeof(Word_t)) \

1438

{ \

1439

JU_SET_ERRNO(PJError, JU_ALLOC_ERRNO(Ptr)); \

1440

return(Retval); \

1441

}

1442

1443

1444

// ****************************************************************************

1445

// SUPPORT FOR LEAF SEARCHING:

1446

// ****************************************************************************

1447

//

1448

// These macros are shared by JudySearchLeaf.c and JudyPrevNextEmpty.c only so

1449

// they lack noisy JU_* prefixes.

1450

1451

// BYTES PER INDEX (BPI), just for clarity:

1452

1453

#define BPI1 1

1454

#define BPI2 2

1455

#define BPI3 3

1456

#ifdef JU_64BIT

1457

#define BPI4 4

1458

#define BPI5 5

1459

#define BPI6 6

1460

#define BPI7 7

1461

#endif

1462

#define BPIL (sizeof(Word_t))

1463

1464

1465

// SHORTHAND NAMES FOR MASKS:

1466

1467

#define MASK1 JU_LEASTBYTESMASK(BPI1)

1468

#define MASK2 JU_LEASTBYTESMASK(BPI2)

1469

#define MASK3 JU_LEASTBYTESMASK(BPI3)

1470

#ifdef JU_64BIT

1471

#define MASK4 JU_LEASTBYTESMASK(BPI4)

1472

#define MASK5 JU_LEASTBYTESMASK(BPI5)

1473

#define MASK6 JU_LEASTBYTESMASK(BPI6)

1474

#define MASK7 JU_LEASTBYTESMASK(BPI7)

1475

#endif

1476

// No need for MASKL.

1477

1478

1479

// SUPPORT FOR ODD-SIZED INDEXES:

1480

//

1481

// An index size of 3 bytes, and for 64-bit only, 5, 6, and 7 bytes, are "odd

1482

// sized" because they do not have corresponding native C data types and do not

1483

// fit neatly into a single word. For efficiency, the leaf search code scans

1484

// these odd indexes in "index groups" that fit in whole words.

1485

1486

// INDEXES PER INDEX GROUP (IPG) for odd-sized indexes: The simple way to

1487

// calculate the number of indexes per index group is BPI(N) * cJU_BYTESPERWORD

1488

// / BPI(N), or essentially just cJU_BYTESPERWORD. However, for BPI6, 6 * 8 =

1489

// 48 bytes (6 words, 8 indexes), but there is a common prime factor of 2, so

1490

// 24 bytes (3 words, 4 indexes) is better because it's smaller.

1491

1492

#ifndef JU_64BIT

1493

#define IPG3 4 // indexes per group (fit in whole words).

1494

#else

1495

#define IPG3 8

1496

#define IPG5 8

1497

#define IPG6 4 // see above.

1498

#define IPG7 8

1499

#endif

1500

1501

// WORDS PER INDEX GROUP (WPG) for odd-sized indexes:

1502

1503

#define WPG3 ((IPG3 * BPI3) / cJU_BYTESPERWORD) // 3 [3].

1504

#ifdef JU_64BIT

1505

#define WPG5 ((IPG5 * BPI5) / cJU_BYTESPERWORD) // [5].

1506

#define WPG6 ((IPG6 * BPI6) / cJU_BYTESPERWORD) // [3].

1507

#define WPG7 ((IPG7 * BPI7) / cJU_BYTESPERWORD) // [7].

1508

#endif

1509

1510

// INDEXES PER CACHE LINE (IPC) for odd-sized indexes, really the number of

1511

// indexes within the maximum number of whole (word-aligned) index groups per

1512

// "virtual" cache line, which is less than or equal in size to a real cache

1513

// line size:

1514

//

1515

// Note: For odd-sized indexes these IPC's are less than the actual indexes

1516

// per cache line, sometimes less even the number of whole indexes per cache

1517

// line, due to word-aligned grouping.

1518

1519

#define IPC1 (cJU_BYTESPERCL / 1) // 64 [128].

1520

#define IPC2 (cJU_BYTESPERCL / 2) // 32 [64].

1521

#define IPC3 ((cJU_BYTESPERCL / (IPG3 * BPI3)) * IPG3) // 20 [40].

1522

#ifdef JU_64BIT

1523

#define IPC4 (cJU_BYTESPERCL / 4) // [32].

1524

#define IPC5 ((cJU_BYTESPERCL / (IPG5 * BPI5)) * IPG5) // [24].

1525

#define IPC6 ((cJU_BYTESPERCL / (IPG6 * BPI6)) * IPG6) // [20].

1526

#define IPC7 ((cJU_BYTESPERCL / (IPG7 * BPI7)) * IPG7) // [16].

1527

#endif

1528

#define IPCL (cJU_BYTESPERCL / sizeof(Word_t)) // 16 [16].

1529

1530

1531

// MACROS FOR EFFICIENTLY EXTRACTING ALIGNED ODD-SIZE INDEXES FROM A LEAF:

1532

//

1533

// Doing shifts, ORs, and masks in registers is presumably faster than

1534

// rebuilding odd-sized indexes one byte at a time. However, it makes the code

1535

// unavoidably torturous as it works through all the 32/64-bit and

1536

// little/big-endian permutations efficiently. (Trading longer coding time and

1537

// code size for faster execution.)

1538

//

1539

// The GET3_*() macros hide the details of how to extract bytes "cba"

1540

// (big-endian order) for index 0..3 [0..7] in an index group, given Pword

1541

// pointing to the first (unsigned long) word of a group.

1542

1543

#ifndef JU_64BIT

1544

#ifndef JU_LITTLE_ENDIAN

1545

1546

// Index bytes for indexes 0-3 look like this (big-endian):

1547

//

1548

// 0001 1122 2333 // in memory or registers.

1549

// cbac bacb acba

1550

1551

#define GET3_0 (Pword[0] >> 8)

1552

#define GET3_1 (((Pword[0] << 16) | (Pword[1] >> 16)) & MASK3)

1553

#define GET3_2 (((Pword[1] << 8) | (Pword[2] >> 24)) & MASK3)

1554

#define GET3_3 (Pword[2] & MASK3)

1555

1556

#endif // JU_BIG_ENDIAN

1557

1558

#ifdef JU_LITTLE_ENDIAN

1559

1560

// Index bytes for indexes 0-3 look like this (little-endian):

1561

//

1562

// 0001 1122 2333 // in memory.

1563

// abca bcab cabc

1564

//

1565

// 1000 2211 3332 // in registers.

1566

// acba bacb cbac // big-endian Indexes.

1567

1568

#define GET3_0 (Pword[0] & MASK3)

1569

#define GET3_1 (((Pword[0] >> 24) | (Pword[1] << 8)) & MASK3)

1570

#define GET3_2 (((Pword[1] >> 16) | (Pword[2] << 16)) & MASK3)

1571

#define GET3_3 (Pword[2] >> 8)

1572

1573

#endif // JU_LITTLE_ENDIAN

1574

1575

#define GET3_LIG GET3_3 // last index in index group.

1576

1577

#else // JU_64BIT

1578

1579

#ifndef JU_LITTLE_ENDIAN

1580

1581

// Index bytes for indexes 0-7 look like this (big-endian):

1582

//

1583

// 00011122 23334445 55666777 // in memory or registers.

1584

// cbacbacb acbacbac bacbacba

1585

1586

#define GET3_0 (Pword[0] >> 40)

1587

#define GET3_1 ((Pword[0] >> 16) & MASK3)

1588

#define GET3_2 (((Pword[0] << 8) | (Pword[1] >> 56)) & MASK3)

1589

#define GET3_3 ((Pword[1] >> 32) & MASK3)

1590

#define GET3_4 ((Pword[1] >> 8) & MASK3)

1591

#define GET3_5 (((Pword[1] << 16) | (Pword[2] >> 48)) & MASK3)

1592

#define GET3_6 ((Pword[2] >> 24) & MASK3)

1593

#define GET3_7 (Pword[2] & MASK3)

1594

1595

#endif // JU_BIG_ENDIAN

1596

1597

#ifdef JU_LITTLE_ENDIAN

1598

1599

// Index bytes for indexes 0-7 look like this (little-endian):

1600

//

1601

// 00011122 23334445 55666777 // in memory.

1602

// abcabcab cabcabca bcabcabc

1603

//

1604

// 22111000 54443332 77766655 // in registers.

1605

// bacbacba acbacbac cbacbacb // big-endian Indexes.

1606

1607

#define GET3_0 (Pword[0] & MASK3)

1608

#define GET3_1 ((Pword[0] >> 24) & MASK3)

1609

#define GET3_2 (((Pword[0] >> 48) | (Pword[1] << 16)) & MASK3)

1610

#define GET3_3 ((Pword[1] >> 8) & MASK3)

1611

#define GET3_4 ((Pword[1] >> 32) & MASK3)

1612

#define GET3_5 (((Pword[1] >> 56) | (Pword[2] << 8)) & MASK3)

1613

#define GET3_6 ((Pword[2] >> 16) & MASK3)

1614

#define GET3_7 (Pword[2] >> 40)

1615

1616

#endif // JU_LITTLE_ENDIAN

1617

1618

#define GET3_LIG GET3_7 // last index in index group.

1619

1620

#endif // JU_64BIT

1621

1622

1623

#ifdef JU_64BIT

1624

1625

// The GET5_* macros are similar to GET3_*() for larger index sizes:

1626

1627

#ifndef JU_LITTLE_ENDIAN

1628

1629

// Index bytes for indexes 0-7 look like this (big-endian):

1630

//

1631

// 00000111 11222223 33334444 45555566 66677777 // in memory or registers.

1632

// edcbaedc baedcbae dcbaedcb aedcbaed cbaedcba

1633

1634

#define GET5_0 (Pword[0] >> 24)

1635

#define GET5_1 (((Pword[0] << 16) | (Pword[1] >> 48)) & MASK5)

1636

#define GET5_2 ((Pword[1] >> 8) & MASK5)

1637

#define GET5_3 (((Pword[1] << 32) | (Pword[2] >> 32)) & MASK5)

1638

#define GET5_4 (((Pword[2] << 8) | (Pword[3] >> 56)) & MASK5)

1639

#define GET5_5 ((Pword[3] >> 16) & MASK5)

1640

#define GET5_6 (((Pword[3] << 24) | (Pword[4] >> 40)) & MASK5)

1641

#define GET5_7 (Pword[4] & MASK5)

1642

1643

#endif // JU_BIG_ENDIAN

1644

1645

#ifdef JU_LITTLE_ENDIAN

1646

1647

// Index bytes for indexes 0-7 look like this (little-endian):

1648

//

1649

// 00000111 11222223 33334444 45555566 66677777 // in memory.

1650

// abcdeabc deabcdea bcdeabcd eabcdeab cdeabcde

1651

//

1652

// 11100000 32222211 44443333 66555554 77777666 // in registers.

1653

// cbaedcba aedcbaed dcbaedcb daedcbae edcbaedc // big-endian Indexes.

1654

1655

#define GET5_0 (Pword[0] & MASK5)

1656

#define GET5_1 (((Pword[0] >> 40) | (Pword[1] << 24)) & MASK5)

1657

#define GET5_2 ((Pword[1] >> 16) & MASK5)

1658

#define GET5_3 (((Pword[1] >> 56) | (Pword[2] << 8)) & MASK5)

1659

#define GET5_4 (((Pword[2] >> 32) | (Pword[3] << 32)) & MASK5)

1660

#define GET5_5 ((Pword[3] >> 8) & MASK5)

1661

#define GET5_6 (((Pword[3] >> 48) | (Pword[4] << 16)) & MASK5)

1662

#define GET5_7 (Pword[4] >> 24)

1663

1664

#endif // JU_LITTLE_ENDIAN

1665

1666

#define GET5_LIG GET5_7 // last index in index group.

1667

1668

1669

// The GET6_* macros are similar to GET3_*() for larger index sizes:

1670

1671

#ifndef JU_LITTLE_ENDIAN

1672

1673

// Index bytes for indexes 0-7 look like this (big-endian):

1674

//

1675

// 00000011 11112222 22333333 // in memory or registers.

1676

// fedcbafe dcbafedc bafedcba

1677

1678

#define GET6_0 (Pword[0] >> 16)

1679

#define GET6_1 (((Pword[0] << 32) | (Pword[1] >> 32)) & MASK6)

1680

#define GET6_2 (((Pword[1] << 16) | (Pword[2] >> 48)) & MASK6)

1681

#define GET6_3 (Pword[2] & MASK6)

1682

1683

#endif // JU_BIG_ENDIAN

1684

1685

#ifdef JU_LITTLE_ENDIAN

1686

1687

// Index bytes for indexes 0-7 look like this (little-endian):

1688

//

1689

// 00000011 11112222 22333333 // in memory.

1690

// abcdefab cdefabcd efabcdef

1691

//

1692

// 11000000 22221111 33333322 // in registers.

1693

// bafedcba dcbafedc fedcbafe // big-endian Indexes.

1694

1695

#define GET6_0 (Pword[0] & MASK6)

1696

#define GET6_1 (((Pword[0] >> 48) | (Pword[1] << 16)) & MASK6)

1697

#define GET6_2 (((Pword[1] >> 32) | (Pword[2] << 32)) & MASK6)

1698

#define GET6_3 (Pword[2] >> 16)

1699

1700

#endif // JU_LITTLE_ENDIAN

1701

1702

#define GET6_LIG GET6_3 // last index in index group.

1703

1704

1705

// The GET7_* macros are similar to GET3_*() for larger index sizes:

1706

1707

#ifndef JU_LITTLE_ENDIAN

1708

1709

// Index bytes for indexes 0-7 look like this (big-endian) (in memory or

1710

// registers):

1711

//

1712

// 00000001 11111122 22222333 33334444 44455555 55666666 67777777

1713

// gfedcbag fedcbagf edcbagfe dcbagfed cbagfedc bagfedcb agfedcba

1714

1715

#define GET7_0 (Pword[0] >> 8)

1716

#define GET7_1 (((Pword[0] << 48) | (Pword[1] >> 16)) & MASK7)

1717

#define GET7_2 (((Pword[1] << 40) | (Pword[2] >> 24)) & MASK7)

1718

#define GET7_3 (((Pword[2] << 32) | (Pword[3] >> 32)) & MASK7)

1719

#define GET7_4 (((Pword[3] << 24) | (Pword[4] >> 40)) & MASK7)

1720

#define GET7_5 (((Pword[4] << 16) | (Pword[5] >> 48)) & MASK7)

1721

#define GET7_6 (((Pword[5] << 8) | (Pword[6] >> 56)) & MASK7)

1722

#define GET7_7 (Pword[6] & MASK7)

1723

1724

#endif // JU_BIG_ENDIAN

1725

1726

#ifdef JU_LITTLE_ENDIAN

1727

1728

// Index bytes for indexes 0-7 look like this (little-endian) (in memory, then

1729

// in registers, with big-endian Indexes):

1730

//

1731

// 00000001 11111122 22222333 33334444 44455555 55666666 67777777

1732

// abcdefga bcdefgab cdefgabc defgabcd efgabcde fgabcdef gabcdefg

1733

//

1734

// 10000000 22111111 33322222 44443333 55555444 66666655 77777776

1735

// agfedcba bagfedcb cbagfedc dcbagfed edcbagfe fedcbagf gfedcbag

1736

1737

#define GET7_0 (Pword[0] & MASK7)

1738

#define GET7_1 (((Pword[0] >> 56) | (Pword[1] << 8)) & MASK7)

1739

#define GET7_2 (((Pword[1] >> 48) | (Pword[2] << 16)) & MASK7)

1740

#define GET7_3 (((Pword[2] >> 40) | (Pword[3] << 24)) & MASK7)

1741

#define GET7_4 (((Pword[3] >> 32) | (Pword[4] << 32)) & MASK7)

1742

#define GET7_5 (((Pword[4] >> 24) | (Pword[5] << 40)) & MASK7)

1743

#define GET7_6 (((Pword[5] >> 16) | (Pword[6] << 48)) & MASK7)

1744

#define GET7_7 (Pword[6] >> 8)

1745

1746

#endif // JU_LITTLE_ENDIAN

1747

1748

#define GET7_LIG GET7_7 // last index in index group.

1749

1750

#endif // JU_64BIT

1751

1752

#endif // ! _JUDYPRIVATE_INCLUDED