/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:

///////////////////////////////////////////////////////////////////////////////
//
/// \brief      Kind of two-bit version of bit scan reverse
//
//  Authors:    Igor Pavlov
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_FASTPOS_H
#define LZMA_FASTPOS_H
// LZMA encodes match distances (positions) by storing the highest two
// bits using a six-bit value [0, 63], and then the missing lower bits.
//
// Dictionary size is also stored using this encoding in the new .lzma
// file format header.
//
// fastpos.h provides a way to quickly find out the correct six-bit
// values. The following table gives some examples of this encoding:
//
// NOTE(review): the example table (original lines 26-49) was lost when
// this copy was made; restore it from the original file.
//
// Provided functions or macros
// ----------------------------
//
// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos)
// assumes that pos >= FULL_DISTANCES, thus the result is at least
// FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of
// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos)
// should be tiny bit faster due to the assumption being made.
//
// NOTE(review): a few comment lines (original lines 58-62) describing
// the two implementations were lost here; restore from the original.
//
// With some CPUs that have fast BSR (bit scan reverse) instruction, the
// size optimized version is slightly faster than the bigger table based
// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
// would still have speed roughly comparable to the table version. Older
// x86 CPUs like the original Pentium have very slow BSR; on those systems
// the table version is a lot faster.
//
// On some CPUs, the table version is a lot faster when using position
// dependent code, but with position independent code the size optimized
// version is slightly faster. This occurs at least on 32-bit SPARC (no
// ASM optimizations).
//
// I'm making the table version the default, because that has good speed
// on all systems I have tried. The size optimized version is sometimes
// slightly faster, but sometimes it is a lot slower.
// Size-optimized implementation. NOTE(review): upstream selects between
// this and the table-based get_pos_slot() further down with
// #ifdef HAVE_SMALL / #else; that conditional structure was lost in this
// copy. The macro is guarded here so it cannot clash with the table-based
// get_pos_slot() function below; restore the full #ifdef HAVE_SMALL /
// #else / #endif layout when merging.
#ifdef HAVE_SMALL
#    define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos))
#endif

/// \brief      Get the position slot of pos, assuming pos >= 5
///
/// The slot packs the index of the highest set bit of pos together with
/// the value of the next lower bit:
///     slot = 2 * bsr(pos) + ((pos >> (bsr(pos) - 1)) & 1)
///
/// NOTE(review): the original declaration of i was lost in this copy;
/// upstream computes it with a bit scan reverse helper. A portable loop
/// is used here because that helper is not visible in this file.
static inline uint32_t
get_pos_slot_2(uint32_t pos)
{
    // Index of the highest set bit of pos. The caller guarantees
    // pos >= 5, so i >= 2 and the (i - 1) shift below cannot underflow.
    uint32_t i = 31;
    while ((pos & (UINT32_C(1) << i)) == 0)
        --i;

    return (i + i) + ((pos >> (i - 1)) & 1);
}
// Number of distance bits covered directly by one lookup in lzma_fastpos.
#define FASTPOS_BITS 13

// Precalculated table mapping a FASTPOS_BITS-bit distance prefix to its
// position slot. Defined in a separate source file (it is declared extern
// here); generated ahead of time.
extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];

// Number of low bits discarded before indexing the table on the n'th
// fallback step, with "extra" additional bits known to be discardable.
#define fastpos_shift(extra, n) \
    ((extra) + (n) * (FASTPOS_BITS - 1))

// Smallest pos that is NOT handled by the n'th table lookup
// (i.e. the lookup is valid for pos < fastpos_limit(extra, n)).
#define fastpos_limit(extra, n) \
    (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))

// Do the n'th table lookup: shift pos down into table range, then add
// two slots per discarded bit to correct the result.
// (The expansion is parenthesized so the macro is safe in any context.)
#define fastpos_result(pos, extra, n) \
    (lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \
            + 2 * fastpos_shift(extra, n))

/// \brief      Get the position slot of pos using the lookup table
///
/// Works for the full uint32_t range: positions above the directly
/// tabulated range are shifted down first and the slot is corrected
/// by fastpos_result().
static inline uint32_t
get_pos_slot(uint32_t pos)
{
    // If it is small enough, we can pick the result directly from
    // the precalculated table.
    if (pos < fastpos_limit(0, 0))
        return lzma_fastpos[pos];

    if (pos < fastpos_limit(0, 1))
        return fastpos_result(pos, 0, 1);

    return fastpos_result(pos, 0, 2);
}
// Encoder-only variant, available when the encoder headers define
// FULL_DISTANCES_BITS (and FULL_DISTANCES). NOTE(review): the closing
// brace and #endif were missing in this copy and have been restored; if
// the size-optimized get_pos_slot_2() above is also compiled in, the
// lost #ifdef HAVE_SMALL / #else structure must be restored to avoid a
// duplicate definition.
#ifdef FULL_DISTANCES_BITS
/// \brief      Get the position slot of pos, assuming pos >= FULL_DISTANCES
///
/// Because pos >= FULL_DISTANCES, the smallest table range can be
/// skipped entirely, making this a tiny bit faster than get_pos_slot().
static inline uint32_t
get_pos_slot_2(uint32_t pos)
{
    assert(pos >= FULL_DISTANCES);

    if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
        return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);

    if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
        return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1);

    return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2);
}
#endif