1
// Copyright (C) 2011 Milo Yip
3
// Permission is hereby granted, free of charge, to any person obtaining a copy
4
// of this software and associated documentation files (the "Software"), to deal
5
// in the Software without restriction, including without limitation the rights
6
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
// copies of the Software, and to permit persons to whom the Software is
8
// furnished to do so, subject to the following conditions:
10
// The above copyright notice and this permission notice shall be included in
11
// all copies or substantial portions of the Software.
13
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
22
// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
23
// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
25
#ifndef RAPIDJSON_DIYFP_H_
26
#define RAPIDJSON_DIYFP_H_
31
#pragma intrinsic(_BitScanReverse64)
35
RAPIDJSON_NAMESPACE_BEGIN
40
RAPIDJSON_DIAG_OFF(effc++)
46
DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
48
explicit DiyFp(double d) {
54
int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
55
uint64_t significand = (u.u64 & kDpSignificandMask);
57
f = significand + kDpHiddenBit;
58
e = biased_e - kDpExponentBias;
62
e = kDpMinExponent + 1;
66
DiyFp operator-(const DiyFp& rhs) const {
67
return DiyFp(f - rhs.f, e);
70
DiyFp operator*(const DiyFp& rhs) const {
71
#if defined(_MSC_VER) && defined(_M_AMD64)
73
uint64_t l = _umul128(f, rhs.f, &h);
74
if (l & (uint64_t(1) << 63)) // rounding
76
return DiyFp(h, e + rhs.e + 64);
77
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
78
unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
80
uint64_t l = static_cast<uint64_t>(p);
81
if (l & (uint64_t(1) << 63)) // rounding
83
return DiyFp(h, e + rhs.e + 64);
85
const uint64_t M32 = 0xFFFFFFFF;
86
const uint64_t a = f >> 32;
87
const uint64_t b = f & M32;
88
const uint64_t c = rhs.f >> 32;
89
const uint64_t d = rhs.f & M32;
90
const uint64_t ac = a * c;
91
const uint64_t bc = b * c;
92
const uint64_t ad = a * d;
93
const uint64_t bd = b * d;
94
uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
95
tmp += 1U << 31; /// mult_round
96
return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
100
DiyFp Normalize() const {
101
#if defined(_MSC_VER) && defined(_M_AMD64)
103
_BitScanReverse64(&index, f);
104
return DiyFp(f << (63 - index), e - (63 - index));
105
#elif defined(__GNUC__) && __GNUC__ >= 4
106
int s = __builtin_clzll(f);
107
return DiyFp(f << s, e - s);
110
while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
118
DiyFp NormalizeBoundary() const {
120
while (!(res.f & (kDpHiddenBit << 1))) {
124
res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
125
res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
129
void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
130
DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
131
DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
132
mi.f <<= mi.e - pl.e;
138
double ToDouble() const {
143
uint64_t significand = f;
145
while (significand > kDpHiddenBit + kDpSignificandMask) {
149
while (exponent > kDpDenormalExponent && (significand & kDpHiddenBit) == 0) {
153
if (exponent >= kDpMaxExponent) {
154
u.u64 = kDpExponentMask; // Infinity
157
else if (exponent < kDpDenormalExponent)
159
const uint64_t be = (exponent == kDpDenormalExponent && (significand & kDpHiddenBit) == 0) ? 0 :
160
static_cast<uint64_t>(exponent + kDpExponentBias);
161
u.u64 = (significand & kDpSignificandMask) | (be << kDpSignificandSize);
165
static const int kDiySignificandSize = 64;
166
static const int kDpSignificandSize = 52;
167
static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
168
static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
169
static const int kDpMinExponent = -kDpExponentBias;
170
static const int kDpDenormalExponent = -kDpExponentBias + 1;
171
static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
172
static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
173
static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
179
inline DiyFp GetCachedPowerByIndex(size_t index) {
180
// 10^-348, 10^-340, ..., 10^340
181
static const uint64_t kCachedPowers_F[] = {
182
RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
183
RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
184
RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
185
RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
186
RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
187
RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
188
RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
189
RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
190
RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
191
RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
192
RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
193
RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
194
RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
195
RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
196
RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
197
RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
198
RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
199
RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
200
RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
201
RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
202
RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
203
RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
204
RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
205
RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
206
RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
207
RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
208
RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
209
RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
210
RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
211
RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
212
RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
213
RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
214
RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
215
RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
216
RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
217
RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
218
RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
219
RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
220
RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
221
RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
222
RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
223
RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
224
RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
225
RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
227
static const int16_t kCachedPowers_E[] = {
228
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
229
-954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
230
-688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
231
-422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
232
-157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
233
109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
234
375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
235
641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
236
907, 933, 960, 986, 1013, 1039, 1066
238
return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
241
inline DiyFp GetCachedPower(int e, int* K) {
243
//int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
244
double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
245
int k = static_cast<int>(dk);
249
unsigned index = static_cast<unsigned>((k >> 3) + 1);
250
*K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
252
return GetCachedPowerByIndex(index);
255
inline DiyFp GetCachedPower10(int exp, int *outExp) {
256
unsigned index = (exp + 348) / 8;
257
*outExp = -348 + index * 8;
258
return GetCachedPowerByIndex(index);
265
} // namespace internal
266
RAPIDJSON_NAMESPACE_END
268
#endif // RAPIDJSON_DIYFP_H_