1
// Protocol Buffers - Google's data interchange format
2
// Copyright 2008 Google Inc.
3
// http://code.google.com/p/protobuf/
5
// Licensed under the Apache License, Version 2.0 (the "License");
6
// you may not use this file except in compliance with the License.
7
// You may obtain a copy of the License at
9
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
17
// Author: kenton@google.com (Kenton Varda)
18
// Based on original Protocol Buffers design by
19
// Sanjay Ghemawat, Jeff Dean, and others.
21
// This implementation is heavily optimized to make reads and writes
22
// of small values (especially varints) as fast as possible. In
23
// particular, we optimize for the common case that a read or a write
24
// will not cross the end of the buffer, since we can avoid a lot
25
// of branching in this case.
29
#include <google/protobuf/io/coded_stream.h>
30
#include <google/protobuf/io/zero_copy_stream.h>
31
#include <google/protobuf/stubs/common.h>
32
#include <google/protobuf/stubs/stl_util-inl.h>
41
// Initial value of CodedInputStream::total_bytes_limit_.
static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
43
// Initial value of CodedInputStream::total_bytes_warning_threshold_; once
// this many bytes have been read, Refresh() logs a one-time warning.
static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
44
// Initial value of CodedInputStream::recursion_limit_.
static const int kDefaultRecursionLimit = 64;
46
// Longest encoding of a varint, in bytes. Longer input is treated as corrupt.
static const int kMaxVarintBytes = 10;
47
// Size of the scratch buffer needed to encode a 32-bit varint, in bytes.
static const int kMaxVarint32Bytes = 5;
52
// CodedInputStream ==================================================
54
// Constructs a CodedInputStream that reads from |input|.
// The initializers visible here start the stream with no pushed limit
// (current_limit_ == INT_MAX), nothing hidden behind a limit, and the default
// total-bytes limit, warning threshold and recursion limit.
// NOTE(review): the start of the initializer list is not shown here.
CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
62
legitimate_message_end_(false),
64
aliasing_enabled_(false),
66
// INT_MAX is the "no limit" sentinel (see BytesUntilLimit()).
current_limit_(INT_MAX),
67
buffer_size_after_limit_(0),
69
total_bytes_limit_(kDefaultTotalBytesLimit),
70
total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
73
recursion_limit_(kDefaultRecursionLimit) {
76
// Destructor: hands every byte we obtained from input_ but did not consume
// back to it via BackUp().
CodedInputStream::~CodedInputStream() {
77
// Unconsumed bytes = usable remainder of the buffer + bytes hidden behind a
// limit + bytes dropped by Refresh() when total_bytes_read_ saturated.
int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_;
78
if (backup_bytes > 0) {
79
// We still have bytes left over from the last buffer. Back up over them.
81
input_->BackUp(backup_bytes);
86
// Re-splits the current buffer into the part that may be read
// (buffer_size_) and the part lying past the closest limit
// (buffer_size_after_limit_). Called whenever a limit or the buffer changes.
inline void CodedInputStream::RecomputeBufferLimits() {
87
// Undo the previous split so buffer_size_ covers the whole buffer again.
buffer_size_ += buffer_size_after_limit_;
88
// Whichever of the pushed limit and the total-bytes limit comes first.
int closest_limit = min(current_limit_, total_bytes_limit_);
89
// total_bytes_read_ counts through the end of the current buffer, so a
// smaller limit means the limit falls inside this buffer.
if (closest_limit < total_bytes_read_) {
90
// The limit position is in the current buffer. We must adjust
91
// the buffer size accordingly.
92
buffer_size_after_limit_ = total_bytes_read_ - closest_limit;
93
buffer_size_ -= buffer_size_after_limit_;
95
// NOTE(review): presumably the else branch (limit is not inside this
// buffer), in which case nothing is hidden — confirm.
buffer_size_after_limit_ = 0;
99
// Installs a new read limit |byte_limit| bytes past the current position,
// never loosening a limit that is already in force. The previous limit is
// saved in old_limit; PopLimit() expects to be handed that value back.
CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
100
// Current position relative to the beginning of the stream.
101
int current_position = total_bytes_read_ -
102
(buffer_size_ + buffer_size_after_limit_);
104
Limit old_limit = current_limit_;
106
// security: byte_limit is possibly evil, so check for negative values
// and for overflow of current_position + byte_limit.
108
if (byte_limit >= 0 &&
109
byte_limit <= INT_MAX - current_position) {
110
current_limit_ = current_position + byte_limit;
112
// Negative or overflow.
113
current_limit_ = INT_MAX;
116
// We need to enforce all limits, not just the new one, so if the previous
117
// limit was before the new requested limit, we continue to enforce the
// previous limit.
119
current_limit_ = min(current_limit_, old_limit);
121
RecomputeBufferLimits();
125
// Restores the limit that was in force before the matching PushLimit().
void CodedInputStream::PopLimit(Limit limit) {
126
// The limit passed in is actually the *old* limit, which we returned from
// PushLimit().
128
current_limit_ = limit;
129
// The usable portion of the buffer may have grown; re-split it.
RecomputeBufferLimits();
131
// We may no longer be at a legitimate message end. ReadTag() needs to be
132
// called again to find out.
133
legitimate_message_end_ = false;
136
// Returns how many bytes may still be read before current_limit_ is reached,
// or -1 if no limit is in force (current_limit_ == INT_MAX).
int CodedInputStream::BytesUntilLimit() {
137
if (current_limit_ == INT_MAX) return -1;
138
// Stream offset of the next unread byte: total_bytes_read_ counts through
// the end of the buffer, so subtract what is still unread in it.
int current_position = total_bytes_read_ -
139
(buffer_size_ + buffer_size_after_limit_);
141
return current_limit_ - current_position;
144
// Sets the overall cap on bytes read from this stream and the threshold at
// which Refresh() logs a warning. A negative |warning_threshold| disables
// the warning (Refresh() only warns when the threshold is >= 0).
void CodedInputStream::SetTotalBytesLimit(
145
int total_bytes_limit, int warning_threshold) {
146
// Make sure the limit isn't already past, since this could confuse other
// code: clamp it up to the current position.
148
int current_position = total_bytes_read_ -
149
(buffer_size_ + buffer_size_after_limit_);
150
total_bytes_limit_ = max(current_position, total_bytes_limit);
151
total_bytes_warning_threshold_ = warning_threshold;
152
RecomputeBufferLimits();
155
// Logs an ERROR explaining that a message was rejected for exceeding
// total_bytes_limit_, and pointing the reader at SetTotalBytesLimit().
void CodedInputStream::PrintTotalBytesLimitError() {
156
GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
157
"big (more than " << total_bytes_limit_
158
<< " bytes). To increase the limit (or to disable these "
159
"warnings), see CodedInputStream::SetTotalBytesLimit() "
160
"in google/protobuf/io/coded_stream.h.";
163
// Skips |count| bytes of input. Fails on a negative count; the visible code
// also stops at the closest limit rather than skipping past it.
// NOTE(review): several statements of this function (including the body of
// the first if and the returns on the failure paths) are not shown here.
bool CodedInputStream::Skip(int count) {
164
if (count < 0) return false; // security: count is often user-supplied
166
if (count <= buffer_size_) {
167
// Just skipping within the current buffer. Easy.
172
if (buffer_size_after_limit_ > 0) {
173
// We hit a limit inside this buffer. Advance to the limit and fail.
174
Advance(buffer_size_);
178
// The rest of the buffer is consumed by the skip; what remains must be
// skipped in the underlying stream.
count -= buffer_size_;
182
// Make sure this skip doesn't try to skip past the current limit.
183
int closest_limit = min(current_limit_, total_bytes_limit_);
184
int bytes_until_limit = closest_limit - total_bytes_read_;
185
if (bytes_until_limit < count) {
186
// We hit the limit. Skip up to it then fail.
187
total_bytes_read_ = closest_limit;
188
input_->Skip(bytes_until_limit);
192
// Whole skip fits before the limit: account for it and delegate.
total_bytes_read_ += count;
193
return input_->Skip(count);
196
// Copies |size| bytes of input into |buffer|, refilling the internal buffer
// as often as needed. Returns false if Refresh() fails before |size| bytes
// were obtained; bytes copied up to that point remain in |buffer|.
bool CodedInputStream::ReadRaw(void* buffer, int size) {
197
while (buffer_size_ < size) {
198
// Reading past end of buffer. Copy what we have, then refresh.
199
memcpy(buffer, buffer_, buffer_size_);
200
// Move the destination cursor past the bytes just copied.
buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_;
201
size -= buffer_size_;
202
if (!Refresh()) return false;
205
// The remainder now fits in the current buffer.
memcpy(buffer, buffer_, size);
211
// Reads |size| bytes of input into *buffer. Rejects a negative |size|, and
// returns false if Refresh() fails before |size| bytes were obtained.
// NOTE(review): the body of the !buffer->empty() check and the tail of the
// fast path are not shown here.
bool CodedInputStream::ReadString(string* buffer, int size) {
212
if (size < 0) return false; // security: size is often user-supplied
214
if (!buffer->empty()) {
218
// Whole string is available in the current buffer: size the string once
// and copy straight into its storage.
if (size < buffer_size_) {
219
STLStringResizeUninitialized(buffer, size);
220
memcpy((uint8*)buffer->data(), buffer_, size);
225
// The string spans buffers: append buffer-sized pieces, refreshing between.
while (buffer_size_ < size) {
226
// Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
227
if (buffer_size_ != 0) {
228
// Note: string1.append(string2) is O(string2.size()) (as opposed to
229
// O(string1.size() + string2.size()), which would be bad).
230
buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_);
232
size -= buffer_size_;
233
if (!Refresh()) return false;
236
// Remaining bytes fit in the current buffer.
buffer->append(reinterpret_cast<const char*>(buffer_), size);
243
// Reads a 32-bit little-endian integer into *value. Returns false if the
// slow-path ReadRaw() cannot supply four bytes.
// NOTE(review): the declaration and assignment of |ptr| are not shown here;
// presumably it points at buffer_ on the fast path and at |bytes| on the
// slow path — confirm.
bool CodedInputStream::ReadLittleEndian32(uint32* value) {
244
// Scratch space for the slow path.
uint8 bytes[sizeof(*value)];
247
if (buffer_size_ >= sizeof(*value)) {
248
// Fast path: Enough bytes in the buffer to read directly.
250
Advance(sizeof(*value));
252
// Slow path: Had to read past the end of the buffer.
253
if (!ReadRaw(bytes, sizeof(*value))) return false;
257
// Assemble byte-by-byte so the result does not depend on host endianness:
// ptr[0] is the least significant byte.
*value = (static_cast<uint32>(ptr[0]) ) |
258
(static_cast<uint32>(ptr[1]) << 8) |
259
(static_cast<uint32>(ptr[2]) << 16) |
260
(static_cast<uint32>(ptr[3]) << 24);
264
// Reads a 64-bit little-endian integer into *value. Returns false if the
// slow-path ReadRaw() cannot supply eight bytes.
// NOTE(review): the declaration and assignment of |ptr| are not shown here;
// presumably it points at buffer_ on the fast path and at |bytes| on the
// slow path — confirm.
bool CodedInputStream::ReadLittleEndian64(uint64* value) {
265
// Scratch space for the slow path.
uint8 bytes[sizeof(*value)];
268
if (buffer_size_ >= sizeof(*value)) {
269
// Fast path: Enough bytes in the buffer to read directly.
271
Advance(sizeof(*value));
273
// Slow path: Had to read past the end of the buffer.
274
if (!ReadRaw(bytes, sizeof(*value))) return false;
278
// Build the low and high 32-bit halves separately, least significant byte
// first, then combine them.
uint32 part0 = (static_cast<uint32>(ptr[0]) ) |
279
(static_cast<uint32>(ptr[1]) << 8) |
280
(static_cast<uint32>(ptr[2]) << 16) |
281
(static_cast<uint32>(ptr[3]) << 24);
282
uint32 part1 = (static_cast<uint32>(ptr[4]) ) |
283
(static_cast<uint32>(ptr[5]) << 8) |
284
(static_cast<uint32>(ptr[6]) << 16) |
285
(static_cast<uint32>(ptr[7]) << 24);
286
*value = static_cast<uint64>(part0) |
287
(static_cast<uint64>(part1) << 32);
291
// Reads a varint and stores its low 32 bits in *value. Three strategies are
// visible: an unchecked fast path when the varint provably ends inside the
// buffer, a quick limit/EOF check when the buffer is empty (which also
// maintains legitimate_message_end_), and a fallback through ReadVarint64().
// NOTE(review): local declarations, the |done| label, the else headers and
// most return statements are not shown here.
bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
292
if (buffer_size_ >= kMaxVarintBytes ||
293
// Optimization: If the varint ends at exactly the end of the buffer,
294
// we can detect that and still use the fast path.
295
(buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
296
// Fast path: We have enough bytes left in the buffer to guarantee that
297
// this read won't cross the end, so we can skip the checks.
298
const uint8* ptr = buffer_;
302
// Each byte contributes its low 7 bits; a clear 0x80 bit marks the last
// byte of the varint.
b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
303
b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
304
b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
305
b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
306
b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;
308
// If the input is larger than 32 bits, we still need to read it all
309
// and discard the high-order bits.
310
for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
311
b = *(ptr++); if (!(b & 0x80)) goto done;
314
// We have overrun the maximum size of a varint (10 bytes). Assume
315
// the data is corrupt.
319
// Consume exactly the bytes the varint occupied.
Advance(ptr - buffer_);
324
// Optimization: If we're at a limit, detect that quickly. (This is
325
// common when reading tags.)
326
while (buffer_size_ == 0) {
327
// Detect cases where we definitely hit a byte limit without calling
// Refresh().
329
if (// If we hit a limit, buffer_size_after_limit_ will be non-zero.
330
buffer_size_after_limit_ > 0 &&
331
// Make sure that the limit we hit is not total_bytes_limit_, since
332
// in that case we still need to call Refresh() so that it prints an
// error.
334
total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
335
// We hit a byte limit.
336
legitimate_message_end_ = true;
342
// Refresh failed. Make sure that it failed due to EOF, not because
343
// we hit total_bytes_limit_, which, unlike normal limits, is not a
344
// valid place to end a message.
345
int current_position = total_bytes_read_ - buffer_size_after_limit_;
346
if (current_position >= total_bytes_limit_) {
347
// Hit total_bytes_limit_. But if we also hit the normal limit,
// this is still a legitimate place for the message to end.
349
legitimate_message_end_ = current_limit_ == total_bytes_limit_;
351
legitimate_message_end_ = true;
357
// Slow path: Just do a 64-bit read.
359
if (!ReadVarint64(&result)) return false;
360
// Truncate to the low 32 bits.
*value = (uint32)result;
365
// Reads a varint of up to kMaxVarintBytes bytes into *value. Uses an
// unchecked fast path when the varint provably ends inside the current
// buffer, otherwise a byte-at-a-time slow path that refreshes the buffer
// as needed. Returns false on over-long input or when Refresh() fails.
// NOTE(review): local declarations, the |done| label, the slow-path loop
// header and most return statements are not shown here.
bool CodedInputStream::ReadVarint64(uint64* value) {
366
if (buffer_size_ >= kMaxVarintBytes ||
367
// Optimization: If the varint ends at exactly the end of the buffer,
368
// we can detect that and still use the fast path.
369
(buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
370
// Fast path: We have enough bytes left in the buffer to guarantee that
371
// this read won't cross the end, so we can skip the checks.
373
const uint8* ptr = buffer_;
376
// Splitting into 32-bit pieces gives better performance on 32-bit
// processors.
378
// part0 collects bits 0-27, part1 bits 28-55, part2 bits 56 and up
// (four, four and two 7-bit groups respectively).
uint32 part0 = 0, part1 = 0, part2 = 0;
380
b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
381
b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
382
b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
383
b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
384
b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
385
b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
386
b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
387
b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
388
b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
389
b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
391
// We have overrun the maximum size of a varint (10 bytes). The data
// must be corrupt.
396
// Consume exactly the bytes the varint occupied, then stitch the three
// pieces back together at their bit offsets.
Advance(ptr - buffer_);
397
*value = (static_cast<uint64>(part0) ) |
398
(static_cast<uint64>(part1) << 28) |
399
(static_cast<uint64>(part2) << 56);
403
// Slow path: This read might cross the end of the buffer, so we
404
// need to check and refresh the buffer if and when it does.
411
// A varint longer than kMaxVarintBytes is rejected.
if (count == kMaxVarintBytes) return false;
412
// Make sure at least one byte is available before reading it.
while (buffer_size_ == 0) {
413
if (!Refresh()) return false;
416
// Byte number |count| supplies payload bits [7*count, 7*count + 6].
result |= static_cast<uint64>(b & 0x7F) << (7 * count);
426
// Fetches the next buffer from input_ once the current one is exhausted.
// Refuses to go past a limit (printing an error if the limit hit was
// total_bytes_limit_), logs a one-time warning for very large messages, and
// keeps total_bytes_read_ from overflowing INT_MAX.
// NOTE(review): the return statements and the branch taken when
// input_->Next() fails are not shown here.
bool CodedInputStream::Refresh() {
427
if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0) {
428
// We've hit a limit. Stop.
429
// Step buffer_ past the usable bytes, i.e. to the limit position.
buffer_ += buffer_size_;
432
int current_position = total_bytes_read_ - buffer_size_after_limit_;
434
// Only complain when total_bytes_limit_ is the limit that stopped us and a
// pushed limit does not coincide with it.
if (current_position >= total_bytes_limit_ &&
435
total_bytes_limit_ != current_limit_) {
436
// Hit total_bytes_limit_.
437
PrintTotalBytesLimitError();
443
// A negative threshold means "warnings disabled / already warned".
if (total_bytes_warning_threshold_ >= 0 &&
444
total_bytes_read_ >= total_bytes_warning_threshold_) {
445
GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message. If the "
446
"message turns out to be larger than "
447
<< total_bytes_limit_ << " bytes, parsing will be halted "
448
"for security reasons. To increase the limit (or to "
449
"disable these warnings), see "
450
"CodedInputStream::SetTotalBytesLimit() in "
451
"google/protobuf/io/coded_stream.h.";
453
// Don't warn again for this stream.
454
total_bytes_warning_threshold_ = -1;
457
const void* void_buffer;
458
if (input_->Next(&void_buffer, &buffer_size_)) {
459
buffer_ = reinterpret_cast<const uint8*>(void_buffer);
460
GOOGLE_CHECK_GE(buffer_size_, 0);
462
// Written as a subtraction so the check itself cannot overflow.
if (total_bytes_read_ <= INT_MAX - buffer_size_) {
463
total_bytes_read_ += buffer_size_;
465
// Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX.
466
// We can't get that far anyway, because total_bytes_limit_ is guaranteed
467
// to be less than it. We need to keep track of the number of bytes
468
// we discarded, though, so that we can call input_->BackUp() to back
469
// up over them on destruction.
471
// The following line is equivalent to:
472
// overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX;
473
// except that it avoids overflows. Signed integer overflow has
474
// undefined results according to the C standard.
475
overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_);
476
buffer_size_ -= overflow_bytes_;
477
total_bytes_read_ = INT_MAX;
480
// Apply the current limits to the freshly obtained buffer.
RecomputeBufferLimits();
489
// CodedOutputStream =================================================
491
// Constructs a CodedOutputStream that writes to |output|.
// NOTE(review): the initializer list and body are not shown here.
CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
498
// Destructor: returns the unwritten tail of the current buffer to output_.
CodedOutputStream::~CodedOutputStream() {
499
// buffer_size_ is the number of bytes of the buffer not yet written to.
if (buffer_size_ > 0) {
500
output_->BackUp(buffer_size_);
504
// Writes |size| bytes from |data| to the output, obtaining new buffers as
// each one fills up. Returns false if Refresh() fails part-way through.
bool CodedOutputStream::WriteRaw(const void* data, int size) {
505
while (buffer_size_ < size) {
// Fill what is left of the current buffer, then move on to the next one.
506
memcpy(buffer_, data, buffer_size_);
507
size -= buffer_size_;
508
data = reinterpret_cast<const uint8*>(data) + buffer_size_;
509
if (!Refresh()) return false;
512
// The remainder fits in the current buffer.
memcpy(buffer_, data, size);
518
// Writes |value| as four bytes, least significant byte first.
// NOTE(review): the branch header separating the fast path (Advance) from
// the slow path (WriteRaw) is not shown here.
bool CodedOutputStream::WriteLittleEndian32(uint32 value) {
519
// Scratch space used when the value does not fit in the current buffer.
uint8 bytes[sizeof(value)];
521
bool use_fast = buffer_size_ >= sizeof(value);
522
// Encode directly into the output buffer when there is room, otherwise
// into the scratch array.
uint8* ptr = use_fast ? buffer_ : bytes;
524
ptr[0] = static_cast<uint8>(value );
525
ptr[1] = static_cast<uint8>(value >> 8);
526
ptr[2] = static_cast<uint8>(value >> 16);
527
ptr[3] = static_cast<uint8>(value >> 24);
530
Advance(sizeof(value));
533
return WriteRaw(bytes, sizeof(value));
537
// Writes |value| as eight bytes, least significant byte first.
// NOTE(review): the branch header separating the fast path (Advance) from
// the slow path (WriteRaw) is not shown here.
bool CodedOutputStream::WriteLittleEndian64(uint64 value) {
538
// Scratch space used when the value does not fit in the current buffer.
uint8 bytes[sizeof(value)];
540
// Work on the low and high 32-bit halves separately.
uint32 part0 = static_cast<uint32>(value);
541
uint32 part1 = static_cast<uint32>(value >> 32);
543
bool use_fast = buffer_size_ >= sizeof(value);
544
// Encode directly into the output buffer when there is room, otherwise
// into the scratch array.
uint8* ptr = use_fast ? buffer_ : bytes;
546
ptr[0] = static_cast<uint8>(part0 );
547
ptr[1] = static_cast<uint8>(part0 >> 8);
548
ptr[2] = static_cast<uint8>(part0 >> 16);
549
ptr[3] = static_cast<uint8>(part0 >> 24);
550
ptr[4] = static_cast<uint8>(part1 );
551
ptr[5] = static_cast<uint8>(part1 >> 8);
552
ptr[6] = static_cast<uint8>(part1 >> 16);
553
ptr[7] = static_cast<uint8>(part1 >> 24);
556
Advance(sizeof(value));
559
return WriteRaw(bytes, sizeof(value));
563
// Writes |value| as a varint: 7 payload bits per byte, low-order group
// first, with bit 0x80 set on every byte that is followed by another.
// NOTE(review): the tail of the fast path (closing of the nested ifs,
// final-byte fix-up, Advance/return) and parts of the slow-path loop are
// not shown here.
bool CodedOutputStream::WriteVarint32Fallback(uint32 value) {
564
if (buffer_size_ >= kMaxVarint32Bytes) {
565
// Fast path: We have enough bytes left in the buffer to guarantee that
566
// this write won't cross the end, so we can skip the checks.
567
uint8* target = buffer_;
569
// Each byte is first written with the continuation bit set; another byte
// follows only while value still has bits above the group just written.
target[0] = static_cast<uint8>(value | 0x80);
570
if (value >= (1 << 7)) {
571
target[1] = static_cast<uint8>((value >> 7) | 0x80);
572
if (value >= (1 << 14)) {
573
target[2] = static_cast<uint8>((value >> 14) | 0x80);
574
if (value >= (1 << 21)) {
575
target[3] = static_cast<uint8>((value >> 21) | 0x80);
576
if (value >= (1 << 28)) {
577
// Fifth byte: only bits 28-31 remain, so no continuation bit is needed.
target[4] = static_cast<uint8>(value >> 28);
598
// Slow path: This write might cross the end of the buffer, so we
599
// compose the bytes first then use WriteRaw().
600
uint8 bytes[kMaxVarint32Bytes];
602
while (value > 0x7F) {
603
bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
606
// Last byte: continuation bit clear.
bytes[size++] = static_cast<uint8>(value) & 0x7F;
607
return WriteRaw(bytes, size);
611
// Writes |value| as a varint of 1 to 10 bytes. The fast path picks the
// encoded size with a hard-coded decision tree and emits the bytes through
// a chain of fall-through labels; the slow path composes the bytes in a
// scratch array and hands them to WriteRaw().
// NOTE(review): the outer tests of the decision tree, the else headers, the
// declaration of |size|, and the fast path's Advance/return are not shown
// here.
bool CodedOutputStream::WriteVarint64(uint64 value) {
612
if (buffer_size_ >= kMaxVarintBytes) {
613
// Fast path: We have enough bytes left in the buffer to guarantee that
614
// this write won't cross the end, so we can skip the checks.
615
uint8* target = buffer_;
617
// Splitting into 32-bit pieces gives better performance on 32-bit
// processors.
619
// part0 starts at bit 0, part1 at bit 28, part2 at bit 56; only the low
// 28 bits of part0 and part1 are emitted below.
uint32 part0 = static_cast<uint32>(value );
620
uint32 part1 = static_cast<uint32>(value >> 28);
621
uint32 part2 = static_cast<uint32>(value >> 56);
625
// Here we can't really optimize for small numbers, since the value is
626
// split into three parts. Checking for numbers < 128, for instance,
627
// would require three comparisons, since you'd have to make sure part1
628
// and part2 are zero. However, if the caller is using 64-bit integers,
629
// it is likely that they expect the numbers to often be very large, so
630
// we probably don't want to optimize for small numbers anyway. Thus,
631
// we end up with a hardcoded binary search tree...
634
if (part0 < (1 << 14)) {
635
if (part0 < (1 << 7)) {
636
size = 1; goto size1;
638
size = 2; goto size2;
641
if (part0 < (1 << 21)) {
642
size = 3; goto size3;
644
size = 4; goto size4;
648
if (part1 < (1 << 14)) {
649
if (part1 < (1 << 7)) {
650
size = 5; goto size5;
652
size = 6; goto size6;
655
if (part1 < (1 << 21)) {
656
size = 7; goto size7;
658
size = 8; goto size8;
663
if (part2 < (1 << 7)) {
664
size = 9; goto size9;
666
size = 10; goto size10;
670
GOOGLE_LOG(FATAL) << "Can't get here.";
672
// Entering at label sizeN falls through every label below it, writing
// bytes N-1 down to 0, each with the continuation bit set.
size10: target[9] = static_cast<uint8>((part2 >> 7) | 0x80);
673
size9 : target[8] = static_cast<uint8>((part2 ) | 0x80);
674
size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80);
675
size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80);
676
size6 : target[5] = static_cast<uint8>((part1 >> 7) | 0x80);
677
size5 : target[4] = static_cast<uint8>((part1 ) | 0x80);
678
size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80);
679
size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80);
680
size2 : target[1] = static_cast<uint8>((part0 >> 7) | 0x80);
681
size1 : target[0] = static_cast<uint8>((part0 ) | 0x80);
683
// Clear the continuation bit on the final byte.
target[size-1] &= 0x7F;
687
// Slow path: This write might cross the end of the buffer, so we
688
// compose the bytes first then use WriteRaw().
689
uint8 bytes[kMaxVarintBytes];
691
while (value > 0x7F) {
692
bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
695
// Last byte: continuation bit clear.
bytes[size++] = static_cast<uint8>(value) & 0x7F;
696
return WriteRaw(bytes, size);
700
// Obtains the next output buffer from output_ and accounts for its size.
// NOTE(review): the declaration of void_buffer, the failure branch and the
// return statements are not shown here.
bool CodedOutputStream::Refresh() {
702
if (output_->Next(&void_buffer, &buffer_size_)) {
703
buffer_ = reinterpret_cast<uint8*>(void_buffer);
704
// total_bytes_ grows by the full size of each buffer obtained.
total_bytes_ += buffer_size_;
713
// Classifies |value| by magnitude, testing against 2^7, 2^14, 2^21 and 2^28
// (one more 7-bit group per step).
// NOTE(review): the value returned for each class is not shown here;
// presumably it is the varint-encoded length in bytes (1 to 5) — confirm.
int CodedOutputStream::VarintSize32Fallback(uint32 value) {
714
if (value < (1 << 7)) {
716
} else if (value < (1 << 14)) {
718
} else if (value < (1 << 21)) {
720
} else if (value < (1 << 28)) {
727
// Classifies |value| by magnitude in steps of 7 bits. The first test splits
// the range at 2^35 so each half needs only a short chain of comparisons.
// NOTE(review): the value returned for each class is not shown here;
// presumably it is the varint-encoded length in bytes (1 to 10) — confirm.
int CodedOutputStream::VarintSize64(uint64 value) {
728
if (value < (1ull << 35)) {
729
if (value < (1ull << 7)) {
731
} else if (value < (1ull << 14)) {
733
} else if (value < (1ull << 21)) {
735
} else if (value < (1ull << 28)) {
741
// Values of 2^35 and above.
if (value < (1ull << 42)) {
743
} else if (value < (1ull << 49)) {
745
} else if (value < (1ull << 56)) {
747
} else if (value < (1ull << 63)) {
756
} // namespace protobuf
757
} // namespace google