1
//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
3
// The LLVM Compiler Infrastructure
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
8
//===----------------------------------------------------------------------===//
10
// This header defines the BitstreamReader class. This class can be used to
11
// read an arbitrary bitstream, regardless of its contents.
13
//===----------------------------------------------------------------------===//
15
#ifndef BITSTREAM_READER_H
16
#define BITSTREAM_READER_H
18
#include "llvm/Bitcode/BitCodes.h"
27
/// BitstreamReader - Holds the state that cursors over one bitstream share:
/// the bounds of the byte buffer and the records read from the BLOCKINFO block.
class BitstreamReader {
29
/// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
30
/// These describe abbreviations that all blocks of the specified ID inherit.
33
/// Abbrevs - Abbreviations registered for this block ID. The pointers are
/// reference counted: users in this file call addRef()/dropRef() on them.
std::vector<BitCodeAbbrev*> Abbrevs;
36
/// RecordNames - Pairs of (value taken from Record[0], name string) appended
/// while handling BLOCKINFO_CODE_SETRECORDNAME records.
std::vector<std::pair<unsigned, std::string> > RecordNames;
39
/// FirstChar/LastChar - This remembers the first and last bytes of the
41
/// stream.
/// NOTE(review): cursors treat NextChar == LastChar as "no more data", so
/// LastChar presumably points one past the final byte -- confirm.
const unsigned char *FirstChar, *LastChar;
43
/// BlockInfoRecords - One entry per block ID that has block info; searched by
/// getBlockInfo() and appended to by getOrCreateBlockInfo().
std::vector<BlockInfo> BlockInfoRecords;
45
/// IgnoreBlockInfoNames - This is set to true if we don't care about the
46
/// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
48
/// NOTE(review): the end of the sentence above is missing from this listing.
bool IgnoreBlockInfoNames;
50
// Copy construction and copy assignment are declared only; per the markers
// on each line they are intentionally left without a definition.
BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED
51
void operator=(const BitstreamReader&); // NOT IMPLEMENTED
53
/// Default constructor - Starts with no buffer attached (both bounds null)
/// and with block/record names ignored.
BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
56
/// Construct a reader for the buffer bounded by Start and End.
/// NOTE(review): the statement that stores Start/End is not visible in this
/// listing -- presumably it forwards to init(); confirm.
BitstreamReader(const unsigned char *Start, const unsigned char *End) {
57
// Names stay ignored until a client calls CollectBlockInfoNames().
IgnoreBlockInfoNames = true;
61
/// init - Attach the reader to the buffer bounded by Start and End.
/// NOTE(review): only the length check is visible here; the assignments to
/// FirstChar/LastChar are missing from this listing.
void init(const unsigned char *Start, const unsigned char *End) {
64
// The cursor pulls data four bytes at a time (see BitstreamCursor::Read), so
// the buffer length must be a multiple of 4.
assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
68
// NOTE(review): the enclosing function header is not visible in this listing;
// this looks like the reader's teardown path (presumably the destructor).
// Free the BlockInfoRecords.
69
// Records are released back to front, one per iteration.
while (!BlockInfoRecords.empty()) {
70
BlockInfo &Info = BlockInfoRecords.back();
71
// Free blockinfo abbrev info.
72
for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
74
// Give up this record's reference on each abbreviation.
Info.Abbrevs[i]->dropRef();
75
BlockInfoRecords.pop_back();
79
/// getFirstChar - Return the stored pointer to the start of the buffer.
const unsigned char *getFirstChar() const {
  return FirstChar;
}
80
/// getLastChar - Return the stored pointer marking the end of the buffer.
const unsigned char *getLastChar() const {
  return LastChar;
}
82
/// CollectBlockInfoNames - This is called by clients that want block/record
84
void CollectBlockInfoNames() {
  // Opt in to name collection by clearing the "ignore names" flag.
  IgnoreBlockInfoNames = false;
}
85
bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
87
//===--------------------------------------------------------------------===//
89
//===--------------------------------------------------------------------===//
91
/// hasBlockInfoRecords - Return true if we've already read and processed the
92
/// block info block for this Bitstream. We only process it for the first
93
/// cursor that walks over it.
94
bool hasBlockInfoRecords() const {
  // Any stored record means the block info block was already processed.
  return !BlockInfoRecords.empty();
}
96
/// getBlockInfo - If there is block info for the specified ID, return it,
97
/// otherwise return null.
/// The result points into BlockInfoRecords, so it is only valid until that
/// vector is next grown (see getOrCreateBlockInfo).
98
const BlockInfo *getBlockInfo(unsigned BlockID) const {
99
// Common case, the most recent entry matches BlockID.
100
if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
101
return &BlockInfoRecords.back();
103
// Otherwise scan the records from the front for a matching ID.
for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
105
if (BlockInfoRecords[i].BlockID == BlockID)
106
return &BlockInfoRecords[i];
110
/// getOrCreateBlockInfo - Return the record for BlockID, appending a fresh
/// one (with only its BlockID set) when none exists yet. Appending may
/// reallocate BlockInfoRecords, so references obtained earlier can dangle.
BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
111
// Reuse the const lookup; the cast is safe because *this is non-const here.
if (const BlockInfo *BI = getBlockInfo(BlockID))
112
return *const_cast<BlockInfo*>(BI);
114
// Otherwise, add a new record.
115
BlockInfoRecords.push_back(BlockInfo());
116
BlockInfoRecords.back().BlockID = BlockID;
117
return BlockInfoRecords.back();
122
/// BitstreamCursor - A read position inside the buffer owned by a
/// BitstreamReader, together with the code size and abbreviations that apply
/// at that position.
class BitstreamCursor {
123
friend class Deserializer;
124
/// BitStream - The reader whose buffer this cursor walks (null until set).
BitstreamReader *BitStream;
125
/// NextChar - The next byte of the buffer that has not been loaded yet.
const unsigned char *NextChar;
127
/// CurWord - This is the current data we have pulled from the stream but have
128
/// not returned to the client.
/// NOTE(review): the CurWord declaration itself is missing from this listing.
131
/// BitsInCurWord - This is the number of bits in CurWord that are valid. This
132
/// is always from [0...31] inclusive.
133
unsigned BitsInCurWord;
135
// CurCodeSize - This is the declared size of code values used for the current
// block; ReadCode() reads exactly this many bits.
137
unsigned CurCodeSize;
139
/// CurAbbrevs - Abbrevs installed in this block.
140
std::vector<BitCodeAbbrev*> CurAbbrevs;
143
// NOTE(review): the header of the enclosing Block type is missing from this
// listing; the three lines below are its visible members.
/// PrevCodeSize - Code size of the enclosing block, restored by PopBlockScope.
unsigned PrevCodeSize;
144
/// PrevAbbrevs - Abbreviations of the enclosing block, swapped out on entry
/// to a sub-block and swapped back by PopBlockScope.
std::vector<BitCodeAbbrev*> PrevAbbrevs;
145
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
148
/// BlockScope - This tracks the codesize of parent blocks.
149
SmallVector<Block, 8> BlockScope;
152
/// Default constructor - Creates a cursor that is not attached to any reader.
/// NOTE(review): the constructor body is missing from this listing.
BitstreamCursor() : BitStream(0), NextChar(0) {
154
/// Copy constructor - Starts detached (null reader and position).
/// NOTE(review): the body is missing from this listing; presumably it then
/// delegates to operator= to take over RHS's state -- confirm.
BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
158
/// Construct a cursor positioned at the first byte of R's buffer. R must
/// already have a buffer attached, otherwise the assertion fires.
explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
159
NextChar = R.getFirstChar();
160
assert(NextChar && "Bitstream not initialized yet");
166
/// init - Point this cursor at the first byte of R's buffer. R must already
/// have a buffer attached, otherwise the assertion fires.
/// NOTE(review): other statements of this function are missing from this
/// listing.
void init(BitstreamReader &R) {
170
NextChar = R.getFirstChar();
171
assert(NextChar && "Bitstream not initialized yet");
181
/// operator= - Make this cursor a copy of RHS: same reader, position, pending
/// bits and code size. Every abbreviation pointer that ends up shared gets an
/// extra reference.
/// NOTE(review): some statements of this function are missing from this
/// listing (e.g. nothing visible releases the old state or copies BlockScope).
void operator=(const BitstreamCursor &RHS) {
184
BitStream = RHS.BitStream;
185
NextChar = RHS.NextChar;
186
CurWord = RHS.CurWord;
187
BitsInCurWord = RHS.BitsInCurWord;
188
CurCodeSize = RHS.CurCodeSize;
190
// Copy abbreviations, and bump ref counts.
191
CurAbbrevs = RHS.CurAbbrevs;
192
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
194
CurAbbrevs[i]->addRef();
196
// Copy block scope and bump ref counts.
197
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
199
std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
200
for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
202
Abbrevs[i]->addRef();
207
// NOTE(review): the enclosing function header is missing from this listing;
// this fragment drops the cursor's references on every abbreviation it holds.
// Free all the Abbrevs.
208
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
210
CurAbbrevs[i]->dropRef();
213
// Free all the Abbrevs in the block scope.
214
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
216
std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
217
for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
219
Abbrevs[i]->dropRef();
224
/// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
225
unsigned GetAbbrevIDWidth() const {
  // Abbrev IDs are read with the current block's code size.
  return CurCodeSize;
}
227
/// AtEndOfStream - Return true when every byte of the buffer has been loaded
/// and no loaded bits remain unread in CurWord.
bool AtEndOfStream() const {
228
return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
231
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
232
uint64_t GetCurrentBitNo() const {
233
// Bits of all bytes loaded so far, minus the loaded bits that are still
// waiting in CurWord.
return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
236
/// getBitStreamReader - Accessor for the reader this cursor is attached to.
/// NOTE(review): the body is missing from this listing; presumably it returns
/// BitStream -- confirm.
BitstreamReader *getBitStreamReader() {
239
/// getBitStreamReader - Const overload of the reader accessor.
/// NOTE(review): the body is missing from this listing; presumably it returns
/// BitStream -- confirm.
const BitstreamReader *getBitStreamReader() const {
244
/// JumpToBit - Reset the stream to the specified bit number.
245
void JumpToBit(uint64_t BitNo) {
246
// Byte offset of the 32-bit word containing BitNo (rounded down to a
// multiple of 4) ...
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
247
// ... and the position of BitNo inside that word.
uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
248
assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
249
BitStream->getFirstChar()) &&
252
// Move the cursor to the right word.
253
NextChar = BitStream->getFirstChar()+ByteNo;
257
// Skip over any bits that are already consumed.
259
Read(static_cast<unsigned>(WordBitNo));
263
/// Read - Consume NumBits (at most 32) bits from the stream and return them.
/// Bits come from CurWord first; when it runs short a new 4-byte word is
/// loaded and the missing high bits are taken from it.
/// NOTE(review): several statements (returns, NextChar advance, closing
/// braces) are missing from this listing.
uint32_t Read(unsigned NumBits) {
264
assert(NumBits <= 32 && "Cannot return more than 32 bits!");
265
// If the field is fully contained by CurWord, return it quickly.
266
if (BitsInCurWord >= NumBits) {
267
uint32_t R = CurWord & ((1U << NumBits)-1);
269
BitsInCurWord -= NumBits;
273
// If we run out of data, stop at the end of the stream.
274
if (NextChar == BitStream->getLastChar()) {
280
// Start the result with whatever is left in CurWord.
unsigned R = CurWord;
282
// Read the next word from the stream.
283
// The four bytes are combined with byte 0 as the least significant byte.
// NOTE(review): NextChar[3] is promoted to int before "<< 24"; a byte
// >= 0x80 shifts into the sign bit -- confirm this is acceptable here.
CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |
284
(NextChar[2] << 16) | (NextChar[3] << 24);
287
// Extract NumBits-BitsInCurWord from what we just read.
288
unsigned BitsLeft = NumBits-BitsInCurWord;
290
// Be careful here, BitsLeft is in the range [1..32] inclusive.
291
R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
293
// BitsLeft bits have just been used up from CurWord.
295
CurWord >>= BitsLeft;
298
BitsInCurWord = 32-BitsLeft;
302
/// Read64 - Read a fixed-width field of NumBits bits. Fields wider than 32
/// bits are read as a 32-bit low half followed by the remaining high bits.
uint64_t Read64(unsigned NumBits) {
303
if (NumBits <= 32) return Read(NumBits);
305
uint64_t V = Read(32);
306
// The second read supplies bits 32 and up of the result.
return V | (uint64_t)Read(NumBits-32) << 32;
309
/// ReadVBR - Read a variable-width value made of NumBits-wide chunks. In each
/// chunk the top bit says whether another chunk follows and the low
/// NumBits-1 bits carry payload, least significant chunk first.
/// NOTE(review): the loop header, the Result declaration and the return
/// statements are missing from this listing.
uint32_t ReadVBR(unsigned NumBits) {
310
uint32_t Piece = Read(NumBits);
311
// Continuation bit clear on the first chunk: single-chunk value.
if ((Piece & (1U << (NumBits-1))) == 0)
315
unsigned NextBit = 0;
317
// Merge this chunk's payload bits at the current offset.
Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
319
if ((Piece & (1U << (NumBits-1))) == 0)
322
NextBit += NumBits-1;
323
Piece = Read(NumBits);
327
// ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The
328
// chunk size of the VBR must still be <= 32 bits though.
// Chunk layout is the same as in ReadVBR; only the accumulator is widened.
// NOTE(review): the loop header, the Result declaration and the final return
// are missing from this listing.
329
uint64_t ReadVBR64(unsigned NumBits) {
330
uint32_t Piece = Read(NumBits);
331
// Continuation bit clear on the first chunk: single-chunk value.
if ((Piece & (1U << (NumBits-1))) == 0)
332
return uint64_t(Piece);
335
unsigned NextBit = 0;
337
// Widen the payload before shifting so high chunks are not truncated.
Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
339
if ((Piece & (1U << (NumBits-1))) == 0)
342
NextBit += NumBits-1;
343
Piece = Read(NumBits);
352
/// ReadCode - Read the next code, using the current block's code size as the
/// field width.
unsigned ReadCode() {
353
return Read(CurCodeSize);
358
// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
360
/// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
/// the block. The ID is stored as a VBR with chunk width bitc::BlockIDWidth.
362
unsigned ReadSubBlockID() {
363
return ReadVBR(bitc::BlockIDWidth);
366
/// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
367
/// over the body of this block. If the block record is malformed, return
/// true (the visible caller treats a true result as failure).
/// NOTE(review): the function header and the return statements are missing
/// from this listing.
370
// Read and ignore the codelen value. Since we are skipping this block, we
371
// don't care what code widths are used inside of it.
372
ReadVBR(bitc::CodeLenWidth);
374
// Length of the block body in 32-bit words.
unsigned NumWords = Read(bitc::BlockSizeWidth);
376
// Check that the block wasn't partially defined, and that the offset isn't
// past the end of the buffer.
// NOTE(review): NextChar+NumWords*4 is computed before it is range-checked;
// a huge NumWords could overflow the multiplication or the pointer -- confirm.
378
if (AtEndOfStream() || NextChar+NumWords*4 > BitStream->getLastChar())
381
NextChar += NumWords*4;
385
/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
386
/// the block, and return true if the block is valid.
/// NOTE(review): the only visible caller (ReadBlockInfoBlock) treats a true
/// result as failure, which contradicts the sentence above -- confirm which is
/// intended. The return statements are missing from this listing.
/// If NumWordsP is non-null it receives the block length in 32-bit words.
387
bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) {
388
// Save the current block's state on BlockScope.
389
BlockScope.push_back(Block(CurCodeSize));
390
// After the swap CurAbbrevs is empty and the parent's list sits on the stack.
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
392
// Add the abbrevs specific to this block to the CurAbbrevs list.
393
if (const BitstreamReader::BlockInfo *Info =
394
BitStream->getBlockInfo(BlockID)) {
395
for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
397
CurAbbrevs.push_back(Info->Abbrevs[i]);
398
// The cursor now shares ownership of this abbreviation.
CurAbbrevs.back()->addRef();
402
// Get the codesize of this block.
403
CurCodeSize = ReadVBR(bitc::CodeLenWidth);
405
unsigned NumWords = Read(bitc::BlockSizeWidth);
406
if (NumWordsP) *NumWordsP = NumWords;
408
// Validate that this block is sane.
409
if (CurCodeSize == 0 || AtEndOfStream() ||
410
NextChar+NumWords*4 > BitStream->getLastChar())
416
/// ReadBlockEnd - Handle the end of the current block. Returns true when
/// there is no enclosing scope to return to.
/// NOTE(review): the rest of the body is missing from this listing.
bool ReadBlockEnd() {
417
if (BlockScope.empty()) return true;
420
// [END_BLOCK, <align4bytes>]
428
/// PopBlockScope - Restore the parent block's code size and abbreviations
/// from the top of BlockScope and drop the finished block's abbreviations.
/// BlockScope must not be empty.
void PopBlockScope() {
429
CurCodeSize = BlockScope.back().PrevCodeSize;
431
// Delete abbrevs from popped scope.
432
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
434
CurAbbrevs[i]->dropRef();
436
// Bring the parent's abbreviation list back into CurAbbrevs.
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
437
BlockScope.pop_back();
440
//===--------------------------------------------------------------------===//
442
//===--------------------------------------------------------------------===//
445
/// ReadAbbreviatedLiteral - Append the constant carried by a literal abbrev
/// operand to Vals. Nothing is read from the stream.
void ReadAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
446
SmallVectorImpl<uint64_t> &Vals) {
447
assert(Op.isLiteral() && "Not a literal");
448
// If the abbrev specifies the literal value to use, use it.
449
Vals.push_back(Op.getLiteralValue());
452
/// ReadAbbreviatedField - Read one value from the stream using the encoding
/// named by the non-literal operand Op, and append it to Vals.
/// NOTE(review): the statements ending each case are missing from this
/// listing.
void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
453
SmallVectorImpl<uint64_t> &Vals) {
454
assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
456
// Decode the value as we are commanded.
457
switch (Op.getEncoding()) {
458
default: assert(0 && "Unknown encoding!");
459
// Fixed: the encoding data is the field width in bits.
case BitCodeAbbrevOp::Fixed:
460
Vals.push_back(Read((unsigned)Op.getEncodingData()));
462
// VBR: the encoding data is the chunk width in bits.
case BitCodeAbbrevOp::VBR:
463
Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
465
// Char6: a 6-bit field translated by DecodeChar6.
case BitCodeAbbrevOp::Char6:
466
Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
472
/// getAbbrev - Return the abbreviation for the specified AbbrevId.
/// IDs below bitc::FIRST_APPLICATION_ABBREV wrap to a huge index here, and
/// the range is checked by assertion only: with assertions disabled an
/// invalid ID indexes CurAbbrevs out of bounds.
473
const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
474
// Application abbrevs are numbered from FIRST_APPLICATION_ABBREV upward.
unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
475
assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
476
return CurAbbrevs[AbbrevNo];
479
/// ReadRecord - Read one record introduced by AbbrevID and append its
/// operands to Vals. Unabbreviated records are read as vbr6 values; for
/// abbreviated records each operand is decoded as the abbreviation directs,
/// with the first decoded value taken as the record code and removed from Vals.
/// BlobStart/BlobLen are optional out-parameters for blob operands.
/// NOTE(review): several statements (returns, the BlobStart/BlobLen null
/// checks, closing braces) are missing from this listing.
unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
480
const char **BlobStart = 0, unsigned *BlobLen = 0) {
481
// Unabbreviated form: code, element count, then that many elements.
if (AbbrevID == bitc::UNABBREV_RECORD) {
482
unsigned Code = ReadVBR(6);
483
unsigned NumElts = ReadVBR(6);
484
for (unsigned i = 0; i != NumElts; ++i)
485
Vals.push_back(ReadVBR64(6));
489
const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
491
for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
492
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
493
if (Op.isLiteral()) {
494
ReadAbbreviatedLiteral(Op, Vals);
495
} else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
496
// Array case. Read the number of elements as a vbr6.
497
unsigned NumElts = ReadVBR(6);
499
// Get the element encoding.
500
// The operand after the array op describes the elements, so the array op
// must be the second-to-last operand. ++i consumes that element operand.
assert(i+2 == e && "array op not second to last?");
501
const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
503
// Read all the elements.
504
for (; NumElts; --NumElts)
505
ReadAbbreviatedField(EltEnc, Vals);
506
} else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
507
// Blob case. Read the number of bytes as a vbr6.
508
unsigned NumElts = ReadVBR(6);
509
SkipToWord(); // 32-bit alignment
511
// Figure out where the end of this blob will be including tail padding.
512
// (NumElts+3)&~3 rounds the byte count up to a multiple of 4.
const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
514
// If this would read off the end of the bitcode file, just set the
515
// record to empty and return.
// NOTE(review): the visible code appends NumElts zeros rather than leaving
// the record empty -- confirm which behaviour is intended.
516
if (NewEnd > BitStream->getLastChar()) {
517
Vals.append(NumElts, 0);
518
NextChar = BitStream->getLastChar();
522
// Otherwise, read the number of bytes. If we can return a reference to
523
// the data, do so to avoid copying it.
525
*BlobStart = (const char*)NextChar;
528
// Copy path: each blob byte becomes one element of Vals.
for (; NumElts; ++NextChar, --NumElts)
529
Vals.push_back(*NextChar);
531
// Skip over tail padding.
534
ReadAbbreviatedField(Op, Vals);
538
// The first decoded value is the record code, not an operand.
unsigned Code = (unsigned)Vals[0];
539
Vals.erase(Vals.begin());
543
/// ReadRecord - Convenience overload taking the blob out-parameters by
/// reference; forwards their addresses to the pointer-based ReadRecord.
unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
544
const char *&BlobStart, unsigned &BlobLen) {
545
return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen);
549
//===--------------------------------------------------------------------===//
551
//===--------------------------------------------------------------------===//
553
/// ReadAbbrevRecord - Read an abbreviation definition from the stream and
/// install it as the newest entry of CurAbbrevs.
/// Layout read here: vbr5 operand count, then per operand a 1-bit literal
/// flag followed by either a vbr8 literal value or a 3-bit encoding with an
/// optional vbr5 encoding datum.
/// NOTE(review): the if/else lines that select between the visible Add()
/// calls are missing from this listing.
void ReadAbbrevRecord() {
554
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
555
unsigned NumOpInfo = ReadVBR(5);
556
for (unsigned i = 0; i != NumOpInfo; ++i) {
557
bool IsLiteral = Read(1) ? true : false;
559
// Literal operand: its constant value follows as a vbr8.
Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
563
// Non-literal operand: a 3-bit encoding kind.
BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
564
if (BitCodeAbbrevOp::hasEncodingData(E))
565
Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5)));
567
Abbv->Add(BitCodeAbbrevOp(E));
569
CurAbbrevs.push_back(Abbv);
574
/// ReadBlockInfoBlock - Parse the BLOCKINFO block at the cursor and store its
/// abbreviations and (unless names are ignored) block/record names in the
/// reader's BlockInfo records. Returns true on malformed input; on END_BLOCK
/// it returns the result of ReadBlockEnd().
/// NOTE(review): the loop header, several returns/breaks and the Name
/// declarations are missing from this listing.
bool ReadBlockInfoBlock() {
575
// If this is the second stream to get to the block info block, skip it.
576
if (BitStream->hasBlockInfoRecords())
579
if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
581
SmallVector<uint64_t, 64> Record;
// Record being filled in; stays null until a SETBID record is seen.
582
BitstreamReader::BlockInfo *CurBlockInfo = 0;
584
// Read all the records for this module.
586
unsigned Code = ReadCode();
587
if (Code == bitc::END_BLOCK)
588
return ReadBlockEnd();
589
if (Code == bitc::ENTER_SUBBLOCK) {
591
if (SkipBlock()) return true;
595
// Read abbrev records, associate them with the current block info.
596
if (Code == bitc::DEFINE_ABBREV) {
// An abbreviation before any SETBID has no block to attach to.
597
if (!CurBlockInfo) return true;
600
// ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
601
// appropriate BlockInfo.
602
BitCodeAbbrev *Abbv = CurAbbrevs.back();
603
CurAbbrevs.pop_back();
604
CurBlockInfo->Abbrevs.push_back(Abbv);
610
switch (ReadRecord(Code, Record)) {
611
default: break; // Default behavior, ignore unknown content.
612
case bitc::BLOCKINFO_CODE_SETBID:
613
if (Record.size() < 1) return true;
614
CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
616
case bitc::BLOCKINFO_CODE_BLOCKNAME: {
617
if (!CurBlockInfo) return true;
618
if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
620
// Every record element is one character of the block name.
for (unsigned i = 0, e = Record.size(); i != e; ++i)
621
Name += (char)Record[i];
622
CurBlockInfo->Name = Name;
625
case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
626
if (!CurBlockInfo) return true;
627
if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
629
// Element 0 goes into the pair below; elements 1.. are the name characters.
// NOTE(review): no check that Record is non-empty is visible here; with an
// empty record "i != e" never holds at i = 1 and Record[0] is out of range
// -- confirm a size check exists on a line missing from this listing.
for (unsigned i = 1, e = Record.size(); i != e; ++i)
630
Name += (char)Record[i];
631
CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
640
} // End llvm namespace