1
//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3
// The LLVM Compiler Infrastructure
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
8
//===----------------------------------------------------------------------===//
10
// FileCheck does a line-by-line check of a file that validates whether it
11
// contains the expected content. This is useful for regression tests etc.
13
// This program exits with an error status of 2 on error, exit status of 0 if
14
// the file matched the expected contents, and exit status of 1 if it did not
15
// contain the expected contents.
17
//===----------------------------------------------------------------------===//
19
#include "llvm/Support/CommandLine.h"
20
#include "llvm/Support/MemoryBuffer.h"
21
#include "llvm/Support/PrettyStackTrace.h"
22
#include "llvm/Support/Regex.h"
23
#include "llvm/Support/SourceMgr.h"
24
#include "llvm/Support/raw_ostream.h"
25
#include "llvm/System/Signals.h"
26
#include "llvm/ADT/SmallString.h"
27
#include "llvm/ADT/StringMap.h"
31
static cl::opt<std::string>
32
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
34
static cl::opt<std::string>
35
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
36
cl::init("-"), cl::value_desc("filename"));
38
static cl::opt<std::string>
39
CheckPrefix("check-prefix", cl::init("CHECK"),
40
cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
43
NoCanonicalizeWhiteSpace("strict-whitespace",
44
cl::desc("Do not treat all horizontal whitespace as equivalent"));
46
//===----------------------------------------------------------------------===//
47
// Pattern Handling Code.
48
//===----------------------------------------------------------------------===//
53
/// FixedStr - If non-empty, this pattern is a fixed string match with the
54
/// specified fixed string.
57
/// RegEx - If non-empty, this is a regex pattern.
60
/// VariableUses - Entries in this vector map to uses of a variable in the
61
/// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
62
/// "foobaz" and we'll get an entry in this vector that tells us to insert the
63
/// value of bar at offset 3.
64
std::vector<std::pair<StringRef, unsigned> > VariableUses;
66
/// VariableDefs - Entries in this vector map to definitions of a variable in
67
/// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will
68
/// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The
69
/// index indicates what parenthesized value captures the variable value.
70
std::vector<std::pair<StringRef, unsigned> > VariableDefs;
76
bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
78
/// Match - Match the pattern string against the input buffer Buffer. This
79
/// returns the position that is matched or npos if there is no match. If
80
/// there is a match, the size of the matched string is returned in MatchLen.
82
/// The VariableTable StringMap provides the current values of filecheck
83
/// variables and is updated if this match defines new values.
84
size_t Match(StringRef Buffer, size_t &MatchLen,
85
StringMap<StringRef> &VariableTable) const;
87
/// PrintFailureInfo - Print additional information about a failure to match
88
/// involving this pattern.
89
void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
90
const StringMap<StringRef> &VariableTable) const;
93
static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
94
bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM);
96
/// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
97
/// matching this pattern at the start of \arg Buffer; a distance of zero
98
/// should correspond to a perfect match.
99
unsigned ComputeMatchDistance(StringRef Buffer,
100
const StringMap<StringRef> &VariableTable) const;
104
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
105
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
107
// Ignore trailing whitespace.
108
while (!PatternStr.empty() &&
109
(PatternStr.back() == ' ' || PatternStr.back() == '\t'))
110
PatternStr = PatternStr.substr(0, PatternStr.size()-1);
112
// Check that there is something on the line.
113
if (PatternStr.empty()) {
114
SM.PrintMessage(PatternLoc, "found empty check string with prefix '" +
115
CheckPrefix+":'", "error");
119
// Check to see if this is a fixed string, or if it has regex pieces.
120
if (PatternStr.size() < 2 ||
121
(PatternStr.find("{{") == StringRef::npos &&
122
PatternStr.find("[[") == StringRef::npos)) {
123
FixedStr = PatternStr;
127
// Paren value #0 is for the fully matched string. Any new parenthesized
128
// values add from there.
129
unsigned CurParen = 1;
131
// Otherwise, there is at least one regex piece. Build up the regex pattern
132
// by escaping scary characters in fixed strings, building up one big regex.
133
while (!PatternStr.empty()) {
135
if (PatternStr.size() >= 2 &&
136
PatternStr[0] == '{' && PatternStr[1] == '{') {
138
// Otherwise, this is the start of a regex match. Scan for the }}.
139
size_t End = PatternStr.find("}}");
140
if (End == StringRef::npos) {
141
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
142
"found start of regex string with no end '}}'", "error");
146
if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
148
PatternStr = PatternStr.substr(End+2);
152
// Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
153
// (or some other regex) and assigns it to the FileCheck variable 'foo'. The
154
// second form is [[foo]] which is a reference to foo. The variable name
155
// itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
156
// it. This is to catch some common errors.
157
if (PatternStr.size() >= 2 &&
158
PatternStr[0] == '[' && PatternStr[1] == '[') {
159
// Verify that it is terminated properly.
160
size_t End = PatternStr.find("]]");
161
if (End == StringRef::npos) {
162
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
163
"invalid named regex reference, no ]] found", "error");
167
StringRef MatchStr = PatternStr.substr(2, End-2);
168
PatternStr = PatternStr.substr(End+2);
170
// Get the regex name (e.g. "foo").
171
size_t NameEnd = MatchStr.find(':');
172
StringRef Name = MatchStr.substr(0, NameEnd);
175
SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
176
"invalid name in named regex: empty name", "error");
180
// Verify that the name is well formed.
181
for (unsigned i = 0, e = Name.size(); i != e; ++i)
182
if (Name[i] != '_' &&
183
(Name[i] < 'a' || Name[i] > 'z') &&
184
(Name[i] < 'A' || Name[i] > 'Z') &&
185
(Name[i] < '0' || Name[i] > '9')) {
186
SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
187
"invalid name in named regex", "error");
191
// Name can't start with a digit.
192
if (isdigit(Name[0])) {
193
SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
194
"invalid name in named regex", "error");
199
if (NameEnd == StringRef::npos) {
200
VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
204
// Handle [[foo:.*]].
205
VariableDefs.push_back(std::make_pair(Name, CurParen));
209
if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
215
// Handle fixed string matches.
216
// Find the end, which is the start of the next regex.
217
size_t FixedMatchEnd = PatternStr.find("{{");
218
FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
219
AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
220
PatternStr = PatternStr.substr(FixedMatchEnd);
227
void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
228
// Add the characters from FixedStr to the regex, escaping as needed. This
229
// avoids "leaning toothpicks" in common patterns.
230
for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
231
switch (FixedStr[i]) {
232
// These are the special characters matched in "p_ere_exp".
248
TheStr += FixedStr[i];
254
bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
258
if (!R.isValid(Error)) {
259
SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()),
260
"invalid regex: " + Error, "error");
264
RegExStr += RegexStr.str();
265
CurParen += R.getNumMatches();
269
/// Match - Match the pattern string against the input buffer Buffer. This
270
/// returns the position that is matched or npos if there is no match. If
271
/// there is a match, the size of the matched string is returned in MatchLen.
272
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
273
StringMap<StringRef> &VariableTable) const {
274
// If this is a fixed string pattern, just match it now.
275
if (!FixedStr.empty()) {
276
MatchLen = FixedStr.size();
277
return Buffer.find(FixedStr);
282
// If there are variable uses, we need to create a temporary string with the
284
StringRef RegExToMatch = RegExStr;
286
if (!VariableUses.empty()) {
289
unsigned InsertOffset = 0;
290
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
291
StringMap<StringRef>::iterator it =
292
VariableTable.find(VariableUses[i].first);
293
// If the variable is undefined, return an error.
294
if (it == VariableTable.end())
295
return StringRef::npos;
297
// Look up the value and escape it so that we can plop it into the regex.
299
AddFixedStringToRegEx(it->second, Value);
301
// Plop it into the regex at the adjusted offset.
302
TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
303
Value.begin(), Value.end());
304
InsertOffset += Value.size();
307
// Match the newly constructed regex.
308
RegExToMatch = TmpStr;
312
SmallVector<StringRef, 4> MatchInfo;
313
if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
314
return StringRef::npos;
316
// Successful regex match.
317
assert(!MatchInfo.empty() && "Didn't get any match");
318
StringRef FullMatch = MatchInfo[0];
320
// If this defines any variables, remember their values.
321
for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) {
322
assert(VariableDefs[i].second < MatchInfo.size() &&
323
"Internal paren error");
324
VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second];
327
MatchLen = FullMatch.size();
328
return FullMatch.data()-Buffer.data();
331
/// ComputeMatchDistance - Score how closely the start of Buffer resembles this
/// pattern; a smaller value means a closer resemblance. The score is the edit
/// distance between an example string for the pattern (FixedStr, or the raw
/// RegExStr text when FixedStr is empty) and the first line of Buffer,
/// truncated to the example string's length.
/// NOTE(review): VariableTable is not referenced in the visible body - confirm
/// whether it is intentionally unused.
unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
332
const StringMap<StringRef> &VariableTable) const {
333
// Just compute the number of matching characters. For regular expressions, we
334
// just compare against the regex itself and hope for the best.
336
// FIXME: One easy improvement here is to have the regex lib generate a single
337
// example regular expression which matches, and use that as the example
339
StringRef ExampleString(FixedStr);
340
if (ExampleString.empty())
341
ExampleString = RegExStr;
343
// Only compare up to the first line in the buffer, or the string size.
344
StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
345
BufferPrefix = BufferPrefix.split('\n').first;
346
return BufferPrefix.edit_distance(ExampleString);
349
void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
350
const StringMap<StringRef> &VariableTable) const{
351
// If this was a regular expression using variables, print the current
353
if (!VariableUses.empty()) {
354
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
355
StringRef Var = VariableUses[i].first;
356
StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
357
SmallString<256> Msg;
358
raw_svector_ostream OS(Msg);
360
// Check for undefined variable references.
361
if (it == VariableTable.end()) {
362
OS << "uses undefined variable \"";
363
OS.write_escaped(Var) << "\"";;
365
OS << "with variable \"";
366
OS.write_escaped(Var) << "\" equal to \"";
367
OS.write_escaped(it->second) << "\"";
370
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), OS.str(), "note",
375
// Attempt to find the closest/best fuzzy match. Usually an error happens
376
// because some string in the output didn't exactly match. In these cases, we
377
// would like to show the user a best guess at what "should have" matched, to
378
// save them having to actually check the input manually.
379
size_t NumLinesForward = 0;
380
size_t Best = StringRef::npos;
381
double BestQuality = 0;
383
// Use an arbitrary 4k limit on how far we will search.
384
for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
385
if (Buffer[i] == '\n')
388
// Patterns have leading whitespace stripped, so skip whitespace when
389
// looking for something which looks like a pattern.
390
if (Buffer[i] == ' ' || Buffer[i] == '\t')
393
// Compute the "quality" of this match as an arbitrary combination of the
394
// match distance and the number of lines skipped to get to this match.
395
unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
396
double Quality = Distance + (NumLinesForward / 100.);
398
if (Quality < BestQuality || Best == StringRef::npos) {
400
BestQuality = Quality;
404
if (Best != StringRef::npos && BestQuality < 50) {
405
// Print the "possible intended match here" line if we found something
407
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
408
"possible intended match here", "note");
410
// FIXME: If we wanted to be really friendly we would show why the match
411
// failed, as it can be hard to spot simple one character differences.
415
//===----------------------------------------------------------------------===//
417
//===----------------------------------------------------------------------===//
419
/// CheckString - This is a check that we found in the input file.
421
/// Pat - The pattern to match.
424
/// Loc - The location in the match file that the check string was specified.
427
/// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
428
/// to a CHECK: directive).
431
/// NotStrings - These are all of the strings that are disallowed from
432
/// occurring between this match string and the previous one (or start of
434
std::vector<std::pair<SMLoc, Pattern> > NotStrings;
436
/// Construct a CheckString from pattern P, the location L at which it was
/// specified, and whether it came from a CHECK-NEXT: directive. NotStrings is
/// not set here, so it starts out empty.
CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
437
: Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
440
/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
441
/// memory buffer, free it, and return a new one.
442
static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
443
SmallVector<char, 16> NewFile;
444
NewFile.reserve(MB->getBufferSize());
446
for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
448
// If the current character is not horizontal whitespace, copy it unchanged.
449
if (*Ptr != ' ' && *Ptr != '\t') {
450
NewFile.push_back(*Ptr);
454
// Otherwise, add one space and advance over neighboring space.
455
NewFile.push_back(' ');
456
while (Ptr+1 != End &&
457
(Ptr[1] == ' ' || Ptr[1] == '\t'))
461
// Free the old buffer and return a new one.
463
MemoryBuffer::getMemBufferCopy(NewFile.data(),
464
NewFile.data() + NewFile.size(),
465
MB->getBufferIdentifier());
472
/// ReadCheckFile - Read the check file, which specifies the sequence of
473
/// expected strings. The strings are added to the CheckStrings vector.
474
static bool ReadCheckFile(SourceMgr &SM,
475
std::vector<CheckString> &CheckStrings) {
476
// Open the check file, and tell SourceMgr about it.
477
std::string ErrorStr;
479
MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
481
errs() << "Could not open check file '" << CheckFilename << "': "
486
// If we want to canonicalize whitespace, strip excess whitespace from the
487
// buffer containing the CHECK lines.
488
if (!NoCanonicalizeWhiteSpace)
489
F = CanonicalizeInputFile(F);
491
SM.AddNewSourceBuffer(F, SMLoc());
493
// Find all instances of CheckPrefix followed by : in the file.
494
StringRef Buffer = F->getBuffer();
496
std::vector<std::pair<SMLoc, Pattern> > NotMatches;
499
// See if Prefix occurs in the memory buffer.
500
Buffer = Buffer.substr(Buffer.find(CheckPrefix));
502
// If we didn't find a match, we're done.
506
const char *CheckPrefixStart = Buffer.data();
508
// When we find a check prefix, keep track of whether we find CHECK: or
510
bool IsCheckNext = false, IsCheckNot = false;
512
// Verify that the : is present after the prefix.
513
if (Buffer[CheckPrefix.size()] == ':') {
514
Buffer = Buffer.substr(CheckPrefix.size()+1);
515
} else if (Buffer.size() > CheckPrefix.size()+6 &&
516
memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
517
Buffer = Buffer.substr(CheckPrefix.size()+7);
519
} else if (Buffer.size() > CheckPrefix.size()+5 &&
520
memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
521
Buffer = Buffer.substr(CheckPrefix.size()+6);
524
Buffer = Buffer.substr(1);
528
// Okay, we found the prefix, yay. Remember the rest of the line, but
529
// ignore leading and trailing whitespace.
530
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
532
// Scan ahead to the end of line.
533
size_t EOL = Buffer.find_first_of("\n\r");
535
// Remember the location of the start of the pattern, for diagnostics.
536
SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
538
// Parse the pattern.
540
if (P.ParsePattern(Buffer.substr(0, EOL), SM))
543
Buffer = Buffer.substr(EOL);
546
// Verify that CHECK-NEXT lines have at least one CHECK line before them.
547
if (IsCheckNext && CheckStrings.empty()) {
548
SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
549
"found '"+CheckPrefix+"-NEXT:' without previous '"+
550
CheckPrefix+ ": line", "error");
556
NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
562
// Okay, add the string we captured to the output vector and move on.
563
CheckStrings.push_back(CheckString(P,
566
std::swap(NotMatches, CheckStrings.back().NotStrings);
569
if (CheckStrings.empty()) {
570
errs() << "error: no check strings found with prefix '" << CheckPrefix
575
if (!NotMatches.empty()) {
576
errs() << "error: '" << CheckPrefix
577
<< "-NOT:' not supported after last check line.\n";
584
static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
586
StringMap<StringRef> &VariableTable) {
587
// Otherwise, we have an error, emit an error message.
588
SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
591
// Print the "scanning from here" line. If the current position is at the
592
// end of a line, advance to the start of the next line.
593
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
595
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
598
// Allow the pattern to print additional information if desired.
599
CheckStr.Pat.PrintFailureInfo(SM, Buffer, VariableTable);
602
/// CountNumNewlinesBetween - Count the number of newlines in the specified
604
static unsigned CountNumNewlinesBetween(StringRef Range) {
605
unsigned NumNewLines = 0;
608
Range = Range.substr(Range.find_first_of("\n\r"));
609
if (Range.empty()) return NumNewLines;
613
// Handle \n\r and \r\n as a single newline.
614
if (Range.size() > 1 &&
615
(Range[1] == '\n' || Range[1] == '\r') &&
616
(Range[0] != Range[1]))
617
Range = Range.substr(1);
618
Range = Range.substr(1);
622
int main(int argc, char **argv) {
623
sys::PrintStackTraceOnErrorSignal();
624
PrettyStackTraceProgram X(argc, argv);
625
cl::ParseCommandLineOptions(argc, argv);
629
// Read the expected strings from the check file.
630
std::vector<CheckString> CheckStrings;
631
if (ReadCheckFile(SM, CheckStrings))
634
// Open the file to check and add it to SourceMgr.
635
std::string ErrorStr;
637
MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
639
errs() << "Could not open input file '" << InputFilename << "': "
644
// Remove duplicate spaces in the input file if requested.
645
if (!NoCanonicalizeWhiteSpace)
646
F = CanonicalizeInputFile(F);
648
SM.AddNewSourceBuffer(F, SMLoc());
650
/// VariableTable - This holds all the current filecheck variables.
651
StringMap<StringRef> VariableTable;
653
// Check that we have all of the expected strings, in order, in the input
655
StringRef Buffer = F->getBuffer();
657
const char *LastMatch = Buffer.data();
659
for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
660
const CheckString &CheckStr = CheckStrings[StrNo];
662
StringRef SearchFrom = Buffer;
664
// Find StrNo in the file.
666
Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable));
668
// If we didn't find a match, reject the input.
669
if (Buffer.empty()) {
670
PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
674
StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
676
// If this check is a "CHECK-NEXT", verify that the previous match was on
677
// the previous line (i.e. that there is one newline between them).
678
if (CheckStr.IsCheckNext) {
679
// Count the number of newlines between the previous match and this one.
680
assert(LastMatch != F->getBufferStart() &&
681
"CHECK-NEXT can't be the first check in a file");
683
unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
684
if (NumNewLines == 0) {
685
SM.PrintMessage(CheckStr.Loc,
686
CheckPrefix+"-NEXT: is on the same line as previous match",
688
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
689
"'next' match was here", "note");
690
SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
691
"previous match was here", "note");
695
if (NumNewLines != 1) {
696
SM.PrintMessage(CheckStr.Loc,
698
"-NEXT: is not on the line after the previous match",
700
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
701
"'next' match was here", "note");
702
SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
703
"previous match was here", "note");
708
// If this match had "not strings", verify that they don't exist in the
710
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
711
ChunkNo != e; ++ChunkNo) {
713
size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
716
if (Pos == StringRef::npos) continue;
718
SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
719
CheckPrefix+"-NOT: string occurred!", "error");
720
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
721
CheckPrefix+"-NOT: pattern specified here", "note");
726
// Otherwise, everything is good. Step over the matched text and remember
727
// the position after the match as the end of the last match.
728
Buffer = Buffer.substr(MatchLen);
729
LastMatch = Buffer.data();