1
//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3
// The LLVM Compiler Infrastructure
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
8
//===----------------------------------------------------------------------===//
10
#include "llvm/Target/TargetAsmParser.h"
12
#include "llvm/ADT/SmallVector.h"
13
#include "llvm/ADT/StringSwitch.h"
14
#include "llvm/ADT/Twine.h"
15
#include "llvm/MC/MCStreamer.h"
16
#include "llvm/MC/MCExpr.h"
17
#include "llvm/MC/MCInst.h"
18
#include "llvm/MC/MCParser/MCAsmLexer.h"
19
#include "llvm/MC/MCParser/MCAsmParser.h"
20
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21
#include "llvm/Support/SourceMgr.h"
22
#include "llvm/Target/TargetRegistry.h"
23
#include "llvm/Target/TargetAsmParser.h"
29
class X86ATTAsmParser : public TargetAsmParser {
33
/// getParser - Return the generic MCAsmParser that was handed to this target
/// parser at construction time.
MCAsmParser &getParser() const {
  return Parser;
}
35
/// getLexer - Return the lexer exposed by the underlying parser's getLexer().
MCAsmLexer &getLexer() const {
  MCAsmLexer &Lexer = Parser.getLexer();
  return Lexer;
}
37
/// Warning - Forward a warning with message \p Msg at location \p L to the
/// underlying parser.
void Warning(SMLoc L, const Twine &Msg) {
  Parser.Warning(L, Msg);
}
39
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
41
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
43
X86Operand *ParseOperand();
44
X86Operand *ParseMemOperand();
46
bool ParseDirectiveWord(unsigned Size, SMLoc L);
48
/// @name Auto-generated Match Functions
51
bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
57
X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
58
: TargetAsmParser(T), Parser(_Parser) {}
60
virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
61
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
63
virtual bool ParseDirective(AsmToken DirectiveID);
66
} // end anonymous namespace
68
/// @name Auto-generated Match Functions
71
static unsigned MatchRegisterName(StringRef Name);
77
/// X86Operand - Instances of this class represent a parsed X86 machine
79
struct X86Operand : public MCParsedAsmOperand {
87
SMLoc StartLoc, EndLoc;
112
X86Operand(KindTy K, SMLoc Start, SMLoc End)
113
: Kind(K), StartLoc(Start), EndLoc(End) {}
115
/// getStartLoc - Get the location of the first token of this operand.
116
SMLoc getStartLoc() const { return StartLoc; }
117
/// getEndLoc - Get the location of the last token of this operand.
118
SMLoc getEndLoc() const { return EndLoc; }
120
StringRef getToken() const {
121
assert(Kind == Token && "Invalid access!");
122
return StringRef(Tok.Data, Tok.Length);
125
unsigned getReg() const {
126
assert(Kind == Register && "Invalid access!");
130
const MCExpr *getImm() const {
131
assert(Kind == Immediate && "Invalid access!");
135
const MCExpr *getMemDisp() const {
136
assert(Kind == Memory && "Invalid access!");
139
unsigned getMemSegReg() const {
140
assert(Kind == Memory && "Invalid access!");
143
unsigned getMemBaseReg() const {
144
assert(Kind == Memory && "Invalid access!");
147
unsigned getMemIndexReg() const {
148
assert(Kind == Memory && "Invalid access!");
151
unsigned getMemScale() const {
152
assert(Kind == Memory && "Invalid access!");
156
/// isToken - True when this operand's Kind is Token.
bool isToken() const {
  return Kind == Token;
}
158
/// isImm - True when this operand's Kind is Immediate.
bool isImm() const {
  return Kind == Immediate;
}
160
bool isImmSExt8() const {
161
// Accept immediates which fit in 8 bits when sign extended, and
162
// non-absolute immediates.
166
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
167
int64_t Value = CE->getValue();
168
return Value == (int64_t) (int8_t) Value;
174
/// isMem - True when this operand's Kind is Memory.
bool isMem() const {
  return Kind == Memory;
}
176
bool isAbsMem() const {
177
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
178
!getMemIndexReg() && getMemScale() == 1;
181
bool isNoSegMem() const {
182
return Kind == Memory && !getMemSegReg();
185
/// isReg - True when this operand's Kind is Register.
bool isReg() const {
  return Kind == Register;
}
187
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
188
// Add as immediates when possible.
189
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
190
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
192
Inst.addOperand(MCOperand::CreateExpr(Expr));
195
void addRegOperands(MCInst &Inst, unsigned N) const {
196
assert(N == 1 && "Invalid number of operands!");
197
Inst.addOperand(MCOperand::CreateReg(getReg()));
200
void addImmOperands(MCInst &Inst, unsigned N) const {
201
assert(N == 1 && "Invalid number of operands!");
202
addExpr(Inst, getImm());
205
void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
206
// FIXME: Support user customization of the render method.
207
assert(N == 1 && "Invalid number of operands!");
208
addExpr(Inst, getImm());
211
void addMemOperands(MCInst &Inst, unsigned N) const {
212
assert((N == 5) && "Invalid number of operands!");
213
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
214
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
215
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
216
addExpr(Inst, getMemDisp());
217
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
220
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
221
assert((N == 1) && "Invalid number of operands!");
222
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
225
void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
226
assert((N == 4) && "Invalid number of operands!");
227
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
228
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
229
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
230
addExpr(Inst, getMemDisp());
233
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
234
X86Operand *Res = new X86Operand(Token, Loc, Loc);
235
Res->Tok.Data = Str.data();
236
Res->Tok.Length = Str.size();
240
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
241
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
242
Res->Reg.RegNo = RegNo;
246
static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
247
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
252
/// Create an absolute memory operand.
253
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
255
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
257
Res->Mem.Disp = Disp;
258
Res->Mem.BaseReg = 0;
259
Res->Mem.IndexReg = 0;
264
/// Create a generalized memory operand.
265
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
266
unsigned BaseReg, unsigned IndexReg,
267
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
268
// We should never just have a displacement, that should be parsed as an
269
// absolute memory operand.
270
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
272
// The scale should always be one of {1,2,4,8}.
273
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
275
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
276
Res->Mem.SegReg = SegReg;
277
Res->Mem.Disp = Disp;
278
Res->Mem.BaseReg = BaseReg;
279
Res->Mem.IndexReg = IndexReg;
280
Res->Mem.Scale = Scale;
285
} // end anonymous namespace.
288
bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
289
SMLoc &StartLoc, SMLoc &EndLoc) {
291
const AsmToken &TokPercent = Parser.getTok();
292
assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
293
StartLoc = TokPercent.getLoc();
294
Parser.Lex(); // Eat percent token.
296
const AsmToken &Tok = Parser.getTok();
297
if (Tok.isNot(AsmToken::Identifier))
298
return Error(Tok.getLoc(), "invalid register name");
300
// FIXME: Validate register for the current architecture; we have to do
301
// validation later, so maybe there is no need for this here.
302
RegNo = MatchRegisterName(Tok.getString());
304
// Parse "%st(N)" stack registers, treating a bare "%st" as "%st(0)".
305
if (RegNo == 0 && Tok.getString() == "st") {
307
EndLoc = Tok.getLoc();
308
Parser.Lex(); // Eat 'st'
310
// Check to see if we have '(4)' after %st.
311
if (getLexer().isNot(AsmToken::LParen))
316
const AsmToken &IntTok = Parser.getTok();
317
if (IntTok.isNot(AsmToken::Integer))
318
return Error(IntTok.getLoc(), "expected stack index");
319
switch (IntTok.getIntVal()) {
320
case 0: RegNo = X86::ST0; break;
321
case 1: RegNo = X86::ST1; break;
322
case 2: RegNo = X86::ST2; break;
323
case 3: RegNo = X86::ST3; break;
324
case 4: RegNo = X86::ST4; break;
325
case 5: RegNo = X86::ST5; break;
326
case 6: RegNo = X86::ST6; break;
327
case 7: RegNo = X86::ST7; break;
328
default: return Error(IntTok.getLoc(), "invalid stack index");
331
if (getParser().Lex().isNot(AsmToken::RParen))
332
return Error(Parser.getTok().getLoc(), "expected ')'");
334
EndLoc = Tok.getLoc();
335
Parser.Lex(); // Eat ')'
340
return Error(Tok.getLoc(), "invalid register name");
342
EndLoc = Tok.getLoc();
343
Parser.Lex(); // Eat identifier token.
347
X86Operand *X86ATTAsmParser::ParseOperand() {
348
switch (getLexer().getKind()) {
350
return ParseMemOperand();
351
case AsmToken::Percent: {
352
// FIXME: if a segment register, this could either be just the seg reg, or
353
// the start of a memory operand.
356
if (ParseRegister(RegNo, Start, End)) return 0;
357
return X86Operand::CreateReg(RegNo, Start, End);
359
case AsmToken::Dollar: {
361
SMLoc Start = Parser.getTok().getLoc(), End;
364
if (getParser().ParseExpression(Val, End))
366
return X86Operand::CreateImm(Val, Start, End);
371
/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
372
X86Operand *X86ATTAsmParser::ParseMemOperand() {
373
SMLoc MemStart = Parser.getTok().getLoc();
375
// FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
378
// We have to disambiguate a parenthesized expression "(4+5)" from the start
379
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
380
// only way to do this without lookahead is to eat the '(' and see what is
382
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
383
if (getLexer().isNot(AsmToken::LParen)) {
385
if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
387
// After parsing the base expression we could either have a parenthesized
388
// memory address or not. If not, return now. If so, eat the (.
389
if (getLexer().isNot(AsmToken::LParen)) {
390
// Unless we have a segment register, treat this as an immediate.
392
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
393
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
399
// Okay, we have a '('. We don't know if this is an expression or not,
400
// so we have to eat the ( to see beyond it.
401
SMLoc LParenLoc = Parser.getTok().getLoc();
402
Parser.Lex(); // Eat the '('.
404
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
405
// Nothing to do here, fall into the code below with the '(' part of the
406
// memory operand consumed.
410
// It must be a parenthesized expression, parse it now.
411
if (getParser().ParseParenExpression(Disp, ExprEnd))
414
// After parsing the base expression we could either have a parenthesized
415
// memory address or not. If not, return now. If so, eat the (.
416
if (getLexer().isNot(AsmToken::LParen)) {
417
// Unless we have a segment register, treat this as an immediate.
419
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
420
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
428
// If we reached here, then we just ate the ( of the memory operand. Process
429
// the rest of the memory operand.
430
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
432
if (getLexer().is(AsmToken::Percent)) {
434
if (ParseRegister(BaseReg, L, L)) return 0;
437
if (getLexer().is(AsmToken::Comma)) {
438
Parser.Lex(); // Eat the comma.
440
// Following the comma we should have either an index register, or a scale
441
// value. We don't support the latter form, but we want to parse it
444
// Note that even though it would be completely consistent to support syntax
445
// like "1(%eax,,1)", the assembler doesn't.
446
if (getLexer().is(AsmToken::Percent)) {
448
if (ParseRegister(IndexReg, L, L)) return 0;
450
if (getLexer().isNot(AsmToken::RParen)) {
451
// Parse the scale amount:
452
// ::= ',' [scale-expression]
453
if (getLexer().isNot(AsmToken::Comma)) {
454
Error(Parser.getTok().getLoc(),
455
"expected comma in scale expression");
458
Parser.Lex(); // Eat the comma.
460
if (getLexer().isNot(AsmToken::RParen)) {
461
SMLoc Loc = Parser.getTok().getLoc();
464
if (getParser().ParseAbsoluteExpression(ScaleVal))
467
// Validate the scale amount.
468
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
469
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
472
Scale = (unsigned)ScaleVal;
475
} else if (getLexer().isNot(AsmToken::RParen)) {
476
// Otherwise we have the unsupported form of a scale amount without an
478
SMLoc Loc = Parser.getTok().getLoc();
481
if (getParser().ParseAbsoluteExpression(Value))
484
Error(Loc, "cannot have scale factor without index register");
489
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
490
if (getLexer().isNot(AsmToken::RParen)) {
491
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
494
SMLoc MemEnd = Parser.getTok().getLoc();
495
Parser.Lex(); // Eat the ')'.
497
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
501
bool X86ATTAsmParser::
502
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
503
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
504
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
505
// represent alternative syntaxes in the .td file, without requiring
506
// instruction duplication.
507
StringRef PatchedName = StringSwitch<StringRef>(Name)
509
.Case("salb", "shlb")
510
.Case("sall", "shll")
511
.Case("salq", "shlq")
512
.Case("salw", "shlw")
515
.Case("repnz", "repne")
517
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
519
if (getLexer().isNot(AsmToken::EndOfStatement)) {
521
// Parse '*' modifier.
522
if (getLexer().is(AsmToken::Star)) {
523
SMLoc Loc = Parser.getTok().getLoc();
524
Operands.push_back(X86Operand::CreateToken("*", Loc));
525
Parser.Lex(); // Eat the star.
528
// Read the first operand.
529
if (X86Operand *Op = ParseOperand())
530
Operands.push_back(Op);
534
while (getLexer().is(AsmToken::Comma)) {
535
Parser.Lex(); // Eat the comma.
537
// Parse and remember the operand.
538
if (X86Operand *Op = ParseOperand())
539
Operands.push_back(Op);
548
bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
549
StringRef IDVal = DirectiveID.getIdentifier();
550
if (IDVal == ".word")
551
return ParseDirectiveWord(2, DirectiveID.getLoc());
555
/// ParseDirectiveWord
556
/// ::= .word [ expression (, expression)* ]
557
bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
558
if (getLexer().isNot(AsmToken::EndOfStatement)) {
561
if (getParser().ParseExpression(Value))
564
getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
566
if (getLexer().is(AsmToken::EndOfStatement))
569
// FIXME: Improve diagnostic.
570
if (getLexer().isNot(AsmToken::Comma))
571
return Error(L, "unexpected token in directive");
580
extern "C" void LLVMInitializeX86AsmLexer();
582
// Force static initialization.
583
extern "C" void LLVMInitializeX86AsmParser() {
584
RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
585
RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
586
LLVMInitializeX86AsmLexer();
589
#include "X86GenAsmMatcher.inc"