2
===========================================================================
3
vm_x86_64_assembler.c -- assembler for x86-64
5
Copyright (C) 2007 Ludwig Nussel <ludwig.nussel@suse.de>, Novell inc.
7
Quake III Arena source code is free software; you can redistribute it
8
and/or modify it under the terms of the GNU General Public License as
9
published by the Free Software Foundation; either version 2 of the License,
10
or (at your option) any later version.
12
Quake III Arena source code is distributed in the hope that it will be
13
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
17
You should have received a copy of the GNU General Public License
18
along with Quake III Arena source code; if not, write to the Free Software
19
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
===========================================================================
28
typedef unsigned char u8;
29
typedef unsigned short u16;
30
typedef unsigned int u32;
31
/* NOTE(review): `unsigned long` is only 64 bits wide on LP64 targets; emit8()
 * shifts a u64 right by 32, so a 64-bit `long` is assumed here -- confirm for
 * every platform this file is built on. */
typedef unsigned long u64;
34
static unsigned compiledOfs;
35
static unsigned assembler_pass;
37
static const char* cur_line;
41
#define MIN(a,b) ((a) < (b) ? (a) : (b))
42
#define MAX(a,b) ((a) > (b) ? (a) : (b))
44
#define crap(fmt, args...) do { \
45
_crap(__FUNCTION__, fmt, ##args); \
48
#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type));
51
#define debug(fmt, args...) printf(fmt, ##args)
53
#define debug(fmt, args...)
56
static void _crap(const char* func, const char* fmt, ...)
59
fprintf(stderr, "%s() - ", func);
61
vfprintf(stderr, fmt, ap);
64
if(cur_line && cur_line[0])
65
fprintf(stderr, "-> %s\n", cur_line);
69
static void emit1(unsigned char v)
73
out[compiledOfs++] = v;
74
if(fout) fwrite(&v, 1, 1, fout);
83
static inline void emit2(u16 v)
89
static inline void emit4(u32 v)
97
static inline void emit8(u64 v)
100
emit4((v>>32)&0xFFFFFFFF);
112
MODRM_MOD_01 = 0x01 << 6,
113
MODRM_MOD_10 = 0x02 << 6,
114
MODRM_MOD_11 = 0x03 << 6,
132
R_MSZ = 0xF00, // size mask
133
R_XMM = 0x2000, // xmm register. year, sucks
141
R_RAX = R_EAX | R_64,
142
R_RBX = R_EBX | R_64,
143
R_RCX = R_ECX | R_64,
144
R_RDX = R_EDX | R_64,
145
R_RSI = R_ESI | R_64,
146
R_RDI = R_EDI | R_64,
147
R_RSP = R_ESP | R_64,
155
R_XMM0 = 0x00 | R_XMM,
156
R_MGP = 0x0F, // mask for general purpose registers
187
char label[LABELLEN];
192
typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data);
202
u8 subcode; // in modrm
203
u8 rmcode; // opcode for reg/mem, reg
204
u8 mrcode; // opcode for reg, reg/mem
205
u8 rcode8; // opcode for reg8/mem8
206
u8 rcode; // opcode for reg/mem
209
/* ************************* */
211
static unsigned hashkey(const char *string, unsigned len) {
212
unsigned register hash, i;
215
for (i = 0; i < len && string[i] != '\0'; ++i) {
216
hash += string[i] * (119 + i);
218
hash = (hash ^ (hash >> 10) ^ (hash >> 20));
225
struct hashentry* next;
227
static struct hashentry* labelhash[1021];
230
static void hash_add_label(const char* label, unsigned address)
233
unsigned i = hashkey(label, -1U);
234
i %= sizeof(labelhash)/sizeof(labelhash[0]);
235
h = malloc(sizeof(struct hashentry));
236
h->label = strdup(label);
237
h->address = address;
238
h->next = labelhash[i];
242
static unsigned lookup_label(const char* label)
245
unsigned i = hashkey(label, -1U);
246
i %= sizeof(labelhash)/sizeof(labelhash[0]);
247
for(h = labelhash[i]; h; h = h->next )
249
if(!strcmp(h->label, label))
253
crap("label %s undefined", label);
257
static void labelhash_free(void)
261
unsigned z = 0, min = -1U, max = 0, t = 0;
262
for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i)
268
struct hashentry* next = h->next;
276
//else printf("%u\n", n);
280
printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t, sizeof(labelhash)/sizeof(labelhash[0]), z, min, max);
281
memset(labelhash, 0, sizeof(labelhash));
284
/* ************************* */
287
static const char* argtype2str(argtype_t t)
291
case T_NONE: return "none";
292
case T_REGISTER: return "register";
293
case T_IMMEDIATE: return "immediate";
294
case T_MEMORY: return "memory";
295
case T_LABEL: return "label";
296
default: crap("invalid type");
302
/* ************************* */
304
static inline int iss8(u64 v)
306
/* Non-zero when v, reinterpreted as signed, fits a sign-extended 8-bit field
 * (-128..127).  The previous labs()-based test also accepted +128, which
 * turns into -128 once it is emitted as a single displacement/immediate byte. */
return ((long)v >= -0x80L && (long)v <= 0x7fL);
309
static inline int isu8(u64 v)
314
static inline int iss16(u64 v)
316
/* Non-zero when v, reinterpreted as signed, fits a sign-extended 16-bit field
 * (-32768..32767).  The previous labs()-based test also accepted +32768,
 * which is not representable as a signed 16-bit value. */
return ((long)v >= -0x8000L && (long)v <= 0x7fffL);
319
static inline int isu16(u64 v)
321
/* Non-zero when no bit above the low 16 is set, i.e. v fits an unsigned 16-bit field. */
return ((v & ~(u64)0xffffUL) == 0);
324
static inline int iss32(u64 v)
326
/* Non-zero when v, reinterpreted as signed, fits a sign-extended 32-bit field
 * (-2^31 .. 2^31-1).  The previous labs()-based test also accepted +2^31,
 * which a sign-extending imm32 cannot represent, and labs() itself is
 * undefined for the most negative long.  The lower bound is spelled as
 * (-0x7fffffffL - 1) so the constant stays signed whatever the width of long. */
return ((long)v >= (-0x7fffffffL - 1) && (long)v <= 0x7fffffffL);
329
static inline int isu32(u64 v)
331
/* Non-zero when no bit above the low 32 is set, i.e. v fits an unsigned 32-bit field. */
return ((v & ~(u64)0xffffffffUL) == 0);
334
static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
336
u8 op = (u8)((unsigned long) data);
338
if(arg1.type != T_NONE || arg2.type != T_NONE)
344
static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
347
emit_opsingle(mnemonic, arg1, arg2, data);
350
static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2)
356
if((arg1->type == T_REGISTER && arg2->type == T_REGISTER)
357
&& ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ))
358
&& !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM)))
359
crap("both registers must be of same width");
361
if((arg1->type == T_REGISTER && arg1->v.reg & R_64)
362
|| (arg2->type == T_REGISTER && arg2->v.reg & R_64))
367
if(arg1->type == T_REGISTER)
369
if((arg1->v.reg & R_MGP) > 0x07)
372
modrm |= (arg1->v.reg & 0x07) << 3;
375
if(arg2->type == T_REGISTER)
377
if((arg2->v.reg & R_MGP) > 0x07)
380
modrm |= (arg2->v.reg & 0x07);
383
if(arg2->type == T_MEMORY)
385
if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64))
386
|| (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64)))
388
crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg);
391
if(arg2->v.mem.indextype == T_REGISTER)
393
modrm |= MODRM_RM_SIB;
394
if(!arg2->v.mem.disp)
396
modrm |= MODRM_MOD_00;
398
else if(iss8(arg2->v.mem.disp))
400
modrm |= MODRM_MOD_01;
402
else if(isu32(arg2->v.mem.disp))
404
modrm |= MODRM_MOD_10;
408
crap("invalid displacement");
411
if((arg2->v.mem.index.reg & R_MGP) > 0x07)
414
if((arg2->v.mem.base.reg & R_MGP) > 0x07)
417
if(arg2->v.mem.basetype != T_REGISTER)
418
crap("base must be register");
419
switch(arg2->v.mem.scale)
422
case 2: sib |= 1 << 6; break;
423
case 4: sib |= 2 << 6; break;
424
case 8: sib |= 3 << 6; break;
426
sib |= (arg2->v.mem.index.reg & 0x07) << 3;
427
sib |= (arg2->v.mem.base.reg & 0x07);
429
else if(arg2->v.mem.indextype == T_NONE)
431
if(!arg2->v.mem.disp)
433
modrm |= MODRM_MOD_00;
435
else if(iss8(arg2->v.mem.disp))
437
modrm |= MODRM_MOD_01;
439
else if(isu32(arg2->v.mem.disp))
441
modrm |= MODRM_MOD_10;
445
crap("invalid displacement");
448
if(arg2->v.mem.basetype != T_REGISTER)
449
crap("todo: base != register");
451
if((arg2->v.mem.base.reg & R_MGP) > 0x07)
454
modrm |= arg2->v.mem.base.reg & 0x07;
458
crap("invalid indextype");
463
modrm |= MODRM_MOD_11;
474
static void maybe_emit_displacement(arg_t* arg)
476
if(arg->type != T_MEMORY)
481
if(iss8(arg->v.mem.disp))
483
emit1((u8)arg->v.mem.disp);
485
else if(isu32(arg->v.mem.disp))
487
emit4(arg->v.mem.disp);
491
crap("invalid displacement");
496
/* one byte operator with register added to operator */
497
static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
499
u8 op = (u8)((unsigned long) data);
501
if(arg1.type != T_REGISTER || arg2.type != T_NONE)
504
if((arg1.v.reg & R_MGP) > 0x07)
507
op |= (arg1.v.reg & 0x07);
512
/* operator which operates on reg/mem */
513
static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
516
opparam_t* params = data;
518
if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
521
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
523
modrm |= params->subcode << 3;
525
if(arg1.v.reg & R_16)
530
emit1(params->rcode8); // op reg8/mem8,
532
emit1(params->rcode); // op reg/mem,
534
if((modrm & 0x07) == MODRM_RM_SIB)
537
maybe_emit_displacement(&arg1);
540
/* operator which operates on reg/mem with cl */
541
static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
544
opparam_t* params = data;
546
if(arg2.type != T_REGISTER || arg1.type != T_REGISTER)
549
if((arg1.v.reg & R_MGP) != R_ECX && !(arg1.v.reg & R_8))
550
crap("only cl register is valid");
552
arg1.type = T_NONE; // don't complain, we know it's cl anyways
554
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
556
modrm |= params->subcode << 3;
558
if(arg2.v.reg & R_16)
563
emit1(params->rcode8); // op reg8/mem8,
565
emit1(params->rcode); // op reg/mem,
567
if((modrm & 0x07) == MODRM_RM_SIB)
570
maybe_emit_displacement(&arg2);
573
static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
579
if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
585
if(!isu8(arg1.v.imm))
586
crap("value too large for 8bit register");
590
else if(arg2.v.reg & R_16)
592
if(!isu16(arg1.v.imm))
593
crap("value too large for 16bit register");
596
/* Destination is neither 8, 16 nor 64 bit wide, i.e. a 32-bit register.
 * The old condition `!arg2.v.reg & R_64` parsed as `(!arg2.v.reg) & R_64`,
 * so the range check below was effectively never reached.  Now that it runs,
 * accept both plain unsigned 32-bit values and sign-extended negatives so
 * immediates that used to assemble (e.g. a negated constant) still do. */
else if(!(arg2.v.reg & R_64))
	if(!isu32(arg1.v.imm) && !iss32(arg1.v.imm))
599
crap("value too large for 32bit register");
602
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
606
op |= (arg2.v.reg & 0x07);
610
if(arg2.v.reg & R_8) emit1(arg1.v.imm);
611
else if(arg2.v.reg & R_16) emit2(arg1.v.imm);
612
else if(arg2.v.reg & R_64) emit8(arg1.v.imm);
613
else emit4(arg1.v.imm);
615
else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY)
617
if(!iss32(arg1.v.imm))
619
crap("only 32bit immediates supported");
622
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
624
emit1(0xc7); // mov reg/mem, imm
626
if((modrm & 0x07) == MODRM_RM_SIB)
631
else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER) // XXX: same as next
633
if(arg1.type != T_REGISTER || arg2.type != T_REGISTER)
634
crap("both args must be registers");
636
if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ))
637
crap("both registers must be same width");
639
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
642
emit1(0x89); // mov reg reg/mem,
645
else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY)
647
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
649
if(arg1.v.reg & R_16)
654
emit1(0x88); // mov reg reg/mem,
656
emit1(0x89); // mov reg reg/mem,
658
if((modrm & 0x07) == MODRM_RM_SIB)
661
maybe_emit_displacement(&arg2);
663
else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER)
665
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
667
if(arg2.v.reg & R_16)
672
emit1(0x8a); // mov reg/mem, reg
674
emit1(0x8b); // mov reg/mem, reg
676
if((modrm & 0x07) == MODRM_RM_SIB)
679
maybe_emit_displacement(&arg1);
685
static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
691
opparam_t* params = data;
693
if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
695
if(!iss32(arg1.v.imm))
697
crap("only 8 and 32 bit immediates supported");
700
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
702
modrm |= params->subcode << 3;
708
emit1(0x83); // sub reg/mem, imm8
710
emit1(arg1.v.imm&0xFF);
715
emit1(0x81); // sub reg/mem, imm32
720
else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
722
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
725
emit1(params->rmcode); // sub reg/mem, reg
727
if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB)
730
maybe_emit_displacement(&arg2);
732
else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
734
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
737
emit1(params->mrcode); // sub reg, reg/mem
739
if((modrm & 0x07) == MODRM_RM_SIB)
742
maybe_emit_displacement(&arg1);
748
static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
752
unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF);
754
if(arg1.type != T_LABEL || arg2.type != T_NONE)
755
crap("%s: argument must be label", mnemonic);
759
off = lookup_label(arg1.v.label);
760
disp = off-(compiledOfs+1);
761
if(assembler_pass && abs(disp) > 127)
762
crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp);
767
static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
769
if((arg1.type != T_LABEL && arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
772
if(arg1.type == T_LABEL)
777
off = lookup_label(arg1.v.label);
778
disp = off-(compiledOfs+5);
786
if(arg1.type == T_REGISTER)
789
crap("jmp must be absolute");
791
if((arg1.v.reg & R_64) != R_64)
792
crap("register must be 64bit");
794
arg1.v.reg ^= R_64; // no rex required for call
797
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
804
if((modrm & 0x07) == MODRM_RM_SIB)
806
maybe_emit_displacement(&arg1);
810
static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
814
if(arg1.type != T_REGISTER || arg2.type != T_NONE)
818
crap("call must be absolute");
820
if((arg1.v.reg & R_64) != R_64)
821
crap("register must be 64bit");
823
arg1.v.reg ^= R_64; // no rex required for call
825
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
835
static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
839
opparam_t* params = data;
841
if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
843
compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
845
if(params->xmmprefix) emit1(params->xmmprefix);
848
emit1(params->rmcode); // sub reg/mem, reg
850
if((modrm & 0x07) == MODRM_RM_SIB)
853
maybe_emit_displacement(&arg2);
855
else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
857
compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
859
if(params->xmmprefix) emit1(params->xmmprefix);
862
emit1(params->mrcode); // sub reg, reg/mem
864
if((modrm & 0x07) == MODRM_RM_SIB)
867
maybe_emit_displacement(&arg1);
873
/*
 * Per-mnemonic encoding parameters handed to the emitters through op_t.data.
 *
 *   subcode   - opcode extension placed in the reg field of the ModRM byte
 *               (the emitters do `modrm |= params->subcode << 3`)
 *   rmcode    - opcode for the "reg/mem, reg" form
 *   mrcode    - opcode for the "reg, reg/mem" form
 *   rcode     - opcode for the single-operand reg/mem form
 *   rcode8    - 8-bit variant of rcode
 *   xmmprefix - prefix byte emit_twobyte() emits first when it is non-zero
 *
 * Written with C99 designated initialisers instead of the obsolete GNU
 * `field:` spelling.
 */

/* arithmetic/logic group that also has an immediate form (emit_subaddand) */
static opparam_t params_add = { .subcode = 0, .rmcode = 0x01, };
static opparam_t params_or = { .subcode = 1, .rmcode = 0x09, };
static opparam_t params_and = { .subcode = 4, .rmcode = 0x21, };
static opparam_t params_sub = { .subcode = 5, .rmcode = 0x29, };
static opparam_t params_xor = { .subcode = 6, .rmcode = 0x31, };
/* CMP is opcode extension /7; the former value 6 is XOR's extension, so the
 * immediate form of cmp was encoded as xor and clobbered its destination. */
static opparam_t params_cmp = { .subcode = 7, .rmcode = 0x39, .mrcode = 0x3b, };

/* single-operand reg/mem group (emit_op_rm, emit_op_rm_cl) */
static opparam_t params_dec = { .subcode = 1, .rcode = 0xff, .rcode8 = 0xfe, };
static opparam_t params_sar = { .subcode = 7, .rcode = 0xd3, .rcode8 = 0xd2, };
static opparam_t params_shl = { .subcode = 4, .rcode = 0xd3, .rcode8 = 0xd2, };
static opparam_t params_shr = { .subcode = 5, .rcode = 0xd3, .rcode8 = 0xd2, };
static opparam_t params_idiv = { .subcode = 7, .rcode = 0xf7, .rcode8 = 0xf6, };
static opparam_t params_div = { .subcode = 6, .rcode = 0xf7, .rcode8 = 0xf6, };
static opparam_t params_imul = { .subcode = 5, .rcode = 0xf7, .rcode8 = 0xf6, };
static opparam_t params_mul = { .subcode = 4, .rcode = 0xf7, .rcode8 = 0xf6, };
static opparam_t params_neg = { .subcode = 3, .rcode = 0xf7, .rcode8 = 0xf6, };
static opparam_t params_not = { .subcode = 2, .rcode = 0xf7, .rcode8 = 0xf6, };

/* scalar single-precision group (emit_twobyte) */
static opparam_t params_cvtsi2ss = { .xmmprefix = 0xf3, .rmcode = 0x2a };
static opparam_t params_cvttss2si = { .xmmprefix = 0xf3, .rmcode = 0x2c };
static opparam_t params_addss = { .xmmprefix = 0xf3, .mrcode = 0x58 };
static opparam_t params_divss = { .xmmprefix = 0xf3, .mrcode = 0x5e };
static opparam_t params_movss = { .xmmprefix = 0xf3, .mrcode = 0x10, .rmcode = 0x11 };
static opparam_t params_mulss = { .xmmprefix = 0xf3, .mrcode = 0x59 };
static opparam_t params_subss = { .xmmprefix = 0xf3, .mrcode = 0x5c };
static opparam_t params_ucomiss = { .mrcode = 0x2e };
899
static int ops_sorted = 0;
900
static op_t ops[] = {
901
{ "addl", emit_subaddand, ¶ms_add },
902
{ "addq", emit_subaddand, ¶ms_add },
903
{ "addss", emit_twobyte, ¶ms_addss },
904
{ "andl", emit_subaddand, ¶ms_and },
905
{ "andq", emit_subaddand, ¶ms_and },
906
{ "callq", emit_call, NULL },
907
{ "cbw", emit_opsingle16, (void*)0x98 },
908
{ "cdq", emit_opsingle, (void*)0x99 },
909
{ "cmpl", emit_subaddand, ¶ms_cmp },
910
{ "cmpq", emit_subaddand, ¶ms_cmp },
911
{ "cvtsi2ss", emit_twobyte, ¶ms_cvtsi2ss },
912
{ "cvttss2si", emit_twobyte, ¶ms_cvttss2si },
913
{ "cwde", emit_opsingle, (void*)0x98 },
914
{ "decl", emit_op_rm, ¶ms_dec },
915
{ "decq", emit_op_rm, ¶ms_dec },
916
{ "divl", emit_op_rm, ¶ms_div },
917
{ "divq", emit_op_rm, ¶ms_div },
918
{ "divss", emit_twobyte, ¶ms_divss },
919
{ "idivl", emit_op_rm, ¶ms_idiv },
920
{ "imull", emit_op_rm, ¶ms_imul },
921
{ "int3", emit_opsingle, (void*)0xcc },
922
{ "ja", emit_condjump, (void*)0x77 },
923
{ "jbe", emit_condjump, (void*)0x76 },
924
{ "jb", emit_condjump, (void*)0x72 },
925
{ "je", emit_condjump, (void*)0x74 },
926
{ "jl", emit_condjump, (void*)0x7c },
927
{ "jmp", emit_jmp, NULL },
928
{ "jmpq", emit_jmp, NULL },
929
{ "jnae", emit_condjump, (void*)0x72 },
930
{ "jna", emit_condjump, (void*)0x76 },
931
{ "jnbe", emit_condjump, (void*)0x77 },
932
{ "jnb", emit_condjump, (void*)0x73 },
933
{ "jnc", emit_condjump, (void*)0x73 },
934
{ "jne", emit_condjump, (void*)0x75 },
935
{ "jnge", emit_condjump, (void*)0x7c },
936
{ "jng", emit_condjump, (void*)0x7e },
937
{ "jnle", emit_condjump, (void*)0x7f },
938
{ "jnl", emit_condjump, (void*)0x7d },
939
{ "jnz", emit_condjump, (void*)0x75 },
940
{ "jp", emit_condjump, (void*)0x7a },
941
{ "jz", emit_condjump, (void*)0x74 },
942
{ "movb", emit_mov, NULL },
943
{ "movl", emit_mov, NULL },
944
{ "movq", emit_mov, NULL },
945
{ "movss", emit_twobyte, ¶ms_movss },
946
{ "movw", emit_mov, NULL },
947
{ "mull", emit_op_rm, ¶ms_mul },
948
{ "mulss", emit_twobyte, ¶ms_mulss },
949
{ "negl", emit_op_rm, ¶ms_neg },
950
{ "negq", emit_op_rm, ¶ms_neg },
951
{ "nop", emit_opsingle, (void*)0x90 },
952
{ "notl", emit_op_rm, ¶ms_not },
953
{ "notq", emit_op_rm, ¶ms_not },
954
{ "or", emit_subaddand, ¶ms_or },
955
{ "orl", emit_subaddand, ¶ms_or },
956
{ "pop", emit_opreg, (void*)0x58 },
957
{ "push", emit_opreg, (void*)0x50 },
958
{ "ret", emit_opsingle, (void*)0xc3 },
959
{ "sarl", emit_op_rm_cl, ¶ms_sar },
960
{ "shl", emit_op_rm_cl, ¶ms_shl },
961
{ "shrl", emit_op_rm_cl, ¶ms_shr },
962
{ "subl", emit_subaddand, ¶ms_sub },
963
{ "subq", emit_subaddand, ¶ms_sub },
964
{ "subss", emit_twobyte, ¶ms_subss },
965
{ "ucomiss", emit_twobyte, ¶ms_ucomiss },
966
{ "xorl", emit_subaddand, ¶ms_xor },
967
{ "xorq", emit_subaddand, ¶ms_xor },
971
static int opsort(const void* A, const void* B)
975
return strcmp(a->mnemonic, b->mnemonic);
978
static op_t* getop(const char* n)
984
if(!strcmp(o->mnemonic, n))
992
t = sizeof(ops)/sizeof(ops[0])-1;
998
if((r = strcmp(ops[m].mnemonic, n)) == 0)
1016
static reg_t parsereg(const char* str)
1018
const char* s = str;
1019
if(*s == 'a' && s[1] == 'l' && !s[2])
1023
else if(*s == 'a' && s[1] == 'x' && !s[2])
1027
if(*s == 'c' && s[1] == 'l' && !s[2])
1033
if(!strcmp(s, "xmm0"))
1036
else if(*s == 'r' && s[1])
1043
case 'a': return R_RAX;
1044
case 'b': return R_RBX;
1045
case 'c': return R_RCX;
1046
case 'd': return R_RDX;
1049
else if(s[1] == 'i')
1053
case 's': return R_RSI;
1054
case 'd': return R_RDI;
1057
else if(s[0] == 's' && s[1] == 'p' && !s[2])
1061
else if(*s == '8' && !s[1])
1063
else if(*s == '9' && !s[1])
1065
else if(*s == '1' && s[1] == '0')
1067
else if(*s == '1' && s[1] == '5')
1070
else if(*s == 'e' && s[1])
1077
case 'a': return R_EAX;
1078
case 'b': return R_EBX;
1079
case 'c': return R_ECX;
1080
case 'd': return R_EDX;
1083
else if(s[1] == 'i')
1087
case 's': return R_ESI;
1088
case 'd': return R_EDI;
1093
crap("invalid register %s", str);
1105
static unsigned char nexttok(const char** str, char* label, u64* val)
1107
const char* s = *str;
1109
if(label) *label = 0;
1112
while(*s && *s == ' ') ++s;
1118
else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',')
1123
else if(*s >= 'a' && *s <= 'z')
1125
size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
1127
crap("label %s too long", s);
1130
strncpy(label, s, a+1);
1136
else if(*s >= '0' && *s <= '9')
1138
char* endptr = NULL;
1139
/* Parse as unsigned: the result is stored in a u64, and strtol() would clamp
 * anything above LONG_MAX (e.g. 0xffffffffffffffff) to LONG_MAX. */
u64 v = strtoul(s, &endptr, 0);
1140
if(endptr && (endptr-s == 0))
1141
crap("invalid integer %s", s);
1146
crap("can't parse '%s'", *str);
1150
static arg_t parsearg(const char** str)
1153
const char* s = *str;
1162
while(*s && *s == ' ') ++s;
1164
switch(nexttok(&s, label, &val))
1167
ttype = nexttok(&s, NULL, &val);
1171
ttype = nexttok(&s, NULL, &val);
1173
if(ttype != TOK_INT)
1174
crap("expected integer");
1175
arg.type = T_IMMEDIATE;
1176
arg.v.imm = negative * val;
1179
if((ttype = nexttok(&s, NULL, NULL)) != '%')
1183
crap("expected '%%'");
1188
if(nexttok(&s, label, &val) != TOK_LABEL)
1189
crap("expected label");
1190
arg.type = T_REGISTER;
1191
arg.v.reg = parsereg(label);
1195
strncpy(arg.v.label, label, LABELLEN);
1199
if(nexttok(&s, NULL, &val) != TOK_INT)
1200
crap("expected integer");
1203
if(nexttok(&s, label, NULL) != '(')
1204
crap("expected '('"); // mov to/from fixed address not supported
1208
arg.type = T_MEMORY;
1209
arg.v.mem.indextype = T_NONE;
1210
arg.v.mem.disp = negative * val;
1211
ttype = nexttok(&s, label, &val);
1212
if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
1214
crap("expected register");
1218
arg.v.mem.basetype = T_REGISTER;
1219
arg.v.mem.base.reg = parsereg(label);
1221
else if (ttype == TOK_INT)
1223
arg.v.mem.basetype = T_IMMEDIATE;
1224
arg.v.mem.base.imm = val;
1226
if((ttype = nexttok(&s, NULL, NULL)) == ',')
1228
ttype = nexttok(&s, label, &val);
1229
if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
1231
crap("expected register");
1235
arg.v.mem.indextype = T_REGISTER;
1236
arg.v.mem.index.reg = parsereg(label);
1238
else if (ttype == TOK_INT)
1240
crap("index must be register");
1241
arg.v.mem.indextype = T_IMMEDIATE;
1242
arg.v.mem.index.imm = val;
1244
if(nexttok(&s, NULL, NULL) != ',')
1245
crap("expected ','");
1246
if(nexttok(&s, NULL, &val) != TOK_INT)
1247
crap("expected integer");
1248
if(val != 1 && val != 2 && val != 4 && val != 8)
1249
crap("scale must 1, 2, 4 or 8");
1250
arg.v.mem.scale = val;
1252
ttype = nexttok(&s, NULL, NULL);
1256
crap("expected ')' or ','");
1260
crap("invalid token %hhu in %s", *(unsigned char*)s, *str);
1269
/* ************************* */
1271
void assembler_init(int pass)
1274
assembler_pass = pass;
1283
qsort(ops, sizeof(ops)/sizeof(ops[0])-1, sizeof(ops[0]), opsort);
1287
size_t assembler_get_code_size(void)
1292
void assembler_set_output(char* buf)
1297
void assemble_line(const char* input, size_t len)
1313
if(len >= sizeof(line))
1314
crap("line too long");
1316
memcpy(line, input, sizeof(line));
1319
if(line[len-1] == '\n') line[--len] = 0;
1320
if(line[len-1] == ':')
1324
debug("%s: 0x%x\n", line, compiledOfs);
1326
hash_add_label(line, compiledOfs);
1331
s = strchr(line, ' ');
1335
arg1 = parsearg((const char**)&s);
1339
crap("expected ',', got '%c'", *s);
1341
arg2 = parsearg((const char**)&s);
1347
crap("no operator in %s", line);
1353
crap("cannot handle op %s", opn);
1355
o->func(opn, arg1, arg2, o->data);
1357
debug(" - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n");
1361
#ifdef SA_STANDALONE
1362
int main(int argc, char* argv[])
1371
crap("specify file");
1374
file = fopen(argv[1], "r");
1377
crap("can't open file");
1382
fout = fopen(argv[2], "w");
1385
crap("can't open %s for writing", argv[2]);
1389
for(pass = 0; pass < 2; ++pass)
1391
if(fseek(file, 0, SEEK_SET))
1392
crap("can't rewind file");
1396
char* b = malloc(assembler_get_code_size());
1398
crap("cannot allocate memory");
1399
assembler_set_output(b);
1402
assembler_init(pass);
1404
while(fgets(line, sizeof(line), file))
1409
assemble_line(line, len);