42
41
# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
43
42
# required to identify the spots, where to inject Win64 epilogue!
44
43
# But on the plus side, it's then prefixed with rep automatically:-)
45
# 7. Due to MASM limitations [and certain general counter-intuitivity
46
# of ip-relative addressing] generation of position-independent
47
# code is assisted by synthetic directive, .picmeup, which puts
48
# address of the *next* instruction into target register.
44
# 7. Stick to explicit ip-relative addressing. If you have to use
45
# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.
46
# Both are recognized and translated to proper Win64 addressing
47
# modes. To support legacy code a synthetic directive, .picmeup,
48
# is implemented. It puts address of the *next* instruction into
49
# target register, e.g.:
52
52
# lea .Label-.(%rax),%rax
57
# lea .Label-.Lpic_point(%rcx),%rbp
54
# 8. In order to provide for structured exception handling unified
55
# Win64 prologue copies %rsp value to %rax. For further details
56
# see SEH paragraph at the end.
57
# 9. .init segment is allowed to contain calls to functions only.
58
# a. If function accepts more than 4 arguments *and* >4th argument
59
# is declared as non 64-bit value, do clear its upper part.
63
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
61
65
{ my ($stddev,$stdino,@junk)=stat(STDOUT);
62
66
my ($outdev,$outino,@junk)=stat($output);
180
237
my $self = shift;
240
$self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
241
$self->{label} =~ s/\.L/$decor/g;
183
243
# Silently convert all EAs to 64-bit. This is required for
184
244
# elder GNU assembler and results in more compact code,
185
245
# *but* most importantly AES module depends on this feature!
186
246
$self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
187
247
$self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
190
250
# Solaris /usr/ccs/bin/as can't handle multiplications
251
# in $self->{label}, new gas requires sign extension...
193
$self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi;
253
$self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
194
254
$self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
195
255
$self->{label} =~ s/([0-9]+)/$1<<32>>32/eg;
256
$self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64");
197
258
if (defined($self->{index})) {
198
sprintf "%s(%%%s,%%%s,%d)",
259
sprintf "%s%s(%%%s,%%%s,%d)",$self->{asterisk},
199
260
$self->{label},$self->{base},
200
261
$self->{index},$self->{scale};
202
sprintf "%s(%%%s)", $self->{label},$self->{base};
263
sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base};
205
%szmap = ( b=>"BYTE", w=>"WORD", l=>"DWORD", q=>"QWORD" );
266
%szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", q=>"QWORD$PTR" );
207
268
$self->{label} =~ s/\./\$/g;
208
$self->{label} =~ s/0x([0-9a-f]+)/0$1h/ig;
269
$self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
209
270
$self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
271
$sz="q" if ($self->{asterisk});
211
273
if (defined($self->{index})) {
212
sprintf "%s PTR %s[%s*%d+%s]",$szmap{$sz},
274
sprintf "%s[%s%s*%d+%s]",$szmap{$sz},
275
$self->{label}?"$self->{label}+":"",
214
276
$self->{index},$self->{scale},
216
278
} elsif ($self->{base} eq "rip") {
217
sprintf "%s PTR %s",$szmap{$sz},$self->{label};
279
sprintf "%s[%s]",$szmap{$sz},$self->{label};
219
sprintf "%s PTR %s[%s]",$szmap{$sz},
220
$self->{label},$self->{base};
281
sprintf "%s[%s%s]",$szmap{$sz},
282
$self->{label}?"$self->{label}+":"",
263
329
local *line = shift;
266
if ($line =~ /(^[\.\w]+\:)/) {
332
if ($line =~ /(^[\.\w]+)\:/) {
267
333
$self->{value} = $1;
269
335
$line = substr($line,@+[0]); $line =~ s/^\s+//;
271
$self->{value} =~ s/\.L/\$L/ if ($masm);
337
$self->{value} =~ s/^\.L/$decor/;
276
342
my $self = shift;
280
} elsif ($self->{value} ne "$current_function->{name}:") {
282
} elsif ($current_function->{abi} eq "svr4") {
283
my $func = "$current_function->{name} PROC\n".
284
" mov QWORD PTR 8[rsp],rdi\t;WIN64 prologue\n".
285
" mov QWORD PTR 16[rsp],rsi\n";
345
my $func = ($globals{$self->{value}} or $self->{value}) . ":";
347
$current_function->{name} eq $self->{value} &&
348
$current_function->{abi} eq "svr4") {
350
$func .= " movq %rdi,8(%rsp)\n";
351
$func .= " movq %rsi,16(%rsp)\n";
352
$func .= " movq %rsp,%rax\n";
353
$func .= "${decor}SEH_begin_$current_function->{name}:\n";
354
my $narg = $current_function->{narg};
355
$narg=6 if (!defined($narg));
356
$func .= " movq %rcx,%rdi\n" if ($narg>0);
357
$func .= " movq %rdx,%rsi\n" if ($narg>1);
358
$func .= " movq %r8,%rdx\n" if ($narg>2);
359
$func .= " movq %r9,%rcx\n" if ($narg>3);
360
$func .= " movq 40(%rsp),%r8\n" if ($narg>4);
361
$func .= " movq 48(%rsp),%r9\n" if ($narg>5);
364
} elsif ($self->{value} ne "$current_function->{name}") {
365
$self->{value} .= ":" if ($masm && $ret!~m/^\$/);
366
$self->{value} . ":";
367
} elsif ($win64 && $current_function->{abi} eq "svr4") {
368
my $func = "$current_function->{name}" .
369
($nasm ? ":" : "\tPROC $current_function->{scope}") .
371
$func .= " mov QWORD${PTR}[8+rsp],rdi\t;WIN64 prologue\n";
372
$func .= " mov QWORD${PTR}[16+rsp],rsi\n";
373
$func .= " mov rax,rsp\n";
374
$func .= "${decor}SEH_begin_$current_function->{name}:";
375
$func .= ":" if ($masm);
286
377
my $narg = $current_function->{narg};
287
378
$narg=6 if (!defined($narg));
288
379
$func .= " mov rdi,rcx\n" if ($narg>0);
289
380
$func .= " mov rsi,rdx\n" if ($narg>1);
290
381
$func .= " mov rdx,r8\n" if ($narg>2);
291
382
$func .= " mov rcx,r9\n" if ($narg>3);
292
$func .= " mov r8,QWORD PTR 40[rsp]\n" if ($narg>4);
293
$func .= " mov r9,QWORD PTR 48[rsp]\n" if ($narg>5);
383
$func .= " mov r8,QWORD${PTR}[40+rsp]\n" if ($narg>4);
384
$func .= " mov r9,QWORD${PTR}[48+rsp]\n" if ($narg>5);
296
"$current_function->{name} PROC";
387
"$current_function->{name}".
388
($nasm ? ":" : "\tPROC $current_function->{scope}");
334
432
"%r14"=>0x01358d4c, "%r15"=>0x013d8d4c );
336
434
if ($line =~ /^\s*(\.\w+)/) {
339
$line =~ s/\@abi\-omnipotent/\@function/;
340
$line =~ s/\@function.*/\@function/;
341
if ($line =~ /\.picmeup\s+(%r[\w]+)/i) {
342
$self->{value} = sprintf "\t.long\t0x%x,0x90000000",$opcode{$1};
343
} elsif ($line =~ /\.asciz\s+"(.*)"$/) {
344
$self->{value} = ".byte\t".join(",",unpack("C*",$1),0);
345
} elsif ($line =~ /\.extern/) {
346
$self->{value} = ""; # swallow extern
348
$self->{value} = $line;
356
437
undef $self->{value};
357
438
$line = substr($line,@+[0]); $line =~ s/^\s+//;
358
440
SWITCH: for ($dir) {
361
$v="$current_segment\tENDS\n" if ($current_segment);
362
$current_segment = "_$1\$";
363
$current_segment =~ tr/[a-z]/[A-Z]/;
364
$v.="$current_segment\tSEGMENT ";
365
$v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE";
370
/\.extern/ && do { $self->{value} = "EXTRN\t".$line.":BYTE"; last; };
371
/\.globl/ && do { $self->{value} = "PUBLIC\t".$line; last; };
441
/\.picmeup/ && do { if ($line =~ /(%r[\w]+)/i) {
443
$line=sprintf "0x%x,0x90000000",$opcode{$1};
447
/\.global|\.globl|\.extern/
448
&& do { $globals{$line} = $prefix . $line;
449
$line = $globals{$line} if ($prefix);
372
452
/\.type/ && do { ($sym,$type,$narg) = split(',',$line);
373
453
if ($type eq "\@function") {
374
454
undef $current_function;
375
455
$current_function->{name} = $sym;
376
456
$current_function->{abi} = "svr4";
377
457
$current_function->{narg} = $narg;
458
$current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
378
459
} elsif ($type eq "\@abi-omnipotent") {
379
460
undef $current_function;
380
461
$current_function->{name} = $sym;
462
$current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
464
$line =~ s/\@abi\-omnipotent/\@function/;
465
$line =~ s/\@function.*/\@function/;
468
/\.asciz/ && do { if ($line =~ /^"(.*)"$/) {
470
$line = join(",",unpack("C*",$1),0);
474
/\.rva|\.long|\.quad/
475
&& do { $line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
476
$line =~ s/\.L/$decor/g;
482
$self->{value} = $dir . "\t" . $line;
484
if ($dir =~ /\.extern/) {
485
$self->{value} = ""; # swallow extern
486
} elsif (!$elf && $dir =~ /\.type/) {
488
$self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" .
489
(defined($globals{$1})?".scl 2;":".scl 3;") .
490
"\t.type 32;\t.endef"
491
if ($win64 && $line =~ /([^,]+),\@function/);
492
} elsif (!$elf && $dir =~ /\.size/) {
494
if (defined($current_function)) {
495
$self->{value} .= "${decor}SEH_end_$current_function->{name}:"
496
if ($win64 && $current_function->{abi} eq "svr4");
497
undef $current_function;
499
} elsif (!$elf && $dir =~ /\.align/) {
500
$self->{value} = ".p2align\t" . (log($line)/log(2));
501
} elsif ($dir eq ".section") {
502
$current_segment=$line;
503
if (!$elf && $current_segment eq ".init") {
504
if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; }
505
elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; }
507
} elsif ($dir =~ /\.(text|data)/) {
508
$current_segment=".$1";
514
# non-gas case or nasm/masm
516
/\.text/ && do { my $v=undef;
518
$v="section .text code align=64\n";
520
$v="$current_segment\tENDS\n" if ($current_segment);
521
$current_segment = ".text\$";
522
$v.="$current_segment\tSEGMENT ";
523
$v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE";
529
/\.data/ && do { my $v=undef;
531
$v="section .data data align=8\n";
533
$v="$current_segment\tENDS\n" if ($current_segment);
534
$current_segment = "_DATA";
535
$v.="$current_segment\tSEGMENT";
540
/\.section/ && do { my $v=undef;
541
$line =~ s/([^,]*).*/$1/;
542
$line = ".CRT\$XCU" if ($line eq ".init");
545
if ($line=~/\.([px])data/) {
547
$v.=$1 eq "p"? 4 : 8;
548
} elsif ($line=~/\.CRT\$/i) {
549
$v.=" rdata align=8";
552
$v="$current_segment\tENDS\n" if ($current_segment);
553
$v.="$line\tSEGMENT";
554
if ($line=~/\.([px])data/) {
556
$v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
557
} elsif ($line=~/\.CRT\$/i) {
558
$v.=" READONLY DWORD";
561
$current_segment = $line;
565
/\.extern/ && do { $self->{value} = "EXTERN\t".$line;
566
$self->{value} .= ":NEAR" if ($masm);
570
&& do { $self->{value} = $masm?"PUBLIC":"global";
571
$self->{value} .= "\t".$line;
384
574
/\.size/ && do { if (defined($current_function)) {
385
$self->{value}="$current_function->{name}\tENDP";
575
undef $self->{value};
576
if ($current_function->{abi} eq "svr4") {
577
$self->{value}="${decor}SEH_end_$current_function->{name}:";
578
$self->{value}.=":\n" if($masm);
580
$self->{value}.="$current_function->{name}\tENDP" if($masm);
386
581
undef $current_function;
390
585
/\.align/ && do { $self->{value} = "ALIGN\t".$line; last; };
391
/\.(byte|value|long|quad)/
392
&& do { my @arr = split(',',$line);
393
my $sz = substr($1,0,1);
586
/\.(value|long|rva|quad)/
587
&& do { my $sz = substr($1,0,1);
588
my @arr = split(/,\s*/,$line);
394
589
my $last = pop(@arr);
395
590
my $conv = sub { my $var=shift;
396
if ($var=~s/0x([0-9a-f]+)/0$1h/i) { $var; }
397
else { sprintf"0%Xh",$var; }
591
$var=~s/^(0b[0-1]+)/oct($1)/eig;
592
$var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm);
593
if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva"))
594
{ $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; }
400
$sz =~ tr/bvlq/BWDQ/;
598
$sz =~ tr/bvlrq/BWDDQ/;
401
599
$self->{value} = "\tD$sz\t";
402
600
for (@arr) { $self->{value} .= &$conv($_).","; }
403
601
$self->{value} .= &$conv($last);
406
/\.picmeup/ && do { $self->{value} = sprintf"\tDD\t 0%Xh,090000000h",$opcode{$line};
409
/\.asciz/ && do { if ($line =~ /^"(.*)"$/) {
410
my @str=unpack("C*",$1);
413
$self->{value}.="DB\t"
604
/\.byte/ && do { my @str=split(/,\s*/,$line);
605
map(s/(0b[0-1]+)/oct($1)/eig,@str);
606
map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm);
608
$self->{value}.="DB\t"
414
609
.join(",",@str[0..15])."\n";
415
foreach (0..15) { shift @str; }
417
$self->{value}.="DB\t"
610
foreach (0..15) { shift @str; }
612
$self->{value}.="DB\t"
418
613
.join(",",@str) if (@str);
448
650
if ($label=label->re(\$line)) { print $label->out(); }
450
652
if (directive->re(\$line)) {
451
653
printf "%s",directive->out();
452
} elsif ($opcode=opcode->re(\$line)) { ARGUMENT: {
454
if ($src=register->re(\$line)) { opcode->size($src->size()); }
455
elsif ($src=const->re(\$line)) { }
456
elsif ($src=ea->re(\$line)) { }
457
elsif ($src=expr->re(\$line)) { }
654
} elsif ($opcode=opcode->re(\$line)) { ARGUMENT: while (1) {
657
if ($arg=register->re(\$line)) { opcode->size($arg->size()); }
658
elsif ($arg=const->re(\$line)) { }
659
elsif ($arg=ea->re(\$line)) { }
660
elsif ($arg=expr->re(\$line)) { }
661
else { last ARGUMENT; }
459
665
last ARGUMENT if ($line !~ /^,/);
461
$line = substr($line,1); $line =~ s/^\s+//;
463
if ($dst=register->re(\$line)) { opcode->size($dst->size()); }
464
elsif ($dst=const->re(\$line)) { }
465
elsif ($dst=ea->re(\$line)) { }
469
670
$sz=opcode->size();
473
printf "\t%s\t%s,%s", $opcode->out($dst->size()),
474
$src->out($sz),$dst->out($sz);
675
$insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);
476
printf "\t%s\t%s,%s", $opcode->out(),
477
$dst->out($sz),$src->out($sz);
677
$insn = $opcode->out();
678
$insn .= $sz if (map($_->out() =~ /x?mm/,@args));
679
@args = reverse(@args);
680
undef $sz if ($nasm && $opcode->mnemonic() eq "lea");
479
} elsif (defined($src)) {
480
printf "\t%s\t%s",$opcode->out(),$src->out($sz);
682
printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
482
684
printf "\t%s",$opcode->out();
554
757
# movq 16(%rsp),%rsi
761
#################################################
762
# Win64 SEH, Structured Exception Handling.
764
# Unlike on Unix systems(*) lack of Win64 stack unwinding information
765
# has undesired side-effect at run-time: if an exception is raised in
766
# assembler subroutine such as those in question (basically we're
767
# referring to segmentation violations caused by malformed input
768
# parameters), the application is briskly terminated without invoking
769
# any exception handlers, most notably without generating memory dump
770
# or any user notification whatsoever. This poses a problem. It's
771
# possible to address it by registering custom language-specific
772
# handler that would restore processor context to the state at
773
# subroutine entry point and return "exception is not handled, keep
774
# unwinding" code. Writing such handler can be a challenge... But it's
775
# doable, though requires certain coding convention. Consider following
778
# .type function,@function
780
# movq %rsp,%rax # copy rsp to volatile register
781
# pushq %r15 # save non-volatile registers
785
# subq %rdi,%r11 # prepare [variable] stack frame
787
# movq %rax,0(%r11) # check for exceptions
788
# movq %r11,%rsp # allocate [variable] stack frame
789
# movq %rax,0(%rsp) # save original rsp value
792
# movq 0(%rsp),%rcx # pull original rsp value
793
# movq -24(%rcx),%rbp # restore non-volatile registers
794
# movq -16(%rcx),%rbx
796
# movq %rcx,%rsp # restore original rsp
798
# .size function,.-function
800
# The key is that up to magic_point copy of original rsp value remains
801
# in chosen volatile register and no non-volatile register, except for
802
# rsp, is modified. While past magic_point rsp remains constant till
803
# the very end of the function. In this case custom language-specific
804
# exception handler would look like this:
806
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
807
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
808
# { ULONG64 *rsp = (ULONG64 *)context->Rax;
809
# if (context->Rip >= magic_point)
810
# { rsp = ((ULONG64 **)context->Rsp)[0];
811
# context->Rbp = rsp[-3];
812
# context->Rbx = rsp[-2];
813
# context->R15 = rsp[-1];
815
# context->Rsp = (ULONG64)rsp;
816
# context->Rdi = rsp[1];
817
# context->Rsi = rsp[2];
819
# memcpy (disp->ContextRecord,context,sizeof(CONTEXT));
820
# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase,
821
# disp->ControlPc,disp->FunctionEntry,disp->ContextRecord,
822
# &disp->HandlerData,&disp->EstablisherFrame,NULL);
823
# return ExceptionContinueSearch;
826
# It's appropriate to implement this handler in assembler, directly in
827
# function's module. In order to do that one has to know members'
828
# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant
829
# values. Here they are:
849
# sizeof(CONTEXT) 1232
850
# DISPATCHER_CONTEXT.ControlPc 0
851
# DISPATCHER_CONTEXT.ImageBase 8
852
# DISPATCHER_CONTEXT.FunctionEntry 16
853
# DISPATCHER_CONTEXT.EstablisherFrame 24
854
# DISPATCHER_CONTEXT.TargetIp 32
855
# DISPATCHER_CONTEXT.ContextRecord 40
856
# DISPATCHER_CONTEXT.LanguageHandler 48
857
# DISPATCHER_CONTEXT.HandlerData 56
858
# UNW_FLAG_NHANDLER 0
859
# ExceptionContinueSearch 1
861
# In order to tie the handler to the function one has to compose
862
# couple of structures: one for .xdata segment and one for .pdata.
864
# UNWIND_INFO structure for .xdata segment would be
866
# function_unwind_info:
870
# This structure designates exception handler for a function with
871
# zero-length prologue, no stack frame or frame register.
873
# To facilitate composing of .pdata structures, auto-generated "gear"
874
# prologue copies rsp value to rax and denotes next instruction with
875
# .LSEH_begin_{function_name} label. This essentially defines the SEH
876
# styling rule mentioned in the beginning. Position of this label is
877
# chosen in such manner that possible exceptions raised in the "gear"
878
# prologue would be accounted to caller and unwound from latter's frame.
879
# End of function is marked with respective .LSEH_end_{function_name}
880
# label. To summarize, .pdata segment would contain
882
# .rva .LSEH_begin_function
883
# .rva .LSEH_end_function
884
# .rva function_unwind_info
886
# Reference to function_unwind_info from .xdata segment is the anchor.
887
# In case you wonder why references are 32-bit .rvas and not 64-bit
888
# .quads. References put into these two segments are required to be
889
# *relative* to the base address of the current binary module, a.k.a.
890
# image base. No Win64 module, be it .exe or .dll, can be larger than
891
# 2GB and thus such relative references can be and are accommodated in
894
# Having reviewed the example function code, one can argue that "movq
895
# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix
896
# rax would contain an undefined value. If this "offends" you, use
897
# another register and refrain from modifying rax till magic_point is
898
# reached, i.e. as if it was a non-volatile register. If more registers
899
# are required before [variable] frame setup is completed, note that
900
# nobody says that you can have only one "magic point." You can
901
# "liberate" non-volatile registers by denoting last stack off-load
902
# instruction and reflecting it in finer grade unwind logic in handler.
903
# After all, isn't it why it's called *language-specific* handler...
905
# Attentive reader can notice that exceptions would be mishandled in
906
# auto-generated "gear" epilogue. Well, exception effectively can't
907
# occur there, because if memory area used by it was subject to
908
# segmentation violation, then it would be raised upon call to the
909
# function (and as already mentioned be accounted to caller, which is
910
# not a problem). If you're still not comfortable, then define tail
911
# "magic point" just prior to ret instruction and have handler treat it...
913
# (*) Note that we're talking about run-time, not debug-time. Lack of
914
# unwind information makes debugging hard on both Windows and
915
# Unix. "Unlike" refers to the fact that on Unix signal handler
916
# will always be invoked, core dumped and appropriate exit code
917
# returned to parent (for user notification).