2
Copyright (c) 2004, John O'Harrow (john@almcrest.demon.co.uk)
4
This software is provided 'as-is', without any express or implied warranty.
5
In no event will the authors be held liable for any damages arising from the
8
Permission is granted to anyone to use this software for any purpose, including
9
commercial applications, and to alter it and redistribute it freely, subject to
10
the following restrictions:
12
1. The origin of this software must not be misrepresented; you must not claim
13
that you wrote the original software. If you use this software in a product,
14
an acknowledgment in the product documentation would be appreciated but is
17
2. Altered source versions must be plainly marked as such, and must not be
18
misrepresented as being the original software.
20
3. This notice may not be removed or altered from any source distribution.
22
-------------------------------------------------------------------------------
24
Version: 1.40 - 16-SEP-2004
29
{$ifndef FPC_SYSTEM_HAS_MOVE}
30
{$define FPC_SYSTEM_HAS_MOVE}
34
{-------------------------------------------------------------------------}
36
{Just to show that a good Pascal algorithm can beat the default BASM}
37
procedure MoveJOH_PAS_3(const Source; var Dest; Count : Integer);
43
S := Cardinal(@Source);
49
1 : PByte(@Dest)^ := PByte(S)^;
50
2 : PWord(@Dest)^ := PWord(S)^;
53
PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
54
PWord(@Dest)^ := PWord(S)^;
58
PWord(@Dest)^ := PWord(S)^;
59
PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
61
4 : PInteger(@Dest)^ := PInteger(S)^
62
else Exit; {Count <= 0}
70
L := PInteger(Integer(@Dest) + C);
85
Temp := PInteger(S + Cardinal(C))^;
86
I := Integer(@Dest) + C;
104
{-------------------------------------------------------------------------}
105
{Perform Forward Move of 0..36 Bytes}
106
{On Entry, ECX = Count, EAX = Source+Count, EDX = Dest+Count. Destroys ECX}
107
{Dispatches through a 37-entry jump table indexed by Count (entry 0 plus one}
{entry per size 1..36), so no loop and no explicit zero-size test is needed.}
{NOTE(review): no range check on ECX is visible in this routine; callers}
{presumably guarantee 0 <= ECX <= 36 before jumping here - confirm.}
procedure SmallForwardMove_3;assembler;nostackframe;
109
{Indirect jump through the table; entries are dwords, hence ECX is scaled by 4}
jmp dword ptr @@FwdJumpTable[ecx*4]
112
{Entry 0: Count = 0 goes straight to the exit label}
dd @@Done {Removes need to test for zero size move}
113
{Entries 1..36: one handler label per byte count}
dd @@Fwd01,@@Fwd02,@@Fwd03,@@Fwd04,@@Fwd05,@@Fwd06,@@Fwd07,@@Fwd08
114
dd @@Fwd09,@@Fwd10,@@Fwd11,@@Fwd12,@@Fwd13,@@Fwd14,@@Fwd15,@@Fwd16
115
dd @@Fwd17,@@Fwd18,@@Fwd19,@@Fwd20,@@Fwd21,@@Fwd22,@@Fwd23,@@Fwd24
116
dd @@Fwd25,@@Fwd26,@@Fwd27,@@Fwd28,@@Fwd29,@@Fwd30,@@Fwd31,@@Fwd32
117
dd @@Fwd33,@@Fwd34,@@Fwd35,@@Fwd36
174
{Tail loads: EAX points just past the end of the source, so the last source}
{bytes are read at negative offsets. ECX doubles as the data scratch register}
{here, which is why Count is destroyed.}
{Word at Source+Count-3 ...}
movzx ecx, word ptr [eax-3]
176
{... and byte at Source+Count-1}
movzx ecx, byte ptr [eax-1]
204
{Word at Source+Count-2}
movzx ecx, word ptr [eax-2]
232
{Byte at Source+Count-1}
movzx ecx, byte ptr [eax-1]
235
end; {SmallForwardMove}
237
{-------------------------------------------------------------------------}
238
{Perform Backward Move of 0..36 Bytes}
239
{On Entry, ECX = Count, EAX = Source, EDX = Dest. Destroys ECX}
240
{Dispatches through a 37-entry jump table indexed by Count (entry 0 plus one}
{entry per size 1..36), so no loop and no explicit zero-size test is needed.}
{Unlike the forward variant, EAX is the start of the source, so offsets from}
{EAX are non-negative.}
{NOTE(review): no range check on ECX is visible in this routine; callers}
{presumably guarantee 0 <= ECX <= 36 before jumping here - confirm.}
procedure SmallBackwardMove_3;assembler;nostackframe;
242
{Indirect jump through the table; entries are dwords, hence ECX is scaled by 4}
jmp dword ptr @@BwdJumpTable[ecx*4]
245
{Entry 0: Count = 0 goes straight to the exit label}
dd @@Done {Removes need to test for zero size move}
246
{Entries 1..36: one handler label per byte count}
dd @@Bwd01,@@Bwd02,@@Bwd03,@@Bwd04,@@Bwd05,@@Bwd06,@@Bwd07,@@Bwd08
247
dd @@Bwd09,@@Bwd10,@@Bwd11,@@Bwd12,@@Bwd13,@@Bwd14,@@Bwd15,@@Bwd16
248
dd @@Bwd17,@@Bwd18,@@Bwd19,@@Bwd20,@@Bwd21,@@Bwd22,@@Bwd23,@@Bwd24
249
dd @@Bwd25,@@Bwd26,@@Bwd27,@@Bwd28,@@Bwd29,@@Bwd30,@@Bwd31,@@Bwd32
250
dd @@Bwd33,@@Bwd34,@@Bwd35,@@Bwd36
307
{Head loads: the first source bytes are read at offsets 0.. from EAX. ECX}
{doubles as the data scratch register here, which is why Count is destroyed.}
{Word at Source+1 ...}
movzx ecx, word ptr [eax+1]
309
{... and byte at Source+0}
movzx ecx, byte ptr [eax]
337
{Word at Source+0}
movzx ecx, word ptr [eax]
365
{Byte at Source+0}
movzx ecx, byte ptr[eax]
368
end; {SmallBackwardMove}
371
{ At least valgrind up to 3.3 has a bug which prevents the default code from
372
working, so we use a rather simple implementation here
374
procedure Forwards_Valgrind;assembler;nostackframe;
385
{ At least valgrind up to 3.3 has a bug which prevents the default code from
386
working, so we use a rather simple implementation here
388
procedure Backwards_Valgrind;assembler;nostackframe;
405
{-------------------------------------------------------------------------}
406
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
407
procedure Forwards_IA32_3;assembler;nostackframe;
412
add eax,ecx {QWORD Align Writes}
417
add edx,ecx {Now QWORD Aligned}
421
fild qword ptr [eax+ecx-16]
422
fistp qword ptr [edx+ecx-16]
423
fild qword ptr [eax+ecx-8]
424
fistp qword ptr [edx+ecx-8]
427
fistp qword ptr [ebx]
431
jmp SmallForwardMove_3
434
{-------------------------------------------------------------------------}
435
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
435
{Copies 8 bytes at a time using x87 64-bit integer loads and stores}
{(FILD/FISTP), then hands the remaining bytes to SmallBackwardMove_3.}
{EBX is used as a work register in addition to EAX, ECX and EDX.}
procedure Backwards_IA32_3;assembler;nostackframe;
439
{Push the last 8 source bytes (Source+Count-8) on the x87 stack; this value}
{stays on the stack until the final FISTP below.}
{NOTE(review): presumably fetched first so that it is read before any write}
{can touch an overlapping source - confirm.}
fild qword ptr [eax+ecx-8]
440
lea ebx,[edx+ecx] {QWORD Align Writes}
443
add ebx,ecx {Now QWORD Aligned, EBX = Original Length}
446
{Copy 16 bytes: two pushes, then two pops. The x87 register file is a stack,}
{so the stores are issued in the reverse order of the loads.}
fild qword ptr [eax+ecx]
447
fild qword ptr [eax+ecx+8]
448
fistp qword ptr [edx+ecx+8]
449
fistp qword ptr [edx+ecx]
452
{Pop the value pushed at the top of the routine into the last 8 bytes of the}
{destination (EBX = original length)}
fistp qword ptr [edx+ebx-8]
455
{Tail jump: SmallBackwardMove_3 expects ECX = Count (0..36), EAX = Source,}
{EDX = Dest and returns to our caller}
jmp SmallBackwardMove_3
456
end; {Backwards_IA32}
458
{-------------------------------------------------------------------------}
459
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
460
procedure Forwards_MMX_3;assembler;nostackframe;
466
cmp ecx,72 {Size at which using MMX becomes worthwhile}
470
movq mm0,[eax] {First 8 Characters}
482
movq mm1,[eax+ecx-32]
483
movq mm2,[eax+ecx-24]
484
movq mm3,[eax+ecx-16]
485
movq mm4,[eax+ecx- 8]
486
movq [edx+ecx-32],mm1
487
movq [edx+ecx-24],mm2
488
movq [edx+ecx-16],mm3
489
movq [edx+ecx- 8],mm4
492
movq [ebx],mm0 {First 8 Characters}
497
jmp SmallForwardMove_3
503
{16 byte Align Destination}
511
{Destination now 16 Byte Aligned}
512
call SmallForwardMove_3
516
sub ebx,ecx {EBX = Remainder}
519
mov esi,eax {ESI = Source}
520
mov edi,edx {EDI = Dest}
521
mov eax,ecx {EAX = Count}
522
and eax,-64 {EAX = No of Bytes to Blocks Moves}
523
and ecx,$3F {ECX = Remaining Bytes to Move (0..63)}
526
shr eax,3 {EAX = No of QWORD's to Block Move}
529
movq mm0,[esi+eax*8 ]
530
movq mm1,[esi+eax*8+ 8]
531
movq mm2,[esi+eax*8+16]
532
movq mm3,[esi+eax*8+24]
533
movq mm4,[esi+eax*8+32]
534
movq mm5,[esi+eax*8+40]
535
movq mm6,[esi+eax*8+48]
536
movq mm7,[esi+eax*8+56]
537
movq [edi+eax*8 ],mm0
538
movq [edi+eax*8+ 8],mm1
539
movq [edi+eax*8+16],mm2
540
movq [edi+eax*8+24],mm3
541
movq [edi+eax*8+32],mm4
542
movq [edi+eax*8+40],mm5
543
movq [edi+eax*8+48],mm6
544
movq [edi+eax*8+56],mm7
547
emms {Empty MMX State}
559
{-------------------------------------------------------------------------}
560
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
561
procedure Backwards_MMX_3;assembler;nostackframe;
563
cmp ecx,72 {Size at which using MMX becomes worthwhile}
566
movq mm0,[eax+ecx-8] {Get Last QWORD}
576
movq mm2,[eax+ecx+ 8]
577
movq mm3,[eax+ecx+16]
578
movq mm4,[eax+ecx+24]
579
movq [edx+ecx+24],mm4
580
movq [edx+ecx+16],mm3
581
movq [edx+ecx+ 8],mm2
585
movq [edx+ebx-8], mm0 {Last QWORD}
589
jmp SmallBackwardMove_3
592
{-------------------------------------------------------------------------}
593
{Dest MUST be 16-Byte Aligned, Count MUST be a multiple of 16}
594
{Forward block copy using SSE registers.}
{On Entry (register convention): EAX = Source, EDX = Dest, ECX = Count.}
{Four main loops, each moving 128 bytes per iteration through XMM0..XMM7:}
{  Small/Aligned    - aligned loads (MOVAPS),   cached stores (MOVAPS)}
{  Small/Unaligned  - unaligned loads (MOVUPS), cached stores (MOVAPS)}
{  Large/Aligned    - aligned loads,   non-temporal stores (MOVNTPS) + prefetch}
{  Large/Unaligned  - unaligned loads, non-temporal stores (MOVNTPS) + prefetch}
{Stores always use an aligned form, which is why Dest must be 16-byte aligned.}
{The remainder (Count mod 128) is copied afterwards in 16-byte pieces.}
procedure AlignedFwdMoveSSE_3(const Source; var Dest; Count: Integer);assembler;nostackframe;
599
mov esi,eax {ESI = Source}
600
mov eax,ecx {EAX = Count}
601
and eax,-128 {EAX = No of Bytes to Block Move}
604
shr eax,3 {EAX = No of QWORD's to Block Move}
606
{32*1024 QWORDs * 8 = 256K bytes: threshold between the Small (cached) and}
{Large (non-temporal) loops.}
{NOTE(review): EAX is compared against a negative constant and the loops}
{branch back on JS, so EAX presumably holds the negated QWORD count at this}
{point - confirm against the instructions between SHR and CMP.}
cmp eax, -(32*1024) {Count > 256K}
608
@Small: {Count<=256K}
609
test esi,15 {Is Source 16-Byte Aligned too? (Dest is aligned by contract)}
611
@SmallAligned: {Both Source and Dest 16-Byte Aligned}
613
{First 64 bytes of the 128-byte block: aligned loads, aligned stores}
movaps xmm0,[esi+8*eax]
614
movaps xmm1,[esi+8*eax+16]
615
movaps xmm2,[esi+8*eax+32]
616
movaps xmm3,[esi+8*eax+48]
617
movaps [edx+8*eax],xmm0
618
movaps [edx+8*eax+16],xmm1
619
movaps [edx+8*eax+32],xmm2
620
movaps [edx+8*eax+48],xmm3
621
{Second 64 bytes of the 128-byte block}
movaps xmm4,[esi+8*eax+64]
622
movaps xmm5,[esi+8*eax+80]
623
movaps xmm6,[esi+8*eax+96]
624
movaps xmm7,[esi+8*eax+112]
625
movaps [edx+8*eax+64],xmm4
626
movaps [edx+8*eax+80],xmm5
627
movaps [edx+8*eax+96],xmm6
628
movaps [edx+8*eax+112],xmm7
632
@SmallUnaligned: {Source Not 16-Byte Aligned}
634
{First 64 bytes: unaligned loads, aligned stores}
movups xmm0,[esi+8*eax]
635
movups xmm1,[esi+8*eax+16]
636
movups xmm2,[esi+8*eax+32]
637
movups xmm3,[esi+8*eax+48]
638
movaps [edx+8*eax],xmm0
639
movaps [edx+8*eax+16],xmm1
640
movaps [edx+8*eax+32],xmm2
641
movaps [edx+8*eax+48],xmm3
642
{Second 64 bytes}
movups xmm4,[esi+8*eax+64]
643
movups xmm5,[esi+8*eax+80]
644
movups xmm6,[esi+8*eax+96]
645
movups xmm7,[esi+8*eax+112]
646
movaps [edx+8*eax+64],xmm4
647
movaps [edx+8*eax+80],xmm5
648
movaps [edx+8*eax+96],xmm6
649
movaps [edx+8*eax+112],xmm7
651
{Loop back while the sign flag is still set}
js @SmallUnalignedLoop
654
test esi,15 {Is Source 16-Byte Aligned too? (Dest is aligned by contract)}
656
@LargeAligned: {Both Source and Dest 16-Byte Aligned}
658
{Prefetch 128 source bytes (two requests 64 bytes apart) at distance Prefetch}
{ahead of the current position, using the non-temporal hint}
prefetchnta [esi+8*eax+Prefetch]
659
prefetchnta [esi+8*eax+Prefetch+64]
660
{First 64 bytes: aligned loads, non-temporal stores}
movaps xmm0,[esi+8*eax]
661
movaps xmm1,[esi+8*eax+16]
662
movaps xmm2,[esi+8*eax+32]
663
movaps xmm3,[esi+8*eax+48]
664
movntps [edx+8*eax],xmm0
665
movntps [edx+8*eax+16],xmm1
666
movntps [edx+8*eax+32],xmm2
667
movntps [edx+8*eax+48],xmm3
668
{Second 64 bytes}
movaps xmm4,[esi+8*eax+64]
669
movaps xmm5,[esi+8*eax+80]
670
movaps xmm6,[esi+8*eax+96]
671
movaps xmm7,[esi+8*eax+112]
672
movntps [edx+8*eax+64],xmm4
673
movntps [edx+8*eax+80],xmm5
674
movntps [edx+8*eax+96],xmm6
675
movntps [edx+8*eax+112],xmm7
680
@LargeUnaligned: {Source Not 16-Byte Aligned}
682
{Prefetch 128 source bytes (two requests 64 bytes apart) at distance Prefetch}
{ahead of the current position, using the non-temporal hint}
prefetchnta [esi+8*eax+Prefetch]
683
prefetchnta [esi+8*eax+Prefetch+64]
684
{First 64 bytes: unaligned loads, non-temporal stores}
movups xmm0,[esi+8*eax]
685
movups xmm1,[esi+8*eax+16]
686
movups xmm2,[esi+8*eax+32]
687
movups xmm3,[esi+8*eax+48]
688
movntps [edx+8*eax],xmm0
689
movntps [edx+8*eax+16],xmm1
690
movntps [edx+8*eax+32],xmm2
691
movntps [edx+8*eax+48],xmm3
692
{Second 64 bytes}
movups xmm4,[esi+8*eax+64]
693
movups xmm5,[esi+8*eax+80]
694
movups xmm6,[esi+8*eax+96]
695
movups xmm7,[esi+8*eax+112]
696
movntps [edx+8*eax+64],xmm4
697
movntps [edx+8*eax+80],xmm5
698
movntps [edx+8*eax+96],xmm6
699
movntps [edx+8*eax+112],xmm7
701
{Loop back while the sign flag is still set}
js @LargeUnalignedLoop
704
and ecx,$7F {ECX = Remainder (0..112 - Multiple of 16)}
710
{Copy 16 bytes of the remainder: unaligned load, aligned store}
movups xmm0,[esi+ecx]
711
movaps [edx+ecx],xmm0
716
end; {AlignedFwdMoveSSE}
718
{-------------------------------------------------------------------------}
719
{Move ECX Bytes from EAX to EDX, where EAX > EDX and ECX > 36 (SMALLMOVESIZE)}
720
procedure Forwards_SSE_3;assembler;nostackframe;
726
cmp ecx,SMALLMOVESIZE+32
735
jmp SmallForwardMove_3
750
movups xmm1,[eax+ecx-32]
751
movups xmm2,[eax+ecx-16]
752
movaps [edx+ecx-32],xmm1
753
movaps [edx+ecx-16],xmm2
756
movups [ebx],xmm0 {First 16 Bytes}
760
jmp SmallForwardMove_3
766
{16 byte Align Destination}
774
{Destination now 16 Byte Aligned}
775
call SmallForwardMove_3
779
sub ebx,ecx {EBX = Remainder}
783
call AlignedFwdMoveSSE_3
792
jmp SmallForwardMove_3
795
{-------------------------------------------------------------------------}
796
{Move ECX Bytes from EAX to EDX, where EAX < EDX and ECX > 36 (SMALLMOVESIZE)}
797
procedure Backwards_SSE_3;assembler;nostackframe;
799
cmp ecx,SMALLMOVESIZE+32
802
movups xmm1,[eax+ecx]
803
movups xmm2,[eax+ecx+16]
804
movups [edx+ecx],xmm1
805
movups [edx+ecx+16],xmm2
806
jmp SmallBackwardMove_3
809
movups xmm0,[eax+ecx-16] {Last 16 Bytes}
818
movups xmm1,[eax+ecx]
819
movups xmm2,[eax+ecx+16]
820
movaps [edx+ecx],xmm1
821
movaps [edx+ecx+16],xmm2
824
movups [edx+ebx-16],xmm0 {Last 16 Bytes}
827
jmp SmallBackwardMove_3
831
fastmoveproc_forward : pointer = @Forwards_IA32_3;
832
fastmoveproc_backward : pointer = @Backwards_IA32_3;
834
{$ifndef INTERNALMOVEFILLCHAR}
835
procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
837
cmp ecx,SMALLMOVESIZE
844
jmp SmallForwardMove_3
846
je @Done {For Compatibility with Delphi's move for Source = Dest}
848
jmp SmallBackwardMove_3
850
jng @Done {For Compatibility with Delphi's move for Count < 0}
853
je @Done {For Compatibility with Delphi's move for Source = Dest}
860
jmp dword ptr fastmoveproc_forward
862
jmp dword ptr fastmoveproc_backward {Source/Dest Overlap}
865
{$endif INTERNALMOVEFILLCHAR}
868
{$ifdef FPC_HAS_VALGRINDBOOL}
870
valgrind_used : boolean;external name '__fpc_valgrind';
871
{$endif FPC_HAS_VALGRINDBOOL}
873
procedure setup_fastmove;{$ifdef SYSTEMINLINE}inline;{$endif}
875
{$ifdef FPC_HAS_VALGRINDBOOL}
876
{ workaround valgrind bug }
877
if valgrind_used then
879
fastmoveproc_forward:=@Forwards_Valgrind;
880
fastmoveproc_backward:=@Backwards_Valgrind;
883
{$endif FPC_HAS_VALGRINDBOOL}
885
if has_sse_support then
887
fastmoveproc_forward:=@Forwards_SSE_3;
888
fastmoveproc_backward:=@Backwards_SSE_3;
890
else if has_mmx_support then
892
fastmoveproc_forward:=@Forwards_MMX_3;
893
fastmoveproc_backward:=@Backwards_MMX_3;
897
{$endif FPC_SYSTEM_HAS_MOVE}