2
Copyright (C) 2011 Free Software Foundation, Inc.
3
Contributed by Intel Corporation.
4
This file is part of the GNU C Library.
6
The GNU C Library is free software; you can redistribute it and/or
7
modify it under the terms of the GNU Lesser General Public
8
License as published by the Free Software Foundation; either
9
version 2.1 of the License, or (at your option) any later version.
11
The GNU C Library is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
Lesser General Public License for more details.
16
You should have received a copy of the GNU Lesser General Public
17
License along with the GNU C Library; if not, write to the Free
18
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25
/* Unwind annotations that accompany a 4-byte push of REG: the CFA offset
   is adjusted by +4 and REG is recorded via cfi_rel_offset with offset 0.
   This macro expands only to the two cfi_* annotations; the push
   instruction itself is issued by the PUSH macro.  */
# define CFI_PUSH(REG) \
26
cfi_adjust_cfa_offset (4); \
27
cfi_rel_offset (REG, 0)
29
/* Unwind annotations that accompany a 4-byte pop into REG: the CFA offset
   is adjusted by -4 and REG is marked as restored, i.e. no longer saved on
   the stack.  This is the counterpart of CFI_PUSH.  The last line must not
   end in a backslash, otherwise the following source line would be spliced
   into the macro body.  */
# define CFI_POP(REG) \
30
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
33
/* Push REG with pushl and emit the matching CFI_PUSH annotations.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
34
/* Pop into REG with popl and emit the matching CFI_POP annotations.  */
# define POP(REG) popl REG; CFI_POP (REG)
36
/* Save %esi and then %edi on the stack.  */
# define ENTRANCE PUSH(%esi); PUSH(%edi)
37
/* Undo ENTRANCE in reverse order (%edi first, then %esi) and return.
   The two CFI_PUSH annotations placed after `ret' re-declare %esi and
   %edi as saved for whatever is assembled after the expansion site.
   NOTE(review): presumably needed because RETURN is expanded in the
   middle of the routine -- confirm against its uses.  */
# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
42
/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
47
* This implementation uses SSE to compare up to 16 bytes at a time.
82
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
85
and $63, %eax /* esi alignment in cache line */
86
and $63, %edx /* edi alignment in cache line */
132
movdqu 16(%edi), %xmm1
133
movdqu 16(%esi), %xmm2
134
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
135
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
136
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
138
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
139
jnz L(less4_double_words_16)
141
movdqu 32(%edi), %xmm1
142
movdqu 32(%esi), %xmm2
143
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
144
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
145
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
147
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
148
jnz L(less4_double_words_32)
150
movdqu 48(%edi), %xmm1
151
movdqu 48(%esi), %xmm2
152
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
153
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
154
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
156
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
157
jnz L(less4_double_words_48)
161
jmp L(continue_48_48)
199
movdqu 16(%edi), %xmm1
200
movdqu 16(%esi), %xmm2
201
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
202
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
203
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
205
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
206
jnz L(less4_double_words_16)
208
movdqu 32(%edi), %xmm1
209
movdqu 32(%esi), %xmm2
210
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
211
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
212
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
214
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
215
jnz L(less4_double_words_32)
258
pcmpeqd (%edi), %xmm0
262
jnz L(less4_double_words1)
279
movdqu 16(%esi), %xmm2
280
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
281
pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
282
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
284
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
285
jnz L(less4_double_words_16)
287
movdqu 32(%esi), %xmm2
288
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
289
pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
290
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
292
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
293
jnz L(less4_double_words_32)
295
movdqu 48(%esi), %xmm2
296
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
297
pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
298
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
300
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
301
jnz L(less4_double_words_48)
305
jmp L(continue_00_48)
368
movdqu 32(%edi), %xmm1
369
movdqu 32(%esi), %xmm2
370
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
371
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
372
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
374
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
375
jnz L(less4_double_words_32)
377
movdqu 48(%edi), %xmm1
378
movdqu 48(%esi), %xmm2
379
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
380
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
381
psubb %xmm0, %xmm1 /* packed sub of comparison results */
383
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
384
jnz L(less4_double_words_48)
388
jmp L(continue_32_48)
427
movdqu 16(%edi), %xmm1
428
movdqu 16(%esi), %xmm2
429
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
430
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
431
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
433
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
434
jnz L(less4_double_words_16)
460
movdqu 48(%edi), %xmm1
461
movdqu 48(%esi), %xmm2
462
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
463
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
464
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
466
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
467
jnz L(less4_double_words_48)
471
jmp L(continue_16_48)
476
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
477
pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
478
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
480
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
481
jnz L(less4_double_words)
483
movdqa 16(%edi), %xmm3
484
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
485
pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
486
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
488
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
489
jnz L(less4_double_words_16)
491
movdqa 32(%edi), %xmm5
492
pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
493
pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
494
psubb %xmm0, %xmm5 /* packed sub of comparison results*/
496
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
497
jnz L(less4_double_words_32)
499
movdqa 48(%edi), %xmm1
500
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
501
pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
502
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
504
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
505
jnz L(less4_double_words_48)
509
jmp L(continue_00_00)
514
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
515
pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
516
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
518
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
519
jnz L(less4_double_words)
523
jmp L(continue_00_48)
528
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
529
pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
530
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
532
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
533
jnz L(less4_double_words)
535
movdqu 16(%esi), %xmm2
536
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
537
pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
538
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
540
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
541
jnz L(less4_double_words_16)
545
jmp L(continue_00_48)
550
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
551
pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
552
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
554
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
555
jnz L(less4_double_words)
557
movdqu 16(%esi), %xmm2
558
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
559
pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
560
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
562
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
563
jnz L(less4_double_words_16)
565
movdqu 32(%esi), %xmm2
566
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
567
pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
568
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
570
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
571
jnz L(less4_double_words_32)
575
jmp L(continue_00_48)
579
pcmpeqd (%esi), %xmm0
583
jnz L(less4_double_words1)
600
movdqu 16(%edi), %xmm1
601
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
602
pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
603
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
605
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
606
jnz L(less4_double_words_16)
608
movdqu 32(%edi), %xmm1
609
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
610
pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
611
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
613
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
614
jnz L(less4_double_words_32)
616
movdqu 48(%edi), %xmm1
617
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
618
pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
619
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
621
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
622
jnz L(less4_double_words_48)
626
jmp L(continue_48_00)
631
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
632
pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
633
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
635
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
636
jnz L(less4_double_words)
640
jmp L(continue_48_00)
645
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
646
pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
647
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
649
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
650
jnz L(less4_double_words)
652
movdqu 16(%edi), %xmm1
653
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
654
pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
655
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
657
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
658
jnz L(less4_double_words_16)
662
jmp L(continue_48_00)
667
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
668
pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
669
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
671
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
672
jnz L(less4_double_words)
674
movdqu 16(%edi), %xmm1
675
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
676
pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
677
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
679
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
680
jnz L(less4_double_words_16)
682
movdqu 32(%edi), %xmm1
683
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
684
pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
685
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
687
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
688
jnz L(less4_double_words_32)
692
jmp L(continue_48_00)
698
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
699
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
700
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
702
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
703
jnz L(less4_double_words)
707
jmp L(continue_48_48)
713
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
714
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
715
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
717
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
718
jnz L(less4_double_words)
720
movdqu 16(%edi), %xmm3
721
movdqu 16(%esi), %xmm4
722
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
723
pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
724
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
726
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
727
jnz L(less4_double_words_16)
731
jmp L(continue_48_48)
737
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
738
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
739
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
741
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
742
jnz L(less4_double_words)
744
movdqu 16(%edi), %xmm3
745
movdqu 16(%esi), %xmm4
746
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
747
pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
748
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
750
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
751
jnz L(less4_double_words_16)
753
movdqu 32(%edi), %xmm1
754
movdqu 32(%esi), %xmm2
755
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
756
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
757
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
759
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
760
jnz L(less4_double_words_32)
764
jmp L(continue_48_48)
770
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
771
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
772
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
774
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
775
jnz L(less4_double_words)
777
movdqu 16(%edi), %xmm1
778
movdqu 16(%esi), %xmm2
779
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
780
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
781
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
783
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
784
jnz L(less4_double_words_16)
788
jmp L(continue_32_48)
794
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
795
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
796
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
798
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
799
jnz L(less4_double_words)
803
jmp L(continue_16_48)
809
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
810
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
811
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
813
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
814
jnz L(less4_double_words)
818
jmp L(continue_32_48)
821
L(less4_double_words1):
846
L(less4_double_words):
849
jz L(next_two_double_words)
851
jz L(second_double_word)
858
L(second_double_word):
865
L(next_two_double_words):
867
jz L(fourth_double_word)
874
L(fourth_double_word):
881
L(less4_double_words_16):
884
jz L(next_two_double_words_16)
886
jz L(second_double_word_16)
893
L(second_double_word_16):
900
L(next_two_double_words_16):
902
jz L(fourth_double_word_16)
909
L(fourth_double_word_16):
916
L(less4_double_words_32):
919
jz L(next_two_double_words_32)
921
jz L(second_double_word_32)
928
L(second_double_word_32):
935
L(next_two_double_words_32):
937
jz L(fourth_double_word_32)
944
L(fourth_double_word_32):
951
L(less4_double_words_48):
954
jz L(next_two_double_words_48)
956
jz L(second_double_word_48)
963
L(second_double_word_48):
970
L(next_two_double_words_48):
972
jz L(fourth_double_word_48)
979
L(fourth_double_word_48):