~ubuntu-branches/ubuntu/trusty/dvd95/trusty

« back to all changes in this revision

Viewing changes to src/tcmemcpy.c

  • Committer: Bazaar Package Importer
  • Author(s): William Grant
  • Date: 2007-11-10 08:33:32 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20071110083332-pry0uz9nx4u1s1xz
Tags: 1.3p0-0ubuntu1
* New upstream release.
* debian/control:
  - Move homepage to Homepage field.
  - Comply with DebianMaintainerField.
  - Build-Depend on libmpeg2-4-dev.
* debian/rules: Check if Makefile exists before cleaning, rather than
  ignoring all errors.
* Fix up .desktop:
  - debian/rules: Use dpatch.
  - debian/control: Build-Depend on dpatch.
  - debian/patches/01_fix_desktop_file.dpatch: Convince dvd95.desktop to
    comply with .desktop file standards.
* Add debian/watch.

Show diffs side-by-side

added added

removed removed

Lines of Context:
15
15
 * Nonstandard instructions used:
16
16
 *     (CPUID.MMX)   MOVQ
17
17
 */
18
 
 
 
18
#if defined(__sun)
/*
 * ac_memcpy_mmx: MMX-accelerated memory copy (Solaris/x86 assembler
 * variant — uses `mmx.`-prefixed labels and caps the block size, per
 * the "Solaris x86 fix" comments, instead of the GNU as version in the
 * #else branch).
 *
 * dest:  destination buffer (regions must not overlap, as with memcpy)
 * src:   source buffer
 * bytes: number of bytes to copy
 * Returns dest, like memcpy().
 *
 * Strategy visible in the asm below: align the destination to 8 bytes
 * with up to 7 MOVSBs (computed jump), then copy 64 bytes per iteration
 * with eight MOVQ load/store pairs — after first touching each cache
 * line in reverse order to pull it into cache — and finish the <64-byte
 * tail with computed-jump MOVSD/MOVSB runs.  EMMS clears MMX state at
 * the end; EBX (PIC register) and EDI are saved/restored by the asm
 * itself via push/pop.
 */
void *ac_memcpy_mmx(void *dest, const void *src, size_t bytes)
{
    /* Constraint fix: the asm advances ESI through the source and
     * consumes ECX as the count, so both must be read-write operands
     * ("+S"/"+c") on throwaway locals — declaring modified registers as
     * plain inputs is undefined behavior under GCC extended asm.  A
     * "memory" clobber is required because the compiler is not
     * otherwise told that *dest is written (and *src read), and "cc"
     * covers the flag-setting instructions (cmp/sub/dec/test).
     *
     * NOTE(review): `cmp 63, %%edx` and `mov 63, %%edx` below lack the
     * `$` immediate prefix, which under AT&T syntax means a memory
     * operand at absolute address 63.  They are tagged "Solaris x86
     * fix" upstream, so this may be a Sun assembler quirk — confirm
     * against the Solaris x86 Assembly Language Reference before
     * changing them.
     */
    const void *s = src;
    size_t n = bytes;

    asm volatile("\
#PENTIUM_LINE_SIZE = 32         # PMMX/PII cache line size              \n\
#PENTIUM_CACHE_SIZE = 8192      # PMMX/PII total cache size             \n\
# Use only half because writes may touch the cache too (PII)            \n\
#PENTIUM_CACHE_BLOCK = (PENTIUM_CACHE_SIZE/2 - PENTIUM_LINE_SIZE)       \n\
                                                                        \n\
        push %%ebx              # Save PIC register                     \n\
        push %%edi              # Save destination for return value     \n\
        cld                     # MOVS* should ascend                   \n\
                                                                        \n\
        mov $64, %%ebx          # Constant                              \n\
                                                                        \n\
        cmp %%ebx, %%ecx                                                \n\
        jb mmx.memcpy_last      # Just use movs if <64 bytes            \n\
                                                                        \n\
        # First align destination address to a multiple of 8 bytes      \n\
        mov $8, %%eax           # EAX <- (8-dest) & 7                   \n\
        sub %%edi, %%eax                                                \n\
        and $0b111, %%eax       # ... which is the number of bytes to copy\n\
        lea 0f, %%edx           # Use a computed jump--faster than a loop\n\
        sub %%eax, %%edx                                                \n\
        jmp *%%edx              # Execute 0-7 MOVSB's                   \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
0:      sub %%eax, %%ecx        # Update count                          \n\
                                                                        \n\
        # Now copy data in blocks                                       \n\
0:      mov %%ecx, %%edx        # EDX <- ECX >> 6 (cache lines to copy) \n\
        shr $6, %%edx                                                   \n\
        jz mmx.memcpy_last      # <64 bytes left?  Skip to end          \n\
        cmp 63, %%edx           # Solaris x86 fix                       \n\
        jb 1f                   # Limit size of block                   \n\
        mov 63, %%edx           # Solaris x86 fix                       \n\
1:      mov %%edx, %%eax        # EAX <- EDX << 6 (bytes to copy)       \n\
        shl $6, %%eax                                                   \n\
        sub %%eax, %%ecx        # Update remaining count                \n\
        add %%eax, %%esi        # Point to end of region to be block-copied\n\
2:      test %%eax, -32(%%esi)  # Touch each cache line in reverse order\n\
        test %%eax, -64(%%esi)                                          \n\
        sub %%ebx, %%esi        # Update pointer                        \n\
        sub %%ebx, %%eax        # And loop                              \n\
        jnz 2b                                                          \n\
        # Note that ESI now points to the beginning of the block        \n\
3:      movq   (%%esi), %%mm0   # Do the actual copy, 64 bytes at a time\n\
        movq  8(%%esi), %%mm1                                           \n\
        movq 16(%%esi), %%mm2                                           \n\
        movq 24(%%esi), %%mm3                                           \n\
        movq 32(%%esi), %%mm4                                           \n\
        movq 40(%%esi), %%mm5                                           \n\
        movq 48(%%esi), %%mm6                                           \n\
        movq 56(%%esi), %%mm7                                           \n\
        movq %%mm0,   (%%edi)                                           \n\
        movq %%mm1,  8(%%edi)                                           \n\
        movq %%mm2, 16(%%edi)                                           \n\
        movq %%mm3, 24(%%edi)                                           \n\
        movq %%mm4, 32(%%edi)                                           \n\
        movq %%mm5, 40(%%edi)                                           \n\
        movq %%mm6, 48(%%edi)                                           \n\
        movq %%mm7, 56(%%edi)                                           \n\
        add %%ebx, %%esi        # Update pointers                       \n\
        add %%ebx, %%edi                                                \n\
        dec %%edx               # And loop                              \n\
        jnz 3b                                                          \n\
        jmp 0b                                                          \n\
                                                                        \n\
mmx.memcpy_last:                                                        \n\
        # Copy last <64 bytes, using the computed jump trick            \n\
        mov %%ecx, %%eax        # EAX <- ECX>>2                         \n\
        shr $2, %%eax                                                   \n\
        lea 0f, %%edx                                                   \n\
        sub %%eax, %%edx                                                \n\
        jmp *%%edx              # Execute 0-15 MOVSD's                  \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
        movsd                                                           \n\
0:      and $0b11, %%ecx        # ECX <- ECX & 3                        \n\
        lea 0f, %%edx                                                   \n\
        sub %%ecx, %%edx                                                \n\
        jmp *%%edx              # Execute 0-3 MOVSB's                   \n\
        movsb                                                           \n\
        movsb                                                           \n\
        movsb                                                           \n\
0:                                                                      \n\
        # All done!                                                     \n\
        emms                    # Clean up MMX state                    \n\
        pop %%edi               # Restore destination (return value)    \n\
        pop %%ebx               # Restore PIC register                  \n\
    " : "+S" (s), "+c" (n)
      : "D" (dest)
      : "%eax", "%edx", "memory", "cc"
    );
    return dest;
}
 
131
#else
19
132
void *ac_memcpy_mmx(void *dest, const void *src, size_t bytes)
20
133
{
21
134
    asm("\
128
241
    );
129
242
    return dest;
130
243
}
131
 
 
 
244
#endif
132
245
#endif  /* ARCH_X86 */
133
246
 
134
247
/*************************************************************************/