~ubuntu-branches/ubuntu/hoary/kdemultimedia/hoary

« back to all changes in this revision

Viewing changes to mpeglib/lib/util/render/dither/ditherer_mmx16.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Martin Schulze
  • Date: 2003-01-22 15:00:51 UTC
  • Revision ID: james.westby@ubuntu.com-20030122150051-uihwkdoxf15mi1tn
Tags: upstream-2.2.2
Import upstream version 2.2.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
 
 
2
#include "ditherMMX.h"
 
3
 
 
4
 
 
5
                
 
6
#ifndef INTEL
 
7
// nothing
 
8
// Stub compiled when INTEL is not defined. It performs no dithering: it
// reports the unexpected call and terminates the process.
//
// All parameters are ignored. The diagnostics go to stderr and the process
// exits with a failure status, because reaching this function is an error
// condition (the messages point at the MMX detection routine).
void  ditherBlock(unsigned char *lum, unsigned char *cr, unsigned char *cb,
                  unsigned char *out,
                  int cols, int rows, int screen_width) {
  fprintf(stderr, "call to ditherBlock. this should never happen\n");
  fprintf(stderr, "check mmx detection routine.\n");
  exit(EXIT_FAILURE);
}
 
15
#else   
 
16
        
 
17
 
 
18
// Constant tables for the MMX inline assembly in ditherBlock(). The asm names
// these symbols directly as 64-bit memory operands, so every table must occupy
// exactly 8 bytes: either two 32-bit words or four 16-bit words. The 32-bit
// pairs use `unsigned int` (not `unsigned long`) so that the 8-byte layout does
// not depend on the width of `long`.
//
// The colour coefficients are fixed-point values scaled by 64: the asm shifts
// the summed products right by 6 (psraw $6) before packing.

// Not referenced by the asm visible in this file; only printed by dummy_dithermmx16().
static long long MMX16_0 = 0LL;

// 0x0010 in every 16-bit lane. Only used by psubw lines that are commented out.
static unsigned int   MMX16_10w[2]        = {0x00100010, 0x00100010};
// 0x0080 in every 16-bit lane; subtracted from the widened Cb/Cr samples.
static unsigned int   MMX16_80w[2]        = {0x00800080, 0x00800080};
// Keeps the low byte of every 16-bit lane (the even-indexed luminance bytes).
static unsigned int   MMX16_00FFw[2]      = {0x00ff00ff, 0x00ff00ff};

// Cb -> blue contribution (0x81 = 129).
static unsigned short MMX16_Ublucoeff[4]  = {0x81, 0x81, 0x81, 0x81};
// Cr -> red contribution (0x66 = 102).
static unsigned short MMX16_Vredcoeff[4]  = {0x66, 0x66, 0x66, 0x66};
// Cb -> green contribution (0xffe8 is -24 as a signed 16-bit value).
static unsigned short MMX16_Ugrncoeff[4]  = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
// Cr -> green contribution (0xffcd is -51 as a signed 16-bit value).
static unsigned short MMX16_Vgrncoeff[4]  = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
// Luminance multiplier (0x4a = 74).
static unsigned short MMX16_Ycoeff[4]     = {0x4a, 0x4a, 0x4a, 0x4a};

// Top five bits of a 16-bit pixel. Used for red, and for blue before the asm
// shifts blue right by 11.
static unsigned short MMX16_redmask[4]    = {0xf800, 0xf800, 0xf800, 0xf800};
// Bits 5..10 of a 16-bit pixel (six green bits).
static unsigned short MMX16_grnmask[4]    = {0x7e0, 0x7e0, 0x7e0, 0x7e0};
 
29
 
 
30
void dummy_dithermmx16() {
 
31
  cout << "MMX16_0"<<MMX16_0<<endl;
 
32
  cout << "MMX16_10w:"<<MMX16_10w<<endl;
 
33
  cout << "MMX16_80w:"<<MMX16_80w<<endl;
 
34
  cout << "MMX16_Ublucoeff:"<<MMX16_Ublucoeff<<endl;
 
35
  cout << "MMX16_Vredcoeff:"<<MMX16_Vredcoeff<<endl;
 
36
  cout << "MMX16_Ugrncoeff:"<<MMX16_Ugrncoeff<<endl;
 
37
  cout << "MMX16_Vgrncoeff:"<<MMX16_Vgrncoeff<<endl;
 
38
  cout << "MMX16_Ycoeff:"<<MMX16_Ycoeff<<endl;
 
39
  cout << "MMX16_redmask:"<<MMX16_redmask<<endl;
 
40
  cout << "MMX16_grnmask:"<<MMX16_grnmask<<endl;
 
41
  cout << "MMX16_00FFw:"<<MMX16_00FFw<<endl;
 
42
}
 
43
 
 
44
 
 
45
void  ditherBlock(unsigned char *lum, 
 
46
                  unsigned char *cr, 
 
47
                  unsigned char *cb,
 
48
                  unsigned char *out,
 
49
                  int rows, 
 
50
                  int cols, 
 
51
                  int mod) {
 
52
 
 
53
    unsigned short *row1;
 
54
    unsigned short *row2;
 
55
    row1 = (unsigned short* )out;         // 16 bit target
 
56
 
 
57
    unsigned char* end = lum +cols*rows;    // Pointer to the end
 
58
    int x=cols;
 
59
    row2=row1+mod+cols;                   // start of second row 
 
60
    mod=2*cols+4*mod;                     // increment for row1 in byte
 
61
 
 
62
    // buffer for asm function
 
63
    int buf[6];
 
64
    buf[0]=(int)(lum+cols);   // lum2 pointer
 
65
    buf[1]=(int)end;     
 
66
    buf[2]=x;
 
67
    buf[3]=mod;     
 
68
    buf[4]=0; //tmp0;
 
69
    buf[5]=cols;
 
70
 
 
71
 
 
72
 
 
73
    __asm__ __volatile__(
 
74
         ".align 32\n"
 
75
         "1:\n"
 
76
         "movd           (%1),                   %%mm0\n"        // 4 Cb         0  0  0  0 u3 u2 u1 u0
 
77
         "pxor           %%mm7,                  %%mm7\n"
 
78
         "movd           (%0),                   %%mm1\n" // 4 Cr                0  0  0  0 v3 v2 v1 v0
 
79
         "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
 
80
         "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
 
81
         "psubw          MMX16_80w,                %%mm0\n"
 
82
         "psubw          MMX16_80w,                %%mm1\n"
 
83
         "movq           %%mm0,                  %%mm2\n"        // Cb                   0 u3  0 u2  0 u1  0 u0
 
84
         "movq           %%mm1,                  %%mm3\n" // Cr
 
85
         "pmullw         MMX16_Ugrncoeff,          %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
 
86
         "movq           (%2),                   %%mm6\n"        // L1      l7 L6 L5 L4 L3 L2 L1 L0
 
87
         "pmullw         MMX16_Ublucoeff,          %%mm0\n" // Cb2blue
 
88
         "pand           MMX16_00FFw,              %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
 
89
         "pmullw         MMX16_Vgrncoeff,          %%mm3\n" // Cr2green
 
90
         "movq           (%2),                   %%mm7\n" // L2
 
91
         "pmullw         MMX16_Vredcoeff,          %%mm1\n" // Cr2red
 
92
         //                      "psubw          MMX16_10w,                %%mm6\n"
 
93
         "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
 
94
         "pmullw         MMX16_Ycoeff,             %%mm6\n" // lum1
 
95
         //                      "psubw          MMX16_10w,                %%mm7\n" // L2
 
96
         "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
 
97
         "pmullw         MMX16_Ycoeff,             %%mm7\n"  // lum2
 
98
 
 
99
         "movq           %%mm6,                  %%mm4\n"  // lum1
 
100
         "paddw          %%mm0,                  %%mm6\n"  // lum1 +blue 00 B6 00 B4 00 B2 00 B0
 
101
         "movq           %%mm4,                  %%mm5\n"  // lum1
 
102
         "paddw          %%mm1,                  %%mm4\n"  // lum1 +red  00 R6 00 R4 00 R2 00 R0
 
103
         "paddw          %%mm2,                  %%mm5\n"  // lum1 +green 00 G6 00 G4 00 G2 00 G0
 
104
         "psraw          $6,                     %%mm4\n"  // R1 0 .. 64
 
105
         "movq           %%mm7,                  %%mm3\n"  // lum2                       00 L7 00 L5 00 L3 00 L1
 
106
         "psraw          $6,                     %%mm5\n"  // G1  - .. +
 
107
         "paddw          %%mm0,                  %%mm7\n"  // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
 
108
         "psraw          $6,                     %%mm6\n"  // B1         0 .. 64
 
109
         "packuswb       %%mm4,                  %%mm4\n"  // R1 R1
 
110
         "packuswb       %%mm5,                  %%mm5\n"  // G1 G1
 
111
         "packuswb       %%mm6,                  %%mm6\n"  // B1 B1
 
112
         "punpcklbw      %%mm4,                  %%mm4\n"
 
113
         "punpcklbw      %%mm5,                  %%mm5\n"
 
114
 
 
115
         "pand           MMX16_redmask,            %%mm4\n"
 
116
         "psllw          $3,                     %%mm5\n"  // GREEN       1
 
117
         "punpcklbw      %%mm6,                  %%mm6\n"
 
118
         "pand           MMX16_grnmask,            %%mm5\n"
 
119
         "pand           MMX16_redmask,            %%mm6\n"
 
120
         "por            %%mm5,                  %%mm4\n" //
 
121
         "psrlw          $11,                    %%mm6\n"                // BLUE        1
 
122
         "movq           %%mm3,                  %%mm5\n" // lum2
 
123
         "paddw          %%mm1,                  %%mm3\n"        // lum2 +red      00 R7 00 R5 00 R3 00 R1
 
124
         "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
 
125
         "psraw          $6,                     %%mm3\n" // R2
 
126
         "por            %%mm6,                  %%mm4\n" // MM4
 
127
         "psraw          $6,                     %%mm5\n" // G2
 
128
 
 
129
         "movl %2,16%5\n"               // store register in tmp0
 
130
         "movl %5,%2\n"                 // lum2->register
 
131
         "movq (%2),%%mm6\n"            // 0  0  0  0 L3 L2 L1 L0 (load lum2)
 
132
 
 
133
 
 
134
         //"movq           (%2, %5),               %%mm6\n" // L3 load lum2
 
135
         "psraw          $6,                     %%mm7\n"
 
136
         "packuswb       %%mm3,                  %%mm3\n"
 
137
         "packuswb       %%mm5,                  %%mm5\n"
 
138
         "packuswb       %%mm7,                  %%mm7\n"
 
139
         "pand                   MMX16_00FFw,              %%mm6\n"  // L3
 
140
         "punpcklbw      %%mm3,                  %%mm3\n"
 
141
         //                              "psubw          MMX16_10w,                        %%mm6\n"  // L3
 
142
         "punpcklbw      %%mm5,                  %%mm5\n"
 
143
         "pmullw         MMX16_Ycoeff,             %%mm6\n"  // lum3
 
144
         "punpcklbw      %%mm7,                  %%mm7\n"
 
145
         "psllw          $3,                             %%mm5\n"  // GREEN 2
 
146
         "pand                   MMX16_redmask,    %%mm7\n"
 
147
         "pand                   MMX16_redmask,    %%mm3\n"
 
148
         "psrlw          $11,                            %%mm7\n"  // BLUE  2
 
149
         "pand                   MMX16_grnmask,    %%mm5\n"
 
150
         "por                    %%mm7,                  %%mm3\n"
 
151
         
 
152
         "movq                   (%2),        %%mm7\n"  // L4 load lum2
 
153
         "movl 16%5,%2\n"               // tmp0->register
 
154
 
 
155
         "por                    %%mm5,                  %%mm3\n"     //
 
156
         "psrlw          $8,                             %%mm7\n"    // L4
 
157
         "movq                   %%mm4,                  %%mm5\n"
 
158
         //                              "psubw          MMX16_10w,                        %%mm7\n"                // L4
 
159
         "punpcklwd      %%mm3,                  %%mm4\n"
 
160
         "pmullw         MMX16_Ycoeff,             %%mm7\n"    // lum4
 
161
         "punpckhwd      %%mm3,                  %%mm5\n"
 
162
 
 
163
         "movq                   %%mm4,                  (%3)\n" // write row1
 
164
         "movq                   %%mm5,                  8(%3)\n" // write row1
 
165
 
 
166
         "movq                   %%mm6,                  %%mm4\n"        // Lum3
 
167
         "paddw          %%mm0,                  %%mm6\n"                // Lum3 +blue
 
168
 
 
169
         "movq                   %%mm4,                  %%mm5\n"                        // Lum3
 
170
         "paddw          %%mm1,                  %%mm4\n"       // Lum3 +red
 
171
         "paddw          %%mm2,                  %%mm5\n"                        // Lum3 +green
 
172
         "psraw          $6,                             %%mm4\n"
 
173
         "movq                   %%mm7,                  %%mm3\n"                        // Lum4
 
174
         "psraw          $6,                             %%mm5\n"
 
175
         "paddw          %%mm0,                  %%mm7\n"                   // Lum4 +blue
 
176
         "psraw          $6,                             %%mm6\n"                        // Lum3 +blue
 
177
         "movq                   %%mm3,                  %%mm0\n"  // Lum4
 
178
         "packuswb       %%mm4,                  %%mm4\n"
 
179
         "paddw          %%mm1,                  %%mm3\n"  // Lum4 +red
 
180
         "packuswb       %%mm5,                  %%mm5\n"
 
181
         "paddw          %%mm2,                  %%mm0\n"         // Lum4 +green
 
182
         "packuswb       %%mm6,                  %%mm6\n"
 
183
         "punpcklbw      %%mm4,                  %%mm4\n"
 
184
         "punpcklbw      %%mm5,                  %%mm5\n"
 
185
         "punpcklbw      %%mm6,                  %%mm6\n"
 
186
         "psllw          $3,                             %%mm5\n" // GREEN 3
 
187
         "pand                   MMX16_redmask,    %%mm4\n"
 
188
         "psraw          $6,                             %%mm3\n" // psr 6
 
189
         "psraw          $6,                             %%mm0\n"
 
190
         "pand                   MMX16_redmask,    %%mm6\n" // BLUE
 
191
         "pand                   MMX16_grnmask,    %%mm5\n"
 
192
         "psrlw          $11,                            %%mm6\n"  // BLUE  3
 
193
         "por                    %%mm5,                  %%mm4\n"
 
194
         "psraw          $6,                             %%mm7\n"
 
195
         "por                    %%mm6,                  %%mm4\n"
 
196
         "packuswb       %%mm3,                  %%mm3\n"
 
197
         "packuswb       %%mm0,                  %%mm0\n"
 
198
         "packuswb       %%mm7,                  %%mm7\n"
 
199
         "punpcklbw      %%mm3,                  %%mm3\n"
 
200
         "punpcklbw      %%mm0,                  %%mm0\n"
 
201
         "punpcklbw      %%mm7,                  %%mm7\n"
 
202
         "pand                   MMX16_redmask,    %%mm3\n"
 
203
         "pand                   MMX16_redmask,    %%mm7\n" // BLUE
 
204
         "psllw          $3,                             %%mm0\n" // GREEN 4
 
205
         "psrlw          $11,                            %%mm7\n"
 
206
         "pand                   MMX16_grnmask,    %%mm0\n"
 
207
         "por                    %%mm7,                  %%mm3\n"
 
208
         "por                    %%mm0,                  %%mm3\n"
 
209
 
 
210
         "movq                   %%mm4,                  %%mm5\n"
 
211
 
 
212
         "punpcklwd      %%mm3,                  %%mm4\n"
 
213
         "punpckhwd      %%mm3,                  %%mm5\n"
 
214
 
 
215
         "movq                   %%mm4,                  (%4)\n"
 
216
         "movq                   %%mm5,                  8(%4)\n"
 
217
 
 
218
         "subl      $8, 8%5\n" // x-=8
 
219
         "addl      $8, %5\n"            // lum2+8
 
220
         "addl      $8, %2\n"
 
221
         "addl      $4, %0\n"
 
222
         "addl      $4, %1\n"
 
223
         "cmpl      $0, 8%5\n"
 
224
         "leal  16(%3), %3\n"
 
225
         "leal  16(%4), %4\n"        // row2+16
 
226
 
 
227
 
 
228
         "jne            1b\n"
 
229
         "addl           20%5,   %2\n" // lum += cols 
 
230
 
 
231
         "movl %2,16%5\n"              // store register in tmp0
 
232
         "movl 20%5,%2\n"              // cols->register
 
233
 
 
234
         "addl           %2,     %5\n" // lum2 += cols 
 
235
         "addl           12%5,   %3\n" // row1+= mod
 
236
         "addl           12%5,   %4\n" // row2+= mod
 
237
         "movl           %2,     8%5\n" // x=cols
 
238
         "movl 16%5,%2\n"              // store tmp0 in register
 
239
 
 
240
         "cmpl           4%5,    %2\n"
 
241
         "jl             1b\n"
 
242
 
 
243
         :
 
244
         :"r" (cr), "r"(cb),"r"(lum),
 
245
         "r"(row1),"r"(row2),"m"(buf[0])
 
246
 
 
247
         );
 
248
      __asm__ (
 
249
         "emms\n"
 
250
         );
 
251
 
 
252
   }
 
253
 
 
254
#endif