2
stores heavily used copy functions (makes mmx support easier)
3
Copyright (C) 2000 Martin Vogt
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU Library General Public License as published by
7
the Free Software Foundation.
9
For more information look at the file COPYRIGHT in this package
14
#include "copyFunctions.h"
18
* We use a lookup table to make sure values stay in the 0..255 range.
19
* Since this is cropping (i.e., x = (x < 0)?0:(x>255)?255:x; ), we call this
20
* table the "crop table".
21
* MAX_NEG_CROP is the maximum neg/pos value we can handle.
24
* We use a lookup table to make sure values stay in the 0..255 range.
25
* Since this is cropping (i.e., x = (x < 0)?0:(x>255)?255:x; ), we call this
26
* table the "crop table".
27
* MAX_NEG_CROP is the maximum neg/pos value we can handle.
30
// Compiler cannot allocate too big arrays.
35
// Constructor: allocates and fills the crop (clamping) lookup table, sets cm
// to the table entry for value 0, and creates the CopyFunctions_MMX helper.
CopyFunctions::CopyFunctions() {
36
/* Initialize crop table. */
37
cropTbl=new unsigned char[NUM_CROP_ENTRIES];
41
// i walks the signed range [-MAX_NEG_CROP, NUM_CROP_ENTRIES - MAX_NEG_CROP);
// the table slot for a value i is i + MAX_NEG_CROP.
for (i = (-MAX_NEG_CROP); i < NUM_CROP_ENTRIES - MAX_NEG_CROP; i++) {
43
// Low end of the range clamps to 0 (guard presumably i <= 0 -- confirm).
cropTbl[i + MAX_NEG_CROP] = 0;
44
} else if (i >= 255) {
45
// Values at or above 255 saturate to 255.
cropTbl[i + MAX_NEG_CROP] = 255;
47
// Pass-through entry: the slot stores i itself.
cropTbl[i + MAX_NEG_CROP] = i;
50
// cm addresses the slot for value 0, so cm[v] is valid for negative v down
// to -MAX_NEG_CROP.
cm=cropTbl + MAX_NEG_CROP;
52
copyFunctions_asm = new CopyFunctions_MMX();
53
// Cache the helper's support() result in lmmx.
lmmx=copyFunctions_asm->support();
60
// Destructor.
// NOTE(review): the constructor allocates cropTbl with new[] and
// copyFunctions_asm with new -- confirm they are released here with
// delete[] and delete respectively.
CopyFunctions::~CopyFunctions() {
64
// Forwards to startNOFloatSection() on the copyFunctions_asm helper.
void CopyFunctions::startNOFloatSection() {
66
copyFunctions_asm->startNOFloatSection();
70
// Forwards to endNOFloatSection() on the copyFunctions_asm helper.
void CopyFunctions::endNOFloatSection() {
71
copyFunctions_asm->endNOFloatSection();
76
// Copies 8 bytes from source1 to dest in each of 8 loop iterations (C path),
// or hands the copy to copyFunctions_asm->copy8_byte.
// NOTE(review): the path is presumably selected by lmmx and inc is presumably
// the per-iteration pointer step -- confirm against the full function.
void CopyFunctions::copy8_byte(unsigned char* source1,
77
unsigned char* dest,int inc) {
81
for (rr = 0; rr < 8; rr++) {
82
// sizeof(char)*8 is 8 bytes.
memcpy(dest,source1,sizeof(char)*8);
88
copyFunctions_asm->copy8_byte(source1,dest,inc);
94
// Copies 8 unsigned shorts from source1 to dest in each of 8 loop iterations.
// NOTE(review): inc is presumably the per-iteration pointer step -- confirm.
void CopyFunctions::copy8_word(unsigned short* source1,
95
unsigned short* dest,int inc) {
98
// Optimisation is slower, leave it in C
99
for (rr = 0; rr < 8; rr++) {
100
// sizeof(short)*8 is the byte size of 8 shorts.
memcpy(dest,source1,sizeof(short)*8);
109
// Converts 8 signed shorts per loop iteration to unsigned chars by looking
// each value up in the crop table through cm (cm = cropTbl + MAX_NEG_CROP),
// or hands the work to copyFunctions_asm->copy8_src1linear_crop.
// NOTE(review): every source1 value must lie in
// [-MAX_NEG_CROP, NUM_CROP_ENTRIES - MAX_NEG_CROP), otherwise the lookup
// indexes outside the table -- confirm callers guarantee this.
void CopyFunctions::copy8_src1linear_crop(short* source1,
110
unsigned char* dest,int inc) {
115
for (rr = 0; rr < 8; rr++) {
117
dest[0] = cm[source1[0]];
118
dest[1] = cm[source1[1]];
119
dest[2] = cm[source1[2]];
120
dest[3] = cm[source1[3]];
121
dest[4] = cm[source1[4]];
122
dest[5] = cm[source1[5]];
123
dest[6] = cm[source1[6]];
124
dest[7] = cm[source1[7]];
132
copyFunctions_asm->copy8_src1linear_crop(source1,dest,inc);
137
// Writes, for 8 bytes per loop iteration, the average of source1 and source2
// rounded half up: (a + b + 1) >> 1. No crop-table lookup is needed because
// the largest possible result, (255 + 255 + 1) >> 1, is 255.
// Alternatively hands the work to copyFunctions_asm->copy8_div2_nocrop.
void CopyFunctions::copy8_div2_nocrop(unsigned char* source1,
138
unsigned char* source2,
139
unsigned char* dest,int inc) {
142
for (rr = 0; rr < 8; rr++) {
144
dest[0] = (int) (source1[0] + source2[0]+1) >> 1;
145
dest[1] = (int) (source1[1] + source2[1]+1) >> 1;
146
dest[2] = (int) (source1[2] + source2[2]+1) >> 1;
147
dest[3] = (int) (source1[3] + source2[3]+1) >> 1;
148
dest[4] = (int) (source1[4] + source2[4]+1) >> 1;
149
dest[5] = (int) (source1[5] + source2[5]+1) >> 1;
150
dest[6] = (int) (source1[6] + source2[6]+1) >> 1;
151
dest[7] = (int) (source1[7] + source2[7]+1) >> 1;
157
copyFunctions_asm->copy8_div2_nocrop(source1,source2, dest, inc);
162
// Writes, for 8 bytes per loop iteration, the truncating average of source1
// and source2: (a + b) >> 1. Alternatively hands the work to
// copyFunctions_asm->copy8_div2_destlinear_nocrop.
// NOTE(review): unlike copy8_div2_nocrop there is no +1 rounding term here;
// confirm that the difference is intentional.
void CopyFunctions::copy8_div2_destlinear_nocrop(unsigned char* source1,
163
unsigned char* source2,
164
unsigned char* dest,int inc) {
168
for (rr = 0; rr < 8; rr++) {
169
dest[0] = (int) (source1[0] + source2[0]) >> 1;
170
dest[1] = (int) (source1[1] + source2[1]) >> 1;
171
dest[2] = (int) (source1[2] + source2[2]) >> 1;
172
dest[3] = (int) (source1[3] + source2[3]) >> 1;
173
dest[4] = (int) (source1[4] + source2[4]) >> 1;
174
dest[5] = (int) (source1[5] + source2[5]) >> 1;
175
dest[6] = (int) (source1[6] + source2[6]) >> 1;
176
dest[7] = (int) (source1[7] + source2[7]) >> 1;
182
copyFunctions_asm->copy8_div2_destlinear_nocrop(source1,source2,dest,inc);
187
// 16-wide variant: writes, for 16 bytes in each of 16 loop iterations, the
// truncating average (a + b) >> 1 of source1 and source2. Alternatively
// hands the work to copyFunctions_asm->copy16_div2_destlinear_nocrop.
void CopyFunctions::copy16_div2_destlinear_nocrop(unsigned char* source1,
188
unsigned char* source2,
189
unsigned char* dest,int inc){
193
for (rr = 0; rr < 16; rr++) {
194
dest[0] = (int) (source1[0] + source2[0]) >> 1;
195
dest[1] = (int) (source1[1] + source2[1]) >> 1;
196
dest[2] = (int) (source1[2] + source2[2]) >> 1;
197
dest[3] = (int) (source1[3] + source2[3]) >> 1;
198
dest[4] = (int) (source1[4] + source2[4]) >> 1;
199
dest[5] = (int) (source1[5] + source2[5]) >> 1;
200
dest[6] = (int) (source1[6] + source2[6]) >> 1;
201
dest[7] = (int) (source1[7] + source2[7]) >> 1;
202
dest[8] = (int) (source1[8] + source2[8]) >> 1;
203
dest[9] = (int) (source1[9] + source2[9]) >> 1;
204
dest[10] = (int) (source1[10] + source2[10]) >> 1;
205
dest[11] = (int) (source1[11] + source2[11]) >> 1;
206
dest[12] = (int) (source1[12] + source2[12]) >> 1;
207
dest[13] = (int) (source1[13] + source2[13]) >> 1;
208
dest[14] = (int) (source1[14] + source2[14]) >> 1;
209
dest[15] = (int) (source1[15] + source2[15]) >> 1;
215
copyFunctions_asm->copy16_div2_destlinear_nocrop(source1,source2,dest,inc);
222
// Writes, for 8 bytes per loop iteration, the four-way average of source1..4
// rounded half up: (a + b + c + d + 2) >> 2. No crop-table lookup is needed
// because the largest possible result, (4*255 + 2) >> 2, is 255.
void CopyFunctions::copy8_div4_nocrop(unsigned char* source1,
223
unsigned char* source2,
224
unsigned char* source3,
225
unsigned char* source4,
226
unsigned char* dest,int inc) {
229
for (rr = 0; rr < 8; rr++) {
230
dest[0]=(int) (source1[0]+source2[0]+source3[0]+source4[0] + 2) >> 2;
231
dest[1]=(int) (source1[1]+source2[1]+source3[1]+source4[1] + 2) >> 2;
232
dest[2]=(int) (source1[2]+source2[2]+source3[2]+source4[2] + 2) >> 2;
233
dest[3]=(int) (source1[3]+source2[3]+source3[3]+source4[3] + 2) >> 2;
234
dest[4]=(int) (source1[4]+source2[4]+source3[4]+source4[4] + 2) >> 2;
235
dest[5]=(int) (source1[5]+source2[5]+source3[5]+source4[5] + 2) >> 2;
236
dest[6]=(int) (source1[6]+source2[6]+source3[6]+source4[6] + 2) >> 2;
237
dest[7]=(int) (source1[7]+source2[7]+source3[7]+source4[7] + 2) >> 2;
247
// Should be MMX: performance analysis shows 8 % of overall time
249
// Adds source1 and source2 element-wise for 8 values per loop iteration and
// clamps each sum through the crop table (cm) before storing it in dest.
// Alternatively hands the work to copyFunctions_asm->copy8_src2linear_crop.
// NOTE(review): the declaration of source2 is not visible here; its type,
// and therefore the possible range of the sum, should be confirmed against
// the table bounds.
void CopyFunctions::copy8_src2linear_crop(unsigned char* source1,
251
unsigned char* dest,int inc) {
254
for (rr = 0; rr < 8; rr++) {
255
dest[0] = cm[(int) source1[0] + (int) source2[0]];
256
dest[1] = cm[(int) source1[1] + (int) source2[1]];
257
dest[2] = cm[(int) source1[2] + (int) source2[2]];
258
dest[3] = cm[(int) source1[3] + (int) source2[3]];
259
dest[4] = cm[(int) source1[4] + (int) source2[4]];
260
dest[5] = cm[(int) source1[5] + (int) source2[5]];
261
dest[6] = cm[(int) source1[6] + (int) source2[6]];
262
dest[7] = cm[(int) source1[7] + (int) source2[7]];
268
copyFunctions_asm->copy8_src2linear_crop(source1,source2,dest,inc);
274
// Should be MMX: performance analysis shows 13 % of overall time
275
// For 8 values per loop iteration: takes the half-up average of source1 and
// source2, (a + b + 1) >> 1, adds source3, and clamps the result through the
// crop table (cm) before storing it in dest. Alternatively hands the work to
// copyFunctions_asm->copy8_div2_src3linear_crop.
// NOTE(review): the declaration of source3 is not visible here -- confirm
// its type and that the sum stays within the table bounds.
void CopyFunctions::copy8_div2_src3linear_crop(unsigned char* source1,
276
unsigned char* source2,
278
unsigned char* dest,int inc) {
281
for (rr = 0; rr < 8; rr++) {
282
dest[0] = cm[((int) (source1[0] + source2[0]+1) >> 1) + source3[0]];
283
dest[1] = cm[((int) (source1[1] + source2[1]+1) >> 1) + source3[1]];
284
dest[2] = cm[((int) (source1[2] + source2[2]+1) >> 1) + source3[2]];
285
dest[3] = cm[((int) (source1[3] + source2[3]+1) >> 1) + source3[3]];
286
dest[4] = cm[((int) (source1[4] + source2[4]+1) >> 1) + source3[4]];
287
dest[5] = cm[((int) (source1[5] + source2[5]+1) >> 1) + source3[5]];
288
dest[6] = cm[((int) (source1[6] + source2[6]+1) >> 1) + source3[6]];
289
dest[7] = cm[((int) (source1[7] + source2[7]+1) >> 1) + source3[7]];
297
copyFunctions_asm->copy8_div2_src3linear_crop(source1,source2,source3,
305
void CopyFunctions::copy8_div4_src5linear_crop(unsigned char* source1,
306
unsigned char* source2,
307
unsigned char* source3,
308
unsigned char* source4,
310
unsigned char* dest,int inc) {
314
for (rr = 0; rr < 8; rr++) {
315
dest[0]=cm[((int) (source1[0]+source2[0]+source3[0]+source4[0]+2) >> 2) + source5[0]];
316
dest[1]=cm[((int) (source1[1]+source2[1]+source3[1]+source4[1]+2) >> 2) + source5[1]];
317
dest[2]=cm[((int) (source1[2]+source2[2]+source3[2]+source4[2]+2) >> 2) + source5[2]];
318
dest[3]=cm[((int) (source1[3]+source2[3]+source3[3]+source4[3]+2) >> 2) + source5[3]];
319
dest[4]=cm[((int) (source1[4]+source2[4]+source3[4]+source4[4]+2) >> 2) + source5[4]];
320
dest[5]=cm[((int) (source1[5]+source2[5]+source3[5]+source4[5]+2) >> 2) + source5[5]];
321
dest[6]=cm[((int) (source1[6]+source2[6]+source3[6]+source4[6]+2) >> 2) + source5[6]];
322
dest[7]=cm[((int) (source1[7]+source2[7]+source3[7]+source4[7]+2) >> 2) + source5[7]];