27
27
#include <stddef.h>
29
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
29
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
32
const uint8_t *s = src;
33
const uint8_t *s = src;
34
const uint8_t *end = s + src_size;
38
/* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
38
/* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
53
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
53
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
56
const uint8_t *s = src;
57
const uint8_t *s = src;
58
const uint8_t *end = s + src_size;
63
/* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
62
/* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
79
original by Strepto/Astral
80
ported to gcc & bugfixed: A'rpi
81
MMX2, 3DNOW optimization by Nick Kurshev
82
32-bit C version, and and&add trick by Michael Niedermayer
78
* original by Strepto/Astral
79
* ported to gcc & bugfixed: A'rpi
80
* MMXEXT, 3DNOW optimization by Nick Kurshev
81
* 32-bit C version, and and&add trick by Michael Niedermayer
84
83
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
86
register const uint8_t* s=src;
87
register uint8_t* d=dst;
88
register const uint8_t *end;
89
const uint8_t *mm_end;
85
register uint8_t *d = dst;
86
register const uint8_t *s = src;
87
register const uint8_t *end = s + src_size;
88
const uint8_t *mm_end = end - 3;
92
90
while (s < mm_end) {
93
register unsigned x= *((const uint32_t *)s);
94
*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
91
register unsigned x = *((const uint32_t *)s);
92
*((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
99
register unsigned short x= *((const uint16_t *)s);
100
*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
97
register unsigned short x = *((const uint16_t *)s);
98
*((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0);
104
102
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
106
register const uint8_t* s=src;
107
register uint8_t* d=dst;
108
register const uint8_t *end;
109
const uint8_t *mm_end;
104
register uint8_t *d = dst;
105
register const uint8_t *s = src;
106
register const uint8_t *end = s + src_size;
107
const uint8_t *mm_end = end - 3;
113
109
while (s < mm_end) {
114
register uint32_t x= *((const uint32_t*)s);
115
*((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
110
register uint32_t x = *((const uint32_t *)s);
111
*((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
120
register uint16_t x= *((const uint16_t*)s);
121
*((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
116
register uint16_t x = *((const uint16_t *)s);
117
*((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
125
121
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
127
const uint8_t *s = src;
129
uint16_t *d = (uint16_t *)dst;
123
uint16_t *d = (uint16_t *)dst;
124
const uint8_t *s = src;
125
const uint8_t *end = s + src_size;
132
127
while (s < end) {
133
register int rgb = *(const uint32_t*)s; s += 4;
134
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
128
register int rgb = *(const uint32_t *)s;
130
*d++ = ((rgb & 0xFF) >> 3) +
131
((rgb & 0xFC00) >> 5) +
132
((rgb & 0xF80000) >> 8);
138
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
136
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
140
const uint8_t *s = src;
142
uint16_t *d = (uint16_t *)dst;
139
uint16_t *d = (uint16_t *)dst;
140
const uint8_t *s = src;
141
const uint8_t *end = s + src_size;
144
143
while (s < end) {
145
register int rgb = *(const uint32_t*)s; s += 4;
146
*d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
144
register int rgb = *(const uint32_t *)s;
146
*d++ = ((rgb & 0xF8) << 8) +
147
((rgb & 0xFC00) >> 5) +
148
((rgb & 0xF80000) >> 19);
150
152
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
152
const uint8_t *s = src;
154
uint16_t *d = (uint16_t *)dst;
157
register int rgb = *(const uint32_t*)s; s += 4;
158
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
162
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
164
const uint8_t *s = src;
166
uint16_t *d = (uint16_t *)dst;
169
register int rgb = *(const uint32_t*)s; s += 4;
170
*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
174
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
176
const uint8_t *s = src;
178
uint16_t *d = (uint16_t *)dst;
154
uint16_t *d = (uint16_t *)dst;
155
const uint8_t *s = src;
156
const uint8_t *end = s + src_size;
159
register int rgb = *(const uint32_t *)s;
161
*d++ = ((rgb & 0xFF) >> 3) +
162
((rgb & 0xF800) >> 6) +
163
((rgb & 0xF80000) >> 9);
167
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
170
uint16_t *d = (uint16_t *)dst;
171
const uint8_t *s = src;
172
const uint8_t *end = s + src_size;
175
register int rgb = *(const uint32_t *)s;
177
*d++ = ((rgb & 0xF8) << 7) +
178
((rgb & 0xF800) >> 6) +
179
((rgb & 0xF80000) >> 19);
183
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
186
uint16_t *d = (uint16_t *)dst;
187
const uint8_t *s = src;
188
const uint8_t *end = s + src_size;
180
190
while (s < end) {
181
191
const int b = *s++;
182
192
const int g = *s++;
183
193
const int r = *s++;
184
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
194
*d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
188
198
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
190
const uint8_t *s = src;
192
uint16_t *d = (uint16_t *)dst;
200
uint16_t *d = (uint16_t *)dst;
201
const uint8_t *s = src;
202
const uint8_t *end = s + src_size;
194
204
while (s < end) {
195
205
const int r = *s++;
196
206
const int g = *s++;
197
207
const int b = *s++;
198
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
208
*d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
202
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
212
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
204
const uint8_t *s = src;
206
uint16_t *d = (uint16_t *)dst;
215
uint16_t *d = (uint16_t *)dst;
216
const uint8_t *s = src;
217
const uint8_t *end = s + src_size;
208
219
while (s < end) {
209
220
const int b = *s++;
210
221
const int g = *s++;
211
222
const int r = *s++;
212
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
223
*d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
216
227
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
218
const uint8_t *s = src;
220
uint16_t *d = (uint16_t *)dst;
229
uint16_t *d = (uint16_t *)dst;
230
const uint8_t *s = src;
231
const uint8_t *end = s + src_size;
222
233
while (s < end) {
223
234
const int r = *s++;
224
235
const int g = *s++;
225
236
const int b = *s++;
226
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
237
*d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
231
I use less accurate approximation here by simply left-shifting the input
232
value and filling the low order bits with zeroes. This method improves PNG
233
compression but this scheme cannot reproduce white exactly, since it does
234
not generate an all-ones maximum value; the net effect is to darken the
237
The better method should be "left bit replication":
247
| leftmost bits repeated to fill open bits
251
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
242
* I use less accurate approximation here by simply left-shifting the input
243
* value and filling the low order bits with zeroes. This method improves PNG
244
* compression but this scheme cannot reproduce white exactly, since it does
245
* not generate an all-ones maximum value; the net effect is to darken the
248
* The better method should be "left bit replication":
258
* | leftmost bits repeated to fill open bits
262
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
255
const uint16_t *s = (const uint16_t*)src;
256
end = s + src_size/2;
266
const uint16_t *s = (const uint16_t *)src;
267
const uint16_t *end = s + src_size / 2;
257
269
while (s < end) {
258
register uint16_t bgr;
260
*d++ = (bgr&0x1F)<<3;
261
*d++ = (bgr&0x3E0)>>2;
262
*d++ = (bgr&0x7C00)>>7;
270
register uint16_t bgr = *s++;
271
*d++ = (bgr & 0x1F) << 3;
272
*d++ = (bgr & 0x3E0) >> 2;
273
*d++ = (bgr & 0x7C00) >> 7;
266
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
277
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
269
uint8_t *d = (uint8_t *)dst;
270
const uint16_t *s = (const uint16_t *)src;
271
end = s + src_size/2;
280
uint8_t *d = (uint8_t *)dst;
281
const uint16_t *s = (const uint16_t *)src;
282
const uint16_t *end = s + src_size / 2;
272
284
while (s < end) {
273
register uint16_t bgr;
275
*d++ = (bgr&0x1F)<<3;
276
*d++ = (bgr&0x7E0)>>3;
277
*d++ = (bgr&0xF800)>>8;
285
register uint16_t bgr = *s++;
286
*d++ = (bgr & 0x1F) << 3;
287
*d++ = (bgr & 0x7E0) >> 3;
288
*d++ = (bgr & 0xF800) >> 8;
281
292
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
285
const uint16_t *s = (const uint16_t *)src;
286
end = s + src_size/2;
295
const uint16_t *s = (const uint16_t *)src;
296
const uint16_t *end = s + src_size / 2;
287
298
while (s < end) {
288
register uint16_t bgr;
299
register uint16_t bgr = *s++;
290
300
#if HAVE_BIGENDIAN
292
*d++ = (bgr&0x7C00)>>7;
293
*d++ = (bgr&0x3E0)>>2;
294
*d++ = (bgr&0x1F)<<3;
302
*d++ = (bgr & 0x7C00) >> 7;
303
*d++ = (bgr & 0x3E0) >> 2;
304
*d++ = (bgr & 0x1F) << 3;
296
*d++ = (bgr&0x1F)<<3;
297
*d++ = (bgr&0x3E0)>>2;
298
*d++ = (bgr&0x7C00)>>7;
306
*d++ = (bgr & 0x1F) << 3;
307
*d++ = (bgr & 0x3E0) >> 2;
308
*d++ = (bgr & 0x7C00) >> 7;
541
548
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
542
549
int srcHeight, int srcStride, int dstStride)
549
for (x=0; x<srcWidth-1; x++) {
550
dst[2*x+1]= (3*src[x] + src[x+1])>>2;
551
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
556
for (x = 0; x < srcWidth - 1; x++) {
557
dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
558
dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
553
dst[2*srcWidth-1]= src[srcWidth-1];
557
for (y=1; y<srcHeight; y++) {
560
dst[2 * srcWidth - 1] = src[srcWidth - 1];
564
for (y = 1; y < srcHeight; y++) {
558
565
const int mmxSize = 1;
560
dst[0 ]= (3*src[0] + src[srcStride])>>2;
561
dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
567
dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
568
dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
563
for (x=mmxSize-1; x<srcWidth-1; x++) {
564
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
565
dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
566
dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
567
dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
570
for (x = mmxSize - 1; x < srcWidth - 1; x++) {
571
dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
572
dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
573
dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2;
574
dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2;
569
dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
570
dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
576
dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
577
dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
579
dst += dstStride * 2;
579
for (x=0; x<srcWidth-1; x++) {
580
dst[2*x+1]= (3*src[x] + src[x+1])>>2;
581
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
586
for (x = 0; x < srcWidth - 1; x++) {
587
dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
588
dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
583
dst[2*srcWidth-1]= src[srcWidth-1];
590
dst[2 * srcWidth - 1] = src[srcWidth - 1];