16
16
#include "nr-pixops.h"
17
17
#include "nr-matrix.h"
23
#endif /* __cplusplus */
25
int nr_have_mmx (void);
26
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
27
const unsigned char *spx, int sw, int sh, int srs,
28
const long *FFd2s, unsigned int alpha);
29
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
30
const unsigned char *spx, int sw, int sh, int srs,
31
const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits);
24
/ *int nr_have_mmx (void);
32
25
// Evaluates to nonzero only when nr_have_mmx () returns nonzero.
// NOTE(review): the leading `1 &&` presumably serves as a manual on/off
// switch for the MMX code paths -- confirm before relying on it.
#define NR_PIXOPS_MMX (1 && nr_have_mmx ())
35
28
#endif //__cplusplus
38
32
/* fixme: Implement missing (Lauris) */
39
33
/* fixme: PREMUL colors before calculating average (Lauris) */
41
35
/* Fixed point precision */
37
// Number of fractional bits of the higher-precision fixed-point format:
// transform coefficients are scaled by (1 << FBITS_HP) when stored in this
// format, and a right shift by (FBITS_HP - FBITS) brings a value back to the
// regular FBITS precision.
#define FBITS_HP 18 // In some places we need a higher precision
44
39
void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
45
40
const unsigned char *spx, int sw, int sh, int srs,
72
67
if (alpha == 0) return;
69
g_warning("In transform PPN alpha=%u>255",alpha);
72
// The color component is temporarily stored with a range of [0,255^3], so any more supersampling would cause an overflow (fortunately, Inkscape's preferences do not allow a higher setting either)
76
80
size = xsize * ysize;
82
unsigned int rounding_fix = size/2;
79
84
/* Set up fixed point matrix */
80
FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5);
81
FFs_x_y = (long) (d2s[1] * (1 << FBITS) + 0.5);
82
FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5);
83
FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5);
84
FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5);
85
FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5);
85
FFs_x_x = (long) floor(d2s[0] * (1 << FBITS) + 0.5);
86
FFs_x_y = (long) floor(d2s[1] * (1 << FBITS) + 0.5);
87
FFs_y_x = (long) floor(d2s[2] * (1 << FBITS) + 0.5);
88
FFs_y_y = (long) floor(d2s[3] * (1 << FBITS) + 0.5);
89
FFs__x = (long) floor(d2s[4] * (1 << FBITS) + 0.5);
90
FFs__y = (long) floor(d2s[5] * (1 << FBITS) + 0.5);
87
92
FFs_x_x_S = FFs_x_x >> xd;
88
93
FFs_x_y_S = FFs_x_y >> xd;
119
124
sy = (FFsy + FF_sy_S[i]) >> FBITS;
120
125
if ((sy >= 0) && (sy < sh)) {
121
126
const unsigned char *s;
123
127
s = spx + sy * srs + sx * 4;
124
ca = NR_PREMUL_112 (s[3], alpha);
125
r += NR_PREMUL_121 (s[0], ca);
126
g += NR_PREMUL_121 (s[1], ca);
127
b += NR_PREMUL_121 (s[2], ca);
128
a += NR_NORMALIZE_21(ca);
128
r += NR_PREMUL_112 (s[0], s[3]); // s in [0,255]
129
g += NR_PREMUL_112 (s[1], s[3]);
130
b += NR_PREMUL_112 (s[2], s[3]);
133
// r,g,b in [0,sum(s3)*255]
137
a = (a*alpha + rounding_fix) >> dbits;
138
// a=sum(s3)*alpha/size=avg(s3)*alpha
139
// Compare to nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P
138
/* Transparent BG, premul src */
141
r = (r*alpha + rounding_fix) >> dbits;
142
g = (g*alpha + rounding_fix) >> dbits;
143
b = (b*alpha + rounding_fix) >> dbits;
144
// r,g,b in [0,avg(s3)*alpha*255]=[0,a*255]
146
/* Full coverage, demul src */
147
d[0] = NR_NORMALIZE_31(r);
148
d[1] = NR_NORMALIZE_31(g);
149
d[2] = NR_NORMALIZE_31(b);
150
d[3] = NR_NORMALIZE_21(a);
151
} else if (d[3] == 0) {
152
/* Only foreground, demul src */
153
d[0] = NR_DEMUL_321(r,a);
154
d[1] = NR_DEMUL_321(g,a);
155
d[2] = NR_DEMUL_321(b,a);
156
d[3] = NR_NORMALIZE_21(a);
145
159
/* Full composition */
146
ca = NR_COMPOSEA_112(a, d[3]);
147
d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
148
d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
149
d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
150
d[3] = NR_NORMALIZE_21(ca);
160
ca = NR_COMPOSEA_213(a, d[3]);
161
d[0] = NR_COMPOSEPNN_321131 (r, a, d[0], d[3], ca);
162
d[1] = NR_COMPOSEPNN_321131 (g, a, d[1], d[3], ca);
163
d[2] = NR_COMPOSEPNN_321131 (b, a, d[2], d[3], ca);
164
d[3] = NR_NORMALIZE_31(ca);
153
167
/* Advance pointers */
169
183
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
170
184
const unsigned char *spx, int sw, int sh, int srs,
171
const long *FFd2s, unsigned int alpha)
185
const long long *FFd2s, unsigned int alpha)
188
long long FFsx0, FFsy0;
225
239
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
226
240
const unsigned char *spx, int sw, int sh, int srs,
227
const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
241
const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
230
244
unsigned char *d0;
245
long long FFsx0, FFsy0;
234
248
size = (1 << dbits);
235
unsigned alpha_rounding_fix = size * 255;
236
unsigned rgb_rounding_fix = size * (255 * 256);
237
if (alpha > 127) ++alpha;
249
unsigned int rounding_fix = size/2;
240
252
FFsx0 = FFd2s[4];
252
264
r = g = b = a = 0;
253
265
for (i = 0; i < size; i++) {
255
sx = (FFsx + FF_S[2 * i]) >> FBITS;
267
sx = (long (FFsx >> (FBITS_HP - FBITS)) + FF_S[2 * i]) >> FBITS;
256
268
if ((sx >= 0) && (sx < sw)) {
257
sy = (FFsy + FF_S[2 * i + 1]) >> FBITS;
269
sy = (long (FFsy >> (FBITS_HP - FBITS)) + FF_S[2 * i + 1]) >> FBITS;
258
270
if ((sy >= 0) && (sy < sh)) {
259
271
const unsigned char *s;
261
272
s = spx + sy * srs + sx * 4;
262
ca = NR_PREMUL_112(s[3], alpha);
263
r += NR_PREMUL_123(s[0], ca);
264
g += NR_PREMUL_123(s[1], ca);
265
b += NR_PREMUL_123(s[2], ca);
273
r += NR_PREMUL_112(s[0], s[3]);
274
g += NR_PREMUL_112(s[1], s[3]);
275
b += NR_PREMUL_112(s[2], s[3]);
270
a = (a + alpha_rounding_fix) >> (8 + dbits);
280
a = (a*alpha + rounding_fix) >> dbits;
272
r = (r + rgb_rounding_fix) >> (16 + dbits);
273
g = (g + rgb_rounding_fix) >> (16 + dbits);
274
b = (b + rgb_rounding_fix) >> (16 + dbits);
275
if ((a == 255) || (d[3] == 0)) {
282
r = (r*alpha + rounding_fix) >> dbits;
283
g = (g*alpha + rounding_fix) >> dbits;
284
b = (b*alpha + rounding_fix) >> dbits;
285
if ((a == 255*255) || (d[3] == 0)) {
276
286
/* Transparent BG, premul src */
287
d[0] = NR_NORMALIZE_31(r);
288
d[1] = NR_NORMALIZE_31(g);
289
d[2] = NR_NORMALIZE_31(b);
290
d[3] = NR_NORMALIZE_21(a);
282
d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
283
d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
284
d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
285
d[3] = NR_COMPOSEA_111(a, d[3]);
292
d[0] = NR_COMPOSEPPP_3211 (r, a, d[0]);
293
d[1] = NR_COMPOSEPPP_3211 (g, a, d[1]);
294
d[2] = NR_COMPOSEPPP_3211 (b, a, d[2]);
295
d[3] = NR_COMPOSEA_211(a, d[3]);
288
298
/* Advance pointers */
315
long long FFd2s_HP[6]; // with higher precision
307
318
if (alpha == 0) return;
320
g_warning("In transform PPN alpha=%u>255",alpha);
323
// The color component is temporarily stored with a range of [0,255^3], so any more supersampling would cause an overflow (fortunately, Inkscape's preferences do not allow a higher setting either)
311
331
for (i = 0; i < 6; i++) {
312
FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
332
FFd2s[i] = (long) floor(d2s[i] * (1 << FBITS) + 0.5);
333
FFd2s_HP[i] = (long long) floor(d2s[i] * (1 << FBITS_HP) + 0.5);;
315
336
if (dbits == 0) {
318
/* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
319
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
323
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
337
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
325
339
int xsize, ysize;
326
340
long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
349
/* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
350
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
354
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
361
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);