19
19
/*
 * 0x8000 placed in each 32-bit half of a 64-bit word.  The two-component
 * multiply macros add this as their rounding term before folding.
 */
#define RB_ONE_HALF ((0x8000ULL << 32) | 0x8000ULL)
20
20
/*
 * Bits 52 and 16 set.  The saturating macros subtract the per-lane overflow
 * flags from this constant and OR the difference into the result; the borrow
 * produces a run of one bits over each lane that overflowed.
 * NOTE(review): the upper bit is bit 52, not bit 48 -- keep the exact value.
 */
#define RB_MASK_PLUS_ONE ((1ULL << 52) | (1ULL << 16))
22
/*
 * Alpha component of packed pixel x: x shifted right by A_SHIFT.
 * No mask is applied, so any bits above the alpha field are kept.
 */
#define Alpha(x)                                                        \
    ((x) >> A_SHIFT)
22
/*
 * Alpha component of packed pixel x: x shifted right by A_SHIFT.
 * Unlike the RED/GREEN/BLUE accessors, no MASK is applied.
 */
#define ALPHA_16(x)                                                     \
    ((x) >> A_SHIFT)
23
/* Red component of packed pixel x: shift right by R_SHIFT, keep MASK bits. */
#define RED_16(x)                                                       \
    (MASK & ((x) >> R_SHIFT))
24
/* Green component of packed pixel x: shift right by G_SHIFT, keep MASK bits. */
#define GREEN_16(x)                                                     \
    (MASK & ((x) >> G_SHIFT))
25
/* Blue component of packed pixel x: the bits of x selected by MASK, unshifted. */
#define BLUE_16(x)                                                      \
    (MASK & (x))
28
/*
 * Multiply two components and scale the product back down.
 *
 * Stores a * b + ONE_HALF in the caller-supplied scratch lvalue t, then
 * yields (t + (t >> G_SHIFT)) >> G_SHIFT.  Assuming ONE_HALF is 0x8000 and
 * G_SHIFT is 16 (TODO confirm against their definitions), this is the
 * rounded quotient (a * b) / 65535 computed without a division.
 *
 * t is written and then read several times; it must be a plain lvalue.
 */
#define IntMult(a,b,t)                                                  \
    ((t) = (a) * (b) + ONE_HALF,                                        \
     (((t) + ((t) >> G_SHIFT)) >> G_SHIFT))
29
/*
 * Scale a up by MASK and divide by b: (a * MASK) / b, with a first converted
 * to uint32_t.  The quotient is truncated.  b must be non-zero; the macro
 * performs no check.
 */
#define IntDiv(a,b)                                                     \
    ((MASK * (uint32_t) (a)) / (b))
31
/*
 * Extract the 16-bit component of v that starts at bit offset i.
 *
 * v is shifted right by i, truncated to uint16_t, and widened to uint32_t so
 * that callers can add or multiply components without 16-bit wraparound.
 *
 * Both parameters are parenthesized so that an argument containing a
 * low-precedence operator (for example `flag ? 32 : 0`) is used as a whole
 * as the shift count.
 */
#define GetComp(v,i) ((uint32_t) (uint16_t) ((v) >> (i)))
33
/*
 * Saturating add of the components of x and y at bit offset i.
 *
 * The two components are summed into the caller-supplied scratch lvalue t.
 * If the sum carries past G_SHIFT bits, (0 - (t >> G_SHIFT)) becomes all
 * ones and the OR forces the low 16 bits to 0xFFFF; otherwise it is zero and
 * the sum passes through.  The result is truncated to uint16_t and shifted
 * back to offset i as a uint64_t.
 *
 * A stray numeric token that had ended up inside the continued body, making
 * every expansion a syntax error, has been removed.
 */
#define Add(x,y,i,t)                                                    \
    ((t) = GetComp (x, i) + GetComp (y, i),                             \
     (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
36
/*
 * Weighted, saturating blend of the components of x and y at bit offset i.
 *
 * Computes IntMult (y_i, ay) + IntMult (x_i, ax) into the scratch lvalue t,
 * using u and v as the scratch lvalues of the two IntMult expansions.  If
 * the sum carries past G_SHIFT bits the low 16 bits are forced to 0xFFFF,
 * as in Add.  The result is truncated to uint16_t and shifted back to offset
 * i as a uint64_t.
 *
 * t, u and v must be three distinct lvalues.
 *
 * Stray numeric tokens that had ended up inside the continued body, making
 * every expansion a syntax error, have been removed.
 */
#define FbGen(x,y,i,ax,ay,t,u,v)                                        \
    ((t) = (IntMult (GetComp (y, i), ay, (u)) +                         \
            IntMult (GetComp (x, i), ax, (v))),                         \
     (uint64_t) ((uint16_t) ((t) |                                      \
                             (0 - ((t) >> G_SHIFT)))) << (i))
42
The methods below use some tricks to be able to do two color
43
components at the same time.
49
#define FbByteMul(x, a) do { \
50
uint64_t t = ((x & RB_MASK) * a) + RB_ONE_HALF; \
51
t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \
54
x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF; \
55
x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)); \
56
x &= RB_MASK << COMPONENT_SIZE; \
61
x_c = (x_c * a) / 65535 + y_c
63
#define FbByteMulAdd(x, a, y) do { \
64
/* multiply and divide: trunc((i + 128)*257/65536) */ \
65
uint64_t t = ((x & RB_MASK) * a) + RB_ONE_HALF; \
66
t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \
73
t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK); \
76
/* multiply and divide */ \
77
x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF; \
78
x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \
82
x += (y >> COMPONENT_SIZE) & RB_MASK; \
85
x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK); \
89
x <<= COMPONENT_SIZE; \
94
x_c = (x_c * a + y_c * b) / 65535
96
#define FbByteAddMul(x, a, y, b) do { \
98
uint64_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
99
r += (r >> G_SHIFT); \
102
t = (x & G_MASK) * a + (y & G_MASK) * b; \
103
t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT); \
107
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
111
r = ((x >> R_SHIFT) & MASK) * a + \
112
((y >> R_SHIFT) & MASK) * b + ONE_HALF; \
113
r += (r >> G_SHIFT); \
116
x = (x & MASK) * a + (y & MASK) * b + ONE_HALF; \
117
x += (x >> G_SHIFT); \
120
x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK); \
126
x_c = (x_c * a_c) / 65535
128
#define FbByteMulC(x, a) do { \
130
uint64_t r = (x & MASK) * (a & MASK); \
131
r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
133
r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
137
t = (x & MASK) * ((a >> G_SHIFT) & MASK); \
138
t |= (x & R_MASK) * (a >> A_SHIFT); \
140
t = t + ((t >> G_SHIFT) & RB_MASK); \
141
x = r | (t & AG_MASK); \
145
x_c = (x_c * a_c) / 65535 + y_c
147
#define FbByteMulAddC(x, a, y) do { \
149
uint64_t r = (x & MASK) * (a & MASK); \
150
r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
152
r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
155
r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK); \
159
t = (x & MASK) * ((a >> G_SHIFT) & MASK); \
160
t |= (x & R_MASK) * (a >> A_SHIFT); \
162
t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
164
t += (y >> G_SHIFT) & RB_MASK; \
165
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
167
x = r | (t << G_SHIFT); \
171
x_c = (x_c * a_c + y_c * b) / 65535
173
#define FbByteAddMulC(x, a, y, b) do { \
175
uint64_t r = (x >> A_SHIFT) * (a >> A_SHIFT) + \
176
(y >> A_SHIFT) * b; \
177
r += (r >> G_SHIFT) + ONE_HALF; \
180
t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b; \
181
t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT); \
185
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
189
r = ((x >> R_SHIFT) & MASK) * ((a >> R_SHIFT) & MASK) + \
190
((y >> R_SHIFT) & MASK) * b + ONE_HALF; \
191
r += (r >> G_SHIFT); \
194
x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF; \
195
x += (x >> G_SHIFT); \
198
x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK); \
204
x_c = min(x_c + y_c, 65535)
206
#define FbByteAdd(x, y) do { \
208
uint64_t r = (x & RB_MASK) + (y & RB_MASK); \
209
r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK); \
212
t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK); \
213
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
214
r |= (t & RB_MASK) << G_SHIFT; \
31
/*
 * Multiply two components and scale the product back down.
 *
 * Stores a * b + ONE_HALF in the caller-supplied scratch lvalue t, then
 * yields (t + (t >> G_SHIFT)) >> G_SHIFT.  Assuming ONE_HALF is 0x8000 and
 * G_SHIFT is 16 (TODO confirm against their definitions), this is the
 * rounded quotient (a * b) / 65535 computed without a division.
 *
 * t is written and then read several times; it must be a plain lvalue.
 *
 * A stray numeric token that had ended up inside the continued body, making
 * every expansion a syntax error, has been removed.
 */
#define MUL_UN16(a, b, t)                                               \
    ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
34
/*
 * Scale a up by MASK and divide by b: (a * MASK) / b, with a first converted
 * to uint32_t.  The quotient is truncated.  b must be non-zero; the macro
 * performs no check.
 *
 * A stray numeric token that had ended up inside the continued body, making
 * every expansion a syntax error, has been removed.
 */
#define DIV_UN16(a, b)                                                  \
    (((uint32_t) (a) * MASK) / (b))
37
#define ADD_UN16(x, y, t) \
39
(uint64_t) (uint16_t) ((t) | (0 - ((t) >> G_SHIFT))))
41
/*
 * Scale an already-multiplied product x back down to one component.
 *
 * Yields (x + ONE_HALF + ((x + ONE_HALF) >> G_SHIFT)) >> G_SHIFT, i.e. the
 * same rounding fold used by MUL_UN16 but without a scratch variable.
 * Assuming ONE_HALF is 0x8000 and G_SHIFT is 16 (TODO confirm), this is the
 * rounded quotient x / 65535.
 *
 * x is evaluated twice, so it must not have side effects.
 *
 * A stray numeric token that had ended up inside the continued body, making
 * every expansion a syntax error, has been removed.
 */
#define DIV_ONE_UN16(x)                                                 \
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
45
* The methods below use some tricks to be able to do two color
46
* components at the same time.
50
* x_rb = (x_rb * a) / 65535
52
#define UN16_rb_MUL_UN16(x, a, t) \
55
t = ((x) & RB_MASK) * (a); \
57
x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
62
* x_rb = min (x_rb + y_rb, 65535)
64
#define UN16_rb_ADD_UN16_rb(x, y, t) \
68
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
73
* x_rb = (x_rb * a_rb) / 65535
75
#define UN16_rb_MUL_UN16_rb(x, a, t) \
78
t = (x & MASK) * (a & MASK); \
79
t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
81
t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
86
* x_c = (x_c * a) / 65535
88
#define UN16x4_MUL_UN16(x, a) \
94
UN16_rb_MUL_UN16 (r1, a, t); \
96
r2 = (x) >> G_SHIFT; \
97
UN16_rb_MUL_UN16 (r2, a, t); \
99
x = r1 | (r2 << G_SHIFT); \
103
* x_c = (x_c * a) / 65535 + y_c
105
#define UN16x4_MUL_UN16_ADD_UN16x4(x, a, y) \
108
uint64_t r1, r2, r3, t; \
111
r2 = (y) & RB_MASK; \
112
UN16_rb_MUL_UN16 (r1, a, t); \
113
UN16_rb_ADD_UN16_rb (r1, r2, t); \
115
r2 = (x) >> G_SHIFT; \
116
r3 = ((y) >> G_SHIFT) & RB_MASK; \
117
UN16_rb_MUL_UN16 (r2, a, t); \
118
UN16_rb_ADD_UN16_rb (r2, r3, t); \
120
x = r1 | (r2 << G_SHIFT); \
124
* x_c = (x_c * a + y_c * b) / 65535
126
#define UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16(x, a, y, b) \
129
uint64_t r1, r2, r3, t; \
133
UN16_rb_MUL_UN16 (r1, a, t); \
134
UN16_rb_MUL_UN16 (r2, b, t); \
135
UN16_rb_ADD_UN16_rb (r1, r2, t); \
137
r2 = (x >> G_SHIFT); \
138
r3 = (y >> G_SHIFT); \
139
UN16_rb_MUL_UN16 (r2, a, t); \
140
UN16_rb_MUL_UN16 (r3, b, t); \
141
UN16_rb_ADD_UN16_rb (r2, r3, t); \
143
x = r1 | (r2 << G_SHIFT); \
147
* x_c = (x_c * a_c) / 65535
149
#define UN16x4_MUL_UN16x4(x, a) \
152
uint64_t r1, r2, r3, t; \
156
UN16_rb_MUL_UN16_rb (r1, r2, t); \
160
UN16_rb_MUL_UN16_rb (r2, r3, t); \
162
x = r1 | (r2 << G_SHIFT); \
166
* x_c = (x_c * a_c) / 65535 + y_c
168
#define UN16x4_MUL_UN16x4_ADD_UN16x4(x, a, y) \
171
uint64_t r1, r2, r3, t; \
175
UN16_rb_MUL_UN16_rb (r1, r2, t); \
177
UN16_rb_ADD_UN16_rb (r1, r2, t); \
179
r2 = (x >> G_SHIFT); \
180
r3 = (a >> G_SHIFT); \
181
UN16_rb_MUL_UN16_rb (r2, r3, t); \
182
r3 = (y >> G_SHIFT) & RB_MASK; \
183
UN16_rb_ADD_UN16_rb (r2, r3, t); \
185
x = r1 | (r2 << G_SHIFT); \
189
* x_c = (x_c * a_c + y_c * b) / 65535
191
#define UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16(x, a, y, b) \
194
uint64_t r1, r2, r3, t; \
198
UN16_rb_MUL_UN16_rb (r1, r2, t); \
200
UN16_rb_MUL_UN16 (r2, b, t); \
201
UN16_rb_ADD_UN16_rb (r1, r2, t); \
205
UN16_rb_MUL_UN16_rb (r2, r3, t); \
207
UN16_rb_MUL_UN16 (r3, b, t); \
208
UN16_rb_ADD_UN16_rb (r2, r3, t); \
210
x = r1 | (r2 << G_SHIFT); \
214
x_c = min(x_c + y_c, 65535)
216
#define UN16x4_ADD_UN16x4(x, y) \
219
uint64_t r1, r2, r3, t; \
223
UN16_rb_ADD_UN16_rb (r1, r2, t); \
225
r2 = (x >> G_SHIFT) & RB_MASK; \
226
r3 = (y >> G_SHIFT) & RB_MASK; \
227
UN16_rb_ADD_UN16_rb (r2, r3, t); \
229
x = r1 | (r2 << G_SHIFT); \