2
* Intra predict 4 Intra_4x4 luma blocks
3
* Copyright © <2010>, Intel Corporation.
5
* This program is licensed under the terms and conditions of the
6
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
7
* http://www.opensource.org/licenses/eclipse-1.0.php.
10
#if !defined(__INTRA_PRED_4X4_Y_4__) // Make sure this is only included once
11
#define __INTRA_PRED_4X4_Y_4__
13
// Module name: intra_Pred_4x4_Y_4.asm
15
// Intra predict 4 Intra_4x4 luma blocks
//
// The code below handles sub-blocks 0..3 in order. For each sub-block it sets
// up the reference samples and availability flags, calls the handler selected
// by that sub-block's prediction mode, then adds the error data to the
// predicted samples and stores the saturated result in the prediction buffer.
// Later sub-blocks read their reference samples back from that buffer.
17
//--------------------------------------------------------------------------
// Input data:
20
// REF_TOP: Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1)
21
// REF_LEFT: Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0)
22
// PRED_MODE: Intra prediction mode stored in 4 words (4 LSB)
23
// REG_INTRA_PRED_AVAIL: Top/Left available flag, (Bit0: Left, Bit1: Top)
25
//--------------------------------------------------------------------------
27
// Local register aliases.
// NOTE(review): only INTRA_PRED_AVAIL is #undef'd in this view; the other
// four names are #define'd without a visible #undef - confirm against the
// complete file that no earlier definition exists.
#undef INTRA_PRED_AVAIL
33
// Per-sub-block copy of the availability flags. It is rewritten before every
// handler call and tested by the DC code. It is sub-register 8 of the same
// GRF as INTRA_REF, so handlers that fill INTRA_REF up to element 8 may
// overwrite it (NOTE(review): depends on the sub-register unit - confirm).
#define INTRA_PRED_AVAIL REG_INTRA_TEMP_2.8
34
// Scratch register in which the directional modes assemble and filter their
// reference samples.
#define INTRA_REF REG_INTRA_TEMP_2
35
// Backup of the original left reference bytes, taken before REF_LEFT is
// overwritten for sub-blocks 1..3.
#define REF_LEFT_BACK REG_INTRA_TEMP_8
36
// Scratch register (shifted left-reference dwords, intermediate filter sums).
#define REF_TMP REG_INTRA_TEMP_3
37
// NOTE(review): REF_TMP1 is not referenced anywhere in the visible code.
#define REF_TMP1 REG_INTRA_TEMP_4
41
// Driver setup. NOTE(review): the entry label for this code is not visible
// in this view.
//
// Back up the 8 left reference bytes: REF_LEFT is overwritten for sub-blocks
// 1..3, while sub-block 2 needs the original bytes 3..7 again.
mov (8) REF_LEFT_BACK<1>:ub REF_LEFT(0)REGION(8,1) // Store left reference data
42
// Set up pointers to each intra_4x4 prediction mode
44
// Keep only the 4 LSBs of each of the 4 mode words (see PRED_MODE above).
and (4) PINTRA4X4_Y<1>:w PRED_MODE<4;4,1>:w 0x0F:w
45
// Use the masked modes as register-indirect indices (base INTRA_4X4_OFFSET),
// add INTRA_MODE, and store the 4 results in INTRA_4X4_MODE(0..3), which the
// CALL_1 invocations below consume one per sub-block.
// NOTE(review): the table at INTRA_4X4_OFFSET and the meaning of INTRA_MODE
// are defined elsewhere - presumably per-mode handler offsets; confirm.
add (4) INTRA_4X4_MODE(0) r[PINTRA4X4_Y, INTRA_4X4_OFFSET]<1,0>:ub INTRA_MODE<4;4,1>:ub
47
// Sub-block 0 *****************
// Uses the REF_TOP/REF_LEFT data and availability flags exactly as passed in.
48
mov (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w // Top/Left neighbor available flags
49
// NOTE(review): CALL_1 is a macro defined elsewhere - presumably a subroutine
// call to the handler address held in INTRA_4X4_MODE(0); confirm.
CALL_1(INTRA_4X4_MODE(0),1)
51
// Add error data to predicted intra data
// Each add handles 8 samples (two rows of 4): 4 error words are taken from
// every 8-word row (<8;4,1>), the sum is saturated to unsigned bytes and
// stored at every second byte (<2>:ub) of the prediction buffer.
// The block is done in two halves because an indexed source can't cross 2 GRFs.
53
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK0]<2>:ub r[PERROR,ERRBLK0]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // First 8 predicted samples
54
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK0+16]<2>:ub r[PERROR,ERRBLK0+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // Remaining 8 predicted samples
56
// Sub-block 1 *****************
57
// Shift the top reference row down by 4 entries, so the samples above
// sub-block 1 start at REF_TOP0(0).
mov (16) REF_TOP0(0)<1> REF_TOP0(0,4)REGION(8,1) // Top reference data
58
// Byte offset 6 with a vertical stride of 8 bytes: given the <2>:ub store
// layout used for sub-block 0, this picks the 4th sample of each of its 4
// rows, i.e. its right-most reconstructed column.
mov (4) REF_LEFT(0)<1> r[PPREDBUF_Y,PREDSUBBLK0+6]<8;1,0>:ub // New left reference data from sub-block 0
59
or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 1:w // Left neighbor is available
60
CALL_1(INTRA_4X4_MODE(0,1),1)
62
// Add error data to predicted intra data
// Same two-half add/saturate/store pattern as for sub-block 0; the block is
// split because an indexed source can't cross 2 GRFs.
64
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK1]<2>:ub r[PERROR,ERRBLK1]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // First 8 predicted samples
65
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK1+16]<2>:ub r[PERROR,ERRBLK1+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // Remaining 8 predicted samples
67
// Sub-block 2 *****************
// The top reference row is rebuilt from the reconstructed data in the
// prediction buffer; the left reference comes from the backup taken at entry.
68
mov (1) REF_TOP0(0,3)<1> REF_LEFT_BACK.3<0;1,0>:ub // Top-left reference data from stored left reference data
69
// Byte offset 24 is the last (4th) 8-byte row of sub-block 0; REGION(4,2)
// skips the unused odd bytes left by the <2>:ub stores.
mov (4) REF_TOP0(0,4)<1> r[PPREDBUF_Y,PREDSUBBLK0+24]REGION(4,2):ub // Top reference data
70
// The same row 32 bytes further on - presumably the last row of sub-block 1
// (assumes PREDSUBBLK1 == PREDSUBBLK0+32; confirm). It is read separately
// because an indexed source can't cross 2 GRFs.
mov (4) REF_TOP0(0,8)<1> r[PPREDBUF_Y,PREDSUBBLK0+24+32]REGION(4,2):ub // Top-right reference data
71
// Replicate the last sample of that row into 4 further entries.
mov (4) REF_TOP0(0,12)<1> r[PPREDBUF_Y,PREDSUBBLK0+30+32]REGION(1,0):ub // Extended top-right reference data
72
mov (4) REF_LEFT(0)<1> REF_LEFT_BACK.4<4;4,1>:ub // From stored left reference data
73
or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 2:w // Top neighbor is available
74
CALL_1(INTRA_4X4_MODE(0,2),1)
76
// Add error data to predicted intra data
// Same two-half add/saturate/store pattern as for sub-block 0; the block is
// split because an indexed source can't cross 2 GRFs.
78
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK2]<2>:ub r[PERROR,ERRBLK2]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // First 8 predicted samples
79
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK2+16]<2>:ub r[PERROR,ERRBLK2+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // Remaining 8 predicted samples
81
// Sub-block 3 *****************
82
// Shift the top reference row built for sub-block 2 down by 4 entries, so the
// samples above sub-block 3 start at REF_TOP0(0).
mov (16) REF_TOP0(0)<1> REF_TOP0(0,4)REGION(8,1) // Top reference data
83
// No separate top-right data is loaded for this sub-block: REF_TOP0(0,7) is
// replicated into the 8 entries starting at REF_TOP0(0,8).
mov (8) REF_TOP0(0,8)<1> REF_TOP0(0,7)<0;1,0> // Extended top-right reference data
84
// Byte offset 6 with a vertical stride of 8 bytes: the 4th sample of each of
// the 4 rows of sub-block 2, i.e. its right-most reconstructed column.
mov (4) REF_LEFT(0)<1> r[PPREDBUF_Y,PREDSUBBLK2+6]<8;1,0>:ub // Left reference data from sub-block 2
85
or (1) INTRA_PRED_AVAIL<1>:w REG_INTRA_PRED_AVAIL<0;1,0>:w 3:w // Top/Left neighbor are available
86
CALL_1(INTRA_4X4_MODE(0,3),1)
88
// Add error data to predicted intra data
// Same two-half add/saturate/store pattern as for sub-block 0; the block is
// split because an indexed source can't cross 2 GRFs.
90
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK3]<2>:ub r[PERROR,ERRBLK3]<8;4,1>:w REG_INTRA_4X4_PRED<8;8,1>:w // First 8 predicted samples
91
add.sat (8) r[PPREDBUF_Y,PREDSUBBLK3+16]<2>:ub r[PERROR,ERRBLK3+32]<8;4,1>:w REG_INTRA_4X4_PRED.8<8;8,1>:w // Remaining 8 predicted samples
95
//--------------------------------------------------------------------------
96
// Actual module that performs Intra_4x4 prediction and construction
//
// Input data:
98
// REF_TOP: Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1)
99
// REF_LEFT: Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0)
100
// PINTRA4X4_Y: Intra prediction mode
101
// INTRA_PRED_AVAIL: Top/Left available flag, (Bit0: Left, Bit1: Top)
//
// Output data:
105
// REG_INTRA_4X4_PRED: Predicted 4x4 block data stored in 1 GRF register
//                     (16 words; the callers consume it as 4 rows of 4 samples)
106
//--------------------------------------------------------------------------
110
// Vertical prediction: region <0;4,1> re-reads the same 4 top samples for each
// of the 4 rows, so every row is a copy of the row above the block.
// NOTE(review): neither an entry label before this instruction nor a return
// between it and INTRA_4X4_HORIZONTAL is visible in this view. As shown,
// execution would fall through into the horizontal handler - confirm against
// the complete file.
mov (16) REG_INTRA_4X4_PRED<1>:w REF_TOP(0)<0;4,1>
114
INTRA_4X4_HORIZONTAL:
115
// Horizontal prediction: region <1;4,0> repeats one left sample 4 times and
// advances by one sample per row, so each row is filled with its left
// neighbour.
mov (16) REG_INTRA_4X4_PRED<1>:w REF_LEFT(0)<1;4,0>
120
// DC prediction. NOTE(review): the entry label for this handler is not
// visible in this view.
//
// Rearrange reference samples for unified DC prediction code
// The sum below always adds 4 top + 4 left samples, so an unavailable side is
// replaced first:
//   top missing only  -> top = left      (sum = 2 * left)
//   left missing only -> left = top      (sum = 2 * top)
//   both missing      -> top = left = 0x80 (result is 128)
122
and.nz.f0.0 (16) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 2:w {Compr} // f0.0 set <=> top available (Bit1)
123
and.nz.f0.1 (16) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 1:w {Compr} // f0.1 set <=> left available (Bit0)
124
(-f0.0.any16h) mov (16) REF_TOP_W(0)<1> 0x8080:uw // Top macroblock not available for intra prediction: fill with 0x80 bytes
125
(-f0.1.any8h) mov (8) REF_LEFT(0)<1> REF_TOP(0)REGION(8,1) // Left macroblock not available for intra prediction: use top (or the 0x80 fill)
126
(-f0.0.any8h) mov (8) REF_TOP(0)<1> REF_LEFT(0)REGION(8,1) // Top macroblock not available for intra prediction: use left
127
// Perform DC prediction
// Tree reduction in PRED_YW(15): 4 pair sums -> 2 sums -> one total that is
// broadcast to all 16 lanes, then rounded: (sum + 4) >> 3.
129
add (4) PRED_YW(15)<1> REF_TOP(0)REGION(4,1) REF_LEFT(0)REGION(4,1) // top[i] + left[i], i = 0..3
130
add (2) PRED_YW(15)<1> PRED_YW(15)REGION(2,1) PRED_YW(15,2)REGION(2,1) // Fold 4 partial sums into 2
131
add (16) acc0<1>:w PRED_YW(15)REGION(1,0) PRED_YW(15,1)REGION(1,0) // Total of all 8 samples in every lane
132
add (16) acc0<1>:w acc0:w 4:w // Rounding term
133
shr (16) REG_INTRA_4X4_PRED<1>:w acc0:w 3:w // Divide by 8
137
// Diagonal-down-left prediction: apply the 3-tap filter
// (p[x] + 2*p[x+1] + p[x+2] + 2) >> 2 along the top reference row, then give
// each block row a window of 4 filtered values that starts one position
// further along.
INTRA_4X4_DIAG_DOWN_LEFT:
138
mov (8) INTRA_REF<1>:ub REF_TOP(0)REGION(8,1) // Keep REF_TOP untouched for future use
139
// Fills INTRA_REF bytes 8..11 from REF_TOP(0,7) onwards. Only bytes 8 and 9
// are read by the filter below; byte 8 supplies the duplicated last sample.
mov (4) INTRA_REF.8<1>:ub REF_TOP(0,7)REGION(4,1) // p[8,-1] = p[7,-1]
140
add (8) acc0<1>:w INTRA_REF.2<8;8,1> 2:w // p[x+2]+2
141
mac (8) acc0<1>:w INTRA_REF.1<8;8,1> 2:w // 2*p[x+1]+p[x+2]+2
142
mac (8) PRED_YW(15)<1> INTRA_REF.0<8;8,1> 1:w // p[x]+2*p[x+1]+p[x+2]+2
144
// Region <1;4,1>: row r of the output takes filtered sums r..r+3.
shr (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(15)<1;4,1> 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2
148
// Diagonal-down-right prediction: build one contiguous reference vector in
// INTRA_REF (4 left samples followed by 8 bytes starting at the top-left
// sample), run the 3-tap filter over it, then read the rows back through
// per-row addresses.
INTRA_4X4_DIAG_DOWN_RIGHT:
150
// Set inverse shift count
// The left-reference dword is shifted by 4 different counts and byte 3 of
// each result is gathered, which extracts the 4 left bytes one by one.
// NOTE(review): INV_SHIFT is defined elsewhere - presumably shift counts
// that reverse the byte order (e.g. 0,8,16,24); confirm.
151
shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b
152
mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data
153
mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub // Byte 3 of each shifted dword -> INTRA_REF bytes 0..3
155
add (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // p[x+2]+2
156
mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 2:w // 2*p[x+1]+p[x+2]+2
157
// The 8 word sums overwrite INTRA_REF itself (the byte references are no
// longer needed after this instruction).
mac (8) INTRA_REF<1>:w INTRA_REF<8;8,1>:ub 1:w // p[x]+2*p[x+1]+p[x+2]+2
159
// Store data in reversed order
// PBWDCOPY_4 receives 4 addresses: the address of INTRA_REF plus the 4
// offsets in INV_TRANS4. The indexed read then fetches 4 consecutive words
// per address, one address per output row.
// NOTE(review): INV_TRANS4 is defined elsewhere - presumably descending
// offsets so that each row starts one word earlier than the previous; confirm.
160
add (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF
161
shr (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w 2:w // Final >>2 of the filtered sums
165
// Vertical-right prediction: combines 2-tap averages of the top reference
// with 3-tap filtered values of the left/top-left/top reference. Both sets
// are interleaved in INTRA_REF and the rows are read back through per-row
// addresses with a stride of 2 words.
INTRA_4X4_VERT_RIGHT:
167
// Set inverse shift count
// Same reference assembly as in INTRA_4X4_DIAG_DOWN_RIGHT: 4 left bytes are
// extracted via the shifted dwords, followed by 8 bytes from the top-left
// sample onwards.
// NOTE(review): INV_SHIFT is defined elsewhere - presumably shift counts
// that reverse the byte order; confirm.
168
shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b
169
mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data
170
mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub // Byte 3 of each shifted dword -> INTRA_REF bytes 0..3
173
// INTRA_REF.4 is the top-left sample, INTRA_REF.5 the first top sample.
avg (8) PRED_YW(14)<1> INTRA_REF.4<8;8,1> INTRA_REF.5<8;8,1> // avg(p[x-1],p[x])
175
add (8) acc0<1>:w INTRA_REF.3<8;8,1>:ub 2:w // p[x]+2
176
mac (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // 2*p[x-1]+p[x]+2
177
mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 1:w // p[x-2]+2*p[x-1]+p[x]+2
178
// The word results overwrite INTRA_REF (the byte references are dead now).
shr (8) INTRA_REF<1>:w acc0:w 2:w // (p[x-2]+2*p[x-1]+p[x]+2)>>2
180
// Interleave: filtered words 2..5 are spread to words 2,4,6,8 and the first
// 4 averages go to words 3,5,7,9. Words 0 and 1 keep their filtered values.
// NOTE(review): word 8 coincides with the INTRA_PRED_AVAIL alias if that
// sub-register is in word units; the driver rewrites the flag before every
// call - confirm.
mov (4) INTRA_REF.2<2>:w INTRA_REF.2<4;4,1>:w // Keep zVR = -2,-3 unchanged
181
mov (4) INTRA_REF.3<2>:w PRED_YW(14)REGION(4,1) // Combining even rows
183
// 4 per-row addresses = address of INTRA_REF + INV_TRANS4 offsets; each row
// then reads 4 words with a stride of 2 words.
// NOTE(review): INV_TRANS4 is defined elsewhere; confirm its values.
add (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF
184
mov (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,2>:w
189
// NOTE(review): the entry label for this handler is not visible in this view.
// Judging by the avg(p[y-1],p[y]) pattern it is presumably the
// horizontal-down mode - confirm against the complete file.
//
// 2-tap averages and 3-tap filtered values are computed over the combined
// left/top-left/top reference, interleaved word by word in INTRA_REF, and
// read back through per-row addresses.
//
// Set inverse shift count
// Same reference assembly as the other handlers that use INV_SHIFT: 4 left
// bytes are extracted via the shifted dwords, followed by 8 bytes from the
// top-left sample onwards.
190
shl (4) REF_TMP<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b
191
mov (8) INTRA_REF.4<1>:ub REF_TOP(0,-1)REGION(8,1) // INTRA_REF holds all reference data
192
mov (4) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub // Byte 3 of each shifted dword -> INTRA_REF bytes 0..3
195
avg (8) PRED_YW(14)<1> INTRA_REF<8;8,1> INTRA_REF.1<8;8,1> // avg(p[y-1],p[y])
197
add (8) acc0<1>:w INTRA_REF.2<8;8,1>:ub 2:w // p[y]+2
198
mac (8) acc0<1>:w INTRA_REF.1<8;8,1>:ub 2:w // 2*p[y-1]+p[y]+2
199
// The sums go to REF_TMP, so INTRA_REF can be rebuilt from them below.
mac (8) REF_TMP<1>:w INTRA_REF.0<8;8,1>:ub 1:w // p[y-2]+2*p[y-1]+p[y]+2
200
// Filtered values 0..3 go to the odd words 1,3,5,7 of INTRA_REF.
shr (4) INTRA_REF.1<2>:w REF_TMP<4;4,1>:w 2:w // (p[y-2]+2*p[y-1]+p[y]+2)>>2
202
// Filtered values 4 and 5 go, not interleaved, to words 8 and 9.
// NOTE(review): the original comment names zVR, which looks copied from the
// vertical-right handler (presumably zHD here). Word 8 coincides with the
// INTRA_PRED_AVAIL alias if that sub-register is in word units; the driver
// rewrites the flag before every call - confirm.
shr (2) INTRA_REF.8<1>:w REF_TMP.4<2;2,1>:w 2:w // Keep zVR = -2,-3 unchanged
203
// Averages 0..3 go to the even words 0,2,4,6.
mov (4) INTRA_REF.0<2>:w PRED_YW(14)REGION(4,1) // Combining even pixels
205
// Unlike the other handlers that use PBWDCOPY_4, the INV_TRANS4 offsets are
// doubled here before the address of INTRA_REF is added.
shl (4) PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b 1:w // Convert to WORD offset
206
add (4) PBWDCOPY_4<1>:w PBWDCOPY_4<4;4,1>:w INTRA_TEMP_2*GRFWIB:w // Must match with INTRA_REF
207
// One address per output row, 4 consecutive words per row.
mov (16) REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w
213
// NOTE(review): the entry label for this handler is not visible in this view.
// Judging by the avg(p[x],p[x+1]) pattern on the top reference it is
// presumably the vertical-left mode - confirm against the complete file.
//
// 2-tap averages are written to the even words of PRED_YW(14) and 3-tap
// filtered values to its odd words; the final regioned move picks rows out of
// that interleaved vector.
avg (8) PRED_YW(14)<2> REF_TOP(0)REGION(8,1) REF_TOP(0,1)REGION(8,1) // avg(p[x],p[x+1]) -> even words
215
add (8) acc0<1>:w REF_TOP(0,2)REGION(8,1) 2:w // p[x+2]+2
216
mac (8) acc0<1>:w REF_TOP(0,1)REGION(8,1) 2:w // 2*p[x+1]+p[x+2]+2
217
mac (8) PRED_YW(15)<1> REF_TOP(0)REGION(8,1) 1:w // p[x]+2*p[x+1]+p[x+2]+2
218
shr (8) PRED_YW(14,1)<2> PRED_YW(15)REGION(8,1) 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2 -> odd words
220
// Region <1;4,2>: row r starts at word r and takes every second word, so
// even rows read averages and odd rows read filtered values, each pair of
// rows one sample further along.
mov (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<1;4,2>
225
// NOTE(review): the entry label for this handler is not visible in this view.
// Judging by the avg(p[y],p[y+1]) pattern on the left reference it is
// presumably the horizontal-up mode - confirm against the complete file.
//
// Set extra left reference pixels for unified prediction
// Replicating the last left sample lets the same average/filter code cover
// the positions past the end of the block. The mov writes 8 entries,
// REF_LEFT(0,4)..REF_LEFT(0,11); the code below reads up to REF_LEFT(0,9).
226
mov (8) REF_LEFT(0,4)<1> REF_LEFT(0,3)REGION(1,0) // Copy p[-1,3] to p[-1,y],y=4...11
228
avg (8) PRED_YW(14)<2> REF_LEFT(0)REGION(8,1) REF_LEFT(0,1)REGION(8,1) // avg(p[y],p[y+1]) -> even words
230
add (8) acc0<1>:w REF_LEFT(0,2)REGION(8,1) 2:w // p[y+2]+2
231
mac (8) acc0<1>:w REF_LEFT(0,1)REGION(8,1) 2:w // 2*p[y+1]+p[y+2]+2
232
mac (8) PRED_YW(15)<1> REF_LEFT(0)REGION(8,1) 1:w // p[y]+2*p[y+1]+p[y+2]+2
233
shr (8) PRED_YW(14,1)<2> PRED_YW(15)REGION(8,1) 2:w // (p[y]+2*p[y+1]+p[y+2]+2)>>2 -> odd words
235
// Region <2;4,1>: row r starts at word 2*r and takes 4 consecutive words,
// i.e. average/filtered pairs, advancing by one pair per row.
mov (16) REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<2;4,1>
238
// End of intra_Pred_4x4_Y_4
240
// Closes the include guard opened by the #if at the top of this file.
#endif // !defined(__INTRA_PRED_4X4_Y_4__)