/*
 * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
#include "libavutil/mips/generic_macros_msa.h"
22
#include "h263dsp_mips.h"
24
static void h263_dct_unquantize_msa(int16_t *block, int16_t qmul,
25
int16_t qadd, int8_t n_coeffs,
28
int16_t *block_dup = block;
30
v8i16 block_vec, qmul_vec, qadd_vec, sub;
31
v8i16 add, mask, mul, zero_mask;
33
qmul_vec = __msa_fill_h(qmul);
34
qadd_vec = __msa_fill_h(qadd);
35
for (cnt = 0; cnt < (n_coeffs >> 3); cnt++) {
36
block_vec = LD_SH(block_dup + loop_start);
37
mask = __msa_clti_s_h(block_vec, 0);
38
zero_mask = __msa_ceqi_h(block_vec, 0);
39
mul = block_vec * qmul_vec;
42
add = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) sub, (v16u8) mask);
43
block_vec = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) block_vec,
45
ST_SH(block_vec, block_dup + loop_start);
49
cnt = ((n_coeffs >> 3) * 8) + loop_start;
51
for (; cnt <= n_coeffs; cnt++) {
55
level = level * qmul - qadd;
57
level = level * qmul + qadd;
64
static int32_t mpeg2_dct_unquantize_inter_msa(int16_t *block,
66
const int16_t *quant_matrix)
68
int32_t cnt, sum_res = -1;
69
v8i16 block_vec, block_neg, qscale_vec, mask;
70
v8i16 block_org0, block_org1, block_org2, block_org3;
71
v8i16 quant_m0, quant_m1, quant_m2, quant_m3;
72
v8i16 sum, mul, zero_mask;
73
v4i32 mul_vec, qscale_l, qscale_r, quant_m_r, quant_m_l;
74
v4i32 block_l, block_r, sad;
76
qscale_vec = __msa_fill_h(qscale);
77
for (cnt = 0; cnt < 2; cnt++) {
78
LD_SH4(block, 8, block_org0, block_org1, block_org2, block_org3);
79
LD_SH4(quant_matrix, 8, quant_m0, quant_m1, quant_m2, quant_m3);
80
mask = __msa_clti_s_h(block_org0, 0);
81
zero_mask = __msa_ceqi_h(block_org0, 0);
82
block_neg = -block_org0;
83
block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org0, (v16u8) block_neg,
87
UNPCK_SH_SW(block_vec, block_r, block_l);
88
UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
89
UNPCK_SH_SW(quant_m0, quant_m_r, quant_m_l);
90
mul_vec = block_l * qscale_l;
92
block_l = mul_vec >> 4;
93
mul_vec = block_r * qscale_r;
95
block_r = mul_vec >> 4;
96
mul = (v8i16) __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
98
sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
100
sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org0,
105
sad = __msa_hadd_s_w(sum, sum);
106
sum_res += HADD_SW_S32(sad);
107
mask = __msa_clti_s_h(block_org1, 0);
108
zero_mask = __msa_ceqi_h(block_org1, 0);
109
block_neg = - block_org1;
110
block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org1, (v16u8) block_neg,
114
UNPCK_SH_SW(block_vec, block_r, block_l);
115
UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
116
UNPCK_SH_SW(quant_m1, quant_m_r, quant_m_l);
117
mul_vec = block_l * qscale_l;
118
mul_vec *= quant_m_l;
119
block_l = mul_vec >> 4;
120
mul_vec = block_r * qscale_r;
121
mul_vec *= quant_m_r;
122
block_r = mul_vec >> 4;
123
mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
125
sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
127
sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org1,
133
sad = __msa_hadd_s_w(sum, sum);
134
sum_res += HADD_SW_S32(sad);
135
mask = __msa_clti_s_h(block_org2, 0);
136
zero_mask = __msa_ceqi_h(block_org2, 0);
137
block_neg = - block_org2;
138
block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org2, (v16u8) block_neg,
142
UNPCK_SH_SW(block_vec, block_r, block_l);
143
UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
144
UNPCK_SH_SW(quant_m2, quant_m_r, quant_m_l);
145
mul_vec = block_l * qscale_l;
146
mul_vec *= quant_m_l;
147
block_l = mul_vec >> 4;
148
mul_vec = block_r * qscale_r;
149
mul_vec *= quant_m_r;
150
block_r = mul_vec >> 4;
151
mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
153
sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
155
sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org2,
161
sad = __msa_hadd_s_w(sum, sum);
162
sum_res += HADD_SW_S32(sad);
163
mask = __msa_clti_s_h(block_org3, 0);
164
zero_mask = __msa_ceqi_h(block_org3, 0);
165
block_neg = - block_org3;
166
block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org3, (v16u8) block_neg,
170
UNPCK_SH_SW(block_vec, block_r, block_l);
171
UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
172
UNPCK_SH_SW(quant_m3, quant_m_r, quant_m_l);
173
mul_vec = block_l * qscale_l;
174
mul_vec *= quant_m_l;
175
block_l = mul_vec >> 4;
176
mul_vec = block_r * qscale_r;
177
mul_vec *= quant_m_r;
178
block_r = mul_vec >> 4;
179
mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
181
sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
183
sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org3,
189
sad = __msa_hadd_s_w(sum, sum);
190
sum_res += HADD_SW_S32(sad);
196
void ff_dct_unquantize_h263_intra_msa(MpegEncContext *s,
197
int16_t *block, int32_t index,
203
av_assert2(s->block_last_index[index] >= 0 || s->h263_aic);
208
block[0] *= index < 4 ? s->y_dc_scale : s->c_dc_scale;
209
qadd = (qscale - 1) | 1;
216
nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]];
218
h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 1);
221
void ff_dct_unquantize_h263_inter_msa(MpegEncContext *s,
222
int16_t *block, int32_t index,
228
av_assert2(s->block_last_index[index] >= 0);
230
qadd = (qscale - 1) | 1;
233
nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]];
235
h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 0);
238
void ff_dct_unquantize_mpeg2_inter_msa(MpegEncContext *s,
239
int16_t *block, int32_t index,
242
const uint16_t *quant_matrix;
245
quant_matrix = s->inter_matrix;
247
sum = mpeg2_dct_unquantize_inter_msa(block, qscale, quant_matrix);
249
block[63] ^= sum & 1;