1
///****************************************************************************
3
// * XVID MPEG-4 VIDEO CODEC
4
// * - MMX and XMM forward discrete cosine transform -
6
// * Copyright(C) 2001 Peter Ross <pross@xvid.org>
8
// * This file is part of FFmpeg.
10
// * FFmpeg is free software; you can redistribute it and/or
11
// * modify it under the terms of the GNU Lesser General Public
12
// * License as published by the Free Software Foundation; either
13
// * version 2.1 of the License, or (at your option) any later version.
15
// * FFmpeg is distributed in the hope that it will be useful,
16
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
// * Lesser General Public License for more details.
20
// * You should have received a copy of the GNU Lesser General Public License
21
// * along with FFmpeg; if not, write to the Free Software Foundation,
22
// * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
// * $Id: idct_mmx_xvid.c 6577 2006-10-07 15:30:46Z diego $
26
// ***************************************************************************/
28
// ****************************************************************************
30
// Originally provided by Intel at AP-922
31
// http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
32
// (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
33
// but in a limited edition.
34
// New macro implements a column part for precise iDCT
35
// The routine precision now satisfies IEEE standard 1180-1990.
37
// Copyright(C) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
38
// Rounding trick Copyright(C) 2000 Michel Lespinasse <walken@zoy.org>
40
// http://www.elecard.com/peter/idct.html
41
// http://www.linuxvideo.org/mpeg2dec/
43
// ***************************************************************************/
45
// These examples contain code fragments for first stage iDCT 8x8
46
// (for rows) and first stage DCT 8x8 (for columns)
49
// conversion to gcc syntax by Michael Niedermayer
2
* XVID MPEG-4 VIDEO CODEC
3
* - MMX and XMM forward discrete cosine transform -
5
* Copyright(C) 2001 Peter Ross <pross@xvid.org>
7
* Originally provided by Intel at AP-922
8
* http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
9
* (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
10
* but in a limited edition.
11
* New macro implements a column part for precise iDCT
12
* The routine precision now satisfies IEEE standard 1180-1990.
14
* Copyright(C) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
15
* Rounding trick Copyright(C) 2000 Michel Lespinasse <walken@zoy.org>
17
* http://www.elecard.com/peter/idct.html
18
* http://www.linuxvideo.org/mpeg2dec/
20
* These examples contain code fragments for first stage iDCT 8x8
21
* (for rows) and first stage DCT 8x8 (for columns)
23
* conversion to gcc syntax by Michael Niedermayer
25
* This file is part of FFmpeg.
27
* FFmpeg is free software; you can redistribute it and/or
28
* modify it under the terms of the GNU Lesser General Public
29
* License as published by the Free Software Foundation; either
30
* version 2.1 of the License, or (at your option) any later version.
32
* FFmpeg is distributed in the hope that it will be useful,
33
* but WITHOUT ANY WARRANTY; without even the implied warranty of
34
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
* Lesser General Public License for more details.
37
* You should have received a copy of the GNU Lesser General Public License
38
* along with FFmpeg; if not, write to the Free Software Foundation,
39
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
52
42
#include <inttypes.h>
53
#include "../avcodec.h"
55
45
//=============================================================================
56
46
// Macros and other preprocessor constants
74
64
//-----------------------------------------------------------------------------
77
// Fixed-point tangent/cosine multipliers. Each constant is repeated four
// times, i.e. one 64-bit group of four int16_t lanes per row of the table.
// Rows 0-2 are tan(k*pi/16) scaled by 1<<16; row 3 is cos(pi/4) scaled by 1<<15.
static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
67
DECLARE_ALIGNED(8, static const int16_t, tg_1_16[4*4]) = {
78
68
13036,13036,13036,13036, // tan(1*pi/16) * (1<<16) + 0.5
79
69
27146,27146,27146,27146, // tan(2*pi/16) * (1<<16) + 0.5
80
70
-21746,-21746,-21746,-21746, // tan(3*pi/16) * (1<<16) + 0.5 = 43790, stored wrapped to int16_t (43790 - 65536)
81
71
23170,23170,23170,23170}; // cos(4*pi/16) * (1<<15) + 0.5
83
static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
73
DECLARE_ALIGNED(8, static const int32_t, rounder_0[2*8]) = {
150
140
//-----------------------------------------------------------------------------
152
142
// Table for rows 0,4 - constants are multiplied by cos_4_16
153
static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
143
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx[32*4]) = {
154
144
16384,16384,16384,-16384, // movq-> w06 w04 w02 w00
155
145
21407,8867,8867,-21407, // w07 w05 w03 w01
156
146
16384,-16384,16384,16384, // w14 w12 w10 w08
192
182
//-----------------------------------------------------------------------------
194
184
// Table for rows 0,4 - constants are multiplied by cos_4_16
195
static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
185
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = {
196
186
16384,21407,16384,8867, // movq-> w05 w04 w01 w00
197
187
16384,8867,-16384,-21407, // w07 w06 w03 w02
198
188
16384,-8867,16384,-21407, // w13 w12 w09 w08