~ubuntu-branches/ubuntu/intrepid/x264/intrepid

« back to all changes in this revision

Viewing changes to common/x86/cabac-a.asm

Committer: Bazaar Package Importer
Author(s): John Dong
Date: 2008-05-03 01:12:18 UTC
mfrom: (1.1.7 upstream)
Revision ID: james.westby@ubuntu.com-20080503011218-3l7ra58egb7gezht

Tags: 1:0.svn20080408-0.0ubuntu1

* Merge from debian-multimedia. Remaining Ubuntu changes:
- Maintainer field
- Set epoch

files added:
.pc

.pc/.version

common/x86

common/x86/cabac-a.asm

common/x86/cpu-32.asm

common/x86/cpu-64.asm

common/x86/dct-32.asm

common/x86/dct-64.asm

common/x86/dct-a.asm

common/x86/dct.h

common/x86/deblock-a.asm

common/x86/mc-a.asm

common/x86/mc-a2.asm

common/x86/mc-c.c

common/x86/mc.h

common/x86/pixel-32.asm

common/x86/pixel-a.asm

common/x86/pixel.h

common/x86/predict-a.asm

common/x86/predict-c.c

common/x86/predict.h

common/x86/quant-a.asm

common/x86/quant.h

common/x86/sad-a.asm

common/x86/x86inc-32.asm

common/x86/x86inc-64.asm

common/x86/x86inc.asm

debian/libx264-59.install

debian/patches/01_link.patch

debian/patches/series

tools/checkasm-32.asm

files removed:
common/amd64

common/amd64/amd64inc.asm

common/amd64/cpu-a.asm

common/amd64/dct-a.asm

common/amd64/deblock-a.asm

common/amd64/mc-a.asm

common/amd64/mc-a2.asm

common/amd64/pixel-a.asm

common/amd64/pixel-sse2.asm

common/amd64/predict-a.asm

common/amd64/quant-a.asm

common/clip1.h

common/csp.c

common/csp.h

common/i386

common/i386/cpu-a.asm

common/i386/dct-a.asm

common/i386/dct.h

common/i386/deblock-a.asm

common/i386/i386inc.asm

common/i386/mc-a.asm

common/i386/mc-a2.asm

common/i386/mc-c.c

common/i386/mc.h

common/i386/pixel-a.asm

common/i386/pixel-sse2.asm

common/i386/pixel.h

common/i386/predict-a.asm

common/i386/predict-c.c

common/i386/predict.h

common/i386/quant-a.asm

common/i386/quant.h

debian/libx264-57.install

debian/patches/00list

debian/patches/01_link.dpatch

files modified:
ChangeLog

Makefile

build/win32/libx264.vcproj

common/bs.h

common/cabac.c

common/cabac.h

common/common.c

common/common.h

common/cpu.c

common/dct.c

common/dct.h

common/frame.c

common/frame.h

common/macroblock.c

common/macroblock.h

common/mc.c

common/mc.h

common/osdep.h

common/pixel.c

common/pixel.h

common/ppc/dct.c

common/ppc/dct.h

common/ppc/deblock.c

common/ppc/mc.c

common/ppc/pixel.c

common/ppc/predict.c

common/predict.c

common/quant.c

common/set.c

configure

debian/changelog

debian/control

debian/rules

encoder/analyse.c

encoder/cabac.c

encoder/cavlc.c

encoder/encoder.c

encoder/macroblock.c

encoder/macroblock.h

encoder/me.c

encoder/me.h

encoder/ratecontrol.c

encoder/ratecontrol.h

encoder/rdo.c

encoder/set.c

encoder/slicetype.c

gtk/fr.po

gtk/test.c

matroska.c

tools/checkasm.c

version.sh

x264.c

x264.h

Show diffs side-by-side

added added

removed removed

common/x86/cabac-a.asm

;*****************************************************************************

;* cabac-a.asm: h264 encoder library

;*****************************************************************************

;* Author: Loren Merritt <lorenm@u.washington.edu>

;* This program is free software; you can redistribute it and/or modify

;* it under the terms of the GNU General Public License as published by

;* the Free Software Foundation; either version 2 of the License, or

;* (at your option) any later version.

;* This program is distributed in the hope that it will be useful,

;* but WITHOUT ANY WARRANTY; without even the implied warranty of

;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

;* GNU General Public License for more details.

;* You should have received a copy of the GNU General Public License

;* along with this program; if not, write to the Free Software

;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.

;*****************************************************************************

%include "x86inc.asm"

SECTION .text

cextern x264_cabac_range_lps

cextern x264_cabac_transition

cextern x264_cabac_renorm_shift

;-----------------------------------------------------------------------------
; DEF_TMP a0..a7, b0..b7
;
; Takes 16 arguments: eight alias indices followed by eight register indices.
; For each pair (a, b) it defines three textual aliases:
;     t<a>  -> r<b>      t<a>b -> r<b>b      t<a>d -> r<b>d
; The %rotate walks both halves of the argument list in lock-step, so on every
; iteration %1 is the next alias index and %9 is the matching register index.
; The r<N>, r<N>b and r<N>d names themselves are not defined in this file.
;-----------------------------------------------------------------------------
%macro DEF_TMP 16
    %rep 8
        %define t%1  r%9
        %define t%1b r%9b
        %define t%1d r%9d
        %rotate 1
    %endrep
%endmacro

; Per-architecture mapping of the temporaries t0..t7 onto r<N> registers, plus
; the reservation directive used for pointer-sized members of struc cb.
;
; t3 must be ecx, since it's used for shift (variable shift counts live in cl).
; That is why t3 maps to r3 in the 64-bit list and to r1 in the 32-bit list.
; NOTE(review): assumes r3 is rcx on x86-64 and r1 is ecx on x86-32 as set up
; by x86inc.asm -- confirm there.
;
; In the 32-bit list both t1 and t7 map to r3, so writing t7 destroys t1 and
; t1 has to be reloaded before it is used again.
%ifndef ARCH_X86_64
    DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
    %define pointer resd
%else
    DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
    %define pointer resq
%endif

; Layout of the CABAC encoder context that x264_cabac_encode_decision reaches
; through r0. Field order and sizes define the byte offsets, so nothing here
; may be reordered or resized on its own.
; NOTE(review): presumably mirrors the C-side CABAC context struct field for
; field (see common/cabac.h) -- confirm the two are kept in sync.
struc cb

.low: resd 1 ; dword: arithmetic-coder "low" value

.range: resd 1 ; dword: arithmetic-coder "range" value

.queue: resd 1 ; dword: grows by the renorm shift; a byte is output once it reaches 8

.bytes_outstanding: resd 1 ; dword: count of postponed bytes (incremented when the output byte is 0xff)

.start: pointer 1 ; pointer-sized; not referenced by the code in this file

.p: pointer 1 ; pointer-sized: current write position in the output buffer

.end: pointer 1 ; pointer-sized: bound checked before bytes are written at .p

align 16, resb 1 ; pad with reserved bytes up to the next 16-byte boundary

.bits_encoded: resd 1 ; dword; not referenced by the code in this file

.state: resb 460 ; 460 one-byte states, indexed by the context argument

endstruc

;-----------------------------------------------------------------------------
; LOAD_GLOBAL dst, table, base, index
;
; dst = zero-extended byte at [table + base + index].
;   %1 dst    destination register
;   %2 table  symbol of the byte table
;   %3 base   register holding the first offset, or the literal 0 for none
;   %4 index  second offset (register, optionally scaled)
;
; Three addressing variants, selected by PIC64 / PIC32 / neither:
;   PIC64 : materialises the table address in r11 first -- r11 is clobbered.
;   PIC32 : folds base+index into dst before the load, so dst is overwritten
;           ahead of the memory access (dst may be the same register as base).
;   other : a single movzx with the absolute table address.
; GLOBAL is not defined in this file.
; NOTE(review): presumably the PIC addressing suffix from x86inc.asm -- confirm.
;-----------------------------------------------------------------------------
%macro LOAD_GLOBAL 4
%ifdef PIC64
    ; this would be faster if the arrays were declared in asm, so that I didn't have to duplicate the lea
    lea     r11, [%2 GLOBAL]
    %ifnidn %3, 0
    add     r11, %3
    %endif
    movzx   %1, byte [r11+%4]
%elifdef PIC32
    %ifidn %3, 0
    movzx   %1, byte [%2+%3+%4 GLOBAL]
    %else
    lea     %1, [%3+%4]
    movzx   %1, byte [%2+%1 GLOBAL]
    %endif
%else
    movzx   %1, byte [%2+%3+%4]
%endif
%endmacro

;-----------------------------------------------------------------------------
; x264_cabac_encode_decision
; NOTE(review): presumably
;     void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b )
; -- confirm against the C prototype.
;
; In:   arg0 (r0 / t0) = pointer to the context laid out as struc cb
;       arg1 (r1m)     = index of the context byte inside cb.state
;       arg2 (r2m)     = value compared with (state >> 6) and used to select
;                        the x264_cabac_transition entry
; Out:  nothing in registers; updates cb.state[arg1], cb.range, cb.low,
;       cb.queue and, when a byte is due, cb.p / cb.bytes_outstanding and the
;       output buffer.
; Uses: t0-t7 (and r11 through LOAD_GLOBAL when PIC64 is defined).
;
; Ordering matters in two places: the instructions between each cmp and the
; cmovne / je that consumes it are only mov and lea, which leave the flags
; untouched.
;-----------------------------------------------------------------------------
cglobal x264_cabac_encode_decision, 0,7
    movifnidn t0d, r0m
    movifnidn t1d, r1m
    picgetgot t2
    mov     t5d, [r0+cb.range]
    movzx   t3d, byte [r0+cb.state+t1]      ; t3 = state byte of this context
    mov     t4d, t5d                        ; t4 = range
    shr     t5d, 6
    and     t5d, 3                          ; t5 = (range >> 6) & 3
    LOAD_GLOBAL t5d, x264_cabac_range_lps, t5, t3*4 ; t5 = range_lps byte at [t5 + state*4]
    sub     t4d, t5d                        ; t4 = range - range_lps
    mov     t6d, t3d
    shr     t6d, 6                          ; t6 = state >> 6
%ifdef PIC32
    cmp     t6d, r2m                        ; t2 holds the GOT here, so compare against memory
%else
    movifnidn t2d, r2m
    cmp     t6d, t2d
%endif
    ; flags from the cmp above stay live until the two cmovne below
    mov     t6d, [r0+cb.low]
    lea     t7, [t6+t4]                     ; t7 = low + (range - range_lps)
    cmovne  t4d, t5d                        ; mismatch: range = range_lps
    cmovne  t6d, t7d                        ; mismatch: low  += range - range_lps
%ifdef PIC32
    mov     t1, r2m
    LOAD_GLOBAL t3d, x264_cabac_transition, t1, t3*2
%else
    LOAD_GLOBAL t3d, x264_cabac_transition, t2, t3*2
%endif
    movifnidn t1d, r1m                      ; t1 may have been overwritten above; reload the index
    mov     [r0+cb.state+t1], t3b           ; store the new state byte
.renorm:
    mov     t3d, t4d
    shr     t3d, 3
    LOAD_GLOBAL t3d, x264_cabac_renorm_shift, 0, t3 ; t3 = shift for range >> 3
    shl     t4d, t3b
    shl     t6d, t3b
    add     t3d, [r0+cb.queue]              ; t3 = queue + shift
    mov     [r0+cb.range], t4d
    mov     [r0+cb.low], t6d
    mov     [r0+cb.queue], t3d
    cmp     t3d, 8                          ; queue is a signed count, so signed compare
    jge .putbyte
.ret:
    REP_RET
.putbyte:
    ; alive: t0=cb t3=queue t6=low
    add     t3d, 2                          ; shift count = queue + 2
    mov     t1d, 1
    mov     t2d, t6d
    shl     t1d, t3b
    shr     t2d, t3b ; out
    dec     t1d                             ; t1 = mask of the low (queue + 2) bits
    sub     t3d, 10                         ; t3 = queue - 8
    and     t6d, t1d                        ; keep only the bits not yet output
    cmp     t2b, 0xff ; FIXME is a 32bit op faster?
    ; flags from the cmp above stay live until the je below
    mov     [r0+cb.queue], t3d
    mov     [r0+cb.low], t6d
    mov     t1d, t2d
    mov     t4, [r0+cb.p]
    je .postpone
    mov     t5d, [r0+cb.bytes_outstanding]
    shr     t1d, 8 ; carry
    lea     t6, [t4+t5+1]                   ; one past the last byte this flush would write
    cmp     t6, [r0+cb.end]
    ; Addresses are unsigned quantities: use jae, not jge, so that a buffer
    ; straddling the sign boundary is still range-checked correctly.
    ; When taken, nothing is written (queue and low were already updated).
    jae .ret
    add     [t4-1], t1b                     ; propagate the carry into the previously written byte
    test    t5d, t5d
    jz .no_outstanding
    dec     t1d                             ; t1b = carry - 1: 0xff without carry, 0x00 with carry
.loop_outstanding:
    mov     [t4], t1b
    inc     t4
    dec     t5d
    jg .loop_outstanding
.no_outstanding:
    mov     [t4], t2b                       ; low byte of out
    inc     t4
    mov     [r0+cb.bytes_outstanding], t5d ; is zero, but a reg has smaller opcode than an immediate
    mov     [r0+cb.p], t4
    RET
.postpone:
    inc     dword [r0+cb.bytes_outstanding]
    RET

166

Older »