1
;*****************************************************************************
2
;* cabac-a.asm: h264 encoder library
3
;*****************************************************************************
4
;* Copyright (C) 2008 x264 project
6
;* Author: Loren Merritt <lorenm@u.washington.edu>
8
;* This program is free software; you can redistribute it and/or modify
9
;* it under the terms of the GNU General Public License as published by
10
;* the Free Software Foundation; either version 2 of the License, or
11
;* (at your option) any later version.
13
;* This program is distributed in the hope that it will be useful,
14
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
;* GNU General Public License for more details.
18
;* You should have received a copy of the GNU General Public License
19
;* along with this program; if not, write to the Free Software
20
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
21
;*****************************************************************************
27
; Symbols declared via cextern (the cextern macro itself is defined elsewhere).
; Within this excerpt each one is only referenced as the table argument of a
; LOAD_GLOBAL invocation in x264_cabac_encode_decision.
cextern x264_cabac_range_lps
28
cextern x264_cabac_transition
29
cextern x264_cabac_renorm_shift
40
; t3 must be ecx, since it's used for shift.
; NOTE(review): the two DEF_TMP invocations below look like alternatives picked
; by a conditional that is not visible in this excerpt; the DEF_TMP macro
; definition is not visible either. Presumably arguments 1-8 are the tN indices
; and arguments 9-16 the register numbers they alias -- confirm against the macro.
; NOTE(review): under that reading the second list gives t1 and t7 the same
; register number (3) -- confirm this aliasing is intended.
42
DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
45
DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
53
; Reserves one dword. Accessed later in this excerpt as cb.bytes_outstanding;
; presumably a member of the cb structure (the struc header is not visible here).
.bytes_outstanding: resd 1
64
; NOTE(review): the lines below appear to be the body of the LOAD_GLOBAL macro
; (4 parameters are referenced: %1..%4); its %macro header and the conditionals
; that select between the variants are not visible in this excerpt.
; Every visible variant is a zero-extending byte load into %1; they differ only
; in how the source address is formed.
; this would be faster if the arrays were declared in asm, so that I didn't have to duplicate the lea
69
; variant using r11 as base plus %4 (the instruction that sets r11 is not visible here)
movzx %1, byte [r11+%4]
73
; variant addressing %2 plus the current value of %1, with the GLOBAL suffix
movzx %1, byte [%2+%1 GLOBAL]
75
; variant addressing %2+%3+%4, with the GLOBAL suffix
movzx %1, byte [%2+%3+%4 GLOBAL]
78
; variant addressing %2+%3+%4 directly
movzx %1, byte [%2+%3+%4]
82
; x264_cabac_encode_decision (excerpt)
; NOTE(review): only part of the body is present. The bare numbers between
; instructions look like line numbers from the original file, and the gaps in
; them suggest omitted instructions (including all labels and branches).
; The comments below describe the visible lines only.
; r0 is the base register for every cb.* field access; per the "alive" note
; further down, t0 holds cb.
cglobal x264_cabac_encode_decision, 0,7
86
; t5d = cb.range
mov t5d, [r0+cb.range]
87
; t3d = zero-extended byte from cb.state, indexed by t1
movzx t3d, byte [r0+cb.state+t1]
91
; LOAD_GLOBAL with destination t5d, table x264_cabac_range_lps, offsets t5 and t3*4
LOAD_GLOBAL t5d, x264_cabac_range_lps, t5, t3*4
107
; NOTE(review): the next two invocations differ only in the offset register
; (t1 vs t2) and look like alternatives chosen by a conditional that is not
; visible here -- confirm.
LOAD_GLOBAL t3d, x264_cabac_transition, t1, t3*2
109
LOAD_GLOBAL t3d, x264_cabac_transition, t2, t3*2
112
; store the low byte of t3 to cb.state indexed by t1 (same address expression
; as the load above)
mov [r0+cb.state+t1], t3b
116
; LOAD_GLOBAL with destination t3d, table x264_cabac_renorm_shift, literal 0 as
; the third argument, and t3 as the remaining offset
LOAD_GLOBAL t3d, x264_cabac_renorm_shift, 0, t3
119
; t3d += cb.queue
add t3d, [r0+cb.queue]
120
; cb.range = t4d (the instructions that compute t4d are not visible here)
mov [r0+cb.range], t4d
122
; cb.queue = t3d
mov [r0+cb.queue], t3d
128
; alive: t0=cb t3=queue t6=low
137
; compare the low byte of t2 with 0xff; the instruction consuming the flags is
; not visible here
cmp t2b, 0xff ; FIXME is a 32bit op faster?
138
; cb.queue = t3d
mov [r0+cb.queue], t3d
143
; t5d = cb.bytes_outstanding
mov t5d, [r0+cb.bytes_outstanding]
160
mov [r0+cb.bytes_outstanding], t5d ; is zero, but a reg has smaller opcode than an immediate
164
; increment cb.bytes_outstanding in memory
inc dword [r0+cb.bytes_outstanding]