27
27
#include "config.h"
30
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
30
#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
31
31
"mov "tmp" , %%ecx \n\t"\
32
32
"shl $17 , "tmp" \n\t"\
33
33
"cmp "low" , "tmp" \n\t"\
34
34
"cmova %%ecx , "range" \n\t"\
35
35
"sbb %%ecx , %%ecx \n\t"\
36
36
"and %%ecx , "tmp" \n\t"\
37
"sub "tmp" , "low" \n\t"\
38
"xor %%ecx , "ret" \n\t"
37
"xor %%ecx , "ret" \n\t"\
38
"sub "tmp" , "low" \n\t"
39
39
#else /* HAVE_FAST_CMOV */
40
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
40
#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
41
41
"mov "tmp" , %%ecx \n\t"\
42
42
"shl $17 , "tmp" \n\t"\
43
43
"sub "low" , "tmp" \n\t"\
51
51
"xor "tmp" , "ret" \n\t"
52
52
#endif /* HAVE_FAST_CMOV */
54
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
54
#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
55
55
"movzbl "statep" , "ret" \n\t"\
56
56
"mov "range" , "tmp" \n\t"\
57
57
"and $0xC0 , "range" \n\t"\
58
58
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
59
59
"sub "range" , "tmp" \n\t"\
60
BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, \
60
BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp) \
62
61
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
63
62
"shl %%cl , "range" \n\t"\
64
63
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
64
"shl %%cl , "low" \n\t"\
65
65
"mov "tmpbyte" , "statep" \n\t"\
66
"shl %%cl , "low" \n\t"\
67
66
"test "lowword" , "lowword" \n\t"\
69
"mov "byte"("cabac"), %%"REG_c" \n\t"\
68
"mov "byte" , %%"REG_c" \n\t"\
69
"add"OPSIZE" $2 , "byte" \n\t"\
70
70
"movzwl (%%"REG_c") , "tmp" \n\t"\
72
"shr $15 , "tmp" \n\t"\
73
"sub $0xFFFF , "tmp" \n\t"\
74
"add $2 , %%"REG_c" \n\t"\
75
"mov %%"REG_c" , "byte "("cabac") \n\t"\
76
71
"lea -1("low") , %%ecx \n\t"\
77
72
"xor "low" , %%ecx \n\t"\
78
73
"shr $15 , %%ecx \n\t"\
75
"shr $15 , "tmp" \n\t"\
79
76
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
77
"sub $0xFFFF , "tmp" \n\t"\
81
79
"add $7 , %%ecx \n\t"\
82
80
"shl %%cl , "tmp" \n\t"\
83
81
"add "tmp" , "low" \n\t"\
86
#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
84
#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
87
85
#define get_cabac_inline get_cabac_inline_x86
88
86
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
89
87
uint8_t *const state)
91
int bit, low, range, tmp;
94
"movl %a6(%5), %2 \n\t"
95
"movl %a7(%5), %1 \n\t"
96
BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2",
98
"movl %2, %a6(%5) \n\t"
99
"movl %1, %a7(%5) \n\t"
101
:"=&r"(bit), "=&r"(low), "=&r"(range), "=&q"(tmp)
103
"i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
104
"i"(offsetof(CABACContext, bytestream))
92
BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
94
:"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
105
97
: "%"REG_c, "memory"
109
#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
101
#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
111
103
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
112
104
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
115
107
__asm__ volatile(
116
"movl %a3(%2), %k1 \n\t"
117
"movl %a4(%2), %%eax \n\t"
109
"movl %2, %%eax \n\t"
118
110
"shl $17, %k1 \n\t"
119
111
"add %%eax, %%eax \n\t"
120
112
"sub %k1, %%eax \n\t"
125
117
"sub %%edx, %%ecx \n\t"
126
118
"test %%ax, %%ax \n\t"
128
"mov %a5(%2), %1 \n\t"
129
121
"subl $0xFFFF, %%eax \n\t"
130
122
"movzwl (%1), %%edx \n\t"
131
123
"bswap %%edx \n\t"
132
124
"shrl $15, %%edx \n\t"
133
125
"add $2, %1 \n\t"
134
126
"addl %%edx, %%eax \n\t"
135
"mov %1, %a5(%2) \n\t"
137
"movl %%eax, %a4(%2) \n\t"
129
"movl %%eax, %2 \n\t"
139
:"+c"(val), "=&r"(tmp)
141
"i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
142
"i"(offsetof(CABACContext, bytestream))
143
: "%eax", "%edx", "memory"
131
:"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)