68
58
#define IS_USER(s) (s->user)
71
/* These instructions trap after executing, so defer them until after the
72
conditional execution state has been updated. */
77
/* We reuse the same 64-bit temporaries for efficiency. */
78
static TCGv cpu_V0, cpu_V1, cpu_M0;
80
/* FIXME: These should be removed. */
82
static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
84
#define ICOUNT_TEMP cpu_T[0]
85
#include "gen-icount.h"
87
/* initialize TCG globals. */
88
void arm_translate_init(void)
90
cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
92
cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
93
cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
96
/* The code generator doesn't like lots of temporaries, so maintain our own
97
cache for reuse within a function. */
100
static TCGv temps[MAX_TEMPS];
102
/* Allocate a temporary variable. */
103
static TCGv new_tmp(void)
106
if (num_temps == MAX_TEMPS)
109
if (GET_TCGV(temps[num_temps]))
110
return temps[num_temps++];
112
tmp = tcg_temp_new(TCG_TYPE_I32);
113
temps[num_temps++] = tmp;
117
/* Release a temporary variable. */
118
static void dead_tmp(TCGv tmp)
123
if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
126
/* Shuffle this temp to the last slot. */
127
while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
129
while (i < num_temps) {
130
temps[i] = temps[i + 1];
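/* Illustrative usage sketch (not part of the original file): a generator
   helper is expected to pair each new_tmp() with a dead_tmp() so the cached
   temporaries get recycled.  The helper name below is hypothetical. */
static void gen_example_store_zero(int offset)
{
    TCGv tmp = new_tmp();                   /* take a cached i32 temporary */
    tcg_gen_movi_i32(tmp, 0);               /* materialise the constant 0 */
    tcg_gen_st_i32(tmp, cpu_env, offset);   /* write it into the CPU state */
    dead_tmp(tmp);                          /* recycle the temporary */
}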
136
static inline TCGv load_cpu_offset(int offset)
138
TCGv tmp = new_tmp();
139
tcg_gen_ld_i32(tmp, cpu_env, offset);
143
#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))
145
static inline void store_cpu_offset(TCGv var, int offset)
147
tcg_gen_st_i32(var, cpu_env, offset);
151
#define store_cpu_field(var, name) \
152
store_cpu_offset(var, offsetof(CPUState, name))
154
/* Set a variable to the value of a CPU register. */
155
static void load_reg_var(DisasContext *s, TCGv var, int reg)
159
/* normally, since we updated PC, we need only to add one insn */
161
addr = (long)s->pc + 2;
163
addr = (long)s->pc + 4;
164
tcg_gen_movi_i32(var, addr);
166
tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
170
/* Create a new temporary and set it to the value of a CPU register. */
171
static inline TCGv load_reg(DisasContext *s, int reg)
173
TCGv tmp = new_tmp();
174
load_reg_var(s, tmp, reg);
178
/* Set a CPU register. The source must be a temporary and will be
180
static void store_reg(DisasContext *s, int reg, TCGv var)
183
tcg_gen_andi_i32(var, var, ~1);
184
s->is_jmp = DISAS_JUMP;
186
tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
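/* Illustrative usage sketch (hypothetical helper, not in the original file):
   load_reg() hands back a fresh temporary and store_reg() consumes it, so a
   register-to-register move needs no explicit dead_tmp(). */
static void gen_example_mov_reg(DisasContext *s, int rd, int rm)
{
    TCGv tmp = load_reg(s, rm);   /* new temporary holding regs[rm] */
    store_reg(s, rd, tmp);        /* writes regs[rd] and marks tmp dead */
}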
191
/* Basic operations. */
192
#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
193
#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
194
#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
195
#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
197
#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
198
#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
199
#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
200
#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
202
#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
203
#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
204
#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
205
#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
206
#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
207
#define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
209
#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
210
#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
211
#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
212
#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
213
#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
214
#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
215
#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
217
#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
218
#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
219
#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
220
#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
221
#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)
223
/* Value extensions. */
224
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
225
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
226
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
227
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
229
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
230
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
232
#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])
234
#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
235
/* Set NZCV flags from the high 4 bits of var. */
236
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
238
static void gen_exception(int excp)
240
TCGv tmp = new_tmp();
241
tcg_gen_movi_i32(tmp, excp);
242
gen_helper_exception(tmp);
246
static void gen_smul_dual(TCGv a, TCGv b)
248
TCGv tmp1 = new_tmp();
249
TCGv tmp2 = new_tmp();
250
tcg_gen_ext16s_i32(tmp1, a);
251
tcg_gen_ext16s_i32(tmp2, b);
252
tcg_gen_mul_i32(tmp1, tmp1, tmp2);
254
tcg_gen_sari_i32(a, a, 16);
255
tcg_gen_sari_i32(b, b, 16);
256
tcg_gen_mul_i32(b, b, a);
257
tcg_gen_mov_i32(a, tmp1);
261
/* Byteswap each halfword. */
262
static void gen_rev16(TCGv var)
264
TCGv tmp = new_tmp();
265
tcg_gen_shri_i32(tmp, var, 8);
266
tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
267
tcg_gen_shli_i32(var, var, 8);
268
tcg_gen_andi_i32(var, var, 0xff00ff00);
269
tcg_gen_or_i32(var, var, tmp);
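/* Reference semantics in plain C (added for illustration only): the TCG
   sequence above performs the same per-halfword byte swap. */
static inline uint32_t rev16_ref(uint32_t x)
{
    return ((x >> 8) & 0x00ff00ffu) | ((x << 8) & 0xff00ff00u);
}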
273
/* Byteswap low halfword and sign extend. */
274
static void gen_revsh(TCGv var)
276
TCGv tmp = new_tmp();
277
tcg_gen_shri_i32(tmp, var, 8);
278
tcg_gen_andi_i32(tmp, tmp, 0x00ff);
279
tcg_gen_ext8s_i32(var, var);
tcg_gen_shli_i32(var, var, 8);
281
tcg_gen_or_i32(var, var, tmp);
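/* Reference semantics in plain C (illustration only): swap the two bytes of
   the low halfword and sign-extend the 16-bit result. */
static inline int32_t revsh_ref(uint32_t x)
{
    return (int16_t)(((x & 0xffu) << 8) | ((x >> 8) & 0xffu));
}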
285
/* Unsigned bitfield extract. */
286
static void gen_ubfx(TCGv var, int shift, uint32_t mask)
289
tcg_gen_shri_i32(var, var, shift);
290
tcg_gen_andi_i32(var, var, mask);
293
/* Signed bitfield extract. */
294
static void gen_sbfx(TCGv var, int shift, int width)
299
tcg_gen_sari_i32(var, var, shift);
300
if (shift + width < 32) {
301
signbit = 1u << (width - 1);
302
tcg_gen_andi_i32(var, var, (1u << width) - 1);
303
tcg_gen_xori_i32(var, var, signbit);
304
tcg_gen_subi_i32(var, var, signbit);
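/* Reference semantics in plain C (illustration only, assuming 0 < width < 32):
   the mask/xor/subtract pair sign-extends the extracted field just like the
   TCG sequence above. */
static inline int32_t sbfx_ref(uint32_t x, int shift, int width)
{
    uint32_t signbit = 1u << (width - 1);
    uint32_t field = (x >> shift) & ((1u << width) - 1);
    return (int32_t)((field ^ signbit) - signbit);
}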
308
/* Bitfield insertion. Insert val into base. Clobbers base and val. */
309
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
311
tcg_gen_andi_i32(val, val, mask);
312
tcg_gen_shli_i32(val, val, shift);
313
tcg_gen_andi_i32(base, base, ~(mask << shift));
314
tcg_gen_or_i32(dest, base, val);
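/* Reference semantics in plain C (illustration only): insert the bits of
   'val' selected by 'mask' into 'base' at bit position 'shift'. */
static inline uint32_t bfi_ref(uint32_t base, uint32_t val, int shift, uint32_t mask)
{
    return (base & ~(mask << shift)) | ((val & mask) << shift);
}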
317
/* Round the top 32 bits of a 64-bit value. */
318
static void gen_roundqd(TCGv a, TCGv b)
320
tcg_gen_shri_i32(a, a, 31);
321
tcg_gen_add_i32(a, a, b);
324
/* FIXME: Most targets have native widening multiplication.
325
It would be good to use that instead of a full wide multiply. */
326
/* 32x32->64 multiply. Marks inputs as dead. */
327
static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
329
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
330
TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
332
tcg_gen_extu_i32_i64(tmp1, a);
334
tcg_gen_extu_i32_i64(tmp2, b);
336
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
340
static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
342
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
343
TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
345
tcg_gen_ext_i32_i64(tmp1, a);
347
tcg_gen_ext_i32_i64(tmp2, b);
349
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
353
/* Unsigned 32x32->64 multiply. */
354
static void gen_op_mull_T0_T1(void)
356
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
357
TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
359
tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
360
tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
361
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
362
tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
363
tcg_gen_shri_i64(tmp1, tmp1, 32);
364
tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
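/* Reference semantics in plain C (illustration only): the 64-bit temporary
   sequence above leaves the low half of the unsigned product in T0 and the
   high half in T1. */
static inline void umull_ref(uint32_t *lo, uint32_t *hi, uint32_t a, uint32_t b)
{
    uint64_t prod = (uint64_t)a * b;
    *lo = (uint32_t)prod;
    *hi = (uint32_t)(prod >> 32);
}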
367
/* Signed 32x32->64 multiply. */
368
static void gen_imull(TCGv a, TCGv b)
370
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
371
TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
373
tcg_gen_ext_i32_i64(tmp1, a);
374
tcg_gen_ext_i32_i64(tmp2, b);
375
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
376
tcg_gen_trunc_i64_i32(a, tmp1);
377
tcg_gen_shri_i64(tmp1, tmp1, 32);
378
tcg_gen_trunc_i64_i32(b, tmp1);
380
#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
382
/* Swap low and high halfwords. */
383
static void gen_swap_half(TCGv var)
385
TCGv tmp = new_tmp();
386
tcg_gen_shri_i32(tmp, var, 16);
387
tcg_gen_shli_i32(var, var, 16);
388
tcg_gen_or_i32(var, var, tmp);
392
/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
393
tmp = (t0 ^ t1) & 0x8000;
396
t0 = (t0 + t1) ^ tmp;
399
static void gen_add16(TCGv t0, TCGv t1)
401
TCGv tmp = new_tmp();
402
tcg_gen_xor_i32(tmp, t0, t1);
403
tcg_gen_andi_i32(tmp, tmp, 0x8000);
404
tcg_gen_andi_i32(t0, t0, ~0x8000);
405
tcg_gen_andi_i32(t1, t1, ~0x8000);
406
tcg_gen_add_i32(t0, t0, t1);
407
tcg_gen_xor_i32(t0, t0, tmp);
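/* Reference semantics in plain C (illustration only), following the comment
   above: stripping bit 15 from both operands keeps the low-halfword carry
   from crossing into the high halfword, and the final xor restores bit 15. */
static inline uint32_t add16_ref(uint32_t t0, uint32_t t1)
{
    uint32_t tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000u;
    t1 &= ~0x8000u;
    return (t0 + t1) ^ tmp;
}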
412
#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
414
/* Set CF to the top bit of var. */
415
static void gen_set_CF_bit31(TCGv var)
417
TCGv tmp = new_tmp();
418
tcg_gen_shri_i32(tmp, var, 31);
423
/* Set N and Z flags from var. */
424
static inline void gen_logic_CC(TCGv var)
426
tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NF));
427
tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, ZF));
431
static void gen_adc_T0_T1(void)
435
tmp = load_cpu_field(CF);
436
tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
440
/* dest = T0 - T1 + CF - 1. */
441
static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
444
tcg_gen_sub_i32(dest, t0, t1);
445
tmp = load_cpu_field(CF);
446
tcg_gen_add_i32(dest, dest, tmp);
447
tcg_gen_subi_i32(dest, dest, 1);
451
#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
452
#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])
454
/* T0 &= ~T1. Clobbers T1. */
455
/* FIXME: Implement bic natively. */
456
static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
458
TCGv tmp = new_tmp();
459
tcg_gen_not_i32(tmp, t1);
460
tcg_gen_and_i32(dest, t0, tmp);
463
static inline void gen_op_bicl_T0_T1(void)
469
/* FIXME: Implement this natively. */
470
#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
472
/* FIXME: Implement this natively. */
473
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
481
tcg_gen_shri_i32(tmp, t1, i);
482
tcg_gen_shli_i32(t1, t1, 32 - i);
483
tcg_gen_or_i32(t0, t1, tmp);
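/* Reference semantics in plain C (illustration only, assuming 0 < i < 32 as
   in the TCG sequence above): rotate right by i bits. */
static inline uint32_t ror32_ref(uint32_t x, int i)
{
    return (x >> i) | (x << (32 - i));
}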
487
static void shifter_out_im(TCGv var, int shift)
489
TCGv tmp = new_tmp();
491
tcg_gen_andi_i32(tmp, var, 1);
493
tcg_gen_shri_i32(tmp, var, shift);
495
tcg_gen_andi_i32(tmp, tmp, 1);
501
/* Shift by immediate. Includes special handling for shift == 0. */
502
static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
508
shifter_out_im(var, 32 - shift);
509
tcg_gen_shli_i32(var, var, shift);
515
tcg_gen_shri_i32(var, var, 31);
518
tcg_gen_movi_i32(var, 0);
521
shifter_out_im(var, shift - 1);
522
tcg_gen_shri_i32(var, var, shift);
529
shifter_out_im(var, shift - 1);
532
tcg_gen_sari_i32(var, var, shift);
534
case 3: /* ROR/RRX */
537
shifter_out_im(var, shift - 1);
538
tcg_gen_rori_i32(var, var, shift); break;
540
TCGv tmp = load_cpu_field(CF);
542
shifter_out_im(var, 0);
543
tcg_gen_shri_i32(var, var, 1);
544
tcg_gen_shli_i32(tmp, tmp, 31);
545
tcg_gen_or_i32(var, var, tmp);
551
static inline void gen_arm_shift_reg(TCGv var, int shiftop,
552
TCGv shift, int flags)
556
case 0: gen_helper_shl_cc(var, var, shift); break;
557
case 1: gen_helper_shr_cc(var, var, shift); break;
558
case 2: gen_helper_sar_cc(var, var, shift); break;
559
case 3: gen_helper_ror_cc(var, var, shift); break;
563
case 0: gen_helper_shl(var, var, shift); break;
564
case 1: gen_helper_shr(var, var, shift); break;
565
case 2: gen_helper_sar(var, var, shift); break;
566
case 3: gen_helper_ror(var, var, shift); break;
572
#define PAS_OP(pfx) \
574
case 0: gen_pas_helper(glue(pfx,add16)); break; \
575
case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
576
case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
577
case 3: gen_pas_helper(glue(pfx,sub16)); break; \
578
case 4: gen_pas_helper(glue(pfx,add8)); break; \
579
case 7: gen_pas_helper(glue(pfx,sub8)); break; \
581
static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
586
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
588
tmp = tcg_temp_new(TCG_TYPE_PTR);
589
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
593
tmp = tcg_temp_new(TCG_TYPE_PTR);
594
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
597
#undef gen_pas_helper
598
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
611
#undef gen_pas_helper
616
/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
617
#define PAS_OP(pfx) \
619
case 0: gen_pas_helper(glue(pfx,add8)); break; \
620
case 1: gen_pas_helper(glue(pfx,add16)); break; \
621
case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
622
case 4: gen_pas_helper(glue(pfx,sub8)); break; \
623
case 5: gen_pas_helper(glue(pfx,sub16)); break; \
624
case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
626
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
631
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
633
tmp = tcg_temp_new(TCG_TYPE_PTR);
634
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
638
tmp = tcg_temp_new(TCG_TYPE_PTR);
639
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
642
#undef gen_pas_helper
643
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
656
#undef gen_pas_helper
661
static void gen_test_cc(int cc, int label)
669
tmp = load_cpu_field(ZF);
670
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
673
tmp = load_cpu_field(ZF);
674
tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
677
tmp = load_cpu_field(CF);
678
tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
681
tmp = load_cpu_field(CF);
682
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
685
tmp = load_cpu_field(NF);
686
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
689
tmp = load_cpu_field(NF);
690
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
693
tmp = load_cpu_field(VF);
694
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
697
tmp = load_cpu_field(VF);
698
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
700
case 8: /* hi: C && !Z */
701
inv = gen_new_label();
702
tmp = load_cpu_field(CF);
703
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
705
tmp = load_cpu_field(ZF);
706
tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
709
case 9: /* ls: !C || Z */
710
tmp = load_cpu_field(CF);
711
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
713
tmp = load_cpu_field(ZF);
714
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
716
case 10: /* ge: N == V -> N ^ V == 0 */
717
tmp = load_cpu_field(VF);
718
tmp2 = load_cpu_field(NF);
719
tcg_gen_xor_i32(tmp, tmp, tmp2);
721
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
723
case 11: /* lt: N != V -> N ^ V != 0 */
724
tmp = load_cpu_field(VF);
725
tmp2 = load_cpu_field(NF);
726
tcg_gen_xor_i32(tmp, tmp, tmp2);
728
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
730
case 12: /* gt: !Z && N == V */
731
inv = gen_new_label();
732
tmp = load_cpu_field(ZF);
733
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
735
tmp = load_cpu_field(VF);
736
tmp2 = load_cpu_field(NF);
737
tcg_gen_xor_i32(tmp, tmp, tmp2);
739
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
742
case 13: /* le: Z || N != V */
743
tmp = load_cpu_field(ZF);
744
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
746
tmp = load_cpu_field(VF);
747
tmp2 = load_cpu_field(NF);
748
tcg_gen_xor_i32(tmp, tmp, tmp2);
750
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
753
fprintf(stderr, "Bad condition code 0x%x\n", cc);
759
static const uint8_t table_logic_cc[16] = {
3424
2168
mask |= 0xff0000;
3425
2169
if (flags & (1 << 3))
3426
2170
mask |= 0xff000000;
3428
2171
/* Mask out undefined bits. */
3429
mask &= ~CPSR_RESERVED;
3430
if (!arm_feature(env, ARM_FEATURE_V6))
3431
mask &= ~(CPSR_E | CPSR_GE);
3432
if (!arm_feature(env, ARM_FEATURE_THUMB2))
3434
/* Mask out execution state bits. */
3437
2176
/* Mask out privileged bits. */
3438
2177
if (IS_USER(s))
3443
2182
/* Returns nonzero if access to the PSR is not permitted. */
3444
2183
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
3448
2186
/* ??? This is also undefined in system mode. */
3449
2187
if (IS_USER(s))
3452
tmp = load_cpu_field(spsr);
3453
tcg_gen_andi_i32(tmp, tmp, ~mask);
3454
tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
3455
tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
3456
store_cpu_field(tmp, spsr);
3458
gen_set_cpsr(cpu_T[0], mask);
3460
2193
gen_lookup_tb(s);
3464
/* Generate an old-style exception return. */
3465
2197
static void gen_exception_return(DisasContext *s)
3468
gen_movl_reg_T0(s, 15);
3469
tmp = load_cpu_field(spsr);
3470
gen_set_cpsr(tmp, 0xffffffff);
3472
s->is_jmp = DISAS_UPDATE;
3475
/* Generate a v6 exception return. Marks both values as dead. */
3476
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
3478
gen_set_cpsr(cpsr, 0xffffffff);
3480
store_reg(s, 15, pc);
3481
s->is_jmp = DISAS_UPDATE;
3485
gen_set_condexec (DisasContext *s)
3487
if (s->condexec_mask) {
3488
uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3489
TCGv tmp = new_tmp();
3490
tcg_gen_movi_i32(tmp, val);
3491
store_cpu_field(tmp, condexec_bits);
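/* Illustration only (hypothetical helper): the value stored into
   condexec_bits packs the IT-block condition into bits [7:4] and the
   remaining mask, already advanced by one step, into bits [3:0]. */
static inline uint32_t pack_condexec(uint32_t cond, uint32_t mask)
{
    return (cond << 4) | (mask >> 1);
}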
3495
static void gen_nop_hint(DisasContext *s, int val)
3499
gen_set_pc_im(s->pc);
3500
s->is_jmp = DISAS_WFI;
3504
/* TODO: Implement SEV and WFE. May help SMP performance. */
3510
/* These macros help make the code more readable when migrating from the
3511
old dyngen helpers. They should probably be removed when
3512
T0/T1 are removed. */
3513
#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]
3514
#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]
3516
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3518
static inline int gen_neon_add(int size)
3521
case 0: gen_helper_neon_add_u8(CPU_T001); break;
3522
case 1: gen_helper_neon_add_u16(CPU_T001); break;
3523
case 2: gen_op_addl_T0_T1(); break;
3529
static inline void gen_neon_rsb(int size)
3532
case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3533
case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3534
case 2: gen_op_rsbl_T0_T1(); break;
3539
/* 32-bit pairwise ops end up the same as the elementwise versions. */
3540
#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
3541
#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
3542
#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
3543
#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
3545
/* FIXME: This is wrong. They set the wrong overflow bit. */
3546
#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c)
3547
#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c)
3548
#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c)
3549
#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c)
3551
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
3552
switch ((size << 1) | u) { \
3554
gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3557
gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3560
gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3563
gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3566
gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3569
gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
3571
default: return 1; \
3574
#define GEN_NEON_INTEGER_OP(name) do { \
3575
switch ((size << 1) | u) { \
3577
gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \
3580
gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \
3583
gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \
3586
gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \
3589
gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \
3592
gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \
3594
default: return 1; \
3598
gen_neon_movl_scratch_T0(int scratch)
3602
offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3603
tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
3607
gen_neon_movl_scratch_T1(int scratch)
3611
offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3612
tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
3616
gen_neon_movl_T0_scratch(int scratch)
3620
offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3621
tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
3625
gen_neon_movl_T1_scratch(int scratch)
3629
offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3630
tcg_gen_ld_i32(cpu_T[1], cpu_env, offset);
3633
static inline void gen_neon_get_scalar(int size, int reg)
3636
NEON_GET_REG(T0, reg >> 1, reg & 1);
3638
NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
3640
gen_neon_dup_low16(cpu_T[0]);
3642
gen_neon_dup_high16(cpu_T[0]);
3646
static void gen_neon_unzip(int reg, int q, int tmp, int size)
3650
for (n = 0; n < q + 1; n += 2) {
3651
NEON_GET_REG(T0, reg, n);
3652
NEON_GET_REG(T1, reg, n + 1);
3654
case 0: gen_helper_neon_unzip_u8(); break;
3655
case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */
3656
case 2: /* no-op */; break;
3659
gen_neon_movl_scratch_T0(tmp + n);
3660
gen_neon_movl_scratch_T1(tmp + n + 1);
3668
} neon_ls_element_type[11] = {
3682
/* Translate a NEON load/store element instruction. Return nonzero if the
3683
instruction is invalid. */
3684
static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3700
if (!vfp_enabled(env))
3702
VFP_DREG_D(rd, insn);
3703
rn = (insn >> 16) & 0xf;
3705
load = (insn & (1 << 21)) != 0;
3706
if ((insn & (1 << 23)) == 0) {
3707
/* Load store all elements. */
3708
op = (insn >> 8) & 0xf;
3709
size = (insn >> 6) & 3;
3710
if (op > 10 || size == 3)
3712
nregs = neon_ls_element_type[op].nregs;
3713
interleave = neon_ls_element_type[op].interleave;
3714
gen_movl_T1_reg(s, rn);
3715
stride = (1 << size) * interleave;
3716
for (reg = 0; reg < nregs; reg++) {
3717
if (interleave > 2 || (interleave == 2 && nregs == 2)) {
3718
gen_movl_T1_reg(s, rn);
3719
gen_op_addl_T1_im((1 << size) * reg);
3720
} else if (interleave == 2 && nregs == 4 && reg == 2) {
3721
gen_movl_T1_reg(s, rn);
3722
gen_op_addl_T1_im(1 << size);
3724
for (pass = 0; pass < 2; pass++) {
3727
tmp = gen_ld32(cpu_T[1], IS_USER(s));
3728
neon_store_reg(rd, pass, tmp);
3730
tmp = neon_load_reg(rd, pass);
3731
gen_st32(tmp, cpu_T[1], IS_USER(s));
3733
gen_op_addl_T1_im(stride);
3734
} else if (size == 1) {
3736
tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3737
gen_op_addl_T1_im(stride);
3738
tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
3739
gen_op_addl_T1_im(stride);
3740
gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
3742
neon_store_reg(rd, pass, tmp);
3744
tmp = neon_load_reg(rd, pass);
3746
tcg_gen_shri_i32(tmp2, tmp, 16);
3747
gen_st16(tmp, cpu_T[1], IS_USER(s));
3748
gen_op_addl_T1_im(stride);
3749
gen_st16(tmp2, cpu_T[1], IS_USER(s));
3750
gen_op_addl_T1_im(stride);
3752
} else /* size == 0 */ {
3755
for (n = 0; n < 4; n++) {
3756
tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3757
gen_op_addl_T1_im(stride);
3761
gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
3765
neon_store_reg(rd, pass, tmp2);
3767
tmp2 = neon_load_reg(rd, pass);
3768
for (n = 0; n < 4; n++) {
3771
tcg_gen_mov_i32(tmp, tmp2);
3773
tcg_gen_shri_i32(tmp, tmp2, n * 8);
3775
gen_st8(tmp, cpu_T[1], IS_USER(s));
3776
gen_op_addl_T1_im(stride);
3782
rd += neon_ls_element_type[op].spacing;
3786
size = (insn >> 10) & 3;
3788
/* Load single element to all lanes. */
3791
size = (insn >> 6) & 3;
3792
nregs = ((insn >> 8) & 3) + 1;
3793
stride = (insn & (1 << 5)) ? 2 : 1;
3794
gen_movl_T1_reg(s, rn);
3795
for (reg = 0; reg < nregs; reg++) {
3798
tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3799
gen_neon_dup_u8(tmp, 0);
3802
tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3803
gen_neon_dup_low16(tmp);
3806
tmp = gen_ld32(cpu_T[1], IS_USER(s));
3810
default: /* Avoid compiler warnings. */
3813
gen_op_addl_T1_im(1 << size);
3815
tcg_gen_mov_i32(tmp2, tmp);
3816
neon_store_reg(rd, 0, tmp2);
3817
neon_store_reg(rd, 1, tmp);
3820
stride = (1 << size) * nregs;
3822
/* Single element. */
3823
pass = (insn >> 7) & 1;
3826
shift = ((insn >> 5) & 3) * 8;
3830
shift = ((insn >> 6) & 1) * 16;
3831
stride = (insn & (1 << 5)) ? 2 : 1;
3835
stride = (insn & (1 << 6)) ? 2 : 1;
3840
nregs = ((insn >> 8) & 3) + 1;
3841
gen_movl_T1_reg(s, rn);
3842
for (reg = 0; reg < nregs; reg++) {
3846
tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3849
tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3852
tmp = gen_ld32(cpu_T[1], IS_USER(s));
3854
default: /* Avoid compiler warnings. */
3858
tmp2 = neon_load_reg(rd, pass);
3859
gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
3862
neon_store_reg(rd, pass, tmp);
3863
} else { /* Store */
3864
tmp = neon_load_reg(rd, pass);
3866
tcg_gen_shri_i32(tmp, tmp, shift);
3869
gen_st8(tmp, cpu_T[1], IS_USER(s));
3872
gen_st16(tmp, cpu_T[1], IS_USER(s));
3875
gen_st32(tmp, cpu_T[1], IS_USER(s));
3880
gen_op_addl_T1_im(1 << size);
3882
stride = nregs * (1 << size);
3888
base = load_reg(s, rn);
3890
tcg_gen_addi_i32(base, base, stride);
3893
index = load_reg(s, rm);
3894
tcg_gen_add_i32(base, base, index);
3897
store_reg(s, rn, base);
3902
/* Bitwise select. dest = c ? t : f. Clobbers T and F. */
3903
static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
3905
tcg_gen_and_i32(t, t, c);
3906
tcg_gen_bic_i32(f, f, c);
3907
tcg_gen_or_i32(dest, t, f);
3910
static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
3913
case 0: gen_helper_neon_narrow_u8(dest, src); break;
3914
case 1: gen_helper_neon_narrow_u16(dest, src); break;
3915
case 2: tcg_gen_trunc_i64_i32(dest, src); break;
3920
static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
3923
case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3924
case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3925
case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3930
static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv src)
3933
case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3934
case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3935
case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3940
static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
3946
case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3947
case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3952
case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3953
case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3960
case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3961
case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3966
case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3967
case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3974
static inline void gen_neon_widen(TCGv dest, TCGv src, int size, int u)
3978
case 0: gen_helper_neon_widen_u8(dest, src); break;
3979
case 1: gen_helper_neon_widen_u16(dest, src); break;
3980
case 2: tcg_gen_extu_i32_i64(dest, src); break;
3985
case 0: gen_helper_neon_widen_s8(dest, src); break;
3986
case 1: gen_helper_neon_widen_s16(dest, src); break;
3987
case 2: tcg_gen_ext_i32_i64(dest, src); break;
3994
static inline void gen_neon_addl(int size)
3997
case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3998
case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3999
case 2: tcg_gen_add_i64(CPU_V001); break;
4004
static inline void gen_neon_subl(int size)
4007
case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4008
case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4009
case 2: tcg_gen_sub_i64(CPU_V001); break;
4014
static inline void gen_neon_negl(TCGv var, int size)
4017
case 0: gen_helper_neon_negl_u16(var, var); break;
4018
case 1: gen_helper_neon_negl_u32(var, var); break;
4019
case 2: gen_helper_neon_negl_u64(var, var); break;
4024
static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
4027
case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4028
case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4033
static inline void gen_neon_mull(TCGv dest, TCGv a, TCGv b, int size, int u)
4037
switch ((size << 1) | u) {
4038
case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4039
case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4040
case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4041
case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4043
tmp = gen_muls_i64_i32(a, b);
4044
tcg_gen_mov_i64(dest, tmp);
4047
tmp = gen_mulu_i64_i32(a, b);
4048
tcg_gen_mov_i64(dest, tmp);
4058
/* Translate a NEON data processing instruction. Return nonzero if the
4059
instruction is invalid.
4060
We process data in a mixture of 32-bit and 64-bit chunks.
4061
Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4063
static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4080
if (!vfp_enabled(env))
4082
q = (insn & (1 << 6)) != 0;
4083
u = (insn >> 24) & 1;
4084
VFP_DREG_D(rd, insn);
4085
VFP_DREG_N(rn, insn);
4086
VFP_DREG_M(rm, insn);
4087
size = (insn >> 20) & 3;
4088
if ((insn & (1 << 23)) == 0) {
4089
/* Three register same length. */
4090
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4091
if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
4092
|| op == 10 || op == 11 || op == 16)) {
4093
/* 64-bit element instructions. */
4094
for (pass = 0; pass < (q ? 2 : 1); pass++) {
4095
neon_load_reg64(cpu_V0, rn + pass);
4096
neon_load_reg64(cpu_V1, rm + pass);
4100
gen_helper_neon_add_saturate_u64(CPU_V001);
4102
gen_helper_neon_add_saturate_s64(CPU_V001);
4107
gen_helper_neon_sub_saturate_u64(CPU_V001);
4109
gen_helper_neon_sub_saturate_s64(CPU_V001);
4114
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4116
gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4121
gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4124
gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
4128
case 10: /* VRSHL */
4130
gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4132
gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4135
case 11: /* VQRSHL */
4137
gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4140
gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4146
tcg_gen_sub_i64(CPU_V001);
4148
tcg_gen_add_i64(CPU_V001);
4154
neon_store_reg64(cpu_V0, rd + pass);
4161
case 10: /* VRSHL */
4162
case 11: /* VQRSHL */
4165
/* Shift instruction operands are reversed. */
4172
case 20: /* VPMAX */
4173
case 21: /* VPMIN */
4174
case 23: /* VPADD */
4177
case 26: /* VPADD (float) */
4178
pairwise = (u && size < 2);
4180
case 30: /* VPMIN/VPMAX (float) */
4187
for (pass = 0; pass < (q ? 4 : 2); pass++) {
4196
NEON_GET_REG(T0, rn, n);
4197
NEON_GET_REG(T1, rn, n + 1);
4199
NEON_GET_REG(T0, rm, n);
4200
NEON_GET_REG(T1, rm, n + 1);
4204
NEON_GET_REG(T0, rn, pass);
4205
NEON_GET_REG(T1, rm, pass);
4209
GEN_NEON_INTEGER_OP(hadd);
4212
GEN_NEON_INTEGER_OP_ENV(qadd);
4214
case 2: /* VRHADD */
4215
GEN_NEON_INTEGER_OP(rhadd);
4217
case 3: /* Logic ops. */
4218
switch ((u << 2) | size) {
4220
gen_op_andl_T0_T1();
4223
gen_op_bicl_T0_T1();
4233
gen_op_xorl_T0_T1();
4236
tmp = neon_load_reg(rd, pass);
4237
gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
4241
tmp = neon_load_reg(rd, pass);
4242
gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
4246
tmp = neon_load_reg(rd, pass);
4247
gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
4253
GEN_NEON_INTEGER_OP(hsub);
4256
GEN_NEON_INTEGER_OP_ENV(qsub);
4259
GEN_NEON_INTEGER_OP(cgt);
4262
GEN_NEON_INTEGER_OP(cge);
4265
GEN_NEON_INTEGER_OP(shl);
4268
GEN_NEON_INTEGER_OP_ENV(qshl);
4270
case 10: /* VRSHL */
4271
GEN_NEON_INTEGER_OP(rshl);
4273
case 11: /* VQRSHL */
4274
GEN_NEON_INTEGER_OP_ENV(qrshl);
4277
GEN_NEON_INTEGER_OP(max);
4280
GEN_NEON_INTEGER_OP(min);
4283
GEN_NEON_INTEGER_OP(abd);
4286
GEN_NEON_INTEGER_OP(abd);
4287
NEON_GET_REG(T1, rd, pass);
4291
if (!u) { /* VADD */
4292
if (gen_neon_add(size))
4296
case 0: gen_helper_neon_sub_u8(CPU_T001); break;
4297
case 1: gen_helper_neon_sub_u16(CPU_T001); break;
4298
case 2: gen_op_subl_T0_T1(); break;
4304
if (!u) { /* VTST */
4306
case 0: gen_helper_neon_tst_u8(CPU_T001); break;
4307
case 1: gen_helper_neon_tst_u16(CPU_T001); break;
4308
case 2: gen_helper_neon_tst_u32(CPU_T001); break;
4313
case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
4314
case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
4315
case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
4320
case 18: /* Multiply. */
4322
case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4323
case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4324
case 2: gen_op_mul_T0_T1(); break;
4327
NEON_GET_REG(T1, rd, pass);
4335
if (u) { /* polynomial */
4336
gen_helper_neon_mul_p8(CPU_T001);
4337
} else { /* Integer */
4339
case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4340
case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4341
case 2: gen_op_mul_T0_T1(); break;
4346
case 20: /* VPMAX */
4347
GEN_NEON_INTEGER_OP(pmax);
4349
case 21: /* VPMIN */
4350
GEN_NEON_INTEGER_OP(pmin);
4352
case 22: /* Multiply high. */
4353
if (!u) { /* VQDMULH */
4355
case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
4356
case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
4359
} else { /* VQRDMULH */
4361
case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
4362
case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
4367
case 23: /* VPADD */
4371
case 0: gen_helper_neon_padd_u8(CPU_T001); break;
4372
case 1: gen_helper_neon_padd_u16(CPU_T001); break;
4373
case 2: gen_op_addl_T0_T1(); break;
4377
case 26: /* Floating point arithmetic. */
4378
switch ((u << 2) | size) {
4380
gen_helper_neon_add_f32(CPU_T001);
4383
gen_helper_neon_sub_f32(CPU_T001);
4386
gen_helper_neon_add_f32(CPU_T001);
4389
gen_helper_neon_abd_f32(CPU_T001);
4395
case 27: /* Float multiply. */
4396
gen_helper_neon_mul_f32(CPU_T001);
4398
NEON_GET_REG(T1, rd, pass);
4400
gen_helper_neon_add_f32(CPU_T001);
4402
gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
4406
case 28: /* Float compare. */
4408
gen_helper_neon_ceq_f32(CPU_T001);
4411
gen_helper_neon_cge_f32(CPU_T001);
4413
gen_helper_neon_cgt_f32(CPU_T001);
4416
case 29: /* Float compare absolute. */
4420
gen_helper_neon_acge_f32(CPU_T001);
4422
gen_helper_neon_acgt_f32(CPU_T001);
4424
case 30: /* Float min/max. */
4426
gen_helper_neon_max_f32(CPU_T001);
4428
gen_helper_neon_min_f32(CPU_T001);
4432
gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4434
gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4439
/* Save the result. For elementwise operations we can put it
4440
straight into the destination register. For pairwise operations
4441
we have to be careful to avoid clobbering the source operands. */
4442
if (pairwise && rd == rm) {
4443
gen_neon_movl_scratch_T0(pass);
4445
NEON_SET_REG(T0, rd, pass);
4449
if (pairwise && rd == rm) {
4450
for (pass = 0; pass < (q ? 4 : 2); pass++) {
4451
gen_neon_movl_T0_scratch(pass);
4452
NEON_SET_REG(T0, rd, pass);
4455
/* End of 3 register same size operations. */
4456
} else if (insn & (1 << 4)) {
4457
if ((insn & 0x00380080) != 0) {
4458
/* Two registers and shift. */
4459
op = (insn >> 8) & 0xf;
4460
if (insn & (1 << 7)) {
4465
while ((insn & (1 << (size + 19))) == 0)
4468
shift = (insn >> 16) & ((1 << (3 + size)) - 1);
4469
/* To avoid excessive duplication of ops we implement shift
4470
by immediate using the variable shift operations. */
4472
/* Shift by immediate:
4473
VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
4474
/* Right shifts are encoded as N - shift, where N is the
4475
element size in bits. */
4477
shift = shift - (1 << (size + 3));
4485
imm = (uint8_t) shift;
4490
imm = (uint16_t) shift;
4501
for (pass = 0; pass < count; pass++) {
4503
neon_load_reg64(cpu_V0, rm + pass);
4504
tcg_gen_movi_i64(cpu_V1, imm);
4509
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4511
gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
4516
gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
4518
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
4523
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4525
case 5: /* VSHL, VSLI */
4526
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4530
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4532
gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4534
case 7: /* VQSHLU */
4535
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4538
if (op == 1 || op == 3) {
4540
neon_load_reg64(cpu_V0, rd + pass);
4541
tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
4542
} else if (op == 4 || (op == 5 && u)) {
4544
cpu_abort(env, "VS[LR]I.64 not implemented");
4546
neon_store_reg64(cpu_V0, rd + pass);
4547
} else { /* size < 3 */
4548
/* Operands in T0 and T1. */
4549
gen_op_movl_T1_im(imm);
4550
NEON_GET_REG(T0, rm, pass);
4554
GEN_NEON_INTEGER_OP(shl);
4558
GEN_NEON_INTEGER_OP(rshl);
4563
GEN_NEON_INTEGER_OP(shl);
4565
case 5: /* VSHL, VSLI */
4567
case 0: gen_helper_neon_shl_u8(CPU_T001); break;
4568
case 1: gen_helper_neon_shl_u16(CPU_T001); break;
4569
case 2: gen_helper_neon_shl_u32(CPU_T001); break;
4574
GEN_NEON_INTEGER_OP_ENV(qshl);
4576
case 7: /* VQSHLU */
4578
case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;
4579
case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;
4580
case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break;
4586
if (op == 1 || op == 3) {
4588
NEON_GET_REG(T1, rd, pass);
4590
} else if (op == 4 || (op == 5 && u)) {
4595
imm = 0xff >> -shift;
4597
imm = (uint8_t)(0xff << shift);
4603
imm = 0xffff >> -shift;
4605
imm = (uint16_t)(0xffff << shift);
4610
imm = 0xffffffffu >> -shift;
4612
imm = 0xffffffffu << shift;
4617
tmp = neon_load_reg(rd, pass);
4618
tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
4619
tcg_gen_andi_i32(tmp, tmp, ~imm);
4620
tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
4622
NEON_SET_REG(T0, rd, pass);
4625
} else if (op < 10) {
4626
/* Shift by immediate and narrow:
4627
VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
4628
shift = shift - (1 << (size + 3));
4632
imm = (uint16_t)shift;
4634
tmp2 = tcg_const_i32(imm);
4637
imm = (uint32_t)shift;
4638
tmp2 = tcg_const_i32(imm);
4640
tmp2 = tcg_const_i64(shift);
4646
for (pass = 0; pass < 2; pass++) {
4648
neon_load_reg64(cpu_V0, rm + pass);
4651
gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp2);
4653
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp2);
4656
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp2);
4658
gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp2);
4661
tmp = neon_load_reg(rm + pass, 0);
4662
gen_neon_shift_narrow(size, tmp, tmp2, q, u);
4663
tmp3 = neon_load_reg(rm + pass, 1);
4664
gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
4665
tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
4670
if (op == 8 && !u) {
4671
gen_neon_narrow(size - 1, tmp, cpu_V0);
4674
gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
4676
gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
4681
neon_store_reg(rd, 0, tmp2);
4682
neon_store_reg(rd, 1, tmp);
4685
} else if (op == 10) {
4689
tmp = neon_load_reg(rm, 0);
4690
tmp2 = neon_load_reg(rm, 1);
4691
for (pass = 0; pass < 2; pass++) {
4695
gen_neon_widen(cpu_V0, tmp, size, u);
4698
/* The shift is less than the width of the source
4699
type, so we can just shift the whole register. */
4700
tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
4701
if (size < 2 || !u) {
4704
imm = (0xffu >> (8 - shift));
4707
imm = 0xffff >> (16 - shift);
4709
imm64 = imm | (((uint64_t)imm) << 32);
4710
tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
4713
neon_store_reg64(cpu_V0, rd + pass);
4715
} else if (op == 15 || op == 16) {
4716
/* VCVT fixed-point. */
4717
for (pass = 0; pass < (q ? 4 : 2); pass++) {
4718
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
4721
gen_vfp_ulto(0, shift);
4723
gen_vfp_slto(0, shift);
4726
gen_vfp_toul(0, shift);
4728
gen_vfp_tosl(0, shift);
4730
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
4735
} else { /* (insn & 0x00380080) == 0 */
4738
op = (insn >> 8) & 0xf;
4739
/* One register and immediate. */
4740
imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
4741
invert = (insn & (1 << 5)) != 0;
4759
imm = (imm << 8) | (imm << 24);
4762
imm = (imm << 8) | 0xff;
4765
imm = (imm << 16) | 0xffff;
4768
imm |= (imm << 8) | (imm << 16) | (imm << 24);
4773
imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
4774
| ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
4780
if (op != 14 || !invert)
4781
gen_op_movl_T1_im(imm);
4783
for (pass = 0; pass < (q ? 4 : 2); pass++) {
4784
if (op & 1 && op < 12) {
4785
tmp = neon_load_reg(rd, pass);
4787
/* The immediate value has already been inverted, so
4789
tcg_gen_andi_i32(tmp, tmp, imm);
4791
tcg_gen_ori_i32(tmp, tmp, imm);
4796
if (op == 14 && invert) {
4799
for (n = 0; n < 4; n++) {
4800
if (imm & (1 << (n + (pass & 1) * 4)))
4801
val |= 0xff << (n * 8);
4803
tcg_gen_movi_i32(tmp, val);
4805
tcg_gen_movi_i32(tmp, imm);
4808
neon_store_reg(rd, pass, tmp);
4811
} else { /* (insn & 0x00800010 == 0x00800000) */
4813
op = (insn >> 8) & 0xf;
4814
if ((insn & (1 << 6)) == 0) {
4815
/* Three registers of different lengths. */
4819
/* prewiden, src1_wide, src2_wide */
4820
static const int neon_3reg_wide[16][3] = {
4821
{1, 0, 0}, /* VADDL */
4822
{1, 1, 0}, /* VADDW */
4823
{1, 0, 0}, /* VSUBL */
4824
{1, 1, 0}, /* VSUBW */
4825
{0, 1, 1}, /* VADDHN */
4826
{0, 0, 0}, /* VABAL */
4827
{0, 1, 1}, /* VSUBHN */
4828
{0, 0, 0}, /* VABDL */
4829
{0, 0, 0}, /* VMLAL */
4830
{0, 0, 0}, /* VQDMLAL */
4831
{0, 0, 0}, /* VMLSL */
4832
{0, 0, 0}, /* VQDMLSL */
4833
{0, 0, 0}, /* Integer VMULL */
4834
{0, 0, 0}, /* VQDMULL */
4835
{0, 0, 0} /* Polynomial VMULL */
4838
prewiden = neon_3reg_wide[op][0];
4839
src1_wide = neon_3reg_wide[op][1];
4840
src2_wide = neon_3reg_wide[op][2];
4842
if (size == 0 && (op == 9 || op == 11 || op == 13))
4845
/* Avoid overlapping operands. Wide source operands are
4846
always aligned so will never overlap with wide
4847
destinations in problematic ways. */
4848
if (rd == rm && !src2_wide) {
4849
NEON_GET_REG(T0, rm, 1);
4850
gen_neon_movl_scratch_T0(2);
4851
} else if (rd == rn && !src1_wide) {
4852
NEON_GET_REG(T0, rn, 1);
4853
gen_neon_movl_scratch_T0(2);
4856
for (pass = 0; pass < 2; pass++) {
4858
neon_load_reg64(cpu_V0, rn + pass);
4861
if (pass == 1 && rd == rn) {
4862
gen_neon_movl_T0_scratch(2);
4864
tcg_gen_mov_i32(tmp, cpu_T[0]);
4866
tmp = neon_load_reg(rn, pass);
4869
gen_neon_widen(cpu_V0, tmp, size, u);
4873
neon_load_reg64(cpu_V1, rm + pass);
4876
if (pass == 1 && rd == rm) {
4877
gen_neon_movl_T0_scratch(2);
4879
tcg_gen_mov_i32(tmp2, cpu_T[0]);
4881
tmp2 = neon_load_reg(rm, pass);
4884
gen_neon_widen(cpu_V1, tmp2, size, u);
4888
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
4889
gen_neon_addl(size);
4891
case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */
4892
gen_neon_subl(size);
4894
case 5: case 7: /* VABAL, VABDL */
4895
switch ((size << 1) | u) {
4897
gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
4900
gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
4903
gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
4906
gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
4909
gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
4912
gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
4919
case 8: case 9: case 10: case 11: case 12: case 13:
4920
/* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
4921
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
4923
case 14: /* Polynomial VMULL */
4924
cpu_abort(env, "Polynomial VMULL not implemented");
4926
default: /* 15 is RESERVED. */
4929
if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
4931
if (op == 10 || op == 11) {
4932
gen_neon_negl(cpu_V0, size);
4936
neon_load_reg64(cpu_V1, rd + pass);
4940
case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
4941
gen_neon_addl(size);
4943
case 9: case 11: /* VQDMLAL, VQDMLSL */
4944
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
4945
gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
4948
case 13: /* VQDMULL */
4949
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
4954
neon_store_reg64(cpu_V0, rd + pass);
4955
} else if (op == 4 || op == 6) {
4956
/* Narrowing operation. */
4961
gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
4964
gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
4967
tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
4968
tcg_gen_trunc_i64_i32(tmp, cpu_V0);
4975
gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
4978
gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
4981
tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
4982
tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
4983
tcg_gen_trunc_i64_i32(tmp, cpu_V0);
4991
neon_store_reg(rd, 0, tmp3);
4992
neon_store_reg(rd, 1, tmp);
4995
/* Write back the result. */
4996
neon_store_reg64(cpu_V0, rd + pass);
5000
/* Two registers and a scalar. */
5002
case 0: /* Integer VMLA scalar */
5003
case 1: /* Float VMLA scalar */
5004
case 4: /* Integer VMLS scalar */
5005
case 5: /* Floating point VMLS scalar */
5006
case 8: /* Integer VMUL scalar */
5007
case 9: /* Floating point VMUL scalar */
5008
case 12: /* VQDMULH scalar */
5009
case 13: /* VQRDMULH scalar */
5010
gen_neon_get_scalar(size, rm);
5011
gen_neon_movl_scratch_T0(0);
5012
for (pass = 0; pass < (u ? 4 : 2); pass++) {
5014
gen_neon_movl_T0_scratch(0);
5015
NEON_GET_REG(T1, rn, pass);
5018
gen_helper_neon_qdmulh_s16(CPU_T0E01);
5020
gen_helper_neon_qdmulh_s32(CPU_T0E01);
5022
} else if (op == 13) {
5024
gen_helper_neon_qrdmulh_s16(CPU_T0E01);
5026
gen_helper_neon_qrdmulh_s32(CPU_T0E01);
5028
} else if (op & 1) {
5029
gen_helper_neon_mul_f32(CPU_T001);
5032
case 0: gen_helper_neon_mul_u8(CPU_T001); break;
5033
case 1: gen_helper_neon_mul_u16(CPU_T001); break;
5034
case 2: gen_op_mul_T0_T1(); break;
5040
NEON_GET_REG(T1, rd, pass);
5046
gen_helper_neon_add_f32(CPU_T001);
5052
gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
5058
NEON_SET_REG(T0, rd, pass);
5061
case 2: /* VMLAL scalar */
5062
case 3: /* VQDMLAL scalar */
5063
case 6: /* VMLSL scalar */
5064
case 7: /* VQDMLSL scalar */
5065
case 10: /* VMULL scalar */
5066
case 11: /* VQDMULL scalar */
5067
if (size == 0 && (op == 3 || op == 7 || op == 11))
5070
gen_neon_get_scalar(size, rm);
5071
NEON_GET_REG(T1, rn, 1);
5073
for (pass = 0; pass < 2; pass++) {
5075
tmp = neon_load_reg(rn, 0);
5078
tcg_gen_mov_i32(tmp, cpu_T[1]);
5081
tcg_gen_mov_i32(tmp2, cpu_T[0]);
5082
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5083
if (op == 6 || op == 7) {
5084
gen_neon_negl(cpu_V0, size);
5087
neon_load_reg64(cpu_V1, rd + pass);
5091
gen_neon_addl(size);
5094
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5095
gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5101
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5106
neon_store_reg64(cpu_V0, rd + pass);
5109
default: /* 14 and 15 are RESERVED */
5113
} else { /* size == 3 */
5116
imm = (insn >> 8) & 0xf;
5123
neon_load_reg64(cpu_V0, rn);
5125
neon_load_reg64(cpu_V1, rn + 1);
5127
} else if (imm == 8) {
5128
neon_load_reg64(cpu_V0, rn + 1);
5130
neon_load_reg64(cpu_V1, rm);
5133
tmp = tcg_temp_new(TCG_TYPE_I64);
5135
neon_load_reg64(cpu_V0, rn);
5136
neon_load_reg64(tmp, rn + 1);
5138
neon_load_reg64(cpu_V0, rn + 1);
5139
neon_load_reg64(tmp, rm);
5141
tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5142
tcg_gen_shli_i64(cpu_V1, tmp, 64 - ((imm & 7) * 8));
5143
tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5145
neon_load_reg64(cpu_V1, rm);
5147
neon_load_reg64(cpu_V1, rm + 1);
5150
tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5151
tcg_gen_shri_i64(tmp, tmp, imm * 8);
5152
tcg_gen_or_i64(cpu_V1, cpu_V1, tmp);
5154
neon_load_reg64(cpu_V0, rn);
5155
tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5156
neon_load_reg64(cpu_V1, rm);
5157
tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5158
tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5160
neon_store_reg64(cpu_V0, rd);
5162
neon_store_reg64(cpu_V1, rd + 1);
5164
} else if ((insn & (1 << 11)) == 0) {
5165
/* Two register misc. */
5166
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5167
size = (insn >> 18) & 3;
5169
case 0: /* VREV64 */
5172
for (pass = 0; pass < (q ? 2 : 1); pass++) {
5173
NEON_GET_REG(T0, rm, pass * 2);
5174
NEON_GET_REG(T1, rm, pass * 2 + 1);
5176
case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
5177
case 1: gen_swap_half(cpu_T[0]); break;
5178
case 2: /* no-op */ break;
5181
NEON_SET_REG(T0, rd, pass * 2 + 1);
5183
NEON_SET_REG(T1, rd, pass * 2);
5185
gen_op_movl_T0_T1();
5187
case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
5188
case 1: gen_swap_half(cpu_T[0]); break;
5191
NEON_SET_REG(T0, rd, pass * 2);
5195
case 4: case 5: /* VPADDL */
5196
case 12: case 13: /* VPADAL */
5199
for (pass = 0; pass < q + 1; pass++) {
5200
tmp = neon_load_reg(rm, pass * 2);
5201
gen_neon_widen(cpu_V0, tmp, size, op & 1);
5202
tmp = neon_load_reg(rm, pass * 2 + 1);
5203
gen_neon_widen(cpu_V1, tmp, size, op & 1);
5205
case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5206
case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5207
case 2: tcg_gen_add_i64(CPU_V001); break;
5212
neon_load_reg64(cpu_V1, rd + pass);
5213
gen_neon_addl(size);
5215
neon_store_reg64(cpu_V0, rd + pass);
5220
for (n = 0; n < (q ? 4 : 2); n += 2) {
5221
NEON_GET_REG(T0, rm, n);
5222
NEON_GET_REG(T1, rd, n + 1);
5223
NEON_SET_REG(T1, rm, n);
5224
NEON_SET_REG(T0, rd, n + 1);
5232
Rd A3 A2 A1 A0 B2 B0 A2 A0
5233
Rm B3 B2 B1 B0 B3 B1 A3 A1
5237
gen_neon_unzip(rd, q, 0, size);
5238
gen_neon_unzip(rm, q, 4, size);
5240
static int unzip_order_q[8] =
5241
{0, 2, 4, 6, 1, 3, 5, 7};
5242
for (n = 0; n < 8; n++) {
5243
int reg = (n < 4) ? rd : rm;
5244
gen_neon_movl_T0_scratch(unzip_order_q[n]);
5245
NEON_SET_REG(T0, reg, n % 4);
5248
static int unzip_order[4] =
5250
for (n = 0; n < 4; n++) {
5251
int reg = (n < 2) ? rd : rm;
5252
gen_neon_movl_T0_scratch(unzip_order[n]);
5253
NEON_SET_REG(T0, reg, n % 2);
5259
Rd A3 A2 A1 A0 B1 A1 B0 A0
5260
Rm B3 B2 B1 B0 B3 A3 B2 A2
5264
count = (q ? 4 : 2);
5265
for (n = 0; n < count; n++) {
5266
NEON_GET_REG(T0, rd, n);
5267
NEON_GET_REG(T1, rm, n);
5269
case 0: gen_helper_neon_zip_u8(); break;
5270
case 1: gen_helper_neon_zip_u16(); break;
5271
case 2: /* no-op */; break;
5274
gen_neon_movl_scratch_T0(n * 2);
5275
gen_neon_movl_scratch_T1(n * 2 + 1);
5277
for (n = 0; n < count * 2; n++) {
5278
int reg = (n < count) ? rd : rm;
5279
gen_neon_movl_T0_scratch(n);
5280
NEON_SET_REG(T0, reg, n % count);
5283
case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
5287
for (pass = 0; pass < 2; pass++) {
5288
neon_load_reg64(cpu_V0, rm + pass);
5290
if (op == 36 && q == 0) {
5291
gen_neon_narrow(size, tmp, cpu_V0);
5293
gen_neon_narrow_satu(size, tmp, cpu_V0);
5295
gen_neon_narrow_sats(size, tmp, cpu_V0);
5300
neon_store_reg(rd, 0, tmp2);
5301
neon_store_reg(rd, 1, tmp);
5305
case 38: /* VSHLL */
5308
tmp = neon_load_reg(rm, 0);
5309
tmp2 = neon_load_reg(rm, 1);
5310
for (pass = 0; pass < 2; pass++) {
5313
gen_neon_widen(cpu_V0, tmp, size, 1);
5314
neon_store_reg64(cpu_V0, rd + pass);
5319
for (pass = 0; pass < (q ? 4 : 2); pass++) {
5320
if (op == 30 || op == 31 || op >= 58) {
5321
tcg_gen_ld_f32(cpu_F0s, cpu_env,
5322
neon_reg_offset(rm, pass));
5324
NEON_GET_REG(T0, rm, pass);
5327
case 1: /* VREV32 */
5329
case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
5330
case 1: gen_swap_half(cpu_T[0]); break;
5334
case 2: /* VREV16 */
5337
gen_rev16(cpu_T[0]);
5341
case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;
5342
case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;
5343
case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break;
5349
case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;
5350
case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;
5351
case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break;
5358
gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]);
5365
case 14: /* VQABS */
5367
case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5368
case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5369
case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5373
case 15: /* VQNEG */
5375
case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5376
case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5377
case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5381
case 16: case 19: /* VCGT #0, VCLE #0 */
5382
gen_op_movl_T1_im(0);
5384
case 0: gen_helper_neon_cgt_s8(CPU_T001); break;
5385
case 1: gen_helper_neon_cgt_s16(CPU_T001); break;
5386
case 2: gen_helper_neon_cgt_s32(CPU_T001); break;
5392
case 17: case 20: /* VCGE #0, VCLT #0 */
5393
gen_op_movl_T1_im(0);
5395
case 0: gen_helper_neon_cge_s8(CPU_T001); break;
5396
case 1: gen_helper_neon_cge_s16(CPU_T001); break;
5397
case 2: gen_helper_neon_cge_s32(CPU_T001); break;
5403
case 18: /* VCEQ #0 */
5404
gen_op_movl_T1_im(0);
5406
case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
5407
case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
5408
case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
5414
case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;
5415
case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;
5416
case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break;
5421
gen_op_movl_T1_im(0);
5426
case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
5427
gen_op_movl_T1_im(0);
5428
gen_helper_neon_cgt_f32(CPU_T001);
5432
case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
5433
gen_op_movl_T1_im(0);
5434
gen_helper_neon_cge_f32(CPU_T001);
5438
case 26: /* Float VCEQ #0 */
5439
gen_op_movl_T1_im(0);
5440
gen_helper_neon_ceq_f32(CPU_T001);
5442
case 30: /* Float VABS */
5445
case 31: /* Float VNEG */
5449
NEON_GET_REG(T1, rd, pass);
5450
NEON_SET_REG(T1, rm, pass);
5453
NEON_GET_REG(T1, rd, pass);
5455
case 0: gen_helper_neon_trn_u8(); break;
5456
case 1: gen_helper_neon_trn_u16(); break;
5460
NEON_SET_REG(T1, rm, pass);
5462
case 56: /* Integer VRECPE */
5463
gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env);
5465
case 57: /* Integer VRSQRTE */
5466
gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env);
5468
case 58: /* Float VRECPE */
5469
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
5471
case 59: /* Float VRSQRTE */
5472
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
5474
case 60: /* VCVT.F32.S32 */
5477
case 61: /* VCVT.F32.U32 */
5480
case 62: /* VCVT.S32.F32 */
5483
case 63: /* VCVT.U32.F32 */
5487
/* Reserved: 21, 29, 39-56 */
5490
if (op == 30 || op == 31 || op >= 58) {
5491
tcg_gen_st_f32(cpu_F0s, cpu_env,
5492
neon_reg_offset(rd, pass));
5494
NEON_SET_REG(T0, rd, pass);
5499
} else if ((insn & (1 << 10)) == 0) {
5501
n = ((insn >> 5) & 0x18) + 8;
5502
if (insn & (1 << 6)) {
5503
tmp = neon_load_reg(rd, 0);
5506
tcg_gen_movi_i32(tmp, 0);
5508
tmp2 = neon_load_reg(rm, 0);
5509
gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
5512
if (insn & (1 << 6)) {
5513
tmp = neon_load_reg(rd, 1);
5516
tcg_gen_movi_i32(tmp, 0);
5518
tmp3 = neon_load_reg(rm, 1);
5519
gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
5521
neon_store_reg(rd, 0, tmp2);
5522
neon_store_reg(rd, 1, tmp3);
5524
} else if ((insn & 0x380) == 0) {
5526
if (insn & (1 << 19)) {
5527
NEON_SET_REG(T0, rm, 1);
5529
NEON_SET_REG(T0, rm, 0);
5531
if (insn & (1 << 16)) {
5532
gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8);
5533
} else if (insn & (1 << 17)) {
5534
if ((insn >> 18) & 1)
5535
gen_neon_dup_high16(cpu_T[0]);
5537
gen_neon_dup_low16(cpu_T[0]);
5539
for (pass = 0; pass < (q ? 4 : 2); pass++) {
5540
NEON_SET_REG(T0, rd, pass);
5550
static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
5554
cpnum = (insn >> 8) & 0xf;
5555
if (arm_feature(env, ARM_FEATURE_XSCALE)
5556
&& ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
5562
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5563
return disas_iwmmxt_insn(env, s, insn);
5564
} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
5565
return disas_dsp_insn(env, s, insn);
5570
return disas_vfp_insn (env, s, insn);
5572
return disas_cp15_insn (env, s, insn);
5574
/* Unknown coprocessor. See if the board has hooked it. */
5575
return disas_cp_insn (env, s, insn);
5580
/* Store a 64-bit value to a register pair. Clobbers val. */
5581
static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv val)
5585
tcg_gen_trunc_i64_i32(tmp, val);
5586
store_reg(s, rlow, tmp);
5588
tcg_gen_shri_i64(val, val, 32);
5589
tcg_gen_trunc_i64_i32(tmp, val);
5590
store_reg(s, rhigh, tmp);
5593
/* load a 32-bit value from a register and perform a 64-bit accumulate. */
5594
static void gen_addq_lo(DisasContext *s, TCGv val, int rlow)
5599
/* Load value and extend to 64 bits. */
5600
tmp = tcg_temp_new(TCG_TYPE_I64);
5601
tmp2 = load_reg(s, rlow);
5602
tcg_gen_extu_i32_i64(tmp, tmp2);
5604
tcg_gen_add_i64(val, val, tmp);
5607
/* load and add a 64-bit value from a register pair. */
5608
static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
5614
/* Load 64-bit value rd:rn. */
5615
tmpl = load_reg(s, rlow);
5616
tmph = load_reg(s, rhigh);
5617
tmp = tcg_temp_new(TCG_TYPE_I64);
5618
tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5621
tcg_gen_add_i64(val, val, tmp);
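Functionally, gen_storeq_reg and gen_addq are the translation-time counterparts of splitting and reassembling a 64-bit value across a register pair. A minimal sketch of the run-time effect (hypothetical helpers, assuming <stdint.h>):

static void sketch_storeq(uint32_t regs[16], int rlow, int rhigh, uint64_t val)
{
    regs[rlow]  = (uint32_t)val;          /* low word  -> first register  */
    regs[rhigh] = (uint32_t)(val >> 32);  /* high word -> second register */
}

static uint64_t sketch_addq(const uint32_t regs[16], uint64_t val,
                            int rlow, int rhigh)
{
    uint64_t acc = ((uint64_t)regs[rhigh] << 32) | regs[rlow];
    return val + acc;                     /* 64-bit accumulate, as in gen_addq */
}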
5624
/* Set N and Z flags from a 64-bit value. */
5625
static void gen_logicq_cc(TCGv val)
5627
TCGv tmp = new_tmp();
5628
gen_helper_logicq_cc(tmp, val);
2199
gen_op_movl_reg_TN[0][15]();
2200
gen_op_movl_T0_spsr();
2201
gen_op_movl_cpsr_T0(0xffffffff);
2202
s->is_jmp = DISAS_UPDATE;
5633
2205
static void disas_arm_insn(CPUState * env, DisasContext *s)
5635
2207
unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
5641
2209
insn = ldl_code(s->pc);
5644
/* M variants do not implement ARM mode. */
5647
2212
cond = insn >> 28;
5648
2213
if (cond == 0xf){
5649
2214
/* Unconditional instructions. */
5650
if (((insn >> 25) & 7) == 1) {
5651
/* NEON Data processing. */
5652
if (!arm_feature(env, ARM_FEATURE_NEON))
5655
if (disas_neon_data_insn(env, s, insn))
5659
if ((insn & 0x0f100000) == 0x04000000) {
5660
/* NEON load/store. */
5661
if (!arm_feature(env, ARM_FEATURE_NEON))
5664
if (disas_neon_ls_insn(env, s, insn))
5668
2215
if ((insn & 0x0d70f000) == 0x0550f000)
5669
2216
return; /* PLD */
5670
else if ((insn & 0x0ffffdff) == 0x01010000) {
5673
if (insn & (1 << 9)) {
5674
/* BE8 mode not implemented. */
5678
} else if ((insn & 0x0fffff00) == 0x057ff000) {
5679
switch ((insn >> 4) & 0xf) {
5682
gen_helper_clrex(cpu_env);
5688
/* We don't emulate caches, so these are no-ops. */

5693
} else if ((insn & 0x0e5fffe0) == 0x084d0500) {
5699
op1 = (insn & 0x1f);
5700
if (op1 == (env->uncached_cpsr & CPSR_M)) {
5701
addr = load_reg(s, 13);
5704
gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op1));
5706
i = (insn >> 23) & 3;
5708
case 0: offset = -4; break; /* DA */
5709
case 1: offset = -8; break; /* DB */
5710
case 2: offset = 0; break; /* IA */
5711
case 3: offset = 4; break; /* IB */
5715
tcg_gen_addi_i32(addr, addr, offset);
5716
tmp = load_reg(s, 14);
5717
gen_st32(tmp, addr, 0);
5719
gen_helper_cpsr_read(tmp);
5720
tcg_gen_addi_i32(addr, addr, 4);
5721
gen_st32(tmp, addr, 0);
5722
if (insn & (1 << 21)) {
5723
/* Base writeback. */
5725
case 0: offset = -8; break;
5726
case 1: offset = -4; break;
5727
case 2: offset = 4; break;
5728
case 3: offset = 0; break;
5732
tcg_gen_addi_i32(addr, tmp, offset);
5733
if (op1 == (env->uncached_cpsr & CPSR_M)) {
5734
gen_movl_reg_T1(s, 13);
5736
gen_helper_set_r13_banked(cpu_env, tcg_const_i32(op1), cpu_T[1]);
5741
} else if ((insn & 0x0e5fffe0) == 0x081d0a00) {
5747
rn = (insn >> 16) & 0xf;
5748
addr = load_reg(s, rn);
5749
i = (insn >> 23) & 3;
5751
case 0: offset = -4; break; /* DA */
5752
case 1: offset = -8; break; /* DB */
5753
case 2: offset = 0; break; /* IA */
5754
case 3: offset = 4; break; /* IB */
5758
tcg_gen_addi_i32(addr, addr, offset);
5759
/* Load PC into tmp and CPSR into tmp2. */
5760
tmp = gen_ld32(addr, 0);
5761
tcg_gen_addi_i32(addr, addr, 4);
5762
tmp2 = gen_ld32(addr, 0);
5763
if (insn & (1 << 21)) {
5764
/* Base writeback. */
5766
case 0: offset = -8; break;
5767
case 1: offset = -4; break;
5768
case 2: offset = 4; break;
5769
case 3: offset = 0; break;
5773
tcg_gen_addi_i32(addr, addr, offset);
5774
store_reg(s, rn, addr);
5778
gen_rfe(s, tmp, tmp2);
5779
} else if ((insn & 0x0e000000) == 0x0a000000) {
2217
else if ((insn & 0x0e000000) == 0x0a000000) {
5780
2218
/* branch link and change to thumb (blx <offset>) */
5781
2219
int32_t offset;
5783
2221
val = (uint32_t)s->pc;
5785
tcg_gen_movi_i32(tmp, val);
5786
store_reg(s, 14, tmp);
2222
gen_op_movl_T0_im(val);
2223
gen_movl_reg_T0(s, 14);
5787
2224
/* Sign-extend the 24-bit offset */
5788
2225
offset = (((int32_t)insn) << 8) >> 8;
5789
2226
/* offset * 4 + bit24 * 2 + (thumb bit) */
5790
2227
val += (offset << 2) | ((insn >> 23) & 2) | 1;
5791
2228
/* pipeline offset */
2230
gen_op_movl_T0_im(val);
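Taken together with the +4 step indicated by the "pipeline offset" comment, the branch target computed here works out as below (illustrative helper only; insn_addr is the address of the BLX instruction, and s->pc is already insn_addr + 4 at this point):

static uint32_t sketch_blx_imm_target(uint32_t insn, uint32_t insn_addr)
{
    int32_t offset = (((int32_t)insn) << 8) >> 8;    /* sign-extend imm24 */
    uint32_t h = (insn >> 23) & 2;                   /* H bit selects the odd halfword */
    /* base = insn_addr + 8 (s->pc plus the pipeline offset), bit 0 = Thumb state */
    return (insn_addr + 8 + (offset << 2) + h) | 1;
}

For example, insn 0xfa000001 at address A branches to A + 14 (8 + 4*1 + 2) and switches to Thumb state.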
5795
} else if ((insn & 0x0e000f00) == 0x0c000100) {
5796
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5797
/* iWMMXt register transfer. */
5798
if (env->cp15.c15_cpar & (1 << 1))
5799
if (!disas_iwmmxt_insn(env, s, insn))
5802
2233
} else if ((insn & 0x0fe00000) == 0x0c400000) {
5803
2234
/* Coprocessor double register transfer. */
5804
2235
} else if ((insn & 0x0f000010) == 0x0e000010) {
5805
2236
/* Additional coprocessor register transfer. */
5806
} else if ((insn & 0x0ff10020) == 0x01000000) {
2237
} else if ((insn & 0x0ff10010) == 0x01000000) {
5809
2238
/* cps (privileged) */
5813
if (insn & (1 << 19)) {
5814
if (insn & (1 << 8))
5816
if (insn & (1 << 7))
5818
if (insn & (1 << 6))
5820
if (insn & (1 << 18))
5823
if (insn & (1 << 17)) {
5825
val |= (insn & 0x1f);
5828
gen_op_movl_T0_im(val);
5829
gen_set_psr_T0(s, mask, 0);
2239
} else if ((insn & 0x0ffffdff) == 0x01010000) {
2241
if (insn & (1 << 9)) {
2242
/* BE8 mode not implemented. */
6808
2908
/* Coprocessor. */
6809
if (disas_coproc_insn(env, s, insn))
2909
op1 = (insn >> 8) & 0xf;
2910
if (arm_feature(env, ARM_FEATURE_XSCALE) &&
2911
((env->cp15.c15_cpar ^ 0x3fff) & (1 << op1)))
2915
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
2916
if (disas_iwmmxt_insn(env, s, insn))
2918
} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
2919
if (disas_dsp_insn(env, s, insn))
2926
if (disas_cp_insn (env, s, insn))
2931
if (disas_vfp_insn (env, s, insn))
2935
if (disas_cp15_insn (env, s, insn))
2939
/* unknown coprocessor. */
6814
gen_set_pc_im(s->pc);
6815
s->is_jmp = DISAS_SWI;
2945
gen_op_movl_T0_im((long)s->pc);
2946
gen_op_movl_reg_TN[0][15]();
2948
s->is_jmp = DISAS_JUMP;
6819
gen_set_condexec(s);
6820
gen_set_pc_im(s->pc - 4);
6821
gen_exception(EXCP_UDEF);
2952
gen_op_movl_T0_im((long)s->pc - 4);
2953
gen_op_movl_reg_TN[0][15]();
2954
gen_op_undef_insn();
6822
2955
s->is_jmp = DISAS_JUMP;
6828
/* Return true if this is a Thumb-2 logical op. */
6830
thumb2_logic_op(int op)
6835
/* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
6836
then set condition code flags based on the result of the operation.
6837
If SHIFTER_OUT is nonzero then set the carry flag for logical operations
6838
to the high bit of T1.
6839
Returns zero if the opcode is valid. */
6842
gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out)
6849
gen_op_andl_T0_T1();
6853
gen_op_bicl_T0_T1();
6866
gen_op_xorl_T0_T1();
6871
gen_op_addl_T0_T1_cc();
6873
gen_op_addl_T0_T1();
6877
gen_op_adcl_T0_T1_cc();
6883
gen_op_sbcl_T0_T1_cc();
6889
gen_op_subl_T0_T1_cc();
6891
gen_op_subl_T0_T1();
6895
gen_op_rsbl_T0_T1_cc();
6897
gen_op_rsbl_T0_T1();
6899
default: /* 5, 6, 7, 9, 12, 15. */
6903
gen_op_logic_T0_cc();
6905
gen_set_CF_bit31(cpu_T[1]);
6910
/* Translate a 32-bit thumb instruction. Returns nonzero if the instruction
6912
static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
6914
uint32_t insn, imm, shift, offset;
6915
uint32_t rd, rn, rm, rs;
6925
if (!(arm_feature(env, ARM_FEATURE_THUMB2)
6926
|| arm_feature (env, ARM_FEATURE_M))) {
6927
/* Thumb-1 cores may need to treat bl and blx as a pair of
6928
16-bit instructions to get correct prefetch abort behavior. */
6930
if ((insn & (1 << 12)) == 0) {
6931
/* Second half of blx. */
6932
offset = ((insn & 0x7ff) << 1);
6933
tmp = load_reg(s, 14);
6934
tcg_gen_addi_i32(tmp, tmp, offset);
6935
tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
6938
tcg_gen_movi_i32(tmp2, s->pc | 1);
6939
store_reg(s, 14, tmp2);
6943
if (insn & (1 << 11)) {
6944
/* Second half of bl. */
6945
offset = ((insn & 0x7ff) << 1) | 1;
6946
tmp = load_reg(s, 14);
6947
tcg_gen_addi_i32(tmp, tmp, offset);
6950
tcg_gen_movi_i32(tmp2, s->pc | 1);
6951
store_reg(s, 14, tmp2);
6955
if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
6956
/* Instruction spans a page boundary. Implement it as two
6957
16-bit instructions in case the second half causes an
6959
offset = ((int32_t)insn << 21) >> 9;
6960
gen_op_movl_T0_im(s->pc + 2 + offset);
6961
gen_movl_reg_T0(s, 14);
6964
/* Fall through to 32-bit decode. */
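The prefix half stashes the upper part of the offset in LR and the suffix half adds the rest, so for a plain bl the combined target is as sketched below (hypothetical helper mirroring the shifts above; pc_hw2 is the address of the second halfword, assuming <stdint.h>):

static uint32_t sketch_thumb1_bl_target(uint32_t pc_hw2, uint16_t hw1, uint16_t hw2)
{
    /* Prefix: LR = PC + (sign_extend(hw1[10:0]) << 12), with PC = pc_hw2 + 2. */
    int32_t hi = ((int32_t)((uint32_t)hw1 << 21)) >> 9;
    uint32_t lr = pc_hw2 + 2 + hi;
    /* Suffix: target = LR + (hw2[10:0] << 1); LR is then rewritten to the
       return address with bit 0 set, as store_reg(s, 14, tmp2) does above. */
    return lr + ((hw2 & 0x7ff) << 1);
}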
6967
insn = lduw_code(s->pc);
6969
insn |= (uint32_t)insn_hw1 << 16;
6971
if ((insn & 0xf800e800) != 0xf000e800) {
6975
rn = (insn >> 16) & 0xf;
6976
rs = (insn >> 12) & 0xf;
6977
rd = (insn >> 8) & 0xf;
6979
switch ((insn >> 25) & 0xf) {
6980
case 0: case 1: case 2: case 3:
6981
/* 16-bit instructions. Should never happen. */
6984
if (insn & (1 << 22)) {
6985
/* Other load/store, table branch. */
6986
if (insn & 0x01200000) {
6987
/* Load/store doubleword. */
6990
tcg_gen_movi_i32(addr, s->pc & ~3);
6992
addr = load_reg(s, rn);
6994
offset = (insn & 0xff) * 4;
6995
if ((insn & (1 << 23)) == 0)
6997
if (insn & (1 << 24)) {
6998
tcg_gen_addi_i32(addr, addr, offset);
7001
if (insn & (1 << 20)) {
7003
tmp = gen_ld32(addr, IS_USER(s));
7004
store_reg(s, rs, tmp);
7005
tcg_gen_addi_i32(addr, addr, 4);
7006
tmp = gen_ld32(addr, IS_USER(s));
7007
store_reg(s, rd, tmp);
7010
tmp = load_reg(s, rs);
7011
gen_st32(tmp, addr, IS_USER(s));
7012
tcg_gen_addi_i32(addr, addr, 4);
7013
tmp = load_reg(s, rd);
7014
gen_st32(tmp, addr, IS_USER(s));
7016
if (insn & (1 << 21)) {
7017
/* Base writeback. */
7020
tcg_gen_addi_i32(addr, addr, offset - 4);
7021
store_reg(s, rn, addr);
7025
} else if ((insn & (1 << 23)) == 0) {
7026
/* Load/store exclusive word. */
7027
gen_movl_T1_reg(s, rn);
7029
if (insn & (1 << 20)) {
7030
gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
7031
tmp = gen_ld32(addr, IS_USER(s));
7032
store_reg(s, rd, tmp);
7034
int label = gen_new_label();
7035
gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7036
tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
7038
tmp = load_reg(s, rs);
7039
gen_st32(tmp, cpu_T[1], IS_USER(s));
7040
gen_set_label(label);
7041
gen_movl_reg_T0(s, rd);
7043
} else if ((insn & (1 << 6)) == 0) {
7047
tcg_gen_movi_i32(addr, s->pc);
7049
addr = load_reg(s, rn);
7051
tmp = load_reg(s, rm);
7052
tcg_gen_add_i32(addr, addr, tmp);
7053
if (insn & (1 << 4)) {
7055
tcg_gen_add_i32(addr, addr, tmp);
7057
tmp = gen_ld16u(addr, IS_USER(s));
7060
tmp = gen_ld8u(addr, IS_USER(s));
7063
tcg_gen_shli_i32(tmp, tmp, 1);
7064
tcg_gen_addi_i32(tmp, tmp, s->pc);
7065
store_reg(s, 15, tmp);
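This is the TBB/TBH table branch: the byte or halfword loaded from the table is doubled and added to the PC of the following instruction. A sketch of the resulting target (illustrative helper; the table pointers stand in for the guest memory loads, assuming <stdint.h>):

static uint32_t sketch_table_branch(uint32_t pc_next, const uint8_t *btable,
                                    const uint16_t *htable, uint32_t index,
                                    int halfword)
{
    uint32_t entry = halfword ? htable[index]    /* TBH: halfword entries */
                              : btable[index];   /* TBB: byte entries */
    return pc_next + entry * 2;                  /* forward offset in halfwords */
}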
7067
/* Load/store exclusive byte/halfword/doubleword. */
7068
/* ??? These are not really atomic. However we know
7069
we never have multiple CPUs running in parallel,
7070
so it is good enough. */
7071
op = (insn >> 4) & 0x3;
7072
/* Must use a global reg for the address because we have
7073
a conditional branch in the store instruction. */
7074
gen_movl_T1_reg(s, rn);
7076
if (insn & (1 << 20)) {
7077
gen_helper_mark_exclusive(cpu_env, addr);
7080
tmp = gen_ld8u(addr, IS_USER(s));
7083
tmp = gen_ld16u(addr, IS_USER(s));
7086
tmp = gen_ld32(addr, IS_USER(s));
7087
tcg_gen_addi_i32(addr, addr, 4);
7088
tmp2 = gen_ld32(addr, IS_USER(s));
7089
store_reg(s, rd, tmp2);
7094
store_reg(s, rs, tmp);
7096
int label = gen_new_label();
7097
/* Must use a global that is not killed by the branch. */
7098
gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7099
tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label);
7100
tmp = load_reg(s, rs);
7103
gen_st8(tmp, addr, IS_USER(s));
7106
gen_st16(tmp, addr, IS_USER(s));
7109
gen_st32(tmp, addr, IS_USER(s));
7110
tcg_gen_addi_i32(addr, addr, 4);
7111
tmp = load_reg(s, rd);
7112
gen_st32(tmp, addr, IS_USER(s));
7117
gen_set_label(label);
7118
gen_movl_reg_T0(s, rm);
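As the comment above notes, these exclusives are not truly atomic; the mark/test helpers behave roughly like a single-entry monitor, with the store only performed when the test succeeds. A plain-C sketch of that protocol (hypothetical types and helpers, not the actual helper implementation):

typedef struct {
    uint32_t addr;
    int valid;
} SketchMonitor;

static void sketch_ldrex(SketchMonitor *m, uint32_t addr)
{
    m->addr = addr;              /* "mark exclusive": remember the address */
    m->valid = 1;
}

static int sketch_strex(SketchMonitor *m, uint32_t addr)
{
    int fail = !(m->valid && m->addr == addr);
    m->valid = 0;                /* any store-exclusive clears the monitor */
    return fail;                 /* 0 = store performed, 1 = store skipped */
}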
7122
/* Load/store multiple, RFE, SRS. */
7123
if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
7124
/* Not available in user mode. */
7127
if (insn & (1 << 20)) {
7129
addr = load_reg(s, rn);
7130
if ((insn & (1 << 24)) == 0)
7131
tcg_gen_addi_i32(addr, addr, -8);
7132
/* Load PC into tmp and CPSR into tmp2. */
7133
tmp = gen_ld32(addr, 0);
7134
tcg_gen_addi_i32(addr, addr, 4);
7135
tmp2 = gen_ld32(addr, 0);
7136
if (insn & (1 << 21)) {
7137
/* Base writeback. */
7138
if (insn & (1 << 24)) {
7139
tcg_gen_addi_i32(addr, addr, 4);
7141
tcg_gen_addi_i32(addr, addr, -4);
7143
store_reg(s, rn, addr);
7147
gen_rfe(s, tmp, tmp2);
7151
if (op == (env->uncached_cpsr & CPSR_M)) {
7152
addr = load_reg(s, 13);
7155
gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op));
7157
if ((insn & (1 << 24)) == 0) {
7158
tcg_gen_addi_i32(addr, addr, -8);
7160
tmp = load_reg(s, 14);
7161
gen_st32(tmp, addr, 0);
7162
tcg_gen_addi_i32(addr, addr, 4);
7164
gen_helper_cpsr_read(tmp);
7165
gen_st32(tmp, addr, 0);
7166
if (insn & (1 << 21)) {
7167
if ((insn & (1 << 24)) == 0) {
7168
tcg_gen_addi_i32(addr, addr, -4);
7170
tcg_gen_addi_i32(addr, addr, 4);
7172
if (op == (env->uncached_cpsr & CPSR_M)) {
7173
store_reg(s, 13, addr);
7175
gen_helper_set_r13_banked(cpu_env,
7176
tcg_const_i32(op), addr);
7184
/* Load/store multiple. */
7185
addr = load_reg(s, rn);
7187
for (i = 0; i < 16; i++) {
7188
if (insn & (1 << i))
7191
if (insn & (1 << 24)) {
7192
tcg_gen_addi_i32(addr, addr, -offset);
7195
for (i = 0; i < 16; i++) {
7196
if ((insn & (1 << i)) == 0)
7198
if (insn & (1 << 20)) {
7200
tmp = gen_ld32(addr, IS_USER(s));
7204
store_reg(s, i, tmp);
7208
tmp = load_reg(s, i);
7209
gen_st32(tmp, addr, IS_USER(s));
7211
tcg_gen_addi_i32(addr, addr, 4);
7213
if (insn & (1 << 21)) {
7214
/* Base register writeback. */
7215
if (insn & (1 << 24)) {
7216
tcg_gen_addi_i32(addr, addr, -offset);
7218
/* Fault if writeback register is in register list. */
7219
if (insn & (1 << rn))
7221
store_reg(s, rn, addr);
7228
case 5: /* Data processing register constant shift. */
7230
gen_op_movl_T0_im(0);
7232
gen_movl_T0_reg(s, rn);
7233
gen_movl_T1_reg(s, rm);
7234
op = (insn >> 21) & 0xf;
7235
shiftop = (insn >> 4) & 3;
7236
shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7237
conds = (insn & (1 << 20)) != 0;
7238
logic_cc = (conds && thumb2_logic_op(op));
7239
gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
7240
if (gen_thumb2_data_op(s, op, conds, 0))
7243
gen_movl_reg_T0(s, rd);
7245
case 13: /* Misc data processing. */
7246
op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
7247
if (op < 4 && (insn & 0xf000) != 0xf000)
7250
case 0: /* Register controlled shift. */
7251
tmp = load_reg(s, rn);
7252
tmp2 = load_reg(s, rm);
7253
if ((insn & 0x70) != 0)
7255
op = (insn >> 21) & 3;
7256
logic_cc = (insn & (1 << 20)) != 0;
7257
gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
7260
store_reg(s, rd, tmp);
7262
case 1: /* Sign/zero extend. */
7263
tmp = load_reg(s, rm);
7264
shift = (insn >> 4) & 3;
7265
/* ??? In many cases it's not necessary to do a
7266
rotate, a shift is sufficient. */
7268
tcg_gen_rori_i32(tmp, tmp, shift * 8);
7269
op = (insn >> 20) & 7;
7271
case 0: gen_sxth(tmp); break;
7272
case 1: gen_uxth(tmp); break;
7273
case 2: gen_sxtb16(tmp); break;
7274
case 3: gen_uxtb16(tmp); break;
7275
case 4: gen_sxtb(tmp); break;
7276
case 5: gen_uxtb(tmp); break;
7277
default: goto illegal_op;
7280
tmp2 = load_reg(s, rn);
7281
if ((op >> 1) == 1) {
7282
gen_add16(tmp, tmp2);
7284
tcg_gen_add_i32(tmp, tmp, tmp2);
7288
store_reg(s, rd, tmp);
7290
case 2: /* SIMD add/subtract. */
7291
op = (insn >> 20) & 7;
7292
shift = (insn >> 4) & 7;
7293
if ((op & 3) == 3 || (shift & 3) == 3)
7295
tmp = load_reg(s, rn);
7296
tmp2 = load_reg(s, rm);
7297
gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
7299
store_reg(s, rd, tmp);
7301
case 3: /* Other data processing. */
7302
op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
7304
/* Saturating add/subtract. */
7305
tmp = load_reg(s, rn);
7306
tmp2 = load_reg(s, rm);
7308
gen_helper_double_saturate(tmp, tmp);
7310
gen_helper_sub_saturate(tmp, tmp2, tmp);
7312
gen_helper_add_saturate(tmp, tmp, tmp2);
7315
tmp = load_reg(s, rn);
7317
case 0x0a: /* rbit */
7318
gen_helper_rbit(tmp, tmp);
7320
case 0x08: /* rev */
7321
tcg_gen_bswap_i32(tmp, tmp);
7323
case 0x09: /* rev16 */
7326
case 0x0b: /* revsh */
7329
case 0x10: /* sel */
7330
tmp2 = load_reg(s, rm);
7332
tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
7333
gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
7337
case 0x18: /* clz */
7338
gen_helper_clz(tmp, tmp);
7344
store_reg(s, rd, tmp);
7346
case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
7347
op = (insn >> 4) & 0xf;
7348
tmp = load_reg(s, rn);
7349
tmp2 = load_reg(s, rm);
7350
switch ((insn >> 20) & 7) {
7351
case 0: /* 32 x 32 -> 32 */
7352
tcg_gen_mul_i32(tmp, tmp, tmp2);
7355
tmp2 = load_reg(s, rs);
7357
tcg_gen_sub_i32(tmp, tmp2, tmp);
7359
tcg_gen_add_i32(tmp, tmp, tmp2);
7363
case 1: /* 16 x 16 -> 32 */
7364
gen_mulxy(tmp, tmp2, op & 2, op & 1);
7367
tmp2 = load_reg(s, rs);
7368
gen_helper_add_setq(tmp, tmp, tmp2);
7372
case 2: /* Dual multiply add. */
7373
case 4: /* Dual multiply subtract. */
7375
gen_swap_half(tmp2);
7376
gen_smul_dual(tmp, tmp2);
7377
/* This addition cannot overflow. */
7378
if (insn & (1 << 22)) {
7379
tcg_gen_sub_i32(tmp, tmp, tmp2);
7381
tcg_gen_add_i32(tmp, tmp, tmp2);
7386
tmp2 = load_reg(s, rs);
7387
gen_helper_add_setq(tmp, tmp, tmp2);
7391
case 3: /* 32 * 16 -> 32msb */
7393
tcg_gen_sari_i32(tmp2, tmp2, 16);
7396
tmp2 = gen_muls_i64_i32(tmp, tmp2);
7397
tcg_gen_shri_i64(tmp2, tmp2, 16);
7399
tcg_gen_trunc_i64_i32(tmp, tmp2);
7402
tmp2 = load_reg(s, rs);
7403
gen_helper_add_setq(tmp, tmp, tmp2);
7407
case 5: case 6: /* 32 * 32 -> 32msb */
7408
gen_imull(tmp, tmp2);
7409
if (insn & (1 << 5)) {
7410
gen_roundqd(tmp, tmp2);
7417
tmp2 = load_reg(s, rs);
7418
if (insn & (1 << 21)) {
7419
tcg_gen_add_i32(tmp, tmp, tmp2);
7421
tcg_gen_sub_i32(tmp, tmp2, tmp);
7426
case 7: /* Unsigned sum of absolute differences. */
7427
gen_helper_usad8(tmp, tmp, tmp2);
7430
tmp2 = load_reg(s, rs);
7431
tcg_gen_add_i32(tmp, tmp, tmp2);
7436
store_reg(s, rd, tmp);
7438
case 6: case 7: /* 64-bit multiply, Divide. */
7439
op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
7440
tmp = load_reg(s, rn);
7441
tmp2 = load_reg(s, rm);
7442
if ((op & 0x50) == 0x10) {
7444
if (!arm_feature(env, ARM_FEATURE_DIV))
7447
gen_helper_udiv(tmp, tmp, tmp2);
7449
gen_helper_sdiv(tmp, tmp, tmp2);
7451
store_reg(s, rd, tmp);
7452
} else if ((op & 0xe) == 0xc) {
7453
/* Dual multiply accumulate long. */
7455
gen_swap_half(tmp2);
7456
gen_smul_dual(tmp, tmp2);
7458
tcg_gen_sub_i32(tmp, tmp, tmp2);
7460
tcg_gen_add_i32(tmp, tmp, tmp2);
7463
tmp2 = tcg_temp_new(TCG_TYPE_I64);
7464
gen_addq(s, tmp, rs, rd);
7465
gen_storeq_reg(s, rs, rd, tmp);
7468
/* Unsigned 64-bit multiply */
7469
tmp = gen_mulu_i64_i32(tmp, tmp2);
7473
gen_mulxy(tmp, tmp2, op & 2, op & 1);
7475
tmp2 = tcg_temp_new(TCG_TYPE_I64);
7476
tcg_gen_ext_i32_i64(tmp2, tmp);
7480
/* Signed 64-bit multiply */
7481
tmp = gen_muls_i64_i32(tmp, tmp2);
7486
gen_addq_lo(s, tmp, rs);
7487
gen_addq_lo(s, tmp, rd);
7488
} else if (op & 0x40) {
7489
/* 64-bit accumulate. */
7490
gen_addq(s, tmp, rs, rd);
7492
gen_storeq_reg(s, rs, rd, tmp);
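The 64-bit accumulate path above reads the RdLo:RdHi pair, adds the product, and writes the pair back. For the unsigned case the arithmetic is simply (illustrative helper, assuming <stdint.h>):

static uint64_t sketch_umlal(uint32_t rn, uint32_t rm, uint32_t rdlo, uint32_t rdhi)
{
    uint64_t acc = ((uint64_t)rdhi << 32) | rdlo;   /* what gen_addq reassembles */
    return acc + (uint64_t)rn * rm;                 /* 32x32 -> 64 plus accumulator */
}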
7497
case 6: case 7: case 14: case 15:
7499
if (((insn >> 24) & 3) == 3) {
7500
/* Translate into the equivalent ARM encoding. */
7501
insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
7502
if (disas_neon_data_insn(env, s, insn))
7505
if (insn & (1 << 28))
7507
if (disas_coproc_insn (env, s, insn))
7511
case 8: case 9: case 10: case 11:
7512
if (insn & (1 << 15)) {
7513
/* Branches, misc control. */
7514
if (insn & 0x5000) {
7515
/* Unconditional branch. */
7516
/* signextend(hw1[10:0]) -> offset[:12]. */
7517
offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
7518
/* hw1[10:0] -> offset[11:1]. */
7519
offset |= (insn & 0x7ff) << 1;
7520
/* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
7521
offset[24:22] already have the same value because of the
7522
sign extension above. */
7523
offset ^= ((~insn) & (1 << 13)) << 10;
7524
offset ^= ((~insn) & (1 << 11)) << 11;
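For reference, here is the same offset assembly as a standalone expression, with the fields named as in the architecture manual (S = hw1[10], J1 = hw2[13], J2 = hw2[11]); the helper is purely illustrative:

static int32_t sketch_t32_branch_offset(uint32_t insn)   /* insn = hw1:hw2 */
{
    int32_t offset = (((int32_t)insn << 5) >> 9) & ~(int32_t)0xfff; /* S:imm10 -> offset[24:12] */
    offset |= (insn & 0x7ff) << 1;              /* imm11 -> offset[11:1] */
    offset ^= ((~insn) & (1 << 13)) << 10;      /* offset[23] becomes NOT(J1 EOR S) */
    offset ^= ((~insn) & (1 << 11)) << 11;      /* offset[22] becomes NOT(J2 EOR S) */
    return offset;
}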
7526
if (insn & (1 << 14)) {
7527
/* Branch and link. */
7528
gen_op_movl_T1_im(s->pc | 1);
7529
gen_movl_reg_T1(s, 14);
7533
if (insn & (1 << 12)) {
7538
offset &= ~(uint32_t)2;
7539
gen_bx_im(s, offset);
7541
} else if (((insn >> 23) & 7) == 7) {
7543
if (insn & (1 << 13))
7546
if (insn & (1 << 26)) {
7547
/* Secure monitor call (v6Z) */
7548
goto illegal_op; /* not implemented. */
7550
op = (insn >> 20) & 7;
7552
case 0: /* msr cpsr. */
7554
tmp = load_reg(s, rn);
7555
addr = tcg_const_i32(insn & 0xff);
7556
gen_helper_v7m_msr(cpu_env, addr, tmp);
7561
case 1: /* msr spsr. */
7564
gen_movl_T0_reg(s, rn);
7565
if (gen_set_psr_T0(s,
7566
msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
7570
case 2: /* cps, nop-hint. */
7571
if (((insn >> 8) & 7) == 0) {
7572
gen_nop_hint(s, insn & 0xff);
7574
/* Implemented as NOP in user mode. */
7579
if (insn & (1 << 10)) {
7580
if (insn & (1 << 7))
7582
if (insn & (1 << 6))
7584
if (insn & (1 << 5))
7586
if (insn & (1 << 9))
7587
imm = CPSR_A | CPSR_I | CPSR_F;
7589
if (insn & (1 << 8)) {
7591
imm |= (insn & 0x1f);
7594
gen_op_movl_T0_im(imm);
7595
gen_set_psr_T0(s, offset, 0);
7598
case 3: /* Special control operations. */
7599
op = (insn >> 4) & 0xf;
7602
gen_helper_clrex(cpu_env);
7607
/* These execute as NOPs. */
7615
/* Trivial implementation equivalent to bx. */
7616
tmp = load_reg(s, rn);
7619
case 5: /* Exception return. */
7620
/* Unpredictable in user mode. */
7622
case 6: /* mrs cpsr. */
7625
addr = tcg_const_i32(insn & 0xff);
7626
gen_helper_v7m_mrs(tmp, cpu_env, addr);
7628
gen_helper_cpsr_read(tmp);
7630
store_reg(s, rd, tmp);
7632
case 7: /* mrs spsr. */
7633
/* Not accessible in user mode. */
7634
if (IS_USER(s) || IS_M(env))
7636
tmp = load_cpu_field(spsr);
7637
store_reg(s, rd, tmp);
7642
/* Conditional branch. */
7643
op = (insn >> 22) & 0xf;
7644
/* Generate a conditional jump to next instruction. */
7645
s->condlabel = gen_new_label();
7646
gen_test_cc(op ^ 1, s->condlabel);
7649
/* offset[11:1] = insn[10:0] */
7650
offset = (insn & 0x7ff) << 1;
7651
/* offset[17:12] = insn[21:16]. */
7652
offset |= (insn & 0x003f0000) >> 4;
7653
/* offset[31:20] = insn[26]. */
7654
offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
7655
/* offset[18] = insn[13]. */
7656
offset |= (insn & (1 << 13)) << 5;
7657
/* offset[19] = insn[11]. */
7658
offset |= (insn & (1 << 11)) << 8;
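The same bit gathering, written as one helper with the field names spelled out (illustrative only; it matches the statements above bit for bit):

static int32_t sketch_t32_cond_branch_offset(uint32_t insn)   /* insn = hw1:hw2 */
{
    int32_t offset;
    offset  = (insn & 0x7ff) << 1;                         /* imm11 -> offset[11:1] */
    offset |= (insn & 0x003f0000) >> 4;                    /* imm6  -> offset[17:12] */
    offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11; /* S sign-extends offset[31:20] */
    offset |= (insn & (1 << 13)) << 5;                     /* J1 -> offset[18] */
    offset |= (insn & (1 << 11)) << 8;                     /* J2 -> offset[19] */
    return offset;
}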
7660
/* jump to the offset */
7661
gen_jmp(s, s->pc + offset);
7664
/* Data processing immediate. */
7665
if (insn & (1 << 25)) {
7666
if (insn & (1 << 24)) {
7667
if (insn & (1 << 20))
7669
/* Bitfield/Saturate. */
7670
op = (insn >> 21) & 7;
7672
shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7675
tcg_gen_movi_i32(tmp, 0);
7677
tmp = load_reg(s, rn);
7680
case 2: /* Signed bitfield extract. */
7682
if (shift + imm > 32)
7685
gen_sbfx(tmp, shift, imm);
7687
case 6: /* Unsigned bitfield extract. */
7689
if (shift + imm > 32)
7692
gen_ubfx(tmp, shift, (1u << imm) - 1);
7694
case 3: /* Bitfield insert/clear. */
7697
imm = imm + 1 - shift;
7699
tmp2 = load_reg(s, rd);
7700
gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
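The bitfield cases boil down to the usual extract/insert idioms; a plain-C sketch of what the gen_ubfx and gen_bfi calls compute, for widths below 32 (hypothetical helpers):

static uint32_t sketch_ubfx(uint32_t val, int lsb, int width)
{
    return (val >> lsb) & ((1u << width) - 1);       /* unsigned bitfield extract */
}

static uint32_t sketch_bfi(uint32_t base, uint32_t val, int lsb, int width)
{
    uint32_t mask = ((1u << width) - 1) << lsb;
    return (base & ~mask) | ((val << lsb) & mask);   /* insert val into base */
}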
7706
default: /* Saturate. */
7709
tcg_gen_sari_i32(tmp, tmp, shift);
7711
tcg_gen_shli_i32(tmp, tmp, shift);
7713
tmp2 = tcg_const_i32(imm);
7716
if ((op & 1) && shift == 0)
7717
gen_helper_usat16(tmp, tmp, tmp2);
7719
gen_helper_usat(tmp, tmp, tmp2);
7722
if ((op & 1) && shift == 0)
7723
gen_helper_ssat16(tmp, tmp, tmp2);
7725
gen_helper_ssat(tmp, tmp, tmp2);
7729
store_reg(s, rd, tmp);
7731
imm = ((insn & 0x04000000) >> 15)
7732
| ((insn & 0x7000) >> 4) | (insn & 0xff);
7733
if (insn & (1 << 22)) {
7734
/* 16-bit immediate. */
7735
imm |= (insn >> 4) & 0xf000;
7736
if (insn & (1 << 23)) {
7738
tmp = load_reg(s, rd);
7739
tcg_gen_ext16u_i32(tmp, tmp);
7740
tcg_gen_ori_i32(tmp, tmp, imm << 16);
7744
tcg_gen_movi_i32(tmp, imm);
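The scattered i/imm3/imm8/imm4 fields gathered above form a full 16-bit immediate; MOVT keeps the low half of the destination and replaces the top half, while MOVW simply loads the value. A sketch of the combined result for this 16-bit-immediate path (illustrative helper):

static uint32_t sketch_t32_movw_movt(uint32_t insn, uint32_t old_rd)
{
    uint32_t imm = ((insn & 0x04000000) >> 15)   /* i    -> imm[11]    */
                 | ((insn & 0x7000) >> 4)        /* imm3 -> imm[10:8]  */
                 | (insn & 0xff)                 /* imm8 -> imm[7:0]   */
                 | ((insn >> 4) & 0xf000);       /* imm4 -> imm[15:12] */
    if (insn & (1 << 23))
        return (old_rd & 0xffff) | (imm << 16);  /* MOVT: replace the top half */
    return imm;                                  /* MOVW: load the immediate */
}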
7747
/* Add/sub 12-bit immediate. */
7749
offset = s->pc & ~(uint32_t)3;
7750
if (insn & (1 << 23))
7755
tcg_gen_movi_i32(tmp, offset);
7757
tmp = load_reg(s, rn);
7758
if (insn & (1 << 23))
7759
tcg_gen_subi_i32(tmp, tmp, imm);
7761
tcg_gen_addi_i32(tmp, tmp, imm);
7764
store_reg(s, rd, tmp);
7767
int shifter_out = 0;
7768
/* modified 12-bit immediate. */
7769
shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
7770
imm = (insn & 0xff);
7773
/* Nothing to do. */
7775
case 1: /* 00XY00XY */
7778
case 2: /* XY00XY00 */
7782
case 3: /* XYXYXYXY */
7786
default: /* Rotated constant. */
7787
shift = (shift << 1) | (imm >> 7);
7789
imm = imm << (32 - shift);
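The four replication patterns plus the rotated form are the standard Thumb-2 "modified immediate" expansion. A compact sketch of the same switch (illustrative helper; i_imm3 is the 4-bit value called shift above, and the carry-out handled by shifter_out is omitted). For example, i_imm3 = 1 with imm8 = 0xab yields 0x00ab00ab.

static uint32_t sketch_t32_expand_imm(uint32_t i_imm3, uint32_t imm8)
{
    switch (i_imm3) {
    case 0:  return imm8;                                /* 000000XY */
    case 1:  return (imm8 << 16) | imm8;                 /* 00XY00XY */
    case 2:  return (imm8 << 24) | (imm8 << 8);          /* XY00XY00 */
    case 3:  return (imm8 << 24) | (imm8 << 16)
                  | (imm8 << 8) | imm8;                  /* XYXYXYXY */
    default: {
        /* Rotated constant: 1:imm8[6:0] rotated right by i_imm3:imm8[7]. */
        uint32_t rot = (i_imm3 << 1) | (imm8 >> 7);
        uint32_t val = imm8 | 0x80;
        return val << (32 - rot);    /* rot is 8..31, so the shift never wraps */
    }
    }
}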
7793
gen_op_movl_T1_im(imm);
7794
rn = (insn >> 16) & 0xf;
7796
gen_op_movl_T0_im(0);
7798
gen_movl_T0_reg(s, rn);
7799
op = (insn >> 21) & 0xf;
7800
if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
7803
rd = (insn >> 8) & 0xf;
7805
gen_movl_reg_T0(s, rd);
7810
case 12: /* Load/store single data item. */
7815
if ((insn & 0x01100000) == 0x01000000) {
7816
if (disas_neon_ls_insn(env, s, insn))
7824
/* s->pc has already been incremented by 4. */
7825
imm = s->pc & 0xfffffffc;
7826
if (insn & (1 << 23))
7827
imm += insn & 0xfff;
7829
imm -= insn & 0xfff;
7830
tcg_gen_movi_i32(addr, imm);
7832
addr = load_reg(s, rn);
7833
if (insn & (1 << 23)) {
7834
/* Positive offset. */
7836
tcg_gen_addi_i32(addr, addr, imm);
7838
op = (insn >> 8) & 7;
7841
case 0: case 8: /* Shifted Register. */
7842
shift = (insn >> 4) & 0xf;
7845
tmp = load_reg(s, rm);
7847
tcg_gen_shli_i32(tmp, tmp, shift);
7848
tcg_gen_add_i32(addr, addr, tmp);
7851
case 4: /* Negative offset. */
7852
tcg_gen_addi_i32(addr, addr, -imm);
7854
case 6: /* User privilege. */
7855
tcg_gen_addi_i32(addr, addr, imm);
7858
case 1: /* Post-decrement. */
7861
case 3: /* Post-increment. */
7865
case 5: /* Pre-decrement. */
7868
case 7: /* Pre-increment. */
7869
tcg_gen_addi_i32(addr, addr, imm);
7877
op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
7878
if (insn & (1 << 20)) {
7880
if (rs == 15 && op != 2) {
7883
/* Memory hint. Implemented as NOP. */
7886
case 0: tmp = gen_ld8u(addr, user); break;
7887
case 4: tmp = gen_ld8s(addr, user); break;
7888
case 1: tmp = gen_ld16u(addr, user); break;
7889
case 5: tmp = gen_ld16s(addr, user); break;
7890
case 2: tmp = gen_ld32(addr, user); break;
7891
default: goto illegal_op;
7896
store_reg(s, rs, tmp);
7903
tmp = load_reg(s, rs);
7905
case 0: gen_st8(tmp, addr, user); break;
7906
case 1: gen_st16(tmp, addr, user); break;
7907
case 2: gen_st32(tmp, addr, user); break;
7908
default: goto illegal_op;
7912
tcg_gen_addi_i32(addr, addr, imm);
7914
store_reg(s, rn, addr);
7928
static void disas_thumb_insn(CPUState *env, DisasContext *s)
2961
static void disas_thumb_insn(DisasContext *s)
7930
2963
uint32_t val, insn, op, rm, rn, rd, shift, cond;
7931
2964
int32_t offset;
7937
if (s->condexec_mask) {
7938
cond = s->condexec_cond;
7939
s->condlabel = gen_new_label();
7940
gen_test_cc(cond ^ 1, s->condlabel);
7944
2967
insn = lduw_code(s->pc);