1
From ec40fde1061deb3bfca53a08ff69e1b2e25f19db Mon Sep 17 00:00:00 2001
2
From: Michael Matz <matz@suse.de>
3
Date: Fri, 31 Jan 2014 14:47:31 +0000
4
Subject: [PATCH 116/158] target-arm: A64: Add SIMD TBL/TBLX
6
Add support for the SIMD TBL/TBX instructions (group C3.6.2).
8
Signed-off-by: Michael Matz <matz@suse.de>
9
[PMM: rewritten to do more of the decode in translate-a64.c,
10
and to do only one 64 bit pass at a time in the helper]
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <rth@twiddle.net>
14
target-arm/helper-a64.c | 31 ++++++++++++++++++++++++++
15
target-arm/helper-a64.h | 1 +
16
target-arm/translate-a64.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-
17
3 files changed, 86 insertions(+), 1 deletion(-)
19
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
20
index 4ce0d01..6ca958a 100644
21
--- a/target-arm/helper-a64.c
22
+++ b/target-arm/helper-a64.c
23
@@ -122,3 +122,34 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
25
return float_rel_to_flags(float64_compare(x, y, fp_status));
28
+uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
29
+ uint32_t rn, uint32_t numregs)
31
+ /* Helper function for SIMD TBL and TBX. We have to do the table
32
+ * lookup part for the 64 bits worth of indices we're passed in.
33
+ * result is the initial results vector (either zeroes for TBL
34
+ * or some guest values for TBX), rn the register number where
35
+ * the table starts, and numregs the number of registers in the table.
36
+ * We return the results of the lookups.
40
+ for (shift = 0; shift < 64; shift += 8) {
41
+ int index = extract64(indices, shift, 8);
42
+ if (index < 16 * numregs) {
43
+ /* Convert index (a byte offset into the virtual table
44
+ * which is a series of 128-bit vectors concatenated)
45
+ * into the correct vfp.regs[] element plus a bit offset
46
+ * into that element, bearing in mind that the table
47
+ * can wrap around from V31 to V0.
49
+ int elt = (rn * 2 + (index >> 3)) % 64;
50
+ int bitidx = (index & 7) * 8;
51
+ uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8);
53
+ result = deposit64(result, shift, 8, val);
58
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
59
index bca19f3..99832ee 100644
60
--- a/target-arm/helper-a64.h
61
+++ b/target-arm/helper-a64.h
62
@@ -26,3 +26,4 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
63
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
64
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
65
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
66
+DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
67
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
68
index 75b0039..7a6b00a 100644
69
--- a/target-arm/translate-a64.c
70
+++ b/target-arm/translate-a64.c
71
@@ -4742,7 +4742,60 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
73
static void disas_simd_tb(DisasContext *s, uint32_t insn)
75
- unsupported_encoding(s, insn);
76
+ int op2 = extract32(insn, 22, 2);
77
+ int is_q = extract32(insn, 30, 1);
78
+ int rm = extract32(insn, 16, 5);
79
+ int rn = extract32(insn, 5, 5);
80
+ int rd = extract32(insn, 0, 5);
81
+ int is_tblx = extract32(insn, 12, 1);
82
+ int len = extract32(insn, 13, 2);
83
+ TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
84
+ TCGv_i32 tcg_regno, tcg_numregs;
87
+ unallocated_encoding(s);
91
+ /* This does a table lookup: for every byte element in the input
92
+ * we index into a table formed from up to four vector registers,
93
+ * and then the output is the result of the lookups. Our helper
94
+ * function does the lookup operation for a single 64 bit part of
97
+ tcg_resl = tcg_temp_new_i64();
98
+ tcg_resh = tcg_temp_new_i64();
101
+ read_vec_element(s, tcg_resl, rd, 0, MO_64);
103
+ tcg_gen_movi_i64(tcg_resl, 0);
105
+ if (is_tblx && is_q) {
106
+ read_vec_element(s, tcg_resh, rd, 1, MO_64);
108
+ tcg_gen_movi_i64(tcg_resh, 0);
111
+ tcg_idx = tcg_temp_new_i64();
112
+ tcg_regno = tcg_const_i32(rn);
113
+ tcg_numregs = tcg_const_i32(len + 1);
114
+ read_vec_element(s, tcg_idx, rm, 0, MO_64);
115
+ gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
116
+ tcg_regno, tcg_numregs);
118
+ read_vec_element(s, tcg_idx, rm, 1, MO_64);
119
+ gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
120
+ tcg_regno, tcg_numregs);
122
+ tcg_temp_free_i64(tcg_idx);
123
+ tcg_temp_free_i32(tcg_regno);
124
+ tcg_temp_free_i32(tcg_numregs);
126
+ write_vec_element(s, tcg_resl, rd, 0, MO_64);
127
+ tcg_temp_free_i64(tcg_resl);
128
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
129
+ tcg_temp_free_i64(tcg_resh);
132
/* C3.6.3 ZIP/UZP/TRN