Discussion:
[PATCH v4 01/33] tcg-aarch64: Change all ext variables to TCGType
Richard Henderson
2013-09-14 21:54:18 UTC
We assert that the values for _I32 and _I64 are 0 and 1 respectively.
This will make a couple of functions declared by tcg.c cleaner.
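
A minimal sketch of the simplification this enables (the with_sf helper name is hypothetical, not from the patch): because the AArch64 SF bit at bit 31 selects the 64-bit form of an instruction, a TCGType with these values can be shifted straight into the opcode word instead of being tested with a ternary.

    /* Hypothetical standalone helper; assumes only that TCG_TYPE_I32 == 0 and
       TCG_TYPE_I64 == 1, as asserted by the patch. */
    #include <stdint.h>

    typedef enum { TCG_TYPE_I32 = 0, TCG_TYPE_I64 = 1 } TCGType;

    static inline uint32_t with_sf(uint32_t insn, TCGType type)
    {
        /* Bit 31 (SF) set selects the 64-bit operation. */
        return insn | ((uint32_t)type << 31);
    }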

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 64 ++++++++++++++++++++++++++++--------------------
1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 6379df1..cf36551 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -12,6 +12,11 @@

#include "qemu/bitops.h"

+/* We're going to re-use TCGType in setting of the SF bit, which controls
+ the size of the operation performed. If we know the values match, it
+ makes things much cleaner. */
+QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
+
#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
@@ -326,7 +331,8 @@ static inline void tcg_out_ldst_12(TCGContext *s,
| op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
}

-static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
+static inline void tcg_out_movr(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg src)
{
/* register to register move using MOV (shifted register with no shift) */
/* using MOV 0x2a0003e0 | (shift).. */
@@ -407,7 +413,8 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
}

/* mov alias implemented with add immediate, useful to move to/from SP */
-static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
+static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn)
{
/* using ADD 0x11000000 | (ext) | rn << 5 | rd */
unsigned int base = ext ? 0x91000000 : 0x11000000;
@@ -437,7 +444,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
}

static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
- int ext, TCGReg rd, TCGReg rn, TCGReg rm,
+ TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
int shift_imm)
{
/* Using shifted register arithmetic operations */
@@ -453,7 +460,7 @@ static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
}

-static inline void tcg_out_mul(TCGContext *s, int ext,
+static inline void tcg_out_mul(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
/* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
@@ -462,7 +469,7 @@ static inline void tcg_out_mul(TCGContext *s, int ext,
}

static inline void tcg_out_shiftrot_reg(TCGContext *s,
- enum aarch64_srr_opc opc, int ext,
+ enum aarch64_srr_opc opc, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
/* using 2-source data processing instructions 0x1ac02000 */
@@ -470,23 +477,23 @@ static inline void tcg_out_shiftrot_reg(TCGContext *s,
tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
}

-static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
- unsigned int a, unsigned int b)
+static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
{
/* Using UBFM 0x53000000 Wd, Wn, a, b */
unsigned int base = ext ? 0xd3400000 : 0x53000000;
tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

-static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
- unsigned int a, unsigned int b)
+static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
{
/* Using SBFM 0x13000000 Wd, Wn, a, b */
unsigned int base = ext ? 0x93400000 : 0x13000000;
tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

-static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
+static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, TCGReg rm, unsigned int a)
{
/* Using EXTR 0x13800000 Wd, Wn, Wm, a */
@@ -494,7 +501,7 @@ static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
}

-static inline void tcg_out_shl(TCGContext *s, int ext,
+static inline void tcg_out_shl(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int m)
{
int bits, max;
@@ -503,28 +510,28 @@ static inline void tcg_out_shl(TCGContext *s, int ext,
tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

-static inline void tcg_out_shr(TCGContext *s, int ext,
+static inline void tcg_out_shr(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int m)
{
int max = ext ? 63 : 31;
tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

-static inline void tcg_out_sar(TCGContext *s, int ext,
+static inline void tcg_out_sar(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int m)
{
int max = ext ? 63 : 31;
tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

-static inline void tcg_out_rotr(TCGContext *s, int ext,
+static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int m)
{
int max = ext ? 63 : 31;
tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

-static inline void tcg_out_rotl(TCGContext *s, int ext,
+static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int m)
{
int bits, max;
@@ -533,14 +540,15 @@ static inline void tcg_out_rotl(TCGContext *s, int ext,
tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

-static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
- int shift_imm)
+static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
+ TCGReg rm, int shift_imm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
}

-static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
+static inline void tcg_out_cset(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGCond c)
{
/* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
@@ -637,7 +645,7 @@ aarch64_limm(unsigned int m, unsigned int r)
to test a 32bit reg against 0xff000000, pass M = 8, R = 8.
to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
*/
-static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
+static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
unsigned int m, unsigned int r)
{
/* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
@@ -646,8 +654,8 @@ static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
}

/* and a register with a bit pattern, similarly to TST, no flags change */
-static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
- unsigned int m, unsigned int r)
+static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int m, unsigned int r)
{
/* using AND 0x12000000 */
unsigned int base = ext ? 0x92400000 : 0x12000000;
@@ -700,21 +708,23 @@ static inline void tcg_out_goto_label_cond(TCGContext *s,
}
}

-static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
+static inline void tcg_out_rev(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rm)
{
/* using REV 0x5ac00800 */
unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
tcg_out32(s, base | rm << 5 | rd);
}

-static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
+static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rm)
{
/* using REV16 0x5ac00400 */
unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
tcg_out32(s, base | rm << 5 | rd);
}

-static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
+static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
TCGReg rd, TCGReg rn)
{
/* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
@@ -732,7 +742,7 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

-static inline void tcg_out_addi(TCGContext *s, int ext,
+static inline void tcg_out_addi(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int aimm)
{
/* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
@@ -752,7 +762,7 @@ static inline void tcg_out_addi(TCGContext *s, int ext,
tcg_out32(s, base | aimm | (rn << 5) | rd);
}

-static inline void tcg_out_subi(TCGContext *s, int ext,
+static inline void tcg_out_subi(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, unsigned int aimm)
{
/* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
@@ -1107,7 +1117,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
{
/* ext will be set in the switch below, which will fall through to the
common code. It triggers the use of extended regs where appropriate. */
- int ext = 0;
+ TCGType ext = 0;

switch (opc) {
case INDEX_op_exit_tb:
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:19 UTC
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 28 +++++++---------------------
1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cf36551..983a74a 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1115,9 +1115,9 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
- /* ext will be set in the switch below, which will fall through to the
- common code. It triggers the use of extended regs where appropriate. */
- TCGType ext = 0;
+ /* 99% of the time, we can signal the use of extension registers
+ by looking to see if the opcode handles 64-bit data. */
+ TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

switch (opc) {
case INDEX_op_exit_tb:
@@ -1173,7 +1173,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_mov_i64:
- ext = 1; /* fall through */
case INDEX_op_mov_i32:
tcg_out_movr(s, ext, args[0], args[1]);
break;
@@ -1186,43 +1185,36 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_add_i64:
- ext = 1; /* fall through */
case INDEX_op_add_i32:
tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
break;

case INDEX_op_sub_i64:
- ext = 1; /* fall through */
case INDEX_op_sub_i32:
tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
break;

case INDEX_op_and_i64:
- ext = 1; /* fall through */
case INDEX_op_and_i32:
tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
break;

case INDEX_op_or_i64:
- ext = 1; /* fall through */
case INDEX_op_or_i32:
tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
break;

case INDEX_op_xor_i64:
- ext = 1; /* fall through */
case INDEX_op_xor_i32:
tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
break;

case INDEX_op_mul_i64:
- ext = 1; /* fall through */
case INDEX_op_mul_i32:
tcg_out_mul(s, ext, args[0], args[1], args[2]);
break;

case INDEX_op_shl_i64:
- ext = 1; /* fall through */
case INDEX_op_shl_i32:
if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
tcg_out_shl(s, ext, args[0], args[1], args[2]);
@@ -1232,7 +1224,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_shr_i64:
- ext = 1; /* fall through */
case INDEX_op_shr_i32:
if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
tcg_out_shr(s, ext, args[0], args[1], args[2]);
@@ -1242,7 +1233,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_sar_i64:
- ext = 1; /* fall through */
case INDEX_op_sar_i32:
if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
tcg_out_sar(s, ext, args[0], args[1], args[2]);
@@ -1252,7 +1242,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_rotr_i64:
- ext = 1; /* fall through */
case INDEX_op_rotr_i32:
if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
tcg_out_rotr(s, ext, args[0], args[1], args[2]);
@@ -1262,7 +1251,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_rotl_i64:
- ext = 1; /* fall through */
case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
tcg_out_rotl(s, ext, args[0], args[1], args[2]);
@@ -1275,14 +1263,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_brcond_i64:
- ext = 1; /* fall through */
case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
tcg_out_cmp(s, ext, args[0], args[1], 0);
tcg_out_goto_label_cond(s, args[2], args[3]);
break;

case INDEX_op_setcond_i64:
- ext = 1; /* fall through */
case INDEX_op_setcond_i32:
tcg_out_cmp(s, ext, args[1], args[2], 0);
tcg_out_cset(s, 0, args[0], args[3]);
@@ -1325,9 +1311,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_st(s, args, 3);
break;

- case INDEX_op_bswap64_i64:
- ext = 1; /* fall through */
case INDEX_op_bswap32_i64:
+ /* Despite the _i64, this is a 32-bit bswap. */
+ ext = 0;
+ /* FALLTHRU */
+ case INDEX_op_bswap64_i64:
case INDEX_op_bswap32_i32:
tcg_out_rev(s, ext, args[0], args[1]);
break;
@@ -1337,12 +1325,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;

case INDEX_op_ext8s_i64:
- ext = 1; /* fall through */
case INDEX_op_ext8s_i32:
tcg_out_sxt(s, ext, 0, args[0], args[1]);
break;
case INDEX_op_ext16s_i64:
- ext = 1; /* fall through */
case INDEX_op_ext16s_i32:
tcg_out_sxt(s, ext, 1, args[0], args[1]);
break;
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:21 UTC
This reduces the code size of the function significantly.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 95 +++++++++++++++++++++++++-----------------------
1 file changed, 50 insertions(+), 45 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 8f19b50..8f5814d 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1113,15 +1113,22 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
/* 99% of the time, we can signal the use of extension registers
by looking to see if the opcode handles 64-bit data. */
TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

+ /* Hoist the loads of the most common arguments. */
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
break;

@@ -1130,23 +1137,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
#error "USE_DIRECT_JUMP required for aarch64"
#endif
assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
- s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+ s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
/* actual branch destination will be patched by
aarch64_tb_set_jmp_target later, beware retranslation. */
tcg_out_goto_noaddr(s);
- s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+ s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
break;

case INDEX_op_call:
if (const_args[0]) {
- tcg_out_call(s, args[0]);
+ tcg_out_call(s, a0);
} else {
- tcg_out_callr(s, args[0]);
+ tcg_out_callr(s, a0);
}
break;

case INDEX_op_br:
- tcg_out_goto_label(s, args[0]);
+ tcg_out_goto_label(s, a0);
break;

case INDEX_op_ld_i32:
@@ -1169,97 +1176,95 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st16_i64:
case INDEX_op_st32_i64:
tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
- args[0], args[1], args[2]);
+ a0, a1, a2);
break;

case INDEX_op_add_i64:
case INDEX_op_add_i32:
- tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
break;

case INDEX_op_sub_i64:
case INDEX_op_sub_i32:
- tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
break;

case INDEX_op_and_i64:
case INDEX_op_and_i32:
- tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
break;

case INDEX_op_or_i64:
case INDEX_op_or_i32:
- tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
break;

case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
- tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
break;

case INDEX_op_mul_i64:
case INDEX_op_mul_i32:
- tcg_out_mul(s, ext, args[0], args[1], args[2]);
+ tcg_out_mul(s, ext, a0, a1, a2);
break;

case INDEX_op_shl_i64:
case INDEX_op_shl_i32:
- if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
- tcg_out_shl(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ tcg_out_shl(s, ext, a0, a1, a2);
} else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
}
break;

case INDEX_op_shr_i64:
case INDEX_op_shr_i32:
- if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
- tcg_out_shr(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
+ tcg_out_shr(s, ext, a0, a1, a2);
} else { /* LSR / LSRV */
- tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
}
break;

case INDEX_op_sar_i64:
case INDEX_op_sar_i32:
- if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
- tcg_out_sar(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
+ tcg_out_sar(s, ext, a0, a1, a2);
} else { /* ASR / ASRV */
- tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
}
break;

case INDEX_op_rotr_i64:
case INDEX_op_rotr_i32:
- if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
- tcg_out_rotr(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
+ tcg_out_rotr(s, ext, a0, a1, a2);
} else { /* ROR / RORV */
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
}
break;

case INDEX_op_rotl_i64:
case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
- if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
- tcg_out_rotl(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
+ tcg_out_rotl(s, ext, a0, a1, a2);
} else {
- tcg_out_arith(s, ARITH_SUB, 0,
- TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
- tcg_out_shiftrot_reg(s, SRR_ROR, ext,
- args[0], args[1], TCG_REG_TMP);
+ tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
+ tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
}
break;

case INDEX_op_brcond_i64:
- case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
- tcg_out_cmp(s, ext, args[0], args[1], 0);
- tcg_out_goto_label_cond(s, args[2], args[3]);
+ case INDEX_op_brcond_i32:
+ tcg_out_cmp(s, ext, a0, a1, 0);
+ tcg_out_goto_label_cond(s, a2, args[3]);
break;

case INDEX_op_setcond_i64:
case INDEX_op_setcond_i32:
- tcg_out_cmp(s, ext, args[1], args[2], 0);
- tcg_out_cset(s, 0, args[0], args[3]);
+ tcg_out_cmp(s, ext, a1, a2, 0);
+ tcg_out_cset(s, 0, a0, args[3]);
break;

case INDEX_op_qemu_ld8u:
@@ -1305,34 +1310,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* FALLTHRU */
case INDEX_op_bswap64_i64:
case INDEX_op_bswap32_i32:
- tcg_out_rev(s, ext, args[0], args[1]);
+ tcg_out_rev(s, ext, a0, a1);
break;
case INDEX_op_bswap16_i64:
case INDEX_op_bswap16_i32:
- tcg_out_rev16(s, 0, args[0], args[1]);
+ tcg_out_rev16(s, 0, a0, a1);
break;

case INDEX_op_ext8s_i64:
case INDEX_op_ext8s_i32:
- tcg_out_sxt(s, ext, 0, args[0], args[1]);
+ tcg_out_sxt(s, ext, 0, a0, a1);
break;
case INDEX_op_ext16s_i64:
case INDEX_op_ext16s_i32:
- tcg_out_sxt(s, ext, 1, args[0], args[1]);
+ tcg_out_sxt(s, ext, 1, a0, a1);
break;
case INDEX_op_ext32s_i64:
- tcg_out_sxt(s, 1, 2, args[0], args[1]);
+ tcg_out_sxt(s, 1, 2, a0, a1);
break;
case INDEX_op_ext8u_i64:
case INDEX_op_ext8u_i32:
- tcg_out_uxt(s, 0, args[0], args[1]);
+ tcg_out_uxt(s, 0, a0, a1);
break;
case INDEX_op_ext16u_i64:
case INDEX_op_ext16u_i32:
- tcg_out_uxt(s, 1, args[0], args[1]);
+ tcg_out_uxt(s, 1, a0, a1);
break;
case INDEX_op_ext32u_i64:
- tcg_out_movr(s, 0, args[0], args[1]);
+ tcg_out_movr(s, 0, a0, a1);
break;

case INDEX_op_mov_i64:
--
1.8.3.1
Claudio Fontana
2013-09-16 07:42:17 UTC
Hello Richard,
Post by Richard Henderson
This reduces the code size of the function significantly.
---
tcg/aarch64/tcg-target.c | 95 +++++++++++++++++++++++++-----------------------
1 file changed, 50 insertions(+), 45 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 8f19b50..8f5814d 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1113,15 +1113,22 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
}
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
/* 99% of the time, we can signal the use of extension registers
by looking to see if the opcode handles 64-bit data. */
TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
+ /* Hoist the loads of the most common arguments. */
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
Either all or none (add c0, c1), I would expect the compiler not to generate code for the paths that don't use C[n].

Btw, if the compiler generates bloated code without this, we should notify the projects working on gcc for aarch64.
Post by Richard Henderson
switch (opc) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
break;
@@ -1130,23 +1137,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
#error "USE_DIRECT_JUMP required for aarch64"
#endif
assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
- s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+ s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
/* actual branch destination will be patched by
aarch64_tb_set_jmp_target later, beware retranslation. */
tcg_out_goto_noaddr(s);
- s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+ s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
break;
if (const_args[0]) {
- tcg_out_call(s, args[0]);
+ tcg_out_call(s, a0);
} else {
- tcg_out_callr(s, args[0]);
+ tcg_out_callr(s, a0);
}
break;
- tcg_out_goto_label(s, args[0]);
+ tcg_out_goto_label(s, a0);
break;
@@ -1169,97 +1176,95 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
- args[0], args[1], args[2]);
+ a0, a1, a2);
break;
- tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
break;
- tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
break;
- tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
break;
- tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
break;
- tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
+ tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
break;
- tcg_out_mul(s, ext, args[0], args[1], args[2]);
+ tcg_out_mul(s, ext, a0, a1, a2);
break;
- if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
- tcg_out_shl(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ tcg_out_shl(s, ext, a0, a1, a2);
} else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
}
break;
- if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
- tcg_out_shr(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
+ tcg_out_shr(s, ext, a0, a1, a2);
} else { /* LSR / LSRV */
- tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
}
break;
- if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
- tcg_out_sar(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
+ tcg_out_sar(s, ext, a0, a1, a2);
} else { /* ASR / ASRV */
- tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
}
break;
- if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
- tcg_out_rotr(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
+ tcg_out_rotr(s, ext, a0, a1, a2);
} else { /* ROR / RORV */
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
+ tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
}
break;
case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
- if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
- tcg_out_rotl(s, ext, args[0], args[1], args[2]);
+ if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
+ tcg_out_rotl(s, ext, a0, a1, a2);
} else {
- tcg_out_arith(s, ARITH_SUB, 0,
- TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
- tcg_out_shiftrot_reg(s, SRR_ROR, ext,
- args[0], args[1], TCG_REG_TMP);
+ tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
+ tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
}
break;
- case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
- tcg_out_cmp(s, ext, args[0], args[1], 0);
- tcg_out_goto_label_cond(s, args[2], args[3]);
+ tcg_out_cmp(s, ext, a0, a1, 0);
+ tcg_out_goto_label_cond(s, a2, args[3]);
break;
- tcg_out_cmp(s, ext, args[1], args[2], 0);
- tcg_out_cset(s, 0, args[0], args[3]);
+ tcg_out_cmp(s, ext, a1, a2, 0);
+ tcg_out_cset(s, 0, a0, args[3]);
break;
@@ -1305,34 +1310,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* FALLTHRU */
- tcg_out_rev(s, ext, args[0], args[1]);
+ tcg_out_rev(s, ext, a0, a1);
break;
- tcg_out_rev16(s, 0, args[0], args[1]);
+ tcg_out_rev16(s, 0, a0, a1);
break;
- tcg_out_sxt(s, ext, 0, args[0], args[1]);
+ tcg_out_sxt(s, ext, 0, a0, a1);
break;
- tcg_out_sxt(s, ext, 1, args[0], args[1]);
+ tcg_out_sxt(s, ext, 1, a0, a1);
break;
- tcg_out_sxt(s, 1, 2, args[0], args[1]);
+ tcg_out_sxt(s, 1, 2, a0, a1);
break;
- tcg_out_uxt(s, 0, args[0], args[1]);
+ tcg_out_uxt(s, 0, a0, a1);
break;
- tcg_out_uxt(s, 1, args[0], args[1]);
+ tcg_out_uxt(s, 1, a0, a1);
break;
- tcg_out_movr(s, 0, args[0], args[1]);
+ tcg_out_movr(s, 0, a0, a1);
break;
Claudio
Richard Henderson
2013-09-16 16:20:32 UTC
Post by Claudio Fontana
Post by Richard Henderson
+ /* Hoist the loads of the most common arguments. */
Post by Richard Henderson
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
Either all or none (add c0, c1), I would expect the compiler not to
generate code for the paths that don't use C[n].
I chose the most common. Those used in 90% of all of the cases.
Post by Claudio Fontana
Btw, if the compiler generates bloated code without this, we should notify
the projects working on gcc for aarch64.
It's not the compiler's fault. After parameter decomposition, the arrays
become pointers, and the compiler can't tell that it's always safe to perform
the loads. So in general it can't hoist the loads higher than the first
explicit reference that proves the pointers must be non-null.

Now that I think about it, we might actually do better, generically, to package
all of these arguments up into a struct. The compiler can more easily reason
about the collective safety of structure access...
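
A minimal sketch of the difference (hypothetical names, not QEMU code): with an array parameter the compiler only sees a pointer and cannot prove that loading args[2] is safe on paths that never touch it, so the loads stay inside the cases; passing a struct by value turns every field into a local the compiler may read freely.

    /* do_emit3/do_emit1 are placeholders for "emit an instruction". */
    typedef unsigned long TCGArg;

    void do_emit3(TCGArg a0, TCGArg a1, TCGArg a2);
    void do_emit1(TCGArg a0);

    /* The array parameter decays to a pointer: args[2] might not be readable
       on the case-1 path, so the compiler cannot hoist its load above the
       switch. */
    void emit_ptr(int opc, const TCGArg *args)
    {
        switch (opc) {
        case 0:
            do_emit3(args[0], args[1], args[2]);
            break;
        case 1:
            do_emit1(args[0]);
            break;
        }
    }

    /* With a by-value struct, all three fields are already-loaded locals. */
    struct op_args { TCGArg a0, a1, a2; };

    void emit_struct(int opc, struct op_args a)
    {
        switch (opc) {
        case 0:
            do_emit3(a.a0, a.a1, a.a2);
            break;
        case 1:
            do_emit1(a.a0);
            break;
        }
    }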


r~
Claudio Fontana
2013-09-17 08:01:23 UTC
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
+ /* Hoist the loads of the most common arguments. */
Post by Richard Henderson
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
Either all or none (add c0, c1), I would expect the compiler not to
generate code for the paths that don't use C[n].
I chose the most common. Those used in 90% of all of the cases.
What you did is clear, but it does not change the fact that mixing the hoisted variables and args[] is confusing.
So either we add int c0 and int c1 as well, and use the hoisted variables exclusively from then on, or we don't do it at all.
Post by Richard Henderson
Post by Claudio Fontana
Btw, if the compiler generates bloated code without this, we should notify
the projects working on gcc for aarch64.
It's not the compiler's fault. After parameter decomposition, the arrays
become pointers, and the compiler can't tell that it's always safe to perform
the loads. So in general it can't hoist the loads higher than the first
explicit reference that proves the pointers must be non-null.
Now that I think about it, we might actually do better, generically, to package
all of these arguments up into a struct. The compiler can more easily reason
about the collective safety of structure access...
I don't have anything against it in principle, but just adding c0 and c1, which iirc should cover all uses, would be fine by me.

Claudio
Richard Henderson
2013-09-17 14:27:09 UTC
Post by Claudio Fontana
I don't have anything against it in principle, but just adding c0 and c1,
which iirc should cover all uses, would be fine by me.
Not really.

There are 6 potential args[] values, 5 of which might be const_args[].


r~
Claudio Fontana
2013-09-18 08:10:05 UTC
Post by Richard Henderson
Post by Claudio Fontana
I don't have anything against it in principle, but just adding c0 and c1,
which iirc should cover all uses, would be fine by me.
Not really.
There are 6 potential args[] values, 5 of which might be const_args[].
Current uses. Like the args[] that are actually used right now.
Richard Henderson
2013-09-18 14:00:04 UTC
Post by Claudio Fontana
Post by Richard Henderson
Post by Claudio Fontana
I don't have anything against it in principle, but just adding c0 and c1,
which iirc should cover all uses, would be fine by me.
Not really.
There are 6 potential args[] values, 5 of which might be const_args[].
Current uses. Like the args[] that are actually used right now.
That's what I'm talking about. E.g. movcond: 6 args[], 1-4 are inputs
that might have const_args[].


r~
Claudio Fontana
2013-09-18 14:18:18 UTC
Post by Richard Henderson
Post by Claudio Fontana
I don't have anything against it in principle, but just adding c0 and c1,
which iirc should cover all uses, would be fine by me.
Not really.
There are 6 potential args[] values, 5 of which might be const_args[].
r~
OK, adding all of those would be a waste; I did not take the movcond changes into consideration.
Your initial proposal was indeed the best tradeoff, then: let's just hoist the most commonly used args.

C.
Richard Henderson
2013-09-14 21:54:22 UTC
And since we're no longer talking about opcodes, change the
values to be shifted into the opcode field, avoiding a shift
at runtime.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 43 +++++++++++++++++++++++--------------------
1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 8f5814d..99d9884 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -204,16 +204,19 @@ enum aarch64_ldst_op_type { /* type of operation */
LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
};

-enum aarch64_arith_opc {
- ARITH_AND = 0x0a,
- ARITH_ADD = 0x0b,
- ARITH_OR = 0x2a,
- ARITH_ADDS = 0x2b,
- ARITH_XOR = 0x4a,
- ARITH_SUB = 0x4b,
- ARITH_ANDS = 0x6a,
- ARITH_SUBS = 0x6b,
-};
+typedef enum {
+ /* Logical shifted register instructions */
+ INSN_AND = 0x0a000000,
+ INSN_ORR = 0x2a000000,
+ INSN_EOR = 0x4a000000,
+ INSN_ANDS = 0x6a000000,
+
+ /* Add/subtract shifted register instructions */
+ INSN_ADD = 0x0b000000,
+ INSN_ADDS = 0x2b000000,
+ INSN_SUB = 0x4b000000,
+ INSN_SUBS = 0x6b000000,
+} AArch64Insn;

enum aarch64_srr_opc {
SRR_SHL = 0x0,
@@ -443,13 +446,13 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
arg, arg1, arg2);
}

-static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
+static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn,
TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
int shift_imm)
{
/* Using shifted register arithmetic operations */
/* if extended register operation (64bit) just OR with 0x80 << 24 */
- unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
+ unsigned int shift, base = insn | (ext ? 0x80000000 : 0);
if (shift_imm == 0) {
shift = 0;
} else if (shift_imm > 0) {
@@ -544,7 +547,7 @@ static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
TCGReg rm, int shift_imm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
+ tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
}

static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -904,7 +907,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
/* Merge the tlb index contribution into X2.
X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
- tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2,
+ tcg_out_arith(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
X0 = load [X2 + (tlb_offset & 0x000fff)] */
@@ -1181,27 +1184,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

case INDEX_op_add_i64:
case INDEX_op_add_i32:
- tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
+ tcg_out_arith(s, INSN_ADD, ext, a0, a1, a2, 0);
break;

case INDEX_op_sub_i64:
case INDEX_op_sub_i32:
- tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
+ tcg_out_arith(s, INSN_SUB, ext, a0, a1, a2, 0);
break;

case INDEX_op_and_i64:
case INDEX_op_and_i32:
- tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
+ tcg_out_arith(s, INSN_AND, ext, a0, a1, a2, 0);
break;

case INDEX_op_or_i64:
case INDEX_op_or_i32:
- tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
+ tcg_out_arith(s, INSN_ORR, ext, a0, a1, a2, 0);
break;

case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
- tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
+ tcg_out_arith(s, INSN_EOR, ext, a0, a1, a2, 0);
break;

case INDEX_op_mul_i64:
@@ -1250,7 +1253,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
- tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
+ tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
}
break;
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:20 UTC
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 983a74a..8f19b50 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1172,18 +1172,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[0], args[1], args[2]);
break;

- case INDEX_op_mov_i64:
- case INDEX_op_mov_i32:
- tcg_out_movr(s, ext, args[0], args[1]);
- break;
-
- case INDEX_op_movi_i64:
- tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
- break;
- case INDEX_op_movi_i32:
- tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
- break;
-
case INDEX_op_add_i64:
case INDEX_op_add_i32:
tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
@@ -1347,8 +1335,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movr(s, 0, args[0], args[1]);
break;

+ case INDEX_op_mov_i64:
+ case INDEX_op_mov_i32:
+ case INDEX_op_movi_i64:
+ case INDEX_op_movi_i32:
+ /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
default:
- tcg_abort(); /* opcode not implemented */
+ /* Opcode not implemented. */
+ tcg_abort();
}
}
--
1.8.3.1
Claudio Fontana
2013-09-16 07:45:35 UTC
Post by Richard Henderson
---
tcg/aarch64/tcg-target.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 983a74a..8f19b50 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1172,18 +1172,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[0], args[1], args[2]);
break;
- tcg_out_movr(s, ext, args[0], args[1]);
- break;
-
- tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
- break;
- tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
- break;
-
tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
@@ -1347,8 +1335,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movr(s, 0, args[0], args[1]);
break;
+ /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
Ok (as in, "I am ok with moving these labels here").
Post by Richard Henderson
- tcg_abort(); /* opcode not implemented */
+ /* Opcode not implemented. */
+ tcg_abort();
}
}
This change above seems unnecessary.

C.
Richard Henderson
2013-09-16 15:07:51 UTC
Post by Claudio Fontana
Post by Richard Henderson
- tcg_abort(); /* opcode not implemented */
+ /* Opcode not implemented. */
+ tcg_abort();
}
}
This change above seems unnecessary.
Perhaps qemu doesn't have the same "comments are sentences" rule that gcc does,
but I still aim to follow it.


r~
Claudio Fontana
2013-09-17 08:05:51 UTC
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
- tcg_abort(); /* opcode not implemented */
+ /* Opcode not implemented. */
+ tcg_abort();
}
}
This change above seems unnecessary.
Perhaps qemu doesn't have the same "comments are sentences" rule that gcc does,
but I still aim to follow it.
It is wasteful of vertical space for no reason. This comment is not a sentence when you read the code as you would prose.
It is a comment _on_ tcg_abort(): tcg_abort(), because the opcode is not implemented.
In other cases a comment alone on its own line is a good idea, but not in this one.

C.
Richard Henderson
2013-09-14 21:54:26 UTC
This merges the implementation of tcg_out_addi and tcg_out_subi.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 79 +++++++++++++++++++-----------------------------
1 file changed, 31 insertions(+), 48 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 0e7b67b..56625a9 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -211,6 +211,12 @@ typedef enum {
INSN_EOR = 0x4a000000,
INSN_ANDS = 0x6a000000,

+ /* Add/subtract immediate instructions */
+ INSN_ADDI = 0x11000000,
+ INSN_ADDSI = 0x31000000,
+ INSN_SUBI = 0x51000000,
+ INSN_SUBSI = 0x71000000,
+
/* Add/subtract shifted register instructions */
INSN_ADD = 0x0b000000,
INSN_ADDS = 0x2b000000,
@@ -326,6 +332,22 @@ static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
}

+/* This function is used for the Arithmetic (immediate) instruction group.
+ The value of AIMM must be appropriate for encoding in the shift+imm12
+ fields. */
+static inline void tcg_fmt_Rdn_aimm(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ unsigned int aimm)
+{
+ if (aimm > 0xfff) {
+ assert((aimm & 0xfff) == 0);
+ aimm >>= 12;
+ assert(aimm <= 0xfff);
+ aimm |= 1 << 12; /* apply LSL 12 */
+ }
+ tcg_out32(s, insn | sf << 31 | aimm << 10 | rn << 5 | rd);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -742,46 +764,6 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

-static inline void tcg_out_addi(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn, unsigned int aimm)
-{
- /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
- /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
- unsigned int base = ext ? 0x91000000 : 0x11000000;
-
- if (aimm <= 0xfff) {
- aimm <<= 10;
- } else {
- /* we can only shift left by 12, on assert we cannot represent */
- assert(!(aimm & 0xfff));
- assert(aimm <= 0xfff000);
- base |= 1 << 22; /* apply LSL 12 */
- aimm >>= 2;
- }
-
- tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
-static inline void tcg_out_subi(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn, unsigned int aimm)
-{
- /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
- /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
- unsigned int base = ext ? 0xd1000000 : 0x51000000;
-
- if (aimm <= 0xfff) {
- aimm <<= 10;
- } else {
- /* we can only shift left by 12, on assert we cannot represent */
- assert(!(aimm & 0xfff));
- assert(aimm <= 0xfff000);
- base |= 1 << 22; /* apply LSL 12 */
- aimm >>= 2;
- }
-
- tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
static inline void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, 0xd503201f);
@@ -899,9 +881,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
(TARGET_LONG_BITS - TARGET_PAGE_BITS));
/* Add any "high bits" from the tlb offset to the env address into X2,
- to take advantage of the LSL12 form of the addi instruction.
+ to take advantage of the LSL12 form of the ADDI instruction.
X2 = env + (tlb_offset & 0xfff000) */
- tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
/* Merge the tlb index contribution into X2.
X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
@@ -1510,9 +1492,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
}

- /* make stack space for TCG locals */
- tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ /* Make stack space for TCG locals. */
+ tcg_fmt_Rdn_aimm(s, INSN_SUBI, 1, TCG_REG_SP, TCG_REG_SP,
+ frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+
/* inform TCG about how to find TCG locals with register, offset, size */
tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
CPU_TEMP_BUF_NLONGS * sizeof(long));
@@ -1529,9 +1512,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)

tb_ret_addr = s->code_ptr;

- /* remove TCG locals stack space */
- tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ /* Remove TCG locals stack space. */
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_SP, TCG_REG_SP,
+ frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);

/* restore registers x19..x28.
FP must be preserved, so it still points to callee_saved area */
--
1.8.3.1
Claudio Fontana
2013-09-16 08:47:12 UTC
Post by Richard Henderson
This merges the implementation of tcg_out_addi and tcg_out_subi.
---
tcg/aarch64/tcg-target.c | 79 +++++++++++++++++++-----------------------------
1 file changed, 31 insertions(+), 48 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 0e7b67b..56625a9 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -211,6 +211,12 @@ typedef enum {
INSN_EOR = 0x4a000000,
INSN_ANDS = 0x6a000000,
+ /* Add/subtract immediate instructions */
+ INSN_ADDI = 0x11000000,
+ INSN_ADDSI = 0x31000000,
+ INSN_SUBI = 0x51000000,
+ INSN_SUBSI = 0x71000000,
+
/* Add/subtract shifted register instructions */
INSN_ADD = 0x0b000000,
INSN_ADDS = 0x2b000000,
@@ -326,6 +332,22 @@ static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
}
+/* This function is used for the Arithmetic (immediate) instruction group.
+ The value of AIMM must be appropriate for encoding in the shift+imm12
+ fields. */
+static inline void tcg_fmt_Rdn_aimm(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ unsigned int aimm)
Since we are emitting an instruction, a tcg_out_* name seems better to me. 'tcg_out_arith_imm'?
Also, here we have the same issue where it might not be immediately obvious which AArch64Insn values are OK to pass.
Post by Richard Henderson
+{
+ if (aimm > 0xfff) {
+ assert((aimm & 0xfff) == 0);
+ aimm >>= 12;
+ assert(aimm <= 0xfff);
+ aimm |= 1 << 12; /* apply LSL 12 */
+ }
+ tcg_out32(s, insn | sf << 31 | aimm << 10 | rn << 5 | rd);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -742,46 +764,6 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}
-static inline void tcg_out_addi(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn, unsigned int aimm)
-{
- /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
- /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
- unsigned int base = ext ? 0x91000000 : 0x11000000;
-
- if (aimm <= 0xfff) {
- aimm <<= 10;
- } else {
- /* we can only shift left by 12, on assert we cannot represent */
- assert(!(aimm & 0xfff));
- assert(aimm <= 0xfff000);
- base |= 1 << 22; /* apply LSL 12 */
- aimm >>= 2;
- }
-
- tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
-static inline void tcg_out_subi(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn, unsigned int aimm)
-{
- /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
- /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
- unsigned int base = ext ? 0xd1000000 : 0x51000000;
-
- if (aimm <= 0xfff) {
- aimm <<= 10;
- } else {
- /* we can only shift left by 12, on assert we cannot represent */
- assert(!(aimm & 0xfff));
- assert(aimm <= 0xfff000);
- base |= 1 << 22; /* apply LSL 12 */
- aimm >>= 2;
- }
-
- tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
static inline void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, 0xd503201f);
@@ -899,9 +881,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
(TARGET_LONG_BITS - TARGET_PAGE_BITS));
/* Add any "high bits" from the tlb offset to the env address into X2,
- to take advantage of the LSL12 form of the addi instruction.
+ to take advantage of the LSL12 form of the ADDI instruction.
X2 = env + (tlb_offset & 0xfff000) */
- tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
/* Merge the tlb index contribution into X2.
X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
@@ -1510,9 +1492,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
}
- /* make stack space for TCG locals */
- tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ /* Make stack space for TCG locals. */
+ tcg_fmt_Rdn_aimm(s, INSN_SUBI, 1, TCG_REG_SP, TCG_REG_SP,
+ frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+
/* inform TCG about how to find TCG locals with register, offset, size */
tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
CPU_TEMP_BUF_NLONGS * sizeof(long));
@@ -1529,9 +1512,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tb_ret_addr = s->code_ptr;
- /* remove TCG locals stack space */
- tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ /* Remove TCG locals stack space. */
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_SP, TCG_REG_SP,
+ frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
/* restore registers x19..x28.
FP must be preserved, so it still points to callee_saved area */
Richard Henderson
2013-09-14 21:54:31 UTC
In order to properly handle neg, as generated by TCG generic code.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 5b4eeee..08a0cc4 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -114,6 +114,7 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
#define TCG_CT_CONST_IS32 0x100
#define TCG_CT_CONST_AIMM 0x200
#define TCG_CT_CONST_LIMM 0x400
+#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
@@ -147,6 +148,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
case 'L': /* Valid for logical immediate. */
ct->ct |= TCG_CT_CONST_LIMM;
break;
+ case 'Z': /* zero */
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
default:
return -1;
}
@@ -198,6 +202,9 @@ static int tcg_target_const_match(tcg_target_long val,
if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
return 1;
}
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }

return 0;
}
@@ -1186,6 +1193,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCGArg a2 = args[2];
int c2 = const_args[2];

+ /* Some operands are defined with "rZ" constraint, a register or
+ the zero register. These need not actually test args[I] == 0. */
+#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
+
switch (opc) {
case INDEX_op_exit_tb:
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
@@ -1255,9 +1266,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* FALLTHRU */
case INDEX_op_sub_i64:
if (c2) {
- tcg_out_addsubi(s, ext, a0, a1, -a2);
+ /* Arithmetic immediate instructions use Xn|sp, and thus
+ we cannot encode the zero register if tcg optimization
+ is turned off and both A1 and A2 are constants. */
+ if (const_args[1]) {
+ tcg_out_movi(s, ext ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, -a2);
+ } else {
+ tcg_out_addsubi(s, ext, a0, a1, -a2);
+ }
} else {
- tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, REG0(1), a2);
}
break;

@@ -1481,6 +1499,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* Opcode not implemented. */
tcg_abort();
}
+
+#undef REG0
}

static const TCGTargetOpDef aarch64_op_defs[] = {
@@ -1518,8 +1538,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = {

{ INDEX_op_add_i32, { "r", "r", "rwA" } },
{ INDEX_op_add_i64, { "r", "r", "rA" } },
- { INDEX_op_sub_i32, { "r", "r", "rwA" } },
- { INDEX_op_sub_i64, { "r", "r", "rA" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rwA" } },
+ { INDEX_op_sub_i64, { "r", "rZ", "rA" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "rwL" } },
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:23 UTC
Permalink
And since we're no longer talking about opcodes, merge the 0x1ac02000
data2 primary opcode with the shift subcode to create the full insn.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 49 ++++++++++++++++++++++++------------------------
1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 99d9884..be6d05a 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -216,14 +216,13 @@ typedef enum {
INSN_ADDS = 0x2b000000,
INSN_SUB = 0x4b000000,
INSN_SUBS = 0x6b000000,
-} AArch64Insn;

-enum aarch64_srr_opc {
- SRR_SHL = 0x0,
- SRR_SHR = 0x4,
- SRR_SAR = 0x8,
- SRR_ROR = 0xc
-};
+ /* Data-processing (2 source) instructions */
+ INSN_LSLV = 0x1ac02000,
+ INSN_LSRV = 0x1ac02400,
+ INSN_ASRV = 0x1ac02800,
+ INSN_RORV = 0x1ac02c00,
+} AArch64Insn;

static inline enum aarch64_ldst_op_data
aarch64_ldst_get_data(TCGOpcode tcg_op)
@@ -472,12 +471,12 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
}

static inline void tcg_out_shiftrot_reg(TCGContext *s,
- enum aarch64_srr_opc opc, TCGType ext,
+ AArch64Insn insn, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
/* using 2-source data processing instructions 0x1ac02000 */
- unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
- tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
+ unsigned int base = insn | (ext ? 0x80000000 : 0);
+ tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
@@ -1214,47 +1213,47 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

case INDEX_op_shl_i64:
case INDEX_op_shl_i32:
- if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
- } else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
}
break;

case INDEX_op_shr_i64:
case INDEX_op_shr_i32:
- if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
+ if (c2) {
tcg_out_shr(s, ext, a0, a1, a2);
- } else { /* LSR / LSRV */
- tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
}
break;

case INDEX_op_sar_i64:
case INDEX_op_sar_i32:
- if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
+ if (c2) {
tcg_out_sar(s, ext, a0, a1, a2);
- } else { /* ASR / ASRV */
- tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
}
break;

case INDEX_op_rotr_i64:
case INDEX_op_rotr_i32:
- if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
+ if (c2) {
tcg_out_rotr(s, ext, a0, a1, a2);
- } else { /* ROR / RORV */
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
}
break;

case INDEX_op_rotl_i64:
- case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
- if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
+ case INDEX_op_rotl_i32:
+ if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
+ tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
}
break;
--
1.8.3.1
Claudio Fontana
2013-09-16 07:56:49 UTC
Permalink
Post by Richard Henderson
And since we're no longer talking about opcodes, merge the 0x1ac02000
data2 primary opcode with the shift subcode to create the full insn.
---
tcg/aarch64/tcg-target.c | 49 ++++++++++++++++++++++++------------------------
1 file changed, 24 insertions(+), 25 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 99d9884..be6d05a 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -216,14 +216,13 @@ typedef enum {
INSN_ADDS = 0x2b000000,
INSN_SUB = 0x4b000000,
INSN_SUBS = 0x6b000000,
-} AArch64Insn;
-enum aarch64_srr_opc {
- SRR_SHL = 0x0,
- SRR_SHR = 0x4,
- SRR_SAR = 0x8,
- SRR_ROR = 0xc
-};
+ /* Data-processing (2 source) instructions */
+ INSN_LSLV = 0x1ac02000,
+ INSN_LSRV = 0x1ac02400,
+ INSN_ASRV = 0x1ac02800,
+ INSN_RORV = 0x1ac02c00,
+} AArch64Insn;
static inline enum aarch64_ldst_op_data
aarch64_ldst_get_data(TCGOpcode tcg_op)
@@ -472,12 +471,12 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
}
static inline void tcg_out_shiftrot_reg(TCGContext *s,
- enum aarch64_srr_opc opc, TCGType ext,
+ AArch64Insn insn, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
/* using 2-source data processing instructions 0x1ac02000 */
- unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
- tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
+ unsigned int base = insn | (ext ? 0x80000000 : 0);
+ tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
@@ -1214,47 +1213,47 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
- } else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
}
break;
Any reason you strip all comments out?
They are supposed to hint the reader about how the tcg operation is implemented.
Post by Richard Henderson
- if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
+ if (c2) {
tcg_out_shr(s, ext, a0, a1, a2);
- } else { /* LSR / LSRV */
- tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
}
break;
- if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
+ if (c2) {
tcg_out_sar(s, ext, a0, a1, a2);
- } else { /* ASR / ASRV */
- tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
}
break;
- if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
+ if (c2) {
tcg_out_rotr(s, ext, a0, a1, a2);
- } else { /* ROR / RORV */
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
}
break;
- case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
- if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
+ if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
- tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
+ tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
}
break;
Richard Henderson
2013-09-16 15:06:13 UTC
Permalink
Post by Claudio Fontana
Post by Richard Henderson
- if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
- } else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
}
break;
Any reason you strip all comments out?
They are supposed to hint the reader about how the tcg operation is implemented.
Well, LSLV is now there in the code in the form of INSN_LSLV. The comment does
nothing additional to hint at what's going on; indeed, "LSL / LSLV" is more
confusing than not.

As for tcg_out_shl, comments about its implementation are with that function.


r~
Claudio Fontana
2013-09-17 08:51:16 UTC
Permalink
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
- if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
+ if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
- } else { /* LSL / LSLV */
- tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
+ } else {
+ tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
}
break;
Any reason you strip all comments out?
They are supposed to hint the reader about how the tcg operation is implemented.
Well, LSLV is now there in the code in the form of INSN_LSLV. The comment does
nothing additional to hint at what's going on; indeed, "LSL / LSLV" is more
confusing than not.
As for tcg_out_shl, comments about its implementation are with that function.
Fair enough.

C.
Richard Henderson
2013-09-14 21:54:28 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 103 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 80 insertions(+), 23 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 93badfd..59499fd 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -111,6 +111,9 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
}
}

+#define TCG_CT_CONST_IS32 0x100
+#define TCG_CT_CONST_AIMM 0x200
+
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
const char **pct_str)
@@ -134,6 +137,12 @@ static int target_parse_constraint(TCGArgConstraint *ct,
tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
break;
+ case 'w': /* The operand should be considered 32-bit. */
+ ct->ct |= TCG_CT_CONST_IS32;
+ break;
+ case 'A': /* Valid for arithmetic immediate (positive or negative). */
+ ct->ct |= TCG_CT_CONST_AIMM;
+ break;
default:
return -1;
}
@@ -143,14 +152,25 @@ static int target_parse_constraint(TCGArgConstraint *ct,
return 0;
}

-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
+static inline bool is_aimm(uint64_t val)
+{
+ return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
+}
+
+static int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
{
int ct = arg_ct->ct;

if (ct & TCG_CT_CONST) {
return 1;
}
+ if (ct & TCG_CT_CONST_IS32) {
+ val = (int32_t)val;
+ }
+ if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
+ return 1;
+ }

return 0;
}
@@ -558,11 +578,21 @@ static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

-static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm)
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
{
- /* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ AArch64Insn insn = INSN_SUBSI;
+ if (b < 0) {
+ insn = INSN_ADDSI;
+ b = -b;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, TCG_REG_XZR, a, b);
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, a, b);
+ }
}

static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -760,6 +790,17 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int aimm)
+{
+ AArch64Insn insn = INSN_ADDI;
+ if (aimm < 0) {
+ insn = INSN_SUBI;
+ aimm = -aimm;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, rd, rn, aimm);
+}
+
static inline void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, 0xd503201f);
@@ -896,7 +937,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(is_read ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)));
/* Perform the address comparison. */
- tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
+ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
*label_ptr = s->code_ptr;
/* If not equal, we jump to the slow path. */
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
@@ -1157,14 +1198,26 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0, a1, a2);
break;

- case INDEX_op_add_i64:
case INDEX_op_add_i32:
- tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_add_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
+ }
break;

- case INDEX_op_sub_i64:
case INDEX_op_sub_i32:
- tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_sub_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, -a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
+ }
break;

case INDEX_op_and_i64:
@@ -1233,15 +1286,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;

- case INDEX_op_brcond_i64:
case INDEX_op_brcond_i32:
- tcg_out_cmp(s, ext, a0, a1);
+ a1 = (int32_t)a1;
+ /* FALLTHRU */
+ case INDEX_op_brcond_i64:
+ tcg_out_cmp(s, ext, a0, a1, const_args[1]);
tcg_out_goto_label_cond(s, a2, args[3]);
break;

- case INDEX_op_setcond_i64:
case INDEX_op_setcond_i32:
- tcg_out_cmp(s, ext, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_setcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
tcg_out_cset(s, 0, a0, args[3]);
break;

@@ -1362,10 +1419,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_st32_i64, { "r", "r" } },
{ INDEX_op_st_i64, { "r", "r" } },

- { INDEX_op_add_i32, { "r", "r", "r" } },
- { INDEX_op_add_i64, { "r", "r", "r" } },
- { INDEX_op_sub_i32, { "r", "r", "r" } },
- { INDEX_op_sub_i64, { "r", "r", "r" } },
+ { INDEX_op_add_i32, { "r", "r", "rwA" } },
+ { INDEX_op_add_i64, { "r", "r", "rA" } },
+ { INDEX_op_sub_i32, { "r", "r", "rwA" } },
+ { INDEX_op_sub_i64, { "r", "r", "rA" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "r" } },
@@ -1386,10 +1443,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_rotl_i64, { "r", "r", "ri" } },
{ INDEX_op_rotr_i64, { "r", "r", "ri" } },

- { INDEX_op_brcond_i32, { "r", "r" } },
- { INDEX_op_setcond_i32, { "r", "r", "r" } },
- { INDEX_op_brcond_i64, { "r", "r" } },
- { INDEX_op_setcond_i64, { "r", "r", "r" } },
+ { INDEX_op_brcond_i32, { "r", "rwA" } },
+ { INDEX_op_brcond_i64, { "r", "rA" } },
+ { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
+ { INDEX_op_setcond_i64, { "r", "r", "rA" } },

{ INDEX_op_qemu_ld8u, { "r", "l" } },
{ INDEX_op_qemu_ld8s, { "r", "l" } },
--
1.8.3.1
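
For readers unfamiliar with the AArch64 arithmetic immediate: it is a 12-bit
value, optionally shifted left by another 12 bits, which is exactly what the
is_aimm() helper above tests. A small standalone sketch (plain C, not the
backend code itself) exercising the same check:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same test as is_aimm() in the patch: a 12-bit immediate, optionally
   shifted left by another 12 bits. */
static bool is_aimm(uint64_t val)
{
    return (val & ~0xfffull) == 0 || (val & ~0xfff000ull) == 0;
}

int main(void)
{
    /* 0xfff fits unshifted, 0x123000 fits shifted by 12, 0x1001 needs
       bits from both halves and is rejected -- the "rA" constraint then
       falls back to loading the constant into a register. */
    printf("%d %d %d\n", is_aimm(0xfff), is_aimm(0x123000), is_aimm(0x1001));
    return 0;
}
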
Claudio Fontana
2013-09-16 09:02:39 UTC
Permalink
Post by Richard Henderson
---
tcg/aarch64/tcg-target.c | 103 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 80 insertions(+), 23 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 93badfd..59499fd 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -111,6 +111,9 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
}
}
+#define TCG_CT_CONST_IS32 0x100
+#define TCG_CT_CONST_AIMM 0x200
+
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
const char **pct_str)
@@ -134,6 +137,12 @@ static int target_parse_constraint(TCGArgConstraint *ct,
tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
break;
+ case 'w': /* The operand should be considered 32-bit. */
+ ct->ct |= TCG_CT_CONST_IS32;
+ break;
+ case 'A': /* Valid for arithmetic immediate (positive or negative). */
+ ct->ct |= TCG_CT_CONST_AIMM;
+ break;
return -1;
}
@@ -143,14 +152,25 @@ static int target_parse_constraint(TCGArgConstraint *ct,
return 0;
}
-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
+static inline bool is_aimm(uint64_t val)
+{
+ return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
+}
+
+static int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
{
int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
}
+ if (ct & TCG_CT_CONST_IS32) {
+ val = (int32_t)val;
+ }
+ if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
+ return 1;
+ }
return 0;
}
@@ -558,11 +578,21 @@ static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}
-static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm)
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
{
- /* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ AArch64Insn insn = INSN_SUBSI;
+ if (b < 0) {
+ insn = INSN_ADDSI;
+ b = -b;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, TCG_REG_XZR, a, b);
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, a, b);
+ }
}
What about splitting into tcg_out_cmp_imm and tcg_out_cmp_reg?
or tcg_out_cmp_i and tcg_out_cmp_r.
The function is an 'if else' anyway with no code sharing, and we would avoid sidestepping the TCGReg type check for b in the _r case, as well as the const_b additional param.
Post by Richard Henderson
static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -760,6 +790,17 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}
+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int aimm)
+{
+ AArch64Insn insn = INSN_ADDI;
+ if (aimm < 0) {
+ insn = INSN_SUBI;
+ aimm = -aimm;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, rd, rn, aimm);
+}
+
Could this be a tcg_out_arith_imm, in a similar way to how we do tcg_out_arith? (tcg_out_arith_reg?)
Post by Richard Henderson
static inline void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, 0xd503201f);
@@ -896,7 +937,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(is_read ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)));
/* Perform the address comparison. */
- tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
+ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
*label_ptr = s->code_ptr;
/* If not equal, we jump to the slow path. */
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
@@ -1157,14 +1198,26 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0, a1, a2);
break;
- tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
/* fall through */ is less ugly.
Post by Richard Henderson
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
+ }
break;
- tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, -a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
+ }
break;
@@ -1233,15 +1286,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
- tcg_out_cmp(s, ext, a0, a1);
+ a1 = (int32_t)a1;
+ /* FALLTHRU */
+ tcg_out_cmp(s, ext, a0, a1, const_args[1]);
tcg_out_goto_label_cond(s, a2, args[3]);
break;
- tcg_out_cmp(s, ext, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ tcg_out_cmp(s, ext, a1, a2, c2);
tcg_out_cset(s, 0, a0, args[3]);
break;
@@ -1362,10 +1419,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_st32_i64, { "r", "r" } },
{ INDEX_op_st_i64, { "r", "r" } },
- { INDEX_op_add_i32, { "r", "r", "r" } },
- { INDEX_op_add_i64, { "r", "r", "r" } },
- { INDEX_op_sub_i32, { "r", "r", "r" } },
- { INDEX_op_sub_i64, { "r", "r", "r" } },
+ { INDEX_op_add_i32, { "r", "r", "rwA" } },
+ { INDEX_op_add_i64, { "r", "r", "rA" } },
+ { INDEX_op_sub_i32, { "r", "r", "rwA" } },
+ { INDEX_op_sub_i64, { "r", "r", "rA" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "r" } },
@@ -1386,10 +1443,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_rotl_i64, { "r", "r", "ri" } },
{ INDEX_op_rotr_i64, { "r", "r", "ri" } },
- { INDEX_op_brcond_i32, { "r", "r" } },
- { INDEX_op_setcond_i32, { "r", "r", "r" } },
- { INDEX_op_brcond_i64, { "r", "r" } },
- { INDEX_op_setcond_i64, { "r", "r", "r" } },
+ { INDEX_op_brcond_i32, { "r", "rwA" } },
+ { INDEX_op_brcond_i64, { "r", "rA" } },
+ { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
+ { INDEX_op_setcond_i64, { "r", "r", "rA" } },
{ INDEX_op_qemu_ld8u, { "r", "l" } },
{ INDEX_op_qemu_ld8s, { "r", "l" } },
Richard Henderson
2013-09-16 15:45:37 UTC
Permalink
Post by Richard Henderson
-static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm)
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
{
- /* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ AArch64Insn insn = INSN_SUBSI;
+ if (b < 0) {
+ insn = INSN_ADDSI;
+ b = -b;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, TCG_REG_XZR, a, b);
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, a, b);
+ }
}
What about splitting into tcg_out_cmp_imm and tcg_out_cmp_reg? or
tcg_out_cmp_i and tcg_out_cmp_r. The function is an 'if else' anyway with no
code sharing, and we would avoid sidestepping the TCGReg type check for b in
the _r case, as well as the const_b additional param.
This function is called once from tcg_out_tlb_read and three times from
tcg_out_op. I just thought that since the majority of uses would have to
perform this test, we might as well have it in the subroutine rather than
force all of the callers to replicate it.
Post by Richard Henderson
+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int aimm)
+{
+ AArch64Insn insn = INSN_ADDI;
+ if (aimm < 0) {
+ insn = INSN_SUBI;
+ aimm = -aimm;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, rd, rn, aimm);
+}
+
Could this be a tcg_out_arith_imm, in the similar way we would do
tcg_out_arith? (tcg_out_arith_reg?)
Which one gets renamed? You already proposed tcg_fmt_Rdn_aimm be named
tcg_out_arith_imm. Now you want tcg_out_addsubi renamed to the same name?

I suppose we could merge the two if we add the S bit as a parameter. Then
we don't have to distinguish between ADDI and ADDIS, and we could share code
with comparisons above...


r~
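
To make the "add the S bit as a parameter" idea above concrete: in the opcode
table from the patch, the flag-setting immediate forms differ from the plain
ones only in bit 29 (ADDI 0x11000000 vs ADDSI 0x31000000). A standalone,
hypothetical sketch of a merged emitter -- the helper name and shape are mine,
not part of the series:

#include <stdint.h>
#include <stdio.h>

/* Build an AArch64 ADD/SUB (immediate) instruction word; setting the
   S bit (bit 29) turns ADDI/SUBI into ADDSI/SUBSI. */
static uint32_t addsub_imm(int sf, int sub, int set_flags,
                           unsigned rd, unsigned rn, unsigned imm12)
{
    uint32_t insn = sub ? 0x51000000 : 0x11000000;   /* SUBI : ADDI */
    if (set_flags) {
        insn |= 0x20000000;                          /* -> SUBSI : ADDSI */
    }
    return insn | (uint32_t)sf << 31 | imm12 << 10 | rn << 5 | rd;
}

int main(void)
{
    /* With rd = 31 (xzr) and the S bit set this is the CMP alias:
       "subs xzr, x0, #4", i.e. "cmp x0, #4". */
    printf("0x%08x\n", addsub_imm(1, 1, 1, 31, 0, 4));   /* 0xf100101f */
    return 0;
}
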
Claudio Fontana
2013-09-17 08:49:41 UTC
Permalink
Post by Richard Henderson
Post by Richard Henderson
-static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm)
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
{
- /* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ AArch64Insn insn = INSN_SUBSI;
+ if (b < 0) {
+ insn = INSN_ADDSI;
+ b = -b;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, TCG_REG_XZR, a, b);
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, a, b);
+ }
}
What about splitting into tcg_out_cmp_imm and tcg_out_cmp_reg? or
tcg_out_cmp_i and tcg_out_cmp_r. The function is an 'if else' anyway with no
code sharing, and we would avoid sidestepping the TCGReg type check for b in
the _r case, as well as the const_b additional param.
This function is called once from tcg_out_tlb_read and three times from
tcg_out_opc. I just thought since the majority of uses would have to perform
this if then we might as well have it in the subroutine than force all of the
callers to replicate it.
Ok, that's a good point.
What about keeping tcg_out_cmp and having it do

if (const_b) {
tcg_out_cmp_i();
} else {
tcg_out_cmp_r();
}

so that code that wants to call cmp_r or cmp_i directly can do that?
I realize that at the moment it would benefit only tcg_out_tlb_read's use.
Post by Richard Henderson
Post by Richard Henderson
+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int aimm)
+{
+ AArch64Insn insn = INSN_ADDI;
+ if (aimm < 0) {
+ insn = INSN_SUBI;
+ aimm = -aimm;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, rd, rn, aimm);
+}
+
Could this be a tcg_out_arith_imm, in the similar way we would do
tcg_out_arith? (tcg_out_arith_reg?)
Which one gets renamed? You already proposed tcg_fmt_Rdn_aimm be named
tcg_out_arith_imm. Now you want tcg_out_addsubi renamed to the same name?
This one confused me, possibly because I don't see the reason for addsubi.
Post by Richard Henderson
I suppose we could merge the two if we add the S bit as a parameter. Then
we don't have to distinguish between ADDI and ADDIS, and we could share code
with comparisons above...
I am ok with keeping the two separate, distinguishing in principle a subtraction from a comparison.

Claudio
Richard Henderson
2013-09-14 21:54:38 UTC
Permalink
Cleaning up the implementation of tcg_out_movi at the same time.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 49 +++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index c44f404..f9319ed 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -279,6 +279,11 @@ typedef enum {
INSN_EOR = 0x4a000000,
INSN_EON = 0x4a200000,

+ /* Move wide immediate instructions */
+ INSN_MOVN = 0x12800000,
+ INSN_MOVZ = 0x52800000,
+ INSN_MOVK = 0x72800000,
+
/* Add/subtract immediate instructions */
INSN_ADDI = 0x11000000,
INSN_ADDSI = 0x31000000,
@@ -495,6 +500,16 @@ static inline void tcg_fmt_Rdnm_cond(TCGContext *s, AArch64Insn insn,
| tcg_cond_to_aarch64[c] << 12);
}

+/* This function is used for the Move (wide immediate) instruction group.
+ Note that SHIFT is a full shift count, not the 2 bit HW field. */
+static inline void tcg_fmt_Rd_uimm(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, uint16_t half,
+ unsigned shift)
+{
+ assert((shift & ~0x30) == 0);
+ tcg_out32(s, insn | sf << 31 | shift << (21 - 4) | half << 5 | rd);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -540,38 +555,30 @@ static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
tcg_fmt_Rdn_aimm(s, INSN_ADDI, ext, rd, rn, 0);
}

-static inline void tcg_out_movi_aux(TCGContext *s,
- TCGReg rd, uint64_t value)
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+ tcg_target_long value)
{
- uint32_t half, base, shift, movk = 0;
- /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
- /* using MOVZ 0x52800000 | extended reg.. */
- base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
+ AArch64Insn insn;
+
+ if (type == TCG_TYPE_I32) {
+ value = (uint32_t)value;
+ }
+
/* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
first MOVZ with the half-word immediate skipping the zeros, with a shift
- (LSL) equal to this number. Then morph all next instructions into MOVKs.
+ (LSL) equal to this number. Then all next instructions use MOVKs.
Zero the processed half-word in the value, continue until empty.
We build the final result 16bits at a time with up to 4 instructions,
but do not emit instructions for 16bit zero holes. */
+ insn = INSN_MOVZ;
do {
- shift = ctz64(value) & (63 & -16);
- half = (value >> shift) & 0xffff;
- tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
- movk = 0x20000000; /* morph next MOVZs into MOVKs */
+ unsigned shift = ctz64(value) & (63 & -16);
+ tcg_fmt_Rd_uimm(s, insn, shift >= 32, rd, value >> shift, shift);
value &= ~(0xffffUL << shift);
+ insn = INSN_MOVK;
} while (value);
}

-static inline void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg rd, tcg_target_long value)
-{
- if (type == TCG_TYPE_I64) {
- tcg_out_movi_aux(s, rd, value);
- } else {
- tcg_out_movi_aux(s, rd, value & 0xffffffff);
- }
-}
-
static inline void tcg_out_ldst_r(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
--
1.8.3.1
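
To make the MOVZ/MOVK loop above concrete, here is a standalone sketch that
prints the sequence rather than emitting it (assuming a non-zero value; the
GCC/Clang builtin stands in for QEMU's ctz64()):

#include <stdint.h>
#include <stdio.h>

static void show_movi(uint64_t value)     /* assumes value != 0 */
{
    const char *insn = "movz";
    do {
        /* Trailing-zero count rounded down to a multiple of 16. */
        unsigned shift = __builtin_ctzll(value) & (63 & -16);
        printf("%s x0, #0x%x, lsl #%u\n",
               insn, (unsigned)((value >> shift) & 0xffff), shift);
        value &= ~(0xffffull << shift);
        insn = "movk";                    /* all later lanes use MOVK */
    } while (value);
}

int main(void)
{
    /* Prints: movz x0, #0x5678, lsl #0
               movk x0, #0x1234, lsl #32
       -- the all-zero 16-bit lanes cost nothing. */
    show_movi(0x0000123400005678ull);
    return 0;
}
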
Richard Henderson
2013-09-14 21:54:37 UTC
Permalink
For remainder, generic code will produce mul+sub,
whereas we can implement with msub.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 51 +++++++++++++++++++++++++++++++++++++++---------
tcg/aarch64/tcg-target.h | 8 ++++----
2 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index b7f7fa5..c44f404 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,12 @@ typedef enum {
INSN_RORV = 0x1ac02c00,
INSN_SMULH = 0x9b407c00,
INSN_UMULH = 0x9bc07c00,
+ INSN_UDIV = 0x1ac00800,
+ INSN_SDIV = 0x1ac00c00,
+
+ /* Data-processing (3 source) instructions */
+ INSN_MADD = 0x1b000000,
+ INSN_MSUB = 0x1b008000,

/* Bitfield instructions */
INSN_BFM = 0x33000000,
@@ -416,6 +422,13 @@ static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
}

+/* This function is used for the Multiply instruction group. */
+static inline void tcg_fmt_Rdnma(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
+}
+
/* This function is used for the Arithmetic (immediate) instruction group.
The value of AIMM must be appropriate for encoding in the shift+imm12
fields. */
@@ -621,14 +634,6 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
arg, arg1, arg2);
}

-static inline void tcg_out_mul(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn, TCGReg rm)
-{
- /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
- unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
- tcg_out32(s, base | rm << 16 | rn << 5 | rd);
-}
-
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
{
@@ -1425,7 +1430,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

case INDEX_op_mul_i64:
case INDEX_op_mul_i32:
- tcg_out_mul(s, ext, a0, a1, a2);
+ tcg_fmt_Rdnma(s, INSN_MADD, ext, a0, a1, a2, TCG_REG_XZR);
+ break;
+
+ case INDEX_op_div_i64:
+ case INDEX_op_div_i32:
+ tcg_fmt_Rdnm(s, INSN_SDIV, ext, a0, a1, a2);
+ break;
+ case INDEX_op_divu_i64:
+ case INDEX_op_divu_i32:
+ tcg_fmt_Rdnm(s, INSN_UDIV, ext, a0, a1, a2);
+ break;
+
+ case INDEX_op_rem_i64:
+ case INDEX_op_rem_i32:
+ tcg_fmt_Rdnm(s, INSN_SDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_fmt_Rdnma(s, INSN_MSUB, ext, a0, TCG_REG_TMP, a2, a1);
+ break;
+ case INDEX_op_remu_i64:
+ case INDEX_op_remu_i32:
+ tcg_fmt_Rdnm(s, INSN_UDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_fmt_Rdnma(s, INSN_MSUB, ext, a0, TCG_REG_TMP, a2, a1);
break;

case INDEX_op_shl_i64:
@@ -1658,6 +1683,14 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_sub_i64, { "r", "rZ", "rA" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
+ { INDEX_op_div_i32, { "r", "r", "r" } },
+ { INDEX_op_div_i64, { "r", "r", "r" } },
+ { INDEX_op_divu_i32, { "r", "r", "r" } },
+ { INDEX_op_divu_i64, { "r", "r", "r" } },
+ { INDEX_op_rem_i32, { "r", "r", "r" } },
+ { INDEX_op_rem_i64, { "r", "r", "r" } },
+ { INDEX_op_remu_i32, { "r", "r", "r" } },
+ { INDEX_op_remu_i64, { "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "rwL" } },
{ INDEX_op_and_i64, { "r", "r", "rL" } },
{ INDEX_op_or_i32, { "r", "r", "rwL" } },
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 52c6c23..8b55ff9 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -39,8 +39,8 @@ typedef enum {
#define TCG_TARGET_CALL_STACK_OFFSET 0

/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 0
-#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
#define TCG_TARGET_HAS_ext8u_i32 1
@@ -64,8 +64,8 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

-#define TCG_TARGET_HAS_div_i64 0
-#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_ext16s_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
--
1.8.3.1
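
The rem lowering above (sdiv into a temp, then msub) relies on the usual
identity; a quick standalone check in plain C, where / and % already truncate
toward zero just as SDIV does:

#include <stdint.h>
#include <stdio.h>

/* rem_i64 is emitted as:  sdiv tmp, a1, a2 ; msub a0, tmp, a2, a1
   i.e.  a0 = a1 - (a1 / a2) * a2. */
static int64_t rem_via_msub(int64_t n, int64_t d)
{
    int64_t q = n / d;      /* sdiv */
    return n - q * d;       /* msub */
}

int main(void)
{
    printf("%lld %lld\n",
           (long long)rem_via_msub(-7, 3),    /* -1, same as -7 % 3 */
           (long long)rem_via_msub(7, -3));   /*  1, same as 7 % -3 */
    return 0;
}
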
Richard Henderson
2013-09-14 21:54:41 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 9effee7..e50abcb 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -573,6 +573,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
type = TCG_TYPE_I32;
}

+ /* Speed things up by handling the common case of small positive
+ and negative values specially. */
+ if ((value & ~0xffffull) == 0) {
+ tcg_fmt_Rd_uimm(s, INSN_MOVZ, type, rd, value, 0);
+ return;
+ }
+ if ((~svalue & ~0xffffull) == 0) {
+ tcg_fmt_Rd_uimm(s, INSN_MOVN, type, rd, ~svalue, 0);
+ return;
+ }
+
/* Check for bitfield immediates. For the benefit of 32-bit quantities,
use the sign-extended value. That lets us match rotated values such
as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.
--
1.8.3.1
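
A quick check of the two fast paths above (standalone sketch; svalue plays
the role of the sign-extended value in the patch): small positive values leave
no bits above the low 16, and for small negative values the inverse does.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* 0xabcd -> single MOVZ: nothing above the low 16 bits. */
    uint64_t value = 0xabcd;
    printf("movz ok: %d\n", (value & ~0xffffull) == 0);

    /* -2 -> single MOVN: ~svalue == 1 fits in 16 bits, and
       MOVN rd, #1 materializes ~1 == 0xffff...fffe. */
    int64_t svalue = -2;
    printf("movn ok: %d\n", (~svalue & ~0xffffll) == 0);
    return 0;
}
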
Richard Henderson
2013-09-14 21:54:39 UTC
Permalink
When profitable, initialize the register with MOVN instead of MOVZ,
before setting the remaining lanes with MOVK.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 88 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 75 insertions(+), 13 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index f9319ed..cecda05 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -559,24 +559,86 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
AArch64Insn insn;
-
- if (type == TCG_TYPE_I32) {
+ int i, wantinv, shift;
+ tcg_target_long svalue = value;
+ tcg_target_long ivalue, imask;
+
+ /* For 32-bit values, discard potential garbage in value. For 64-bit
+ values within [2**31, 2**32-1], we can create smaller sequences by
+ interpreting this as a negative 32-bit number, while ensuring that
+ the high 32 bits are cleared by setting SF=0. */
+ if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
+ svalue = (int32_t)value;
value = (uint32_t)value;
+ type = TCG_TYPE_I32;
+ }
+
+ /* Would it take fewer insns to begin with MOVN? For the value and its
+ inverse, count the number of 16-bit lanes that are 0. For the benefit
+ of 32-bit quantities, compare the zero-extended normal value vs the
+ sign-extended inverted value. For example,
+ v = 0x00000000f100ffff, zeros = 2
+ ~v = 0xffffffff0eff0000, zeros = 1
+ ~sv = 0x000000000eff0000, zeros = 3
+ By using ~sv we see that 3 > 2, leading us to emit just a single insn
+ "movn ret, 0x0eff, lsl #16". */
+
+ ivalue = ~svalue;
+ imask = 0;
+ wantinv = 0;
+
+ /* ??? This can be done in the simd unit without a loop:
+ // Move value and ivalue into V0 and V1 respectively.
+ mov v0.d[0], value
+ mov v1.d[0], ivalue
+ // Compare each 16-bit lane vs 0, producing -1 for true.
+ cmeq v0.4h, v0.4h, #0
+ cmeq v1.4h, v1.4h, #0
+ mov imask, v1.d[0]
+ // Sum the comparisons, producing 0 to -4.
+ addv h0, v0.4h
+ addv h1, v1.4h
+ // Subtract the two, forming a positive wantinv result.
+ sub v0.4h, v0.4h, v1.4h
+ smov wantinv, v0.h[0]
+ */
+ for (i = 0; i < 64; i += 16) {
+ tcg_target_long mask = 0xffffull << i;
+ if ((value & mask) == 0) {
+ wantinv -= 1;
+ }
+ if ((ivalue & mask) == 0) {
+ wantinv += 1;
+ imask |= mask;
+ }
}

- /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
- first MOVZ with the half-word immediate skipping the zeros, with a shift
- (LSL) equal to this number. Then all next instructions use MOVKs.
- Zero the processed half-word in the value, continue until empty.
- We build the final result 16bits at a time with up to 4 instructions,
- but do not emit instructions for 16bit zero holes. */
+ /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
insn = INSN_MOVZ;
- do {
- unsigned shift = ctz64(value) & (63 & -16);
- tcg_fmt_Rd_uimm(s, insn, shift >= 32, rd, value >> shift, shift);
+ if (wantinv > 0) {
+ value = ivalue;
+ insn = INSN_MOVN;
+ }
+
+ /* Find the lowest lane that is not 0x0000. */
+ shift = ctz64(value) & (63 & -16);
+ tcg_fmt_Rd_uimm(s, insn, type, rd, value >> shift, shift);
+
+ if (wantinv > 0) {
+ /* Re-invert the value, so MOVK sees non-inverted bits. */
+ value = ~value;
+ /* Clear out all the 0xffff lanes. */
+ value ^= imask;
+ }
+ /* Clear out the lane that we just set. */
+ value &= ~(0xffffUL << shift);
+
+ /* Iterate until all lanes have been set, and thus cleared from VALUE. */
+ while (value) {
+ shift = ctz64(value) & (63 & -16);
+ tcg_fmt_Rd_uimm(s, INSN_MOVK, type, rd, value >> shift, shift);
value &= ~(0xffffUL << shift);
- insn = INSN_MOVK;
- } while (value);
+ }
}

static inline void tcg_out_ldst_r(TCGContext *s,
--
1.8.3.1
Claudio Fontana
2013-09-16 09:16:29 UTC
Permalink
Post by Richard Henderson
When profitable, initialize the register with MOVN instead of MOVZ,
before setting the remaining lanes with MOVK.
---
tcg/aarch64/tcg-target.c | 88 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 75 insertions(+), 13 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index f9319ed..cecda05 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -559,24 +559,86 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
AArch64Insn insn;
-
- if (type == TCG_TYPE_I32) {
+ int i, wantinv, shift;
+ tcg_target_long svalue = value;
+ tcg_target_long ivalue, imask;
+
+ /* For 32-bit values, discard potential garbage in value. For 64-bit
+ values within [2**31, 2**32-1], we can create smaller sequences by
+ interpreting this as a negative 32-bit number, while ensuring that
+ the high 32 bits are cleared by setting SF=0. */
+ if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
+ svalue = (int32_t)value;
value = (uint32_t)value;
+ type = TCG_TYPE_I32;
+ }
+
+ /* Would it take fewer insns to begin with MOVN? For the value and its
+ inverse, count the number of 16-bit lanes that are 0. For the benefit
+ of 32-bit quantities, compare the zero-extended normal value vs the
+ sign-extended inverted value. For example,
+ v = 0x00000000f100ffff, zeros = 2
+ ~v = 0xffffffff0eff0000, zeros = 1
+ ~sv = 0x000000000eff0000, zeros = 3
+ By using ~sv we see that 3 > 2, leading us to emit just a single insn
+ "movn ret, 0x0eff, lsl #16". */
+
+ ivalue = ~svalue;
+ imask = 0;
+ wantinv = 0;
+
+ /* ??? This can be done in the simd unit without a loop:
+ // Move value and ivalue into V0 and V1 respectively.
+ mov v0.d[0], value
+ mov v1.d[0], ivalue
+ // Compare each 16-bit lane vs 0, producing -1 for true.
+ cmeq v0.4h, v0.4h, #0
+ cmeq v1.4h, v1.4h, #0
+ mov imask, v1.d[0]
+ // Sum the comparisons, producing 0 to -4.
+ addv h0, v0.4h
+ addv h1, v1.4h
+ // Subtract the two, forming a positive wantinv result.
+ sub v0.4h, v0.4h, v1.4h
+ smov wantinv, v0.h[0]
+ */
+ for (i = 0; i < 64; i += 16) {
+ tcg_target_long mask = 0xffffull << i;
+ if ((value & mask) == 0) {
+ wantinv -= 1;
+ }
+ if ((ivalue & mask) == 0) {
+ wantinv += 1;
+ imask |= mask;
+ }
}
- /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
- first MOVZ with the half-word immediate skipping the zeros, with a shift
- (LSL) equal to this number. Then all next instructions use MOVKs.
- Zero the processed half-word in the value, continue until empty.
- We build the final result 16bits at a time with up to 4 instructions,
- but do not emit instructions for 16bit zero holes. */
+ /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
insn = INSN_MOVZ;
- do {
- unsigned shift = ctz64(value) & (63 & -16);
- tcg_fmt_Rd_uimm(s, insn, shift >= 32, rd, value >> shift, shift);
+ if (wantinv > 0) {
+ value = ivalue;
+ insn = INSN_MOVN;
+ }
+
+ /* Find the lowest lane that is not 0x0000. */
+ shift = ctz64(value) & (63 & -16);
+ tcg_fmt_Rd_uimm(s, insn, type, rd, value >> shift, shift);
+
+ if (wantinv > 0) {
+ /* Re-invert the value, so MOVK sees non-inverted bits. */
+ value = ~value;
+ /* Clear out all the 0xffff lanes. */
+ value ^= imask;
+ }
+ /* Clear out the lane that we just set. */
+ value &= ~(0xffffUL << shift);
+
+ /* Iterate until all lanes have been set, and thus cleared from VALUE. */
+ while (value) {
+ shift = ctz64(value) & (63 & -16);
+ tcg_fmt_Rd_uimm(s, INSN_MOVK, type, rd, value >> shift, shift);
value &= ~(0xffffUL << shift);
- insn = INSN_MOVK;
- } while (value);
+ }
}
static inline void tcg_out_ldst_r(TCGContext *s,
I agree in general with the approach "let's see if it is more convenient to start with MOVN".
The existing implementation is, although not easy, leaner.
Can we make it a little this one a little bit leaner?
I'll think about it myself as well.

C.
Richard Henderson
2013-09-16 15:50:41 UTC
Permalink
Post by Claudio Fontana
I agree in general with the approach "lets see if it is more convenient to start with MOVN".
The existing implementation is, although not easy, leaner.
Can we make it a little this one a little bit leaner?
This sentence is not well formed. What did you mean?

In what sense did you mean "lean"? Smaller or faster?
If faster, see the comment about using neon insns.
If smaller... um... why?



r~
Claudio Fontana
2013-09-17 07:55:18 UTC
Permalink
Post by Richard Henderson
Post by Claudio Fontana
I agree in general with the approach "lets see if it is more convenient to start with MOVN".
The existing implementation is, although not easy, leaner.
Can we make it a little this one a little bit leaner?
This sentence is not well formed. What did you mean?
In what sense did you mean "lean"? Smaller or faster?
If faster, see the comment about using neon insns.
If smaller... um... why?
I am not suggesting implementing the neon insns based thing.
I am suggesting looking at ways to reduce the size and complexity of the code needed to implement the same thing you just posted.
If you don't see the why, there is probably little I can say to change that.
Richard Henderson
2013-09-17 15:56:58 UTC
Permalink
Post by Claudio Fontana
Post by Richard Henderson
Post by Claudio Fontana
I agree in general with the approach "lets see if it is more convenient to start with MOVN".
The existing implementation is, although not easy, leaner.
Can we make it a little this one a little bit leaner?
This sentence is not well formed. What did you mean?
In what sense did you mean "lean"? Smaller or faster?
If faster, see the comment about using neon insns.
If smaller... um... why?
I am not suggesting implementing the neon insns based thing.
I am suggesting looking at ways to reduce the size and complexity of the code needed to implement the same thing you just posted.
If you don't see the why, there is probably little I can say to change that.
I interpreted your meaning as akin to -Os, producing a smaller binary.

I obviously wrote it in the least complex way I could think to do the job. If
you can find a simpler way, feel free.


r~
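
For reference, the lane-counting heuristic being discussed reduces to the
following (standalone sketch reproducing the worked example from the comment
in the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t value = 0x00000000f100ffffull;  /* example from the patch */
    /* The high 32 bits are clear, so the patch treats this as a 32-bit
       quantity: svalue is the sign-extended form. */
    int64_t svalue = (int32_t)value;
    uint64_t ivalue = ~svalue;
    int wantinv = 0;

    for (int i = 0; i < 64; i += 16) {
        uint64_t mask = 0xffffull << i;
        if ((value & mask) == 0) {
            wantinv -= 1;     /* a lane MOVZ would get for free */
        }
        if ((ivalue & mask) == 0) {
            wantinv += 1;     /* a lane MOVN would get for free */
        }
    }

    /* value has 2 zero lanes, ~svalue has 3, so wantinv == 1 and the
       patch starts with MOVN -- here a single
       "movn ret, 0x0eff, lsl #16" covers the whole constant. */
    printf("wantinv = %d\n", wantinv);
    return 0;
}
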
Richard Henderson
2013-09-14 21:54:45 UTC
Permalink
In some cases, a direct branch will be in range.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index ce3c17b..3d1108c 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1074,9 +1074,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
tcg_out_adr(s, TCG_REG_X3, (uintptr_t)lb->raddr);

- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
- (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
- tcg_out_callr(s, TCG_REG_TMP);
+ tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);

if (lb->opc & 0x04) {
tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
@@ -1097,9 +1095,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
tcg_out_adr(s, TCG_REG_X4, (uintptr_t)lb->raddr);

- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
- (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
- tcg_out_callr(s, TCG_REG_TMP);
+ tcg_out_call(s, (tcg_target_long)qemu_st_helpers[lb->opc & 3]);

tcg_out_goto(s, (tcg_target_long)lb->raddr);
}
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:30 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 65 ++++++++++++++++++++++++++++++++++++++++++------
tcg/aarch64/tcg-target.h | 16 ++++++------
2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index bc651ac..5b4eeee 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -258,10 +258,12 @@ typedef enum {
INSN_EORI = 0x52000000,

/* Logical shifted register instructions */
- INSN_AND = 0x0a000000,
- INSN_ORR = 0x2a000000,
- INSN_EOR = 0x4a000000,
- INSN_ANDS = 0x6a000000,
+ INSN_AND = 0x0a000000,
+ INSN_BIC = 0x0a200000,
+ INSN_ORR = 0x2a000000,
+ INSN_ORN = 0x2a200000,
+ INSN_EOR = 0x4a000000,
+ INSN_EON = 0x4a200000,

/* Add/subtract immediate instructions */
INSN_ADDI = 0x11000000,
@@ -270,10 +272,10 @@ typedef enum {
INSN_SUBSI = 0x71000000,

/* Add/subtract shifted register instructions */
- INSN_ADD = 0x0b000000,
- INSN_ADDS = 0x2b000000,
- INSN_SUB = 0x4b000000,
- INSN_SUBS = 0x6b000000,
+ INSN_ADD = 0x0b000000,
+ INSN_ADDS = 0x2b000000,
+ INSN_SUB = 0x4b000000,
+ INSN_SUBS = 0x6b000000,

/* Data-processing (2 source) instructions */
INSN_LSLV = 0x1ac02000,
@@ -1270,6 +1272,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;

+ case INDEX_op_andc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_andc_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_ANDI, ext, a0, a1, ~a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_BIC, ext, a0, a1, a2);
+ }
+ break;
+
case INDEX_op_or_i32:
a2 = (int32_t)a2;
/* FALLTHRU */
@@ -1281,6 +1294,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;

+ case INDEX_op_orc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_orc_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_ORRI, ext, a0, a1, ~a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_ORN, ext, a0, a1, a2);
+ }
+ break;
+
case INDEX_op_xor_i32:
a2 = (int32_t)a2;
/* FALLTHRU */
@@ -1292,6 +1316,22 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;

+ case INDEX_op_eqv_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_eqv_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_EORI, ext, a0, a1, ~a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_EON, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_not_i64:
+ case INDEX_op_not_i32:
+ tcg_fmt_Rdnm(s, INSN_ORN, ext, a0, TCG_REG_XZR, a1);
+ break;
+
case INDEX_op_mul_i64:
case INDEX_op_mul_i32:
tcg_out_mul(s, ext, a0, a1, a2);
@@ -1488,6 +1528,15 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_or_i64, { "r", "r", "rL" } },
{ INDEX_op_xor_i32, { "r", "r", "rwL" } },
{ INDEX_op_xor_i64, { "r", "r", "rL" } },
+ { INDEX_op_andc_i32, { "r", "r", "rwL" } },
+ { INDEX_op_andc_i64, { "r", "r", "rL" } },
+ { INDEX_op_orc_i32, { "r", "r", "rwL" } },
+ { INDEX_op_orc_i64, { "r", "r", "rL" } },
+ { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
+ { INDEX_op_eqv_i64, { "r", "r", "rL" } },
+
+ { INDEX_op_not_i32, { "r", "r" } },
+ { INDEX_op_not_i64, { "r", "r" } },

{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index d3a1bc2..efc506b 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -47,12 +47,12 @@ typedef enum {
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 0
+#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_neg_i32 0
#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
@@ -75,12 +75,12 @@ typedef enum {
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_neg_i64 0
#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
--
1.8.3.1
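
The immediate cases above work because the inversion can be folded either into
the instruction (BIC/ORN/EON) or into the constant (ANDI/ORRI/EORI with ~a2).
One identity is worth spelling out; a trivial standalone check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t a = 0x1234567890abcdefull, b = 0x00ff00ff00ff00ffull;

    /* tcg's eqv is ~(a ^ b); EON computes a ^ ~b -- the same value,
       so no separate NOT is needed. */
    printf("eqv: %d\n", (~(a ^ b)) == (a ^ ~b));

    /* not is ORN rd, xzr, rn, i.e. 0 | ~a. */
    printf("not: %d\n", (0 | ~a) == ~a);
    return 0;
}
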
Richard Henderson
2013-09-14 21:54:44 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
include/exec/exec-all.h | 11 -----------
tcg/aarch64/tcg-target.c | 44 ++++++++++++++++++++++++++------------------
2 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index beb4149..8b106f5 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -340,17 +340,6 @@ static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
ra += b; /* apply the displacement */
return ra;
}
-# elif defined(__aarch64__)
-# define GETRA_LDST(RA) tcg_getra_ldst(RA)
-static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
-{
- int32_t b;
- ra += 4; /* skip one instruction */
- b = *(int32_t *)ra; /* load the branch insn */
- b = (b << 6) >> (6 - 2); /* extract the displacement */
- ra += b; /* apply the displacement */
- return ra;
-}
# endif
#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 1905271..ce3c17b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1038,39 +1038,46 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
}
}

-static inline void tcg_out_nop(TCGContext *s)
-{
- tcg_out32(s, 0xd503201f);
-}
-
#ifdef CONFIG_SOFTMMU
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};

-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};

+static inline void tcg_out_adr(TCGContext *s, TCGReg rd, tcg_target_long addr)
+{
+ addr -= (tcg_target_long)s->code_ptr;
+ assert(addr == sextract64(addr, 0, 21));
+ tcg_fmt_Rd_disp21(s, INSN_ADR, rd, addr);
+}
+
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
+ tcg_out_adr(s, TCG_REG_X3, (uintptr_t)lb->raddr);
+
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
(tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
tcg_out_callr(s, TCG_REG_TMP);
+
if (lb->opc & 0x04) {
tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
} else {
@@ -1088,11 +1095,12 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
+ tcg_out_adr(s, TCG_REG_X4, (uintptr_t)lb->raddr);
+
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
(tcg_target_long)qemu_st_helpers[lb->opc & 3]);
tcg_out_callr(s, TCG_REG_TMP);

- tcg_out_nop(s);
tcg_out_goto(s, (tcg_target_long)lb->raddr);
}
--
1.8.3.1
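
tcg_out_adr above asserts that the displacement fits ADR's 21-bit signed
immediate, i.e. roughly +/-1 MiB around the current code pointer. A standalone
sketch of that range check (the sign_extend helper stands in for QEMU's
sextract64):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Sign-extend the low LEN bits of V (sextract64(v, 0, len)). */
static int64_t sign_extend(uint64_t v, int len)
{
    return (int64_t)(v << (64 - len)) >> (64 - len);
}

static bool in_adr_range(int64_t disp)
{
    return disp == sign_extend((uint64_t)disp, 21);  /* same as the assert */
}

int main(void)
{
    printf("%d %d\n",
           in_adr_range(0xfffff),     /* +1 MiB - 1: representable  */
           in_adr_range(0x100000));   /* +1 MiB: out of ADR's range */
    return 0;
}
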
Richard Henderson
2013-09-14 21:54:27 UTC
Permalink
Avoid the magic numbers in the current implementation.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 56625a9..93badfd 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -379,13 +379,18 @@ static inline void tcg_out_ldst_12(TCGContext *s,
| op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
}

+/* Register to register move using ORR (shifted register with no shift). */
static inline void tcg_out_movr(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg src)
+ TCGReg rd, TCGReg rm)
{
- /* register to register move using MOV (shifted register with no shift) */
- /* using MOV 0x2a0003e0 | (shift).. */
- unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
- tcg_out32(s, base | src << 16 | rd);
+ tcg_fmt_Rdnm(s, INSN_ORR, ext, rd, TCG_REG_XZR, rm);
+}
+
+/* Register to register move using ADDI (move to/from SP). */
+static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn)
+{
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, ext, rd, rn, 0);
}

static inline void tcg_out_movi_aux(TCGContext *s,
@@ -460,15 +465,6 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
}

-/* mov alias implemented with add immediate, useful to move to/from SP */
-static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn)
-{
- /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
- unsigned int base = ext ? 0x91000000 : 0x11000000;
- tcg_out32(s, base | rn << 5 | rd);
-}
-
static inline void tcg_out_mov(TCGContext *s,
TCGType type, TCGReg ret, TCGReg arg)
{
--
1.8.3.1
Claudio Fontana
2013-09-16 08:50:29 UTC
Permalink
Post by Richard Henderson
Avoid the magic numbers in the current implementation.
---
tcg/aarch64/tcg-target.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 56625a9..93badfd 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -379,13 +379,18 @@ static inline void tcg_out_ldst_12(TCGContext *s,
| op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
}
+/* Register to register move using ORR (shifted register with no shift). */
static inline void tcg_out_movr(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg src)
+ TCGReg rd, TCGReg rm)
{
- /* register to register move using MOV (shifted register with no shift) */
- /* using MOV 0x2a0003e0 | (shift).. */
- unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
- tcg_out32(s, base | src << 16 | rd);
+ tcg_fmt_Rdnm(s, INSN_ORR, ext, rd, TCG_REG_XZR, rm);
+}
+
+/* Register to register move using ADDI (move to/from SP). */
+static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn)
+{
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, ext, rd, rn, 0);
}
static inline void tcg_out_movi_aux(TCGContext *s,
@@ -460,15 +465,6 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
}
I am of course ok with reusing the existing functions; this is an improvement.
See the comments about the tcg_fmt_* functions in previous patches for the related issues.
Post by Richard Henderson
-/* mov alias implemented with add immediate, useful to move to/from SP */
-static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
- TCGReg rd, TCGReg rn)
-{
- /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
- unsigned int base = ext ? 0x91000000 : 0x11000000;
- tcg_out32(s, base | rn << 5 | rd);
-}
-
static inline void tcg_out_mov(TCGContext *s,
TCGType type, TCGReg ret, TCGReg arg)
{
Richard Henderson
2013-09-14 21:54:32 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 24 +++++++++++++++++++++++-
tcg/aarch64/tcg-target.h | 4 ++--
2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 08a0cc4..e9a0f9b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -289,6 +289,10 @@ typedef enum {
INSN_LSRV = 0x1ac02400,
INSN_ASRV = 0x1ac02800,
INSN_RORV = 0x1ac02c00,
+
+ /* Conditional select instructions */
+ INSN_CSEL = 0x1a800000,
+ INSN_CSINC = 0x1a800400,
} AArch64Insn;

static inline enum aarch64_ldst_op_data
@@ -451,6 +455,14 @@ static void tcg_fmt_Rdn_limm(TCGContext *s, AArch64Insn insn, TCGType sf,
tcg_fmt_Rdn_nrs(s, insn, sf, rd, rn, sf, r, c);
}

+static inline void tcg_fmt_Rdnm_cond(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ TCGReg rm, TCGCond c)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd
+ | tcg_cond_to_aarch64[c] << 12);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -1414,7 +1426,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* FALLTHRU */
case INDEX_op_setcond_i64:
tcg_out_cmp(s, ext, a1, a2, c2);
- tcg_out_cset(s, 0, a0, args[3]);
+ tcg_out_cset(s, ext, a0, args[3]);
+ break;
+
+ case INDEX_op_movcond_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_movcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
+ tcg_fmt_Rdnm_cond(s, INSN_CSEL, ext, a0, REG0(3), REG0(4), args[5]);
break;

case INDEX_op_qemu_ld8u:
@@ -1573,6 +1593,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_brcond_i64, { "r", "rA" } },
{ INDEX_op_setcond_i32, { "r", "r", "rwA" } },
{ INDEX_op_setcond_i64, { "r", "r", "rA" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "r", "rwA", "rZ", "rZ" } },

{ INDEX_op_qemu_ld8u, { "r", "l" } },
{ INDEX_op_qemu_ld8s, { "r", "l" } },
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index efc506b..85e9929 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -56,7 +56,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
@@ -84,7 +84,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
--
1.8.3.1
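
For readers less familiar with CSEL: the cmp + csel pair above is a
branch-free select, so movcond maps onto it directly. A sketch of the
semantics only (plain C, using LT as the example condition):

#include <stdint.h>
#include <stdio.h>

/* movcond dest, c1, c2, v1, v2, cond is emitted as
   "cmp c1, c2 ; csel dest, v1, v2, cond", which computes: */
static int64_t movcond_lt(int64_t c1, int64_t c2, int64_t v1, int64_t v2)
{
    return c1 < c2 ? v1 : v2;
}

int main(void)
{
    printf("%d %d\n",
           (int)movcond_lt(1, 2, 10, 20),   /* 10 */
           (int)movcond_lt(3, 2, 10, 20));  /* 20 */
    return 0;
}
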
Richard Henderson
2013-09-14 21:54:35 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
tcg/aarch64/tcg-target.h | 8 ++---
2 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 894a1d9..bc1ca84 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -115,6 +115,7 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
#define TCG_CT_CONST_AIMM 0x200
#define TCG_CT_CONST_LIMM 0x400
#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_MONE 0x1000

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
@@ -148,6 +149,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
case 'L': /* Valid for logical immediate. */
ct->ct |= TCG_CT_CONST_LIMM;
break;
+ case 'M': /* minus one */
+ ct->ct |= TCG_CT_CONST_MONE;
+ break;
case 'Z': /* zero */
ct->ct |= TCG_CT_CONST_ZERO;
break;
@@ -205,6 +209,9 @@ static int tcg_target_const_match(tcg_target_long val,
if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
return 1;
}
+ if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
+ }

return 0;
}
@@ -284,6 +291,10 @@ typedef enum {
INSN_SUB = 0x4b000000,
INSN_SUBS = 0x6b000000,

+ /* Add/subtract with carry instructions */
+ INSN_ADC = 0x1a000000,
+ INSN_SBC = 0x5a000000,
+
/* Data-processing (2 source) instructions */
INSN_LSLV = 0x1ac02000,
INSN_LSRV = 0x1ac02400,
@@ -869,6 +880,47 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
tcg_fmt_Rdn_aimm(s, insn, ext, rd, rn, aimm);
}

+static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+ TCGReg rh, TCGReg al, TCGReg ah,
+ tcg_target_long bl, tcg_target_long bh,
+ bool const_bl, bool const_bh, bool sub)
+{
+ TCGReg orig_rl = rl;
+ AArch64Insn insn;
+
+ if (rl == ah || (!const_bh && rl == bh)) {
+ rl = TCG_REG_TMP;
+ }
+
+ if (const_bl) {
+ insn = INSN_ADDSI;
+ if ((bl < 0) ^ sub) {
+ insn = INSN_SUBSI;
+ bl = -bl;
+ }
+ tcg_fmt_Rdn_aimm(s, insn, ext, rl, al, bl);
+ } else {
+ tcg_fmt_Rdnm(s, sub ? INSN_SUBS : INSN_ADDS, ext, rl, al, bl);
+ }
+
+ insn = INSN_ADC;
+ if (const_bh) {
+ /* Note that the only two constants we support are 0 and -1, and
+ that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
+ if ((bh != 0) ^ sub) {
+ insn = INSN_SBC;
+ }
+ bh = TCG_REG_XZR;
+ } else if (sub) {
+ insn = INSN_SBC;
+ }
+ tcg_fmt_Rdnm(s, insn, ext, rh, ah, bh);
+
+ if (rl != orig_rl) {
+ tcg_out_movr(s, ext, orig_rl, rl);
+ }
+}
+
static inline void tcg_out_nop(TCGContext *s)
{
tcg_out32(s, 0xd503201f);
@@ -1524,6 +1576,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
break;

+ case INDEX_op_add2_i32:
+ a2 = (int32_t)args[4];
+ c2 = false;
+ goto do_addsub2;
+ case INDEX_op_add2_i64:
+ a2 = args[4];
+ c2 = false;
+ goto do_addsub2;
+ case INDEX_op_sub2_i32:
+ a2 = (int32_t)args[4];
+ c2 = true;
+ goto do_addsub2;
+ case INDEX_op_sub2_i64:
+ a2 = args[4];
+ c2 = true;
+ goto do_addsub2;
+ do_addsub2:
+ tcg_out_addsub2(s, ext, a0, a1, REG0(2), REG0(3), a2,
+ args[5], const_args[4], const_args[5], c2);
+ break;
+
case INDEX_op_mov_i64:
case INDEX_op_mov_i32:
case INDEX_op_movi_i64:
@@ -1646,6 +1719,11 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_deposit_i32, { "r", "0", "rZ" } },
{ INDEX_op_deposit_i64, { "r", "0", "rZ" } },

+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
+ { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
+ { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+
{ -1 },
};

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index ac1c97d..f311954 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -57,8 +57,8 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
@@ -85,8 +85,8 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
--
1.8.3.1
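
A plain-C picture of what the ADDS/ADC pair in tcg_out_addsub2 computes (a
sketch of the semantics, not of the emitters): the low halves are added first
and the carry out feeds the high-half addition.

#include <stdint.h>
#include <stdio.h>

/* add2: {rh,rl} = {ah,al} + {bh,bl}.  ADDS produces rl plus the carry
   flag; ADC folds that carry into the high-part sum. */
static void add2(uint64_t *rl, uint64_t *rh,
                 uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    uint64_t lo = al + bl;
    uint64_t carry = lo < al;           /* carry out of the low half */
    *rl = lo;
    *rh = ah + bh + carry;
}

int main(void)
{
    uint64_t rl, rh;
    add2(&rl, &rh, ~0ull, 0, 1, 0);     /* 0xffff...ffff + 1 */
    printf("0x%llx 0x%llx\n",
           (unsigned long long)rh, (unsigned long long)rl);   /* 0x1 0x0 */
    return 0;
}
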
Richard Henderson
2013-09-14 21:54:34 UTC
Permalink
Also tidy the implementation of ubfm, sbfm, extr in order to share code.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 49 +++++++++++++++++++++++++++++++++++-------------
tcg/aarch64/tcg-target.h | 4 ++--
2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 5f2c437..894a1d9 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -290,6 +290,12 @@ typedef enum {
INSN_ASRV = 0x1ac02800,
INSN_RORV = 0x1ac02c00,

+ /* Bitfield instructions */
+ INSN_BFM = 0x33000000,
+ INSN_SBFM = 0x13000000,
+ INSN_UBFM = 0x53000000,
+ INSN_EXTR = 0x13800000,
+
/* Conditional select instructions */
INSN_CSEL = 0x1a800000,
INSN_CSINC = 0x1a800400,
@@ -610,28 +616,30 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}

+static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_fmt_Rdn_nrs(s, INSN_BFM, ext, rd, rn, ext, a, b);
+}
+
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
{
- /* Using UBFM 0x53000000 Wd, Wn, a, b */
- unsigned int base = ext ? 0xd3400000 : 0x53000000;
- tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
+ tcg_fmt_Rdn_nrs(s, INSN_UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
{
- /* Using SBFM 0x13000000 Wd, Wn, a, b */
- unsigned int base = ext ? 0x93400000 : 0x13000000;
- tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
+ tcg_fmt_Rdn_nrs(s, INSN_SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, TCGReg rm, unsigned int a)
{
- /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
- unsigned int base = ext ? 0x93c00000 : 0x13800000;
- tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
+ /* ??? Abuse of the fmt function, but RM field is at the same location
+ as the IMMR field and we do still need to set N = SF for IMMS. */
+ tcg_fmt_Rdn_nrs(s, INSN_EXTR, ext, rd, rn, ext, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
@@ -673,6 +681,15 @@ static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

+static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned lsb, unsigned width)
+{
+ unsigned size = ext ? 64 : 32;
+ unsigned a = (size - lsb) & (size - 1);
+ unsigned b = width - 1;
+ tcg_out_bfm(s, ext, rd, rn, a, b);
+}
+
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
tcg_target_long b, bool const_b)
{
@@ -828,8 +845,7 @@ static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
TCGReg rd, TCGReg rn)
{
- /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
- of SBFM Xd, Xn, #0, #7|15|31 */
+ /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
int bits = 8 * (1 << s_bits) - 1;
tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}
@@ -837,8 +853,7 @@ static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
static inline void tcg_out_uxt(TCGContext *s, int s_bits,
TCGReg rd, TCGReg rn)
{
- /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
- of UBFM Wd, Wn, #0, #7|15 */
+ /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
int bits = 8 * (1 << s_bits) - 1;
tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}
@@ -1504,6 +1519,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movr(s, 0, a0, a1);
break;

+ case INDEX_op_deposit_i64:
+ case INDEX_op_deposit_i32:
+ tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
+ break;
+
case INDEX_op_mov_i64:
case INDEX_op_mov_i32:
case INDEX_op_movi_i64:
@@ -1623,6 +1643,9 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_ext16u_i64, { "r", "r" } },
{ INDEX_op_ext32u_i64, { "r", "r" } },

+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+
{ -1 },
};

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 85e9929..ac1c97d 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -55,7 +55,7 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
@@ -83,7 +83,7 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i64 1
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:36 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 12 ++++++++++++
tcg/aarch64/tcg-target.h | 4 ++--
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index bc1ca84..b7f7fa5 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -300,6 +300,8 @@ typedef enum {
INSN_LSRV = 0x1ac02400,
INSN_ASRV = 0x1ac02800,
INSN_RORV = 0x1ac02c00,
+ INSN_SMULH = 0x9b407c00,
+ INSN_UMULH = 0x9bc07c00,

/* Bitfield instructions */
INSN_BFM = 0x33000000,
@@ -1597,6 +1599,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[5], const_args[4], const_args[5], c2);
break;

+ case INDEX_op_muluh_i64:
+ tcg_fmt_Rdnm(s, INSN_UMULH, 1, a0, a1, a2);
+ break;
+ case INDEX_op_mulsh_i64:
+ tcg_fmt_Rdnm(s, INSN_SMULH, 1, a0, a1, a2);
+ break;
+
case INDEX_op_mov_i64:
case INDEX_op_mov_i32:
case INDEX_op_movi_i64:
@@ -1724,6 +1733,9 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
{ INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },

+ { INDEX_op_muluh_i64, { "r", "r", "r" } },
+ { INDEX_op_mulsh_i64, { "r", "r", "r" } },
+
{ -1 },
};

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index f311954..52c6c23 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -89,8 +89,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1

enum {
TCG_AREG0 = TCG_REG_X19,
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:42 UTC
Permalink
Loading a QEMU pointer as an immediate happens often. E.g.

- exit_tb $0x7fa8140013
+ exit_tb $0x7f81ee0013
...
- : d2800260 mov x0, #0x13
- : f2b50280 movk x0, #0xa814, lsl #16
- : f2c00fe0 movk x0, #0x7f, lsl #32
+ : 90ff1000 adrp x0, 0x7f81ee0000
+ : 91004c00 add x0, x0, #0x13
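To make the reachability test concrete, here is a minimal standalone sketch
(the helper name and the explicit range check are mine; the patch below does
the same check with sextract64):

    #include <stdint.h>
    #include <stdbool.h>

    /* Can VALUE be formed with ADRP+ADD relative to CODE_PTR?
       ADRP covers a signed 21-bit page displacement, i.e. roughly +/- 4GiB. */
    static bool adrp_reachable(uint64_t value, uint64_t code_ptr)
    {
        int64_t disp = (int64_t)(value >> 12) - (int64_t)(code_ptr >> 12);
        return disp >= -(1 << 20) && disp < (1 << 20);
    }

When this holds, ADRP materializes the page and the remaining low 12 bits are
added with a plain ADD immediate, giving the two-insn sequence shown above.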

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index e50abcb..5691cc3 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -323,6 +323,10 @@ typedef enum {
/* Conditional select instructions */
INSN_CSEL = 0x1a800000,
INSN_CSINC = 0x1a800400,
+
+ /* PC relative addressing instructions */
+ INSN_ADR = 0x10000000,
+ INSN_ADRP = 0x90000000,
} AArch64Insn;

static inline enum aarch64_ldst_op_data
@@ -510,6 +514,12 @@ static inline void tcg_fmt_Rd_uimm(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | sf << 31 | shift << (21 - 4) | half << 5 | rd);
}

+static inline void tcg_fmt_Rd_disp21(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, tcg_target_long disp)
+{
+ tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -561,7 +571,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
AArch64Insn insn;
int i, wantinv, shift;
tcg_target_long svalue = value;
- tcg_target_long ivalue, imask;
+ tcg_target_long ivalue, imask, disp;

/* For 32-bit values, discard potential garbage in value. For 64-bit
values within [2**31, 2**32-1], we can create smaller sequences by
@@ -593,6 +603,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
return;
}

+ /* Look for host pointer values within 4G of the PC. This happens
+ often when loading pointers to QEMU's own data structures. */
+ disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+ if (disp == sextract64(disp, 0, 21)) {
+ tcg_fmt_Rd_disp21(s, INSN_ADRP, rd, disp);
+ if (value & 0xfff) {
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, type, rd, rd, value & 0xfff);
+ }
+ return;
+ }
+
/* Would it take fewer insns to begin with MOVN? For the value and its
inverse, count the number of 16-bit lanes that are 0. For the benefit
of 32-bit quantities, compare the zero-extended normal value vs the
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:40 UTC
Permalink
The subset of logical immediates that we support is quite quick to test,
and it is quite common to want to load such constants.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cecda05..9effee7 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -573,6 +573,15 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
type = TCG_TYPE_I32;
}

+ /* Check for bitfield immediates. For the benefit of 32-bit quantities,
+ use the sign-extended value. That lets us match rotated values such
+ as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.
+ The truncation happens inside tcg_fmt_Rdn_limm. */
+ if (is_limm(svalue)) {
+ tcg_fmt_Rdn_limm(s, INSN_ORRI, type, rd, TCG_REG_XZR, svalue);
+ return;
+ }
+
/* Would it take fewer insns to begin with MOVN? For the value and its
inverse, count the number of 16-bit lanes that are 0. For the benefit
of 32-bit quantities, compare the zero-extended normal value vs the
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:49 UTC
Permalink
Combines 4 other inline functions and tidies the prologue.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 84 ++++++++++++++++--------------------------------
1 file changed, 27 insertions(+), 57 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index fa88e4b..94f9ac1 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -333,6 +333,10 @@ typedef enum {
INSN_BLR = 0xd63f0000,
INSN_RET = 0xd65f0000,
INSN_B_C = 0x54000000,
+
+ /* Load/store instructions */
+ INSN_LDP = 0x28400000,
+ INSN_STP = 0x28000000,
} AArch64Insn;

static inline enum aarch64_ldst_op_data
@@ -1331,56 +1335,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)

static uint8_t *tb_ret_addr;

-/* callee stack use example:
- stp x29, x30, [sp,#-32]!
- mov x29, sp
- stp x1, x2, [sp,#16]
- ...
- ldp x1, x2, [sp,#16]
- ldp x29, x30, [sp],#32
- ret
-*/
-
-/* push r1 and r2, and alloc stack space for a total of
- alloc_n elements (1 element=16 bytes, must be between 1 and 31. */
-static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
- TCGReg r1, TCGReg r2, int alloc_n)
-{
- /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
- | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
- assert(alloc_n > 0 && alloc_n < 0x20);
- alloc_n = (-alloc_n) & 0x3f;
- tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
-}
-
-/* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
-static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
- TCGReg r1, TCGReg r2, int alloc_n)
-{
- /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
- | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
- assert(alloc_n > 0 && alloc_n < 0x20);
- tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
-}
-
-static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
- TCGReg r1, TCGReg r2, int idx)
-{
- /* using register pair offset simm7 STP 0x29000000 | (ext)
- | idx << 16 | r2 << 10 | addr << 5 | r1 */
- assert(idx > 0 && idx < 0x20);
- tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
-}
-
-static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
- TCGReg r1, TCGReg r2, int idx)
-{
- /* using register pair offset simm7 LDP 0x29400000 | (ext)
- | idx << 16 | r2 << 10 | addr << 5 | r1 */
- assert(idx > 0 && idx < 0x20);
- tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
-}
-
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
@@ -1937,17 +1891,32 @@ static void tcg_target_init(TCGContext *s)
+ TCG_TARGET_STACK_ALIGN - 1) \
& ~(TCG_TARGET_STACK_ALIGN - 1))

+static void tcg_out_ldst_pair(TCGContext *s, AArch64Insn insn,
+ TCGReg r1, TCGReg r2, TCGReg base,
+ tcg_target_long ofs, bool pre, bool w)
+{
+ insn |= 1u << 31; /* ext */
+ insn |= pre << 24;
+ insn |= w << 23;
+
+ assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
+ insn |= (ofs & (0x7f << 3)) << (15 - 3);
+
+ tcg_out32(s, insn | r2 << 10 | base << 5 | r1);
+}
+
static void tcg_target_qemu_prologue(TCGContext *s)
{
TCGReg r;

/* Push (FP, LR) and allocate space for all saved registers. */
- tcg_out_push_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, PUSH_SIZE / 16);
+ tcg_out_ldst_pair(s, INSN_STP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, -PUSH_SIZE, 1, 1);

/* Store callee-preserved regs x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
- int idx = (r - TCG_REG_X19) / 2 + 1;
- tcg_out_store_pair(s, TCG_REG_SP, r, r + 1, idx);
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_ldst_pair(s, INSN_STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
}

/* Make stack space for TCG locals. */
@@ -1976,12 +1945,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)

/* Restore callee-preserved registers x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
- int idx = (r - TCG_REG_X19) / 2 + 1;
- tcg_out_load_pair(s, TCG_REG_SP, r, r + 1, idx);
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_ldst_pair(s, INSN_LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
}

- /* Pop (FP, LR), restore SP to previous frame, return. */
- tcg_out_pop_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, PUSH_SIZE / 16);
+ /* Pop (FP, LR), restore SP to previous frame. */
+ tcg_out_ldst_pair(s, INSN_LDP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, PUSH_SIZE, 0, 1);
tcg_out_ret(s, TCG_REG_LR);
}
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:43 UTC
Permalink
Some guest envs are small enough to reach the TLB with only a 12-bit addition.
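For illustration, with hypothetical offsets (the real value depends on the
guest's CPUArchState layout): a tlb_offset of 0x9c0 has
(tlb_offset & 0xfff000) == 0, so the leading ADDI into X2 can be skipped and
the env register used directly; an offset of 0x1a9c0 still needs the ADDI for
its high part 0x1a000.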

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 5691cc3..1905271 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1136,46 +1136,58 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
slow path for the failure case, which will be patched later when finalizing
the slow path. Generated code returns the host addend in X1,
clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
- int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, int s_bits,
+ uint8_t **label_ptr, int mem_index, int is_read)
{
TCGReg base = TCG_AREG0;
int tlb_offset = is_read ?
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+
/* Extract the TLB index from the address into X0.
X0<CPU_TLB_BITS:0> =
addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
+
/* Store the page mask part of the address and the low s_bits into X3.
Later this allows checking for equality and alignment at the same time.
X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
tcg_fmt_Rdn_limm(s, INSN_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
/* Add any "high bits" from the tlb offset to the env address into X2,
to take advantage of the LSL12 form of the ADDI instruction.
X2 = env + (tlb_offset & 0xfff000) */
- tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
+ if (tlb_offset & 0xfff000) {
+ tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_X2, base,
+ tlb_offset & 0xfff000);
+ base = TCG_REG_X2;
+ }
+
/* Merge the tlb index contribution into X2.
- X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
- tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
+ X2 = base + (X0 << CPU_TLB_ENTRY_BITS) */
+ tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, base,
TCG_REG_X0, CPU_TLB_ENTRY_BITS);
+
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
X0 = load [X2 + (tlb_offset & 0x000fff)] */
tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
LDST_LD, TCG_REG_X0, TCG_REG_X2,
(tlb_offset & 0xfff));
+
/* Load the tlb addend. Do that early to avoid stalling.
X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
(tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
(is_read ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)));
+
/* Perform the address comparison. */
tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
- *label_ptr = s->code_ptr;
+
/* If not equal, we jump to the slow path. */
+ *label_ptr = s->code_ptr;
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:50 UTC
Permalink
Removed from other targets in 56bbc2f967ce185fa1c5c39e1aeb5b68b26242e9.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 6 ------
1 file changed, 6 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 94f9ac1..a7d0785 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1852,12 +1852,6 @@ static const TCGTargetOpDef aarch64_op_defs[] = {

static void tcg_target_init(TCGContext *s)
{
-#if !defined(CONFIG_USER_ONLY)
- /* fail safe */
- if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
- tcg_abort();
- }
-#endif
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
--
1.8.3.1
Claudio Fontana
2013-09-16 09:05:37 UTC
Permalink
Post by Richard Henderson
Removed from other targets in 56bbc2f967ce185fa1c5c39e1aeb5b68b26242e9.
---
tcg/aarch64/tcg-target.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 94f9ac1..a7d0785 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1852,12 +1852,6 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
static void tcg_target_init(TCGContext *s)
{
-#if !defined(CONFIG_USER_ONLY)
- /* fail safe */
- if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
- tcg_abort();
- }
-#endif
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
Reviewed-by: Claudio Fontana <***@linaro.org>
Richard Henderson
2013-09-14 21:54:46 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 48 ++++++++++++++++++++++++------------------------
1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 3d1108c..335a5d0 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -327,6 +327,14 @@ typedef enum {
/* PC relative addressing instructions */
INSN_ADR = 0x10000000,
INSN_ADRP = 0x90000000,
+
+ /* Branch instructions */
+ INSN_B = 0x14000000,
+ INSN_BL = 0x94000000,
+ INSN_BR = 0xd61f0000,
+ INSN_BLR = 0xd63f0000,
+ INSN_RET = 0xd65f0000,
+ INSN_B_C = 0x54000000,
} AArch64Insn;

static inline enum aarch64_ldst_op_data
@@ -837,15 +845,14 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,

static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
{
- tcg_target_long offset;
- offset = (target - (tcg_target_long)s->code_ptr) / 4;
+ tcg_target_long offset = (target - (tcg_target_long)s->code_ptr) / 4;

if (offset < -0x02000000 || offset >= 0x02000000) {
/* out of 26bit range */
tcg_abort();
}

- tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
+ tcg_out32(s, INSN_B | (offset & 0x03ffffff));
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
@@ -855,25 +862,21 @@ static inline void tcg_out_goto_noaddr(TCGContext *s)
kept coherent during retranslation.
Mask away possible garbage in the high bits for the first translation,
while keeping the offset bits for retranslation. */
- uint32_t insn;
- insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
- tcg_out32(s, insn);
+ uint32_t old = tcg_in32(s) & 0x03ffffff;
+ tcg_out32(s, INSN_B | old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
- /* see comments in tcg_out_goto_noaddr */
- uint32_t insn;
- insn = tcg_in32(s) & (0x07ffff << 5);
- insn |= 0x54000000 | tcg_cond_to_aarch64[c];
- tcg_out32(s, insn);
+ /* See comments in tcg_out_goto_noaddr. */
+ uint32_t old = tcg_in32(s) & (0x07ffff << 5);
+ tcg_out32(s, INSN_B_C | tcg_cond_to_aarch64[c] | old);
}

static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
tcg_target_long target)
{
- tcg_target_long offset;
- offset = (target - (tcg_target_long)s->code_ptr) / 4;
+ tcg_target_long offset = (target - (tcg_target_long)s->code_ptr) / 4;

if (offset < -0x40000 || offset >= 0x40000) {
/* out of 19bit range */
@@ -881,37 +884,34 @@ static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
}

offset &= 0x7ffff;
- tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
+ tcg_out32(s, INSN_B_C | tcg_cond_to_aarch64[c] | offset << 5);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
- tcg_out32(s, 0xd63f0000 | reg << 5);
+ tcg_out32(s, INSN_BLR | reg << 5);
}

static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
{
- tcg_out32(s, 0xd61f0000 | reg << 5);
+ tcg_out32(s, INSN_BR | reg << 5);
}

static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
{
- tcg_target_long offset;
-
- offset = (target - (tcg_target_long)s->code_ptr) / 4;
+ tcg_target_long offset = (target - (tcg_target_long)s->code_ptr) / 4;

if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
tcg_out_callr(s, TCG_REG_TMP);
} else {
- tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
+ tcg_out32(s, INSN_BL | (offset & 0x03ffffff));
}
}

-static inline void tcg_out_ret(TCGContext *s)
+static inline void tcg_out_ret(TCGContext *s, TCGReg rn)
{
- /* emit RET { LR } */
- tcg_out32(s, 0xd65f03c0);
+ tcg_out32(s, INSN_RET | rn << 5);
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
@@ -1993,5 +1993,5 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* pop (FP, LR), restore SP to previous frame, return */
tcg_out_pop_pair(s, TCG_REG_SP,
TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
- tcg_out_ret(s);
+ tcg_out_ret(s, TCG_REG_LR);
}
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:29 UTC
Permalink
Handle a simplified set of logical immediates for the moment.

The way gcc and binutils do it, with 52k worth of tables, and
a binary search depth of log2(5334) = 13, seems slow for the
most common cases.
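As a quick reference, the simplified test boils down to the following
standalone sketch (it mirrors the patch's is_limm; the helper name is mine,
and the replication across elements that real logical immediates allow is
deliberately ignored):

    #include <stdint.h>
    #include <stdbool.h>

    /* Accept values whose set bits form one contiguous run, possibly
       wrapping around the top of the word (so the inverse also matches). */
    static bool is_limm_sketch(uint64_t val)
    {
        if ((int64_t)val < 0) {
            val = ~val;            /* normalize so the msb is clear */
        }
        if (val == 0) {
            return false;          /* all-zeros / all-ones are not encodable */
        }
        val += val & -val;         /* adding the lowest set bit closes the run */
        return (val & (val - 1)) == 0;
    }

This is a constant-time check, at the cost of rejecting some immediates the
table-driven approach would accept.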

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 155 ++++++++++++++++++++++++++++++++---------------
1 file changed, 106 insertions(+), 49 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 59499fd..bc651ac 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -113,6 +113,7 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,

#define TCG_CT_CONST_IS32 0x100
#define TCG_CT_CONST_AIMM 0x200
+#define TCG_CT_CONST_LIMM 0x400

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
@@ -143,6 +144,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
case 'A': /* Valid for arithmetic immediate (positive or negative). */
ct->ct |= TCG_CT_CONST_AIMM;
break;
+ case 'L': /* Valid for logical immediate. */
+ ct->ct |= TCG_CT_CONST_LIMM;
+ break;
default:
return -1;
}
@@ -157,6 +161,26 @@ static inline bool is_aimm(uint64_t val)
return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

+static inline bool is_limm(uint64_t val)
+{
+ /* Taking a simplified view of the logical immediates for now, ignoring
+ the replication that can happen across the field. Match bit patterns
+ of the forms
+ 0....01....1
+ 0..01..10..0
+ and their inverses. */
+
+ /* Make things easier below, by testing the form with msb clear. */
+ if ((int64_t)val < 0) {
+ val = ~val;
+ }
+ if (val == 0) {
+ return false;
+ }
+ val += val & -val;
+ return (val & (val - 1)) == 0;
+}
+
static int tcg_target_const_match(tcg_target_long val,
const TCGArgConstraint *arg_ct)
{
@@ -171,6 +195,9 @@ static int tcg_target_const_match(tcg_target_long val,
if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
return 1;
}
+ if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
+ return 1;
+ }

return 0;
}
@@ -225,6 +252,11 @@ enum aarch64_ldst_op_type { /* type of operation */
};

typedef enum {
+ /* Logical immediate instructions */
+ INSN_ANDI = 0x12000000,
+ INSN_ORRI = 0x32000000,
+ INSN_EORI = 0x52000000,
+
/* Logical shifted register instructions */
INSN_AND = 0x0a000000,
INSN_ORR = 0x2a000000,
@@ -368,6 +400,48 @@ static inline void tcg_fmt_Rdn_aimm(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | sf << 31 | aimm << 10 | rn << 5 | rd);
}

+/* This function can be used for both Logical (immediate) and Bitfield
+ instruction groups, both of which have N, IMMR and IMMS fields, that
+ feed the DecodeBitMasks pseudo function in the reference manual. */
+static inline void tcg_fmt_Rdn_nrs(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ int n, int immr, int imms)
+{
+ tcg_out32(s, insn | sf << 31 | n << 22 | immr << 16 | imms << 10
+ | rn << 5 | rd);
+}
+
+/* This function is used for the Logical (immediate) instruction group.
+ The value of LIMM must satisfy IS_LIMM. See the comment above about
+ only supporting simplified logical immediates. */
+static void tcg_fmt_Rdn_limm(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, uint64_t limm)
+{
+ unsigned h, l, r, c;
+
+ assert(is_limm(limm));
+
+ h = clz64(limm);
+ l = ctz64(limm);
+ if (l == 0) {
+ r = 0; /* form 0....01....1 */
+ c = ctz64(~limm) - 1;
+ if (h == 0) {
+ r = clz64(~limm); /* form 1..10..01..1 */
+ c += r;
+ }
+ } else {
+ r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
+ c = r - h - 1;
+ }
+ if (!sf) {
+ r &= 31;
+ c &= 31;
+ }
+
+ tcg_fmt_Rdn_nrs(s, insn, sf, rd, rn, sf, r, c);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -676,40 +750,6 @@ static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
}
}

-/* encode a logical immediate, mapping user parameter
- M=set bits pattern length to S=M-1 */
-static inline unsigned int
-aarch64_limm(unsigned int m, unsigned int r)
-{
- assert(m > 0);
- return r << 16 | (m - 1) << 10;
-}
-
-/* test a register against an immediate bit pattern made of
- M set bits rotated right by R.
- Examples:
- to test a 32/64 reg against 0x00000007, pass M = 3, R = 0.
- to test a 32/64 reg against 0x000000ff, pass M = 8, R = 0.
- to test a 32bit reg against 0xff000000, pass M = 8, R = 8.
- to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
- */
-static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
- unsigned int m, unsigned int r)
-{
- /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
- unsigned int base = ext ? 0xf240001f : 0x7200001f;
- tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
-}
-
-/* and a register with a bit pattern, similarly to TST, no flags change */
-static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
- TCGReg rn, unsigned int m, unsigned int r)
-{
- /* using AND 0x12000000 */
- unsigned int base = ext ? 0x92400000 : 0x12000000;
- tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
-}
-
static inline void tcg_out_ret(TCGContext *s)
{
/* emit RET { LR } */
@@ -914,9 +954,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
/* Store the page mask part of the address and the low s_bits into X3.
Later this allows checking for equality and alignment at the same time.
X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
- tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
- (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
- (TARGET_LONG_BITS - TARGET_PAGE_BITS));
+ tcg_fmt_Rdn_limm(s, INSN_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
+ addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
/* Add any "high bits" from the tlb offset to the env address into X2,
to take advantage of the LSL12 form of the ADDI instruction.
X2 = env + (tlb_offset & 0xfff000) */
@@ -1220,19 +1259,37 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;

- case INDEX_op_and_i64:
case INDEX_op_and_i32:
- tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_and_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_ANDI, ext, a0, a1, a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
+ }
break;

- case INDEX_op_or_i64:
case INDEX_op_or_i32:
- tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_or_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_ORRI, ext, a0, a1, a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
+ }
break;

- case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
- tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_xor_i64:
+ if (c2) {
+ tcg_fmt_Rdn_limm(s, INSN_EORI, ext, a0, a1, a2);
+ } else {
+ tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
+ }
break;

case INDEX_op_mul_i64:
@@ -1425,12 +1482,12 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_sub_i64, { "r", "r", "rA" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
- { INDEX_op_and_i32, { "r", "r", "r" } },
- { INDEX_op_and_i64, { "r", "r", "r" } },
- { INDEX_op_or_i32, { "r", "r", "r" } },
- { INDEX_op_or_i64, { "r", "r", "r" } },
- { INDEX_op_xor_i32, { "r", "r", "r" } },
- { INDEX_op_xor_i64, { "r", "r", "r" } },
+ { INDEX_op_and_i32, { "r", "r", "rwL" } },
+ { INDEX_op_and_i64, { "r", "r", "rL" } },
+ { INDEX_op_or_i32, { "r", "r", "rwL" } },
+ { INDEX_op_or_i64, { "r", "r", "rL" } },
+ { INDEX_op_xor_i32, { "r", "r", "rwL" } },
+ { INDEX_op_xor_i64, { "r", "r", "rL" } },

{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:24 UTC
Permalink
It was unused. Let's not overcomplicate things before we need them.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index be6d05a..cc56fe5 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -543,10 +543,10 @@ static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
}

static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm, int shift_imm)
+ TCGReg rm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
+ tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
}

static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -920,7 +920,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(is_read ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)));
/* Perform the address comparison. */
- tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
+ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
*label_ptr = s->code_ptr;
/* If not equal, we jump to the slow path. */
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
@@ -1259,13 +1259,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

case INDEX_op_brcond_i64:
case INDEX_op_brcond_i32:
- tcg_out_cmp(s, ext, a0, a1, 0);
+ tcg_out_cmp(s, ext, a0, a1);
tcg_out_goto_label_cond(s, a2, args[3]);
break;

case INDEX_op_setcond_i64:
case INDEX_op_setcond_i32:
- tcg_out_cmp(s, ext, a1, a2, 0);
+ tcg_out_cmp(s, ext, a1, a2);
tcg_out_cset(s, 0, a0, args[3]);
break;
--
1.8.3.1
Claudio Fontana
2013-09-16 08:02:19 UTC
Permalink
Post by Richard Henderson
It was unused. Let's not overcomplicate things before we need them.
---
tcg/aarch64/tcg-target.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index be6d05a..cc56fe5 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -543,10 +543,10 @@ static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
}
static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
- TCGReg rm, int shift_imm)
+ TCGReg rm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
+ tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
}
static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -920,7 +920,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
(is_read ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)));
/* Perform the address comparison. */
- tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
+ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
*label_ptr = s->code_ptr;
/* If not equal, we jump to the slow path. */
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
@@ -1259,13 +1259,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- tcg_out_cmp(s, ext, a0, a1, 0);
+ tcg_out_cmp(s, ext, a0, a1);
tcg_out_goto_label_cond(s, a2, args[3]);
break;
- tcg_out_cmp(s, ext, a1, a2, 0);
+ tcg_out_cmp(s, ext, a1, a2);
tcg_out_cset(s, 0, a0, args[3]);
break;
Agreed. This is an artifact from a previous implementation of the TLB lookup.

Reviewed-by: Claudio Fontana <***@linaro.org>
Richard Henderson
2013-09-14 21:54:33 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index e9a0f9b..5f2c437 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -690,14 +690,6 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
}
}

-static inline void tcg_out_cset(TCGContext *s, TCGType ext,
- TCGReg rd, TCGCond c)
-{
- /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
- unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
- tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
-}
-
static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
{
tcg_target_long offset;
@@ -1426,7 +1418,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* FALLTHRU */
case INDEX_op_setcond_i64:
tcg_out_cmp(s, ext, a1, a2, c2);
- tcg_out_cset(s, ext, a0, args[3]);
+ /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
+ tcg_fmt_Rdnm_cond(s, INSN_CSINC, 0, a0, TCG_REG_XZR, TCG_REG_XZR,
+ tcg_invert_cond(args[3]));
break;

case INDEX_op_movcond_i32:
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:47 UTC
Permalink
Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 93 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 70 insertions(+), 23 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 335a5d0..7d2fd99 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1930,30 +1930,27 @@ static void tcg_target_init(TCGContext *s)
tcg_add_target_add_op_defs(aarch64_op_defs);
}

+/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
+#define PUSH_SIZE ((30 - 19 + 1) * 8)
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & ~(TCG_TARGET_STACK_ALIGN - 1))
+
static void tcg_target_qemu_prologue(TCGContext *s)
{
- /* NB: frame sizes are in 16 byte stack units! */
- int frame_size_callee_saved, frame_size_tcg_locals;
TCGReg r;

- /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
- frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
-
- /* frame size requirement for TCG local variables */
- frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
- + CPU_TEMP_BUF_NLONGS * sizeof(long)
- + (TCG_TARGET_STACK_ALIGN - 1);
- frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
- frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
-
- /* push (FP, LR) and update sp */
- tcg_out_push_pair(s, TCG_REG_SP,
- TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
+ /* Push (FP, LR) and allocate space for all saved registers. */
+ tcg_out_push_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, PUSH_SIZE / 16);

/* FP -> callee_saved */
tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);

- /* store callee-preserved regs x19..x28 using FP -> callee_saved */
+ /* Store callee-preserved regs x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
int idx = (r - TCG_REG_X19) / 2 + 1;
tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
@@ -1961,7 +1958,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)

/* Make stack space for TCG locals. */
tcg_fmt_Rdn_aimm(s, INSN_SUBI, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ FRAME_SIZE - PUSH_SIZE);

/* inform TCG about how to find TCG locals with register, offset, size */
tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
@@ -1981,17 +1978,67 @@ static void tcg_target_qemu_prologue(TCGContext *s)

/* Remove TCG locals stack space. */
tcg_fmt_Rdn_aimm(s, INSN_ADDI, 1, TCG_REG_SP, TCG_REG_SP,
- frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+ FRAME_SIZE - PUSH_SIZE);

- /* restore registers x19..x28.
- FP must be preserved, so it still points to callee_saved area */
+ /* Restore callee-preserved registers x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
int idx = (r - TCG_REG_X19) / 2 + 1;
tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
}

- /* pop (FP, LR), restore SP to previous frame, return */
- tcg_out_pop_pair(s, TCG_REG_SP,
- TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
+ /* Pop (FP, LR), restore SP to previous frame, return. */
+ tcg_out_pop_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, PUSH_SIZE / 16);
tcg_out_ret(s, TCG_REG_LR);
}
+
+typedef struct {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[24];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#define ELF_HOST_MACHINE EM_AARCH64
+
+static DebugFrame debug_frame = {
+ .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .cie.id = -1,
+ .cie.version = 1,
+ .cie.code_align = 1,
+ .cie.data_align = 0x78, /* sleb128 -8 */
+ .cie.return_column = 16,
+
+ /* Total FDE size does not include the "len" member. */
+ .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 7, /* DW_CFA_def_cfa %rsp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 30, 1, /* DW_CFA_offset, lr, -8 */
+ 0x80 + 29, 2, /* DW_CFA_offset, fp, -16 */
+ 0x80 + 28, 3, /* DW_CFA_offset, x28, -24 */
+ 0x80 + 27, 4, /* DW_CFA_offset, x27, -32 */
+ 0x80 + 26, 5, /* DW_CFA_offset, x26, -40 */
+ 0x80 + 25, 6, /* DW_CFA_offset, x25, -48 */
+ 0x80 + 24, 7, /* DW_CFA_offset, x24, -56 */
+ 0x80 + 23, 8, /* DW_CFA_offset, x23, -64 */
+ 0x80 + 22, 9, /* DW_CFA_offset, x22, -72 */
+ 0x80 + 21, 10, /* DW_CFA_offset, x21, -80 */
+ 0x80 + 20, 11, /* DW_CFA_offset, x20, -88 */
+ 0x80 + 19, 12, /* DW_CFA_offset, x19, -96 */
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ debug_frame.fde.func_start = (tcg_target_long) buf;
+ debug_frame.fde.func_len = buf_size;
+
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:48 UTC
Permalink
We don't need the FP within translated code, and the LR is
otherwise unused.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 36 +++++++++++++++---------------------
tcg/aarch64/tcg-target.h | 32 +++++++++++++++++---------------
2 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 7d2fd99..fa88e4b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -22,10 +22,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
"%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
"%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
- "%x24", "%x25", "%x26", "%x27", "%x28",
- "%fp", /* frame pointer */
- "%lr", /* link register */
- "%sp", /* stack pointer */
+ "%x24", "%x25", "%x26", "%x27", "%x28", "%x29", "%x30", "%sp",
};
#endif /* NDEBUG */

@@ -38,18 +35,19 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
static const int tcg_target_reg_alloc_order[] = {
TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
- TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
+ TCG_REG_X28,
+ TCG_REG_X29, /* maybe used for TCG_REG_GUEST_BASE */

- TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
- TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
TCG_REG_X16, TCG_REG_X17,

- TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
-
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

- TCG_REG_X8, /* will not use, see tcg_target_init */
+ /* X18 reserved by system */
+ /* X19 reserved for AREG0 */
+ /* X30 reserved as temporary */
};

static const int tcg_target_call_iarg_regs[8] = {
@@ -60,13 +58,13 @@ static const int tcg_target_call_oarg_regs[1] = {
TCG_REG_X0
};

-#define TCG_REG_TMP TCG_REG_X8
+#define TCG_REG_TMP TCG_REG_X30

#ifndef CONFIG_SOFTMMU
-# if defined(CONFIG_USE_GUEST_BASE)
-# define TCG_REG_GUEST_BASE TCG_REG_X28
+# ifdef CONFIG_USE_GUEST_BASE
+# define TCG_REG_GUEST_BASE TCG_REG_X29
# else
-# define TCG_REG_GUEST_BASE TCG_REG_XZR
+# define TCG_REG_GUEST_BASE TCG_REG_XZR
# endif
#endif

@@ -1919,11 +1917,10 @@ static void tcg_target_init(TCGContext *s)
(1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
(1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
(1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
- (1 << TCG_REG_X18));
+ (1 << TCG_REG_X18) | (1 << TCG_REG_X30));

tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */

@@ -1947,13 +1944,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Push (FP, LR) and allocate space for all saved registers. */
tcg_out_push_pair(s, TCG_REG_SP, TCG_REG_FP, TCG_REG_LR, PUSH_SIZE / 16);

- /* FP -> callee_saved */
- tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
-
/* Store callee-preserved regs x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
int idx = (r - TCG_REG_X19) / 2 + 1;
- tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
+ tcg_out_store_pair(s, TCG_REG_SP, r, r + 1, idx);
}

/* Make stack space for TCG locals. */
@@ -1983,7 +1977,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Restore callee-preserved registers x19..x28. */
for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
int idx = (r - TCG_REG_X19) / 2 + 1;
- tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
+ tcg_out_load_pair(s, TCG_REG_SP, r, r + 1, idx);
}

/* Pop (FP, LR), restore SP to previous frame, return. */
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 8b55ff9..76810f1 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -17,17 +17,23 @@
#undef TCG_TARGET_STACK_GROWSUP

typedef enum {
- TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4,
- TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, TCG_REG_X8, TCG_REG_X9,
- TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14,
- TCG_REG_X15, TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19,
- TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24,
- TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, TCG_REG_X28,
- TCG_REG_FP, /* frame pointer */
- TCG_REG_LR, /* link register */
- TCG_REG_SP, /* stack pointer or zero register */
- TCG_REG_XZR = TCG_REG_SP /* same register number */
- /* program counter is not directly accessible! */
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19,
+ TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
+ TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
+ TCG_REG_X28, TCG_REG_X29, TCG_REG_X30,
+
+ /* X31 is either the stack pointer or zero, depending on context. */
+ TCG_REG_SP = 31,
+ TCG_REG_XZR = 31,
+
+ /* Aliases. */
+ TCG_REG_FP = TCG_REG_X29,
+ TCG_REG_LR = TCG_REG_X30,
+ TCG_AREG0 = TCG_REG_X19,
} TCGReg;

#define TCG_TARGET_NB_REGS 32
@@ -92,10 +98,6 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1

-enum {
- TCG_AREG0 = TCG_REG_X19,
-};
-
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
__builtin___clear_cache((char *)start, (char *)stop);
--
1.8.3.1
Richard Henderson
2013-09-14 21:54:25 UTC
Permalink
Now that we've converted opcode fields to pre-shifted insns, we
can merge the implementation of arithmetic and shift insns.

Simplify the left/right shift parameter to just the left shift
needed by tcg_out_tlb_read.

Signed-off-by: Richard Henderson <***@twiddle.net>
---
tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
1 file changed, 38 insertions(+), 40 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cc56fe5..0e7b67b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
return v;
}

+/*
+ * Encode various formats.
+ */
+
+/* This function can be used for both Arithmetic and Logical (shifted register)
+ type insns. Since we don't actually use the other available shifts, we only
+ support LSL here. */
+static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ /* Note that LSL is bits {23,22} = 0. */
+ tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
+}
+
+/* This function can be used for most insns with 2 input registers and one
+ output register. This includes Arithmetic (shifted register, sans shift),
+ Logical, Shift, Multiply, Divide, and Bit operation. */
+static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
+}
+
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -445,23 +469,6 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
arg, arg1, arg2);
}

-static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn,
- TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
- int shift_imm)
-{
- /* Using shifted register arithmetic operations */
- /* if extended register operation (64bit) just OR with 0x80 << 24 */
- unsigned int shift, base = insn | (ext ? 0x80000000 : 0);
- if (shift_imm == 0) {
- shift = 0;
- } else if (shift_imm > 0) {
- shift = shift_imm << 10 | 1 << 22;
- } else /* (shift_imm < 0) */ {
- shift = (-shift_imm) << 10;
- }
- tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
-}
-
static inline void tcg_out_mul(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
@@ -470,15 +477,6 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}

-static inline void tcg_out_shiftrot_reg(TCGContext *s,
- AArch64Insn insn, TCGType ext,
- TCGReg rd, TCGReg rn, TCGReg rm)
-{
- /* using 2-source data processing instructions 0x1ac02000 */
- unsigned int base = insn | (ext ? 0x80000000 : 0);
- tcg_out32(s, base | rm << 16 | rn << 5 | rd);
-}
-
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
{
@@ -546,7 +544,7 @@ static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
TCGReg rm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
}

static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -906,8 +904,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
/* Merge the tlb index contribution into X2.
X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
- tcg_out_arith(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
- TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
+ tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
+ TCG_REG_X0, CPU_TLB_ENTRY_BITS);
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
X0 = load [X2 + (tlb_offset & 0x000fff)] */
tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
@@ -1183,27 +1181,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

case INDEX_op_add_i64:
case INDEX_op_add_i32:
- tcg_out_arith(s, INSN_ADD, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
break;

case INDEX_op_sub_i64:
case INDEX_op_sub_i32:
- tcg_out_arith(s, INSN_SUB, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
break;

case INDEX_op_and_i64:
case INDEX_op_and_i32:
- tcg_out_arith(s, INSN_AND, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
break;

case INDEX_op_or_i64:
case INDEX_op_or_i32:
- tcg_out_arith(s, INSN_ORR, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
break;

case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
- tcg_out_arith(s, INSN_EOR, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
break;

case INDEX_op_mul_i64:
@@ -1216,7 +1214,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_LSLV, ext, a0, a1, a2);
}
break;

@@ -1225,7 +1223,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_shr(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_LSRV, ext, a0, a1, a2);
}
break;

@@ -1234,7 +1232,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_sar(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_ASRV, ext, a0, a1, a2);
}
break;

@@ -1243,7 +1241,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotr(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, a2);
}
break;

@@ -1252,8 +1250,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
- tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
- tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
+ tcg_fmt_Rdnm(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
+ tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
}
break;
--
1.8.3.1
Claudio Fontana
2013-09-16 08:41:48 UTC
Permalink
Post by Richard Henderson
Now that we've converted opcode fields to pre-shifted insns, we
can merge the implementation of arithmetic and shift insns.
Simplify the left/right shift parameter to just the left shift
needed by tcg_out_tlb_read.
---
tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
1 file changed, 38 insertions(+), 40 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cc56fe5..0e7b67b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
return v;
}
+/*
+ * Encode various formats.
+ */
+
+/* This function can be used for both Arithmetic and Logical (shifted register)
+ type insns. Since we don't actually use the other available shifts, we only
+ support LSL here. */
+static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ /* Note that LSL is bits {23,22} = 0. */
+ tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
+}
+
+/* This function can be used for most insns with 2 input registers and one
+ output register. This includes Arithmetic (shifted register, sans shift),
+ Logical, Shift, Multiply, Divide, and Bit operation. */
+static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
+}
+
The name of the function should reflect the fact that we are actually emitting instructions,
not only formatting them. Also I despise mixed case in functions.
So theoretically, tcg_out_rdnm.

I'd still rather have a function name that expresses the meaning of what we are trying to do
(tcg_out_arith seems a good name; you can merge it with shiftrot_reg if you want) rather than how we are doing it, if the model fits.

I guess the question would be: are all instructions that are formatted exactly that way arithmetic and logical shifted-register instructions of some sort?
If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.

Also, we lose a couple of things here.
The previous implementation made it impossible to pass wrong opcodes to the function, since the arithmetic opcodes were a separate type,
and it made it obvious to the reader in which cases the function can be used.
We lose this with this change (combined with the INSN change).

Also we lose the ability to do right-shifted arithmetic operations, which I feel we should provide for completeness and to reduce the pain for the programmer who will eventually need them.
Post by Richard Henderson
static inline void tcg_out_ldst_9(TCGContext *s,
enum aarch64_ldst_op_data op_data,
enum aarch64_ldst_op_type op_type,
@@ -445,23 +469,6 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
arg, arg1, arg2);
}
-static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn,
- TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
- int shift_imm)
-{
- /* Using shifted register arithmetic operations */
- /* if extended register operation (64bit) just OR with 0x80 << 24 */
- unsigned int shift, base = insn | (ext ? 0x80000000 : 0);
- if (shift_imm == 0) {
- shift = 0;
- } else if (shift_imm > 0) {
- shift = shift_imm << 10 | 1 << 22;
- } else /* (shift_imm < 0) */ {
- shift = (-shift_imm) << 10;
- }
- tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
-}
-
static inline void tcg_out_mul(TCGContext *s, TCGType ext,
TCGReg rd, TCGReg rn, TCGReg rm)
{
@@ -470,15 +477,6 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}
-static inline void tcg_out_shiftrot_reg(TCGContext *s,
- AArch64Insn insn, TCGType ext,
- TCGReg rd, TCGReg rn, TCGReg rm)
-{
- /* using 2-source data processing instructions 0x1ac02000 */
- unsigned int base = insn | (ext ? 0x80000000 : 0);
- tcg_out32(s, base | rm << 16 | rn << 5 | rd);
-}
-
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
{
@@ -546,7 +544,7 @@ static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
TCGReg rm)
{
/* Using CMP alias SUBS wzr, Wn, Wm */
- tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
+ tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
}
static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -906,8 +904,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
/* Merge the tlb index contribution into X2.
X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
- tcg_out_arith(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
- TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
+ tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
+ TCG_REG_X0, CPU_TLB_ENTRY_BITS);
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
X0 = load [X2 + (tlb_offset & 0x000fff)] */
tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
@@ -1183,27 +1181,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- tcg_out_arith(s, INSN_ADD, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
break;
- tcg_out_arith(s, INSN_SUB, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
break;
- tcg_out_arith(s, INSN_AND, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
break;
- tcg_out_arith(s, INSN_ORR, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
break;
- tcg_out_arith(s, INSN_EOR, ext, a0, a1, a2, 0);
+ tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
break;
@@ -1216,7 +1214,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_shl(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_LSLV, ext, a0, a1, a2);
}
break;
@@ -1225,7 +1223,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_shr(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_LSRV, ext, a0, a1, a2);
}
break;
@@ -1234,7 +1232,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_sar(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_ASRV, ext, a0, a1, a2);
}
break;
@@ -1243,7 +1241,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotr(s, ext, a0, a1, a2);
} else {
- tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
+ tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, a2);
}
break;
@@ -1252,8 +1250,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
- tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
- tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
+ tcg_fmt_Rdnm(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
+ tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
}
break;
Richard Henderson
2013-09-16 15:32:09 UTC
Permalink
Post by Claudio Fontana
Post by Richard Henderson
Now that we've converted opcode fields to pre-shifted insns, we
can merge the implementation of arithmetic and shift insns.
Simplify the left/right shift parameter to just the left shift
needed by tcg_out_tlb_read.
---
tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
1 file changed, 38 insertions(+), 40 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cc56fe5..0e7b67b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
return v;
}
+/*
+ * Encode various formats.
+ */
+
+/* This function can be used for both Arithmetic and Logical (shifted register)
+ type insns. Since we don't actually use the other available shifts, we only
+ support LSL here. */
+static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ /* Note that LSL is bits {23,22} = 0. */
+ tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
+}
+
+/* This function can be used for most insns with 2 input registers and one
+ output register. This includes Arithmetic (shifted register, sans shift),
+ Logical, Shift, Multiply, Divide, and Bit operation. */
+static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
+}
+
The name of the function should reflect the fact that we are actually emitting instructions,
not only formatting them. Also I despise mixed case in functions.
So theoretically, tcg_out_rdnm.
Ok.
Post by Claudio Fontana
I guess the question would be, are all instructions formatted exactly that
way arithmetic and logical shifted register instructions of some sort?
The function comment lists the insn groups from the manual to which the
function may be applied: Arithmetic, Logical, Shift, Multiply, Divide, Bit
operation.
Post by Claudio Fontana
If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.
rdnm it is then.
Post by Claudio Fontana
The previous implementation made it impossible to pass wrong opcodes to the
function, since the opcode for the arith was a separate type.
No, this isn't C++. Enumeration checks like that don't happen for C.
Post by Claudio Fontana
It made it obvious to the reader in which cases the function can be used.
We would lose this with this change here (combined with the INSN change).
Perhaps, perhaps not.

It would have been handy if ARM had officially assigned identifiers to the
formats, like Power, S390, and ia64 do. Then one can build in the format ids
into both the function and enumeration names and use the preprocessor for
typechecking (c.f. the tcg_out_insn macro in tcg/s390/tcg-target.c).

But without those format ids being official, inventing a set of format names
may be more confusing than not. I'm not sure.
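
For readers who haven't browsed that backend, here is a minimal sketch of the
preprocessor technique being referred to; the "Rdnm" format name, the helper and
macro spellings, and the opcode values (the architectural base encodings of the
shifted-register forms) are illustrative, not the actual s390 or aarch64 code:

    /* Opcodes declared per format; the prefix records which emitter
       they are valid for.  */
    typedef enum {
        Rdnm_ADD = 0x0b000000,   /* ADD (shifted register) */
        Rdnm_SUB = 0x4b000000,   /* SUB (shifted register) */
    } AArch64InsnRdnm;

    static void tcg_out_insn_Rdnm(TCGContext *s, AArch64InsnRdnm insn,
                                  TCGType sf, TCGReg rd, TCGReg rn, TCGReg rm)
    {
        tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
    }

    /* Paste the format name into both the emitter and the opcode, so an
       opcode that was never declared for this format becomes an undeclared
       identifier and fails to compile.  */
    #define tcg_out_insn(S, FMT, OP, ...) \
        tcg_out_insn_##FMT(S, FMT##_##OP, ## __VA_ARGS__)

    /* Usage: tcg_out_insn(s, Rdnm, ADD, ext, a0, a1, a2);  */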
Post by Claudio Fontana
Also we lose the ability to do right-shifted arithmetic operations, which I
feel we should provide for completeness and to reduce the pain for the
programmer who will eventually need them.
Nor do we provide ASR or ROR shifts; should we provide those too? Please think
about what situations in which those would be useful. Also think about the one
operation at a time nature of TCG.

My guess is that, beyond the one explicit use in the tlb, we could only make
use of shifted operations if TCG grew some sort of peephole optimizer so that
we can look across single operations. And I don't ever see that happening.

Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.


r~
Richard Henderson
2013-09-16 19:11:46 UTC
Permalink
Post by Richard Henderson
Nor do we provide ASR or ROR shifts; should we provide those too? Please think
about what situations in which those would be useful. Also think about the one
operation at a time nature of TCG.
My guess is that, beyond the one explicit use in the tlb, we could only make
use of shifted operations if TCG grew some sort of peephole optimizer so that
we can look across single operations. And I don't ever see that happening.
Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.
Unless: one decides NOT to use the alias definitions in the manual:

LSR d, n, shift => UBFM d, n, shift, 63
LSL d, n, shift => UBFM d, n, 64 - shift, 63 - shift
ASR d, n, shift => SBFM d, n, shift, 63
ROR d, n, shift => EXTR d, n, n, shift

and instead use

LSR d, n, shift => ORR d, xzr, n, lsr #shift
LSL d, n, shift => ORR d, xzr, n, lsl #shift
ASR d, n, shift => ORR d, xzr, n, asr #shift
ROR d, n, shift => ORR d, xzr, n, ror #shift

It's not implausible that using the same (base) insn for all of these could
result in more code sharing and therefore smaller code size within the tcg backend.
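
To make that concrete, a rough sketch of what routing immediate shifts through a
single ORR (shifted register) emitter could look like; the helper name and the
AArch64Shift enumeration are hypothetical, though the shift-field values
(bits 23:22) and the 0x2a000000 ORR base are the architectural encodings:

    /* Shift kinds as encoded in bits {23,22} of the shifted-register forms.
       ROR is only valid for the logical group, which includes ORR.  */
    typedef enum {
        SHIFT_LSL = 0,
        SHIFT_LSR = 1,
        SHIFT_ASR = 2,
        SHIFT_ROR = 3,
    } AArch64Shift;

    /* Emit "ORR rd, xzr, rs, <kind> #imm6", i.e. rd = rs shifted by imm6.
       The shifted operand is Rm, so the source goes into bits 20:16.
       For sf == 0 the shift amount must be < 32.  */
    static void tcg_out_shift_imm(TCGContext *s, TCGType sf, TCGReg rd,
                                  TCGReg rs, AArch64Shift kind, unsigned imm6)
    {
        tcg_out32(s, 0x2a000000 | sf << 31 | kind << 22 | rs << 16
                  | imm6 << 10 | TCG_REG_XZR << 5 | rd);
    }

    /* e.g. tcg_out_shift_imm(s, ext, a0, a1, SHIFT_LSR, a2)   -- lsr
            tcg_out_shift_imm(s, ext, a0, a1, SHIFT_ASR, a2)   -- asr  */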


r~


PS: All rather academic, but considering that AArch64 ORR (shifted register) is
the preferred implementation for MOV, and AArch32 MOV (shifted register) is the
preferred implementation for shift immediates, I'm actually surprised that
there is no AArch64 MOV (shifted register) alias for AArch32 compatibility, and
that ORR isn't the preferred implementation of shift immediates for AArch64.
Claudio Fontana
2013-09-17 08:23:03 UTC
Permalink
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
Now that we've converted opcode fields to pre-shifted insns, we
can merge the implementation of arithmetic and shift insns.
Simplify the left/right shift parameter to just the left shift
needed by tcg_out_tlb_read.
---
tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
1 file changed, 38 insertions(+), 40 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cc56fe5..0e7b67b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
return v;
}
+/*
+ * Encode various formats.
+ */
+
+/* This function can be used for both Arithmetic and Logical (shifted register)
+ type insns. Since we don't actually use the other available shifts, we only
+ support LSL here. */
+static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
+ TCGType sf, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ /* Note that LSL is bits {23,22} = 0. */
+ tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
+}
+
+/* This function can be used for most insns with 2 input registers and one
+ output register. This includes Arithmetic (shifted register, sans shift),
+ Logical, Shift, Multiply, Divide, and Bit operation. */
+static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
+}
+
The name of the function should reflect the fact that we are actually emitting instructions,
not only formatting them. Also I despise mixed case in functions.
So theoretically, tcg_out_rdnm.
Ok.
Post by Claudio Fontana
I guess the question would be, are all instructions formatted exactly that
way arithmetic and logical shifted register instructions of some sort?
The function comment lists the insn groups from the manual to which the
function may be applied: Arithmetic, Logical, Shift, Multiply, Divide, Bit
operation.
Post by Claudio Fontana
If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.
rdnm it is then.
Ok.
Post by Richard Henderson
Post by Claudio Fontana
The previous implementation made it impossible to pass wrong opcodes to the
function, since the opcode for the arith was a separate type.
No, this isn't C++. Enumeration checks like that don't happen for C.
I know, I did not express it well. What I meant is that it is impossible to misunderstand what is supposed to be passed to the function.
Not that it is impossible to willingly do so.
Post by Richard Henderson
Post by Claudio Fontana
It made it obvious to the reader in which cases the function can be used.
We would lose this with this change here (combined with the INSN change).
Perhaps, perhaps not.
It would have been handy if ARM had officially assigned identifiers to the
formats, like Power, S390, and ia64 do. Then one can build in the format ids
into both the function and enumeration names and use the preprocessor for
typechecking (c.f. the tcg_out_insn macro in tcg/s390/tcg-target.c).
No need to force explicit typechecking like that.
That kind of use of the preprocessor really hurts.
The only thing that needs to be addressed is to ensure that the programmer calling the function can quickly know for sure which instructions are ok to pass and which are not.

Maybe we can categorize the instructions by order of appearance in the enum, coupled with an appropriate prefix.
For example INSN_RDNM_ADD, INSN_RDNM_SUB, ...
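Something along these lines, just to sketch the idea (the names and values below
are illustrative; the actual series may group or number them differently):

    typedef enum {
        /* valid arguments for tcg_out_rdnm */
        INSN_RDNM_ADD  = 0x0b000000,
        INSN_RDNM_SUB  = 0x4b000000,
        INSN_RDNM_LSLV = 0x1ac02000,
        /* ... further groups (load/store, branch, ...) follow, each with
           its own prefix ... */
    } AArch64Insn;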
Post by Richard Henderson
But without those format ids being official, inventing a set of format names
may be more confusing than not. I'm not sure.
Post by Claudio Fontana
Also we lose the ability to do right-shifted arithmetic operations, which I
feel we should provide for completeness and to reduce the pain for the
programmer who will eventually need them.
Nor do we provide ASR or ROR shifts; should we provide those too?
No, not yet.
Post by Richard Henderson
Please think
about what situations in which those would be useful. Also think about the one
operation at a time nature of TCG.
My guess is that, beyond the one explicit use in the tlb, we could only make
use of shifted operations if TCG grew some sort of peephole optimizer so that
we can look across single operations. And I don't ever see that happening.
Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.
I agree, let's just keep only left shift and right shift.
Distinguishing the two costs one comparison per call; I think we can survive it.

C.
Richard Henderson
2013-09-17 14:54:00 UTC
Permalink
Post by Claudio Fontana
Post by Richard Henderson
It would have been handy if ARM had officially assigned identifiers to the
formats, like Power, S390, and ia64 do. Then one can build in the format ids
into both the function and enumeration names and use the preprocessor for
typechecking (c.f. the tcg_out_insn macro in tcg/s390/tcg-target.c).
No need to force explicit typechecking like that.
That kind of use of the preprocessor really hurts.
Why do you believe this? Have you browsed through the s390 backend?
I think it's a remarkably clean solution -- one we ought to have used
in the ia64 backend, which has even more format codes.
Post by Claudio Fontana
Post by Richard Henderson
Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.
I agree, let's just keep only left shift and right shift.
Distinguishing the two costs one comparison per call; I think we can survive it.
I don't understand you at all.

You agree that removing the unused shift from cmp is sensible.
You agree that not adding unused asr/ror shifts is sensible.
But you insist that the unused lsr shift should be retained?

You complain about wasting Y space in my positioning of comments.
But you insist on wasting X space, and allowing the possibility of
mismatch, by requiring format names to be duplicated?


r~
Claudio Fontana
2013-09-18 08:24:21 UTC
Permalink
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
It would have been handy if ARM had officially assigned identifiers to the
formats, like Power, S390, and ia64 do. Then one can build in the format ids
into both the function and enumeration names and use the preprocessor for
typechecking (c.f. the tcg_out_insn macro in tcg/s390/tcg-target.c).
No need to force explicit typechecking like that.
That kind of use of the preprocessor really hurts.
Why do you believe this? Have you browsed through the s390 backend?
I think it's a remarkably clean solution -- one we ought to have used
in the ia64 backend, which has even more format codes.
Why do I believe this? Because my experience tells me to let this kind of stuff go,
in order to allow developers who are not yet familiar with the code base to trace their way through the calls.
It keeps the code base discoverable, by hand and by tools.
Post by Richard Henderson
Post by Claudio Fontana
Post by Richard Henderson
Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.
I agree, let's just keep only left shift and right shift.
Distinguishing the two costs one comparison per call; I think we can survive it.
I don't understand you at all.
You agree that removing the unused shift from cmp is sensible.
Yes. The additional parameter to cmp (0) can be confusing, so it seems more sensible to remove it until there is actual demonstrable use for the shifted version of cmp.
Post by Richard Henderson
You agree that not adding unused asr/ror shifts is sensible.
Yep.
Post by Richard Henderson
But you insist that the unused lsr shift should be retained?
Yes. It's a balance thing; it's not black and white.
In this case the drawback of keeping the right shift is negligible.
No additional parameter is needed; the existing code just looks at the sign of the immediate to decide the direction to shift.
It's one comparison only, and I find that the tradeoff is acceptable.

If you _really_ want to strip the right shift functionality away for some reason, could you state it?
Is it for that one comparison? Or for consistency with something else?
Post by Richard Henderson
You complain about wasting Y space in my positioning of comments.
But you insist on wasting X space,
They are different resources. Y space gives us context and is a scarcer and more precious resource than X space.
Of course it's a tradeoff.
We don't want the whole code on one line; we have 80-char limits and we do one thing per line.
Post by Richard Henderson
and allowing the possibility of
mismatch, by requiring format names to be duplicated?
I don't understand this one.
C.
Richard Henderson
2013-09-18 14:54:32 UTC
Permalink
Post by Claudio Fontana
If you _really_ want to strip the right shift functionality away for some reason, could you state it?
(1) It will never never ever be used.

(2) I do not find + for LSR, - for LSL intuitive at all.
If we're to have them at all, better to pass that
separately from the shift count, an enumeration that
exactly maps to the "shift" field in the insn.

(3) It will never never ever be used.


r~
Claudio Fontana
2013-09-18 15:01:23 UTC
Permalink
Post by Richard Henderson
Post by Claudio Fontana
If you _really_ want to strip the right shift functionality away for some reason, could you state it?
(1) It will never never ever be used.
(2) I do not find + for LSR, - for LSL intuitive at all.
If we're to have them at all, better to pass that
separately from the shift count, an enumeration that
exactly maps to the "shift" field in the insn.
(3) It will never never ever be used.
You seem to feel strongly about it, and I don't, so let's indeed drop it then.

C.
