From: Fiona Ebner <f.ebner@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH qemu] cherry-pick TCG-related stable fixes for 7.2
Date: Fri, 17 Mar 2023 13:47:11 +0100 [thread overview]
Message-ID: <20230317124711.812727-1-f.ebner@proxmox.com> (raw)
When the "KVM hardware virtualization" checkbox is turned off in Proxmox
VE, the TCG accelerator is used, so these fixes are relevant in that case.
The first patch is included to allow cherry-picking the others without
changes.
Reported-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
...Introduce-and-use-reg_t-consistently.patch | 286 ++++++++++++++++++
...25-target-i386-Fix-BEXTR-instruction.patch | 97 ++++++
...i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch | 47 +++
...arget-i386-fix-ADOX-followed-by-ADCX.patch | 192 ++++++++++++
...028-target-i386-Fix-BZHI-instruction.patch | 64 ++++
debian/patches/series | 5 +
6 files changed, 691 insertions(+)
create mode 100644 debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
create mode 100644 debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch
create mode 100644 debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
create mode 100644 debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
create mode 100644 debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch
diff --git a/debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
new file mode 100644
index 0000000..a4bcb71
--- /dev/null
+++ b/debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
@@ -0,0 +1,286 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Sat, 14 Jan 2023 13:05:41 -1000
+Subject: [PATCH] tests/tcg/i386: Introduce and use reg_t consistently
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Define reg_t based on the actual register width.
+Define the inlines using that type. This will allow
+input registers to 32-bit insns to be set to 64-bit
+values on x86-64, which allows testing various edge cases.
+
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry-picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++----------------
+ 1 file changed, 93 insertions(+), 89 deletions(-)
+
+diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
+index 5fadf47510..3c3ef85513 100644
+--- a/tests/tcg/i386/test-i386-bmi2.c
++++ b/tests/tcg/i386/test-i386-bmi2.c
+@@ -3,34 +3,40 @@
+ #include <stdint.h>
+ #include <stdio.h>
+
++#ifdef __x86_64
++typedef uint64_t reg_t;
++#else
++typedef uint32_t reg_t;
++#endif
++
+ #define insn1q(name, arg0) \
+-static inline uint64_t name##q(uint64_t arg0) \
++static inline reg_t name##q(reg_t arg0) \
+ { \
+- uint64_t result64; \
++ reg_t result64; \
+ asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \
+ return result64; \
+ }
+
+ #define insn1l(name, arg0) \
+-static inline uint32_t name##l(uint32_t arg0) \
++static inline reg_t name##l(reg_t arg0) \
+ { \
+- uint32_t result32; \
++ reg_t result32; \
+ asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \
+ return result32; \
+ }
+
+ #define insn2q(name, arg0, c0, arg1, c1) \
+-static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \
++static inline reg_t name##q(reg_t arg0, reg_t arg1) \
+ { \
+- uint64_t result64; \
++ reg_t result64; \
+ asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \
+ return result64; \
+ }
+
+ #define insn2l(name, arg0, c0, arg1, c1) \
+-static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \
++static inline reg_t name##l(reg_t arg0, reg_t arg1) \
+ { \
+- uint32_t result32; \
++ reg_t result32; \
+ asm volatile (#name "l %k2, %k1, %k0" : "=r"(result32) : c0(arg0), c1(arg1)); \
+ return result32; \
+ }
+@@ -65,130 +71,128 @@ insn1l(blsr, src)
+ int main(int argc, char *argv[]) {
+ uint64_t ehlo = 0x202020204f4c4845ull;
+ uint64_t mask = 0xa080800302020001ull;
+- uint32_t result32;
++ reg_t result;
+
+ #ifdef __x86_64
+- uint64_t result64;
+-
+ /* 64 bits */
+- result64 = andnq(mask, ehlo);
+- assert(result64 == 0x002020204d4c4844);
++ result = andnq(mask, ehlo);
++ assert(result == 0x002020204d4c4844);
+
+- result64 = pextq(ehlo, mask);
+- assert(result64 == 133);
++ result = pextq(ehlo, mask);
++ assert(result == 133);
+
+- result64 = pdepq(result64, mask);
+- assert(result64 == (ehlo & mask));
++ result = pdepq(result, mask);
++ assert(result == (ehlo & mask));
+
+- result64 = pextq(-1ull, mask);
+- assert(result64 == 511); /* mask has 9 bits set */
++ result = pextq(-1ull, mask);
++ assert(result == 511); /* mask has 9 bits set */
+
+- result64 = pdepq(-1ull, mask);
+- assert(result64 == mask);
++ result = pdepq(-1ull, mask);
++ assert(result == mask);
+
+- result64 = bextrq(mask, 0x3f00);
+- assert(result64 == (mask & ~INT64_MIN));
++ result = bextrq(mask, 0x3f00);
++ assert(result == (mask & ~INT64_MIN));
+
+- result64 = bextrq(mask, 0x1038);
+- assert(result64 == 0xa0);
++ result = bextrq(mask, 0x1038);
++ assert(result == 0xa0);
+
+- result64 = bextrq(mask, 0x10f8);
+- assert(result64 == 0);
++ result = bextrq(mask, 0x10f8);
++ assert(result == 0);
+
+- result64 = blsiq(0x30);
+- assert(result64 == 0x10);
++ result = blsiq(0x30);
++ assert(result == 0x10);
+
+- result64 = blsiq(0x30ull << 32);
+- assert(result64 == 0x10ull << 32);
++ result = blsiq(0x30ull << 32);
++ assert(result == 0x10ull << 32);
+
+- result64 = blsmskq(0x30);
+- assert(result64 == 0x1f);
++ result = blsmskq(0x30);
++ assert(result == 0x1f);
+
+- result64 = blsrq(0x30);
+- assert(result64 == 0x20);
++ result = blsrq(0x30);
++ assert(result == 0x20);
+
+- result64 = blsrq(0x30ull << 32);
+- assert(result64 == 0x20ull << 32);
++ result = blsrq(0x30ull << 32);
++ assert(result == 0x20ull << 32);
+
+- result64 = bzhiq(mask, 0x3f);
+- assert(result64 == (mask & ~INT64_MIN));
++ result = bzhiq(mask, 0x3f);
++ assert(result == (mask & ~INT64_MIN));
+
+- result64 = bzhiq(mask, 0x1f);
+- assert(result64 == (mask & ~(-1 << 30)));
++ result = bzhiq(mask, 0x1f);
++ assert(result == (mask & ~(-1 << 30)));
+
+- result64 = rorxq(0x2132435465768798, 8);
+- assert(result64 == 0x9821324354657687);
++ result = rorxq(0x2132435465768798, 8);
++ assert(result == 0x9821324354657687);
+
+- result64 = sarxq(0xffeeddccbbaa9988, 8);
+- assert(result64 == 0xffffeeddccbbaa99);
++ result = sarxq(0xffeeddccbbaa9988, 8);
++ assert(result == 0xffffeeddccbbaa99);
+
+- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64);
+- assert(result64 == 0x0077eeddccbbaa99);
++ result = sarxq(0x77eeddccbbaa9988, 8 | 64);
++ assert(result == 0x0077eeddccbbaa99);
+
+- result64 = shrxq(0xffeeddccbbaa9988, 8);
+- assert(result64 == 0x00ffeeddccbbaa99);
++ result = shrxq(0xffeeddccbbaa9988, 8);
++ assert(result == 0x00ffeeddccbbaa99);
+
+- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192);
+- assert(result64 == 0x0077eeddccbbaa99);
++ result = shrxq(0x77eeddccbbaa9988, 8 | 192);
++ assert(result == 0x0077eeddccbbaa99);
+
+- result64 = shlxq(0xffeeddccbbaa9988, 8);
+- assert(result64 == 0xeeddccbbaa998800);
++ result = shlxq(0xffeeddccbbaa9988, 8);
++ assert(result == 0xeeddccbbaa998800);
+ #endif
+
+ /* 32 bits */
+- result32 = andnl(mask, ehlo);
+- assert(result32 == 0x04d4c4844);
++ result = andnl(mask, ehlo);
++ assert(result == 0x04d4c4844);
+
+- result32 = pextl((uint32_t) ehlo, mask);
+- assert(result32 == 5);
++ result = pextl((uint32_t) ehlo, mask);
++ assert(result == 5);
+
+- result32 = pdepl(result32, mask);
+- assert(result32 == (uint32_t)(ehlo & mask));
++ result = pdepl(result, mask);
++ assert(result == (uint32_t)(ehlo & mask));
+
+- result32 = pextl(-1u, mask);
+- assert(result32 == 7); /* mask has 3 bits set */
++ result = pextl(-1u, mask);
++ assert(result == 7); /* mask has 3 bits set */
+
+- result32 = pdepl(-1u, mask);
+- assert(result32 == (uint32_t)mask);
++ result = pdepl(-1u, mask);
++ assert(result == (uint32_t)mask);
+
+- result32 = bextrl(mask, 0x1f00);
+- assert(result32 == (mask & ~INT32_MIN));
++ result = bextrl(mask, 0x1f00);
++ assert(result == (mask & ~INT32_MIN));
+
+- result32 = bextrl(ehlo, 0x1018);
+- assert(result32 == 0x4f);
++ result = bextrl(ehlo, 0x1018);
++ assert(result == 0x4f);
+
+- result32 = bextrl(mask, 0x1038);
+- assert(result32 == 0);
++ result = bextrl(mask, 0x1038);
++ assert(result == 0);
+
+- result32 = blsil(0xffff);
+- assert(result32 == 1);
++ result = blsil(0xffff);
++ assert(result == 1);
+
+- result32 = blsmskl(0x300);
+- assert(result32 == 0x1ff);
++ result = blsmskl(0x300);
++ assert(result == 0x1ff);
+
+- result32 = blsrl(0xffc);
+- assert(result32 == 0xff8);
++ result = blsrl(0xffc);
++ assert(result == 0xff8);
+
+- result32 = bzhil(mask, 0xf);
+- assert(result32 == 1);
++ result = bzhil(mask, 0xf);
++ assert(result == 1);
+
+- result32 = rorxl(0x65768798, 8);
+- assert(result32 == 0x98657687);
++ result = rorxl(0x65768798, 8);
++ assert(result == 0x98657687);
+
+- result32 = sarxl(0xffeeddcc, 8);
+- assert(result32 == 0xffffeedd);
++ result = sarxl(0xffeeddcc, 8);
++ assert(result == 0xffffeedd);
+
+- result32 = sarxl(0x77eeddcc, 8 | 32);
+- assert(result32 == 0x0077eedd);
++ result = sarxl(0x77eeddcc, 8 | 32);
++ assert(result == 0x0077eedd);
+
+- result32 = shrxl(0xffeeddcc, 8);
+- assert(result32 == 0x00ffeedd);
++ result = shrxl(0xffeeddcc, 8);
++ assert(result == 0x00ffeedd);
+
+- result32 = shrxl(0x77eeddcc, 8 | 128);
+- assert(result32 == 0x0077eedd);
++ result = shrxl(0x77eeddcc, 8 | 128);
++ assert(result == 0x0077eedd);
+
+- result32 = shlxl(0xffeeddcc, 8);
+- assert(result32 == 0xeeddcc00);
++ result = shlxl(0xffeeddcc, 8);
++ assert(result == 0xeeddcc00);
+
+ return 0;
+ }
diff --git a/debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch b/debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch
new file mode 100644
index 0000000..38282b2
--- /dev/null
+++ b/debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch
@@ -0,0 +1,97 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Sat, 14 Jan 2023 13:05:42 -1000
+Subject: [PATCH] target/i386: Fix BEXTR instruction
+
+There were two problems here: not limiting the input to operand bits,
+and not correctly handling large extraction length.
+
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org>
+Cc: qemu-stable@nongnu.org
+Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry-picked from commit b14c0098975264ed03144f145bca0179a6763a07)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/emit.c.inc | 22 +++++++++++-----------
+ tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++
+ 2 files changed, 23 insertions(+), 11 deletions(-)
+
+diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
+index 7037ff91c6..99f6ba6e19 100644
+--- a/target/i386/tcg/emit.c.inc
++++ b/target/i386/tcg/emit.c.inc
+@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+ MemOp ot = decode->op[0].ot;
+- TCGv bound, zero;
++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
++ TCGv zero = tcg_constant_tl(0);
++ TCGv mone = tcg_constant_tl(-1);
+
+ /*
+ * Extract START, and shift the operand.
+ * Shifts larger than operand size get zeros.
+ */
+ tcg_gen_ext8u_tl(s->A0, s->T1);
++ if (TARGET_LONG_BITS == 64 && ot == MO_32) {
++ tcg_gen_ext32u_tl(s->T0, s->T0);
++ }
+ tcg_gen_shr_tl(s->T0, s->T0, s->A0);
+
+- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+- zero = tcg_constant_tl(0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
+
+ /*
+- * Extract the LEN into a mask. Lengths larger than
+- * operand size get all ones.
++ * Extract the LEN into an inverse mask. Lengths larger than
++ * operand size get all zeros, length 0 gets all ones.
+ */
+ tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
+- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
+-
+- tcg_gen_movi_tl(s->T1, 1);
+- tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+- tcg_gen_subi_tl(s->T1, s->T1, 1);
+- tcg_gen_and_tl(s->T0, s->T0, s->T1);
++ tcg_gen_shl_tl(s->T1, mone, s->A0);
++ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
++ tcg_gen_andc_tl(s->T0, s->T0, s->T1);
+
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
+index 3c3ef85513..982d4abda4 100644
+--- a/tests/tcg/i386/test-i386-bmi2.c
++++ b/tests/tcg/i386/test-i386-bmi2.c
+@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) {
+ result = bextrq(mask, 0x10f8);
+ assert(result == 0);
+
++ result = bextrq(0xfedcba9876543210ull, 0x7f00);
++ assert(result == 0xfedcba9876543210ull);
++
+ result = blsiq(0x30);
+ assert(result == 0x10);
+
+@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) {
+ result = bextrl(mask, 0x1038);
+ assert(result == 0);
+
++ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
++ assert(result == 0x5a);
++
++ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
++ assert(result == 0x76543210u);
++
++ result = bextrl(-1, 0);
++ assert(result == 0);
++
+ result = blsil(0xffff);
+ assert(result == 1);
+
diff --git a/debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
new file mode 100644
index 0000000..c743d55
--- /dev/null
+++ b/debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
@@ -0,0 +1,47 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Sat, 14 Jan 2023 08:06:01 -1000
+Subject: [PATCH] target/i386: Fix C flag for BLSI, BLSMSK, BLSR
+
+We forgot to set cc_src, which is used for computing C.
+
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org>
+Cc: qemu-stable@nongnu.org
+Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry-picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/emit.c.inc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
+index 99f6ba6e19..4d7702c106 100644
+--- a/target/i386/tcg/emit.c.inc
++++ b/target/i386/tcg/emit.c.inc
+@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+ MemOp ot = decode->op[0].ot;
+
++ tcg_gen_mov_tl(cpu_cc_src, s->T0);
+ tcg_gen_neg_tl(s->T1, s->T0);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
+ {
+ MemOp ot = decode->op[0].ot;
+
++ tcg_gen_mov_tl(cpu_cc_src, s->T0);
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_xor_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+ MemOp ot = decode->op[0].ot;
+
++ tcg_gen_mov_tl(cpu_cc_src, s->T0);
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
diff --git a/debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch b/debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
new file mode 100644
index 0000000..bb108e5
--- /dev/null
+++ b/debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
@@ -0,0 +1,192 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 31 Jan 2023 09:48:03 +0100
+Subject: [PATCH] target/i386: fix ADOX followed by ADCX
+
+When ADCX is followed by ADOX or vice versa, the second instruction's
+carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX
+operation. Retrieving the carry from EFLAGS is handled by this bit
+of gen_ADCOX:
+
+ tcg_gen_extract_tl(carry_in, cpu_cc_src,
+ ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
+
+Unfortunately, in this case cc_op has been overwritten by the previous
+"if" statement to CC_OP_ADCOX. This works by chance when the first
+instruction is ADCX; however, if the first instruction is ADOX,
+ADCX will incorrectly take its carry from OF instead of CF.
+
+Fix by moving the computation of the new cc_op to the end of the function.
+The included exhaustive test case fails without this patch and passes
+afterwards.
+
+Because ADCX/ADOX need not be invoked through the VEX prefix, this
+regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
+0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a
+little earlier, when BMI instructions were rewritten using the new
+decoder framework.
+
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471
+Reported-by: Paul Jolly <https://gitlab.com/myitcv>
+Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
+Cc: qemu-stable@nongnu.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry-picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/emit.c.inc | 20 +++++----
+ tests/tcg/i386/Makefile.target | 6 ++-
+ tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++
+ 3 files changed, 91 insertions(+), 10 deletions(-)
+ create mode 100644 tests/tcg/i386/test-i386-adcox.c
+
+diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
+index 4d7702c106..0d7c6e80ae 100644
+--- a/target/i386/tcg/emit.c.inc
++++ b/target/i386/tcg/emit.c.inc
+@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
+
+ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+ {
++ int opposite_cc_op;
+ TCGv carry_in = NULL;
+ TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
+ TCGv zero;
+@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+ if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
+ /* Re-use the carry-out from a previous round. */
+ carry_in = carry_out;
+- cc_op = s->cc_op;
+- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
+- /* Merge with the carry-out from the opposite instruction. */
+- cc_op = CC_OP_ADCOX;
+- }
+-
+- /* If we don't have a carry-in, get it out of EFLAGS. */
+- if (!carry_in) {
++ } else {
++ /* We don't have a carry-in, get it out of EFLAGS. */
+ if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+ gen_compute_eflags(s);
+ }
+@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+ tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
+ break;
+ }
+- set_cc_op(s, cc_op);
++
++ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
++ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
++ /* Merge with the carry-out from the opposite instruction. */
++ set_cc_op(s, CC_OP_ADCOX);
++ } else {
++ set_cc_op(s, cc_op);
++ }
+ }
+
+ static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
+index 81831cafbc..bafd8c2180 100644
+--- a/tests/tcg/i386/Makefile.target
++++ b/tests/tcg/i386/Makefile.target
+@@ -14,7 +14,7 @@ config-cc.mak: Makefile
+ I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
+ ALL_X86_TESTS=$(I386_SRCS:.c=)
+ SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
+-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
++X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+
+ test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
+ run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
+@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
+ run-test-i386-bmi2: QEMU_OPTS += -cpu max
+ run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
+
++test-i386-adcox: CFLAGS=-O2
++run-test-i386-adcox: QEMU_OPTS += -cpu max
++run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
++
+ #
+ # hello-i386 is a barebones app
+ #
+diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
+new file mode 100644
+index 0000000000..16169efff8
+--- /dev/null
++++ b/tests/tcg/i386/test-i386-adcox.c
+@@ -0,0 +1,75 @@
++/* See if the ADCX and ADOX instructions give expected results */
++#include <assert.h>
++#include <stdint.h>
++#include <stdio.h>
++
++#define CC_C 1
++#define CC_O (1 << 11)
++
++#ifdef __x86_64__
++#define REG uint64_t
++#else
++#define REG uint32_t
++#endif
++
++void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
++{
++ REG flags;
++ REG out_adcx, out_adox;
++
++ asm("pushf; pop %0" : "=r"(flags));
++ flags &= ~(CC_C | CC_O);
++ flags |= (in_c ? CC_C : 0);
++ flags |= (in_o ? CC_O : 0);
++
++ out_adcx = adcx_operand;
++ out_adox = adox_operand;
++ asm("push %0; popf;"
++ "adox %3, %2;"
++ "adcx %3, %1;"
++ "pushf; pop %0"
++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
++
++ assert(out_adcx == in_c + adcx_operand - 1);
++ assert(out_adox == in_o + adox_operand - 1);
++ assert(!!(flags & CC_C) == (in_c || adcx_operand));
++ assert(!!(flags & CC_O) == (in_o || adox_operand));
++}
++
++void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
++{
++ REG flags;
++ REG out_adcx, out_adox;
++
++ asm("pushf; pop %0" : "=r"(flags));
++ flags &= ~(CC_C | CC_O);
++ flags |= (in_c ? CC_C : 0);
++ flags |= (in_o ? CC_O : 0);
++
++ out_adcx = adcx_operand;
++ out_adox = adox_operand;
++ asm("push %0; popf;"
++ "adcx %3, %1;"
++ "adox %3, %2;"
++ "pushf; pop %0"
++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
++
++ assert(out_adcx == in_c + adcx_operand - 1);
++ assert(out_adox == in_o + adox_operand - 1);
++ assert(!!(flags & CC_C) == (in_c || adcx_operand));
++ assert(!!(flags & CC_O) == (in_o || adox_operand));
++}
++
++int main(int argc, char *argv[]) {
++ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */
++ int i;
++ for (i = 0; i <= 15; i++) {
++ printf("%d\n", i);
++ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
++ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
++ }
++ return 0;
++}
++
diff --git a/debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch b/debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch
new file mode 100644
index 0000000..391817c
--- /dev/null
+++ b/debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Sat, 14 Jan 2023 13:32:06 -1000
+Subject: [PATCH] target/i386: Fix BZHI instruction
+
+We did not correctly handle N >= operand size.
+
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry-picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/i386/tcg/emit.c.inc | 14 +++++++-------
+ tests/tcg/i386/test-i386-bmi2.c | 3 +++
+ 2 files changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
+index 0d7c6e80ae..7296f3952c 100644
+--- a/target/i386/tcg/emit.c.inc
++++ b/target/i386/tcg/emit.c.inc
+@@ -1145,20 +1145,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+ MemOp ot = decode->op[0].ot;
+- TCGv bound;
++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
++ TCGv zero = tcg_constant_tl(0);
++ TCGv mone = tcg_constant_tl(-1);
+
+- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
+- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
++ tcg_gen_ext8u_tl(s->T1, s->T1);
+
+ /*
+ * Note that since we're using BMILG (in order to get O
+ * cleared) we need to store the inverse into C.
+ */
+- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
+- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
++ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound);
+
+- tcg_gen_movi_tl(s->A0, -1);
+- tcg_gen_shl_tl(s->A0, s->A0, s->T1);
++ tcg_gen_shl_tl(s->A0, mone, s->T1);
++ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
+ tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+
+ gen_op_update1_cc(s);
+diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
+index 982d4abda4..0244df7987 100644
+--- a/tests/tcg/i386/test-i386-bmi2.c
++++ b/tests/tcg/i386/test-i386-bmi2.c
+@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) {
+ result = bzhiq(mask, 0x1f);
+ assert(result == (mask & ~(-1 << 30)));
+
++ result = bzhiq(mask, 0x40);
++ assert(result == mask);
++
+ result = rorxq(0x2132435465768798, 8);
+ assert(result == 0x9821324354657687);
+
diff --git a/debian/patches/series b/debian/patches/series
index 70d525f..4e8ddd6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -21,6 +21,11 @@ extra/0020-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
extra/0021-memory-Allow-disabling-re-entrancy-checking-per-MR.patch
extra/0022-lsi53c895a-disable-reentrancy-detection-for-script-R.patch
extra/0023-acpi-cpuhp-fix-guest-visible-maximum-access-size-to-.patch
+extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
+extra/0025-target-i386-Fix-BEXTR-instruction.patch
+extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
+extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
+extra/0028-target-i386-Fix-BZHI-instruction.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
--
2.30.2
next reply other threads:[~2023-03-17 12:47 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-17 12:47 Fiona Ebner [this message]
2023-03-17 15:09 ` [pve-devel] applied: " Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230317124711.812727-1-f.ebner@proxmox.com \
--to=f.ebner@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox