From 9723b435fb1a6f8617323a2b99b8ad48980d98a8 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Wed, 8 Jun 2022 15:40:28 -0700 Subject: [PATCH] nv2a: Make multiplication by 0 match HW behavior. Fixes #1008 The nv2a returns 0 for anything multiplied by zero, including exceptional values such as Inf and NaN. Desktop GPUs do not enforce this, leading to conditions where NaNs wipe out calculations and lead to erroneous behavior. [Test](https://github.com/abaire/nxdk_vsh_tests/blob/main/src/tests/americasarmyshader.cpp) [HW Results](https://github.com/abaire/nxdk_vsh_tests_golden_results/wiki/Results-AmericasArmyShader) --- hw/xbox/nv2a/vsh.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/vsh.c b/hw/xbox/nv2a/vsh.c index 87318419c95..5f338e7368c 100644 --- a/hw/xbox/nv2a/vsh.c +++ b/hw/xbox/nv2a/vsh.c @@ -636,7 +636,16 @@ static const char* vsh_header = "#define MUL(dest, mask, src0, src1) dest.mask = _MUL(_in(src0), _in(src1)).mask\n" "vec4 _MUL(vec4 src0, vec4 src1)\n" "{\n" - " return src0 * src1;\n" + // Unfortunately mix() falls victim to the same handling of exceptional + // (inf/NaN) values as a multiply, so per-component comparisons are used + // to guarantee HW behavior (anything * 0 must == 0). 
+ " vec4 zero_components = sign(src0) * sign(src1);\n" + " vec4 ret = src0 * src1;\n" + " if (zero_components.x == 0.0) { ret.x = 0.0; }\n" + " if (zero_components.y == 0.0) { ret.y = 0.0; }\n" + " if (zero_components.z == 0.0) { ret.z = 0.0; }\n" + " if (zero_components.w == 0.0) { ret.w = 0.0; }\n" + " return ret;\n" "}\n" "\n" "#define ADD(dest, mask, src0, src1) dest.mask = _ADD(_in(src0), _in(src1)).mask\n" @@ -648,7 +657,7 @@ static const char* vsh_header = "#define MAD(dest, mask, src0, src1, src2) dest.mask = _MAD(_in(src0), _in(src1), _in(src2)).mask\n" "vec4 _MAD(vec4 src0, vec4 src1, vec4 src2)\n" "{\n" - " return src0 * src1 + src2;\n" + " return _MUL(src0, src1) + src2;\n" "}\n" "\n" "#define DP3(dest, mask, src0, src1) dest.mask = _DP3(_in(src0), _in(src1)).mask\n"