From a3c15482968d2d5481f661dc930c481cd1c0c077 Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Tue, 29 Aug 2023 01:34:33 +0000 Subject: [PATCH 1/7] 8312570: [TESTBUG] Jtreg compiler/loopopts/superword/TestDependencyOffsets.java fails on 512-bit SVE Reviewed-by: epeter, kvn --- .../superword/TestDependencyOffsets.java | 2702 +++++++++++++---- 1 file changed, 2062 insertions(+), 640 deletions(-) diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java index 132844801a1..105b07b8758 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java @@ -27,7 +27,7 @@ * and various MaxVectorSize values, and +- AlignVector. * * Note: this test is auto-generated. Please modify / generate with script: - * https://bugs.openjdk.org/browse/JDK-8310308 + * https://bugs.openjdk.org/browse/JDK-8312570 * * Types: int, long, short, char, byte, float, double * Offsets: 0, -1, 1, -2, 2, -3, 3, -4, 4, -7, 7, -8, 8, -14, 14, -16, 16, -18, 18, -20, 20, -31, 31, -32, 32, -63, 63, -64, 64, -65, 65, -128, 128, -129, 129, -192, 192 @@ -90,7 +90,7 @@ /* * @test id=vanilla-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @library /test/lib / @@ -99,7 +99,7 @@ /* * @test id=vanilla-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @library /test/lib / @@ -108,7 +108,7 @@ /* * @test id=sse4-v016-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -119,7 +119,7 @@ /* * @test id=sse4-v016-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -130,7 +130,7 @@ /* * @test id=sse4-v008-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -141,7 +141,7 @@ /* * @test id=sse4-v008-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -152,7 +152,7 @@ /* * @test id=sse4-v004-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -163,7 +163,7 @@ /* * @test id=sse4-v004-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -174,7 +174,7 @@ /* * @test id=sse4-v002-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -185,7 +185,7 @@ /* * @test id=sse4-v002-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -196,7 +196,7 @@ /* * @test id=avx1-v032-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -207,7 +207,7 @@ /* * @test id=avx1-v032-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -218,7 +218,7 @@ /* * @test id=avx1-v016-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -229,7 +229,7 @@ /* * @test id=avx1-v016-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -240,7 +240,7 @@ /* * @test id=avx2-v032-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -251,7 +251,7 @@ /* * @test id=avx2-v032-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -262,7 +262,7 @@ /* * @test id=avx2-v016-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -273,7 +273,7 @@ /* * @test id=avx2-v016-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -284,7 +284,7 @@ /* * @test id=avx512-v064-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -295,7 +295,7 @@ /* * @test id=avx512-v064-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -306,7 +306,7 @@ /* * @test id=avx512-v032-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -317,7 +317,7 @@ /* * @test id=avx512-v032-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -328,7 +328,7 @@ /* * @test id=avx512bw-v064-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -339,7 +339,7 @@ /* * @test id=avx512bw-v064-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -350,7 +350,7 @@ /* * @test id=avx512bw-v032-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -361,7 +361,7 @@ /* * @test id=avx512bw-v032-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -372,7 +372,7 @@ /* * @test id=vec-v064-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -382,7 +382,7 @@ /* * @test id=vec-v064-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -392,7 +392,7 @@ /* * @test id=vec-v032-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -402,7 +402,7 @@ /* * @test id=vec-v032-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -412,7 +412,7 @@ /* * @test id=vec-v016-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -422,7 +422,7 @@ /* * @test id=vec-v016-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -432,7 +432,7 @@ /* * @test id=vec-v008-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -442,7 +442,7 @@ /* * @test id=vec-v008-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -452,7 +452,7 @@ /* * @test id=vec-v004-A - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -462,7 +462,7 @@ /* * @test id=vec-v004-U - * @bug 8298935 8308606 + * @bug 8298935 8308606 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -1400,10 +1400,14 @@ public static void main(String args[]) { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP0(int[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (int)(data[j] * (int)-11); @@ -1444,14 +1448,22 @@ public static void runIntP0() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM1(int[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (int)(data[j] * (int)-11); @@ -1477,7 +1489,10 @@ public static void runIntM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testIntP1(int[] data) { @@ -1508,10 +1523,14 @@ public static void runIntP1() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM2(int[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (int)(data[j] * (int)-11); @@ -1543,11 +1562,16 @@ public static void runIntM2() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP2(int[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (int)(data[j] * (int)-11); @@ -1588,14 +1612,22 @@ public static void runIntP2() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM3(int[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (int)(data[j] * (int)-11); @@ -1627,11 +1659,16 @@ public static void runIntM3() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 12 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP3(int[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (int)(data[j] * (int)-11); @@ -1660,10 +1697,14 @@ public static void runIntP3() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM4(int[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (int)(data[j] * (int)-11); @@ -1698,11 +1739,19 @@ public static void runIntM4() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP4(int[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (int)(data[j] * (int)-11); @@ -1743,14 +1792,22 @@ public static void runIntP4() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM7(int[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (int)(data[j] * (int)-11); @@ -1781,11 +1838,15 @@ public static void runIntM7() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP7(int[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (int)(data[j] * (int)-11); @@ -1814,10 +1875,14 @@ public static void runIntP7() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM8(int[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (int)(data[j] * (int)-11); @@ -1855,14 +1920,19 @@ public static void runIntM8() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP8(int[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (int)(data[j] * (int)-11); @@ -1891,10 +1961,14 @@ public static void runIntP8() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM14(int[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (int)(data[j] * (int)-11); @@ -1924,10 +1998,15 @@ public static void runIntM14() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP14(int[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (int)(data[j] * (int)-11); @@ -1956,10 +2035,14 @@ public static void runIntP14() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM16(int[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (int)(data[j] * (int)-11); @@ -2000,14 +2083,19 @@ public static void runIntM16() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP16(int[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (int)(data[j] * (int)-11); @@ -2036,10 +2124,14 @@ public static void runIntP16() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM18(int[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (int)(data[j] * (int)-11); @@ -2068,10 +2160,15 @@ public static void runIntM18() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 72 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 72"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP18(int[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (int)(data[j] * (int)-11); @@ -2100,10 +2197,14 @@ public static void runIntP18() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM20(int[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (int)(data[j] * (int)-11); @@ -2136,10 +2237,19 @@ public static void runIntM20() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 80 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 80"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP20(int[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (int)(data[j] * (int)-11); @@ -2180,14 +2290,22 @@ public static void runIntP20() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM31(int[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (int)(data[j] * (int)-11); @@ -2216,10 +2334,15 @@ public static void runIntM31() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 124 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 124"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP31(int[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (int)(data[j] * (int)-11); @@ -2248,10 +2371,14 @@ public static void runIntP31() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM32(int[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (int)(data[j] * (int)-11); @@ -2292,14 +2419,19 @@ public static void runIntM32() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP32(int[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (int)(data[j] * (int)-11); @@ -2340,14 +2472,22 @@ public static void runIntP32() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM63(int[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (int)(data[j] * (int)-11); @@ -2376,10 +2516,15 @@ public static void runIntM63() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 252 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 252"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP63(int[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (int)(data[j] * (int)-11); @@ -2408,10 +2553,14 @@ public static void runIntP63() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM64(int[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (int)(data[j] * (int)-11); @@ -2452,14 +2601,22 @@ public static void runIntM64() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP64(int[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (int)(data[j] * (int)-11); @@ -2500,14 +2657,22 @@ public static void runIntP64() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM65(int[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (int)(data[j] * (int)-11); @@ -2536,10 +2701,14 @@ public static void runIntM65() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP65(int[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (int)(data[j] * (int)-11); @@ -2568,10 +2737,14 @@ public static void runIntP65() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM128(int[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (int)(data[j] * (int)-11); @@ -2612,14 +2785,22 @@ public static void runIntM128() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP128(int[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (int)(data[j] * (int)-11); @@ -2660,14 +2841,22 @@ public static void runIntP128() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM129(int[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (int)(data[j] * (int)-11); @@ -2696,10 +2885,14 @@ public static void runIntM129() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP129(int[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (int)(data[j] * (int)-11); @@ -2728,10 +2921,14 @@ public static void runIntP129() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM192(int[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (int)(data[j] * (int)-11); @@ -2772,14 +2969,22 @@ public static void runIntM192() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP192(int[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (int)(data[j] * (int)-11); @@ -2808,10 +3013,14 @@ public static void runIntP192() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP0(long[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (long)(data[j] + (long)-11); @@ -2852,14 +3061,22 @@ public static void runLongP0() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM1(long[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (long)(data[j] + (long)-11); @@ -2885,7 +3102,10 @@ public static void runLongM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testLongP1(long[] data) { @@ -2916,10 +3136,14 @@ public static void runLongP1() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM2(long[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (long)(data[j] + (long)-11); @@ -2954,11 +3178,19 @@ public static void runLongM2() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP2(long[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (long)(data[j] + (long)-11); @@ -2999,14 +3231,22 @@ public static void runLongP2() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM3(long[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (long)(data[j] + (long)-11); @@ -3037,11 +3277,15 @@ public static void runLongM3() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 24 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP3(long[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (long)(data[j] + (long)-11); @@ -3070,10 +3314,14 @@ public static void runLongP3() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM4(long[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (long)(data[j] + (long)-11); @@ -3111,14 +3359,19 @@ public static void runLongM4() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP4(long[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (long)(data[j] + (long)-11); @@ -3159,14 +3412,22 @@ public static void runLongP4() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM7(long[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (long)(data[j] + (long)-11); @@ -3196,10 +3457,15 @@ public static void runLongM7() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP7(long[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (long)(data[j] + (long)-11); @@ -3228,10 +3494,14 @@ public static void runLongP7() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM8(long[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (long)(data[j] + (long)-11); @@ -3272,14 +3542,19 @@ public static void runLongM8() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP8(long[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (long)(data[j] + (long)-11); @@ -3308,10 +3583,14 @@ public static void runLongP8() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM14(long[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (long)(data[j] + (long)-11); @@ -3344,10 +3623,19 @@ public static void runLongM14() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 112 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 112"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP14(long[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (long)(data[j] + (long)-11); @@ -3376,10 +3664,14 @@ public static void runLongP14() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM16(long[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (long)(data[j] + (long)-11); @@ -3420,14 +3712,19 @@ public static void runLongM16() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP16(long[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (long)(data[j] + (long)-11); @@ -3456,10 +3753,14 @@ public static void runLongP16() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM18(long[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (long)(data[j] + (long)-11); @@ -3492,10 +3793,19 @@ public static void runLongM18() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 144 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 144"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP18(long[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (long)(data[j] + (long)-11); @@ -3524,10 +3834,14 @@ public static void runLongP18() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM20(long[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (long)(data[j] + (long)-11); @@ -3564,14 +3878,19 @@ public static void runLongM20() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 160 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 160"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP20(long[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (long)(data[j] + (long)-11); @@ -3612,14 +3931,22 @@ public static void runLongP20() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM31(long[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (long)(data[j] + (long)-11); @@ -3648,10 +3975,15 @@ public static void runLongM31() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 248 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 248"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP31(long[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (long)(data[j] + (long)-11); @@ -3680,10 +4012,14 @@ public static void runLongP31() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM32(long[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (long)(data[j] + (long)-11); @@ -3724,14 +4060,22 @@ public static void runLongM32() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP32(long[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (long)(data[j] + (long)-11); @@ -3772,14 +4116,22 @@ public static void runLongP32() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM63(long[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (long)(data[j] + (long)-11); @@ -3808,10 +4160,14 @@ public static void runLongM63() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP63(long[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (long)(data[j] + (long)-11); @@ -3840,10 +4196,14 @@ public static void runLongP63() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM64(long[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (long)(data[j] + (long)-11); @@ -3884,14 +4244,22 @@ public static void runLongM64() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP64(long[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (long)(data[j] + (long)-11); @@ -3932,14 +4300,22 @@ public static void runLongP64() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM65(long[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (long)(data[j] + (long)-11); @@ -3968,10 +4344,14 @@ public static void runLongM65() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP65(long[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (long)(data[j] + (long)-11); @@ -4000,10 +4380,14 @@ public static void runLongP65() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM128(long[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (long)(data[j] + (long)-11); @@ -4044,14 +4428,22 @@ public static void runLongM128() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP128(long[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (long)(data[j] + (long)-11); @@ -4092,14 +4484,22 @@ public static void runLongP128() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM129(long[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (long)(data[j] + (long)-11); @@ -4128,10 +4528,14 @@ public static void runLongM129() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP129(long[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (long)(data[j] + (long)-11); @@ -4160,10 +4564,14 @@ public static void runLongP129() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM192(long[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (long)(data[j] + (long)-11); @@ -4204,14 +4612,22 @@ public static void runLongM192() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP192(long[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (long)(data[j] + (long)-11); @@ -4240,10 +4656,14 @@ public static void runLongP192() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP0(short[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (short)(data[j] * (short)-11); @@ -4284,14 +4704,22 @@ public static void runShortP0() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM1(short[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (short)(data[j] * (short)-11); @@ -4317,7 +4745,10 @@ public static void runShortM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testShortP1(short[] data) { @@ -4348,10 +4779,14 @@ public static void runShortP1() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM2(short[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (short)(data[j] * (short)-11); @@ -4383,11 +4818,16 @@ public static void runShortM2() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP2(short[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (short)(data[j] * (short)-11); @@ -4428,14 +4868,22 @@ public static void runShortP2() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM3(short[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (short)(data[j] * (short)-11); @@ -4467,11 +4915,16 @@ public static void runShortM3() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 6 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP3(short[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (short)(data[j] * (short)-11); @@ -4500,10 +4953,14 @@ public static void runShortP3() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM4(short[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (short)(data[j] * (short)-11); @@ -4535,11 +4992,16 @@ public static void runShortM4() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP4(short[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (short)(data[j] * (short)-11); @@ -4580,14 +5042,22 @@ public static void runShortP4() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM7(short[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (short)(data[j] * (short)-11); @@ -4619,11 +5089,16 @@ public static void runShortM7() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP7(short[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (short)(data[j] * (short)-11); @@ -4652,10 +5127,14 @@ public static void runShortP7() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM8(short[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (short)(data[j] * (short)-11); @@ -4690,11 +5169,19 @@ public static void runShortM8() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP8(short[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (short)(data[j] * (short)-11); @@ -4723,10 +5210,14 @@ public static void runShortP8() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM14(short[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (short)(data[j] * (short)-11); @@ -4757,11 +5248,15 @@ public static void runShortM14() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP14(short[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (short)(data[j] * (short)-11); @@ -4790,10 +5285,14 @@ public static void runShortP14() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM16(short[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (short)(data[j] * (short)-11); @@ -4831,14 +5330,19 @@ public static void runShortM16() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP16(short[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (short)(data[j] * (short)-11); @@ -4867,10 +5371,14 @@ public static void runShortP16() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM18(short[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (short)(data[j] * (short)-11); @@ -4900,10 +5408,15 @@ public static void runShortM18() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP18(short[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (short)(data[j] * (short)-11); @@ -4932,10 +5445,14 @@ public static void runShortP18() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM20(short[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (short)(data[j] * (short)-11); @@ -4965,10 +5482,15 @@ public static void runShortM20() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP20(short[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (short)(data[j] * (short)-11); @@ -5009,14 +5531,22 @@ public static void runShortP20() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM31(short[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (short)(data[j] * (short)-11); @@ -5046,10 +5576,15 @@ public static void runShortM31() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP31(short[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (short)(data[j] * (short)-11); @@ -5078,10 +5613,14 @@ public static void runShortP31() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM32(short[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (short)(data[j] * (short)-11); @@ -5122,14 +5661,19 @@ public static void runShortM32() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP32(short[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (short)(data[j] * (short)-11); @@ -5170,14 +5714,22 @@ public static void runShortP32() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM63(short[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (short)(data[j] * (short)-11); @@ -5206,10 +5758,15 @@ public static void runShortM63() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 126 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 126"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP63(short[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (short)(data[j] * (short)-11); @@ -5238,10 +5795,14 @@ public static void runShortP63() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM64(short[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (short)(data[j] * (short)-11); @@ -5282,14 +5843,19 @@ public static void runShortM64() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP64(short[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (short)(data[j] * (short)-11); @@ -5330,14 +5896,22 @@ public static void runShortP64() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM65(short[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (short)(data[j] * (short)-11); @@ -5366,10 +5940,15 @@ public static void runShortM65() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 130 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 130"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP65(short[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (short)(data[j] * (short)-11); @@ -5398,10 +5977,14 @@ public static void runShortP65() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM128(short[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (short)(data[j] * (short)-11); @@ -5442,14 +6025,22 @@ public static void runShortM128() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP128(short[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (short)(data[j] * (short)-11); @@ -5490,14 +6081,22 @@ public static void runShortP128() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM129(short[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (short)(data[j] * (short)-11); @@ -5526,10 +6125,14 @@ public static void runShortM129() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP129(short[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (short)(data[j] * (short)-11); @@ -5558,10 +6161,14 @@ public static void runShortP129() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM192(short[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (short)(data[j] * (short)-11); @@ -5602,14 +6209,18 @@ public static void runShortM192() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP192(short[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (short)(data[j] * (short)-11); @@ -5638,10 +6249,14 @@ public static void runShortP192() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP0(char[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (char)(data[j] * (char)-11); @@ -5682,14 +6297,22 @@ public static void runCharP0() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM1(char[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (char)(data[j] * (char)-11); @@ -5715,7 +6338,10 @@ public static void runCharM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testCharP1(char[] data) { @@ -5746,10 +6372,14 @@ public static void runCharP1() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM2(char[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (char)(data[j] * (char)-11); @@ -5781,11 +6411,16 @@ public static void runCharM2() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP2(char[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (char)(data[j] * (char)-11); @@ -5826,14 +6461,22 @@ public static void runCharP2() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM3(char[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (char)(data[j] * (char)-11); @@ -5865,11 +6508,16 @@ public static void runCharM3() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 // positive byte_offset 6 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP3(char[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (char)(data[j] * (char)-11); @@ -5898,10 +6546,14 @@ public static void runCharP3() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM4(char[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (char)(data[j] * (char)-11); @@ -5933,11 +6585,16 @@ public static void runCharM4() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP4(char[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (char)(data[j] * (char)-11); @@ -5978,14 +6635,22 @@ public static void runCharP4() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM7(char[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (char)(data[j] * (char)-11); @@ -6017,11 +6682,16 @@ public static void runCharM7() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP7(char[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (char)(data[j] * (char)-11); @@ -6050,10 +6720,14 @@ public static void runCharP7() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM8(char[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (char)(data[j] * (char)-11); @@ -6088,11 +6762,19 @@ public static void runCharM8() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP8(char[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (char)(data[j] * (char)-11); @@ -6121,10 +6803,14 @@ public static void runCharP8() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM14(char[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (char)(data[j] * (char)-11); @@ -6155,11 +6841,15 @@ public static void runCharM14() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP14(char[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (char)(data[j] * (char)-11); @@ -6188,10 +6878,14 @@ public static void runCharP14() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM16(char[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (char)(data[j] * (char)-11); @@ -6229,14 +6923,19 @@ public static void runCharM16() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP16(char[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (char)(data[j] * (char)-11); @@ -6265,10 +6964,14 @@ public static void runCharP16() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM18(char[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (char)(data[j] * (char)-11); @@ -6298,10 +7001,15 @@ public static void runCharM18() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP18(char[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (char)(data[j] * (char)-11); @@ -6330,10 +7038,14 @@ public static void runCharP18() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM20(char[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (char)(data[j] * (char)-11); @@ -6363,10 +7075,15 @@ public static void runCharM20() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP20(char[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (char)(data[j] * (char)-11); @@ -6407,14 +7124,22 @@ public static void runCharP20() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM31(char[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (char)(data[j] * (char)-11); @@ -6444,10 +7169,15 @@ public static void runCharM31() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP31(char[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (char)(data[j] * (char)-11); @@ -6476,10 +7206,14 @@ public static void runCharP31() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM32(char[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (char)(data[j] * (char)-11); @@ -6520,14 +7254,19 @@ public static void runCharM32() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP32(char[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (char)(data[j] * (char)-11); @@ -6568,14 +7307,22 @@ public static void runCharP32() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM63(char[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (char)(data[j] * (char)-11); @@ -6604,10 +7351,15 @@ public static void runCharM63() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 126 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 126"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP63(char[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (char)(data[j] * (char)-11); @@ -6636,10 +7388,14 @@ public static void runCharP63() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM64(char[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (char)(data[j] * (char)-11); @@ -6680,14 +7436,19 @@ public static void runCharM64() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP64(char[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (char)(data[j] * (char)-11); @@ -6728,14 +7489,22 @@ public static void runCharP64() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM65(char[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (char)(data[j] * (char)-11); @@ -6764,10 +7533,15 @@ public static void runCharM65() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 130 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 130"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP65(char[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (char)(data[j] * (char)-11); @@ -6796,10 +7570,14 @@ public static void runCharP65() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM128(char[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (char)(data[j] * (char)-11); @@ -6840,14 +7618,22 @@ public static void runCharM128() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP128(char[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (char)(data[j] * (char)-11); @@ -6888,14 +7674,22 @@ public static void runCharP128() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM129(char[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (char)(data[j] * (char)-11); @@ -6924,10 +7718,14 @@ public static void runCharM129() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP129(char[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (char)(data[j] * (char)-11); @@ -6956,10 +7754,14 @@ public static void runCharP129() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM192(char[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (char)(data[j] * (char)-11); @@ -7000,14 +7802,18 @@ public static void runCharM192() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP192(char[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (char)(data[j] * (char)-11); @@ -7036,10 +7842,14 @@ public static void runCharP192() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP0(byte[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (byte)(data[j] * (byte)11); @@ -7080,14 +7890,22 @@ public static void runByteP0() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM1(byte[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (byte)(data[j] * (byte)11); @@ -7113,7 +7931,10 @@ public static void runByteM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 1 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 1 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP1(byte[] data) { @@ -7156,14 +7977,22 @@ public static void runByteP1() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM2(byte[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (byte)(data[j] * (byte)11); @@ -7189,7 +8018,10 @@ public static void runByteM2() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP2(byte[] data) { @@ -7232,14 +8064,22 @@ public static void runByteP2() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM3(byte[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (byte)(data[j] * (byte)11); @@ -7265,7 +8105,10 @@ public static void runByteM3() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 3 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 3 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP3(byte[] data) { @@ -7296,10 +8139,14 @@ public static void runByteP3() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM4(byte[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (byte)(data[j] * (byte)11); @@ -7331,11 +8178,16 @@ public static void runByteM4() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP4(byte[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (byte)(data[j] * (byte)11); @@ -7376,14 +8228,22 @@ public static void runByteP4() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM7(byte[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (byte)(data[j] * (byte)11); @@ -7415,11 +8275,16 @@ public static void runByteM7() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 // positive byte_offset 7 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP7(byte[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (byte)(data[j] * (byte)11); @@ -7448,10 +8313,14 @@ public static void runByteP7() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM8(byte[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (byte)(data[j] * (byte)11); @@ -7483,11 +8352,16 @@ public static void runByteM8() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP8(byte[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (byte)(data[j] * (byte)11); @@ -7528,14 +8402,22 @@ public static void runByteP8() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM14(byte[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (byte)(data[j] * (byte)11); @@ -7567,11 +8449,16 @@ public static void runByteM14() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP14(byte[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (byte)(data[j] * (byte)11); @@ -7600,10 +8487,14 @@ public static void runByteP14() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM16(byte[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (byte)(data[j] * (byte)11); @@ -7638,11 +8529,19 @@ public static void runByteM16() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP16(byte[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (byte)(data[j] * (byte)11); @@ -7683,14 +8582,22 @@ public static void runByteP16() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM18(byte[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (byte)(data[j] * (byte)11); @@ -7721,11 +8628,15 @@ public static void runByteM18() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 18 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP18(byte[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (byte)(data[j] * (byte)11); @@ -7754,10 +8665,14 @@ public static void runByteP18() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM20(byte[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (byte)(data[j] * (byte)11); @@ -7788,11 +8703,15 @@ public static void runByteM20() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 20 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP20(byte[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (byte)(data[j] * (byte)11); @@ -7833,14 +8752,22 @@ public static void runByteP20() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM31(byte[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (byte)(data[j] * (byte)11); @@ -7871,11 +8798,15 @@ public static void runByteM31() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 31 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP31(byte[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (byte)(data[j] * (byte)11); @@ -7904,10 +8835,14 @@ public static void runByteP31() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM32(byte[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (byte)(data[j] * (byte)11); @@ -7945,14 +8880,19 @@ public static void runByteM32() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP32(byte[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (byte)(data[j] * (byte)11); @@ -7993,14 +8933,22 @@ public static void runByteP32() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM63(byte[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (byte)(data[j] * (byte)11); @@ -8030,10 +8978,15 @@ public static void runByteM63() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 63"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 63 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 63"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP63(byte[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (byte)(data[j] * (byte)11); @@ -8062,10 +9015,14 @@ public static void runByteP63() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM64(byte[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (byte)(data[j] * (byte)11); @@ -8106,14 +9063,19 @@ public static void runByteM64() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP64(byte[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (byte)(data[j] * (byte)11); @@ -8154,14 +9116,22 @@ public static void runByteP64() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM65(byte[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (byte)(data[j] * (byte)11); @@ -8190,10 +9160,15 @@ public static void runByteM65() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 65 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 65"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP65(byte[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (byte)(data[j] * (byte)11); @@ -8222,10 +9197,14 @@ public static void runByteP65() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM128(byte[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (byte)(data[j] * (byte)11); @@ -8266,14 +9245,19 @@ public static void runByteM128() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP128(byte[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (byte)(data[j] * (byte)11); @@ -8314,14 +9298,22 @@ public static void runByteP128() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM129(byte[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (byte)(data[j] * (byte)11); @@ -8350,10 +9342,15 @@ public static void runByteM129() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 129 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 129"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP129(byte[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (byte)(data[j] * (byte)11); @@ -8382,10 +9379,14 @@ public static void runByteP129() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM192(byte[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (byte)(data[j] * (byte)11); @@ -8426,14 +9427,19 @@ public static void runByteM192() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 192 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 192"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP192(byte[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (byte)(data[j] * (byte)11); @@ -8462,10 +9468,14 @@ public static void runByteP192() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP0(float[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (float)(data[j] * (float)1.001f); @@ -8506,14 +9516,22 @@ public static void runFloatP0() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM1(float[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (float)(data[j] * (float)1.001f); @@ -8539,7 +9557,10 @@ public static void runFloatM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testFloatP1(float[] data) { @@ -8570,10 +9591,14 @@ public static void runFloatP1() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM2(float[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (float)(data[j] * (float)1.001f); @@ -8605,11 +9630,16 @@ public static void runFloatM2() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP2(float[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (float)(data[j] * (float)1.001f); @@ -8650,14 +9680,22 @@ public static void runFloatP2() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM3(float[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (float)(data[j] * (float)1.001f); @@ -8689,11 +9727,16 @@ public static void runFloatM3() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 12 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP3(float[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (float)(data[j] * (float)1.001f); @@ -8722,10 +9765,14 @@ public static void runFloatP3() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM4(float[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (float)(data[j] * (float)1.001f); @@ -8760,11 +9807,19 @@ public static void runFloatM4() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP4(float[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (float)(data[j] * (float)1.001f); @@ -8805,14 +9860,22 @@ public static void runFloatP4() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM7(float[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (float)(data[j] * (float)1.001f); @@ -8843,11 +9906,15 @@ public static void runFloatM7() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP7(float[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (float)(data[j] * (float)1.001f); @@ -8876,10 +9943,14 @@ public static void runFloatP7() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM8(float[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (float)(data[j] * (float)1.001f); @@ -8917,14 +9988,19 @@ public static void runFloatM8() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP8(float[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (float)(data[j] * (float)1.001f); @@ -8953,10 +10029,14 @@ public static void runFloatP8() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM14(float[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (float)(data[j] * (float)1.001f); @@ -8986,10 +10066,15 @@ public static void runFloatM14() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP14(float[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (float)(data[j] * (float)1.001f); @@ -9018,10 +10103,14 @@ public static void runFloatP14() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM16(float[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (float)(data[j] * (float)1.001f); @@ -9062,14 +10151,19 @@ public static void runFloatM16() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP16(float[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (float)(data[j] * (float)1.001f); @@ -9098,10 +10192,14 @@ public static void runFloatP16() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM18(float[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (float)(data[j] * (float)1.001f); @@ -9130,10 +10228,15 @@ public static void runFloatM18() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 72 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 72"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP18(float[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (float)(data[j] * (float)1.001f); @@ -9162,10 +10265,14 @@ public static void runFloatP18() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM20(float[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (float)(data[j] * (float)1.001f); @@ -9198,10 +10305,19 @@ public static void runFloatM20() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 80 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 80"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP20(float[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (float)(data[j] * (float)1.001f); @@ -9242,14 +10358,22 @@ public static void runFloatP20() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM31(float[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (float)(data[j] * (float)1.001f); @@ -9278,10 +10402,15 @@ public static void runFloatM31() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 124 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 124"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP31(float[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (float)(data[j] * (float)1.001f); @@ -9310,10 +10439,14 @@ public static void runFloatP31() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM32(float[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (float)(data[j] * (float)1.001f); @@ -9354,14 +10487,19 @@ public static void runFloatM32() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP32(float[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (float)(data[j] * (float)1.001f); @@ -9402,14 +10540,22 @@ public static void runFloatP32() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM63(float[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (float)(data[j] * (float)1.001f); @@ -9438,10 +10584,15 @@ public static void runFloatM63() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 252 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 252"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP63(float[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (float)(data[j] * (float)1.001f); @@ -9470,10 +10621,14 @@ public static void runFloatP63() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM64(float[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (float)(data[j] * (float)1.001f); @@ -9514,14 +10669,22 @@ public static void runFloatM64() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP64(float[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (float)(data[j] * (float)1.001f); @@ -9562,14 +10725,22 @@ public static void runFloatP64() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM65(float[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (float)(data[j] * (float)1.001f); @@ -9598,10 +10769,14 @@ public static void runFloatM65() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP65(float[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (float)(data[j] * (float)1.001f); @@ -9630,10 +10805,14 @@ public static void runFloatP65() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM128(float[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (float)(data[j] * (float)1.001f); @@ -9674,14 +10853,22 @@ public static void runFloatM128() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP128(float[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (float)(data[j] * (float)1.001f); @@ -9722,14 +10909,22 @@ public static void runFloatP128() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM129(float[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (float)(data[j] * (float)1.001f); @@ -9758,10 +10953,14 @@ public static void runFloatM129() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP129(float[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (float)(data[j] * (float)1.001f); @@ -9790,10 +10989,14 @@ public static void runFloatP129() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM192(float[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (float)(data[j] * (float)1.001f); @@ -9834,14 +11037,22 @@ public static void runFloatM192() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP192(float[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (float)(data[j] * (float)1.001f); @@ -9870,10 +11081,14 @@ public static void runFloatP192() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP0(double[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (double)(data[j] * (double)1.001); @@ -9914,14 +11129,22 @@ public static void runDoubleP0() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM1(double[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (double)(data[j] * (double)1.001); @@ -9947,7 +11170,10 @@ public static void runDoubleM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testDoubleP1(double[] data) { @@ -9978,10 +11204,14 @@ public static void runDoubleP1() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM2(double[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (double)(data[j] * (double)1.001); @@ -10016,11 +11246,19 @@ public static void runDoubleM2() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP2(double[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (double)(data[j] * (double)1.001); @@ -10061,14 +11299,22 @@ public static void runDoubleP2() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM3(double[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (double)(data[j] * (double)1.001); @@ -10099,11 +11345,15 @@ public static void runDoubleM3() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 24 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP3(double[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (double)(data[j] * (double)1.001); @@ -10132,10 +11382,14 @@ public static void runDoubleP3() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM4(double[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (double)(data[j] * (double)1.001); @@ -10173,14 +11427,19 @@ public static void runDoubleM4() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP4(double[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (double)(data[j] * (double)1.001); @@ -10221,14 +11480,22 @@ public static void runDoubleP4() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM7(double[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (double)(data[j] * (double)1.001); @@ -10258,10 +11525,15 @@ public static void runDoubleM7() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP7(double[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (double)(data[j] * (double)1.001); @@ -10290,10 +11562,14 @@ public static void runDoubleP7() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM8(double[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (double)(data[j] * (double)1.001); @@ -10334,14 +11610,19 @@ public static void runDoubleM8() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP8(double[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (double)(data[j] * (double)1.001); @@ -10370,10 +11651,14 @@ public static void runDoubleP8() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM14(double[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (double)(data[j] * (double)1.001); @@ -10406,10 +11691,19 @@ public static void runDoubleM14() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 112 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 112"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP14(double[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (double)(data[j] * (double)1.001); @@ -10438,10 +11732,14 @@ public static void runDoubleP14() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM16(double[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (double)(data[j] * (double)1.001); @@ -10482,14 +11780,19 @@ public static void runDoubleM16() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP16(double[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (double)(data[j] * (double)1.001); @@ -10518,10 +11821,14 @@ public static void runDoubleP16() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM18(double[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (double)(data[j] * (double)1.001); @@ -10554,10 +11861,19 @@ public static void runDoubleM18() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 144 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 144"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP18(double[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (double)(data[j] * (double)1.001); @@ -10586,10 +11902,14 @@ public static void runDoubleP18() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM20(double[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (double)(data[j] * (double)1.001); @@ -10626,14 +11946,19 @@ public static void runDoubleM20() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 160 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 160"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP20(double[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (double)(data[j] * (double)1.001); @@ -10674,14 +11999,22 @@ public static void runDoubleP20() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM31(double[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (double)(data[j] * (double)1.001); @@ -10710,10 +12043,15 @@ public static void runDoubleM31() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 248 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 248"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP31(double[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (double)(data[j] * (double)1.001); @@ -10742,10 +12080,14 @@ public static void runDoubleP31() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM32(double[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (double)(data[j] * (double)1.001); @@ -10786,14 +12128,22 @@ public static void runDoubleM32() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP32(double[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (double)(data[j] * (double)1.001); @@ -10834,14 +12184,22 @@ public static void runDoubleP32() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM63(double[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (double)(data[j] * (double)1.001); @@ -10870,10 +12228,14 @@ public static void runDoubleM63() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP63(double[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (double)(data[j] * (double)1.001); @@ -10902,10 +12264,14 @@ public static void runDoubleP63() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM64(double[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (double)(data[j] * (double)1.001); @@ -10946,14 +12312,22 @@ public static void runDoubleM64() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP64(double[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (double)(data[j] * (double)1.001); @@ -10994,14 +12368,22 @@ public static void runDoubleP64() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM65(double[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (double)(data[j] * (double)1.001); @@ -11030,10 +12412,14 @@ public static void runDoubleM65() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP65(double[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (double)(data[j] * (double)1.001); @@ -11062,10 +12448,14 @@ public static void runDoubleP65() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM128(double[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (double)(data[j] * (double)1.001); @@ -11106,14 +12496,22 @@ public static void runDoubleM128() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP128(double[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (double)(data[j] * (double)1.001); @@ -11154,14 +12552,22 @@ public static void runDoubleP128() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM129(double[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (double)(data[j] * (double)1.001); @@ -11190,10 +12596,14 @@ public static void runDoubleM129() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP129(double[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (double)(data[j] * (double)1.001); @@ -11222,10 +12632,14 @@ public static void runDoubleP129() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM192(double[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (double)(data[j] * (double)1.001); @@ -11266,14 +12680,22 @@ public static void runDoubleM192() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP192(double[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (double)(data[j] * (double)1.001); From 2f1c816671fe8ad5c37f37510ee2f753571fd886 Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Tue, 29 Aug 2023 01:39:54 +0000 Subject: [PATCH 2/7] 8309697: [TESTBUG] Remove "@requires vm.flagless" from jtreg vectorization tests Reviewed-by: kvn, thartmann, epeter, chagedorn --- .../vectorization/runner/ArrayCopyTest.java | 2 +- .../runner/ArrayIndexFillTest.java | 2 +- .../runner/ArrayInvariantFillTest.java | 31 ++++++++++++++++++- .../runner/ArrayShiftOpTest.java | 2 +- .../runner/ArrayTypeConvertTest.java | 2 +- .../runner/ArrayUnsafeOpTest.java | 2 +- .../runner/BasicBooleanOpTest.java | 2 +- .../vectorization/runner/BasicByteOpTest.java | 2 +- .../vectorization/runner/BasicCharOpTest.java | 2 +- .../runner/BasicDoubleOpTest.java | 2 +- .../runner/BasicFloatOpTest.java | 2 +- .../vectorization/runner/BasicIntOpTest.java | 2 +- .../vectorization/runner/BasicLongOpTest.java | 2 +- .../runner/BasicShortOpTest.java | 2 +- .../runner/LoopArrayIndexComputeTest.java | 4 +-- .../runner/LoopCombinedOpTest.java | 2 +- .../runner/LoopControlFlowTest.java | 2 +- .../runner/LoopLiveOutNodesTest.java | 2 +- .../runner/LoopRangeStrideTest.java | 4 +-- .../runner/LoopReductionOpTest.java | 2 +- .../runner/MultipleLoopsTest.java | 2 +- .../runner/StripMinedLoopTest.java | 2 +- .../runner/VectorizationTestRunner.java | 30 +++++------------- 23 files changed, 61 insertions(+), 46 deletions(-) diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java index da132813ab3..f334b4226a3 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.ArrayCopyTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java index f9916fd63ec..a4eca0fe8dd 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java @@ -36,7 +36,7 @@ * compiler.vectorization.runner.ArrayIndexFillTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java index e4e68cc3fd7..2e7302bba05 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java @@ -35,9 +35,14 @@ * -XX:+WhiteBoxAPI * -XX:-OptimizeFill * compiler.vectorization.runner.ArrayInvariantFillTest + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -XX:+OptimizeFill + * compiler.vectorization.runner.ArrayInvariantFillTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; @@ -66,7 +71,11 @@ public ArrayInvariantFillTest() { // ---------------- Simple Fill ---------------- @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_B, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_B, "0"}) public byte[] fillByteArray() { byte[] res = new byte[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -77,7 +86,11 @@ public byte[] fillByteArray() { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_S, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_S, "0"}) public short[] fillShortArray() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -88,7 +101,11 @@ public short[] fillShortArray() { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_S, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_S, "0"}) public char[] fillCharArray() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -99,7 +116,11 @@ public char[] fillCharArray() { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_I, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_I, "0"}) public int[] fillIntArray() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -121,7 +142,11 @@ public long[] fillLongArray() { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_F, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_F, "0"}) public float[] fillFloatArray() { float[] res = new float[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -177,7 +202,11 @@ public long[] fillLongArrayWithFloat() { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "false"}, counts = {IRNode.REPLICATE_I, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, + applyIf = {"OptimizeFill", "true"}, + counts = {IRNode.REPLICATE_I, "0"}) public int[] fillIntArrayWithDouble() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java index 0d53c748979..74262142247 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java @@ -36,7 +36,7 @@ * compiler.vectorization.runner.ArrayShiftOpTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index a4640bbc302..7956fa5f618 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.ArrayTypeConvertTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java index 23582a20d9b..8b4513b8490 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.ArrayUnsafeOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java index e1d30a5c839..d1a61538f2e 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.BasicBooleanOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java index 1ed615317d1..e0b5da1b716 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java @@ -38,7 +38,7 @@ * -XX:LoopUnrollLimit=1000 * compiler.vectorization.runner.BasicByteOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java index 4b91360fc54..d851af1ff59 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.BasicCharOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java index cddeee1d351..92ccca40c8a 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java @@ -36,7 +36,7 @@ * compiler.vectorization.runner.BasicDoubleOpTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java index 3bd7bb2a15d..72e8aa640ca 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java @@ -36,7 +36,7 @@ * compiler.vectorization.runner.BasicFloatOpTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java index d392214c969..e848a15ffbc 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java @@ -36,7 +36,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.BasicIntOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java index 1e41ae97ea2..80ebd7b64d0 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java @@ -37,7 +37,7 @@ * compiler.vectorization.runner.BasicLongOpTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java index b47267e5507..c9a06c11360 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.BasicShortOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java index 2ef686c2fd5..e06bbb05098 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java @@ -37,7 +37,7 @@ * compiler.vectorization.runner.LoopArrayIndexComputeTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; @@ -321,7 +321,7 @@ public byte[] byteArrayWithDependencePos() { byte[] res = new byte[SIZE]; System.arraycopy(bytes, 0, res, 0, SIZE); for (int i = 0; i < SIZE / 2; i++) { - res[i] *= bytes[i + 3]; + res[i] += bytes[i + 3]; } return res; } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java index 11852a55106..cddc5207d42 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java @@ -36,7 +36,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopCombinedOpTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java index a6057b63024..f0436332bdd 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java @@ -35,7 +35,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopControlFlowTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java index b5c3e3a9bfa..8b5e62a9d21 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java @@ -36,7 +36,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopLiveOutNodesTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java index e9de917e4f5..cfc87206c5d 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java @@ -36,8 +36,8 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopRangeStrideTest * - * @requires vm.compiler2.enabled & vm.flagless - * @requires (os.arch=="amd64" | os.arch=="x86_64" | os.simpleArch == "aarch64") + * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java index 72667a66d82..f1c8fea059b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java @@ -36,7 +36,7 @@ * compiler.vectorization.runner.LoopReductionOpTest * * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled * */ diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java index 853f60124ba..61648c2e810 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java @@ -36,7 +36,7 @@ * -XX:+WhiteBoxAPI * compiler.vectorization.runner.MultipleLoopsTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java index 655470b095a..dc906540f88 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java @@ -36,7 +36,7 @@ * -XX:LoopStripMiningIter=10 * compiler.vectorization.runner.StripMinedLoopTest * - * @requires vm.compiler2.enabled & vm.flagless + * @requires vm.compiler2.enabled */ package compiler.vectorization.runner; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java b/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java index 2a4c0839b51..ff8787eb913 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java @@ -25,18 +25,11 @@ import compiler.lib.ir_framework.*; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; import java.lang.reflect.Array; import java.lang.reflect.Constructor; import java.lang.reflect.Method; import java.lang.reflect.Modifier; -import java.io.File; - -import jdk.test.lib.Platform; import jdk.test.lib.Utils; import jdk.test.whitebox.WhiteBox; @@ -51,16 +44,14 @@ public class VectorizationTestRunner { private static final int NMETHOD_COMP_LEVEL_IDX = 1; private static final int NMETHOD_INSTS_IDX = 2; - private static final long COMP_THRES_SECONDS = 30; - protected void run() { + Class klass = getClass(); + // 1) Vectorization correctness test - // For each method annotated with @Test in the test method, this test runner + // For each method annotated with "@Test" in test classes, this test runner // invokes it twice - first time in the interpreter and second time compiled // by C2. Then this runner compares the two return values. Hence we require // each test method returning a primitive value or an array of primitive type. - // And each test method should not throw any exceptions. - Class klass = getClass(); for (Method method : klass.getDeclaredMethods()) { try { if (method.isAnnotationPresent(Test.class)) { @@ -77,8 +68,6 @@ protected void run() { // To test vectorizability, invoke the IR test framework to check existence of // expected C2 IR node. TestFramework irTest = new TestFramework(klass); - // Add extra VM options to enable more auto-vectorization chances - irTest.addFlags("-XX:-OptimizeFill"); irTest.start(); } @@ -114,9 +103,9 @@ private void runTestOnMethod(Method method) throws InterruptedException { Object expected = null; Object actual = null; - // Lock compilation and inovke the method to get reference result from - // the interpreter - WB.lockCompilation(); + // Temporarily disable the compiler and invoke the method to get reference + // result from the interpreter + WB.setBooleanVMFlag("UseCompiler", false); try { expected = method.invoke(this); } catch (Exception e) { @@ -124,16 +113,13 @@ private void runTestOnMethod(Method method) throws InterruptedException { fail("Exception is thrown in test method invocation (interpreter)."); } assert(WB.getMethodCompilationLevel(method) == COMP_LEVEL_INTP); - WB.unlockCompilation(); + WB.setBooleanVMFlag("UseCompiler", true); // Compile the method and invoke it again long enqueueTime = System.currentTimeMillis(); WB.enqueueMethodForCompilation(method, COMP_LEVEL_C2); while (WB.getMethodCompilationLevel(method) != COMP_LEVEL_C2) { - if (System.currentTimeMillis() - enqueueTime > COMP_THRES_SECONDS * 1000) { - fail("Method is not compiled after " + COMP_THRES_SECONDS + "s."); - } - Thread.sleep(50 /*ms*/); + Thread.sleep(100 /*ms*/); } try { actual = method.invoke(this); From 16c60874bf214ec87260ff1ffbfb8dba6d3591de Mon Sep 17 00:00:00 2001 From: Sergey Tsypanov Date: Tue, 29 Aug 2023 08:39:42 +0000 Subject: [PATCH 3/7] 8314569: (fs) Improve normalization of UnixPath for input with trailing slashes Reviewed-by: alanb, bpb, rriggs --- src/java.base/unix/classes/sun/nio/fs/UnixPath.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java index 3a4c47fb9db..a9c63dac923 100644 --- a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java +++ b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java @@ -36,7 +36,6 @@ import java.nio.file.WatchEvent; import java.nio.file.WatchKey; import java.nio.file.WatchService; -import java.nio.file.spi.FileSystemProvider; import java.util.Arrays; import java.util.Objects; @@ -90,8 +89,9 @@ static String normalizeAndCheck(String input) { checkNotNul(input, c); prevChar = c; } - if (prevChar == '/') - return normalize(input, n, n - 1); + if (prevChar == '/' && n > 1) { + return input.substring(0, n - 1); + } return input; } @@ -109,7 +109,7 @@ private static String normalize(String input, int len, int off) { return "/"; StringBuilder sb = new StringBuilder(input.length()); if (off > 0) - sb.append(input.substring(0, off)); + sb.append(input, 0, off); char prevChar = 0; for (int i=off; i < n; i++) { char c = input.charAt(i); From fa40c4576ccee44103452b7b941036378fbd39b8 Mon Sep 17 00:00:00 2001 From: Kevin Walls Date: Tue, 29 Aug 2023 08:54:00 +0000 Subject: [PATCH 4/7] 8314476: TestJstatdPortAndServer.java failed with "java.rmi.NoSuchObjectException: no such object in table" Reviewed-by: msheppar, sspitsyn --- test/jdk/sun/tools/jstatd/JstatdTest.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/jdk/sun/tools/jstatd/JstatdTest.java b/test/jdk/sun/tools/jstatd/JstatdTest.java index a3183c64c16..b2ac3804f68 100644 --- a/test/jdk/sun/tools/jstatd/JstatdTest.java +++ b/test/jdk/sun/tools/jstatd/JstatdTest.java @@ -65,6 +65,8 @@ public final class JstatdTest { private static final int JSTAT_GCUTIL_INTERVAL_MS = 250; private static final String JPS_OUTPUT_REGEX = "^\\d+\\s*.*"; + private static final int MAX_JSTATD_TRIES = 10; + private boolean useDefaultPort = true; private boolean useDefaultRmiPort = true; private String port; @@ -282,7 +284,7 @@ private void addToolArg(JDKToolLauncher launcher, String name, String value) { private ProcessThread tryToSetupJstatdProcess() throws Throwable { portInUse = false; ProcessThread jstatdThread = new ProcessThread("Jstatd-Thread", - JstatdTest::isJstadReady, getJstatdCmd()); + JstatdTest::isJstatdReady, getJstatdCmd()); try { jstatdThread.start(); // Make sure jstatd is up and running @@ -302,8 +304,8 @@ private ProcessThread tryToSetupJstatdProcess() throws Throwable { return jstatdThread; } - private static boolean isJstadReady(String line) { - if (line.contains("Port already in use")) { + private static boolean isJstatdReady(String line) { + if (line.contains("Port already in use") || line.contains("Could not bind")) { portInUse = true; return true; } @@ -322,8 +324,9 @@ private void runTest(boolean useShortSyntax) throws Throwable { } ProcessThread jstatdThread = null; + int tries = 0; try { - while (jstatdThread == null) { + while (jstatdThread == null && ++tries <= MAX_JSTATD_TRIES) { if (!useDefaultPort) { port = String.valueOf(Utils.getFreePort()); } @@ -339,10 +342,11 @@ private void runTest(boolean useShortSyntax) throws Throwable { continue; } } - jstatdThread = tryToSetupJstatdProcess(); } - + if (jstatdThread == null) { + throw new RuntimeException("Cannot start jstatd."); + } runToolsAndVerify(); } finally { cleanUpThread(jstatdThread); From 892e04e786ddeceb42815b06f02630de2eaf934d Mon Sep 17 00:00:00 2001 From: Emmy Date: Tue, 29 Aug 2023 09:52:41 +0000 Subject: [PATCH 5/7] 8309463: IGV: Dynamic graph layout algorithm Reviewed-by: tholenstein, rcastanedalo, chagedorn --- .../com/sun/hotspot/igv/graph/Figure.java | 13 + .../hotspot/igv/graph/FigureConnection.java | 12 + .../HierarchicalLayout/pom.xml | 7 +- .../HierarchicalLayoutManager.java | 119 +- .../HierarchicalStableLayoutManager.java | 1471 +++++++++++++++++ .../igv/hierarchicallayout/LayoutEdge.java | 44 + .../igv/hierarchicallayout/LayoutNode.java | 53 + .../sun/hotspot/igv/settings/Settings.java | 3 +- .../com/sun/hotspot/igv/util/Statistics.java | 40 + .../sun/hotspot/igv/view/DiagramScene.java | 17 +- .../hotspot/igv/view/DiagramViewModel.java | 13 + .../hotspot/igv/view/EditorTopComponent.java | 16 + .../actions/EnableStableSeaLayoutAction.java | 49 + .../hotspot/igv/view/images/stable_sea.png | Bin 0 -> 2111 bytes 14 files changed, 1767 insertions(+), 90 deletions(-) create mode 100644 src/utils/IdealGraphVisualizer/HierarchicalLayout/src/main/java/com/sun/hotspot/igv/hierarchicallayout/HierarchicalStableLayoutManager.java create mode 100644 src/utils/IdealGraphVisualizer/HierarchicalLayout/src/main/java/com/sun/hotspot/igv/hierarchicallayout/LayoutEdge.java create mode 100644 src/utils/IdealGraphVisualizer/HierarchicalLayout/src/main/java/com/sun/hotspot/igv/hierarchicallayout/LayoutNode.java create mode 100644 src/utils/IdealGraphVisualizer/Util/src/main/java/com/sun/hotspot/igv/util/Statistics.java create mode 100644 src/utils/IdealGraphVisualizer/View/src/main/java/com/sun/hotspot/igv/view/actions/EnableStableSeaLayoutAction.java create mode 100644 src/utils/IdealGraphVisualizer/View/src/main/resources/com/sun/hotspot/igv/view/images/stable_sea.png diff --git a/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/Figure.java b/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/Figure.java index 9cc917dedcf..f02b4657cea 100644 --- a/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/Figure.java +++ b/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/Figure.java @@ -384,6 +384,19 @@ public Dimension getSize() { return new Dimension(width, height); } + @Override + public boolean equals(Object o) { + if (!(o instanceof Figure)) { + return false; + } + return getInputNode().equals(((Figure) o).getInputNode()); + } + + @Override + public int hashCode() { + return getInputNode().hashCode(); + } + @Override public String toString() { return idString; diff --git a/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/FigureConnection.java b/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/FigureConnection.java index ac2c88f6913..186a6b63f17 100644 --- a/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/FigureConnection.java +++ b/src/utils/IdealGraphVisualizer/Graph/src/main/java/com/sun/hotspot/igv/graph/FigureConnection.java @@ -156,5 +156,17 @@ public boolean hasSlots() { return true; } + @Override + public boolean equals(Object o) { + if (!(o instanceof FigureConnection)) { + return false; + } + + return getInputSlot().getFigure().equals(((FigureConnection)o).getInputSlot().getFigure()) + && getOutputSlot().getFigure().equals(((FigureConnection)o).getOutputSlot().getFigure()) + && getInputSlot().getPosition() == ((FigureConnection)o).getInputSlot().getPosition() + && getOutputSlot().getPosition() == ((FigureConnection) o).getOutputSlot().getPosition(); + } + } diff --git a/src/utils/IdealGraphVisualizer/HierarchicalLayout/pom.xml b/src/utils/IdealGraphVisualizer/HierarchicalLayout/pom.xml index be9cb1d882f..4a99ff829d5 100644 --- a/src/utils/IdealGraphVisualizer/HierarchicalLayout/pom.xml +++ b/src/utils/IdealGraphVisualizer/HierarchicalLayout/pom.xml @@ -1,6 +1,6 @@