From d3e77f5408fded2b4bb70f51d6d9e52684badc92 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 6 May 2024 10:55:36 -0700 Subject: [PATCH] Try to use non-volatile registers for `preserve_none` parameters (#88333) This uses non-volatile registers for the first four (six on Windows) registers used for `preserve_none` argument passing. This allows these registers to stay "pinned", even if the body of the `preserve_none` function contains calls to other "normal" functions. Example: ```c void boring(void); __attribute__((preserve_none)) void (continuation)(void *, void *, void *, void *); __attribute__((preserve_none)) void entry(void *a, void *b, void *c, void *d) { boring(); __attribute__((musttail)) return continuation(a, b, c, d); } ``` Before: ```asm pushq %rax movq %rcx, %rbx movq %rdx, %r14 movq %rsi, %r15 movq %rdi, %r12 callq boring@PLT movq %r12, %rdi movq %r15, %rsi movq %r14, %rdx movq %rbx, %rcx popq %rax jmp continuation@PLT ``` After: ```asm pushq %rax callq boring@PLT popq %rax jmp continuation@PLT ``` --- clang/include/clang/Basic/AttrDocs.td | 5 +- llvm/lib/Target/X86/X86CallingConv.td | 10 ++-- llvm/test/CodeGen/X86/preserve_nonecc_call.ll | 50 +++++++++++++------ .../CodeGen/X86/preserve_nonecc_call_win.ll | 21 ++++++++ 4 files changed, 64 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index f8253143b596c0..8e6faabfae647a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5663,11 +5663,12 @@ The ``preserve_none`` calling convention tries to preserve as few general registers as possible. So all general registers are caller saved registers. It also uses more general registers to pass arguments. This attribute doesn't impact floating-point registers (XMMs/YMMs). Floating-point registers still -follow the c calling convention. +follow the c calling convention. ``preserve_none``'s ABI is still unstable, and +may be changed in the future. - Only RSP and RBP are preserved by callee. -- Register RDI, RSI, RDX, RCX, R8, R9, R11, R12, R13, R14, R15 and RAX now can +- Register R12, R13, R14, R15, RDI, RSI, RDX, RCX, R8, R9, R11, and RAX now can be used to pass function arguments. }]; } diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 12178bcaf042db..9ec68bfb8e0f7e 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1063,11 +1063,13 @@ def CC_X86_64_Preserve_None : CallingConv<[ // - R10 'nest' parameter // - RBX base pointer // - R16 - R31 these are not available everywhere - CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D, - R11D, R12D, R13D, R14D, R15D, EAX]>>, + // Use non-volatile registers first, so functions using this convention can + // call "normal" functions without saving and restoring incoming values: + CCIfType<[i32], CCAssignToReg<[R12D, R13D, R14D, R15D, EDI, ESI, + EDX, ECX, R8D, R9D, R11D, EAX]>>, - CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8, R9, - R11, R12, R13, R14, R15, RAX]>>, + CCIfType<[i64], CCAssignToReg<[R12, R13, R14, R15, RDI, RSI, + RDX, RCX, R8, R9, R11, RAX]>>, // Otherwise it's the same as the regular C calling convention. CCDelegateTo diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll index e4ad056913c5dc..500ebb139811aa 100644 --- a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll +++ b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll @@ -27,6 +27,7 @@ define void @caller1(ptr %a) { ; CHECK-NEXT: .cfi_offset %r13, -32 ; CHECK-NEXT: .cfi_offset %r14, -24 ; CHECK-NEXT: .cfi_offset %r15, -16 +; CHECK-NEXT: movq %rdi, %r12 ; CHECK-NEXT: callq callee@PLT ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 40 @@ -61,17 +62,17 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %r13, %r12 +; CHECK-NEXT: movq %r14, %r13 +; CHECK-NEXT: movq %r15, %r14 +; CHECK-NEXT: movq %rdi, %r15 ; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: movq %rdx, %rsi ; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: movq %r9, %r8 ; CHECK-NEXT: movq %r11, %r9 -; CHECK-NEXT: movq %r12, %r11 -; CHECK-NEXT: movq %r13, %r12 -; CHECK-NEXT: movq %r14, %r13 -; CHECK-NEXT: movq %r15, %r14 -; CHECK-NEXT: movq %rax, %r15 +; CHECK-NEXT: movq %rax, %r11 ; CHECK-NEXT: callq callee_with_many_param2@PLT ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -98,17 +99,17 @@ define i64 @caller3() { ; CHECK-NEXT: .cfi_offset %r13, -32 ; CHECK-NEXT: .cfi_offset %r14, -24 ; CHECK-NEXT: .cfi_offset %r15, -16 -; CHECK-NEXT: movl $1, %edi -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $3, %edx -; CHECK-NEXT: movl $4, %ecx -; CHECK-NEXT: movl $5, %r8d -; CHECK-NEXT: movl $6, %r9d -; CHECK-NEXT: movl $7, %r11d -; CHECK-NEXT: movl $8, %r12d -; CHECK-NEXT: movl $9, %r13d -; CHECK-NEXT: movl $10, %r14d -; CHECK-NEXT: movl $11, %r15d +; CHECK-NEXT: movl $1, %r12d +; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: movl $3, %r14d +; CHECK-NEXT: movl $4, %r15d +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: movl $6, %esi +; CHECK-NEXT: movl $7, %edx +; CHECK-NEXT: movl $8, %ecx +; CHECK-NEXT: movl $9, %r8d +; CHECK-NEXT: movl $10, %r9d +; CHECK-NEXT: movl $11, %r11d ; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: callq callee_with_many_param@PLT ; CHECK-NEXT: popq %rbx @@ -125,3 +126,20 @@ define i64 @caller3() { %ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12) ret i64 %ret } + +; Non-volatile registers are used to pass the first few parameters. +declare void @boring() +declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr) +define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15) { +; CHECK-LABEL: entry: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq boring@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: jmp continuation@PLT # TAILCALL + call void @boring() + musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15) + ret void +} diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll new file mode 100644 index 00000000000000..232ac345057825 --- /dev/null +++ b/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-pc-windows-msvc -mcpu=corei7 < %s | FileCheck %s + +; Non-volatile registers are used to pass the first few parameters. +declare void @boring() +declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr, ptr, ptr) +define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi) { +; CHECK-LABEL: entry: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .seh_stackalloc 40 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: callq boring +; CHECK-NEXT: nop +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: jmp continuation # TAILCALL +; CHECK-NEXT: .seh_endproc + call void @boring() + musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi) + ret void +}