Skip to content

Commit

Permalink
[WebAssembly] Implement prototype f32.store_f16 instruction. (llvm#91545)
Browse files Browse the repository at this point in the history

Adds a builtin and intrinsic for the f32.store_f16 instruction.

The instruction stores an f32 value to memory as an f16. Specified at:

https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md

Note: the current spec has f32.store_f16 as opcode 0xFD0121, but this is
incorrect and will be changed to 0xFC31 soon.
  • Loading branch information
brendandahl authored May 9, 2024
1 parent a99cb96 commit 8a3277a
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 2 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"

// Half-Precision (fp16)
TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")

// Reference Types builtins
// Some builtins are custom type-checked - see 't' as part of the third argument,
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21310,6 +21310,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
return Builder.CreateCall(Callee, {Addr});
}
// Lower __builtin_wasm_storef16_f32(val, addr) to a call to the
// wasm_storef16_f32 intrinsic. The builtin's argument order is preserved:
// the value to store comes first, the destination address second.
case WebAssembly::BI__builtin_wasm_storef16_f32: {
Value *Val = EmitScalarExpr(E->getArg(0));
Value *Addr = EmitScalarExpr(E->getArg(1));
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
return Builder.CreateCall(Callee, {Val, Addr});
}
case WebAssembly::BI__builtin_wasm_table_get: {
assert(E->getArg(0)->getType()->isArrayType());
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/builtins-wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,12 @@ float load_f16_f32(__fp16 *addr) {
// WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}})
}

// Verifies that __builtin_wasm_storef16_f32 is emitted as a void call to
// llvm.wasm.storef16.f32, passing the float value followed by the pointer,
// and that the call is immediately followed by the function's return.
void store_f16_f32(float val, __fp16 *addr) {
return __builtin_wasm_storef16_f32(val, addr);
// WEBASSEMBLY: tail call void @llvm.wasm.storef16.f32(float %val, ptr %{{.*}})
// WEBASSEMBLY-NEXT: ret
}

__externref_t externref_null() {
return __builtin_wasm_ref_null_extern();
// WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsWebAssembly.td
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ def int_wasm_loadf16_f32:
[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly],
"", [SDNPMemOperand]>;
// Intrinsic backing the f32.store_f16 instruction: takes a float value and a
// pointer, and produces no result. It is marked as writing memory and as
// touching only memory reachable through its pointer argument, and it carries
// a memory operand during instruction selection.
def int_wasm_storef16_f32:
Intrinsic<[],
[llvm_float_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly],
"", [SDNPMemOperand]>;


//===----------------------------------------------------------------------===//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(LOAD_LANE_I16x8)
WASM_LOAD_STORE(STORE_LANE_I16x8)
WASM_LOAD_STORE(LOAD_F16_F32)
WASM_LOAD_STORE(STORE_F16_F32)
return 1;
WASM_LOAD_STORE(LOAD_I32)
WASM_LOAD_STORE(LOAD_F32)
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(2);
Info.flags = MachineMemOperand::MOLoad;
return true;
// Describe the memory access performed by the wasm_storef16_f32 intrinsic so
// that a memory operand can be attached to the resulting node.
case Intrinsic::wasm_storef16_f32:
// The intrinsic returns nothing, hence INTRINSIC_VOID.
Info.opc = ISD::INTRINSIC_VOID;
// The value operand is a float, but the access in memory is f16-sized.
Info.memVT = MVT::f16;
// Operand 0 is the value being stored; the address is operand 1.
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = Align(2);
Info.flags = MachineMemOperand::MOStore;
return true;
default:
return false;
}
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>;
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;

// Half Precision
defm LOAD_F16_F32 : WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
// Half-precision load.
defm LOAD_F16_F32 :
WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;

// Pattern matching

Expand Down Expand Up @@ -171,12 +172,18 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;

// Half-precision store: defines f32.store_f16, instantiated for F32 with
// opcode 0xfc31 and predicated on HasHalfPrecision.
defm STORE_F16_F32 :
WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;

defm : StorePat<i32, truncstorei8, "STORE8_I32">;
defm : StorePat<i32, truncstorei16, "STORE16_I32">;
defm : StorePat<i64, truncstorei8, "STORE8_I64">;
defm : StorePat<i64, truncstorei16, "STORE16_I64">;
defm : StorePat<i64, truncstorei32, "STORE32_I64">;

// Select calls to the wasm_storef16_f32 intrinsic (f32 value operand) to the
// STORE_F16_F32 instruction.
defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;

multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
// Current memory size.
defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags),
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/WebAssembly/half-precision.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s

; Half-precision load intrinsic called by ldf16_32 below. The name must be
; spelled loadf16.f32 (load an f16, produce an f32) to match the call site.
declare float @llvm.wasm.loadf16.f32(ptr)
declare void @llvm.wasm.storef16.f32(float, ptr)

; CHECK-LABEL: ldf16_32:
; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}}
Expand All @@ -10,3 +11,11 @@ define float @ldf16_32(ptr %p) {
%v = call float @llvm.wasm.loadf16.f32(ptr %p)
ret float %v
}

; CHECK-LABEL: stf16_32:
; CHECK: f32.store_f16 0($1), $0
; CHECK-NEXT: return
; Stores %v through %p using the storef16 intrinsic. The value is the first
; parameter ($0) and the pointer the second ($1), which is why the expected
; f32.store_f16 uses $1 as its address operand and $0 as its value operand.
define void @stf16_32(float %v, ptr %p) {
tail call void @llvm.wasm.storef16.f32(float %v, ptr %p)
ret void
}
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/WebAssembly/offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -692,3 +692,30 @@ define float @load_f16_f32_with_folded_gep_offset(ptr %p) {
%t = call float @llvm.wasm.loadf16.f32(ptr %s)
ret float %t
}

;===----------------------------------------------------------------------------
; Stores: Half Precision
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_f16_f32_no_offset:
; CHECK-NEXT: .functype store_f16_f32_no_offset (i32, f32) -> (){{$}}
; CHECK-NEXT: f32.store_f16 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
; Stores %v directly through %p with no address arithmetic, so the expected
; instruction has a zero offset and uses the pointer parameter as its base.
define void @store_f16_f32_no_offset(ptr %p, float %v) {
call void @llvm.wasm.storef16.f32(float %v, ptr %p)
ret void
}

; Storing to a fixed address.

; CHECK-LABEL: store_f16_f32_to_numeric_address:
; CHECK: i32.const $push1=, 0{{$}}
; CHECK-NEXT: f32.const $push0=, 0x0p0{{$}}
; CHECK-NEXT: f32.store_f16 42($pop1), $pop0{{$}}
; Stores 0.0 to the constant address 42. The expected output places 42 in the
; instruction's offset field with a constant-zero base rather than
; materializing the address 42 itself.
define void @store_f16_f32_to_numeric_address() {
%s = inttoptr i32 42 to ptr
call void @llvm.wasm.storef16.f32(float 0.0, ptr %s)
ret void
}
3 changes: 3 additions & 0 deletions llvm/test/MC/WebAssembly/simd-encodings.s
Original file line number Diff line number Diff line change
Expand Up @@ -842,4 +842,7 @@ main:
# CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30]
f32.load_f16 48

# CHECK: f32.store_f16 32 # encoding: [0xfc,0x31,0x01,0x20]
f32.store_f16 32

end_function

0 comments on commit 8a3277a

Please sign in to comment.