Add a pass to propagate unsupported element type conversion. (#8173)
With PR #8155, buffers for tensors with unsupported element types
(like i1, i2, etc.) are converted to use a supported element type
through widening (and truncation, if needed, in the future). This introduces
arith.trunci and arith.extui operations to convert back and forth
between the original program representation and the widened one. Propagate
the use of the supported element type through all tensor operations to
ensure that no loads/stores of unsupported element types are generated.
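For illustration, a minimal before/after sketch of the rewrite the pass performs (it mirrors the first case in type_propagation.mlir below; %loaded and %init are placeholder values standing in for the flow.dispatch.tensor.load and linalg.init_tensor results):

// Before: the widened i8 tensor is truncated back to i1 at the tensor level,
// which would force i1 loads/stores after bufferization.
%t = arith.trunci %loaded : tensor<?xi8> to tensor<?xi1>
%r = linalg.generic {
    indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]}
    ins(%t : tensor<?xi1>) outs(%init : tensor<?xi8>) {
  ^bb0(%arg0 : i1, %arg1 : i8):
    %e = arith.extui %arg0 : i1 to i8
    linalg.yield %e : i8
} -> tensor<?xi8>

// After: tensor-level ops use i8 throughout, and the i8 <-> i1 round-trip
// moves into the generic body as scalar arith ops.
%r = linalg.generic {
    indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]}
    ins(%loaded : tensor<?xi8>) outs(%init : tensor<?xi8>) {
  ^bb0(%arg0 : i8, %arg1 : i8):
    %t0 = arith.trunci %arg0 : i8 to i1
    %e = arith.extui %t0 : i1 to i8
    linalg.yield %e : i8
} -> tensor<?xi8>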
MaheshRavishankar authored Jan 28, 2022
1 parent e3ded85 commit 0ba4d9e
Showing 15 changed files with 668 additions and 4 deletions.
@@ -1,3 +1,2 @@
-# XFAIL: *
 # REQUIRES: llvmaot
 # RUN: %PYTHON -m iree_tf_tests.math.math_test --target_backends=iree_llvmaot --dynamic_dims=false --functions=reduce_all -artifacts_dir=%t
1 change: 1 addition & 0 deletions iree/compiler/Codegen/Common/BUILD
@@ -47,6 +47,7 @@ cc_library(
"OptimizeVectorTransferPass.cpp",
"RemoveTrivialLoops.cpp",
"SetNumWorkgroupsPass.cpp",
"TypePropagationPass.cpp",
"VectorizeConv.cpp",
"VectorizeMMT4d.cpp",
],
1 change: 1 addition & 0 deletions iree/compiler/Codegen/Common/CMakeLists.txt
@@ -38,6 +38,7 @@ iree_cc_library(
"OptimizeVectorTransferPass.cpp"
"RemoveTrivialLoops.cpp"
"SetNumWorkgroupsPass.cpp"
"TypePropagationPass.cpp"
"VectorizeConv.cpp"
"VectorizeMMT4d.cpp"
DEPS
387 changes: 387 additions & 0 deletions iree/compiler/Codegen/Common/TypePropagationPass.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions iree/compiler/Codegen/Common/test/BUILD
@@ -33,6 +33,7 @@ iree_lit_test_suite(
"remove_dead_allocs.mlir",
"remove_trivial_loops.mlir",
"transpose_canonicalization.mlir",
"type_propagation.mlir",
"vectorize_linalg_conv.mlir",
"vectorize_linalg_mmt4d.mlir",
],
1 change: 1 addition & 0 deletions iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -28,6 +28,7 @@ iree_lit_test_suite(
"remove_dead_allocs.mlir"
"remove_trivial_loops.mlir"
"transpose_canonicalization.mlir"
"type_propagation.mlir"
"vectorize_linalg_conv.mlir"
"vectorize_linalg_mmt4d.mlir"
TOOLS
239 changes: 239 additions & 0 deletions iree/compiler/Codegen/Common/test/type_propagation.mlir
@@ -0,0 +1,239 @@
// RUN: iree-opt -iree-codegen-type-propagation -split-input-file %s | FileCheck %s

func @generic_op_illegal_operand() {
%d = hal.interface.constant.load[0] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%3 = arith.trunci %2 : tensor<?xi8> to tensor<?xi1>
%4 = linalg.init_tensor [%d] : tensor<?xi8>
%5 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%3 : tensor<?xi1>) outs(%4 : tensor<?xi8>) {
^bb0(%arg0 : i1, %arg1 : i8):
%6 = arith.extui %arg0 : i1 to i8
linalg.yield %6 : i8
} -> tensor<?xi8>
flow.dispatch.tensor.store %5, %1, offsets = [0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @generic_op_illegal_operand()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[INTENSOR]] : tensor<?xi8>)
// CHECK-SAME: outs(%[[INIT]] : tensor<?xi8>)
// CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: i8, %[[ARG1:[a-zA-Z0-9]+]]: i8)
// CHECK-DAG: %[[TRUNC:.+]] = arith.trunci %[[ARG0]] : i8 to i1
// CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[TRUNC]] : i1 to i8
// CHECK: linalg.yield %[[EXTUI]]
// CHECK: flow.dispatch.tensor.store %[[GENERIC]], %[[OUT]]

// -----

func @generic_op_illegal_operand_i7() {
%d = hal.interface.constant.load[0] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%3 = arith.trunci %2 : tensor<?xi8> to tensor<?xi7>
%4 = linalg.init_tensor [%d] : tensor<?xi8>
%5 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%3 : tensor<?xi7>) outs(%4 : tensor<?xi8>) {
^bb0(%arg0 : i7, %arg1 : i8):
%6 = arith.extui %arg0 : i7 to i8
linalg.yield %6 : i8
} -> tensor<?xi8>
flow.dispatch.tensor.store %5, %1, offsets = [0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @generic_op_illegal_operand_i7()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[INTENSOR]] : tensor<?xi8>)
// CHECK-SAME: outs(%[[INIT]] : tensor<?xi8>)
// CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: i8, %[[ARG1:[a-zA-Z0-9]+]]: i8)
// CHECK-DAG: %[[TRUNC:.+]] = arith.trunci %[[ARG0]] : i8 to i7
// CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[TRUNC]] : i7 to i8
// CHECK: linalg.yield %[[EXTUI]]
// CHECK: flow.dispatch.tensor.store %[[GENERIC]], %[[OUT]]

// -----

func @generic_op_illegal_operand_i33() {
%d = hal.interface.constant.load[0] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi64>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi64>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi64>{%d} -> tensor<?xi64>
%3 = arith.trunci %2 : tensor<?xi64> to tensor<?xi33>
%4 = linalg.init_tensor [%d] : tensor<?xi64>
%5 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%3 : tensor<?xi33>) outs(%4 : tensor<?xi64>) {
^bb0(%arg0 : i33, %arg1 : i64):
%6 = arith.extui %arg0 : i33 to i64
linalg.yield %6 : i64
} -> tensor<?xi64>
flow.dispatch.tensor.store %5, %1, offsets = [0], sizes=[%d], strides=[1] : tensor<?xi64> -> !flow.dispatch.tensor<writeonly:?xi64>{%d}
return
}
// CHECK-LABEL: func @generic_op_illegal_operand_i33()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xi64>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[INTENSOR]] : tensor<?xi64>)
// CHECK-SAME: outs(%[[INIT]] : tensor<?xi64>)
// CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: i64, %[[ARG1:[a-zA-Z0-9]+]]: i64)
// CHECK-DAG: %[[TRUNC:.+]] = arith.trunci %[[ARG0]] : i64 to i33
// CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[TRUNC]] : i33 to i64
// CHECK: linalg.yield %[[EXTUI]]
// CHECK: flow.dispatch.tensor.store %[[GENERIC]], %[[OUT]]


// -----

func @generic_op_illegal_result() {
%d = hal.interface.constant.load[0] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%3 = linalg.init_tensor [%d] : tensor<?xi1>
%4 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%2 : tensor<?xi8>) outs(%3 : tensor<?xi1>) {
^bb0(%arg0 : i8, %arg1 : i1):
%5 = arith.trunci %arg0 : i8 to i1
linalg.yield %5 : i1
} -> tensor<?xi1>
%5 = arith.extui %4 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %5, %1, offsets = [0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @generic_op_illegal_result()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[INTENSOR]] : tensor<?xi8>)
// CHECK-SAME: outs(%[[INIT]] : tensor<?xi8>)
// CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: i8, %[[ARG1:[a-zA-Z0-9]+]]: i8)
// CHECK-DAG: %[[TRUNC:.+]] = arith.trunci %[[ARG0]] : i8 to i1
// CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[TRUNC]] : i1 to i8
// CHECK: linalg.yield %[[EXTUI]]
// CHECK: flow.dispatch.tensor.store %[[GENERIC]], %[[OUT]]

// -----

func @tensor_extract() {
%d = hal.interface.constant.load[0] : index
%offset = hal.interface.constant.load[1] : index
%size = hal.interface.constant.load[2] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%3 = tensor.extract_slice %2[%offset] [%size] [1] : tensor<?xi8> to tensor<?xi8>
%4 = arith.trunci %3 : tensor<?xi8> to tensor<?xi1>
%5 = arith.extui %4 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %5, %1, offsets = [%offset], sizes=[%size], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @tensor_extract()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK: %[[EXTRACT:.+]] = tensor.extract_slice %[[INTENSOR]]
// CHECK: flow.dispatch.tensor.store %[[EXTRACT]], %[[OUT]]

// -----

func @tensor_insert() {
%d = hal.interface.constant.load[0] : index
%offset = hal.interface.constant.load[1] : index
%size = hal.interface.constant.load[2] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%3 = flow.dispatch.tensor.load %0, offsets = [%offset], sizes=[%size], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%5 = arith.trunci %3 : tensor<?xi8> to tensor<?xi1>
%6 = arith.trunci %4 : tensor<?xi8> to tensor<?xi1>
%7 = tensor.insert_slice %5 into %6[%offset] [%size] [1] : tensor<?xi1> into tensor<?xi1>
%8 = arith.extui %7 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %8, %2, offsets = [0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @tensor_insert()
// CHECK-DAG: %[[IN1:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[IN2:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(2)
// CHECK-DAG: %[[IN1TENSOR:.+]] = flow.dispatch.tensor.load %[[IN1]]
// CHECK-DAG: %[[IN2TENSOR:.+]] = flow.dispatch.tensor.load %[[IN2]]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[IN1TENSOR]] into %[[IN2TENSOR]]
// CHECK: flow.dispatch.tensor.store %[[INSERT]], %[[OUT]]

// -----

func @for_loop() {
%d = hal.interface.constant.load[0] : index
%lb = hal.interface.constant.load[1] : index
%step = hal.interface.constant.load[2] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:?xi8>{%d}
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%2 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:?xi8>{%d} -> tensor<?xi8>
%3 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<writeonly:?xi8>{%d} -> tensor<?xi8>
%4 = arith.trunci %2 : tensor<?xi8> to tensor<?xi1>
%5 = arith.trunci %3 : tensor<?xi8> to tensor<?xi1>
%c0 = arith.constant 0 : index
%6 = scf.for %arg0 = %c0 to %d step %step iter_args(%arg1 = %5) -> tensor<?xi1> {
%7 = tensor.extract_slice %4[%arg0][%step][1] : tensor<?xi1> to tensor<?xi1>
%8 = tensor.insert_slice %7 into %arg1[%arg0][%step][1] : tensor<?xi1> into tensor<?xi1>
scf.yield %8 : tensor<?xi1>
}
%8 = arith.extui %6 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %8, %1, offsets=[0], sizes=[%d], strides=[1]: tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @for_loop()
// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[OUTTENSOR:.+]] = flow.dispatch.tensor.load %[[OUT]]
// CHECK: %[[FOR:.+]] = scf.for
// CHECK-SAME: iter_args(%[[ARG1:.+]] = %[[OUTTENSOR]])
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[INTENSOR]]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[SLICE]] into %[[ARG1]]
// CHECK: scf.yield %[[INSERT]]
// CHECK: flow.dispatch.tensor.store %[[FOR]], %[[OUT]]

// -----

func @fill_op() {
%d = hal.interface.constant.load[0] : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%1 = linalg.init_tensor [%d] : tensor<?xi1>
%false = arith.constant false
%2 = linalg.fill(%false, %1) : i1, tensor<?xi1> -> tensor<?xi1>
%3 = arith.extui %2 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %3, %0, offsets=[0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
}
// CHECK-LABEL: func @fill_op()
// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan set(0) binding(0)
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[FALSE:.+]] = arith.constant false
// CHECK-DAG: %[[EXT_SCALAR:.+]] = arith.extui %[[FALSE]]
// CHECK: %[[FILL:.+]] = linalg.fill(%[[EXT_SCALAR]], %[[INIT]])
// CHECK: flow.dispatch.tensor.store %[[FILL]], %[[OUT]]
2 changes: 2 additions & 0 deletions iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -337,6 +337,8 @@ static void addLowerToLLVMPasses(OpPassManager &passManager) {
}

void buildLLVMCPUCodegenPassPipeline(OpPassManager &passManager) {
passManager.nest<ModuleOp>().nest<FuncOp>().addPass(
createTypePropagationPass());
passManager.addPass(createLLVMCPULowerExecutableTargetPass());
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
addLowerToLLVMPasses(nestedModulePM);
2 changes: 2 additions & 0 deletions iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -166,6 +166,8 @@ static void addLowerToLLVMGPUPasses(OpPassManager &pm, bool useROCM) {
}

void buildLLVMGPUTransformPassPipeline(OpPassManager &pm, bool useROCM) {
pm.nest<ModuleOp>().nest<FuncOp>().addPass(createTypePropagationPass());

OpPassManager &bufferizePassPM = pm.nest<ModuleOp>();
addLinalgBufferizePasses(bufferizePassPM, gpuAllocationFunction);
pm.addPass(createLLVMGPULowerExecutableTargetPass());
3 changes: 3 additions & 0 deletions iree/compiler/Codegen/Passes.h
@@ -114,6 +114,9 @@ std::unique_ptr<OperationPass<FuncOp>> createLinalgToVectorVectorizeMMT4dPass();
/// Pass to optimize vector transfer_read and transfer_write.
std::unique_ptr<OperationPass<FuncOp>> createOptimizeVectorTransferPass();

/// Pass to propagate type to avoid generating load/stores of illegal types.
std::unique_ptr<OperationPass<FuncOp>> createTypePropagationPass();

/// Sets the number of workgroups to use for each entry point in the dispatch
/// region.
std::unique_ptr<OperationPass<IREE::HAL::ExecutableVariantOp>>
6 changes: 6 additions & 0 deletions iree/compiler/Codegen/Passes.td
@@ -78,6 +78,12 @@ def OptimizeVectorTransfer :
let constructor = "mlir::iree_compiler::createOptimizeVectorTransferPass()";
}

def TypePropagation :
Pass<"iree-codegen-type-propagation", "FuncOp"> {
let summary = "Propagate the type of tensors to avoid loads/stores of illegal bit widths";
let constructor = "mlir::iree_compiler::createTypePropagationPass()";
}

def RemoveSingleIterationLoop :
Pass<"iree-codegen-remove-single-iteration-loop", "FuncOp"> {
let summary = "Remove distributed loop with single iteration.";
1 change: 1 addition & 0 deletions iree/compiler/Codegen/SPIRV/Passes.cpp
@@ -224,6 +224,7 @@ void addSPIRVTileAndDistributeCopyPassPipeline(OpPassManager &pm) {
//===----------------------------------------------------------------------===//

void buildSPIRVCodegenPassPipeline(OpPassManager &pm) {
pm.nest<ModuleOp>().nest<FuncOp>().addPass(createTypePropagationPass());
pm.addPass(createSPIRVLowerExecutableTargetPass());
addMemRefLoweringPasses(pm.nest<ModuleOp>());
addSPIRVLoweringPasses(pm.nest<ModuleOp>());
2 changes: 2 additions & 0 deletions iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
@@ -28,6 +28,8 @@ namespace IREE {
namespace VMVX {

static void buildVectorVMVXTransformPassPipeline(OpPassManager &passManager) {
passManager.nest<ModuleOp>().nest<FuncOp>().addPass(
createTypePropagationPass());
passManager.addPass(createLLVMCPULowerExecutableTargetPass());

OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
5 changes: 2 additions & 3 deletions iree/compiler/Dialect/Stream/Transforms/Passes.cpp
@@ -111,9 +111,8 @@ void buildStreamAsyncPassPipeline(OpPassManager &passManager,
 IREE::Stream::createEncodeHostTensorsPass());
 passManager.addNestedPass<mlir::FuncOp>(
 IREE::Stream::createEncodeHostTensorsPass());
-// TODO(ravishankarm): enable when codegen can handle extui/trunc ops.
-// passManager.addNestedPass<IREE::Stream::ExecutableOp>(
-// IREE::Stream::createEncodeDeviceTensorsPass());
+passManager.addNestedPass<IREE::Stream::ExecutableOp>(
+IREE::Stream::createEncodeDeviceTensorsPass());
 
 // Expand builtins to dispatches. This may introduce new executables.
 passManager.addPass(IREE::Stream::createMaterializeBuiltinsPass());
@@ -64,3 +64,23 @@ stream.executable private @convert_multi_i1 {
}
}
}

// -----

// CHECK-LABEL: @convert_load_i33
stream.executable private @convert_load_i33 {
stream.executable.export public @dispatch
builtin.module {
func @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:4xi64>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xi33>
// CHECK: %[[TILE_I8:.+]] = flow.dispatch.tensor.load %[[BINDING]], {{.+}} : !flow.dispatch.tensor<readonly:4xi64> -> tensor<?xi64>
// CHECK: %[[TILE_I1:.+]] = arith.trunci %[[TILE_I8]] : tensor<?xi64> to tensor<?xi33>
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xi33> -> tensor<?xi33>
// CHECK: do_not_optimize(%[[TILE_I1]])
util.do_not_optimize(%tile) : tensor<?xi33>
return
}
}
}
