From a4494ea365dcbd52fa3db5aa3ed590be84e78f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Miguel=20Sousa?= Date: Fri, 13 Dec 2024 11:12:10 +0100 Subject: [PATCH 1/4] Fix bug where emitc constants wouldn't be directly emitted in subscripts. (#411) * Fix bug where emitc constants wouldn't be directly emitted in subscripts. * Use ConstantOp as a deferred op when constantsAsVariables=false --- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 74 +++++++++++-------- .../Cpp/emitc-constants-as-variables.mlir | 50 ++++++++++++- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index a2e368e502737d..e09fe1d1126be5 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -254,6 +254,16 @@ struct CppEmitter { return operandExpression == emittedExpression; }; + /// Determine whether expression \p expressionOp should be emitted inline, + /// i.e. as part of its user. This function recommends inlining of any + /// expressions that can be inlined unless it is used by another expression, + /// under the assumption that any expression fusion/re-materialization was + /// taken care of by transformations run by the backend. + bool shouldBeInlined(ExpressionOp expressionOp); + + /// This emitter will only emit translation units whos id matches this value. + StringRef willOnlyEmitTu() { return onlyTu; } + private: using ValueMapper = llvm::ScopedHashTable; using BlockMapper = llvm::ScopedHashTable; @@ -297,21 +307,22 @@ struct CppEmitter { return lowestPrecedence(); return emittedExpressionPrecedence.back(); } + + /// Determine whether expression \p op should be emitted in a deferred way. + bool hasDeferredEmission(Operation *op); }; } // namespace -/// Determine whether expression \p op should be emitted in a deferred way. -static bool hasDeferredEmission(Operation *op) { +bool CppEmitter::hasDeferredEmission(Operation *op) { + if (llvm::isa_and_nonnull(op)) { + return !shouldUseConstantsAsVariables(); + } + return isa_and_nonnull(op); } -/// Determine whether expression \p expressionOp should be emitted inline, i.e. -/// as part of its user. This function recommends inlining of any expressions -/// that can be inlined unless it is used by another expression, under the -/// assumption that any expression fusion/re-materialization was taken care of -/// by transformations run by the backend. -static bool shouldBeInlined(ExpressionOp expressionOp) { +bool CppEmitter::shouldBeInlined(ExpressionOp expressionOp) { // Do not inline if expression is marked as such. if (expressionOp.getDoNotInline()) return false; @@ -373,6 +384,25 @@ static LogicalResult printConstantOp(CppEmitter &emitter, Operation *operation, static LogicalResult printOperation(CppEmitter &emitter, emitc::ConstantOp constantOp) { if (!emitter.shouldUseConstantsAsVariables()) { + std::string out; + llvm::raw_string_ostream ss(out); + + /// Temporary emitter object that writes to our stream instead of the output + /// allowing for the capture and caching of the produced string. + CppEmitter sniffer = CppEmitter(ss, emitter.shouldDeclareVariablesAtTop(), + emitter.willOnlyEmitTu(), + emitter.shouldUseConstantsAsVariables()); + + ss << "("; + if (failed(sniffer.emitType(constantOp.getLoc(), constantOp.getType()))) + return failure(); + ss << ") "; + + if (failed( + sniffer.emitAttribute(constantOp.getLoc(), constantOp.getValue()))) + return failure(); + + emitter.cacheDeferredOpResult(constantOp.getResult(), out); return success(); } @@ -838,7 +868,7 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CastOp castOp) { static LogicalResult printOperation(CppEmitter &emitter, emitc::ExpressionOp expressionOp) { - if (shouldBeInlined(expressionOp)) + if (emitter.shouldBeInlined(expressionOp)) return success(); Operation &op = *expressionOp.getOperation(); @@ -892,7 +922,7 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::ForOp forOp) { dyn_cast_if_present(value.getDefiningOp()); if (!expressionOp) return false; - return shouldBeInlined(expressionOp); + return emitter.shouldBeInlined(expressionOp); }; os << "for ("; @@ -1114,7 +1144,7 @@ static LogicalResult printFunctionBody(CppEmitter &emitter, functionOp->walk([&](Operation *op) -> WalkResult { if (isa(op->getParentOp()) || (isa(op) && - shouldBeInlined(cast(op)))) + emitter.shouldBeInlined(cast(op)))) return WalkResult::skip(); for (OpResult result : op->getResults()) { if (failed(emitter.emitVariableDeclaration( @@ -1494,22 +1524,6 @@ LogicalResult CppEmitter::emitExpression(ExpressionOp expressionOp) { LogicalResult CppEmitter::emitOperand(Value value) { Operation *def = value.getDefiningOp(); - if (!shouldUseConstantsAsVariables()) { - if (auto constant = dyn_cast_if_present(def)) { - os << "(("; - - if (failed(emitType(constant.getLoc(), constant.getType()))) { - return failure(); - } - os << ") "; - - if (failed(emitAttribute(constant.getLoc(), constant.getValue()))) { - return failure(); - } - os << ")"; - return success(); - } - } if (isPartOfCurrentExpression(value)) { assert(def && "Expected operand to be defined by an operation"); @@ -1721,11 +1735,7 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { cacheDeferredOpResult(op.getResult(), op.getValue()); return success(); }) - .Case([&](auto op) { - cacheDeferredOpResult(op.getResult(), createMemberAccess(op)); - return success(); - }) - .Case([&](auto op) { + .Case([&](auto op) { cacheDeferredOpResult(op.getResult(), createMemberAccess(op)); return success(); }) diff --git a/mlir/test/Target/Cpp/emitc-constants-as-variables.mlir b/mlir/test/Target/Cpp/emitc-constants-as-variables.mlir index 5774bdc47308ff..c908ecc460edff 100644 --- a/mlir/test/Target/Cpp/emitc-constants-as-variables.mlir +++ b/mlir/test/Target/Cpp/emitc-constants-as-variables.mlir @@ -11,9 +11,55 @@ func.func @test() { return } +// CPP-DEFAULT-LABEL: void test() { +// CPP-DEFAULT-NEXT: for (size_t v1 = (size_t) 0; v1 < (size_t) 10; v1 += (size_t) 1) { +// CPP-DEFAULT-NEXT: } +// CPP-DEFAULT-NEXT: return; +// CPP-DEFAULT-NEXT: } + +// ----- + +func.func @test_subscript(%arg0: !emitc.array<4xf32>) -> (!emitc.lvalue) { + %c0 = "emitc.constant"() <{value = 0 : index}> : () -> !emitc.size_t + %0 = emitc.subscript %arg0[%c0] : (!emitc.array<4xf32>, !emitc.size_t) -> !emitc.lvalue + return %0 : !emitc.lvalue +} +// CPP-DEFAULT-LABEL: float test_subscript(float v1[4]) { +// CPP-DEFAULT-NEXT: return v1[(size_t) 0]; +// CPP-DEFAULT-NEXT: } + +// ----- -// CPP-DEFAULT: void test() { -// CPP-DEFAULT-NEXT: for (size_t v1 = ((size_t) 0); v1 < ((size_t) 10); v1 += ((size_t) 1)) { +func.func @emitc_switch_ui64() { + %0 = "emitc.constant"(){value = 1 : ui64} : () -> ui64 + + emitc.switch %0 : ui64 + default { + emitc.call_opaque "func2" (%0) : (ui64) -> () + emitc.yield + } + return +} +// CPP-DEFAULT-LABEL: void emitc_switch_ui64() { +// CPP-DEFAULT: switch ((uint64_t) 1) { +// CPP-DEFAULT-NEXT: default: { +// CPP-DEFAULT-NEXT: func2((uint64_t) 1); +// CPP-DEFAULT-NEXT: break; // CPP-DEFAULT-NEXT: } // CPP-DEFAULT-NEXT: return; // CPP-DEFAULT-NEXT: } + +// ----- + +func.func @negative_values() { + %1 = "emitc.constant"() <{value = 10 : index}> : () -> !emitc.size_t + %2 = "emitc.constant"() <{value = -3000000000 : index}> : () -> !emitc.ssize_t + + %3 = emitc.add %1, %2 : (!emitc.size_t, !emitc.ssize_t) -> !emitc.ssize_t + + return +} +// CPP-DEFAULT-LABEL: void negative_values() { +// CPP-DEFAULT-NEXT: ssize_t v1 = (size_t) 10 + (ssize_t) -3000000000; +// CPP-DEFAULT-NEXT: return; +// CPP-DEFAULT-NEXT: } \ No newline at end of file From ae055899259a9dfb7229c7559fc086aaa31952b7 Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Mon, 16 Dec 2024 11:56:32 +0100 Subject: [PATCH 2/4] TOSA: concat: fix canonicalization that would result in concat with no operands The fold() of the concat had two issues: a) It would fold `concat (tensor<0x2>, tensor<0x4>), axis=1 -> tensor<0x6>` into `concat ()` (no operands) based on the observation that both operands have zero elements. This is invalid as concat needs to have at least one operand. b) After that fix, it would fold `concat (tensor<0x2>, tensor<0x4>), axis=1 -> tensor<0x6>` int `concat (tensor<0x4>), axis=1 -> tensor<0x6>`. This is also invalid; even though the concat still produces the same number of elements (= none), shape relations are not corret (0x4 input, but 0x6 output). I fixed that by only removing operands from the concat when - the operand has zero dim on the concatenation axis (i.e. it doesn't contribute to the result shape) - there is at least one operand left after removing --- .../Dialect/Tosa/IR/TosaCanonicalizations.cpp | 12 +++------ mlir/test/Dialect/Tosa/canonicalize.mlir | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 5d31b18ed7525c..1af7ba4bbd5134 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -1349,19 +1349,13 @@ OpFoldResult tosa::AbsOp::fold(FoldAdaptor adaptor) { return {}; } -static bool hasZeroSize(Type ty) { - auto ranked = dyn_cast(ty); - if (!ranked) - return false; - return any_of(ranked.getShape(), [](auto d) { return d == 0; }); -} - OpFoldResult ConcatOp::fold(FoldAdaptor adaptor) { /// Remove operands that have zero elements. bool changed = false; for (size_t i = 0; i < getInput1().size(); ) { - auto input = getInput1()[i]; - if (hasZeroSize(input.getType())) { + auto input = cast(getInput1()[i].getType()); + // Ensure that we have at least one operand left. + if (input.getDimSize(getAxis()) == 0 && getInput1().size() > 1) { getInput1Mutable().erase(i); changed = true; } else { diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index f35df639cca523..4e10833a775c39 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -204,6 +204,32 @@ func.func @concat_fold_zero(%arg0: tensor, %arg1: tensor, %arg %0 = tosa.concat %arg0, %arg1, %arg2 {axis = 1 : i32}: (tensor, tensor, tensor) -> tensor return %0 : tensor } +// ----- + +// CHECK-LABEL: @concat_fold_zero +func.func @concat_fold_zero_all(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: return %arg1 + %0 = tosa.concat %arg0, %arg1 {axis = 1 : i32}: (tensor, tensor) -> tensor + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @concat_fold_zero +func.func @concat_fold_zero_different_axis(%arg0: tensor<0x2xf32>, %arg1: tensor<0x4xf32>) -> tensor<0x6xf32> { + // CHECK: tosa.concat %arg0, %arg1 + %0 = tosa.concat %arg0, %arg1 {axis = 1 : i32}: (tensor<0x2xf32>, tensor<0x4xf32>) -> tensor<0x6xf32> + return %0 : tensor<0x6xf32> +} + +// ----- + +// CHECK-LABEL: @concat_fold_zero_size +func.func @concat_fold_zero_size(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: tosa.concat %arg1, %arg2 {axis = 1 : i32} + %0 = tosa.concat %arg0, %arg1, %arg2 {axis = 1 : i32}: (tensor, tensor, tensor) -> tensor + return %0 : tensor +} // ----- From 992dad34ac36e7f8c32e6ebbc9612264678c9aee Mon Sep 17 00:00:00 2001 From: Tina Jung Date: Tue, 17 Dec 2024 09:39:38 +0100 Subject: [PATCH 3/4] EmitC: Allow casts between opaque and float types (#428) * EmitC: Allow casts between opaque and float types * Use EmitC cast compatibility check * Allow opaque types in casts (also for array types) --- mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp | 4 ++-- mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 4 ++++ mlir/test/Dialect/EmitC/ops.mlir | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp index 938bc73d439969..6a84ead33d5c2b 100644 --- a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp +++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp @@ -757,7 +757,7 @@ class TruncFConversion : public OpConversionPattern { return rewriter.notifyMatchFailure(castOp, "unsupported cast destination type"); - if (!castOp.areCastCompatible(operandType, dstType)) + if (!emitc::CastOp::areCastCompatible(operandType, dstType)) return rewriter.notifyMatchFailure(castOp, "cast-incompatible types"); rewriter.replaceOpWithNewOp(castOp, dstType, @@ -787,7 +787,7 @@ class ExtFConversion : public OpConversionPattern { return rewriter.notifyMatchFailure(castOp, "unsupported cast destination type"); - if (!castOp.areCastCompatible(operandType, dstType)) + if (!emitc::CastOp::areCastCompatible(operandType, dstType)) return rewriter.notifyMatchFailure(castOp, "cast-incompatible types"); rewriter.replaceOpWithNewOp(castOp, dstType, diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index 8ed1d609b91818..66421c2f6fff66 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -313,6 +313,10 @@ LogicalResult emitc::AssignOp::verify() { bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { Type input = inputs.front(), output = outputs.front(); + // Opaque types are always allowed + if (isa(input) || isa(output)) + return true; + // Cast to array is only possible from an array if (isa(input) != isa(output)) return false; diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir index ad70ea61cb2958..80a33b2b9621fe 100644 --- a/mlir/test/Dialect/EmitC/ops.mlir +++ b/mlir/test/Dialect/EmitC/ops.mlir @@ -36,11 +36,15 @@ emitc.func private @extern(i32) attributes {specifiers = ["extern"]} func.func @cast(%arg0: i32) { %1 = emitc.cast %arg0: i32 to f32 + %2 = emitc.cast %1: f32 to !emitc.opaque<"some type"> + %3 = emitc.cast %2: !emitc.opaque<"some type"> to !emitc.size_t return } func.func @cast_array(%arg : !emitc.array<4xf32>) { %1 = emitc.cast %arg: !emitc.array<4xf32> to !emitc.array<4xf32> ref + %2 = emitc.cast %arg: !emitc.array<4xf32> to !emitc.opaque<"some type"> + %3 = emitc.cast %2: !emitc.opaque<"some type"> to !emitc.array<4xf32> ref return } From 55180428a10562867a5dafea01c596194fd028b9 Mon Sep 17 00:00:00 2001 From: josel-amd <166385423+josel-amd@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:17:07 +0100 Subject: [PATCH 4/4] ElementwiseOpFusion: option for disable empty (#430) * ElementwiseOpFusion: option for disable empty --------- Co-authored-by: Matthias Gehre --- mlir/include/mlir/Dialect/Linalg/Passes.td | 5 +++++ .../mlir/Dialect/Linalg/Transforms/Transforms.h | 3 ++- .../Linalg/Transforms/ElementwiseOpFusion.cpp | 11 +++++++---- .../fusion-elementwise-ops-no-remove-outs-deps.mlir | 13 +++++++++++++ 4 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/fusion-elementwise-ops-no-remove-outs-deps.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index d96ad919b65f0a..0c315102f3fce5 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -75,6 +75,11 @@ def LinalgElementwiseOpFusionPass : Pass<"linalg-fuse-elementwise-ops"> { let dependentDialects = [ "affine::AffineDialect", "linalg::LinalgDialect", "memref::MemRefDialect" ]; + let options = [ + Option<"removeOutsDependency", "remove-outs-dependency", "bool", + /*default=*/"true", + "Replace out by tensor.empty">, + ]; } def LinalgNamedOpConversionPass: Pass<"linalg-named-op-conversion"> { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 0208f854f799ec..d8920b34f7bcc8 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1701,7 +1701,8 @@ using ControlFusionFn = std::function; /// when both operations are fusable elementwise operations. void populateElementwiseOpsFusionPatterns( RewritePatternSet &patterns, - const ControlFusionFn &controlElementwiseOpFusion); + const ControlFusionFn &controlElementwiseOpFusion, + bool replaceOutsDependency = true); /// Function type which is used to control propagation of tensor.pack/unpack /// ops. diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index 6c806fb9828dc2..cd0de6dfaf9411 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -2134,11 +2134,13 @@ void mlir::linalg::populateFoldReshapeOpsByCollapsingPatterns( void mlir::linalg::populateElementwiseOpsFusionPatterns( RewritePatternSet &patterns, - const ControlFusionFn &controlElementwiseOpsFusion) { + const ControlFusionFn &controlElementwiseOpsFusion, + bool removeOutsDependency) { auto *context = patterns.getContext(); patterns.add(context, controlElementwiseOpsFusion); - patterns.add(context); + patterns.add(context); + if (removeOutsDependency) + patterns.add(context); // Add the patterns that clean up dead operands and results. populateEraseUnusedOperandsAndResultsPatterns(patterns); } @@ -2180,7 +2182,8 @@ struct LinalgElementwiseOpFusionPass }; // Add elementwise op fusion patterns. - populateElementwiseOpsFusionPatterns(patterns, defaultControlFn); + populateElementwiseOpsFusionPatterns(patterns, defaultControlFn, + removeOutsDependency); populateFoldReshapeOpsByExpansionPatterns(patterns, defaultControlFn); tensor::populateBubbleUpExpandShapePatterns(patterns); diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops-no-remove-outs-deps.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops-no-remove-outs-deps.mlir new file mode 100644 index 00000000000000..cf40f3ff519014 --- /dev/null +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops-no-remove-outs-deps.mlir @@ -0,0 +1,13 @@ +// RUN: mlir-opt %s -p 'builtin.module(func.func(linalg-fuse-elementwise-ops{remove-outs-dependency=0}))' -split-input-file | FileCheck %s + +#identity = affine_map<(d0) -> (d0)> + +func.func @keep_outs_dependency(%arg: tensor<4xf32>) -> tensor<4xf32> { + // CHECK-NOT: tensor.empty + %1 = linalg.generic {indexing_maps = [#identity, #identity], iterator_types = ["parallel"] } ins(%arg: tensor<4xf32>) outs(%arg: tensor<4xf32>) { + ^bb0(%in: f32, %out: f32): + %exp = arith.negf %in: f32 + linalg.yield %exp : f32 + } -> tensor<4xf32> + return %1 : tensor<4xf32> +}