diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 25785653a71675..df61381432921b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1412,10 +1412,11 @@ static SmallVector<int64_t> getTiledPackShape(tensor::PackOp packOp,
 /// Create a TransferReadOp from `source` with static shape `readShape`. If the
 /// vector type for the read is not the same as the type of `source`, then a
-/// mask is created on the read.
+/// mask is created on the read. If the `doMasking` parameter is set to false,
+/// the `inBounds` attribute is updated instead of masking.
 static Value createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                     Value source, ArrayRef<int64_t> readShape,
-                                    Value padValue) {
+                                    Value padValue, bool doMasking = true) {
   assert(llvm::none_of(readShape,
                        [](int64_t s) { return s == ShapedType::kDynamic; }));
   auto sourceShape = dyn_cast<ShapedType>(source.getType()).getShape();
   assert(sourceShape.size() == readShape.size());
@@ -1424,14 +1425,21 @@ static Value createReadOrMaskedRead(OpBuilder &builder, Location loc,
   auto vectorType = VectorType::get(readShape, padValue.getType());
   int64_t readRank = readShape.size();
   auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+  SmallVector<bool> inBoundsVal(readRank, true);
+  if (!doMasking) {
+    // Update the inBounds attribute.
+    for (unsigned i = 0; i < readRank; i++)
+      inBoundsVal[i] = sourceShape[i] == readShape[i];
+  }
   auto transferReadOp = builder.create<vector::TransferReadOp>(
       loc,
       /*vectorType=*/vectorType,
       /*source=*/source,
       /*indices=*/SmallVector<Value>(readRank, zero),
       /*padding=*/padValue,
-      /*inBounds=*/SmallVector<bool>(readRank, true));
-  if (llvm::equal(readShape, sourceShape)) {
+      /*inBounds=*/inBoundsVal);
+
+  if (llvm::equal(readShape, sourceShape) || !doMasking) {
     return transferReadOp;
   }
   SmallVector<OpFoldResult> mixedSourceDims =
@@ -1482,11 +1490,10 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
   return write;
 }
 
-/// Vectorize tensor::PackOp with (1) static innerTiles and (2) constant
-/// padding value into:
+/// Vectorize tensor::PackOp with (1) static innerTiles, (2) constant
+/// padding value and (3) input vector sizes into:
 /// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds
 /// As in the following example:
-///
 /// %pack = tensor.pack %src inner_dims_pos = [2, 1] inner_tiles = [16, 2]
 ///   into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32>
 ///
@@ -1505,6 +1512,10 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
 ///      %empty[%c0_0, %c0_0, %c0_0, %c0_0, %c0_0]
 ///      {in_bounds = [true, true, true, true, true]}
 ///      : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+///
+/// If the (3) input vector sizes are not provided, the vector sizes are
+/// determined by the result tensor shape and the inBounds attribute is
+/// updated instead of masking.
 static LogicalResult
 vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
                         ArrayRef<int64_t> inputVectorSizes,
@@ -1525,6 +1536,16 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
   (void)status; // prevent unused variable warning on non-assert builds.
   assert(succeeded(status) && "failed to reify result shapes");
 
+  // If the input vector sizes are not provided, then the vector sizes are
+  // determined by the result tensor shape. In that case we also update the
+  // inBounds attribute instead of masking.
+  bool doMasking = true;
+  if (inputVectorSizes.empty()) {
+    ArrayRef<int64_t> resultTensorShape = packOp.getDestType().getShape();
+    inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank());
+    doMasking = false;
+  }
+
   // Create masked TransferReadOp.
   SmallVector<int64_t> inputShape(inputVectorSizes);
   auto innerTiles = packOp.getStaticInnerTiles();
@@ -1536,7 +1557,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
   for (auto [idx, size] : enumerate(innerTiles))
     inputShape[innerDimsPos[idx]] *= size;
   auto maskedRead = createReadOrMaskedRead(rewriter, loc, packOp.getSource(),
-                                           inputShape, padValue);
+                                           inputShape, padValue, doMasking);
 
   // Create ShapeCastOp.
   SmallVector<int64_t> destShape(inputVectorSizes);
@@ -1763,7 +1784,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
 /// Returns success if `inputVectorSizes` is a valid masking configuraion for
 /// given `shape`, i.e., it meets:
 /// 1. The numbers of elements in both array are equal.
-/// 2. `inputVectorSizes` does nos have dynamic dimensions.
+/// 2. `inputVectorSizes` does not have dynamic dimensions.
 /// 3. All the values in `inputVectorSizes` are greater than or equal to
 ///    static sizes in `shape`.
 static LogicalResult
@@ -1881,18 +1902,25 @@ static LogicalResult vectorizeLinalgOpPrecondition(
   return success();
 }
 
-/// TODO: Use a matcher to check for a constant padding value.
 static LogicalResult
 vectorizePackOpPrecondition(tensor::PackOp packOp,
                             ArrayRef<int64_t> inputVectorSizes) {
   auto padValue = packOp.getPaddingValue();
-  if (padValue && !padValue.getDefiningOp<arith::ConstantOp>()) {
+  Attribute cstAttr;
+  if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) {
     LDBG("pad value is not constant: " << packOp << "\n");
     return failure();
   }
-
   ArrayRef<int64_t> resultTensorShape = packOp.getDestType().getShape();
-  if (failed(isValidMaskedInputVector(
+  bool satisfyEmptyCond = true;
+  if (inputVectorSizes.empty()) {
+    if (!packOp.getDestType().hasStaticShape() ||
+        !packOp.getSourceType().hasStaticShape())
+      satisfyEmptyCond = false;
+  }
+
+  if (!satisfyEmptyCond &&
+      failed(isValidMaskedInputVector(
           resultTensorShape.take_front(packOp.getSourceRank()),
           inputVectorSizes)))
     return failure();
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 9127eac5da9510..5d3c07c8e23c1e 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -109,3 +109,20 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+func.func @test_pack_no_vectorize_dynamic_shape(%arg0: tensor<?xf32>, %arg1: tensor<4x16xf32>) -> tensor<4x16xf32> {
+  %pad = arith.constant 0.000000e+00 : f32
+  // expected-error @+1 {{Attempted to vectorize, but failed}}
+  %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor<?xf32> -> tensor<4x16xf32>
+  return %pack : tensor<4x16xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
+    transform.yield
+  }
+}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index fd7d3b4767eb22..80a5a4c6702ac1 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -930,3 +930,58 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
     transform.yield
   }
 }
+
+// -----
+
+// CHECK-LABEL: test_vectorize_pack_no_vector_sizes
+func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
+  %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
+  return %pack : tensor<2x4x16x2xf32>
+}
+// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]]
+// CHECK-SAME:   {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
+// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
+// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
+// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
+// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
+// CHECK-SAME:   {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
+// CHECK: return %[[write]] : tensor<2x4x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
+func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+  %pad = arith.constant 0.000000e+00 : f32
+  %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+  return %pack : tensor<32x4x1x16x2xf32>
+}
+// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
+// CHECK-SAME:   {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
+// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
+// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
+// CHECK-SAME:   {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
+    transform.yield
+  }
+}
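
Note for reviewers: below is a minimal standalone sketch, not part of the patch, of the arithmetic the new no-vector-sizes path performs. It uses the shapes from the padded test case above (source tensor<32x7x15xf32>, dest tensor<32x4x1x16x2xf32>, inner_dims_pos = [2, 1], inner_tiles = [16, 2]) to show how the read shape is inferred from the destination shape and how the per-dimension in_bounds values replace the mask; all variable names are illustrative only.

  #include <cstdint>
  #include <iostream>
  #include <vector>

  int main() {
    std::vector<int64_t> sourceShape = {32, 7, 15};
    std::vector<int64_t> destShape = {32, 4, 1, 16, 2};
    std::vector<int64_t> innerDimsPos = {2, 1};
    std::vector<int64_t> innerTiles = {16, 2};
    size_t sourceRank = sourceShape.size();

    // Empty input vector sizes default to the dest shape truncated to the
    // source rank: [32, 4, 1].
    std::vector<int64_t> inputShape(destShape.begin(),
                                    destShape.begin() + sourceRank);

    // Scale each tiled dim by its tile size: [32, 4*2, 1*16] = [32, 8, 16],
    // i.e. the vector<32x8x16xf32> produced by the transfer_read.
    for (size_t idx = 0; idx < innerTiles.size(); ++idx)
      inputShape[innerDimsPos[idx]] *= innerTiles[idx];

    // With doMasking == false, a dim is in-bounds iff the read stays within
    // the source extent: [32==32, 7==8, 15==16] -> [true, false, false].
    std::vector<bool> inBoundsVal(sourceRank, true);
    for (size_t i = 0; i < sourceRank; ++i)
      inBoundsVal[i] = sourceShape[i] == inputShape[i];

    for (size_t i = 0; i < sourceRank; ++i)
      std::cout << inputShape[i]
                << (inBoundsVal[i] ? " (in-bounds)\n" : " (out-of-bounds)\n");
  }

Running it prints 32 (in-bounds), 8 (out-of-bounds), 16 (out-of-bounds), matching the vector<32x8x16xf32> read and the {in_bounds = [true, false, false]} attribute in the CHECK lines of @test_vectorize_padded_pack_no_vector_sizes.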